-
Notifications
You must be signed in to change notification settings - Fork 52
/
util.h
53 lines (44 loc) · 1.73 KB
/
util.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#pragma once
#include <fstream>
#define BARK_ASSERT(x) \
do { \
if (!(x)) { \
fprintf(stderr, "BARK_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
abort(); \
} \
} while (0)
static const size_t MB = 4*1024*1024;
template<typename T>
static void read_safe(std::ifstream& infile, T& dest) {
infile.read((char*)& dest, sizeof(T));
}
static size_t utf8_len(char src) {
const size_t lookup[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4};
uint8_t highbits = static_cast<uint8_t>(src) >> 4;
return lookup[highbits];
}
std::string strip_accents(const std::string &in_str) {
std::string out_str;
std::map<std::string, char> accent_map = {{"À", 'A'},{"Á", 'A'},
{"Â", 'A'},{"Ã", 'A'},{"Ä", 'A'},{"Å", 'A'},{"à", 'a'},{"á", 'a'},
{"â", 'a'},{"ã", 'a'},{"ä", 'a'},{"å", 'a'},{"È", 'E'},{"É", 'E'},
{"Ê", 'E'},{"Ë", 'E'},{"è", 'e'},{"é", 'e'},{"ê", 'e'},{"ë", 'e'},
{"Ì", 'I'},{"Í", 'I'},{"Î", 'I'},{"Ï", 'I'},{"ì", 'i'},{"í", 'i'},
{"î", 'i'},{"ï", 'i'},{"Ò", 'O'},{"Ó", 'O'},{"Ô", 'O'},{"Õ", 'O'},
{"Ö", 'O'},{"ò", 'o'},{"ó", 'o'},{"ô", 'o'},{"õ", 'o'},{"ö", 'o'},
{"Ù", 'U'},{"Ú", 'U'},{"Û", 'U'},{"Ü", 'U'},{"ù", 'u'},{"ú", 'u'},
{"û", 'u'},{"ü", 'u'},{"Ý", 'Y'},{"ý", 'y'},{"Ç", 'C'},{"ç", 'c'},
{"Ñ", 'N'},{"ñ", 'n'},
};
for (size_t i = 0; i < in_str.length();) {
int len = utf8_len(in_str[i]);
std::string cur = in_str.substr(i, len);
auto iter = accent_map.find(cur);
if (iter != accent_map.end())
out_str += iter->second;
else
out_str += cur;
i += len;
}
return out_str;
}