blob: ef7e4038b7aa9127d723399fae0bb4fe991d68b2 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
|
# Python 3 script to generate caseconvert.h.
# It uses the difference between lower() and upper() on a character to build mappings
# from a given Unicode code point to its lower or upper case form, encoded as UTF-8 bytes.
# This also includes multi-byte characters.
import codecs
# Character -> its upper() result, filled in for characters whose upper()
# form differs from the character itself.
toupper = dict()
# Character -> its lower() result, filled in the same way.
tolower = dict()
def writeMapping(file, mapping):
    """Write one C ``case`` line per entry of *mapping* to *file*.

    Args:
        file: Writable text stream; only its ``write`` method is used.
        mapping: Dict from a character to its case-converted string.

    Entries are written in sorted key order. Each line carries the key's
    code point in hex, the key itself in a comment, the UTF-8 bytes of the
    converted string wrapped in ``BSEQ(...)``, and the converted string in
    a trailing comment.
    """
    for char, converted in sorted(mapping.items()):
        # The converted string is emitted byte by byte so the generated C
        # code does not depend on the compiler's source encoding.
        utf8_bytes = ",".join(f"0x{b:02x}" for b in converted.encode("utf-8"))
        file.write(
            f" case {hex(ord(char[0]))} /* {char} */: "
            f"BSEQ({utf8_bytes}) /* {converted} */;\n"
        )
# Create mappings of characters whose upper or lower case form differs from
# the character itself.
# NOTE: range() excludes its end value, so U+1FFFF itself is not scanned.
for codeValue in range(0, 0x1FFFF):
    s = chr(codeValue)
    sl = s.lower()
    su = s.upper()
    # Compare the whole strings: a case conversion can return more than one
    # code point, so checking only the first one could miss a difference.
    if sl != s:
        tolower[s] = sl
    if su != s:
        toupper[s] = su
# Emit the generated C++ header. The with-statement guarantees the file is
# flushed and closed even if writing fails part-way. newline="\n" disables
# newline translation, matching the behaviour of the codecs.open() call this
# replaces (which always opens the underlying file in binary mode).
with open("caseconvert.h", "w", encoding="utf-8", newline="\n") as file:
    # Header preamble and the opening of the to-upper switch.
    file.write(r'''/** This file is generated by python3 caseconvert.py. DO NOT EDIT! */
#ifndef CASECONVERT_H
#define CASECONVERT_H
#include <cstdint>
#include <string>
#define BSEQ(...) { static unsigned char s[] = { __VA_ARGS__, 0x00 }; \
return reinterpret_cast<const char *>(s); }
inline const char *convertUnicodeToUpper(uint32_t code)
{
switch(code)
{
''')
    writeMapping(file, toupper)
    # Close the to-upper switch and open the to-lower one.
    file.write(r''' default: return nullptr;
}
}
inline const char *convertUnicodeToLower(uint32_t code)
{
switch(code)
{
''')
    writeMapping(file, tolower)
    # Close the to-lower switch and the include guard.
    file.write(r''' default: return nullptr;
}
}
#endif
''')
|