summaryrefslogtreecommitdiffstats
path: root/src/caseconvert.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/caseconvert.py')
-rw-r--r--src/caseconvert.py59
1 files changed, 59 insertions, 0 deletions
diff --git a/src/caseconvert.py b/src/caseconvert.py
new file mode 100644
index 0000000..ef7e403
--- /dev/null
+++ b/src/caseconvert.py
@@ -0,0 +1,59 @@
+# Python 3 script that generates caseconvert.h.
+# It uses the difference between lower() and upper() on a character to build mappings
+# from a given Unicode code point to its lower or upper case form, encoded as UTF-8.
+# This also includes characters whose UTF-8 encoding is multi-byte.
+
+import codecs
+
+# Case-conversion tables: each key is a single character and each value is the
+# string that str.upper() / str.lower() returns for that character.
+toupper, tolower = {}, {}
+
+def writeMapping(file, mapping):
+    """Write one C++ ``case`` line per entry of *mapping* to *file*.
+
+    *mapping* maps a source character to its converted string. Entries are
+    written in sorted key order. Each line uses the code point of the key's
+    first character as the case label and passes the UTF-8 bytes of the
+    converted string to BSEQ(); both strings are also echoed as comments.
+    """
+    for source in sorted(mapping):
+        converted = mapping[source]
+        label = hex(ord(source[0]))
+        utf8_bytes = ",".join(f"0x{byte:02x}" for byte in converted.encode('utf-8'))
+        file.write(u" case %s /* %s */: BSEQ(%s) /* %s */;\n" %
+                   (label, source, utf8_bytes, converted))
+
+# Fill the tables: a character is recorded when the first character of its
+# lower() / upper() result is not the character itself.
+for code_point in range(0x1FFFF):
+    char = chr(code_point)
+    lowered = char.lower()
+    uppered = char.upper()
+    if lowered[0] != char:
+        tolower[char] = lowered
+    if uppered[0] != char:
+        toupper[char] = uppered
+
+# Emit the generated C++ header: a fixed prologue, the upper-case switch, the
+# lower-case switch, and the closing include guard. The `with` block ensures the
+# file is flushed and closed even if writing fails part-way. newline="\n"
+# disables platform newline translation so the output is the same everywhere.
+with open("caseconvert.h", "w", encoding="utf-8", newline="\n") as file:
+    file.write(r'''/** This file is generated by python3 caseconvert.py. DO NOT EDIT! */
+
+#ifndef CASECONVERT_H
+#define CASECONVERT_H
+
+#include <cstdint>
+#include <string>
+
+#define BSEQ(...) { static unsigned char s[] = { __VA_ARGS__, 0x00 }; \
+ return reinterpret_cast<const char *>(s); }
+
+inline const char *convertUnicodeToUpper(uint32_t code)
+{
+ switch(code)
+ {
+''')
+    writeMapping(file, toupper)
+    file.write(r''' default: return nullptr;
+ }
+}
+
+inline const char *convertUnicodeToLower(uint32_t code)
+{
+ switch(code)
+ {
+''')
+    writeMapping(file, tolower)
+    file.write(r''' default: return nullptr;
+ }
+}
+
+#endif
+''')