diff options
Diffstat (limited to 'util')
-rwxr-xr-x | util/local_database/cldr2qlocalexml.py | 31 | ||||
-rw-r--r-- | util/local_database/enumdata.py | 20 | ||||
-rwxr-xr-x | util/local_database/qlocalexml2cpp.py | 148 |
3 files changed, 177 insertions, 22 deletions
diff --git a/util/local_database/cldr2qlocalexml.py b/util/local_database/cldr2qlocalexml.py index c70cf18..fb0e5db 100755 --- a/util/local_database/cldr2qlocalexml.py +++ b/util/local_database/cldr2qlocalexml.py @@ -120,8 +120,10 @@ def generateLocaleInfo(path): if not country_code: return {} - # we do not support scripts and variants - if variant_code or script_code: + # we do not support variants + # ### actually there is only one locale with variant: en_US_POSIX + # does anybody care about it at all? + if variant_code: return {} language_id = enumdata.languageCodeToId(language_code) @@ -130,6 +132,14 @@ def generateLocaleInfo(path): return {} language = enumdata.language_list[language_id][0] + script_id = enumdata.scriptCodeToId(script_code) + if script_code == -1: + sys.stderr.write("unnknown script code \"" + script_code + "\"\n") + return {} + script = "AnyScript" + if script_id != -1: + script = enumdata.script_list[script_id][0] + country_id = enumdata.countryCodeToId(country_code) country = "" if country_id != -1: @@ -146,12 +156,14 @@ def generateLocaleInfo(path): result = {} result['language'] = language + result['script'] = script result['country'] = country result['language_code'] = language_code result['country_code'] = country_code result['script_code'] = script_code result['variant_code'] = variant_code result['language_id'] = language_id + result['script_id'] = script_id result['country_id'] = country_id supplementalPath = dir_name + "/../supplemental/supplementalData.xml" @@ -511,7 +523,7 @@ for file in cldr_files: sys.stderr.write("skipping file \"" + file + "\"\n") continue - locale_database[(l['language_id'], l['country_id'], l['script_code'], l['variant_code'])] = l + locale_database[(l['language_id'], l['script_id'], l['country_id'], l['variant_code'])] = l integrateWeekData(cldr_dir+"/../supplemental/supplementalData.xml") locale_keys = locale_database.keys() @@ -535,6 +547,16 @@ for id in enumdata.language_list: print " </language>" print " </languageList>" +print " <scriptList>" +for id in enumdata.script_list: + l = enumdata.script_list[id] + print " <script>" + print " <name>" + l[0] + "</name>" + print " <id>" + str(id) + "</id>" + print " <code>" + l[1] + "</code>" + print " </script>" +print " </scriptList>" + print " <countryList>" for id in enumdata.country_list: l = enumdata.country_list[id] @@ -653,6 +675,7 @@ print " <localeList>" print \ " <locale>\n\ <language>C</language>\n\ + <script>AnyScript</script>\n\ <country>AnyCountry</country>\n\ <decimal>46</decimal>\n\ <group>44</group>\n\ @@ -701,8 +724,10 @@ for key in locale_keys: print " <locale>" print " <language>" + l['language'] + "</language>" + print " <script>" + l['script'] + "</script>" print " <country>" + l['country'] + "</country>" print " <languagecode>" + l['language_code'] + "</languagecode>" + print " <scriptcode>" + l['script_code'] + "</scriptcode>" print " <countrycode>" + l['country_code'] + "</countrycode>" print " <decimal>" + ordStr(l['decimal']) + "</decimal>" print " <group>" + ordStr(l['group']) + "</group>" diff --git a/util/local_database/enumdata.py b/util/local_database/enumdata.py index e957349..9e0d7d8 100644 --- a/util/local_database/enumdata.py +++ b/util/local_database/enumdata.py @@ -45,6 +45,7 @@ # need to be *appended* to this list. language_list = { + 0 : [ "AnyLanguage", " " ], 1 : [ "C", " " ], 2 : [ "Abkhazian", "ab" ], 3 : [ "Afan", "om" ], @@ -511,6 +512,19 @@ country_list = { 246 : [ "LatinAmericaAndTheCaribbean", "419" ] } +script_list = { + 0 : [ "AnyScript", "" ], + 1 : [ "Arabic", "Arab" ], + 2 : [ "Cyrillic", "Cyrl" ], + 3 : [ "Deseret", "Dsrt" ], + 4 : [ "Gurmukhi", "Guru" ], + 5 : [ "Simplified Han", "Hans" ], + 6 : [ "Traditional Han", "Hant" ], + 7 : [ "Latin", "Latn" ], + 8 : [ "Mongolian", "Mong" ], + 9 : [ "Tifinagh", "Tfng" ] +} + def countryCodeToId(code): for country_id in country_list: if country_list[country_id][1] == code: @@ -522,3 +536,9 @@ def languageCodeToId(code): if language_list[language_id][1] == code: return language_id return -1 + +def scriptCodeToId(code): + for script_id in script_list: + if script_list[script_id][1] == code: + return script_id + return -1 diff --git a/util/local_database/qlocalexml2cpp.py b/util/local_database/qlocalexml2cpp.py index 8b68984..b8e4e89 100755 --- a/util/local_database/qlocalexml2cpp.py +++ b/util/local_database/qlocalexml2cpp.py @@ -46,6 +46,12 @@ import tempfile import datetime import xml.dom.minidom +class Error: + def __init__(self, msg): + self.msg = msg + def __str__(self): + return self.msg + def check_static_char_array_length(name, array): # some compilers like VC6 doesn't allow static arrays more than 64K bytes size. size = reduce(lambda x, y: x+len(escapedString(y)), array, 0) @@ -103,6 +109,20 @@ def loadLanguageMap(doc): return result +def loadScriptMap(doc): + result = {} + + script_list_elt = firstChildElt(doc.documentElement, "scriptList") + script_elt = firstChildElt(script_list_elt, "script") + while script_elt: + script_id = int(eltText(firstChildElt(script_elt, "id"))) + script_name = eltText(firstChildElt(script_elt, "name")) + script_code = eltText(firstChildElt(script_elt, "code")) + result[script_id] = (script_name, script_code) + script_elt = nextSiblingElt(script_elt, "script") + + return result + def loadCountryMap(doc): result = {} @@ -129,6 +149,15 @@ def loadDefaultMap(doc): elt = nextSiblingElt(elt, "defaultCountry"); return result +def fixedScriptName(name, dupes): + name = name.replace(" ", "") + if name[-6:] != "Script": + name = name + "Script"; + if name in dupes: + sys.stderr.write("\n\n\nERROR: The script name '%s' is messy" % name) + sys.exit(1); + return name + def fixedCountryName(name, dupes): if name in dupes: return name.replace(" ", "") + "Country" @@ -150,6 +179,12 @@ def languageNameToId(name, language_map): return key return -1 +def scriptNameToId(name, script_map): + for key in script_map.keys(): + if script_map[key][0] == name: + return key + return -1 + def countryNameToId(name, country_map): for key in country_map.keys(): if country_map[key][0] == name: @@ -202,6 +237,7 @@ def assertSingleChar(string): class Locale: def __init__(self, elt): self.language = eltText(firstChildElt(elt, "language")) + self.script = eltText(firstChildElt(elt, "script")) self.country = eltText(firstChildElt(elt, "country")) self.decimal = int(eltText(firstChildElt(elt, "decimal"))) self.group = int(eltText(firstChildElt(elt, "group"))) @@ -244,7 +280,7 @@ class Locale: self.currencyFormat = eltText(firstChildElt(elt, "currencyFormat")) self.currencyNegativeFormat = eltText(firstChildElt(elt, "currencyNegativeFormat")) -def loadLocaleMap(doc, language_map, country_map): +def loadLocaleMap(doc, language_map, script_map, country_map): result = {} locale_list_elt = firstChildElt(doc.documentElement, "localeList") @@ -253,11 +289,14 @@ def loadLocaleMap(doc, language_map, country_map): locale = Locale(locale_elt) language_id = languageNameToId(locale.language, language_map) if language_id == -1: - sys.stderr.write("Cannot find a language id for %s\n" % locale.language) + sys.stderr.write("Cannot find a language id for '%s'\n" % locale.language) + script_id = scriptNameToId(locale.script, script_map) + if script_id == -1: + sys.stderr.write("Cannot find a script id for '%s'\n" % locale.script) country_id = countryNameToId(locale.country, country_map) if country_id == -1: - sys.stderr.write("Cannot find a country id for %s\n" % locale.country) - result[(language_id, country_id)] = locale + sys.stderr.write("Cannot find a country id for '%s'\n" % locale.country) + result[(language_id, script_id, country_id)] = locale locale_elt = nextSiblingElt(locale_elt, "locale") @@ -273,14 +312,17 @@ def compareLocaleKeys(key1, key2): if l1.language in compareLocaleKeys.default_map: default = compareLocaleKeys.default_map[l1.language] - if l1.country == default: + if l1.country == default and key1[1] == 0: return -1 - if l2.country == default: + if l2.country == default and key2[1] == 0: return 1 + + if key1[1] != key2[1]: + return key1[1] - key2[1] else: return key1[0] - key2[0] - return key1[1] - key2[1] + return key1[2] - key2[2] def languageCount(language_id, locale_map): @@ -290,8 +332,25 @@ def languageCount(language_id, locale_map): result += 1 return result +def unicode2hex(s): + lst = [] + for x in s: + v = ord(x) + if v > 0xFFFF: + # make a surrogate pair + # copied from qchar.h + high = (v >> 10) + 0xd7c0 + low = (v % 0x400 + 0xdc00) + lst.append(hex(high)) + lst.append(hex(low)) + else: + lst.append(hex(v)) + return lst + class StringDataToken: def __init__(self, index, length): + if index > 0xFFFF or length > 0xFFFF: + raise Error("Position exceeds ushort range: %d,%d " % (index, length)) self.index = index self.length = length def __str__(self): @@ -305,9 +364,9 @@ class StringData: if s in self.hash: return self.hash[s] - lst = map(lambda x: hex(ord(x)), s) + lst = unicode2hex(s) index = len(self.data) - if index >= 65535: + if index > 65535: print "\n\n\n#error Data index is too big!" sys.stderr.write ("\n\n\nERROR: index exceeds the uint16 range! index = %d\n" % index) sys.exit(1) @@ -316,7 +375,12 @@ class StringData: print "\n\n\n#error Data is too big!" sys.stderr.write ("\n\n\nERROR: data size exceeds the uint16 range! size = %d\n" % size) sys.exit(1) - token = StringDataToken(index, size) + token = None + try: + token = StringDataToken(index, size) + except Error as e: + sys.stderr.write("\n\n\nERROR: %s: on data '%s'" % (e, s)) + sys.exit(1) self.hash[s] = token self.data += lst return token @@ -395,9 +459,10 @@ def main(): doc = xml.dom.minidom.parse(localexml) language_map = loadLanguageMap(doc) + script_map = loadScriptMap(doc) country_map = loadCountryMap(doc) default_map = loadDefaultMap(doc) - locale_map = loadLocaleMap(doc, language_map, country_map) + locale_map = loadLocaleMap(doc, language_map, script_map, country_map) dupes = findDupes(language_map, country_map) cldr_version = eltText(firstChildElt(doc.documentElement, "version")) @@ -416,7 +481,6 @@ def main(): # Locale index data_temp_file.write("static const quint16 locale_index[] = {\n") - data_temp_file.write(" 0, // unused\n") index = 0 for key in language_map.keys(): i = 0 @@ -443,7 +507,7 @@ def main(): # Locale data data_temp_file.write("static const QLocalePrivate locale_data[] = {\n") - data_temp_file.write("// lang terr dec group list prcnt zero minus plus exp quotStart quotEnd altQuotStart altQuotEnd sDtFmt lDtFmt sTmFmt lTmFmt ssMonth slMonth sMonth lMonth sDays lDays am,len pm,len\n") + data_temp_file.write("// lang script terr dec group list prcnt zero minus plus exp quotStart quotEnd altQuotStart altQuotEnd sDtFmt lDtFmt sTmFmt lTmFmt ssMonth slMonth sMonth lMonth sDays lDays am,len pm,len\n") locale_keys = locale_map.keys() compareLocaleKeys.default_map = default_map @@ -452,9 +516,8 @@ def main(): for key in locale_keys: l = locale_map[key] - - data_temp_file.write(" { %6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s, {%s}, %s,%s,%s,%s,%6d,%6d,%6d,%6d,%6d }, // %s/%s\n" \ - % (key[0], key[1], + data_temp_file.write(" { %6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s, {%s}, %s,%s,%s,%s,%6d,%6d,%6d,%6d,%6d }, // %s/%s/%s\n" \ + % (key[0], key[1], key[2], l.decimal, l.group, l.listDelim, @@ -496,8 +559,9 @@ def main(): l.weekendStart, l.weekendEnd, l.language, + l.script, l.country)) - data_temp_file.write(" { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, {0,0,0}, 0,0, 0,0, 0,0, 0,0, 0, 0, 0, 0, 0 } // trailing 0s\n") + data_temp_file.write(" { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, {0,0,0}, 0,0, 0,0, 0,0, 0,0, 0, 0, 0, 0, 0 } // trailing 0s\n") data_temp_file.write("};\n") data_temp_file.write("\n") @@ -586,6 +650,8 @@ def main(): data_temp_file.write("static const char language_name_list[] =\n") data_temp_file.write("\"Default\\0\"\n") for key in language_map.keys(): + if key == 0: + continue data_temp_file.write("\"" + language_map[key][0] + "\\0\"\n") data_temp_file.write(";\n") @@ -593,9 +659,11 @@ def main(): # Language name index data_temp_file.write("static const quint16 language_name_index[] = {\n") - data_temp_file.write(" 0, // Unused\n") + data_temp_file.write(" 0, // AnyLanguage\n") index = 8 for key in language_map.keys(): + if key == 0: + continue language = language_map[key][0] data_temp_file.write("%6d, // %s\n" % (index, language)) index += len(language) + 1 @@ -603,6 +671,31 @@ def main(): data_temp_file.write("\n") + # Script name list + data_temp_file.write("static const char script_name_list[] =\n") + data_temp_file.write("\"Default\\0\"\n") + for key in script_map.keys(): + if key == 0: + continue + data_temp_file.write("\"" + script_map[key][0] + "\\0\"\n") + data_temp_file.write(";\n") + + data_temp_file.write("\n") + + # Script name index + data_temp_file.write("static const quint16 script_name_index[] = {\n") + data_temp_file.write(" 0, // AnyScript\n") + index = 8 + for key in script_map.keys(): + if key == 0: + continue + script = script_map[key][0] + data_temp_file.write("%6d, // %s\n" % (index, script)) + index += len(script) + 1 + data_temp_file.write("};\n") + + data_temp_file.write("\n") + # Country name list data_temp_file.write("static const char country_name_list[] =\n") data_temp_file.write("\"Default\\0\"\n") @@ -630,7 +723,6 @@ def main(): # Language code list data_temp_file.write("static const unsigned char language_code_list[] =\n") - data_temp_file.write("\" \\0\" // Unused\n") for key in language_map.keys(): code = language_map[key][1] if len(code) == 2: @@ -640,6 +732,15 @@ def main(): data_temp_file.write("\n") + # Script code list + data_temp_file.write("static const unsigned char script_code_list[] =\n") + for key in script_map.keys(): + code = script_map[key][1] + for i in range(4 - len(code)): + code += "\\0" + data_temp_file.write("\"%2s\" // %s\n" % (code, script_map[key][0])) + data_temp_file.write(";\n") + # Country code list data_temp_file.write("static const unsigned char country_code_list[] =\n") for key in country_map.keys(): @@ -691,6 +792,15 @@ def main(): qlocaleh_temp_file.write("\n") + # Script enum + qlocaleh_temp_file.write(" enum Script {\n") + script = "" + for key in script_map.keys(): + script = fixedScriptName(script_map[key][0], dupes) + qlocaleh_temp_file.write(" " + script + " = " + str(key) + ",\n") + qlocaleh_temp_file.write(" LastScript = " + script + "\n") + qlocaleh_temp_file.write(" };\n") + # Country enum qlocaleh_temp_file.write(" enum Country {\n") country = "" |