summary refs log tree commit diff stats
path: root/util
diff options
context:
space:
mode:
Diffstat (limited to 'util')
-rwxr-xr-x util/local_database/cldr2qlocalexml.py 31
-rw-r--r-- util/local_database/enumdata.py 20
-rwxr-xr-x util/local_database/qlocalexml2cpp.py 148
3 files changed, 177 insertions, 22 deletions
diff --git a/util/local_database/cldr2qlocalexml.py b/util/local_database/cldr2qlocalexml.py
index c70cf18..fb0e5db 100755
--- a/util/local_database/cldr2qlocalexml.py
+++ b/util/local_database/cldr2qlocalexml.py
@@ -120,8 +120,10 @@ def generateLocaleInfo(path):
if not country_code:
return {}
- # we do not support scripts and variants
- if variant_code or script_code:
+ # we do not support variants
+ # ### actually there is only one locale with variant: en_US_POSIX
+ # does anybody care about it at all?
+ if variant_code:
return {}
language_id = enumdata.languageCodeToId(language_code)
@@ -130,6 +132,14 @@ def generateLocaleInfo(path):
return {}
language = enumdata.language_list[language_id][0]
+    script_id = enumdata.scriptCodeToId(script_code)
+    if script_id == -1:
+        # Unknown script: report it and skip this locale instead of emitting
+        # it as "AnyScript" with an invalid id of -1.
+        sys.stderr.write("unknown script code \"" + script_code + "\"\n")
+        return {}
+    # script_id is valid here (an empty code maps to AnyScript, id 0).
+    script = enumdata.script_list[script_id][0]
+
country_id = enumdata.countryCodeToId(country_code)
country = ""
if country_id != -1:
@@ -146,12 +156,14 @@ def generateLocaleInfo(path):
result = {}
result['language'] = language
+ result['script'] = script
result['country'] = country
result['language_code'] = language_code
result['country_code'] = country_code
result['script_code'] = script_code
result['variant_code'] = variant_code
result['language_id'] = language_id
+ result['script_id'] = script_id
result['country_id'] = country_id
supplementalPath = dir_name + "/../supplemental/supplementalData.xml"
@@ -511,7 +523,7 @@ for file in cldr_files:
sys.stderr.write("skipping file \"" + file + "\"\n")
continue
- locale_database[(l['language_id'], l['country_id'], l['script_code'], l['variant_code'])] = l
+ locale_database[(l['language_id'], l['script_id'], l['country_id'], l['variant_code'])] = l
integrateWeekData(cldr_dir+"/../supplemental/supplementalData.xml")
locale_keys = locale_database.keys()
@@ -535,6 +547,16 @@ for id in enumdata.language_list:
print " </language>"
print " </languageList>"
+# Emit the <scriptList> section: one <script> element carrying the name, id
+# and code of every entry in enumdata.script_list.
+print " <scriptList>"
+for id in enumdata.script_list:
+ l = enumdata.script_list[id]
+ print " <script>"
+ print " <name>" + l[0] + "</name>"
+ print " <id>" + str(id) + "</id>"
+ print " <code>" + l[1] + "</code>"
+ print " </script>"
+print " </scriptList>"
+
print " <countryList>"
for id in enumdata.country_list:
l = enumdata.country_list[id]
@@ -653,6 +675,7 @@ print " <localeList>"
print \
" <locale>\n\
<language>C</language>\n\
+ <script>AnyScript</script>\n\
<country>AnyCountry</country>\n\
<decimal>46</decimal>\n\
<group>44</group>\n\
@@ -701,8 +724,10 @@ for key in locale_keys:
print " <locale>"
print " <language>" + l['language'] + "</language>"
+ print " <script>" + l['script'] + "</script>"
print " <country>" + l['country'] + "</country>"
print " <languagecode>" + l['language_code'] + "</languagecode>"
+ print " <scriptcode>" + l['script_code'] + "</scriptcode>"
print " <countrycode>" + l['country_code'] + "</countrycode>"
print " <decimal>" + ordStr(l['decimal']) + "</decimal>"
print " <group>" + ordStr(l['group']) + "</group>"
diff --git a/util/local_database/enumdata.py b/util/local_database/enumdata.py
index e957349..9e0d7d8 100644
--- a/util/local_database/enumdata.py
+++ b/util/local_database/enumdata.py
@@ -45,6 +45,7 @@
# need to be *appended* to this list.
language_list = {
+ 0 : [ "AnyLanguage", " " ],
1 : [ "C", " " ],
2 : [ "Abkhazian", "ab" ],
3 : [ "Afan", "om" ],
@@ -511,6 +512,19 @@ country_list = {
246 : [ "LatinAmericaAndTheCaribbean", "419" ]
}
+# Known scripts: maps script id -> [ name, code ].  The code is the string
+# matched by scriptCodeToId(); id 0 ("AnyScript") has an empty code.
+# NOTE(review): the four-letter codes look like ISO 15924 script codes - confirm.
+script_list = {
+ 0 : [ "AnyScript", "" ],
+ 1 : [ "Arabic", "Arab" ],
+ 2 : [ "Cyrillic", "Cyrl" ],
+ 3 : [ "Deseret", "Dsrt" ],
+ 4 : [ "Gurmukhi", "Guru" ],
+ 5 : [ "Simplified Han", "Hans" ],
+ 6 : [ "Traditional Han", "Hant" ],
+ 7 : [ "Latin", "Latn" ],
+ 8 : [ "Mongolian", "Mong" ],
+ 9 : [ "Tifinagh", "Tfng" ]
+}
+
def countryCodeToId(code):
for country_id in country_list:
if country_list[country_id][1] == code:
@@ -522,3 +536,9 @@ def languageCodeToId(code):
if language_list[language_id][1] == code:
return language_id
return -1
+
+def scriptCodeToId(code):
+    """Return the id of the first script_list entry whose code equals
+    `code`, or -1 when no entry matches."""
+    candidates = (key for key in script_list if script_list[key][1] == code)
+    return next(candidates, -1)
diff --git a/util/local_database/qlocalexml2cpp.py b/util/local_database/qlocalexml2cpp.py
index 8b68984..b8e4e89 100755
--- a/util/local_database/qlocalexml2cpp.py
+++ b/util/local_database/qlocalexml2cpp.py
@@ -46,6 +46,12 @@ import tempfile
import datetime
import xml.dom.minidom
+class Error(Exception):
+    """Error raised by this script; str() yields the message.
+
+    Derives from Exception so that it can be raised and caught with
+    `except Error as e` on every Python version (a plain class cannot be
+    raised on Python 3 and is a deprecated classic-class raise on Python 2).
+    """
+    def __init__(self, msg):
+        Exception.__init__(self, msg)
+        # Kept as an attribute for callers that read the message directly.
+        self.msg = msg
+    def __str__(self):
+        return self.msg
+
def check_static_char_array_length(name, array):
# some compilers like VC6 doesn't allow static arrays more than 64K bytes size.
size = reduce(lambda x, y: x+len(escapedString(y)), array, 0)
@@ -103,6 +109,20 @@ def loadLanguageMap(doc):
return result
+def loadScriptMap(doc):
+    """Build the script table from the <scriptList> element of `doc`.
+
+    Returns a dict mapping each script id (int) to a (name, code) tuple,
+    read from the <id>, <name> and <code> children of every <script>
+    element.
+    """
+    scripts = {}
+    elt = firstChildElt(firstChildElt(doc.documentElement, "scriptList"), "script")
+    while elt:
+        key = int(eltText(firstChildElt(elt, "id")))
+        scripts[key] = (eltText(firstChildElt(elt, "name")),
+                        eltText(firstChildElt(elt, "code")))
+        elt = nextSiblingElt(elt, "script")
+    return scripts
+
+
def loadCountryMap(doc):
result = {}
@@ -129,6 +149,15 @@ def loadDefaultMap(doc):
elt = nextSiblingElt(elt, "defaultCountry");
return result
+def fixedScriptName(name, dupes):
+    """Return the Script enum identifier for the script called `name`.
+
+    Spaces are removed and a "Script" suffix is appended unless the name
+    already ends with it.  If the resulting identifier is listed in
+    `dupes`, an error is written to stderr and the program exits with
+    status 1.
+    """
+    identifier = "".join(name.split(" "))
+    if not identifier.endswith("Script"):
+        identifier += "Script"
+    if identifier in dupes:
+        sys.stderr.write("\n\n\nERROR: The script name '%s' is messy" % identifier)
+        sys.exit(1)
+    return identifier
+
def fixedCountryName(name, dupes):
if name in dupes:
return name.replace(" ", "") + "Country"
@@ -150,6 +179,12 @@ def languageNameToId(name, language_map):
return key
return -1
+def scriptNameToId(name, script_map):
+    """Return the key of the first `script_map` entry whose name (element 0)
+    equals `name`, or -1 when there is no such entry."""
+    for script_id, entry in script_map.items():
+        if entry[0] == name:
+            return script_id
+    return -1
+
def countryNameToId(name, country_map):
for key in country_map.keys():
if country_map[key][0] == name:
@@ -202,6 +237,7 @@ def assertSingleChar(string):
class Locale:
def __init__(self, elt):
self.language = eltText(firstChildElt(elt, "language"))
+ self.script = eltText(firstChildElt(elt, "script"))
self.country = eltText(firstChildElt(elt, "country"))
self.decimal = int(eltText(firstChildElt(elt, "decimal")))
self.group = int(eltText(firstChildElt(elt, "group")))
@@ -244,7 +280,7 @@ class Locale:
self.currencyFormat = eltText(firstChildElt(elt, "currencyFormat"))
self.currencyNegativeFormat = eltText(firstChildElt(elt, "currencyNegativeFormat"))
-def loadLocaleMap(doc, language_map, country_map):
+def loadLocaleMap(doc, language_map, script_map, country_map):
result = {}
locale_list_elt = firstChildElt(doc.documentElement, "localeList")
@@ -253,11 +289,14 @@ def loadLocaleMap(doc, language_map, country_map):
locale = Locale(locale_elt)
language_id = languageNameToId(locale.language, language_map)
if language_id == -1:
- sys.stderr.write("Cannot find a language id for %s\n" % locale.language)
+ sys.stderr.write("Cannot find a language id for '%s'\n" % locale.language)
+ script_id = scriptNameToId(locale.script, script_map)
+ if script_id == -1:
+ sys.stderr.write("Cannot find a script id for '%s'\n" % locale.script)
country_id = countryNameToId(locale.country, country_map)
if country_id == -1:
- sys.stderr.write("Cannot find a country id for %s\n" % locale.country)
- result[(language_id, country_id)] = locale
+ sys.stderr.write("Cannot find a country id for '%s'\n" % locale.country)
+ result[(language_id, script_id, country_id)] = locale
locale_elt = nextSiblingElt(locale_elt, "locale")
@@ -273,14 +312,17 @@ def compareLocaleKeys(key1, key2):
if l1.language in compareLocaleKeys.default_map:
default = compareLocaleKeys.default_map[l1.language]
- if l1.country == default:
+ if l1.country == default and key1[1] == 0:
return -1
- if l2.country == default:
+ if l2.country == default and key2[1] == 0:
return 1
+
+ if key1[1] != key2[1]:
+ return key1[1] - key2[1]
else:
return key1[0] - key2[0]
- return key1[1] - key2[1]
+ return key1[2] - key2[2]
def languageCount(language_id, locale_map):
@@ -290,8 +332,25 @@ def languageCount(language_id, locale_map):
result += 1
return result
+def unicode2hex(s):
+    """Return the characters of `s` as a list of hex() strings.
+
+    A character whose code point is above 0xFFFF contributes two entries,
+    the high and low halves of its surrogate pair; every other character
+    contributes one.
+    """
+    units = []
+    for ch in s:
+        code = ord(ch)
+        if code <= 0xFFFF:
+            units.append(hex(code))
+            continue
+        # make a surrogate pair (formula copied from qchar.h)
+        units.append(hex((code >> 10) + 0xd7c0))
+        units.append(hex(code % 0x400 + 0xdc00))
+    return units
+
+
class StringDataToken:
def __init__(self, index, length):
+ if index > 0xFFFF or length > 0xFFFF:
+ raise Error("Position exceeds ushort range: %d,%d " % (index, length))
self.index = index
self.length = length
def __str__(self):
@@ -305,9 +364,9 @@ class StringData:
if s in self.hash:
return self.hash[s]
- lst = map(lambda x: hex(ord(x)), s)
+ lst = unicode2hex(s)
index = len(self.data)
- if index >= 65535:
+ if index > 65535:
print "\n\n\n#error Data index is too big!"
sys.stderr.write ("\n\n\nERROR: index exceeds the uint16 range! index = %d\n" % index)
sys.exit(1)
@@ -316,7 +375,12 @@ class StringData:
print "\n\n\n#error Data is too big!"
sys.stderr.write ("\n\n\nERROR: data size exceeds the uint16 range! size = %d\n" % size)
sys.exit(1)
- token = StringDataToken(index, size)
+ token = None
+ try:
+ token = StringDataToken(index, size)
+ except Error as e:
+ sys.stderr.write("\n\n\nERROR: %s: on data '%s'" % (e, s))
+ sys.exit(1)
self.hash[s] = token
self.data += lst
return token
@@ -395,9 +459,10 @@ def main():
doc = xml.dom.minidom.parse(localexml)
language_map = loadLanguageMap(doc)
+ script_map = loadScriptMap(doc)
country_map = loadCountryMap(doc)
default_map = loadDefaultMap(doc)
- locale_map = loadLocaleMap(doc, language_map, country_map)
+ locale_map = loadLocaleMap(doc, language_map, script_map, country_map)
dupes = findDupes(language_map, country_map)
cldr_version = eltText(firstChildElt(doc.documentElement, "version"))
@@ -416,7 +481,6 @@ def main():
# Locale index
data_temp_file.write("static const quint16 locale_index[] = {\n")
- data_temp_file.write(" 0, // unused\n")
index = 0
for key in language_map.keys():
i = 0
@@ -443,7 +507,7 @@ def main():
# Locale data
data_temp_file.write("static const QLocalePrivate locale_data[] = {\n")
- data_temp_file.write("// lang terr dec group list prcnt zero minus plus exp quotStart quotEnd altQuotStart altQuotEnd sDtFmt lDtFmt sTmFmt lTmFmt ssMonth slMonth sMonth lMonth sDays lDays am,len pm,len\n")
+ data_temp_file.write("// lang script terr dec group list prcnt zero minus plus exp quotStart quotEnd altQuotStart altQuotEnd sDtFmt lDtFmt sTmFmt lTmFmt ssMonth slMonth sMonth lMonth sDays lDays am,len pm,len\n")
locale_keys = locale_map.keys()
compareLocaleKeys.default_map = default_map
@@ -452,9 +516,8 @@ def main():
for key in locale_keys:
l = locale_map[key]
-
- data_temp_file.write(" { %6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s, {%s}, %s,%s,%s,%s,%6d,%6d,%6d,%6d,%6d }, // %s/%s\n" \
- % (key[0], key[1],
+ data_temp_file.write(" { %6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s, {%s}, %s,%s,%s,%s,%6d,%6d,%6d,%6d,%6d }, // %s/%s/%s\n" \
+ % (key[0], key[1], key[2],
l.decimal,
l.group,
l.listDelim,
@@ -496,8 +559,9 @@ def main():
l.weekendStart,
l.weekendEnd,
l.language,
+ l.script,
l.country))
- data_temp_file.write(" { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, {0,0,0}, 0,0, 0,0, 0,0, 0,0, 0, 0, 0, 0, 0 } // trailing 0s\n")
+ data_temp_file.write(" { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, {0,0,0}, 0,0, 0,0, 0,0, 0,0, 0, 0, 0, 0, 0 } // trailing 0s\n")
data_temp_file.write("};\n")
data_temp_file.write("\n")
@@ -586,6 +650,8 @@ def main():
data_temp_file.write("static const char language_name_list[] =\n")
data_temp_file.write("\"Default\\0\"\n")
for key in language_map.keys():
+ if key == 0:
+ continue
data_temp_file.write("\"" + language_map[key][0] + "\\0\"\n")
data_temp_file.write(";\n")
@@ -593,9 +659,11 @@ def main():
# Language name index
data_temp_file.write("static const quint16 language_name_index[] = {\n")
- data_temp_file.write(" 0, // Unused\n")
+ data_temp_file.write(" 0, // AnyLanguage\n")
index = 8
for key in language_map.keys():
+ if key == 0:
+ continue
language = language_map[key][0]
data_temp_file.write("%6d, // %s\n" % (index, language))
index += len(language) + 1
@@ -603,6 +671,31 @@ def main():
data_temp_file.write("\n")
+ # Script name list
+ data_temp_file.write("static const char script_name_list[] =\n")
+ data_temp_file.write("\"Default\\0\"\n")
+ for key in script_map.keys():
+ if key == 0:
+ continue
+ data_temp_file.write("\"" + script_map[key][0] + "\\0\"\n")
+ data_temp_file.write(";\n")
+
+ data_temp_file.write("\n")
+
+ # Script name index
+ data_temp_file.write("static const quint16 script_name_index[] = {\n")
+ data_temp_file.write(" 0, // AnyScript\n")
+ index = 8
+ for key in script_map.keys():
+ if key == 0:
+ continue
+ script = script_map[key][0]
+ data_temp_file.write("%6d, // %s\n" % (index, script))
+ index += len(script) + 1
+ data_temp_file.write("};\n")
+
+ data_temp_file.write("\n")
+
# Country name list
data_temp_file.write("static const char country_name_list[] =\n")
data_temp_file.write("\"Default\\0\"\n")
@@ -630,7 +723,6 @@ def main():
# Language code list
data_temp_file.write("static const unsigned char language_code_list[] =\n")
- data_temp_file.write("\" \\0\" // Unused\n")
for key in language_map.keys():
code = language_map[key][1]
if len(code) == 2:
@@ -640,6 +732,15 @@ def main():
data_temp_file.write("\n")
+ # Script code list
+ data_temp_file.write("static const unsigned char script_code_list[] =\n")
+ for key in script_map.keys():
+ code = script_map[key][1]
+ for i in range(4 - len(code)):
+ code += "\\0"
+ data_temp_file.write("\"%2s\" // %s\n" % (code, script_map[key][0]))
+ data_temp_file.write(";\n")
+
# Country code list
data_temp_file.write("static const unsigned char country_code_list[] =\n")
for key in country_map.keys():
@@ -691,6 +792,15 @@ def main():
qlocaleh_temp_file.write("\n")
+ # Script enum
+ qlocaleh_temp_file.write(" enum Script {\n")
+ script = ""
+ for key in script_map.keys():
+ script = fixedScriptName(script_map[key][0], dupes)
+ qlocaleh_temp_file.write(" " + script + " = " + str(key) + ",\n")
+ qlocaleh_temp_file.write(" LastScript = " + script + "\n")
+ qlocaleh_temp_file.write(" };\n")
+
# Country enum
qlocaleh_temp_file.write(" enum Country {\n")
country = ""