4 files changed, 729 insertions, 108 deletions
diff --git a/util/local_database/cldr2qlocalexml.py b/util/local_database/cldr2qlocalexml.py
index 2567a89..a67971b 100755
--- a/util/local_database/cldr2qlocalexml.py
+++ b/util/local_database/cldr2qlocalexml.py
@@ -50,6 +50,45 @@ import re
 
 findEntry = xpathlite.findEntry
 findEntryInFile = xpathlite._findEntryInFile
+findTagsInFile = xpathlite.findTagsInFile
+
+def parse_number_format(patterns, data):
+    # Very limited parsing of a number format pattern, meant for currency only; returns one converted string per ';'-separated sub-pattern.
+    def skip_repeating_pattern(x):  # collapses every run of digit placeholders into a single '#'
+        p = x.replace('0', '#').replace(',', '').replace('.', '')  # '0' is treated like '#'; ',' and '.' are dropped
+        seen = False  # True while we are inside a run of '#'
+        result = ''
+        for c in p:
+            if c == '#':
+                if seen:
+                    continue
+                seen = True
+            else:
+                seen = False
+            result = result + c
+        return result
+    patterns = patterns.split(';')  # the caller uses item 0 as the format and item 1, if present, as the negative format
+    result = []
+    for pattern in patterns:
+        pattern = skip_repeating_pattern(pattern)
+        pattern = pattern.replace('#', "%1")  # %1 marks where the number goes
+        # According to http://www.unicode.org/reports/tr35/#Number_Format_Patterns
+        # the currency sign may be doubled or tripled; however, none of the
+        # locales use that, so every sign is simply mapped to %2.
+        pattern = pattern.replace(u'\xa4', "%2")
+        pattern = pattern.replace("''", "###").replace("'", '').replace("###", "'")  # '' becomes a literal ', lone quotes are removed; "###" is a safe temporary marker because all '#' are gone by now
+        pattern = pattern.replace('-', data['minus'])  # substitute the minus sign stored in data
+        pattern = pattern.replace('+', data['plus'])  # substitute the plus sign stored in data
+        result.append(pattern)
+    return result
+
+def parse_list_pattern_part_format(pattern):
+    # this is a very limited parsing of the format for list pattern part only:
+    # the placeholders {0}, {1}, {2} are rewritten as %1, %2, %3 and the new string is returned.
+    result = pattern.replace("{0}", "%1")
+    result = result.replace("{1}", "%2")
+    result = result.replace("{2}", "%3")
+    return result
 
 def ordStr(c):
     if len(c) == 1:
@@ -89,8 +128,10 @@ def generateLocaleInfo(path):
     if not country_code:
         return {}
 
-    # we do not support scripts and variants
-    if variant_code or script_code:
+    # we do not support variants
+    # ### actually there is only one locale with variant: en_US_POSIX
+    #     does anybody care about it at all?
+    if variant_code:
         return {}
 
     language_id = enumdata.languageCodeToId(language_code)
@@ -99,6 +140,14 @@ def generateLocaleInfo(path):
         return {}
     language = enumdata.language_list[language_id][0]
 
+    # An empty script code matches entry 0 ("AnyScript") of enumdata.script_list;
+    # only a code that is missing from that list yields -1 and is rejected here.
+    script_id = enumdata.scriptCodeToId(script_code)
+    if script_id == -1:
+        sys.stderr.write("unknown script code \"" + script_code + "\"\n")
+        return {}
+    script = enumdata.script_list[script_id][0]
+
     country_id = enumdata.countryCodeToId(country_code)
     country = ""
     if country_id != -1:
@@ -115,19 +164,46 @@ def generateLocaleInfo(path):
 
     result = {}
     result['language'] = language
+    result['script'] = script
     result['country'] = country
     result['language_code'] = language_code
     result['country_code'] = country_code
     result['script_code'] = script_code
     result['variant_code'] = variant_code
     result['language_id'] = language_id
+    result['script_id'] = script_id
     result['country_id'] = country_id
 
+    supplementalPath = dir_name + "/../supplemental/supplementalData.xml"
+    currencies = findTagsInFile(supplementalPath, "currencyData/region[iso3166=%s]"%country_code);
+    result['currencyIsoCode'] = ''
+    result['currencyDigits'] = 2
+    result['currencyRounding'] = 1
+    if currencies:
+        for e in currencies:
+            if e[0] == 'currency':
+                tender = True
+                t = filter(lambda x: x[0] == 'tender', e[1])
+                if t and t[0][1] == 'false':
+                    tender = False;
+                if tender and not filter(lambda x: x[0] == 'to', e[1]):
+                    result['currencyIsoCode'] = filter(lambda x: x[0] == 'iso4217', e[1])[0][1]
+                    break
+        if result['currencyIsoCode']:
+            t = findTagsInFile(supplementalPath, "currencyData/fractions/info[iso4217=%s]"%result['currencyIsoCode']);
+            if t and t[0][0] == 'info':
+                result['currencyDigits'] = int(filter(lambda x: x[0] == 'digits', t[0][1])[0][1])
+                result['currencyRounding'] = int(filter(lambda x: x[0] == 'rounding', t[0][1])[0][1])
     numbering_system = None
     try:
         numbering_system = findEntry(path, "numbers/defaultNumberingSystem")
     except:
         pass
+    def findEntryDef(path, xpath, value=''):
+        try:
+            return findEntry(path, xpath)
+        except xpathlite.Error:
+            return value
     def get_number_in_system(path, xpath, numbering_system):
         if numbering_system:
             try:
@@ -143,6 +219,14 @@ def generateLocaleInfo(path):
     result['minus'] = get_number_in_system(path, "numbers/symbols/minusSign", numbering_system)
     result['plus'] = get_number_in_system(path, "numbers/symbols/plusSign", numbering_system)
     result['exp'] = get_number_in_system(path, "numbers/symbols/exponential", numbering_system).lower()
+    result['quotationStart'] = findEntry(path, "delimiters/quotationStart")
+    result['quotationEnd'] = findEntry(path, "delimiters/quotationEnd")
+    result['alternateQuotationStart'] = findEntry(path, "delimiters/alternateQuotationStart")
+    result['alternateQuotationEnd'] = findEntry(path, "delimiters/alternateQuotationEnd")
+    result['listPatternPartStart'] = parse_list_pattern_part_format(findEntry(path, "listPatterns/listPattern/listPatternPart[start]"))
+    result['listPatternPartMiddle'] = parse_list_pattern_part_format(findEntry(path, "listPatterns/listPattern/listPatternPart[middle]"))
+    result['listPatternPartEnd'] = parse_list_pattern_part_format(findEntry(path, "listPatterns/listPattern/listPatternPart[end]"))
+    result['listPatternPartTwo'] = parse_list_pattern_part_format(findEntry(path, "listPatterns/listPattern/listPatternPart[2]"))
     result['am'] = findEntry(path, "dates/calendars/calendar[gregorian]/dayPeriods/dayPeriodContext[format]/dayPeriodWidth[wide]/dayPeriod[am]", draft)
     result['pm'] = findEntry(path, "dates/calendars/calendar[gregorian]/dayPeriods/dayPeriodContext[format]/dayPeriodWidth[wide]/dayPeriod[pm]", draft)
     result['longDateFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/dateFormats/dateFormatLength[full]/dateFormat/pattern"))
@@ -150,6 +234,27 @@ def generateLocaleInfo(path):
     result['longTimeFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/timeFormats/timeFormatLength[full]/timeFormat/pattern"))
     result['shortTimeFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/timeFormats/timeFormatLength[short]/timeFormat/pattern"))
 
+    currency_format = get_number_in_system(path, "numbers/currencyFormats/currencyFormatLength/currencyFormat/pattern", numbering_system)
+    currency_format = parse_number_format(currency_format, result)
+    result['currencyFormat'] = currency_format[0]
+    result['currencyNegativeFormat'] = ''
+    if len(currency_format) > 1:
+        result['currencyNegativeFormat'] = currency_format[1]
+
+    result['currencySymbol'] = ''
+    result['currencyDisplayName'] = ''
+    if result['currencyIsoCode']:
+        result['currencySymbol'] = findEntryDef(path, "numbers/currencies/currency[%s]/symbol" % result['currencyIsoCode'])
+        display_name_path = "numbers/currencies/currency[%s]/displayName" % result['currencyIsoCode']
+        result['currencyDisplayName'] \
+            = findEntryDef(path, display_name_path) + ";" \
+            + findEntryDef(path, display_name_path + "[count=zero]")  + ";" \
+            + findEntryDef(path, display_name_path + "[count=one]")   + ";" \
+            + findEntryDef(path, display_name_path + "[count=two]")   + ";" \
+            + findEntryDef(path, display_name_path + "[count=few]")   + ";" \
+            + findEntryDef(path, display_name_path + "[count=many]")  + ";" \
+            + findEntryDef(path, display_name_path + "[count=other]") + ";"
+
     standalone_long_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[stand-alone]/monthWidth[wide]/month"
     result['standaloneLongMonths'] \
         = findEntry(path, standalone_long_month_path + "[1]") + ";" \
@@ -300,7 +405,6 @@ def generateLocaleInfo(path):
         + findEntry(path, standalone_narrow_day_path + "[fri]") + ";" \
         + findEntry(path, standalone_narrow_day_path + "[sat]") + ";"
 
-
     return result
 
 def addEscapes(s):
@@ -322,6 +426,98 @@ def usage():
     print "Usage: cldr2qlocalexml.py <path-to-cldr-main>"
     sys.exit()
 
+def integrateWeekData(filePath):  # stores firstDayOfWeek / weekendStart / weekendEnd in every entry of the module-level locale_database
+    if not filePath.endswith(".xml"):
+        return {}  # NOTE(review): the normal path below returns None implicitly; the visible caller ignores the result
+    monFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=mon]", attribute="territories")[0].split(" ")  # space-separated "territories" attribute -> list of codes
+    tueFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=tue]", attribute="territories")[0].split(" ")
+    wedFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=wed]", attribute="territories")[0].split(" ")
+    thuFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=thu]", attribute="territories")[0].split(" ")
+    friFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=fri]", attribute="territories")[0].split(" ")
+    satFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=sat]", attribute="territories")[0].split(" ")
+    sunFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=sun]", attribute="territories")[0].split(" ")
+
+    monWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=mon]", attribute="territories")[0].split(" ")  # same lookup for the weekendStart elements
+    tueWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=tue]", attribute="territories")[0].split(" ")
+    wedWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=wed]", attribute="territories")[0].split(" ")
+    thuWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=thu]", attribute="territories")[0].split(" ")
+    friWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=fri]", attribute="territories")[0].split(" ")
+    satWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=sat]", attribute="territories")[0].split(" ")
+    sunWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=sun]", attribute="territories")[0].split(" ")
+
+    monWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=mon]", attribute="territories")[0].split(" ")  # same lookup for the weekendEnd elements
+    tueWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=tue]", attribute="territories")[0].split(" ")
+    wedWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=wed]", attribute="territories")[0].split(" ")
+    thuWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=thu]", attribute="territories")[0].split(" ")
+    friWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=fri]", attribute="territories")[0].split(" ")
+    satWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=sat]", attribute="territories")[0].split(" ")
+    sunWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=sun]", attribute="territories")[0].split(" ")
+
+    firstDayByCountryCode = {}  # territory code -> day abbreviation; a code listed under several days keeps the last one assigned
+    for countryCode in monFirstDayIn:
+        firstDayByCountryCode[countryCode] = "mon"
+    for countryCode in tueFirstDayIn:
+        firstDayByCountryCode[countryCode] = "tue"
+    for countryCode in wedFirstDayIn:
+        firstDayByCountryCode[countryCode] = "wed"
+    for countryCode in thuFirstDayIn:
+        firstDayByCountryCode[countryCode] = "thu"
+    for countryCode in friFirstDayIn:
+        firstDayByCountryCode[countryCode] = "fri"
+    for countryCode in satFirstDayIn:
+        firstDayByCountryCode[countryCode] = "sat"
+    for countryCode in sunFirstDayIn:
+        firstDayByCountryCode[countryCode] = "sun"
+
+    weekendStartByCountryCode = {}  # territory code -> day abbreviation on which the weekend starts
+    for countryCode in monWeekendStart:
+        weekendStartByCountryCode[countryCode] = "mon"
+    for countryCode in tueWeekendStart:
+        weekendStartByCountryCode[countryCode] = "tue"
+    for countryCode in wedWeekendStart:
+        weekendStartByCountryCode[countryCode] = "wed"
+    for countryCode in thuWeekendStart:
+        weekendStartByCountryCode[countryCode] = "thu"
+    for countryCode in friWeekendStart:
+        weekendStartByCountryCode[countryCode] = "fri"
+    for countryCode in satWeekendStart:
+        weekendStartByCountryCode[countryCode] = "sat"
+    for countryCode in sunWeekendStart:
+        weekendStartByCountryCode[countryCode] = "sun"
+
+    weekendEndByCountryCode = {}  # territory code -> day abbreviation on which the weekend ends
+    for countryCode in monWeekendEnd:
+        weekendEndByCountryCode[countryCode] = "mon"
+    for countryCode in tueWeekendEnd:
+        weekendEndByCountryCode[countryCode] = "tue"
+    for countryCode in wedWeekendEnd:
+        weekendEndByCountryCode[countryCode] = "wed"
+    for countryCode in thuWeekendEnd:
+        weekendEndByCountryCode[countryCode] = "thu"
+    for countryCode in friWeekendEnd:
+        weekendEndByCountryCode[countryCode] = "fri"
+    for countryCode in satWeekendEnd:
+        weekendEndByCountryCode[countryCode] = "sat"
+    for countryCode in sunWeekendEnd:
+        weekendEndByCountryCode[countryCode] = "sun"
+
+    for (key,locale) in locale_database.iteritems():  # values are updated in place; no keys are added or removed
+        countryCode = locale['country_code']
+        if countryCode in firstDayByCountryCode:
+            locale_database[key]['firstDayOfWeek'] = firstDayByCountryCode[countryCode]
+        else:
+            locale_database[key]['firstDayOfWeek'] = firstDayByCountryCode["001"]  # fallback entry; presumably "001" is CLDR's world-wide default - confirm; KeyError if it is absent
+
+        if countryCode in weekendStartByCountryCode:
+            locale_database[key]['weekendStart'] = weekendStartByCountryCode[countryCode]
+        else:
+            locale_database[key]['weekendStart'] = weekendStartByCountryCode["001"]  # same "001" fallback
+
+        if countryCode in weekendEndByCountryCode:
+            locale_database[key]['weekendEnd'] = weekendEndByCountryCode[countryCode]
+        else:
+            locale_database[key]['weekendEnd'] = weekendEndByCountryCode["001"]  # same "001" fallback
+
 if len(sys.argv) != 2:
     usage()
 
@@ -339,12 +535,20 @@ for file in cldr_files:
         sys.stderr.write("skipping file \"" + file + "\"\n")
         continue
 
-    locale_database[(l['language_id'], l['country_id'], l['script_code'], l['variant_code'])] = l
+    locale_database[(l['language_id'], l['script_id'], l['country_id'], l['variant_code'])] = l
 
+integrateWeekData(cldr_dir+"/../supplemental/supplementalData.xml")
 locale_keys = locale_database.keys()
 locale_keys.sort()
 
+cldr_version = 'unknown'  # stays 'unknown' when the DTD has no matching line
+with open(cldr_dir+"/../dtd/ldml.dtd", "r") as ldml:  # 'with' closes the DTD file again (needs Python >= 2.6)
+    for line in ldml:
+        if 'version cldrVersion CDATA #FIXED' in line:
+            cldr_version = line.split('"')[1]  # text between the first pair of double quotes on that line
+
 print "<localeDatabase>"
+print "    <version>" + cldr_version + "</version>"
 print "    <languageList>"
 for id in enumdata.language_list:
     l = enumdata.language_list[id]
@@ -355,6 +559,16 @@ for id in enumdata.language_list:
     print "        </language>"
 print "    </languageList>"
 
+print "    <scriptList>"
+for id in enumdata.script_list:
+    l = enumdata.script_list[id]
+    print "        <script>"
+    print "            <name>" + l[0] + "</name>"
+    print "            <id>" + str(id) + "</id>"
+    print "            <code>" + l[1] + "</code>"
+    print "        </script>"
+print "    </scriptList>"
+
 print "    <countryList>"
 for id in enumdata.country_list:
     l = enumdata.country_list[id]
@@ -473,6 +687,7 @@ print "    <localeList>"
 print \
 "        <locale>\n\
             <language>C</language>\n\
+            <script>AnyScript</script>\n\
             <country>AnyCountry</country>\n\
             <decimal>46</decimal>\n\
             <group>44</group>\n\
@@ -482,8 +697,19 @@ print \
             <minus>45</minus>\n\
             <plus>43</plus>\n\
             <exp>101</exp>\n\
+            <quotationStart>\"</quotationStart>\n\
+            <quotationEnd>\"</quotationEnd>\n\
+            <alternateQuotationStart>\'</alternateQuotationStart>\n\
+            <alternateQuotationEnd>\'</alternateQuotationEnd>\n\
+            <listPatternPartStart>%1, %2</listPatternPartStart>\n\
+            <listPatternPartMiddle>%1, %2</listPatternPartMiddle>\n\
+            <listPatternPartEnd>%1, %2</listPatternPartEnd>\n\
+            <listPatternPartTwo>%1, %2</listPatternPartTwo>\n\
             <am>AM</am>\n\
             <pm>PM</pm>\n\
+            <firstDayOfWeek>mon</firstDayOfWeek>\n\
+            <weekendStart>sat</weekendStart>\n\
+            <weekendEnd>sun</weekendEnd>\n\
             <longDateFormat>EEEE, d MMMM yyyy</longDateFormat>\n\
             <shortDateFormat>d MMM yyyy</shortDateFormat>\n\
             <longTimeFormat>HH:mm:ss z</longTimeFormat>\n\
@@ -500,6 +726,13 @@ print \
             <standaloneLongDays>Sunday;Monday;Tuesday;Wednesday;Thursday;Friday;Saturday;</standaloneLongDays>\n\
             <standaloneShortDays>Sun;Mon;Tue;Wed;Thu;Fri;Sat;</standaloneShortDays>\n\
             <standaloneNarrowDays>S;M;T;W;T;F;S;</standaloneNarrowDays>\n\
+            <currencyIsoCode></currencyIsoCode>\n\
+            <currencySymbol></currencySymbol>\n\
+            <currencyDisplayName>;;;;;;;</currencyDisplayName>\n\
+            <currencyDigits>2</currencyDigits>\n\
+            <currencyRounding>1</currencyRounding>\n\
+            <currencyFormat>%1%2</currencyFormat>\n\
+            <currencyNegativeFormat></currencyNegativeFormat>\n\
         </locale>"
 
 for key in locale_keys:
@@ -507,8 +740,10 @@ for key in locale_keys:
 
     print "        <locale>"
     print "            <language>" + l['language']        + "</language>"
+    print "            <script>" + l['script']        + "</script>"
     print "            <country>"  + l['country']         + "</country>"
     print "            <languagecode>" + l['language_code']        + "</languagecode>"
+    print "            <scriptcode>" + l['script_code']        + "</scriptcode>"
     print "            <countrycode>"  + l['country_code']         + "</countrycode>"
     print "            <decimal>"  + ordStr(l['decimal']) + "</decimal>"
     print "            <group>"    + ordStr(l['group'])   + "</group>"
@@ -518,8 +753,19 @@ for key in locale_keys:
     print "            <minus>"    + ordStr(l['minus'])   + "</minus>"
     print "            <plus>"     + ordStr(l['plus'])   + "</plus>"
     print "            <exp>"      + fixOrdStrExp(l['exp'])     + "</exp>"
+    print "            <quotationStart>" + l['quotationStart'].encode('utf-8') + "</quotationStart>"
+    print "            <quotationEnd>" + l['quotationEnd'].encode('utf-8')   + "</quotationEnd>"
+    print "            <alternateQuotationStart>" + l['alternateQuotationStart'].encode('utf-8') + "</alternateQuotationStart>"
+    print "            <alternateQuotationEnd>" + l['alternateQuotationEnd'].encode('utf-8')   + "</alternateQuotationEnd>"
+    print "            <listPatternPartStart>" + l['listPatternPartStart'].encode('utf-8')   + "</listPatternPartStart>"
+    print "            <listPatternPartMiddle>" + l['listPatternPartMiddle'].encode('utf-8')   + "</listPatternPartMiddle>"
+    print "            <listPatternPartEnd>" + l['listPatternPartEnd'].encode('utf-8')   + "</listPatternPartEnd>"
+    print "            <listPatternPartTwo>" + l['listPatternPartTwo'].encode('utf-8')   + "</listPatternPartTwo>"
     print "            <am>"       + l['am'].encode('utf-8') + "</am>"
     print "            <pm>"       + l['pm'].encode('utf-8') + "</pm>"
+    print "            <firstDayOfWeek>"  + l['firstDayOfWeek'].encode('utf-8') + "</firstDayOfWeek>"
+    print "            <weekendStart>"  + l['weekendStart'].encode('utf-8') + "</weekendStart>"
+    print "            <weekendEnd>"  + l['weekendEnd'].encode('utf-8') + "</weekendEnd>"
     print "            <longDateFormat>"  + l['longDateFormat'].encode('utf-8')  + "</longDateFormat>"
     print "            <shortDateFormat>" + l['shortDateFormat'].encode('utf-8') + "</shortDateFormat>"
     print "            <longTimeFormat>"  + l['longTimeFormat'].encode('utf-8')  + "</longTimeFormat>"
@@ -536,6 +782,13 @@ for key in locale_keys:
     print "            <standaloneLongDays>" + l['standaloneLongDays'].encode('utf-8')        + "</standaloneLongDays>"
     print "            <standaloneShortDays>" + l['standaloneShortDays'].encode('utf-8')       + "</standaloneShortDays>"
     print "            <standaloneNarrowDays>" + l['standaloneNarrowDays'].encode('utf-8')       + "</standaloneNarrowDays>"
+    print "            <currencyIsoCode>" + l['currencyIsoCode'].encode('utf-8') + "</currencyIsoCode>"
+    print "            <currencySymbol>" + l['currencySymbol'].encode('utf-8') + "</currencySymbol>"
+    print "            <currencyDisplayName>" + l['currencyDisplayName'].encode('utf-8') + "</currencyDisplayName>"
+    print "            <currencyDigits>" + str(l['currencyDigits']) + "</currencyDigits>"
+    print "            <currencyRounding>" + str(l['currencyRounding']) + "</currencyRounding>"
+    print "            <currencyFormat>" + l['currencyFormat'].encode('utf-8') + "</currencyFormat>"
+    print "            <currencyNegativeFormat>" + l['currencyNegativeFormat'].encode('utf-8') + "</currencyNegativeFormat>"
     print "        </locale>"
 print "    </localeList>"
 print "</localeDatabase>"
diff --git a/util/local_database/enumdata.py b/util/local_database/enumdata.py
index e957349..9e0d7d8 100644
--- a/util/local_database/enumdata.py
+++ b/util/local_database/enumdata.py
@@ -45,6 +45,7 @@
 # need to be *appended* to this list.
 
 language_list = {
+    0 : [ "AnyLanguage",          "  " ],
     1 : [ "C",                    "  " ],
     2 : [ "Abkhazian",            "ab" ],
     3 : [ "Afan",                 "om" ],
@@ -511,6 +512,19 @@ country_list = {
     246 : [ "LatinAmericaAndTheCaribbean",              "419" ]
 }
 
+script_list = {
+    0   : [ "AnyScript",         "" ],
+    1   : [ "Arabic",            "Arab" ],
+    2   : [ "Cyrillic",          "Cyrl" ],
+    3   : [ "Deseret",           "Dsrt" ],
+    4   : [ "Gurmukhi",          "Guru" ],
+    5   : [ "Simplified Han",    "Hans" ],
+    6   : [ "Traditional Han",   "Hant" ],
+    7   : [ "Latin",             "Latn" ],
+    8   : [ "Mongolian",         "Mong" ],
+    9   : [ "Tifinagh",          "Tfng" ]
+}
+
 def countryCodeToId(code):
     for country_id in country_list:
         if country_list[country_id][1] == code:
@@ -522,3 +536,9 @@ def languageCodeToId(code):
         if language_list[language_id][1] == code:
             return language_id
     return -1
+
+def scriptCodeToId(code):
+    for key, entry in script_list.items():  # entry is [name, code]
+        if entry[1] == code:
+            return key
+    return -1  # no script in script_list carries that code
diff --git a/util/local_database/qlocalexml2cpp.py b/util/local_database/qlocalexml2cpp.py
index 9251e1f..f16ecbf 100755
--- a/util/local_database/qlocalexml2cpp.py
+++ b/util/local_database/qlocalexml2cpp.py
@@ -40,9 +40,18 @@
 ##
 #############################################################################
 
+import os
 import sys
+import tempfile
+import datetime
 import xml.dom.minidom
 
+class Error(Exception):  # a real exception type instead of an old-style class; raise/except usage is unchanged
+    def __init__(self, msg):
+        self.msg = msg  # human-readable description, returned by __str__
+    def __str__(self):
+        return self.msg
+
 def check_static_char_array_length(name, array):
     # some compilers like VC6 doesn't allow static arrays more than 64K bytes size.
     size = reduce(lambda x, y: x+len(escapedString(y)), array, 0)
@@ -100,6 +109,20 @@ def loadLanguageMap(doc):
 
     return result
 
+def loadScriptMap(doc):  # returns {script id (int): (name, code)} read from the <scriptList> element of doc
+    result = {}
+
+    script_list_elt = firstChildElt(doc.documentElement, "scriptList")
+    script_elt = firstChildElt(script_list_elt, "script")
+    while script_elt:  # presumably nextSiblingElt returns a false value after the last <script> - confirm in its definition
+        script_id = int(eltText(firstChildElt(script_elt, "id")))
+        script_name = eltText(firstChildElt(script_elt, "name"))
+        script_code = eltText(firstChildElt(script_elt, "code"))
+        result[script_id] = (script_name, script_code)  # a repeated id overwrites the earlier entry
+        script_elt = nextSiblingElt(script_elt, "script")
+
+    return result
+
 def loadCountryMap(doc):
     result = {}
 
@@ -126,6 +149,15 @@ def loadDefaultMap(doc):
         elt = nextSiblingElt(elt, "defaultCountry");
     return result
 
+def fixedScriptName(name, dupes):  # returns name without spaces and always ending in "Script"
+    name = name.replace(" ", "")
+    if not name.endswith("Script"):
+        name = name + "Script"
+    if name in dupes:  # the resulting name is listed in dupes: report it and abort the run
+        sys.stderr.write("\n\n\nERROR: The script name '%s' is messy\n" % name)
+        sys.exit(1)
+    return name
+
 def fixedCountryName(name, dupes):
     if name in dupes:
         return name.replace(" ", "") + "Country"
@@ -147,6 +179,12 @@ def languageNameToId(name, language_map):
             return key
     return -1
 
+def scriptNameToId(name, script_map):
+    for script_id, entry in script_map.items():  # entry[0] holds the script name
+        if entry[0] == name:
+            return script_id
+    return -1  # name not present in script_map
+
 def countryNameToId(name, country_map):
     for key in country_map.keys():
         if country_map[key][0] == name:
@@ -188,9 +226,18 @@ def convertFormat(format):
 
     return result
 
+def convertToQtDayOfWeek(firstDay):  # maps a three-letter day abbreviation to its number, "mon" = 1 ... "sun" = 7
+    qtDayOfWeek = {"mon":1, "tue":2, "wed":3, "thu":4, "fri":5, "sat":6, "sun":7}
+    return qtDayOfWeek[firstDay]  # raises KeyError for any other string
+
+def assertSingleChar(string):  # returns string unchanged after checking that it holds exactly one character
+    assert len(string) == 1, "This string is not allowed to be longer than 1 character"  # NOTE(review): also fires for an empty string; asserts are skipped under python -O
+    return string
+
 class Locale:
     def __init__(self, elt):
         self.language = eltText(firstChildElt(elt, "language"))
+        self.script = eltText(firstChildElt(elt, "script"))
         self.country = eltText(firstChildElt(elt, "country"))
         self.decimal = int(eltText(firstChildElt(elt, "decimal")))
         self.group = int(eltText(firstChildElt(elt, "group")))
@@ -200,8 +247,19 @@ class Locale:
         self.minus = int(eltText(firstChildElt(elt, "minus")))
         self.plus = int(eltText(firstChildElt(elt, "plus")))
         self.exp = int(eltText(firstChildElt(elt, "exp")))
+        self.quotationStart = ord(assertSingleChar(eltText(firstChildElt(elt, "quotationStart"))))
+        self.quotationEnd = ord(assertSingleChar(eltText(firstChildElt(elt, "quotationEnd"))))
+        self.alternateQuotationStart = ord(assertSingleChar(eltText(firstChildElt(elt, "alternateQuotationStart"))))
+        self.alternateQuotationEnd = ord(assertSingleChar(eltText(firstChildElt(elt, "alternateQuotationEnd"))))
+        self.listPatternPartStart = eltText(firstChildElt(elt, "listPatternPartStart"))
+        self.listPatternPartMiddle = eltText(firstChildElt(elt, "listPatternPartMiddle"))
+        self.listPatternPartEnd = eltText(firstChildElt(elt, "listPatternPartEnd"))
+        self.listPatternPartTwo = eltText(firstChildElt(elt, "listPatternPartTwo"))
         self.am = eltText(firstChildElt(elt, "am"))
         self.pm = eltText(firstChildElt(elt, "pm"))
+        self.firstDayOfWeek = convertToQtDayOfWeek(eltText(firstChildElt(elt, "firstDayOfWeek")))
+        self.weekendStart = convertToQtDayOfWeek(eltText(firstChildElt(elt, "weekendStart")))
+        self.weekendEnd = convertToQtDayOfWeek(eltText(firstChildElt(elt, "weekendEnd")))
         self.longDateFormat = convertFormat(eltText(firstChildElt(elt, "longDateFormat")))
         self.shortDateFormat = convertFormat(eltText(firstChildElt(elt, "shortDateFormat")))
         self.longTimeFormat = convertFormat(eltText(firstChildElt(elt, "longTimeFormat")))
@@ -218,8 +276,15 @@ class Locale:
         self.longDays = eltText(firstChildElt(elt, "longDays"))
         self.shortDays = eltText(firstChildElt(elt, "shortDays"))
         self.narrowDays = eltText(firstChildElt(elt, "narrowDays"))
-
-def loadLocaleMap(doc, language_map, country_map):
+        self.currencyIsoCode = eltText(firstChildElt(elt, "currencyIsoCode"))
+        self.currencySymbol = eltText(firstChildElt(elt, "currencySymbol"))
+        self.currencyDisplayName = eltText(firstChildElt(elt, "currencyDisplayName"))
+        self.currencyDigits = int(eltText(firstChildElt(elt, "currencyDigits")))
+        self.currencyRounding = int(eltText(firstChildElt(elt, "currencyRounding")))
+        self.currencyFormat = eltText(firstChildElt(elt, "currencyFormat"))
+        self.currencyNegativeFormat = eltText(firstChildElt(elt, "currencyNegativeFormat"))
+
+def loadLocaleMap(doc, language_map, script_map, country_map):
     result = {}
 
     locale_list_elt = firstChildElt(doc.documentElement, "localeList")
@@ -228,11 +293,14 @@ def loadLocaleMap(doc, language_map, country_map):
         locale = Locale(locale_elt)
         language_id = languageNameToId(locale.language, language_map)
         if language_id == -1:
-            sys.stderr.write("Cannot find a language id for %s\n" % locale.language)
+            sys.stderr.write("Cannot find a language id for '%s'\n" % locale.language)
+        script_id = scriptNameToId(locale.script, script_map)
+        if script_id == -1:
+            sys.stderr.write("Cannot find a script id for '%s'\n" % locale.script)
         country_id = countryNameToId(locale.country, country_map)
         if country_id == -1:
-            sys.stderr.write("Cannot find a country id for %s\n" % locale.country)
-        result[(language_id, country_id)] = locale
+            sys.stderr.write("Cannot find a country id for '%s'\n" % locale.country)
+        result[(language_id, script_id, country_id)] = locale
 
         locale_elt = nextSiblingElt(locale_elt, "locale")
 
@@ -248,14 +316,17 @@ def compareLocaleKeys(key1, key2):
 
         if l1.language in compareLocaleKeys.default_map:
             default = compareLocaleKeys.default_map[l1.language]
-            if l1.country == default:
+            if l1.country == default and key1[1] == 0:
                 return -1
-            if l2.country == default:
+            if l2.country == default and key2[1] == 0:
                 return 1
+
+        if key1[1] != key2[1]:
+            return key1[1] - key2[1]
     else:
         return key1[0] - key2[0]
 
-    return key1[1] - key2[1]
+    return key1[2] - key2[2]
 
 
 def languageCount(language_id, locale_map):
@@ -265,8 +336,25 @@ def languageCount(language_id, locale_map):
             result += 1
     return result
 
+def unicode2hex(s):  # returns a list of hex() strings: one per character, two for a character above 0xFFFF
+    lst = []
+    for x in s:
+        v = ord(x)
+        if v > 0xFFFF:
+            # Above 0xFFFF the value does not fit into 16 bits: emit a
+            # surrogate pair instead (arithmetic copied from qchar.h).
+            high = (v >> 10) + 0xd7c0
+            low = (v % 0x400 + 0xdc00)
+            lst.append(hex(high))  # high surrogate first ...
+            lst.append(hex(low))  # ... then the low surrogate
+        else:
+            lst.append(hex(v))
+    return lst
+
 class StringDataToken:
     def __init__(self, index, length):
+        if index > 0xFFFF or length > 0xFFFF:
+            raise Error("Position exceeds ushort range: %d,%d " % (index, length))
         self.index = index
         self.length = length
     def __str__(self):
@@ -280,9 +368,9 @@ class StringData:
         if s in self.hash:
             return self.hash[s]
 
-        lst = map(lambda x: hex(ord(x)), s)
+        lst = unicode2hex(s)
         index = len(self.data)
-        if index >= 65535:
+        if index > 65535:
             print "\n\n\n#error Data index is too big!"
             sys.stderr.write ("\n\n\nERROR: index exceeds the uint16 range! index = %d\n" % index)
             sys.exit(1)
@@ -291,7 +379,12 @@ class StringData:
             print "\n\n\n#error Data is too big!"
             sys.stderr.write ("\n\n\nERROR: data size exceeds the uint16 range! size = %d\n" % size)
             sys.exit(1)
-        token = StringDataToken(index, size)
+        token = None
+        try:
+            token = StringDataToken(index, size)
+        except Error as e:
+            sys.stderr.write("\n\n\nERROR: %s: on data '%s'" % (e, s))
+            sys.exit(1)
         self.hash[s] = token
         self.data += lst
         return token
@@ -331,39 +424,67 @@ def printEscapedString(s):
     print escapedString(s);
 
 
+def currencyIsoCodeData(s):
+    # Comma-joined character ordinals of s, e.g. "85,83,68" for "USD";
+    # a false (empty or None) s yields the placeholder "0,0,0".
+    return ",".join(str(ord(c)) for c in s) if s else "0,0,0"
+
+def usage():
+    print "Usage: qlocalexml2cpp.py <path-to-locale.xml> <path-to-qt-src-tree>"
+    sys.exit(1)
+
+GENERATED_BLOCK_START = "// GENERATED PART STARTS HERE\n"
+GENERATED_BLOCK_END = "// GENERATED PART ENDS HERE\n"
+
 def main():
-    doc = xml.dom.minidom.parse("locale.xml")
+    if len(sys.argv) != 3:
+        usage()
+
+    localexml = sys.argv[1]
+    qtsrcdir = sys.argv[2]
+
+    # Validate both inputs before any temporary file is created in the
+    # source tree.
+    if not os.path.exists(localexml) or not os.path.exists(qtsrcdir):
+        usage()
+    # Each of these files is rewritten in place below, so it must exist.
+    for generated in ("qlocale_data_p.h", "qlocale.h", "qlocale.qdoc"):
+        if not os.path.isfile(qtsrcdir + "/src/corelib/tools/" + generated):
+            usage()
+
+    (data_temp_file, data_temp_file_path) = tempfile.mkstemp("qlocale_data_p", dir=qtsrcdir)
+    data_temp_file = os.fdopen(data_temp_file, "w")
+    qlocaledata_file = open(qtsrcdir + "/src/corelib/tools/qlocale_data_p.h", "r")
+    s = qlocaledata_file.readline()
+    while s and s != GENERATED_BLOCK_START:
+        data_temp_file.write(s)
+        s = qlocaledata_file.readline()
+    data_temp_file.write(GENERATED_BLOCK_START)
+
+    doc = xml.dom.minidom.parse(localexml)
     language_map = loadLanguageMap(doc)
+    script_map = loadScriptMap(doc)
     country_map = loadCountryMap(doc)
     default_map = loadDefaultMap(doc)
-    locale_map = loadLocaleMap(doc, language_map, country_map)
+    locale_map = loadLocaleMap(doc, language_map, script_map, country_map)
     dupes = findDupes(language_map, country_map)
 
-    # Language enum
-    print "enum Language {"
-    language = ""
-    for key in language_map.keys():
-        language = fixedLanguageName(language_map[key][0], dupes)
-        print "    " + language + " = " + str(key) + ","
-    print "    LastLanguage = " + language
-    print "};"
+    cldr_version = eltText(firstChildElt(doc.documentElement, "version"))
 
-    print
-
-    # Country enum
-    print "enum Country {"
-    country = ""
-    for key in country_map.keys():
-        country = fixedCountryName(country_map[key][0], dupes)
-        print "    " + country + " = " + str(key) + ","
-    print "    LastCountry = " + country
-    print "};"
-
-    print
+    data_temp_file.write("\n\
+/*\n\
+    This part of the file was generated on %s from the\n\
+    Common Locale Data Repository v%s\n\
+\n\
+    http://www.unicode.org/cldr/\n\
+\n\
+    Do not change it, instead edit CLDR data and regenerate this file using\n\
+    cldr2qlocalexml.py and qlocalexml2cpp.py.\n\
+*/\n\n\n\
+" % (str(datetime.date.today()), cldr_version) )
 
     # Locale index
-    print "static const quint16 locale_index[] = {"
-    print "     0, // unused"
+    data_temp_file.write("static const quint16 locale_index[] = {\n")
     index = 0
     for key in language_map.keys():
         i = 0
@@ -371,12 +492,13 @@ def main():
         if count > 0:
             i = index
             index += count
-        print "%6d, // %s" % (i, language_map[key][0])
-    print "     0 // trailing 0"
-    print "};"
+        data_temp_file.write("%6d, // %s\n" % (i, language_map[key][0]))
+    data_temp_file.write("     0 // trailing 0\n")
+    data_temp_file.write("};\n")
 
-    print
+    data_temp_file.write("\n")
 
+    list_pattern_part_data = StringData()
     date_format_data = StringData()
     time_format_data = StringData()
     months_data = StringData()
@@ -384,10 +506,13 @@ def main():
     days_data = StringData()
     am_data = StringData()
     pm_data = StringData()
+    currency_symbol_data = StringData()
+    currency_display_name_data = StringData()
+    currency_format_data = StringData()
 
     # Locale data
-    print "static const QLocalePrivate locale_data[] = {"
-    print "//      lang   terr    dec  group   list  prcnt   zero  minus  plus    exp sDtFmt lDtFmt sTmFmt lTmFmt ssMonth slMonth  sMonth lMonth  sDays  lDays  am,len      pm,len"
+    data_temp_file.write("static const QLocalePrivate locale_data[] = {\n")
+    data_temp_file.write("//      lang   script terr    dec  group   list  prcnt   zero  minus  plus    exp quotStart quotEnd altQuotStart altQuotEnd lpStart lpMid lpEnd lpTwo sDtFmt lDtFmt sTmFmt lTmFmt ssMonth slMonth  sMonth lMonth  sDays  lDays  am,len      pm,len\n")
 
     locale_keys = locale_map.keys()
     compareLocaleKeys.default_map = default_map
@@ -396,9 +521,8 @@ def main():
 
     for key in locale_keys:
         l = locale_map[key]
-
-        print "    { %6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s }, // %s/%s" \
-                    % (key[0], key[1],
+        data_temp_file.write("    { %6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s, {%s}, %s,%s,%s,%s,%6d,%6d,%6d,%6d,%6d }, // %s/%s/%s\n" \
+                    % (key[0], key[1], key[2],
                         l.decimal,
                         l.group,
                         l.listDelim,
@@ -407,6 +531,14 @@ def main():
                         l.minus,
                         l.plus,
                         l.exp,
+                        l.quotationStart,
+                        l.quotationEnd,
+                        l.alternateQuotationStart,
+                        l.alternateQuotationEnd,
+                        list_pattern_part_data.append(l.listPatternPartStart),
+                        list_pattern_part_data.append(l.listPatternPartMiddle),
+                        list_pattern_part_data.append(l.listPatternPartEnd),
+                        list_pattern_part_data.append(l.listPatternPartTwo),
                         date_format_data.append(l.shortDateFormat),
                         date_format_data.append(l.longDateFormat),
                         time_format_data.append(l.shortTimeFormat),
@@ -425,136 +557,310 @@ def main():
                         days_data.append(l.narrowDays),
                         am_data.append(l.am),
                         pm_data.append(l.pm),
+                        currencyIsoCodeData(l.currencyIsoCode),
+                        currency_symbol_data.append(l.currencySymbol),
+                        currency_display_name_data.append(l.currencyDisplayName),
+                        currency_format_data.append(l.currencyFormat),
+                        currency_format_data.append(l.currencyNegativeFormat),
+                        l.currencyDigits,
+                        l.currencyRounding,
+                        l.firstDayOfWeek,
+                        l.weekendStart,
+                        l.weekendEnd,
                         l.language,
-                        l.country)
-    print "    {      0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,0,     0,0,     0,0,     0,0,     0,0,     0,0,     0,0,    0,0,    0,0,    0,0,   0,0,   0,0,   0,0,   0,0,   0,0,   0,0,   0,0,   0,0 }  // trailing 0s"
-    print "};"
+                        l.script,
+                        l.country))
+    data_temp_file.write("    {      0,      0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,    0,0,    0,0,    0,0,   0,0,     0,0,     0,0,     0,0,     0,0,     0,0,     0,0,     0,0,    0,0,    0,0,    0,0,   0,0,   0,0,   0,0,   0,0,   0,0,   0,0,   0,0,   0,0, {0,0,0}, 0,0, 0,0, 0,0, 0,0, 0, 0, 0, 0, 0 }  // trailing 0s\n")
+    data_temp_file.write("};\n")
+
+    data_temp_file.write("\n")
 
-    print
+    # List patterns data
+    #check_static_char_array_length("list_pattern_part", list_pattern_part_data.data)
+    data_temp_file.write("static const ushort list_pattern_part_data[] = {\n")
+    data_temp_file.write(wrap_list(list_pattern_part_data.data))
+    data_temp_file.write("\n};\n")
+
+    data_temp_file.write("\n")
 
     # Date format data
     #check_static_char_array_length("date_format", date_format_data.data)
-    print "static const ushort date_format_data[] = {"
-    print wrap_list(date_format_data.data)
-    print "};"
+    data_temp_file.write("static const ushort date_format_data[] = {\n")
+    data_temp_file.write(wrap_list(date_format_data.data))
+    data_temp_file.write("\n};\n")
 
-    print
+    data_temp_file.write("\n")
 
     # Time format data
     #check_static_char_array_length("time_format", time_format_data.data)
-    print "static const ushort time_format_data[] = {"
-    print wrap_list(time_format_data.data)
-    print "};"
+    data_temp_file.write("static const ushort time_format_data[] = {\n")
+    data_temp_file.write(wrap_list(time_format_data.data))
+    data_temp_file.write("\n};\n")
 
-    print
+    data_temp_file.write("\n")
 
     # Months data
     #check_static_char_array_length("months", months_data.data)
-    print "static const ushort months_data[] = {"
-    print wrap_list(months_data.data)
-    print "};"
+    data_temp_file.write("static const ushort months_data[] = {\n")
+    data_temp_file.write(wrap_list(months_data.data))
+    data_temp_file.write("\n};\n")
 
-    print
+    data_temp_file.write("\n")
 
     # Standalone months data
     #check_static_char_array_length("standalone_months", standalone_months_data.data)
-    print "static const ushort standalone_months_data[] = {"
-    print wrap_list(standalone_months_data.data)
-    print "};"
+    data_temp_file.write("static const ushort standalone_months_data[] = {\n")
+    data_temp_file.write(wrap_list(standalone_months_data.data))
+    data_temp_file.write("\n};\n")
 
-    print
+    data_temp_file.write("\n")
 
     # Days data
     #check_static_char_array_length("days", days_data.data)
-    print "static const ushort days_data[] = {"
-    print wrap_list(days_data.data)
-    print "};"
+    data_temp_file.write("static const ushort days_data[] = {\n")
+    data_temp_file.write(wrap_list(days_data.data))
+    data_temp_file.write("\n};\n")
 
-    print
+    data_temp_file.write("\n")
 
     # AM data
     #check_static_char_array_length("am", am_data.data)
-    print "static const ushort am_data[] = {"
-    print wrap_list(am_data.data)
-    print "};"
+    data_temp_file.write("static const ushort am_data[] = {\n")
+    data_temp_file.write(wrap_list(am_data.data))
+    data_temp_file.write("\n};\n")
 
-    print
+    data_temp_file.write("\n")
 
     # PM data
     #check_static_char_array_length("pm", am_data.data)
-    print "static const ushort pm_data[] = {"
-    print wrap_list(pm_data.data)
-    print "};"
+    data_temp_file.write("static const ushort pm_data[] = {\n")
+    data_temp_file.write(wrap_list(pm_data.data))
+    data_temp_file.write("\n};\n")
+
+    data_temp_file.write("\n")
+
+    # Currency symbol data
+    #check_static_char_array_length("currency_symbol", currency_symbol_data.data)
+    data_temp_file.write("static const ushort currency_symbol_data[] = {\n")
+    data_temp_file.write(wrap_list(currency_symbol_data.data))
+    data_temp_file.write("\n};\n")
+
+    data_temp_file.write("\n")
+
+    # Currency display name data
+    #check_static_char_array_length("currency_display_name", currency_display_name_data.data)
+    data_temp_file.write("static const ushort currency_display_name_data[] = {\n")
+    data_temp_file.write(wrap_list(currency_display_name_data.data))
+    data_temp_file.write("\n};\n")
 
-    print
+    data_temp_file.write("\n")
+
+    # Currency format data
+    #check_static_char_array_length("currency_format", currency_format_data.data)
+    data_temp_file.write("static const ushort currency_format_data[] = {\n")
+    data_temp_file.write(wrap_list(currency_format_data.data))
+    data_temp_file.write("\n};\n")
+
+    data_temp_file.write("\n")
 
     # Language name list
-    print "static const char language_name_list[] ="
-    print "\"Default\\0\""
+    data_temp_file.write("static const char language_name_list[] =\n")
+    data_temp_file.write("\"Default\\0\"\n")
     for key in language_map.keys():
-        print "\"" + language_map[key][0] + "\\0\""
-    print ";"
+        if key == 0:
+            continue
+        data_temp_file.write("\"" + language_map[key][0] + "\\0\"\n")
+    data_temp_file.write(";\n")
 
-    print
+    data_temp_file.write("\n")
 
     # Language name index
-    print "static const quint16 language_name_index[] = {"
-    print "     0, // Unused"
+    data_temp_file.write("static const quint16 language_name_index[] = {\n")
+    data_temp_file.write("     0, // AnyLanguage\n")
     index = 8
     for key in language_map.keys():
+        if key == 0:
+            continue
         language = language_map[key][0]
-        print "%6d, // %s" % (index, language)
+        data_temp_file.write("%6d, // %s\n" % (index, language))
         index += len(language) + 1
-    print "};"
+    data_temp_file.write("};\n")
+
+    data_temp_file.write("\n")
+
+    # Script name list
+    data_temp_file.write("static const char script_name_list[] =\n")
+    data_temp_file.write("\"Default\\0\"\n")
+    for key in script_map.keys():
+        if key == 0:
+            continue
+        data_temp_file.write("\"" + script_map[key][0] + "\\0\"\n")
+    data_temp_file.write(";\n")
 
-    print
+    data_temp_file.write("\n")
+
+    # Script name index
+    data_temp_file.write("static const quint16 script_name_index[] = {\n")
+    data_temp_file.write("     0, // AnyScript\n")
+    index = 8
+    for key in script_map.keys():
+        if key == 0:
+            continue
+        script = script_map[key][0]
+        data_temp_file.write("%6d, // %s\n" % (index, script))
+        index += len(script) + 1
+    data_temp_file.write("};\n")
+
+    data_temp_file.write("\n")
 
     # Country name list
-    print "static const char country_name_list[] ="
-    print "\"Default\\0\""
+    data_temp_file.write("static const char country_name_list[] =\n")
+    data_temp_file.write("\"Default\\0\"\n")
     for key in country_map.keys():
         if key == 0:
             continue
-        print "\"" + country_map[key][0] + "\\0\""
-    print ";"
+        data_temp_file.write("\"" + country_map[key][0] + "\\0\"\n")
+    data_temp_file.write(";\n")
 
-    print
+    data_temp_file.write("\n")
 
     # Country name index
-    print "static const quint16 country_name_index[] = {"
-    print "     0, // AnyCountry"
+    data_temp_file.write("static const quint16 country_name_index[] = {\n")
+    data_temp_file.write("     0, // AnyCountry\n")
     index = 8
     for key in country_map.keys():
         if key == 0:
             continue
         country = country_map[key][0]
-        print "%6d, // %s" % (index, country)
+        data_temp_file.write("%6d, // %s\n" % (index, country))
         index += len(country) + 1
-    print "};"
+    data_temp_file.write("};\n")
 
-    print
+    data_temp_file.write("\n")
 
     # Language code list
-    print "static const unsigned char language_code_list[] ="
-    print "\"  \\0\" // Unused"
+    data_temp_file.write("static const unsigned char language_code_list[] =\n")
     for key in language_map.keys():
         code = language_map[key][1]
         if len(code) == 2:
             code += r"\0"
-        print "\"%2s\" // %s" % (code, language_map[key][0])
-    print ";"
+        data_temp_file.write("\"%2s\" // %s\n" % (code, language_map[key][0]))
+    data_temp_file.write(";\n")
+
+    data_temp_file.write("\n")
 
-    print
+    # Script code list
+    data_temp_file.write("static const unsigned char script_code_list[] =\n")
+    for key in script_map.keys():
+        code = script_map[key][1]
+        for i in range(4 - len(code)):
+            code += "\\0"
+        data_temp_file.write("\"%2s\" // %s\n" % (code, script_map[key][0]))
+    data_temp_file.write(";\n")
 
     # Country code list
-    print "static const unsigned char country_code_list[] ="
+    data_temp_file.write("static const unsigned char country_code_list[] =\n")
     for key in country_map.keys():
         code = country_map[key][1]
         if len(code) == 2:
             code += "\\0"
-        print "\"%2s\" // %s" % (code, country_map[key][0])
-    print ";"
+        data_temp_file.write("\"%2s\" // %s\n" % (code, country_map[key][0]))
+    data_temp_file.write(";\n")
+
+    data_temp_file.write("\n")
+    data_temp_file.write(GENERATED_BLOCK_END)
+    s = qlocaledata_file.readline()
+    # skip until end of the block
+    while s and s != GENERATED_BLOCK_END:
+        s = qlocaledata_file.readline()
+
+    s = qlocaledata_file.readline()
+    while s:
+        data_temp_file.write(s)
+        s = qlocaledata_file.readline()
+    data_temp_file.close()
+    qlocaledata_file.close()
+
+    os.rename(data_temp_file_path, qtsrcdir + "/src/corelib/tools/qlocale_data_p.h")
+
+    # qlocale.h
+
+    (qlocaleh_temp_file, qlocaleh_temp_file_path) = tempfile.mkstemp("qlocale.h", dir=qtsrcdir)
+    qlocaleh_temp_file = os.fdopen(qlocaleh_temp_file, "w")
+    qlocaleh_file = open(qtsrcdir + "/src/corelib/tools/qlocale.h", "r")
+    s = qlocaleh_file.readline()
+    while s and s != GENERATED_BLOCK_START:
+        qlocaleh_temp_file.write(s)
+        s = qlocaleh_file.readline()
+    qlocaleh_temp_file.write(GENERATED_BLOCK_START)
+    qlocaleh_temp_file.write("// see qlocale_data_p.h for more info on generated data\n")
+
+    # Language enum
+    qlocaleh_temp_file.write("    enum Language {\n")
+    language = ""
+    for key in language_map.keys():
+        language = fixedLanguageName(language_map[key][0], dupes)
+        qlocaleh_temp_file.write("        " + language + " = " + str(key) + ",\n")
+    # special cases for norwegian. we really need to make it right at some point.
+    qlocaleh_temp_file.write("        NorwegianBokmal = Norwegian,\n")
+    qlocaleh_temp_file.write("        NorwegianNynorsk = Nynorsk,\n")
+    qlocaleh_temp_file.write("        LastLanguage = " + language + "\n")
+    qlocaleh_temp_file.write("    };\n")
+
+    qlocaleh_temp_file.write("\n")
+
+    # Script enum
+    qlocaleh_temp_file.write("    enum Script {\n")
+    script = ""
+    for key in script_map.keys():
+        script = fixedScriptName(script_map[key][0], dupes)
+        qlocaleh_temp_file.write("        " + script + " = " + str(key) + ",\n")
+    qlocaleh_temp_file.write("        SimplifiedChineseScript = SimplifiedHanScript,\n")
+    qlocaleh_temp_file.write("        TraditionalChineseScript = TraditionalHanScript,\n")
+    qlocaleh_temp_file.write("        LastScript = " + script + "\n")
+    qlocaleh_temp_file.write("    };\n")
+
+    # Country enum
+    qlocaleh_temp_file.write("    enum Country {\n")
+    country = ""
+    for key in country_map.keys():
+        country = fixedCountryName(country_map[key][0], dupes)
+        qlocaleh_temp_file.write("        " + country + " = " + str(key) + ",\n")
+    qlocaleh_temp_file.write("        LastCountry = " + country + "\n")
+    qlocaleh_temp_file.write("    };\n")
+
+    qlocaleh_temp_file.write(GENERATED_BLOCK_END)
+    s = qlocaleh_file.readline()
+    # skip until end of the block
+    while s and s != GENERATED_BLOCK_END:
+        s = qlocaleh_file.readline()
+
+    s = qlocaleh_file.readline()
+    while s:
+        qlocaleh_temp_file.write(s)
+        s = qlocaleh_file.readline()
+    qlocaleh_temp_file.close()
+    qlocaleh_file.close()
+
+    os.rename(qlocaleh_temp_file_path, qtsrcdir + "/src/corelib/tools/qlocale.h")
+
+    # qlocale.qdoc
+
+    (qlocaleqdoc_temp_file, qlocaleqdoc_temp_file_path) = tempfile.mkstemp("qlocale.qdoc", dir=qtsrcdir)
+    qlocaleqdoc_temp_file = os.fdopen(qlocaleqdoc_temp_file, "w")
+    qlocaleqdoc_file = open(qtsrcdir + "/src/corelib/tools/qlocale.qdoc", "r")
+    s = qlocaleqdoc_file.readline()
+    DOCSTRING="    QLocale's data is based on Common Locale Data Repository "
+    while s:
+        if DOCSTRING in s:
+            qlocaleqdoc_temp_file.write(DOCSTRING + "v" + cldr_version + ".\n")
+        else:
+            qlocaleqdoc_temp_file.write(s)
+        s = qlocaleqdoc_file.readline()
+    qlocaleqdoc_temp_file.close()
+    qlocaleqdoc_file.close()
 
+    os.rename(qlocaleqdoc_temp_file_path, qtsrcdir + "/src/corelib/tools/qlocale.qdoc")
 
 if __name__ == "__main__":
     main()
diff --git a/util/local_database/xpathlite.py b/util/local_database/xpathlite.py
index 95e6711..502d85d 100644
--- a/util/local_database/xpathlite.py
+++ b/util/local_database/xpathlite.py
@@ -87,6 +87,48 @@ def findChild(parent, tag_name, arg_name=None, arg_value=None, draft=None):
         return node
     return False
 
+def findTagsInFile(file, path):
+    """Collect [nodeName, attribute items] pairs found at path in file.
+
+    Each '/'-separated step of path is 'tag', 'tag[value]' or
+    'tag[name=value]'; a bare value is looked up under arg_name 'type'.
+    Returns None as soon as findChild() finds no match for a step, else a
+    list built from the attribute-bearing child nodes of the final element
+    (or from that element itself when it has no child nodes).
+    """
+    # Parse each file only once; later calls reuse the cached document.
+    if doc_cache.has_key(file):
+        doc = doc_cache[file]
+    else:
+        doc = xml.dom.minidom.parse(file)
+        doc_cache[file] = doc
+
+    elt = doc.documentElement
+    for tag_spec in path.split("/"):
+        tag_name = tag_spec
+        arg_name = 'type'
+        arg_value = ''
+        left_bracket = tag_spec.find('[')
+        if left_bracket != -1:
+            tag_name = tag_spec[:left_bracket]
+            arg_value = tag_spec[left_bracket+1:-1].split("=")
+            if len(arg_value) == 2:
+                arg_name = arg_value[0]
+                arg_value = arg_value[1]
+            else:
+                arg_value = arg_value[0]
+        elt = findChild(elt, tag_name, arg_name, arg_value)
+        if not elt:
+            return None
+    ret = []
+    if elt.childNodes:
+        for node in elt.childNodes:
+            if node.attributes:
+                ret.append([node.nodeName, node.attributes.items()])
+    elif elt.attributes:
+        ret.append([elt.nodeName, elt.attributes.items()])
+    return ret
+
 def _findEntryInFile(file, path, draft=None, attribute=None):
     doc = False
     if doc_cache.has_key(file):