#!/usr/bin/env python
#############################################################################
##
## Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
## All rights reserved.
## Contact: Nokia Corporation (qt-info@nokia.com)
##
## This file is part of the test suite of the Qt Toolkit.
##
## $QT_BEGIN_LICENSE:LGPL$
## No Commercial Usage
## This file contains pre-release code and may not be distributed.
## You may use this file in accordance with the terms and conditions
## contained in the Technology Preview License Agreement accompanying
## this package.
##
## GNU Lesser General Public License Usage
## Alternatively, this file may be used under the terms of the GNU Lesser
## General Public License version 2.1 as published by the Free Software
## Foundation and appearing in the file LICENSE.LGPL included in the
## packaging of this file.  Please review the following information to
## ensure the GNU Lesser General Public License version 2.1 requirements
## will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
##
## In addition, as a special exception, Nokia gives you certain additional
## rights.  These rights are described in the Nokia Qt LGPL Exception
## version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
##
## If you have questions regarding the use of this file, please contact
## Nokia at qt-info@nokia.com.
##
##
##
##
##
##
##
##
## $QT_END_LICENSE$
##
#############################################################################

import os
import sys
import enumdata
import xpathlite
from  xpathlite import DraftResolution
from dateconverter import convert_date
import re

findEntry = xpathlite.findEntry
findEntryInFile = xpathlite._findEntryInFile
findTagsInFile = xpathlite.findTagsInFile

def parse_number_format(patterns, data):
    # this is a very limited parsing of the number format for currency only.
    def skip_repeating_pattern(x):
        p = x.replace('0', '#').replace(',', '').replace('.', '')
        seen = False
        result = ''
        for c in p:
            if c == '#':
                if seen:
                    continue
                seen = True
            else:
                seen = False
            result = result + c
        return result
    patterns = patterns.split(';')
    result = []
    for pattern in patterns:
        pattern = skip_repeating_pattern(pattern)
        pattern = pattern.replace('#', "%1")
        # according to http://www.unicode.org/reports/tr35/#Number_Format_Patterns
        # there can be doubled or trippled currency sign, however none of the
        # locales use that.
        pattern = pattern.replace(u'\xa4', "%2")
        pattern = pattern.replace("''", "###").replace("'", '').replace("###", "'")
        pattern = pattern.replace('-', data['minus'])
        pattern = pattern.replace('+', data['plus'])
        result.append(pattern)
    return result

def parse_list_pattern_part_format(pattern):
    # this is a very limited parsing of the format for list pattern part only.
    result = ""
    result = pattern.replace("{0}", "%1")
    result = result.replace("{1}", "%2")
    result = result.replace("{2}", "%3")
    return result

def ordStr(c):
    if len(c) == 1:
        return str(ord(c))
    return "##########"

# the following functions are supposed to fix the problem with QLocale
# returning a character instead of strings for QLocale::exponential()
# and others. So we fallback to default values in these cases.
def fixOrdStrExp(c):
    if len(c) == 1:
        return str(ord(c))
    return str(ord('e'))
def fixOrdStrPercent(c):
    if len(c) == 1:
        return str(ord(c))
    return str(ord('%'))
def fixOrdStrList(c):
    if len(c) == 1:
        return str(ord(c))
    return str(ord(';'))

def generateLocaleInfo(path):
    (dir_name, file_name) = os.path.split(path)

    if not path.endswith(".xml"):
        return {}
    language_code = findEntryInFile(path, "identity/language", attribute="type")[0]
    if language_code == 'root':
        # just skip it
        return {}
    country_code = findEntryInFile(path, "identity/territory", attribute="type")[0]
    script_code = findEntryInFile(path, "identity/script", attribute="type")[0]
    variant_code = findEntryInFile(path, "identity/variant", attribute="type")[0]

    # we should handle fully qualified names with the territory
    if not country_code:
        return {}

    # we do not support variants
    # ### actually there is only one locale with variant: en_US_POSIX
    #     does anybody care about it at all?
    if variant_code:
        return {}

    language_id = enumdata.languageCodeToId(language_code)
    if language_id == -1:
        sys.stderr.write("unnknown language code \"" + language_code + "\"\n")
        return {}
    language = enumdata.language_list[language_id][0]

    script_id = enumdata.scriptCodeToId(script_code)
    if script_code == -1:
        sys.stderr.write("unnknown script code \"" + script_code + "\"\n")
        return {}
    script = "AnyScript"
    if script_id != -1:
        script = enumdata.script_list[script_id][0]

    country_id = enumdata.countryCodeToId(country_code)
    country = ""
    if country_id != -1:
        country = enumdata.country_list[country_id][0]
    if country == "":
        sys.stderr.write("unnknown country code \"" + country_code + "\"\n")
        return {}

    # So we say we accept only those values that have "contributed" or
    # "approved" resolution. see http://www.unicode.org/cldr/process.html
    # But we only respect the resolution for new datas for backward
    # compatibility.
    draft = DraftResolution.contributed

    result = {}
    result['language'] = language
    result['script'] = script
    result['country'] = country
    result['language_code'] = language_code
    result['country_code'] = country_code
    result['script_code'] = script_code
    result['variant_code'] = variant_code
    result['language_id'] = language_id
    result['script_id'] = script_id
    result['country_id'] = country_id

    supplementalPath = dir_name + "/../supplemental/supplementalData.xml"
    currencies = findTagsInFile(supplementalPath, "currencyData/region[iso3166=%s]"%country_code);
    result['currencyIsoCode'] = ''
    result['currencyDigits'] = 2
    result['currencyRounding'] = 1
    if currencies:
        for e in currencies:
            if e[0] == 'currency':
                tender = True
                t = filter(lambda x: x[0] == 'tender', e[1])
                if t and t[0][1] == 'false':
                    tender = False;
                if tender and not filter(lambda x: x[0] == 'to', e[1]):
                    result['currencyIsoCode'] = filter(lambda x: x[0] == 'iso4217', e[1])[0][1]
                    break
        if result['currencyIsoCode']:
            t = findTagsInFile(supplementalPath, "currencyData/fractions/info[iso4217=%s]"%result['currencyIsoCode']);
            if t and t[0][0] == 'info':
                result['currencyDigits'] = int(filter(lambda x: x[0] == 'digits', t[0][1])[0][1])
                result['currencyRounding'] = int(filter(lambda x: x[0] == 'rounding', t[0][1])[0][1])
    numbering_system = None
    try:
        numbering_system = findEntry(path, "numbers/defaultNumberingSystem")
    except:
        pass
    def findEntryDef(path, xpath, value=''):
        try:
            return findEntry(path, xpath)
        except xpathlite.Error:
            return value
    def get_number_in_system(path, xpath, numbering_system):
        if numbering_system:
            try:
                return findEntry(path, xpath + "[numberSystem=" + numbering_system + "]")
            except xpathlite.Error:
                pass
        return findEntry(path, xpath)
    result['decimal'] = get_number_in_system(path, "numbers/symbols/decimal", numbering_system)
    result['group'] = get_number_in_system(path, "numbers/symbols/group", numbering_system)
    result['list'] = get_number_in_system(path, "numbers/symbols/list", numbering_system)
    result['percent'] = get_number_in_system(path, "numbers/symbols/percentSign", numbering_system)
    result['zero'] = get_number_in_system(path, "numbers/symbols/nativeZeroDigit", numbering_system)
    result['minus'] = get_number_in_system(path, "numbers/symbols/minusSign", numbering_system)
    result['plus'] = get_number_in_system(path, "numbers/symbols/plusSign", numbering_system)
    result['exp'] = get_number_in_system(path, "numbers/symbols/exponential", numbering_system).lower()
    result['quotationStart'] = findEntry(path, "delimiters/quotationStart")
    result['quotationEnd'] = findEntry(path, "delimiters/quotationEnd")
    result['alternateQuotationStart'] = findEntry(path, "delimiters/alternateQuotationStart")
    result['alternateQuotationEnd'] = findEntry(path, "delimiters/alternateQuotationEnd")
    result['listPatternPartStart'] = parse_list_pattern_part_format(findEntry(path, "listPatterns/listPattern/listPatternPart[start]"))
    result['listPatternPartMiddle'] = parse_list_pattern_part_format(findEntry(path, "listPatterns/listPattern/listPatternPart[middle]"))
    result['listPatternPartEnd'] = parse_list_pattern_part_format(findEntry(path, "listPatterns/listPattern/listPatternPart[end]"))
    result['listPatternPartTwo'] = parse_list_pattern_part_format(findEntry(path, "listPatterns/listPattern/listPatternPart[2]"))
    result['am'] = findEntry(path, "dates/calendars/calendar[gregorian]/dayPeriods/dayPeriodContext[format]/dayPeriodWidth[wide]/dayPeriod[am]", draft)
    result['pm'] = findEntry(path, "dates/calendars/calendar[gregorian]/dayPeriods/dayPeriodContext[format]/dayPeriodWidth[wide]/dayPeriod[pm]", draft)
    result['longDateFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/dateFormats/dateFormatLength[full]/dateFormat/pattern"))
    result['shortDateFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/dateFormats/dateFormatLength[short]/dateFormat/pattern"))
    result['longTimeFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/timeFormats/timeFormatLength[full]/timeFormat/pattern"))
    result['shortTimeFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/timeFormats/timeFormatLength[short]/timeFormat/pattern"))

    endonym = None
    if country_code and script_code:
        endonym = findEntryDef(path, "localeDisplayNames/languages/language[type=%s_%s_%s]" % (language_code, script_code, country_code))
    if not endonym and script_code:
        endonym = findEntryDef(path, "localeDisplayNames/languages/language[type=%s_%s]" % (language_code, script_code))
    if not endonym and country_code:
        endonym = findEntryDef(path, "localeDisplayNames/languages/language[type=%s_%s]" % (language_code, country_code))
    if not endonym:
        endonym = findEntryDef(path, "localeDisplayNames/languages/language[type=%s]" % (language_code))
    result['language_endonym'] = endonym
    result['country_endonym'] = findEntryDef(path, "localeDisplayNames/territories/territory[type=%s]" % (country_code))

    currency_format = get_number_in_system(path, "numbers/currencyFormats/currencyFormatLength/currencyFormat/pattern", numbering_system)
    currency_format = parse_number_format(currency_format, result)
    result['currencyFormat'] = currency_format[0]
    result['currencyNegativeFormat'] = ''
    if len(currency_format) > 1:
        result['currencyNegativeFormat'] = currency_format[1]

    result['currencySymbol'] = ''
    result['currencyDisplayName'] = ''
    if result['currencyIsoCode']:
        result['currencySymbol'] = findEntryDef(path, "numbers/currencies/currency[%s]/symbol" % result['currencyIsoCode'])
        display_name_path = "numbers/currencies/currency[%s]/displayName" % result['currencyIsoCode']
        result['currencyDisplayName'] \
            = findEntryDef(path, display_name_path) + ";" \
            + findEntryDef(path, display_name_path + "[count=zero]")  + ";" \
            + findEntryDef(path, display_name_path + "[count=one]")   + ";" \
            + findEntryDef(path, display_name_path + "[count=two]")   + ";" \
            + findEntryDef(path, display_name_path + "[count=few]")   + ";" \
            + findEntryDef(path, display_name_path + "[count=many]")  + ";" \
            + findEntryDef(path, display_name_path + "[count=other]") + ";"

    standalone_long_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[stand-alone]/monthWidth[wide]/month"
    result['standaloneLongMonths'] \
        = findEntry(path, standalone_long_month_path + "[1]") + ";" \
        + findEntry(path, standalone_long_month_path + "[2]") + ";" \
        + findEntry(path, standalone_long_month_path + "[3]") + ";" \
        + findEntry(path, standalone_long_month_path + "[4]") + ";" \
        + findEntry(path, standalone_long_month_path + "[5]") + ";" \
        + findEntry(path, standalone_long_month_path + "[6]") + ";" \
        + findEntry(path, standalone_long_month_path + "[7]") + ";" \
        + findEntry(path, standalone_long_month_path + "[8]") + ";" \
        + findEntry(path, standalone_long_month_path + "[9]") + ";" \
        + findEntry(path, standalone_long_month_path + "[10]") + ";" \
        + findEntry(path, standalone_long_month_path + "[11]") + ";" \
        + findEntry(path, standalone_long_month_path + "[12]") + ";"

    standalone_short_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[stand-alone]/monthWidth[abbreviated]/month"
    result['standaloneShortMonths'] \
        = findEntry(path, standalone_short_month_path + "[1]") + ";" \
        + findEntry(path, standalone_short_month_path + "[2]") + ";" \
        + findEntry(path, standalone_short_month_path + "[3]") + ";" \
        + findEntry(path, standalone_short_month_path + "[4]") + ";" \
        + findEntry(path, standalone_short_month_path + "[5]") + ";" \
        + findEntry(path, standalone_short_month_path + "[6]") + ";" \
        + findEntry(path, standalone_short_month_path + "[7]") + ";" \
        + findEntry(path, standalone_short_month_path + "[8]") + ";" \
        + findEntry(path, standalone_short_month_path + "[9]") + ";" \
        + findEntry(path, standalone_short_month_path + "[10]") + ";" \
        + findEntry(path, standalone_short_month_path + "[11]") + ";" \
        + findEntry(path, standalone_short_month_path + "[12]") + ";"

    standalone_narrow_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[stand-alone]/monthWidth[narrow]/month"
    result['standaloneNarrowMonths'] \
        = findEntry(path, standalone_narrow_month_path + "[1]") + ";" \
        + findEntry(path, standalone_narrow_month_path + "[2]") + ";" \
        + findEntry(path, standalone_narrow_month_path + "[3]") + ";" \
        + findEntry(path, standalone_narrow_month_path + "[4]") + ";" \
        + findEntry(path, standalone_narrow_month_path + "[5]") + ";" \
        + findEntry(path, standalone_narrow_month_path + "[6]") + ";" \
        + findEntry(path, standalone_narrow_month_path + "[7]") + ";" \
        + findEntry(path, standalone_narrow_month_path + "[8]") + ";" \
        + findEntry(path, standalone_narrow_month_path + "[9]") + ";" \
        + findEntry(path, standalone_narrow_month_path + "[10]") + ";" \
        + findEntry(path, standalone_narrow_month_path + "[11]") + ";" \
        + findEntry(path, standalone_narrow_month_path + "[12]") + ";"

    long_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[format]/monthWidth[wide]/month"
    result['longMonths'] \
        = findEntry(path, long_month_path + "[1]") + ";" \
        + findEntry(path, long_month_path + "[2]") + ";" \
        + findEntry(path, long_month_path + "[3]") + ";" \
        + findEntry(path, long_month_path + "[4]") + ";" \
        + findEntry(path, long_month_path + "[5]") + ";" \
        + findEntry(path, long_month_path + "[6]") + ";" \
        + findEntry(path, long_month_path + "[7]") + ";" \
        + findEntry(path, long_month_path + "[8]") + ";" \
        + findEntry(path, long_month_path + "[9]") + ";" \
        + findEntry(path, long_month_path + "[10]") + ";" \
        + findEntry(path, long_month_path + "[11]") + ";" \
        + findEntry(path, long_month_path + "[12]") + ";"

    short_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[format]/monthWidth[abbreviated]/month"
    result['shortMonths'] \
        = findEntry(path, short_month_path + "[1]") + ";" \
        + findEntry(path, short_month_path + "[2]") + ";" \
        + findEntry(path, short_month_path + "[3]") + ";" \
        + findEntry(path, short_month_path + "[4]") + ";" \
        + findEntry(path, short_month_path + "[5]") + ";" \
        + findEntry(path, short_month_path + "[6]") + ";" \
        + findEntry(path, short_month_path + "[7]") + ";" \
        + findEntry(path, short_month_path + "[8]") + ";" \
        + findEntry(path, short_month_path + "[9]") + ";" \
        + findEntry(path, short_month_path + "[10]") + ";" \
        + findEntry(path, short_month_path + "[11]") + ";" \
        + findEntry(path, short_month_path + "[12]") + ";"

    narrow_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[format]/monthWidth[narrow]/month"
    result['narrowMonths'] \
        = findEntry(path, narrow_month_path + "[1]") + ";" \
        + findEntry(path, narrow_month_path + "[2]") + ";" \
        + findEntry(path, narrow_month_path + "[3]") + ";" \
        + findEntry(path, narrow_month_path + "[4]") + ";" \
        + findEntry(path, narrow_month_path + "[5]") + ";" \
        + findEntry(path, narrow_month_path + "[6]") + ";" \
        + findEntry(path, narrow_month_path + "[7]") + ";" \
        + findEntry(path, narrow_month_path + "[8]") + ";" \
        + findEntry(path, narrow_month_path + "[9]") + ";" \
        + findEntry(path, narrow_month_path + "[10]") + ";" \
        + findEntry(path, narrow_month_path + "[11]") + ";" \
        + findEntry(path, narrow_month_path + "[12]") + ";"

    long_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[format]/dayWidth[wide]/day"
    result['longDays'] \
        = findEntry(path, long_day_path + "[sun]") + ";" \
        + findEntry(path, long_day_path + "[mon]") + ";" \
        + findEntry(path, long_day_path + "[tue]") + ";" \
        + findEntry(path, long_day_path + "[wed]") + ";" \
        + findEntry(path, long_day_path + "[thu]") + ";" \
        + findEntry(path, long_day_path + "[fri]") + ";" \
        + findEntry(path, long_day_path + "[sat]") + ";"

    short_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[format]/dayWidth[abbreviated]/day"
    result['shortDays'] \
        = findEntry(path, short_day_path + "[sun]") + ";" \
        + findEntry(path, short_day_path + "[mon]") + ";" \
        + findEntry(path, short_day_path + "[tue]") + ";" \
        + findEntry(path, short_day_path + "[wed]") + ";" \
        + findEntry(path, short_day_path + "[thu]") + ";" \
        + findEntry(path, short_day_path + "[fri]") + ";" \
        + findEntry(path, short_day_path + "[sat]") + ";"

    narrow_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[format]/dayWidth[narrow]/day"
    result['narrowDays'] \
        = findEntry(path, narrow_day_path + "[sun]") + ";" \
        + findEntry(path, narrow_day_path + "[mon]") + ";" \
        + findEntry(path, narrow_day_path + "[tue]") + ";" \
        + findEntry(path, narrow_day_path + "[wed]") + ";" \
        + findEntry(path, narrow_day_path + "[thu]") + ";" \
        + findEntry(path, narrow_day_path + "[fri]") + ";" \
        + findEntry(path, narrow_day_path + "[sat]") + ";"

    standalone_long_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[stand-alone]/dayWidth[wide]/day"
    result['standaloneLongDays'] \
        = findEntry(path, standalone_long_day_path + "[sun]") + ";" \
        + findEntry(path, standalone_long_day_path + "[mon]") + ";" \
        + findEntry(path, standalone_long_day_path + "[tue]") + ";" \
        + findEntry(path, standalone_long_day_path + "[wed]") + ";" \
        + findEntry(path, standalone_long_day_path + "[thu]") + ";" \
        + findEntry(path, standalone_long_day_path + "[fri]") + ";" \
        + findEntry(path, standalone_long_day_path + "[sat]") + ";"

    standalone_short_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[stand-alone]/dayWidth[abbreviated]/day"
    result['standaloneShortDays'] \
        = findEntry(path, standalone_short_day_path + "[sun]") + ";" \
        + findEntry(path, standalone_short_day_path + "[mon]") + ";" \
        + findEntry(path, standalone_short_day_path + "[tue]") + ";" \
        + findEntry(path, standalone_short_day_path + "[wed]") + ";" \
        + findEntry(path, standalone_short_day_path + "[thu]") + ";" \
        + findEntry(path, standalone_short_day_path + "[fri]") + ";" \
        + findEntry(path, standalone_short_day_path + "[sat]") + ";"

    standalone_narrow_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[stand-alone]/dayWidth[narrow]/day"
    result['standaloneNarrowDays'] \
        = findEntry(path, standalone_narrow_day_path + "[sun]") + ";" \
        + findEntry(path, standalone_narrow_day_path + "[mon]") + ";" \
        + findEntry(path, standalone_narrow_day_path + "[tue]") + ";" \
        + findEntry(path, standalone_narrow_day_path + "[wed]") + ";" \
        + findEntry(path, standalone_narrow_day_path + "[thu]") + ";" \
        + findEntry(path, standalone_narrow_day_path + "[fri]") + ";" \
        + findEntry(path, standalone_narrow_day_path + "[sat]") + ";"

    return result

def addEscapes(s):
    result = ''
    for c in s:
        n = ord(c)
        if n < 128:
            result += c
        else:
            result += "\\x"
            result += "%02x" % (n)
    return result

def unicodeStr(s):
    utf8 = s.encode('utf-8')
    return "<size>" + str(len(utf8)) + "</size><data>" + addEscapes(utf8) + "</data>"

def usage():
    print "Usage: cldr2qlocalexml.py <path-to-cldr-main>"
    sys.exit()

def integrateWeekData(filePath):
    if not filePath.endswith(".xml"):
        return {}
    monFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=mon]", attribute="territories")[0].split(" ")
    tueFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=tue]", attribute="territories")[0].split(" ")
    wedFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=wed]", attribute="territories")[0].split(" ")
    thuFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=thu]", attribute="territories")[0].split(" ")
    friFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=fri]", attribute="territories")[0].split(" ")
    satFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=sat]", attribute="territories")[0].split(" ")
    sunFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=sun]", attribute="territories")[0].split(" ")

    monWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=mon]", attribute="territories")[0].split(" ")
    tueWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=tue]", attribute="territories")[0].split(" ")
    wedWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=wed]", attribute="territories")[0].split(" ")
    thuWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=thu]", attribute="territories")[0].split(" ")
    friWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=fri]", attribute="territories")[0].split(" ")
    satWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=sat]", attribute="territories")[0].split(" ")
    sunWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=sun]", attribute="territories")[0].split(" ")

    monWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=mon]", attribute="territories")[0].split(" ")
    tueWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=tue]", attribute="territories")[0].split(" ")
    wedWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=wed]", attribute="territories")[0].split(" ")
    thuWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=thu]", attribute="territories")[0].split(" ")
    friWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=fri]", attribute="territories")[0].split(" ")
    satWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=sat]", attribute="territories")[0].split(" ")
    sunWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=sun]", attribute="territories")[0].split(" ")

    firstDayByCountryCode = {}
    for countryCode in monFirstDayIn:
        firstDayByCountryCode[countryCode] = "mon"
    for countryCode in tueFirstDayIn:
        firstDayByCountryCode[countryCode] = "tue"
    for countryCode in wedFirstDayIn:
        firstDayByCountryCode[countryCode] = "wed"
    for countryCode in thuFirstDayIn:
        firstDayByCountryCode[countryCode] = "thu"
    for countryCode in friFirstDayIn:
        firstDayByCountryCode[countryCode] = "fri"
    for countryCode in satFirstDayIn:
        firstDayByCountryCode[countryCode] = "sat"
    for countryCode in sunFirstDayIn:
        firstDayByCountryCode[countryCode] = "sun"

    weekendStartByCountryCode = {}
    for countryCode in monWeekendStart:
        weekendStartByCountryCode[countryCode] = "mon"
    for countryCode in tueWeekendStart:
        weekendStartByCountryCode[countryCode] = "tue"
    for countryCode in wedWeekendStart:
        weekendStartByCountryCode[countryCode] = "wed"
    for countryCode in thuWeekendStart:
        weekendStartByCountryCode[countryCode] = "thu"
    for countryCode in friWeekendStart:
        weekendStartByCountryCode[countryCode] = "fri"
    for countryCode in satWeekendStart:
        weekendStartByCountryCode[countryCode] = "sat"
    for countryCode in sunWeekendStart:
        weekendStartByCountryCode[countryCode] = "sun"

    weekendEndByCountryCode = {}
    for countryCode in monWeekendEnd:
        weekendEndByCountryCode[countryCode] = "mon"
    for countryCode in tueWeekendEnd:
        weekendEndByCountryCode[countryCode] = "tue"
    for countryCode in wedWeekendEnd:
        weekendEndByCountryCode[countryCode] = "wed"
    for countryCode in thuWeekendEnd:
        weekendEndByCountryCode[countryCode] = "thu"
    for countryCode in friWeekendEnd:
        weekendEndByCountryCode[countryCode] = "fri"
    for countryCode in satWeekendEnd:
        weekendEndByCountryCode[countryCode] = "sat"
    for countryCode in sunWeekendEnd:
        weekendEndByCountryCode[countryCode] = "sun"

    for (key,locale) in locale_database.iteritems():
        countryCode = locale['country_code']
        if countryCode in firstDayByCountryCode:
            locale_database[key]['firstDayOfWeek'] = firstDayByCountryCode[countryCode]
        else:
            locale_database[key]['firstDayOfWeek'] = firstDayByCountryCode["001"]

        if countryCode in weekendStartByCountryCode:
            locale_database[key]['weekendStart'] = weekendStartByCountryCode[countryCode]
        else:
            locale_database[key]['weekendStart'] = weekendStartByCountryCode["001"]

        if countryCode in weekendEndByCountryCode:
            locale_database[key]['weekendEnd'] = weekendEndByCountryCode[countryCode]
        else:
            locale_database[key]['weekendEnd'] = weekendEndByCountryCode["001"]

if len(sys.argv) != 2:
    usage()

cldr_dir = sys.argv[1]

if not os.path.isdir(cldr_dir):
    usage()

cldr_files = os.listdir(cldr_dir)

locale_database = {}
for file in cldr_files:
    l = generateLocaleInfo(cldr_dir + "/" + file)
    if not l:
        sys.stderr.write("skipping file \"" + file + "\"\n")
        continue

    locale_database[(l['language_id'], l['script_id'], l['country_id'], l['variant_code'])] = l

integrateWeekData(cldr_dir+"/../supplemental/supplementalData.xml")
locale_keys = locale_database.keys()
locale_keys.sort()

cldr_version = 'unknown'
ldml = open(cldr_dir+"/../dtd/ldml.dtd", "r")
for line in ldml:
    if 'version cldrVersion CDATA #FIXED' in line:
        cldr_version = line.split('"')[1]

print "<localeDatabase>"
print "    <version>" + cldr_version + "</version>"
print "    <languageList>"
for id in enumdata.language_list:
    l = enumdata.language_list[id]
    print "        <language>"
    print "            <name>" + l[0] + "</name>"
    print "            <id>" + str(id) + "</id>"
    print "            <code>" + l[1] + "</code>"
    print "        </language>"
print "    </languageList>"

print "    <scriptList>"
for id in enumdata.script_list:
    l = enumdata.script_list[id]
    print "        <script>"
    print "            <name>" + l[0] + "</name>"
    print "            <id>" + str(id) + "</id>"
    print "            <code>" + l[1] + "</code>"
    print "        </script>"
print "    </scriptList>"

print "    <countryList>"
for id in enumdata.country_list:
    l = enumdata.country_list[id]
    print "        <country>"
    print "            <name>" + l[0] + "</name>"
    print "            <id>" + str(id) + "</id>"
    print "            <code>" + l[1] + "</code>"
    print "        </country>"
print "    </countryList>"

print \
"    <defaultCountryList>\n\
        <defaultCountry>\n\
            <language>Afrikaans</language>\n\
            <country>SouthAfrica</country>\n\
        </defaultCountry>\n\
        <defaultCountry>\n\
            <language>Afan</language>\n\
            <country>Ethiopia</country>\n\
        </defaultCountry>\n\
        <defaultCountry>\n\
            <language>Afar</language>\n\
            <country>Djibouti</country>\n\
        </defaultCountry>\n\
        <defaultCountry>\n\
            <language>Arabic</language>\n\
            <country>SaudiArabia</country>\n\
        </defaultCountry>\n\
        <defaultCountry>\n\
            <language>Chinese</language>\n\
            <country>China</country>\n\
        </defaultCountry>\n\
        <defaultCountry>\n\
            <language>Dutch</language>\n\
            <country>Netherlands</country>\n\
        </defaultCountry>\n\
        <defaultCountry>\n\
            <language>English</language>\n\
            <country>UnitedStates</country>\n\
        </defaultCountry>\n\
        <defaultCountry>\n\
            <language>French</language>\n\
            <country>France</country>\n\
        </defaultCountry>\n\
        <defaultCountry>\n\
            <language>German</language>\n\
            <country>Germany</country>\n\
        </defaultCountry>\n\
        <defaultCountry>\n\
            <language>Greek</language>\n\
            <country>Greece</country>\n\
        </defaultCountry>\n\
        <defaultCountry>\n\
            <language>Italian</language>\n\
            <country>Italy</country>\n\
        </defaultCountry>\n\
        <defaultCountry>\n\
            <language>Malay</language>\n\
            <country>Malaysia</country>\n\
        </defaultCountry>\n\
        <defaultCountry>\n\
            <language>Portuguese</language>\n\
            <country>Portugal</country>\n\
        </defaultCountry>\n\
        <defaultCountry>\n\
            <language>Russian</language>\n\
            <country>RussianFederation</country>\n\
        </defaultCountry>\n\
        <defaultCountry>\n\
            <language>Serbian</language>\n\
            <country>SerbiaAndMontenegro</country>\n\
        </defaultCountry>\n\
        <defaultCountry>\n\
            <language>SerboCroatian</language>\n\
            <country>SerbiaAndMontenegro</country>\n\
        </defaultCountry>\n\
        <defaultCountry>\n\
            <language>Somali</language>\n\
            <country>Somalia</country>\n\
        </defaultCountry>\n\
        <defaultCountry>\n\
            <language>Spanish</language>\n\
            <country>Spain</country>\n\
        </defaultCountry>\n\
        <defaultCountry>\n\
            <language>Swahili</language>\n\
            <country>Kenya</country>\n\
        </defaultCountry>\n\
        <defaultCountry>\n\
            <language>Swedish</language>\n\
            <country>Sweden</country>\n\
        </defaultCountry>\n\
        <defaultCountry>\n\
            <language>Tigrinya</language>\n\
            <country>Eritrea</country>\n\
        </defaultCountry>\n\
        <defaultCountry>\n\
            <language>Uzbek</language>\n\
            <country>Uzbekistan</country>\n\
        </defaultCountry>\n\
        <defaultCountry>\n\
            <language>Persian</language>\n\
            <country>Iran</country>\n\
        </defaultCountry>\n\
        <defaultCountry>\n\
            <language>Mongolian</language>\n\
            <country>Mongolia</country>\n\
        </defaultCountry>\n\
        <defaultCountry>\n\
            <language>Nepali</language>\n\
            <country>Nepal</country>\n\
        </defaultCountry>\n\
    </defaultCountryList>"

print "    <localeList>"
print \
"        <locale>\n\
            <language>C</language>\n\
            <languageEndonym></languageEndonym>\n\
            <script>AnyScript</script>\n\
            <country>AnyCountry</country>\n\
            <countryEndonym></countryEndonym>\n\
            <decimal>46</decimal>\n\
            <group>44</group>\n\
            <list>59</list>\n\
            <percent>37</percent>\n\
            <zero>48</zero>\n\
            <minus>45</minus>\n\
            <plus>43</plus>\n\
            <exp>101</exp>\n\
            <quotationStart>\"</quotationStart>\n\
            <quotationEnd>\"</quotationEnd>\n\
            <alternateQuotationStart>\'</alternateQuotationStart>\n\
            <alternateQuotationEnd>\'</alternateQuotationEnd>\n\
            <listPatternPartStart>%1, %2</listPatternPartStart>\n\
            <listPatternPartMiddle>%1, %2</listPatternPartMiddle>\n\
            <listPatternPartEnd>%1, %2</listPatternPartEnd>\n\
            <listPatternPartTwo>%1, %2</listPatternPartTwo>\n\
            <am>AM</am>\n\
            <pm>PM</pm>\n\
            <firstDayOfWeek>mon</firstDayOfWeek>\n\
            <weekendStart>sat</weekendStart>\n\
            <weekendEnd>sun</weekendEnd>\n\
            <longDateFormat>EEEE, d MMMM yyyy</longDateFormat>\n\
            <shortDateFormat>d MMM yyyy</shortDateFormat>\n\
            <longTimeFormat>HH:mm:ss z</longTimeFormat>\n\
            <shortTimeFormat>HH:mm:ss</shortTimeFormat>\n\
            <standaloneLongMonths>January;February;March;April;May;June;July;August;September;October;November;December;</standaloneLongMonths>\n\
            <standaloneShortMonths>Jan;Feb;Mar;Apr;May;Jun;Jul;Aug;Sep;Oct;Nov;Dec;</standaloneShortMonths>\n\
            <standaloneNarrowMonths>J;F;M;A;M;J;J;A;S;O;N;D;</standaloneNarrowMonths>\n\
            <longMonths>January;February;March;April;May;June;July;August;September;October;November;December;</longMonths>\n\
            <shortMonths>Jan;Feb;Mar;Apr;May;Jun;Jul;Aug;Sep;Oct;Nov;Dec;</shortMonths>\n\
            <narrowMonths>1;2;3;4;5;6;7;8;9;10;11;12;</narrowMonths>\n\
            <longDays>Sunday;Monday;Tuesday;Wednesday;Thursday;Friday;Saturday;</longDays>\n\
            <shortDays>Sun;Mon;Tue;Wed;Thu;Fri;Sat;</shortDays>\n\
            <narrowDays>7;1;2;3;4;5;6;</narrowDays>\n\
            <standaloneLongDays>Sunday;Monday;Tuesday;Wednesday;Thursday;Friday;Saturday;</standaloneLongDays>\n\
            <standaloneShortDays>Sun;Mon;Tue;Wed;Thu;Fri;Sat;</standaloneShortDays>\n\
            <standaloneNarrowDays>S;M;T;W;T;F;S;</standaloneNarrowDays>\n\
            <currencyIsoCode></currencyIsoCode>\n\
            <currencySymbol></currencySymbol>\n\
            <currencyDisplayName>;;;;;;;</currencyDisplayName>\n\
            <currencyDigits>2</currencyDigits>\n\
            <currencyRounding>1</currencyRounding>\n\
            <currencyFormat>%1%2</currencyFormat>\n\
            <currencyNegativeFormat></currencyNegativeFormat>\n\
        </locale>"

for key in locale_keys:
    l = locale_database[key]

    print "        <locale>"
    print "            <language>" + l['language']        + "</language>"
    print "            <languageEndonym>" + l['language_endonym'].encode('utf-8') + "</languageEndonym>"
    print "            <script>" + l['script']        + "</script>"
    print "            <country>"  + l['country']         + "</country>"
    print "            <countryEndonym>"  + l['country_endonym'].encode('utf-8') + "</countryEndonym>"
    print "            <languagecode>" + l['language_code']        + "</languagecode>"
    print "            <scriptcode>" + l['script_code']        + "</scriptcode>"
    print "            <countrycode>"  + l['country_code']         + "</countrycode>"
    print "            <decimal>"  + ordStr(l['decimal']) + "</decimal>"
    print "            <group>"    + ordStr(l['group'])   + "</group>"
    print "            <list>"     + fixOrdStrList(l['list'])    + "</list>"
    print "            <percent>"  + fixOrdStrPercent(l['percent']) + "</percent>"
    print "            <zero>"     + ordStr(l['zero'])    + "</zero>"
    print "            <minus>"    + ordStr(l['minus'])   + "</minus>"
    print "            <plus>"     + ordStr(l['plus'])   + "</plus>"
    print "            <exp>"      + fixOrdStrExp(l['exp'])     + "</exp>"
    print "            <quotationStart>" + l['quotationStart'].encode('utf-8') + "</quotationStart>"
    print "            <quotationEnd>" + l['quotationEnd'].encode('utf-8')   + "</quotationEnd>"
    print "            <alternateQuotationStart>" + l['alternateQuotationStart'].encode('utf-8') + "</alternateQuotationStart>"
    print "            <alternateQuotationEnd>" + l['alternateQuotationEnd'].encode('utf-8')   + "</alternateQuotationEnd>"
    print "            <listPatternPartStart>" + l['listPatternPartStart'].encode('utf-8')   + "</listPatternPartStart>"
    print "            <listPatternPartMiddle>" + l['listPatternPartMiddle'].encode('utf-8')   + "</listPatternPartMiddle>"
    print "            <listPatternPartEnd>" + l['listPatternPartEnd'].encode('utf-8')   + "</listPatternPartEnd>"
    print "            <listPatternPartTwo>" + l['listPatternPartTwo'].encode('utf-8')   + "</listPatternPartTwo>"
    print "            <am>"       + l['am'].encode('utf-8') + "</am>"
    print "            <pm>"       + l['pm'].encode('utf-8') + "</pm>"
    print "            <firstDayOfWeek>"  + l['firstDayOfWeek'].encode('utf-8') + "</firstDayOfWeek>"
    print "            <weekendStart>"  + l['weekendStart'].encode('utf-8') + "</weekendStart>"
    print "            <weekendEnd>"  + l['weekendEnd'].encode('utf-8') + "</weekendEnd>"
    print "            <longDateFormat>"  + l['longDateFormat'].encode('utf-8')  + "</longDateFormat>"
    print "            <shortDateFormat>" + l['shortDateFormat'].encode('utf-8') + "</shortDateFormat>"
    print "            <longTimeFormat>"  + l['longTimeFormat'].encode('utf-8')  + "</longTimeFormat>"
    print "            <shortTimeFormat>" + l['shortTimeFormat'].encode('utf-8') + "</shortTimeFormat>"
    print "            <standaloneLongMonths>" + l['standaloneLongMonths'].encode('utf-8')      + "</standaloneLongMonths>"
    print "            <standaloneShortMonths>"+ l['standaloneShortMonths'].encode('utf-8')      + "</standaloneShortMonths>"
    print "            <standaloneNarrowMonths>"+ l['standaloneNarrowMonths'].encode('utf-8')      + "</standaloneNarrowMonths>"
    print "            <longMonths>"      + l['longMonths'].encode('utf-8')      + "</longMonths>"
    print "            <shortMonths>"     + l['shortMonths'].encode('utf-8')     + "</shortMonths>"
    print "            <narrowMonths>"     + l['narrowMonths'].encode('utf-8')     + "</narrowMonths>"
    print "            <longDays>"        + l['longDays'].encode('utf-8')        + "</longDays>"
    print "            <shortDays>"       + l['shortDays'].encode('utf-8')       + "</shortDays>"
    print "            <narrowDays>"       + l['narrowDays'].encode('utf-8')       + "</narrowDays>"
    print "            <standaloneLongDays>" + l['standaloneLongDays'].encode('utf-8')        + "</standaloneLongDays>"
    print "            <standaloneShortDays>" + l['standaloneShortDays'].encode('utf-8')       + "</standaloneShortDays>"
    print "            <standaloneNarrowDays>" + l['standaloneNarrowDays'].encode('utf-8')       + "</standaloneNarrowDays>"
    print "            <currencyIsoCode>" + l['currencyIsoCode'].encode('utf-8') + "</currencyIsoCode>"
    print "            <currencySymbol>" + l['currencySymbol'].encode('utf-8') + "</currencySymbol>"
    print "            <currencyDisplayName>" + l['currencyDisplayName'].encode('utf-8') + "</currencyDisplayName>"
    print "            <currencyDigits>" + str(l['currencyDigits']) + "</currencyDigits>"
    print "            <currencyRounding>" + str(l['currencyRounding']) + "</currencyRounding>"
    print "            <currencyFormat>" + l['currencyFormat'].encode('utf-8') + "</currencyFormat>"
    print "            <currencyNegativeFormat>" + l['currencyNegativeFormat'].encode('utf-8') + "</currencyNegativeFormat>"
    print "        </locale>"
print "    </localeList>"
print "</localeDatabase>"