diff options
author | Denis Dzyubenko <denis.dzyubenko@nokia.com> | 2011-02-28 17:48:36 (GMT) |
---|---|---|
committer | Denis Dzyubenko <denis.dzyubenko@nokia.com> | 2011-03-02 14:56:44 (GMT) |
commit | 440394d05989e1d20f82071045106117e907a378 (patch) | |
tree | 5e73663e33a12c70c3352f8e63f1293d2d79086a | |
parent | 010471faf918cfab75032a569aab8e510e47a72a (diff) | |
download | Qt-440394d05989e1d20f82071045106117e907a378.zip Qt-440394d05989e1d20f82071045106117e907a378.tar.gz Qt-440394d05989e1d20f82071045106117e907a378.tar.bz2 |
Added writing script support to QLocale.
Task-number: QTBUG-17105
Reviewed-by: trustme
-rw-r--r-- | src/corelib/tools/qlocale.cpp | 427 | ||||
-rw-r--r-- | src/corelib/tools/qlocale.h | 23 | ||||
-rw-r--r-- | src/corelib/tools/qlocale.qdoc | 1 | ||||
-rw-r--r-- | src/corelib/tools/qlocale_mac.mm | 15 | ||||
-rw-r--r-- | src/corelib/tools/qlocale_p.h | 28 | ||||
-rw-r--r-- | src/corelib/tools/qlocale_symbian.cpp | 8 | ||||
-rw-r--r-- | src/corelib/tools/qlocale_unix.cpp | 17 | ||||
-rw-r--r-- | src/corelib/tools/qlocale_win.cpp | 10 | ||||
-rw-r--r-- | tests/auto/qlocale/tst_qlocale.cpp | 39 | ||||
-rw-r--r-- | tests/manual/qlocale/window.cpp | 28 | ||||
-rwxr-xr-x | util/local_database/cldr2qlocalexml.py | 31 | ||||
-rw-r--r-- | util/local_database/enumdata.py | 20 | ||||
-rwxr-xr-x | util/local_database/qlocalexml2cpp.py | 148 |
13 files changed, 613 insertions, 182 deletions
diff --git a/src/corelib/tools/qlocale.cpp b/src/corelib/tools/qlocale.cpp index 0a67daf..efa1f6c 100644 --- a/src/corelib/tools/qlocale.cpp +++ b/src/corelib/tools/qlocale.cpp @@ -94,14 +94,14 @@ QT_BEGIN_INCLUDE_NAMESPACE #include "qlocale_data_p.h" QT_END_INCLUDE_NAMESPACE -// Assumes that code is a -// QChar code[3]; -// If the code is two-digit the third digit must be 0 -QLocale::Language codeToLanguage(const QChar *code) +QLocale::Language QLocalePrivate::codeToLanguage(const QString &code) { - ushort uc1 = code[0].unicode(); - ushort uc2 = code[1].unicode(); - ushort uc3 = code[2].unicode(); + int len = code.length(); + if (len != 2 && len != 3) + return QLocale::C; + ushort uc1 = len-- > 0 ? code[0].toLower().unicode() : 0; + ushort uc2 = len-- > 0 ? code[1].toLower().unicode() : 0; + ushort uc3 = len-- > 0 ? code[2].toLower().unicode() : 0; if (uc1 == 'n' && uc2 == 'o' && uc3 == 0) uc2 = 'b'; @@ -115,13 +115,34 @@ QLocale::Language codeToLanguage(const QChar *code) return QLocale::C; } -// Assumes that code is a -// QChar code[3]; -QLocale::Country codeToCountry(const QChar *code) +QLocale::Script QLocalePrivate::codeToScript(const QString &code) { - ushort uc1 = code[0].unicode(); - ushort uc2 = code[1].unicode(); - ushort uc3 = code[2].unicode(); + int len = code.length(); + if (len != 4) + return QLocale::AnyScript; + + // script is titlecased in our data + unsigned char c0 = code.at(0).toUpper().toLatin1(); + unsigned char c1 = code.at(1).toLower().toLatin1(); + unsigned char c2 = code.at(2).toLower().toLatin1(); + unsigned char c3 = code.at(3).toLower().toLatin1(); + + const unsigned char *c = script_code_list; + for (int i = 0; i < QLocale::LastScript; ++i, c += 4) { + if (c0 == c[0] && c1 == c[1] && c2 == c[2] && c3 == c[3]) + return QLocale::Script(i); + } + return QLocale::AnyScript; +} + +QLocale::Country QLocalePrivate::codeToCountry(const QString &code) +{ + int len = code.length(); + if (len != 2 && len != 3) + return QLocale::AnyCountry; + ushort uc1 = len-- > 0 ? code[0].toUpper().unicode() : 0; + ushort uc2 = len-- > 0 ? code[1].toUpper().unicode() : 0; + ushort uc3 = len-- > 0 ? code[2].toUpper().unicode() : 0; const unsigned char *c = country_code_list; for (; *c != 0; c += 3) { @@ -132,12 +153,14 @@ QLocale::Country codeToCountry(const QChar *code) return QLocale::AnyCountry; } -QString languageToCode(QLocale::Language language) +QString QLocalePrivate::languageCode() const { - if (language == QLocale::C) + if (m_language_id == QLocale::AnyLanguage) + return QString(); + if (m_language_id == QLocale::C) return QLatin1String("C"); - const unsigned char *c = language_code_list + 3*(uint(language)); + const unsigned char *c = language_code_list + 3*(uint(m_language_id)); QString code(c[2] == 0 ? 2 : 3, Qt::Uninitialized); @@ -149,12 +172,20 @@ QString languageToCode(QLocale::Language language) return code; } -QString countryToCode(QLocale::Country country) +QString QLocalePrivate::scriptCode() const { - if (country == QLocale::AnyCountry) + if (m_script_id == QLocale::AnyScript || m_script_id > QLocale::LastScript) return QString(); + const unsigned char *c = script_code_list + 4*(uint(m_script_id)); + return QString::fromLatin1((const char *)c, 4); +} - const unsigned char *c = country_code_list + 3*(uint(country)); +QString QLocalePrivate::countryCode() const +{ + if (m_country_id == QLocale::AnyCountry) + return QString(); + + const unsigned char *c = country_code_list + 3*(uint(m_country_id)); QString code(c[2] == 0 ? 2 : 3, Qt::Uninitialized); @@ -166,10 +197,46 @@ QString countryToCode(QLocale::Country country) return code; } -const QLocalePrivate *findLocale(QLocale::Language language, QLocale::Country country) +QString QLocalePrivate::bcp47Name() const { - unsigned language_id = language; - unsigned country_id = country; + if (m_language_id == QLocale::AnyLanguage) + return QString(); + if (m_language_id == QLocale::C) + return QLatin1String("C"); + const unsigned char *lang = language_code_list + 3*(uint(m_language_id)); + const unsigned char *script = + (m_script_id != QLocale::AnyScript ? script_code_list + 4*(uint(m_script_id)) : 0); + const unsigned char *country = + (m_country_id != QLocale::AnyCountry ? country_code_list + 3*(uint(m_country_id)) : 0); + char len = (lang[2] != 0 ? 3 : 2) + (script ? 4+1 : 0) + (country ? (country[2] != 0 ? 3 : 2)+1 : 0); + QString name(len, Qt::Uninitialized); + QChar *uc = name.data(); + *uc++ = ushort(lang[0]); + *uc++ = ushort(lang[1]); + if (lang[2] != 0) + *uc++ = ushort(lang[2]); + if (script) { + *uc++ = QLatin1Char('-'); + *uc++ = ushort(script[0]); + *uc++ = ushort(script[1]); + *uc++ = ushort(script[2]); + *uc++ = ushort(script[3]); + } + if (country) { + *uc++ = QLatin1Char('-'); + *uc++ = ushort(country[0]); + *uc++ = ushort(country[1]); + if (country[2] != 0) + *uc++ = ushort(country[2]); + } + return name; +} + +const QLocalePrivate *findLocale(QLocale::Language language, QLocale::Script script, QLocale::Country country) +{ + const unsigned language_id = language; + const unsigned script_id = script; + const unsigned country_id = country; uint idx = locale_index[language_id]; @@ -178,111 +245,133 @@ const QLocalePrivate *findLocale(QLocale::Language language, QLocale::Country co if (idx == 0) // default language has no associated country return d; - if (country == QLocale::AnyCountry) + if (script == QLocale::AnyScript && country == QLocale::AnyCountry) return d; Q_ASSERT(d->languageId() == language_id); - while (d->languageId() == language_id - && d->countryId() != country_id) - ++d; - - if (d->countryId() == country_id - && d->languageId() == language_id) - return d; + if (country == QLocale::AnyCountry) { + while (d->m_language_id == language_id && d->m_script_id != script_id) + ++d; + if (d->m_language_id == language_id && d->m_script_id == script_id) + return d; + } else if (script == QLocale::AnyScript) { + while (d->m_language_id == language_id) { + if (d->m_script_id == script_id && d->m_country_id == country_id) + return d; + ++d; + } + } else { + // both script and country are explicitely specified + while (d->m_language_id == language_id) { + if (d->m_script_id == script_id && d->m_country_id == country_id) + return d; + ++d; + } + } return locale_data + idx; } -bool splitLocaleName(const QString &name, - QChar *lang_begin, QChar *cntry_begin, - int *lang_len, int *cntry_len) +static bool parse_locale_tag(const QString &input, int &i, QString *result, const QString &separators) { - for (int i = 0; i < 3; ++i) - lang_begin[i] = 0; - for (int i = 0; i < 3; ++i) - cntry_begin[i] = 0; - if (lang_len) - *lang_len = 0; - if (cntry_len) - *cntry_len = 0; + *result = QString(8, Qt::Uninitialized); // worst case according to BCP47 + QChar *pch = result->data(); + const QChar *uc = input.data() + i; + const int l = input.length(); + int size = 0; + for (; i < l && size < 5; ++i, ++size) { + if (separators.contains(*uc)) + break; + if (uc->unicode() > 0xFF) // latin only + return false; + *pch++ = *uc++; + } + result->truncate(size); + return true; +} - int l = name.length(); +bool splitLocaleName(const QString &name, QString &lang, QString &script, QString &cntry) +{ + const int length = name.length(); - QChar *lang = lang_begin; - QChar *cntry = cntry_begin; + lang = script = cntry = QString(); - int state = 0; - const QChar *uc = name.unicode(); - for (int i = 0; i < l; ++i) { - if (uc->unicode() == '.' || uc->unicode() == '@') + const QString separators = QLatin1String("_-.@"); + enum ParserState { NoState, LangState, ScriptState, CountryState }; + ParserState state = LangState; + for (int i = 0; i < length && state != NoState; ) { + QString value; + if (!parse_locale_tag(name, i, &value, separators) ||value.isEmpty()) break; - + QChar sep = i < length ? name.at(i) : QChar(); switch (state) { - case 0: - // parsing language - if (uc->unicode() == '_' || uc->unicode() == '-') { - state = 1; - break; - } - if (lang - lang_begin == 3) - return false; - if (uc->unicode() < 'a' || uc->unicode() > 'z') - return false; - - *lang = *uc; - ++lang; - break; - case 1: - // parsing country - if (cntry - cntry_begin == 3) { - cntry_begin[0] = 0; - break; - } - - *cntry = *uc; - ++cntry; + case LangState: + lang = value; + if (i == length) { + // just language was specified + state = NoState; break; + } + state = ScriptState; + break; + case ScriptState: { + QString scripts = QString::fromLatin1((const char *)script_code_list, sizeof(script_code_list)); + if (value.length() == 4 && scripts.indexOf(value) % 4 == 0) { + // script name is always 4 characters + script = value; + state = CountryState; + } else { + // it wasn't a script, maybe it is a country then? + cntry = value; + state = NoState; + } + break; } - - ++uc; + case CountryState: + cntry = value; + state = NoState; + break; + case NoState: + // shouldn't happen + qWarning("QLocale: This should never happen"); + break; + } + ++i; } - - if (lang_len) - *lang_len = lang - lang_begin; - if (cntry_len) - *cntry_len = cntry - cntry_begin; - - int lang_length = lang - lang_begin; - return lang_length == 2 || lang_length == 3; + return lang.length() == 2 || lang.length() == 3; } -void getLangAndCountry(const QString &name, QLocale::Language &lang, QLocale::Country &cntry) +void QLocalePrivate::getLangAndCountry(const QString &name, QLocale::Language &lang, + QLocale::Script &script, QLocale::Country &cntry) { lang = QLocale::C; + script = QLocale::AnyScript; cntry = QLocale::AnyCountry; - QChar lang_code[3]; - QChar cntry_code[3]; - if (!splitLocaleName(name, lang_code, cntry_code)) + QString lang_code; + QString script_code; + QString cntry_code; + if (!splitLocaleName(name, lang_code, script_code, cntry_code)) return; - lang = codeToLanguage(lang_code); + lang = QLocalePrivate::codeToLanguage(lang_code); if (lang == QLocale::C) return; - - if (cntry_code[0].unicode() != 0) - cntry = codeToCountry(cntry_code); + script = QLocalePrivate::codeToScript(script_code); + cntry = QLocalePrivate::codeToCountry(cntry_code); } const QLocalePrivate *findLocale(const QString &name) { QLocale::Language lang; + QLocale::Script script; QLocale::Country cntry; - getLangAndCountry(name, lang, cntry); + QLocalePrivate::getLangAndCountry(name, lang, script, cntry); - return findLocale(lang, cntry); + return findLocale(lang, script, cntry); } + QString readEscapedFormatString(const QString &format, int *idx) { int &i = *idx; @@ -389,11 +478,18 @@ void QLocalePrivate::updateSystemPrivate() #endif QVariant res = sys_locale->query(QSystemLocale::LanguageId, QVariant()); - if (!res.isNull()) + if (!res.isNull()) { system_lp->m_language_id = res.toInt(); + system_lp->m_script_id = QLocale::AnyScript; // default for compatibility + } res = sys_locale->query(QSystemLocale::CountryId, QVariant()); - if (!res.isNull()) + if (!res.isNull()) { system_lp->m_country_id = res.toInt(); + system_lp->m_script_id = QLocale::AnyScript; // default for compatibility + } + res = sys_locale->query(QSystemLocale::ScriptId, QVariant()); + if (!res.isNull()) + system_lp->m_script_id = res.toInt(); res = sys_locale->query(QSystemLocale::DecimalPoint, QVariant()); if (!res.isNull()) @@ -510,11 +606,12 @@ static quint16 localePrivateIndex(const QLocalePrivate *p) /*! Constructs a QLocale object with the specified \a name, which has the format - "language[_-country][.codeset][@modifier]" or "C", where: + "language[_script][_country][.codeset][@modifier]" or "C", where: \list \i language is a lowercase, two-letter, ISO 639 language code, - \i territory is an uppercase, two-letter, ISO 3166 country code, + \i script is a titlecase, four-letter, ISO 15924 script code, + \i country is an uppercase, two- or three-letter, ISO 3166 country code (also "419" as defined by United Nations), \i and codeset and modifier are ignored. \endlist @@ -525,12 +622,12 @@ static quint16 localePrivateIndex(const QLocalePrivate *p) is not present, or is not a valid ISO 3166 code, the most appropriate country is chosen for the specified language. - The language and country codes are converted to their respective - \c Language and \c Country enums. After this conversion is - performed the constructor behaves exactly like QLocale(Country, + The language, script and country codes are converted to their respective + \c Language, \c Script and \c Country enums. After this conversion is + performed the constructor behaves exactly like QLocale(Country, Script, Language). - This constructor is much slower than QLocale(Country, Language). + This constructor is much slower than QLocale(Country, Script, Language). \sa name() */ @@ -579,7 +676,46 @@ QLocale::QLocale() QLocale::QLocale(Language language, Country country) : v(0) { - const QLocalePrivate *d = findLocale(language, country); + const QLocalePrivate *d = findLocale(language, QLocale::AnyScript, country); + + // If not found, should default to system + if (d->languageId() == QLocale::C && language != QLocale::C) { + p.numberOptions = default_number_options; + p.index = localePrivateIndex(defaultPrivate()); + } else { + p.numberOptions = 0; + p.index = localePrivateIndex(d); + } +} +\ +/*! + \since 4.8 + + Constructs a QLocale object with the specified \a language, \a script and + \a country. + + \list + \i If the language/script/country is found in the database, it is used. + \i If both \a script is AnyScript and \a country is AnyCountry, the + language is used with the most appropriate available script and country + (for example, Germany for German), + \i If either \a script is AnyScript or \a country is AnyCountry, the + language is used with the first locale that matches the given \a script + and \a country. + \i If neither the language nor the country are found, QLocale + defaults to the default locale (see setDefault()). + \endlist + + The language, script and country that are actually used can be queried + using language(), script() and country(). + + \sa setDefault() language() script() country() +*/ + +QLocale::QLocale(Language language, Script script, Country country) + : v(0) +{ + const QLocalePrivate *d = findLocale(language, script, country); // If not found, should default to system if (d->languageId() == QLocale::C && language != QLocale::C) { @@ -708,6 +844,11 @@ QLocale::Language QLocale::language() const return Language(d()->languageId()); } +QLocale::Script QLocale::script() const +{ + return Script(d()->m_script_id); +} + /*! Returns the country of this locale. @@ -722,16 +863,20 @@ QLocale::Country QLocale::country() const Returns the language and country of this locale as a string of the form "language_country", where language is a lowercase, two-letter ISO 639 language code, - and country is an uppercase, two-letter ISO 3166 country code. + and country is an uppercase, two- or three-letter ISO 3166 country code. - \sa language(), country() + Note that even if QLocale object was constructed with a specific script, + name() will ignore it for compatibility reasons. Use bcp47Name() instead + if you need a full locale name. + + \sa QLocale(const QString &), language(), country(), bcp47Name() */ QString QLocale::name() const { Language l = language(); - QString result = languageToCode(l); + QString result = d()->languageCode(); if (l == C) return result; @@ -741,15 +886,30 @@ QString QLocale::name() const return result; result.append(QLatin1Char('_')); - result.append(countryToCode(c)); + result.append(d()->countryCode()); return result; } /*! + \since 4.8 + + Returns the dash-separated language, script and country (and possibly other BCP47 fields) + of this locale as a string. + + This function tries to conform the locale name to BCP47. + + \sa language(), country(), script() +*/ +QString QLocale::bcp47Name() const +{ + return d()->bcp47Name(); +} + +/*! Returns a QString containing the name of \a language. - \sa countryToString(), name() + \sa countryToString(), scriptToString(), bcp47Name() */ QString QLocale::languageToString(Language language) @@ -762,7 +922,7 @@ QString QLocale::languageToString(Language language) /*! Returns a QString containing the name of \a country. - \sa country(), name() + \sa languageToString(), scriptToString(), country(), bcp47Name() */ QString QLocale::countryToString(Country country) @@ -773,6 +933,20 @@ QString QLocale::countryToString(Country country) } /*! + \since 4.8 + + Returns a QString containing the name of \a script. + + \sa languageToString(), countryToString(), script(), bcp47Name() +*/ +QString QLocale::scriptToString(QLocale::Script script) +{ + if (uint(script) > uint(QLocale::LastScript)) + return QLatin1String("Unknown"); + return QLatin1String(script_name_list + script_name_index[script]); +} + +/*! Returns the short int represented by the localized string \a s, using base \a base. If \a base is 0 the base is determined automatically using the following rules: If the string begins with @@ -1579,12 +1753,47 @@ QLocale QLocale::system() return result; } + +/*! + \since 4.8 + + Returns the list of valid locale names that match the given \a language, \a + script and \a country. + + Getting a list of all locales: + QStringList allLocales = QLocale::matchingLocales(QLocale::AnyLanguage, QLocale::AnyScript, QLocale::AnyCountry); +*/ +QStringList QLocale::matchingLocales(QLocale::Language language, + QLocale::Script script, + QLocale::Country country) +{ + if (uint(language) > QLocale::LastLanguage || uint(script) > QLocale::LastScript || + uint(country) > QLocale::LastCountry) + return QStringList(); + + QStringList result; + const QLocalePrivate *d = locale_data; + if (language == QLocale::AnyLanguage && script == QLocale::AnyScript && country == QLocale::AnyCountry) + result.reserve(locale_data_size); + if (language != QLocale::C) + d += locale_index[language]; + while ( (d != locale_data + locale_data_size) + && (language == QLocale::AnyLanguage || d->m_language_id == uint(language))) { + result.append(d->bcp47Name()); + ++d; + } + return result; +} + /*! + \obsolete \since 4.3 Returns the list of countries that have entires for \a language in Qt's locale database. If the result is an empty list, then \a language is not represented in Qt's locale database. + + \sa matchingLocales */ QList<QLocale::Country> QLocale::countriesForLanguage(Language language) { @@ -2939,7 +3148,7 @@ QString QLocale::toCurrencyString(double value, const QString &symbol) const Returns a sorted list of locale names that could be used for translation of messages presented to the user. - \sa QTranslator + \sa QTranslator, bcp47Name() */ QStringList QLocale::uiLanguages() const { @@ -2953,7 +3162,7 @@ QStringList QLocale::uiLanguages() const } } #endif - return QStringList(name()); + return QStringList(bcp47Name()); } QT_END_NAMESPACE diff --git a/src/corelib/tools/qlocale.h b/src/corelib/tools/qlocale.h index 280e040..d5c41d0 100644 --- a/src/corelib/tools/qlocale.h +++ b/src/corelib/tools/qlocale.h @@ -112,7 +112,8 @@ public: CurrencyToString, // QString in: qlonglong, qulonglong or double UILanguages, // QStringList StringToStandardQuotation, // QString in: QStringRef to quote - StringToAlternateQuotation // QString in: QStringRef to quote + StringToAlternateQuotation, // QString in: QStringRef to quote + ScriptId // uint }; virtual QVariant query(QueryType type, QVariant in) const; virtual QLocale fallbackLocale() const; @@ -140,6 +141,7 @@ public: // GENERATED PART STARTS HERE // see qlocale_data_p.h for more info on generated data enum Language { + AnyLanguage = 0, C = 1, Abkhazian = 2, Afan = 3, @@ -359,6 +361,19 @@ public: LastLanguage = Shambala }; + enum Script { + AnyScript = 0, + ArabicScript = 1, + CyrillicScript = 2, + DeseretScript = 3, + GurmukhiScript = 4, + SimplifiedHanScript = 5, + TraditionalHanScript = 6, + LatinScript = 7, + MongolianScript = 8, + TifinaghScript = 9, + LastScript = TifinaghScript + }; enum Country { AnyCountry = 0, Afghanistan = 1, @@ -629,14 +644,18 @@ public: QLocale(); QLocale(const QString &name); QLocale(Language language, Country country = AnyCountry); + QLocale(Language language, Script script, Country country); QLocale(const QLocale &other); QLocale &operator=(const QLocale &other); Language language() const; + Script script() const; Country country() const; QString name() const; + QString bcp47Name() const; + short toShort(const QString &s, bool *ok = 0, int base = 0) const; ushort toUShort(const QString &s, bool *ok = 0, int base = 0) const; int toInt(const QString &s, bool *ok = 0, int base = 0) const; @@ -716,11 +735,13 @@ public: static QString languageToString(Language language); static QString countryToString(Country country); + static QString scriptToString(Script script); static void setDefault(const QLocale &locale); static QLocale c() { return QLocale(C); } static QLocale system(); + static QStringList matchingLocales(QLocale::Language language, QLocale::Script script, QLocale::Country country); static QList<Country> countriesForLanguage(Language lang); void setNumberOptions(NumberOptions options); diff --git a/src/corelib/tools/qlocale.qdoc b/src/corelib/tools/qlocale.qdoc index dcabcd9..4f5e0e5 100644 --- a/src/corelib/tools/qlocale.qdoc +++ b/src/corelib/tools/qlocale.qdoc @@ -707,6 +707,7 @@ the type of information to return from the query() method is listed. \value LanguageId a uint specifying the language. + \value ScriptId a uint specifying the script. \value CountryId a uint specifying the country. \value DecimalPoint a QString specifying the decimal point. \value GroupSeparator a QString specifying the group separator. diff --git a/src/corelib/tools/qlocale_mac.mm b/src/corelib/tools/qlocale_mac.mm index 8a527e4..ba7ca7d 100644 --- a/src/corelib/tools/qlocale_mac.mm +++ b/src/corelib/tools/qlocale_mac.mm @@ -73,10 +73,9 @@ static QByteArray getMacLocaleName() { QByteArray result = envVarLocale(); - QChar lang[3]; - QChar cntry[3]; + QString lang, script, cntry; if (result.isEmpty() || result != "C" - && !splitLocaleName(QString::fromLocal8Bit(result), lang, cntry)) { + && !splitLocaleName(QString::fromLocal8Bit(result), lang, script, cntry)) { QCFType<CFLocaleRef> l = CFLocaleCopyCurrent(); CFStringRef locale = CFLocaleGetIdentifier(l); result = QCFString::toQString(locale).toUtf8(); @@ -436,15 +435,17 @@ QVariant QSystemLocale::query(QueryType type, QVariant in = QVariant()) const case LanguageId: case CountryId: { QString preferredLanguage; - QString preferredCountry(3, QChar()); // codeToCountry assumes QChar[3] + QString preferredCountry; getMacPreferredLanguageAndCountry(&preferredLanguage, &preferredCountry); - QLocale::Language languageCode = (preferredLanguage.isEmpty() ? QLocale::C : codeToLanguage(preferredLanguage.data())); - QLocale::Country countryCode = (preferredCountry.isEmpty() ? QLocale::AnyCountry : codeToCountry(preferredCountry.data())); - const QLocalePrivate *d = findLocale(languageCode, countryCode); + QLocale::Language languageCode = (preferredLanguage.isEmpty() ? QLocale::C : QLocalePrivate::codeToLanguage(preferredLanguage)); + QLocale::Country countryCode = (preferredCountry.isEmpty() ? QLocale::AnyCountry : QLocalePrivate::codeToCountry(preferredCountry)); + const QLocalePrivate *d = findLocale(languageCode, QLocale::AnyScript, countryCode); if (type == LanguageId) return (QLocale::Language)d->languageId(); return (QLocale::Country)d->countryId(); } + case ScriptId: + return QVariant(QLocale::AnyScript); case MeasurementSystem: return QVariant(static_cast<int>(macMeasurementSystem())); diff --git a/src/corelib/tools/qlocale_p.h b/src/corelib/tools/qlocale_p.h index 2e95096..1f73982 100644 --- a/src/corelib/tools/qlocale_p.h +++ b/src/corelib/tools/qlocale_p.h @@ -77,8 +77,20 @@ public: QChar minus() const { return QChar(m_minus); } QChar exponential() const { return QChar(m_exponential); } - quint32 languageId() const { return m_language_id; } - quint32 countryId() const { return m_country_id; } + quint16 languageId() const { return m_language_id; } + quint16 countryId() const { return m_country_id; } + + QString bcp47Name() const; + + QString languageCode() const; // ### QByteArray::fromRawData would be more optimal + QString scriptCode() const; + QString countryCode() const; + + static QLocale::Language codeToLanguage(const QString &code); + static QLocale::Script codeToScript(const QString &code); + static QLocale::Country codeToCountry(const QString &code); + static void getLangAndCountry(const QString &name, QLocale::Language &lang, + QLocale::Script &script, QLocale::Country &cntry); QLocale::MeasurementSystem measurementSystem() const; @@ -161,7 +173,7 @@ public: QString dateTimeToString(const QString &format, const QDate *date, const QTime *time, const QLocale *q) const; - quint16 m_language_id, m_country_id; + quint16 m_language_id, m_script_id, m_country_id; quint16 m_decimal, m_group, m_list, m_percent, m_zero, m_minus, m_plus, m_exponential; @@ -248,15 +260,13 @@ private: }; #endif -const QLocalePrivate *findLocale(QLocale::Language language, QLocale::Country country); +const QLocalePrivate *findLocale(QLocale::Language language, + QLocale::Script script, + QLocale::Country country); const QLocalePrivate *findLocale(const QString &name); QString readEscapedFormatString(const QString &format, int *idx); -bool splitLocaleName(const QString &name, QChar *lang_begin, QChar *cntry_begin, - int *lang_len = 0, int *cntry_len = 0); +bool splitLocaleName(const QString &name, QString &lang, QString &script, QString &cntry); int repeatCount(const QString &s, int i); -QLocale::Language codeToLanguage(const QChar *code); -QLocale::Country codeToCountry(const QChar *code); -void getLangAndCountry(const QString &name, QLocale::Language &lang, QLocale::Country &cntry); QT_END_NAMESPACE diff --git a/src/corelib/tools/qlocale_symbian.cpp b/src/corelib/tools/qlocale_symbian.cpp index 85ffb9e..09d74d0 100644 --- a/src/corelib/tools/qlocale_symbian.cpp +++ b/src/corelib/tools/qlocale_symbian.cpp @@ -57,9 +57,6 @@ QT_BEGIN_NAMESPACE -// Located in qlocale.cpp -extern void getLangAndCountry(const QString &name, QLocale::Language &lang, QLocale::Country &cntry); - static TExtendedLocale _s60Locale; // Type definitions for runtime resolved function pointers @@ -872,8 +869,9 @@ QVariant QSystemLocale::query(QueryType type, QVariant in = QVariant()) const TLanguage language = User::Language(); QString locale = QLatin1String(qt_symbianLocaleName(language)); QLocale::Language lang; + QLocale::Script script; QLocale::Country cntry; - getLangAndCountry(locale, lang, cntry); + QLocalePrivate::getLangAndCountry(locale, lang, script, cntry); if (type == LanguageId) return lang; // few iso codes have no country and will use this @@ -882,6 +880,8 @@ QVariant QSystemLocale::query(QueryType type, QVariant in = QVariant()) const return cntry; } + case ScriptId: + return QVariant(QLocale::AnyScript); case NegativeSign: case PositiveSign: break; diff --git a/src/corelib/tools/qlocale_unix.cpp b/src/corelib/tools/qlocale_unix.cpp index c4aafa0..3701b2b 100644 --- a/src/corelib/tools/qlocale_unix.cpp +++ b/src/corelib/tools/qlocale_unix.cpp @@ -191,9 +191,8 @@ QVariant QSystemLocale::query(QueryType type, QVariant in) const QStringList lst = languages.split(QLatin1Char(':')); for (int i = 0; i < lst.size();) { const QString &name = lst.at(i); - QChar lang[3]; - QChar cntry[3]; - if (name.isEmpty() || !splitLocaleName(name, lang, cntry)) + QString lang, script, cntry; + if (name.isEmpty() || !splitLocaleName(name, lang, script, cntry)) lst.removeAt(i); else ++i; @@ -201,14 +200,12 @@ QVariant QSystemLocale::query(QueryType type, QVariant in) const return lst; } if (!d->lc_messages_var.isEmpty()) { - QChar lang[3]; - QChar cntry[3]; - int lang_len, cntry_len; + QString lang, script, cntry; if (splitLocaleName(QString::fromLatin1(d->lc_messages_var.constData(), d->lc_messages_var.size()), - lang, cntry, &lang_len, &cntry_len)) { - if (!cntry_len && lang_len) - return QStringList(QString(lang, lang_len)); - return QStringList(QString(lang, lang_len) % QLatin1Char('-') % QString(cntry, cntry_len)); + lang, script, cntry)) { + if (!cntry.length() && lang.length()) + return QStringList(lang); + return QStringList(lang % QLatin1Char('-') % cntry); } } return QVariant(); diff --git a/src/corelib/tools/qlocale_win.cpp b/src/corelib/tools/qlocale_win.cpp index d62c4eb..10e9ba1 100644 --- a/src/corelib/tools/qlocale_win.cpp +++ b/src/corelib/tools/qlocale_win.cpp @@ -113,10 +113,9 @@ QByteArray getWinLocaleName(LCID id = LOCALE_USER_DEFAULT) QByteArray result; if (id == LOCALE_USER_DEFAULT) { result = envVarLocale(); - QChar lang[3]; - QChar cntry[3]; + QString lang, script, cntry; if ( result == "C" || (!result.isEmpty() - && splitLocaleName(QString::fromLocal8Bit(result), lang, cntry)) ) { + && splitLocaleName(QString::fromLocal8Bit(result), lang, script, cntry)) ) { long id = 0; bool ok = false; id = qstrtoll(result.data(), 0, 0, &ok); @@ -562,14 +561,17 @@ QVariant QSystemLocale::query(QueryType type, QVariant in = QVariant()) const case CountryId: { QString locale = QString::fromLatin1(getWinLocaleName()); QLocale::Language lang; + QLocale::Script script; QLocale::Country cntry; - getLangAndCountry(locale, lang, cntry); + getLangAndCountry(locale, lang, script, cntry); if (type == LanguageId) return lang; if (cntry == QLocale::AnyCountry) return fallbackLocale().country(); return cntry; } + case ScriptId: + return QVariant(QLocale::AnyScript); case MeasurementSystem: return QVariant(static_cast<int>(winSystemMeasurementSystem())); diff --git a/tests/auto/qlocale/tst_qlocale.cpp b/tests/auto/qlocale/tst_qlocale.cpp index 7a3e339..cb69d54 100644 --- a/tests/auto/qlocale/tst_qlocale.cpp +++ b/tests/auto/qlocale/tst_qlocale.cpp @@ -185,6 +185,7 @@ void tst_QLocale::ctor() QCOMPARE(l.language(), QLocale::C); QCOMPARE(l.country(), QLocale::AnyCountry); } + TEST_CTOR(AnyLanguage, AnyCountry, default_lang, default_country) TEST_CTOR(C, AnyCountry, QLocale::C, QLocale::AnyCountry) TEST_CTOR(Aymara, AnyCountry, default_lang, default_country) TEST_CTOR(Aymara, France, default_lang, default_country) @@ -310,6 +311,7 @@ void tst_QLocale::ctor() TEST_CTOR("en@", English, UnitedStates) TEST_CTOR("en.@", English, UnitedStates) TEST_CTOR("en_", English, UnitedStates) + TEST_CTOR("en_U", English, UnitedStates) TEST_CTOR("en_.", English, UnitedStates) TEST_CTOR("en_.@", English, UnitedStates) TEST_CTOR("en.bla", English, UnitedStates) @@ -335,11 +337,35 @@ void tst_QLocale::ctor() TEST_CTOR("es-419", Spanish, LatinAmericaAndTheCaribbean) // test default countries for languages + TEST_CTOR("zh", Chinese, China) + TEST_CTOR("zh-Hans", Chinese, China) TEST_CTOR("mn", Mongolian, Mongolia) TEST_CTOR("ne", Nepali, Nepal) #undef TEST_CTOR +#define TEST_CTOR(req_lc, exp_lang, exp_script, exp_country) \ + { \ + QLocale l(req_lc); \ + QVERIFY2(l.language() == QLocale::exp_lang \ + && l.script() == QLocale::exp_script \ + && l.country() == QLocale::exp_country, \ + QString("requested: \"" + QString(req_lc) + "\", got: " \ + + QLocale::languageToString(l.language()) \ + + "/" + QLocale::scriptToString(l.script()) \ + + "/" + QLocale::countryToString(l.country())).toLatin1().constData()); \ + } + + TEST_CTOR("zh_CN", Chinese, AnyScript, China) + TEST_CTOR("zh_Hans_CN", Chinese, SimplifiedHanScript, China) + TEST_CTOR("zh_Hans", Chinese, SimplifiedHanScript, China) + TEST_CTOR("zh_Hant", Chinese, TraditionalHanScript, HongKong) + TEST_CTOR("zh_Hans_MO", Chinese, SimplifiedHanScript, Macau) + TEST_CTOR("zh_Hant_MO", Chinese, TraditionalHanScript, Macau) + TEST_CTOR("az_Latn_AZ", Azerbaijani, LatinScript, Azerbaijan) + TEST_CTOR("ha_Arab_NG", Hausa, ArabicScript, Nigeria) + TEST_CTOR("ha_Latn_NG", Hausa, LatinScript, Nigeria) +#undef TEST_CTOR } void tst_QLocale::emptyCtor() @@ -412,11 +438,12 @@ void tst_QLocale::emptyCtor() TEST_CTOR("nb_NO", "nb_NO") TEST_CTOR("nn_NO", "nn_NO") + TEST_CTOR("DE", "de_DE"); + TEST_CTOR("EN", "en_US"); + TEST_CTOR("en/", defaultLoc) - TEST_CTOR("DE", defaultLoc); TEST_CTOR("asdfghj", defaultLoc); TEST_CTOR("123456", defaultLoc); - TEST_CTOR("EN", defaultLoc); #undef TEST_CTOR #endif @@ -2086,6 +2113,10 @@ void tst_QLocale::monthName() QCOMPARE(ru.monthName(1, QLocale::LongFormat), QString::fromUtf8("\321\217\320\275\320\262\320\260\321\200\321\217")); QCOMPARE(ru.monthName(1, QLocale::ShortFormat), QString::fromUtf8("\321\217\320\275\320\262\56")); QCOMPARE(ru.monthName(1, QLocale::NarrowFormat), QString::fromUtf8("\320\257")); + + // check that our CLDR scripts handle surrogate pairs correctly + QLocale dsrt("en-Dsrt-US"); + QCOMPARE(dsrt.monthName(1, QLocale::LongFormat), QString::fromUtf8("\xf0\x90\x90\x96\xf0\x90\x90\xb0\xf0\x90\x91\x8c\xf0\x90\x90\xb7\xf0\x90\x90\xad\xf0\x90\x90\xaf\xf0\x90\x91\x89\xf0\x90\x90\xa8")); } void tst_QLocale::standaloneMonthName() @@ -2191,11 +2222,11 @@ void tst_QLocale::uiLanguages() const QLocale en_US("en_US"); QCOMPARE(en_US.uiLanguages().size(), 1); - QCOMPARE(en_US.uiLanguages().at(0), QLatin1String("en_US")); + QCOMPARE(en_US.uiLanguages().at(0), QLatin1String("en-US")); const QLocale ru_RU("ru_RU"); QCOMPARE(ru_RU.uiLanguages().size(), 1); - QCOMPARE(ru_RU.uiLanguages().at(0), QLatin1String("ru_RU")); + QCOMPARE(ru_RU.uiLanguages().at(0), QLatin1String("ru-RU")); } void tst_QLocale::weekendDays() diff --git a/tests/manual/qlocale/window.cpp b/tests/manual/qlocale/window.cpp index ed66c57..fae88a3 100644 --- a/tests/manual/qlocale/window.cpp +++ b/tests/manual/qlocale/window.cpp @@ -46,19 +46,19 @@ Window::Window() localeCombo->addItem("System", QLocale::system()); - int index = 0; - for (int _lang = QLocale::C; _lang <= QLocale::LastLanguage; ++_lang) { - QLocale::Language lang = static_cast<QLocale::Language>(_lang); - QList<QLocale::Country> countries = QLocale::countriesForLanguage(lang); - for (int i = 0; i < countries.count(); ++i) { - QLocale::Country country = countries.at(i); - QString label = QLocale::languageToString(lang); + QStringList locales = QLocale::matchingLocales(QLocale::AnyLanguage, QLocale::AnyScript, QLocale::AnyCountry); + foreach (const QString &name, locales) { + QLocale locale(name); + QString label = QLocale::languageToString(locale.language()); + label += QLatin1Char('/'); + if (locale.script() != QLocale::AnyScript) { + label += QLocale::scriptToString(locale.script()); label += QLatin1Char('/'); - label += QLocale::countryToString(country); - localeCombo->addItem(label, QLocale(lang, country)); - ++index; } + label += QLocale::countryToString(locale.country()); + localeCombo->addItem(label, locale); } + connect(localeCombo, SIGNAL(currentIndexChanged(int)), this, SLOT(localeChanged(int))); @@ -102,15 +102,19 @@ void Window::systemLocaleChanged() { QLocale l = QLocale::system(); QString lang = QLocale::languageToString(l.language()); + QString script = QLocale::scriptToString(l.script()); QString country = QLocale::countryToString(l.country()); - localeCombo->setItemText(0, QString("System: %1/%2").arg(lang, country)); + if (l.script() != QLocale::AnyScript) + localeCombo->setItemText(0, QString("System: %1-%2-%3").arg(lang, script, country)); + else + localeCombo->setItemText(0, QString("System: %1-%2").arg(lang, country)); emit localeChanged(0); } void Window::localeChanged(int idx) { QLocale locale = localeCombo->itemData(idx).toLocale(); - localeName->setText(QString("Locale: %1").arg(locale.name())); + localeName->setText(QString("Locale: %1 (%2)").arg(locale.bcp47Name(), locale.name())); emit localeChanged(locale); } diff --git a/util/local_database/cldr2qlocalexml.py b/util/local_database/cldr2qlocalexml.py index c70cf18..fb0e5db 100755 --- a/util/local_database/cldr2qlocalexml.py +++ b/util/local_database/cldr2qlocalexml.py @@ -120,8 +120,10 @@ def generateLocaleInfo(path): if not country_code: return {} - # we do not support scripts and variants - if variant_code or script_code: + # we do not support variants + # ### actually there is only one locale with variant: en_US_POSIX + # does anybody care about it at all? + if variant_code: return {} language_id = enumdata.languageCodeToId(language_code) @@ -130,6 +132,14 @@ def generateLocaleInfo(path): return {} language = enumdata.language_list[language_id][0] + script_id = enumdata.scriptCodeToId(script_code) + if script_code == -1: + sys.stderr.write("unnknown script code \"" + script_code + "\"\n") + return {} + script = "AnyScript" + if script_id != -1: + script = enumdata.script_list[script_id][0] + country_id = enumdata.countryCodeToId(country_code) country = "" if country_id != -1: @@ -146,12 +156,14 @@ def generateLocaleInfo(path): result = {} result['language'] = language + result['script'] = script result['country'] = country result['language_code'] = language_code result['country_code'] = country_code result['script_code'] = script_code result['variant_code'] = variant_code result['language_id'] = language_id + result['script_id'] = script_id result['country_id'] = country_id supplementalPath = dir_name + "/../supplemental/supplementalData.xml" @@ -511,7 +523,7 @@ for file in cldr_files: sys.stderr.write("skipping file \"" + file + "\"\n") continue - locale_database[(l['language_id'], l['country_id'], l['script_code'], l['variant_code'])] = l + locale_database[(l['language_id'], l['script_id'], l['country_id'], l['variant_code'])] = l integrateWeekData(cldr_dir+"/../supplemental/supplementalData.xml") locale_keys = locale_database.keys() @@ -535,6 +547,16 @@ for id in enumdata.language_list: print " </language>" print " </languageList>" +print " <scriptList>" +for id in enumdata.script_list: + l = enumdata.script_list[id] + print " <script>" + print " <name>" + l[0] + "</name>" + print " <id>" + str(id) + "</id>" + print " <code>" + l[1] + "</code>" + print " </script>" +print " </scriptList>" + print " <countryList>" for id in enumdata.country_list: l = enumdata.country_list[id] @@ -653,6 +675,7 @@ print " <localeList>" print \ " <locale>\n\ <language>C</language>\n\ + <script>AnyScript</script>\n\ <country>AnyCountry</country>\n\ <decimal>46</decimal>\n\ <group>44</group>\n\ @@ -701,8 +724,10 @@ for key in locale_keys: print " <locale>" print " <language>" + l['language'] + "</language>" + print " <script>" + l['script'] + "</script>" print " <country>" + l['country'] + "</country>" print " <languagecode>" + l['language_code'] + "</languagecode>" + print " <scriptcode>" + l['script_code'] + "</scriptcode>" print " <countrycode>" + l['country_code'] + "</countrycode>" print " <decimal>" + ordStr(l['decimal']) + "</decimal>" print " <group>" + ordStr(l['group']) + "</group>" diff --git a/util/local_database/enumdata.py b/util/local_database/enumdata.py index e957349..9e0d7d8 100644 --- a/util/local_database/enumdata.py +++ b/util/local_database/enumdata.py @@ -45,6 +45,7 @@ # need to be *appended* to this list. language_list = { + 0 : [ "AnyLanguage", " " ], 1 : [ "C", " " ], 2 : [ "Abkhazian", "ab" ], 3 : [ "Afan", "om" ], @@ -511,6 +512,19 @@ country_list = { 246 : [ "LatinAmericaAndTheCaribbean", "419" ] } +script_list = { + 0 : [ "AnyScript", "" ], + 1 : [ "Arabic", "Arab" ], + 2 : [ "Cyrillic", "Cyrl" ], + 3 : [ "Deseret", "Dsrt" ], + 4 : [ "Gurmukhi", "Guru" ], + 5 : [ "Simplified Han", "Hans" ], + 6 : [ "Traditional Han", "Hant" ], + 7 : [ "Latin", "Latn" ], + 8 : [ "Mongolian", "Mong" ], + 9 : [ "Tifinagh", "Tfng" ] +} + def countryCodeToId(code): for country_id in country_list: if country_list[country_id][1] == code: @@ -522,3 +536,9 @@ def languageCodeToId(code): if language_list[language_id][1] == code: return language_id return -1 + +def scriptCodeToId(code): + for script_id in script_list: + if script_list[script_id][1] == code: + return script_id + return -1 diff --git a/util/local_database/qlocalexml2cpp.py b/util/local_database/qlocalexml2cpp.py index 8b68984..b8e4e89 100755 --- a/util/local_database/qlocalexml2cpp.py +++ b/util/local_database/qlocalexml2cpp.py @@ -46,6 +46,12 @@ import tempfile import datetime import xml.dom.minidom +class Error: + def __init__(self, msg): + self.msg = msg + def __str__(self): + return self.msg + def check_static_char_array_length(name, array): # some compilers like VC6 doesn't allow static arrays more than 64K bytes size. size = reduce(lambda x, y: x+len(escapedString(y)), array, 0) @@ -103,6 +109,20 @@ def loadLanguageMap(doc): return result +def loadScriptMap(doc): + result = {} + + script_list_elt = firstChildElt(doc.documentElement, "scriptList") + script_elt = firstChildElt(script_list_elt, "script") + while script_elt: + script_id = int(eltText(firstChildElt(script_elt, "id"))) + script_name = eltText(firstChildElt(script_elt, "name")) + script_code = eltText(firstChildElt(script_elt, "code")) + result[script_id] = (script_name, script_code) + script_elt = nextSiblingElt(script_elt, "script") + + return result + def loadCountryMap(doc): result = {} @@ -129,6 +149,15 @@ def loadDefaultMap(doc): elt = nextSiblingElt(elt, "defaultCountry"); return result +def fixedScriptName(name, dupes): + name = name.replace(" ", "") + if name[-6:] != "Script": + name = name + "Script"; + if name in dupes: + sys.stderr.write("\n\n\nERROR: The script name '%s' is messy" % name) + sys.exit(1); + return name + def fixedCountryName(name, dupes): if name in dupes: return name.replace(" ", "") + "Country" @@ -150,6 +179,12 @@ def languageNameToId(name, language_map): return key return -1 +def scriptNameToId(name, script_map): + for key in script_map.keys(): + if script_map[key][0] == name: + return key + return -1 + def countryNameToId(name, country_map): for key in country_map.keys(): if country_map[key][0] == name: @@ -202,6 +237,7 @@ def assertSingleChar(string): class Locale: def __init__(self, elt): self.language = eltText(firstChildElt(elt, "language")) + self.script = eltText(firstChildElt(elt, "script")) self.country = eltText(firstChildElt(elt, "country")) self.decimal = int(eltText(firstChildElt(elt, "decimal"))) self.group = int(eltText(firstChildElt(elt, "group"))) @@ -244,7 +280,7 @@ class Locale: self.currencyFormat = eltText(firstChildElt(elt, "currencyFormat")) self.currencyNegativeFormat = eltText(firstChildElt(elt, "currencyNegativeFormat")) -def loadLocaleMap(doc, language_map, country_map): +def loadLocaleMap(doc, language_map, script_map, country_map): result = {} locale_list_elt = firstChildElt(doc.documentElement, "localeList") @@ -253,11 +289,14 @@ def loadLocaleMap(doc, language_map, country_map): locale = Locale(locale_elt) language_id = languageNameToId(locale.language, language_map) if language_id == -1: - sys.stderr.write("Cannot find a language id for %s\n" % locale.language) + sys.stderr.write("Cannot find a language id for '%s'\n" % locale.language) + script_id = scriptNameToId(locale.script, script_map) + if script_id == -1: + sys.stderr.write("Cannot find a script id for '%s'\n" % locale.script) country_id = countryNameToId(locale.country, country_map) if country_id == -1: - sys.stderr.write("Cannot find a country id for %s\n" % locale.country) - result[(language_id, country_id)] = locale + sys.stderr.write("Cannot find a country id for '%s'\n" % locale.country) + result[(language_id, script_id, country_id)] = locale locale_elt = nextSiblingElt(locale_elt, "locale") @@ -273,14 +312,17 @@ def compareLocaleKeys(key1, key2): if l1.language in compareLocaleKeys.default_map: default = compareLocaleKeys.default_map[l1.language] - if l1.country == default: + if l1.country == default and key1[1] == 0: return -1 - if l2.country == default: + if l2.country == default and key2[1] == 0: return 1 + + if key1[1] != key2[1]: + return key1[1] - key2[1] else: return key1[0] - key2[0] - return key1[1] - key2[1] + return key1[2] - key2[2] def languageCount(language_id, locale_map): @@ -290,8 +332,25 @@ def languageCount(language_id, locale_map): result += 1 return result +def unicode2hex(s): + lst = [] + for x in s: + v = ord(x) + if v > 0xFFFF: + # make a surrogate pair + # copied from qchar.h + high = (v >> 10) + 0xd7c0 + low = (v % 0x400 + 0xdc00) + lst.append(hex(high)) + lst.append(hex(low)) + else: + lst.append(hex(v)) + return lst + class StringDataToken: def __init__(self, index, length): + if index > 0xFFFF or length > 0xFFFF: + raise Error("Position exceeds ushort range: %d,%d " % (index, length)) self.index = index self.length = length def __str__(self): @@ -305,9 +364,9 @@ class StringData: if s in self.hash: return self.hash[s] - lst = map(lambda x: hex(ord(x)), s) + lst = unicode2hex(s) index = len(self.data) - if index >= 65535: + if index > 65535: print "\n\n\n#error Data index is too big!" sys.stderr.write ("\n\n\nERROR: index exceeds the uint16 range! index = %d\n" % index) sys.exit(1) @@ -316,7 +375,12 @@ class StringData: print "\n\n\n#error Data is too big!" sys.stderr.write ("\n\n\nERROR: data size exceeds the uint16 range! size = %d\n" % size) sys.exit(1) - token = StringDataToken(index, size) + token = None + try: + token = StringDataToken(index, size) + except Error as e: + sys.stderr.write("\n\n\nERROR: %s: on data '%s'" % (e, s)) + sys.exit(1) self.hash[s] = token self.data += lst return token @@ -395,9 +459,10 @@ def main(): doc = xml.dom.minidom.parse(localexml) language_map = loadLanguageMap(doc) + script_map = loadScriptMap(doc) country_map = loadCountryMap(doc) default_map = loadDefaultMap(doc) - locale_map = loadLocaleMap(doc, language_map, country_map) + locale_map = loadLocaleMap(doc, language_map, script_map, country_map) dupes = findDupes(language_map, country_map) cldr_version = eltText(firstChildElt(doc.documentElement, "version")) @@ -416,7 +481,6 @@ def main(): # Locale index data_temp_file.write("static const quint16 locale_index[] = {\n") - data_temp_file.write(" 0, // unused\n") index = 0 for key in language_map.keys(): i = 0 @@ -443,7 +507,7 @@ def main(): # Locale data data_temp_file.write("static const QLocalePrivate locale_data[] = {\n") - data_temp_file.write("// lang terr dec group list prcnt zero minus plus exp quotStart quotEnd altQuotStart altQuotEnd sDtFmt lDtFmt sTmFmt lTmFmt ssMonth slMonth sMonth lMonth sDays lDays am,len pm,len\n") + data_temp_file.write("// lang script terr dec group list prcnt zero minus plus exp quotStart quotEnd altQuotStart altQuotEnd sDtFmt lDtFmt sTmFmt lTmFmt ssMonth slMonth sMonth lMonth sDays lDays am,len pm,len\n") locale_keys = locale_map.keys() compareLocaleKeys.default_map = default_map @@ -452,9 +516,8 @@ def main(): for key in locale_keys: l = locale_map[key] - - data_temp_file.write(" { %6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s, {%s}, %s,%s,%s,%s,%6d,%6d,%6d,%6d,%6d }, // %s/%s\n" \ - % (key[0], key[1], + data_temp_file.write(" { %6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s, {%s}, %s,%s,%s,%s,%6d,%6d,%6d,%6d,%6d }, // %s/%s/%s\n" \ + % (key[0], key[1], key[2], l.decimal, l.group, l.listDelim, @@ -496,8 +559,9 @@ def main(): l.weekendStart, l.weekendEnd, l.language, + l.script, l.country)) - data_temp_file.write(" { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, {0,0,0}, 0,0, 0,0, 0,0, 0,0, 0, 0, 0, 0, 0 } // trailing 0s\n") + data_temp_file.write(" { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, {0,0,0}, 0,0, 0,0, 0,0, 0,0, 0, 0, 0, 0, 0 } // trailing 0s\n") data_temp_file.write("};\n") data_temp_file.write("\n") @@ -586,6 +650,8 @@ def main(): data_temp_file.write("static const char language_name_list[] =\n") data_temp_file.write("\"Default\\0\"\n") for key in language_map.keys(): + if key == 0: + continue data_temp_file.write("\"" + language_map[key][0] + "\\0\"\n") data_temp_file.write(";\n") @@ -593,9 +659,11 @@ def main(): # Language name index data_temp_file.write("static const quint16 language_name_index[] = {\n") - data_temp_file.write(" 0, // Unused\n") + data_temp_file.write(" 0, // AnyLanguage\n") index = 8 for key in language_map.keys(): + if key == 0: + continue language = language_map[key][0] data_temp_file.write("%6d, // %s\n" % (index, language)) index += len(language) + 1 @@ -603,6 +671,31 @@ def main(): data_temp_file.write("\n") + # Script name list + data_temp_file.write("static const char script_name_list[] =\n") + data_temp_file.write("\"Default\\0\"\n") + for key in script_map.keys(): + if key == 0: + continue + data_temp_file.write("\"" + script_map[key][0] + "\\0\"\n") + data_temp_file.write(";\n") + + data_temp_file.write("\n") + + # Script name index + data_temp_file.write("static const quint16 script_name_index[] = {\n") + data_temp_file.write(" 0, // AnyScript\n") + index = 8 + for key in script_map.keys(): + if key == 0: + continue + script = script_map[key][0] + data_temp_file.write("%6d, // %s\n" % (index, script)) + index += len(script) + 1 + data_temp_file.write("};\n") + + data_temp_file.write("\n") + # Country name list data_temp_file.write("static const char country_name_list[] =\n") data_temp_file.write("\"Default\\0\"\n") @@ -630,7 +723,6 @@ def main(): # Language code list data_temp_file.write("static const unsigned char language_code_list[] =\n") - data_temp_file.write("\" \\0\" // Unused\n") for key in language_map.keys(): code = language_map[key][1] if len(code) == 2: @@ -640,6 +732,15 @@ def main(): data_temp_file.write("\n") + # Script code list + data_temp_file.write("static const unsigned char script_code_list[] =\n") + for key in script_map.keys(): + code = script_map[key][1] + for i in range(4 - len(code)): + code += "\\0" + data_temp_file.write("\"%2s\" // %s\n" % (code, script_map[key][0])) + data_temp_file.write(";\n") + # Country code list data_temp_file.write("static const unsigned char country_code_list[] =\n") for key in country_map.keys(): @@ -691,6 +792,15 @@ def main(): qlocaleh_temp_file.write("\n") + # Script enum + qlocaleh_temp_file.write(" enum Script {\n") + script = "" + for key in script_map.keys(): + script = fixedScriptName(script_map[key][0], dupes) + qlocaleh_temp_file.write(" " + script + " = " + str(key) + ",\n") + qlocaleh_temp_file.write(" LastScript = " + script + "\n") + qlocaleh_temp_file.write(" };\n") + # Country enum qlocaleh_temp_file.write(" enum Country {\n") country = "" |