diff options
Diffstat (limited to 'qtools/qtextcodec.cpp')
-rw-r--r-- | qtools/qtextcodec.cpp | 173 |
1 files changed, 104 insertions, 69 deletions
diff --git a/qtools/qtextcodec.cpp b/qtools/qtextcodec.cpp index af43a3a..9f94cb6 100644 --- a/qtools/qtextcodec.cpp +++ b/qtools/qtextcodec.cpp @@ -450,6 +450,9 @@ static const char * const iso8859_2locales[] = { static const char * const iso8859_3locales[] = { "eo", 0 }; +static const char * const iso8859_4locales[] = { + "ee", "ee_EE", "lt", "lt_LT", "lv", "lv_LV", 0 }; + static const char * const iso8859_5locales[] = { "bg", "bg_BG", "bulgarian", "mk", "mk_MK", "sp", "sp_YU", 0 }; @@ -461,13 +464,19 @@ static const char * const iso8859_7locales[] = { "el", "el_GR", "greek", 0 }; static const char * const iso8859_8locales[] = { - "hebrew", "iw", "iw_IL", 0 }; + "hebrew", "he", "he_IL", "iw", "iw_IL", 0 }; static const char * const iso8859_9locales[] = { "tr", "tr_TR", "turkish", 0 }; static const char * const iso8859_15locales[] = { - "fr", "fi", "french", "finnish", 0 }; + "fr", "fi", "french", "finnish", "et", "et_EE", 0 }; + +static const char * const koi8_ulocales[] = { + "uk", "uk_UA", "ru_UA", "ukrainian", 0 }; + +static const char * const tis_620locales[] = { + "th", "th_TH", "thai", 0 }; static bool try_locale_list( const char * const locale[], const char * lang ) @@ -523,6 +532,11 @@ static QTextCodec * ru_RU_hack( const char * i ) { static QTextCodec * localeMapper = 0; +void qt_set_locale_codec( QTextCodec *codec ) +{ + localeMapper = codec; +} + /*! Returns a pointer to the codec most suitable for this locale. */ QTextCodec* QTextCodec::codecForLocale() @@ -572,19 +586,25 @@ QTextCodec* QTextCodec::codecForLocale() localeMapper = codecForName( "ISO 8859-2" ); else if ( try_locale_list( iso8859_3locales, lang ) ) localeMapper = codecForName( "ISO 8859-3" ); + else if ( try_locale_list( iso8859_4locales, lang ) ) + localeMapper = codecForName( "ISO 8859-4" ); else if ( try_locale_list( iso8859_5locales, lang ) ) localeMapper = codecForName( "ISO 8859-5" ); else if ( try_locale_list( iso8859_6locales, lang ) ) - localeMapper = codecForName( "ISO 8859-6" ); + localeMapper = codecForName( "ISO 8859-6-I" ); else if ( try_locale_list( iso8859_7locales, lang ) ) localeMapper = codecForName( "ISO 8859-7" ); else if ( try_locale_list( iso8859_8locales, lang ) ) - localeMapper = codecForName( "ISO 8859-8" ); + localeMapper = codecForName( "ISO 8859-8-I" ); else if ( try_locale_list( iso8859_9locales, lang ) ) localeMapper = codecForName( "ISO 8859-9" ); else if ( try_locale_list( iso8859_15locales, lang ) ) localeMapper = codecForName( "ISO 8859-15" ); - else if ( try_locale_list( probably_koi8_rlocales, lang ) ) + else if ( try_locale_list( tis_620locales, lang ) ) + localeMapper = codecForName( "ISO 8859-11" ); + else if ( try_locale_list( koi8_ulocales, lang ) ) + localeMapper = codecForName( "KOI8-U" ); + else if ( try_locale_list( probably_koi8_rlocales, lang ) ) localeMapper = ru_RU_hack( lang ); else if (!lang || !(localeMapper = codecForName(lang) )) localeMapper = codecForName( "ISO 8859-1" ); @@ -1335,6 +1355,25 @@ static struct { // /**/ - The BULLET OPERATOR is confused. Some people think // it should be 0x2022 (BULLET). + // from RFC 2319, ftp://ftp.isi.edu/in-notes/rfc2319.txt + { "KOI8-U", 2088, + { 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524, + 0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590, + 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219, 0x221A, 0x2248, + 0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7, + 0x2550, 0x2551, 0x2552, 0x0451, 0x0454, 0x2554, 0x0456, 0x0457, + 0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x0491, 0x255D, 0x255E, + 0x255F, 0x2560, 0x2561, 0x0401, 0x0404, 0x2563, 0x0406, 0x0407, + 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x0490, 0x256C, 0x00A9, + 0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433, + 0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, + 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, + 0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A, + 0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, + 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, + 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, + 0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A } }, + // next bits generated from tables on the Unicode 2.0 CD. we can // use these tables since this is part of the transition to using // unicode everywhere in qt. @@ -1342,23 +1381,6 @@ static struct { // $ for A in 8 9 A B C D E F ; do for B in 0 1 2 3 4 5 6 7 8 9 A B C D E F ; do echo 0x${A}${B} 0xFFFD ; done ; done > /tmp/digits ; for a in 8859-* ; do ( awk '/^0x[89ABCDEF]/{ print $1, $2 }' < $a ; cat /tmp/digits ) | sort | uniq -w4 | cut -c6- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/$a ; done // then I inserted the files manually. - { "ISO 8859-1", 4, - { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, - 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, - 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, - 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, - 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, - 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, - 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, - 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, - 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, - 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, - 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, - 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, - 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, - 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, - 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, - 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF} }, { "ISO 8859-2", 5, { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, @@ -1512,7 +1534,7 @@ static struct { 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x00EF, 0x00F0, 0x0146, 0x014D, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x0169, 0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x0138} }, - { "ISO 8859-13", 0, // ############# what is the mib? + { "ISO 8859-13", 109, { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, @@ -1529,7 +1551,7 @@ static struct { 0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C, 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7, 0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x2019} }, - { "ISO 8859-14", 0, // ############# what is the mib? + { "ISO 8859-14", 110, { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, @@ -1546,7 +1568,7 @@ static struct { 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, 0x0175, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x1E6B, 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x0177, 0x00FF} }, - { "ISO 8859-15", 0, // ############# what is the mib? + { "ISO 8859-15", 111, { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, @@ -1762,6 +1784,8 @@ static struct { 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57, 0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} }, + // change LAST_MIB if you add more, and edit unicodevalues in + // kernel/qpsprinter.cpp too. }; @@ -1787,13 +1811,21 @@ QSimpleTextCodec::~QSimpleTextCodec() // what happens if strlen(chars)<len? what happens if !chars? if len<1? QString QSimpleTextCodec::toUnicode(const char* chars, int len) const { + if(len <= 0) + return QString::null; + + int clen = qstrlen(chars); + len = QMIN(len, clen); // Note: NUL ends string + QString r; + r.setUnicode(0, len); + QChar* uc = (QChar*)r.unicode(); // const_cast const unsigned char * c = (const unsigned char *)chars; - for( int i=0; i<len && c[i]; i++ ) { // Note: NUL ends string + for( int i=0; i<len; i++ ) { if ( c[i] > 127 ) - r[i] = unicodevalues[forwardIndex].values[c[i]-128]; + uc[i] = unicodevalues[forwardIndex].values[c[i]-128]; else - r[i] = c[i]; + uc[i] = c[i]; } return r; } @@ -1829,12 +1861,17 @@ QCString QSimpleTextCodec::fromUnicode(const QString& uc, int& len ) const if ( len <0 || len > (int)uc.length() ) len = uc.length(); QCString r( len+1 ); - int i; + int i = len; int u; - for( i=0; i<len; i++ ) { - u = uc[i].cell() + 256* uc[i].row(); - r[i] = u < 128 ? u : ( - ( u < (int)reverseMap->size() ) ? (*reverseMap)[u] : '?' ); + const QChar* ucp = uc.unicode(); + char* rp = r.data(); + char* rmp = reverseMap->data(); + int rmsize = (int) reverseMap->size(); + while( i-- ) + { + u = ucp->unicode(); + *rp++ = u < 128 ? u : (( u < rmsize ) ? (*(rmp+u)) : '?' ); + ucp++; } r[len] = 0; return r; @@ -1862,7 +1899,9 @@ int QSimpleTextCodec::heuristicNameMatch(const char* hint) const return QTextCodec::heuristicNameMatch("koi8-r")-1; } else if ( hint[0] == 't' && QCString(name()) == "ISO 8859-11" ) { // 8859-11 and tis620 are byte by bute equivalent - int i = simpleHeuristicNameMatch("tis-620", hint); + int i = simpleHeuristicNameMatch("tis620-0", hint); + if( !i ) + i = simpleHeuristicNameMatch("tis-620", hint); if( i ) return i; } return QTextCodec::heuristicNameMatch(hint); @@ -1892,27 +1931,7 @@ int QSimpleTextCodec::heuristicContentMatch(const char* chars, int len) const } -static void setupBuiltinCodecs() -{ - int i = 0; - do { - (void)new QSimpleTextCodec( i ); - } while( unicodevalues[i++].mib != LAST_MIB ); - - (void)new QEucJpCodec; - (void)new QSjisCodec; - (void)new QJisCodec; - (void)new QEucKrCodec; - (void)new QGbkCodec; - (void)new QBig5Codec; - (void)new QUtf8Codec; - (void)new QUtf16Codec; - (void)new QHebrewCodec; - (void)new QArabicCodec; - (void)new QTsciiCodec; -} - -#else +#endif // QT_NO_CODECS class QLatin1Codec: public QTextCodec { @@ -1948,12 +1967,10 @@ QLatin1Codec::~QLatin1Codec() // what happens if strlen(chars)<len? what happens if !chars? if len<1? QString QLatin1Codec::toUnicode(const char* chars, int len) const { - QString r; - const unsigned char * c = (const unsigned char *)chars; - for( int i=0; i<len && c[i]; i++ ) { // Note: NUL ends string - r[i] = c[i]; - } - return r; + if(len <= 0) + return QString::null; + + return QString::fromLatin1(chars, len); } @@ -1962,11 +1979,12 @@ QCString QLatin1Codec::fromUnicode(const QString& uc, int& len ) const if ( len <0 || len > (int)uc.length() ) len = uc.length(); QCString r( len+1 ); - int i; - int u; - for( i=0; i<len; i++ ) { - u = uc[i].cell() + 256* uc[i].row(); - r[i] = u < 255 ? u : '?'; + int i = 0; + const QChar *ch = uc.unicode(); + while ( i < len ) { + r[i] = ch->row() ? '?' : ch->cell(); + i++; + ch++; } r[len] = 0; return r; @@ -1975,7 +1993,7 @@ QCString QLatin1Codec::fromUnicode(const QString& uc, int& len ) const const char* QLatin1Codec::name() const { - return "iso8859-1"; + return "ISO 8859-1"; } @@ -2009,11 +2027,28 @@ int QLatin1Codec::heuristicContentMatch(const char* chars, int len) const } - static void setupBuiltinCodecs() { (void)new QLatin1Codec; -} + +#ifndef QT_NO_CODECS + int i = 0; + do { + (void)new QSimpleTextCodec( i ); + } while( unicodevalues[i++].mib != LAST_MIB ); + + (void)new QEucJpCodec; + (void)new QSjisCodec; + (void)new QJisCodec; + (void)new QEucKrCodec; + (void)new QGbkCodec; + (void)new QBig5Codec; + (void)new QUtf8Codec; + (void)new QUtf16Codec; + (void)new QHebrewCodec; + (void)new QArabicCodec; + (void)new QTsciiCodec; #endif // QT_NO_CODECS +} #endif // QT_NO_TEXTCODEC |