summary refs log tree commit diff stats
path: root/qtools/qtextcodec.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'qtools/qtextcodec.cpp')
-rw-r--r-- qtools/qtextcodec.cpp 173
1 files changed, 104 insertions, 69 deletions
diff --git a/qtools/qtextcodec.cpp b/qtools/qtextcodec.cpp
index af43a3a..9f94cb6 100644
--- a/qtools/qtextcodec.cpp
+++ b/qtools/qtextcodec.cpp
@@ -450,6 +450,9 @@ static const char * const iso8859_2locales[] = {
static const char * const iso8859_3locales[] = {
"eo", 0 };
+static const char * const iso8859_4locales[] = {
+ "ee", "ee_EE", "lt", "lt_LT", "lv", "lv_LV", 0 };
+
static const char * const iso8859_5locales[] = {
"bg", "bg_BG", "bulgarian", "mk", "mk_MK",
"sp", "sp_YU", 0 };
@@ -461,13 +464,19 @@ static const char * const iso8859_7locales[] = {
"el", "el_GR", "greek", 0 };
static const char * const iso8859_8locales[] = {
- "hebrew", "iw", "iw_IL", 0 };
+ "hebrew", "he", "he_IL", "iw", "iw_IL", 0 };
static const char * const iso8859_9locales[] = {
"tr", "tr_TR", "turkish", 0 };
static const char * const iso8859_15locales[] = {
- "fr", "fi", "french", "finnish", 0 };
+ "fr", "fi", "french", "finnish", "et", "et_EE", 0 };
+
+static const char * const koi8_ulocales[] = {
+ "uk", "uk_UA", "ru_UA", "ukrainian", 0 };
+
+static const char * const tis_620locales[] = {
+ "th", "th_TH", "thai", 0 };
static bool try_locale_list( const char * const locale[], const char * lang )
@@ -523,6 +532,11 @@ static QTextCodec * ru_RU_hack( const char * i ) {
static QTextCodec * localeMapper = 0;
+void qt_set_locale_codec( QTextCodec *codec )
+{
+ localeMapper = codec;
+}
+
/*! Returns a pointer to the codec most suitable for this locale. */
QTextCodec* QTextCodec::codecForLocale()
@@ -572,19 +586,25 @@ QTextCodec* QTextCodec::codecForLocale()
localeMapper = codecForName( "ISO 8859-2" );
else if ( try_locale_list( iso8859_3locales, lang ) )
localeMapper = codecForName( "ISO 8859-3" );
+ else if ( try_locale_list( iso8859_4locales, lang ) )
+ localeMapper = codecForName( "ISO 8859-4" );
else if ( try_locale_list( iso8859_5locales, lang ) )
localeMapper = codecForName( "ISO 8859-5" );
else if ( try_locale_list( iso8859_6locales, lang ) )
- localeMapper = codecForName( "ISO 8859-6" );
+ localeMapper = codecForName( "ISO 8859-6-I" );
else if ( try_locale_list( iso8859_7locales, lang ) )
localeMapper = codecForName( "ISO 8859-7" );
else if ( try_locale_list( iso8859_8locales, lang ) )
- localeMapper = codecForName( "ISO 8859-8" );
+ localeMapper = codecForName( "ISO 8859-8-I" );
else if ( try_locale_list( iso8859_9locales, lang ) )
localeMapper = codecForName( "ISO 8859-9" );
else if ( try_locale_list( iso8859_15locales, lang ) )
localeMapper = codecForName( "ISO 8859-15" );
- else if ( try_locale_list( probably_koi8_rlocales, lang ) )
+ else if ( try_locale_list( tis_620locales, lang ) )
+ localeMapper = codecForName( "ISO 8859-11" );
+ else if ( try_locale_list( koi8_ulocales, lang ) )
+ localeMapper = codecForName( "KOI8-U" );
+ else if ( try_locale_list( probably_koi8_rlocales, lang ) )
localeMapper = ru_RU_hack( lang );
else if (!lang || !(localeMapper = codecForName(lang) ))
localeMapper = codecForName( "ISO 8859-1" );
@@ -1335,6 +1355,25 @@ static struct {
// /**/ - The BULLET OPERATOR is confused. Some people think
// it should be 0x2022 (BULLET).
+ // from RFC 2319, ftp://ftp.isi.edu/in-notes/rfc2319.txt
+ { "KOI8-U", 2088,
+ { 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524,
+ 0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590,
+ 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219, 0x221A, 0x2248,
+ 0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7,
+ 0x2550, 0x2551, 0x2552, 0x0451, 0x0454, 0x2554, 0x0456, 0x0457,
+ 0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x0491, 0x255D, 0x255E,
+ 0x255F, 0x2560, 0x2561, 0x0401, 0x0404, 0x2563, 0x0406, 0x0407,
+ 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x0490, 0x256C, 0x00A9,
+ 0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
+ 0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E,
+ 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
+ 0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A,
+ 0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
+ 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E,
+ 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
+ 0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A } },
+
// next bits generated from tables on the Unicode 2.0 CD. we can
// use these tables since this is part of the transition to using
// unicode everywhere in qt.
@@ -1342,23 +1381,6 @@ static struct {
// $ for A in 8 9 A B C D E F ; do for B in 0 1 2 3 4 5 6 7 8 9 A B C D E F ; do echo 0x${A}${B} 0xFFFD ; done ; done > /tmp/digits ; for a in 8859-* ; do ( awk '/^0x[89ABCDEF]/{ print $1, $2 }' < $a ; cat /tmp/digits ) | sort | uniq -w4 | cut -c6- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/$a ; done
// then I inserted the files manually.
- { "ISO 8859-1", 4,
- { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
- 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
- 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
- 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
- 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
- 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
- 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
- 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
- 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
- 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
- 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
- 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
- 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
- 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
- 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
- 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF} },
{ "ISO 8859-2", 5,
{ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
@@ -1512,7 +1534,7 @@ static struct {
0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x00EF,
0x00F0, 0x0146, 0x014D, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x0169,
0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x0138} },
- { "ISO 8859-13", 0, // ############# what is the mib?
+ { "ISO 8859-13", 109,
{ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
@@ -1529,7 +1551,7 @@ static struct {
0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C,
0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7,
0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x2019} },
- { "ISO 8859-14", 0, // ############# what is the mib?
+ { "ISO 8859-14", 110,
{ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
@@ -1546,7 +1568,7 @@ static struct {
0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
0x0175, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x1E6B,
0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x0177, 0x00FF} },
- { "ISO 8859-15", 0, // ############# what is the mib?
+ { "ISO 8859-15", 111,
{ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
@@ -1762,6 +1784,8 @@ static struct {
0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57,
0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} },
+ // change LAST_MIB if you add more, and edit unicodevalues in
+ // kernel/qpsprinter.cpp too.
};
@@ -1787,13 +1811,21 @@ QSimpleTextCodec::~QSimpleTextCodec()
// what happens if strlen(chars)<len? what happens if !chars? if len<1?
QString QSimpleTextCodec::toUnicode(const char* chars, int len) const
{
+ if(len <= 0)
+ return QString::null;
+
+ int clen = qstrlen(chars);
+ len = QMIN(len, clen); // Note: NUL ends string
+
QString r;
+ r.setUnicode(0, len);
+ QChar* uc = (QChar*)r.unicode(); // const_cast
const unsigned char * c = (const unsigned char *)chars;
- for( int i=0; i<len && c[i]; i++ ) { // Note: NUL ends string
+ for( int i=0; i<len; i++ ) {
if ( c[i] > 127 )
- r[i] = unicodevalues[forwardIndex].values[c[i]-128];
+ uc[i] = unicodevalues[forwardIndex].values[c[i]-128];
else
- r[i] = c[i];
+ uc[i] = c[i];
}
return r;
}
@@ -1829,12 +1861,17 @@ QCString QSimpleTextCodec::fromUnicode(const QString& uc, int& len ) const
if ( len <0 || len > (int)uc.length() )
len = uc.length();
QCString r( len+1 );
- int i;
+ int i = len;
int u;
- for( i=0; i<len; i++ ) {
- u = uc[i].cell() + 256* uc[i].row();
- r[i] = u < 128 ? u : (
- ( u < (int)reverseMap->size() ) ? (*reverseMap)[u] : '?' );
+ const QChar* ucp = uc.unicode();
+ char* rp = r.data();
+ char* rmp = reverseMap->data();
+ int rmsize = (int) reverseMap->size();
+ while( i-- )
+ {
+ u = ucp->unicode();
+ *rp++ = u < 128 ? u : (( u < rmsize ) ? (*(rmp+u)) : '?' );
+ ucp++;
}
r[len] = 0;
return r;
@@ -1862,7 +1899,9 @@ int QSimpleTextCodec::heuristicNameMatch(const char* hint) const
return QTextCodec::heuristicNameMatch("koi8-r")-1;
} else if ( hint[0] == 't' && QCString(name()) == "ISO 8859-11" ) {
// 8859-11 and tis620 are byte by byte equivalent
- int i = simpleHeuristicNameMatch("tis-620", hint);
+ int i = simpleHeuristicNameMatch("tis620-0", hint);
+ if( !i )
+ i = simpleHeuristicNameMatch("tis-620", hint);
if( i ) return i;
}
return QTextCodec::heuristicNameMatch(hint);
@@ -1892,27 +1931,7 @@ int QSimpleTextCodec::heuristicContentMatch(const char* chars, int len) const
}
-static void setupBuiltinCodecs()
-{
- int i = 0;
- do {
- (void)new QSimpleTextCodec( i );
- } while( unicodevalues[i++].mib != LAST_MIB );
-
- (void)new QEucJpCodec;
- (void)new QSjisCodec;
- (void)new QJisCodec;
- (void)new QEucKrCodec;
- (void)new QGbkCodec;
- (void)new QBig5Codec;
- (void)new QUtf8Codec;
- (void)new QUtf16Codec;
- (void)new QHebrewCodec;
- (void)new QArabicCodec;
- (void)new QTsciiCodec;
-}
-
-#else
+#endif // QT_NO_CODECS
class QLatin1Codec: public QTextCodec
{
@@ -1948,12 +1967,10 @@ QLatin1Codec::~QLatin1Codec()
// what happens if strlen(chars)<len? what happens if !chars? if len<1?
QString QLatin1Codec::toUnicode(const char* chars, int len) const
{
- QString r;
- const unsigned char * c = (const unsigned char *)chars;
- for( int i=0; i<len && c[i]; i++ ) { // Note: NUL ends string
- r[i] = c[i];
- }
- return r;
+ if(len <= 0)
+ return QString::null;
+
+ return QString::fromLatin1(chars, len);
}
@@ -1962,11 +1979,12 @@ QCString QLatin1Codec::fromUnicode(const QString& uc, int& len ) const
if ( len <0 || len > (int)uc.length() )
len = uc.length();
QCString r( len+1 );
- int i;
- int u;
- for( i=0; i<len; i++ ) {
- u = uc[i].cell() + 256* uc[i].row();
- r[i] = u < 255 ? u : '?';
+ int i = 0;
+ const QChar *ch = uc.unicode();
+ while ( i < len ) {
+ r[i] = ch->row() ? '?' : ch->cell();
+ i++;
+ ch++;
}
r[len] = 0;
return r;
@@ -1975,7 +1993,7 @@ QCString QLatin1Codec::fromUnicode(const QString& uc, int& len ) const
const char* QLatin1Codec::name() const
{
- return "iso8859-1";
+ return "ISO 8859-1";
}
@@ -2009,11 +2027,28 @@ int QLatin1Codec::heuristicContentMatch(const char* chars, int len) const
}
-
static void setupBuiltinCodecs()
{
(void)new QLatin1Codec;
-}
+
+#ifndef QT_NO_CODECS
+ int i = 0;
+ do {
+ (void)new QSimpleTextCodec( i );
+ } while( unicodevalues[i++].mib != LAST_MIB );
+
+ (void)new QEucJpCodec;
+ (void)new QSjisCodec;
+ (void)new QJisCodec;
+ (void)new QEucKrCodec;
+ (void)new QGbkCodec;
+ (void)new QBig5Codec;
+ (void)new QUtf8Codec;
+ (void)new QUtf16Codec;
+ (void)new QHebrewCodec;
+ (void)new QArabicCodec;
+ (void)new QTsciiCodec;
#endif // QT_NO_CODECS
+}
#endif // QT_NO_TEXTCODEC