diff options
author | Lars Knoll <lars.knoll@nokia.com> | 2009-03-23 09:18:55 (GMT) |
---|---|---|
committer | Simon Hausmann <simon.hausmann@nokia.com> | 2009-03-23 09:18:55 (GMT) |
commit | e5fcad302d86d316390c6b0f62759a067313e8a9 (patch) | |
tree | c2afbf6f1066b6ce261f14341cf6d310e5595bc1 /src/corelib/codecs | |
download | Qt-e5fcad302d86d316390c6b0f62759a067313e8a9.zip Qt-e5fcad302d86d316390c6b0f62759a067313e8a9.tar.gz Qt-e5fcad302d86d316390c6b0f62759a067313e8a9.tar.bz2 |
Long live Qt 4.5!
Diffstat (limited to 'src/corelib/codecs')
-rw-r--r-- | src/corelib/codecs/codecs.pri | 53 | ||||
-rw-r--r-- | src/corelib/codecs/qfontlaocodec.cpp | 124 | ||||
-rw-r--r-- | src/corelib/codecs/qfontlaocodec_p.h | 78 | ||||
-rw-r--r-- | src/corelib/codecs/qiconvcodec.cpp | 536 | ||||
-rw-r--r-- | src/corelib/codecs/qiconvcodec_p.h | 104 | ||||
-rw-r--r-- | src/corelib/codecs/qisciicodec.cpp | 288 | ||||
-rw-r--r-- | src/corelib/codecs/qisciicodec_p.h | 81 | ||||
-rw-r--r-- | src/corelib/codecs/qlatincodec.cpp | 246 | ||||
-rw-r--r-- | src/corelib/codecs/qlatincodec_p.h | 94 | ||||
-rw-r--r-- | src/corelib/codecs/qsimplecodec.cpp | 733 | ||||
-rw-r--r-- | src/corelib/codecs/qsimplecodec_p.h | 87 | ||||
-rw-r--r-- | src/corelib/codecs/qtextcodec.cpp | 1598 | ||||
-rw-r--r-- | src/corelib/codecs/qtextcodec.h | 189 | ||||
-rw-r--r-- | src/corelib/codecs/qtextcodec_p.h | 84 | ||||
-rw-r--r-- | src/corelib/codecs/qtextcodecplugin.cpp | 161 | ||||
-rw-r--r-- | src/corelib/codecs/qtextcodecplugin.h | 96 | ||||
-rw-r--r-- | src/corelib/codecs/qtsciicodec.cpp | 500 | ||||
-rw-r--r-- | src/corelib/codecs/qtsciicodec_p.h | 106 | ||||
-rw-r--r-- | src/corelib/codecs/qutfcodec.cpp | 634 | ||||
-rw-r--r-- | src/corelib/codecs/qutfcodec_p.h | 155 |
20 files changed, 5947 insertions, 0 deletions
diff --git a/src/corelib/codecs/codecs.pri b/src/corelib/codecs/codecs.pri
new file mode 100644
index 0000000..2e247e5
--- /dev/null
+++ b/src/corelib/codecs/codecs.pri
@@ -0,0 +1,53 @@
+# Qt core library codecs module
+
+HEADERS += \
+    codecs/qisciicodec_p.h \
+    codecs/qlatincodec_p.h \
+    codecs/qsimplecodec_p.h \
+    codecs/qtextcodec.h \
+    codecs/qtsciicodec_p.h \
+    codecs/qutfcodec_p.h \
+    codecs/qtextcodecplugin.h
+
+SOURCES += \
+    codecs/qisciicodec.cpp \
+    codecs/qlatincodec.cpp \
+    codecs/qsimplecodec.cpp \
+    codecs/qtextcodec.cpp \
+    codecs/qtsciicodec.cpp \
+    codecs/qutfcodec.cpp \
+    codecs/qtextcodecplugin.cpp
+
+unix { # everything below is only added on Unix builds
+    SOURCES += codecs/qfontlaocodec.cpp
+
+    contains(QT_CONFIG,iconv) { # Qt configured with "iconv": build the iconv-based codec
+        HEADERS += codecs/qiconvcodec_p.h
+        SOURCES += codecs/qiconvcodec.cpp
+    } else:contains(QT_CONFIG,gnu-libiconv) { # same codec, plus the GNU_LIBICONV define
+        HEADERS += codecs/qiconvcodec_p.h
+        SOURCES += codecs/qiconvcodec.cpp
+
+        DEFINES += GNU_LIBICONV
+        !mac:LIBS *= -liconv # link libiconv explicitly, except on mac
+    } else {
+        # no iconv, so we put all plugins in the library
+        HEADERS += \
+            ../plugins/codecs/cn/qgb18030codec.h \
+            ../plugins/codecs/jp/qeucjpcodec.h \
+            ../plugins/codecs/jp/qjiscodec.h \
+            ../plugins/codecs/jp/qsjiscodec.h \
+            ../plugins/codecs/kr/qeuckrcodec.h \
+            ../plugins/codecs/tw/qbig5codec.h \
+            ../plugins/codecs/jp/qfontjpcodec.h
+        SOURCES += \
+            ../plugins/codecs/cn/qgb18030codec.cpp \
+            ../plugins/codecs/jp/qjpunicode.cpp \
+            ../plugins/codecs/jp/qeucjpcodec.cpp \
+            ../plugins/codecs/jp/qjiscodec.cpp \
+            ../plugins/codecs/jp/qsjiscodec.cpp \
+            ../plugins/codecs/kr/qeuckrcodec.cpp \
+            ../plugins/codecs/tw/qbig5codec.cpp \
+            ../plugins/codecs/jp/qfontjpcodec.cpp
+    }
+}
diff --git a/src/corelib/codecs/qfontlaocodec.cpp b/src/corelib/codecs/qfontlaocodec.cpp
new file mode 100644
index 0000000..496ac02
--- /dev/null
+++ b/src/corelib/codecs/qfontlaocodec.cpp
@@ -0,0 +1,124 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com) +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** No Commercial Usage +** This file contains pre-release code and may not be distributed. +** You may use this file in accordance with the terms and conditions +** contained in the either Technology Preview License Agreement or the +** Beta Release License Agreement. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain +** additional rights. These rights are described in the Nokia Qt LGPL +** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this +** package. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 3.0 as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU General Public License version 3.0 requirements will be +** met: http://www.gnu.org/copyleft/gpl.html. +** +** If you are unsure which license is appropriate for your use, please +** contact the sales department at qt-sales@nokia.com. 
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "qfontlaocodec_p.h"
+#include "qlist.h"
+
+#ifndef QT_NO_CODECS
+#ifndef QT_NO_BIG_CODECS
+
+QT_BEGIN_NAMESPACE
+
+static unsigned char const unicode_to_mulelao[256] =
+    {
+        // U+0E80
+        0x00, 0xa1, 0xa2, 0x00, 0xa4, 0x00, 0x00, 0xa7,
+        0xa8, 0x00, 0xaa, 0x00, 0x00, 0xad, 0x00, 0x00,
+        // U+0E90
+        0x00, 0x00, 0x00, 0x00, 0xb4, 0xb5, 0xb6, 0xb7,
+        0x00, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
+        // U+0EA0
+        0x00, 0xc1, 0xc2, 0xc3, 0x00, 0xc5, 0x00, 0xc7,
+        0x00, 0x00, 0xca, 0xcb, 0x00, 0xcd, 0xce, 0xcf,
+        // U+0EB0
+        0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
+        0xd8, 0xd9, 0x00, 0xdb, 0xdc, 0xdd, 0x00, 0x00,
+        // U+0EC0
+        0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0x00, 0xe6, 0x00,
+        0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0x00, 0x00,
+        // U+0ED0
+        0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
+        0xf8, 0xf9, 0x00, 0x00, 0xfc, 0xfd, 0x00, 0x00,
+        // U+0EE0
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        // U+0EF0
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+    };
+
+
+QFontLaoCodec::~QFontLaoCodec()
+{
+}
+
+QByteArray QFontLaoCodec::name() const
+{
+    return "mulelao-1";
+}
+
+int QFontLaoCodec::mibEnum() const
+{
+    return -4242;
+}
+
+QString QFontLaoCodec::convertToUnicode(const char *, int, ConverterState *) const
+{
+    return QString();
+}
+
+// Encodes \a len UTF-16 code units from \a uc, one output byte per unit:
+// values below 0x80 are copied as-is, U+0E80..U+0EFF go through
+// unicode_to_mulelao, and everything else (including table holes) becomes 0.
+QByteArray QFontLaoCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *) const
+{
+    QByteArray encoded;
+    encoded.resize(len);
+    uchar *out = (uchar *) encoded.data();
+    for (int pos = 0; pos < len; ++pos) {
+        const ushort code = uc[pos].unicode();
+        uchar byte = 0; // default for anything that has no mapping
+        if (code < 0x80)
+            byte = (uchar) code;
+        else if (code >= 0x0e80 && code <= 0x0eff)
+            byte = unicode_to_mulelao[code - 0x0e80];
+        out[pos] = byte;
+    }
+    return encoded;
+}
+
+QT_END_NAMESPACE
+
+#endif // QT_NO_BIG_CODECS
+#endif // QT_NO_CODECS
diff --git a/src/corelib/codecs/qfontlaocodec_p.h b/src/corelib/codecs/qfontlaocodec_p.h
new file mode 100644
index 0000000..a8e142d
--- /dev/null
+++ b/src/corelib/codecs/qfontlaocodec_p.h
@@ -0,0 +1,78 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain
+** additional rights. These rights are described in the Nokia Qt LGPL
+** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
+** package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file.
Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#ifndef QFONTLAOCODEC_P_H
+#define QFONTLAOCODEC_P_H
+
+//
+// W A R N I N G
+// -------------
+//
+// This file is not part of the Qt API. It exists for the convenience
+// of qfontencodings_x11.cpp and qfont_x11.cpp. This header file may
+// change from version to version without notice, or even be removed.
+//
+// We mean it.
+//
+
+#include "QtCore/qtextcodec.h"
+
+QT_BEGIN_NAMESPACE
+
+#ifndef QT_NO_CODECS
+
+// Private QTextCodec subclass for the "mulelao-1" font encoding (the name
+// returned by name() in qfontlaocodec.cpp). Only the Unicode -> bytes
+// direction is implemented there; convertToUnicode() returns an empty string.
+class Q_CORE_EXPORT QFontLaoCodec : public QTextCodec
+{
+public:
+    ~QFontLaoCodec();
+
+    // Codec identification.
+    QByteArray name() const;
+    int mibEnum() const;
+
+    // Conversion entry points; the ConverterState argument is unused by the
+    // implementation in qfontlaocodec.cpp.
+    QString convertToUnicode(const char *, int, ConverterState *) const;
+    QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const;
+};
+
+#endif // QT_NO_CODECS
+
+QT_END_NAMESPACE
+
+#endif // QFONTLAOCODEC_P_H
diff --git a/src/corelib/codecs/qiconvcodec.cpp b/src/corelib/codecs/qiconvcodec.cpp
new file mode 100644
index 0000000..c8f28d9
--- /dev/null
+++ b/src/corelib/codecs/qiconvcodec.cpp
@@ -0,0 +1,536 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain +** additional rights. These rights are described in the Nokia Qt LGPL +** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this +** package. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 3.0 as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU General Public License version 3.0 requirements will be +** met: http://www.gnu.org/copyleft/gpl.html. +** +** If you are unsure which license is appropriate for your use, please +** contact the sales department at qt-sales@nokia.com. 
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "qiconvcodec_p.h"
+#include "qtextcodec_p.h"
+#include <qlibrary.h>
+#include <qdebug.h>
+#include <qthreadstorage.h>
+
+#include <errno.h>
+#include <locale.h>
+#include <stdio.h>
+#include <dlfcn.h>
+
+// unistd.h is needed for the _XOPEN_UNIX macro
+#include <unistd.h>
+#if defined(_XOPEN_UNIX) && !defined(Q_OS_QNX6) && !defined(Q_OS_OSF)
+#  include <langinfo.h>
+#endif
+
+// UTF16 names the iconv encoding used for QChar data; NO_BOM marks platforms
+// where convertFromUnicode() must not feed iconv() a byte order mark.
+#if defined(Q_OS_HPUX)
+#  define NO_BOM
+#  define UTF16 "ucs2"
+#elif defined(Q_OS_AIX)
+#  define NO_BOM
+#  define UTF16 "UCS-2"
+#elif defined(Q_OS_MAC)
+#  define NO_BOM
+#  if Q_BYTE_ORDER == Q_BIG_ENDIAN
+#    define UTF16 "UTF-16BE"
+#  else
+#    define UTF16 "UTF-16LE"
+#  endif
+#else
+#  define UTF16 "UTF-16"
+#endif
+
+#if defined(Q_OS_MAC)
+#ifndef GNU_LIBICONV
+#define GNU_LIBICONV
+#endif
+// On Mac the iconv entry points are resolved at run time (see the constructor).
+typedef iconv_t (*Ptr_iconv_open) (const char*, const char*);
+typedef size_t (*Ptr_iconv) (iconv_t, const char **, size_t *, char **, size_t *);
+typedef int (*Ptr_iconv_close) (iconv_t);
+
+static Ptr_iconv_open ptr_iconv_open = 0;
+static Ptr_iconv ptr_iconv = 0;
+static Ptr_iconv_close ptr_iconv_close = 0;
+#endif
+
+QT_BEGIN_NAMESPACE
+
+extern bool qt_locale_initialized;
+
+// Looks up the UTF-16 codec (MIB 1015) used to turn iconv() output into a
+// QString. If it cannot be found, utf16Codec is set to the sentinel ~0, which
+// convertToUnicode() checks before falling back to QString::fromAscii().
+QIconvCodec::QIconvCodec()
+    : utf16Codec(0)
+{
+    utf16Codec = QTextCodec::codecForMib(1015);
+    Q_ASSERT_X(utf16Codec != 0,
+               "QIconvCodec::convertToUnicode",
+               "internal error, UTF-16 codec not found");
+    if (!utf16Codec) {
+        fprintf(stderr, "QIconvCodec::convertToUnicode: internal error, UTF-16 codec not found\n");
+        utf16Codec = reinterpret_cast<QTextCodec *>(~0);
+    }
+#if defined(Q_OS_MAC)
+    if (ptr_iconv_open == 0) {
+        QLibrary libiconv(QLatin1String("/usr/lib/libiconv"));
+        libiconv.setLoadHints(QLibrary::ExportExternalSymbolsHint);
+
+        // try the "lib"-prefixed symbol first, then the plain name
+        ptr_iconv_open = reinterpret_cast<Ptr_iconv_open>(libiconv.resolve("libiconv_open"));
+        if (!ptr_iconv_open)
+            ptr_iconv_open = reinterpret_cast<Ptr_iconv_open>(libiconv.resolve("iconv_open"));
+        ptr_iconv = reinterpret_cast<Ptr_iconv>(libiconv.resolve("libiconv"));
+        if (!ptr_iconv)
+            ptr_iconv = reinterpret_cast<Ptr_iconv>(libiconv.resolve("iconv"));
+        ptr_iconv_close = reinterpret_cast<Ptr_iconv_close>(libiconv.resolve("libiconv_close"));
+        if (!ptr_iconv_close)
+            ptr_iconv_close = reinterpret_cast<Ptr_iconv_close>(libiconv.resolve("iconv_close"));
+
+        Q_ASSERT_X(ptr_iconv_open && ptr_iconv && ptr_iconv_close,
+                   "QIconvCodec::QIconvCodec()",
+                   "internal error, could not resolve the iconv functions");
+
+        // from here to the end of the file, the iconv names refer to the
+        // resolved function pointers
+#       undef iconv_open
+#       define iconv_open ptr_iconv_open
+#       undef iconv
+#       define iconv ptr_iconv
+#       undef iconv_close
+#       define iconv_close ptr_iconv_close
+    }
+#endif
+}
+
+QIconvCodec::~QIconvCodec()
+{
+}
+
+// Takes ownership of the descriptor \a x; the save buffer starts out pointing
+// at the in-object array.
+QIconvCodec::IconvState::IconvState(iconv_t x)
+    : buffer(array), bufferLen(sizeof array), cd(x)
+{
+}
+
+// Closes the descriptor (unless it is the invalid value -1) and releases the
+// save buffer if saveChars() replaced it with a heap allocation.
+QIconvCodec::IconvState::~IconvState()
+{
+    if (cd != reinterpret_cast<iconv_t>(-1))
+        iconv_close(cd);
+    if (buffer != array)
+        delete[] buffer;
+}
+
+// Copies \a count bytes from \a c into the save buffer, growing it first when
+// it is too small.
+void QIconvCodec::IconvState::saveChars(const char *c, int count)
+{
+    if (count > bufferLen) {
+        if (buffer != array)
+            delete[] buffer;
+        buffer = new char[bufferLen = count];
+    }
+
+    memcpy(buffer, c, count);
+}
+
+// Free function registered in ConverterState for stateful conversions.
+static void qIconvCodecStateFree(QTextCodec::ConverterState *state)
+{
+    delete reinterpret_cast<QIconvCodec::IconvState *>(state->d);
+}
+
+Q_GLOBAL_STATIC(QThreadStorage<QIconvCodec::IconvState *>, toUnicodeState)
+
+// Converts \a len bytes at \a chars from the locale encoding to a QString by
+// running them through iconv() into UTF-16.
+//
+// With \a convState the iconv state lives in convState->d and bytes of an
+// incomplete trailing multi-byte sequence are saved for the next call; without
+// it a per-thread state is used and reset after every call. Falls back to
+// fromAscii()/fromLatin1() when no converter is available.
+QString QIconvCodec::convertToUnicode(const char* chars, int len, ConverterState *convState) const
+{
+    if (utf16Codec == reinterpret_cast<QTextCodec *>(~0))
+        return QString::fromAscii(chars, len);
+
+    int invalidCount = 0;
+    int remainingCount = 0;
+    char *remainingBuffer = 0;
+    IconvState **pstate;
+
+    if (convState) {
+        // stateful conversion
+        pstate = reinterpret_cast<IconvState **>(&convState->d);
+        if (convState->d) {
+            // restore state
+            remainingCount = convState->remainingChars;
+            remainingBuffer = (*pstate)->buffer;
+        } else {
+            // first time
+            convState->flags |= FreeFunction;
+            QTextCodecUnalignedPointer::encode(convState->state_data, qIconvCodecStateFree);
+        }
+    } else {
+        QThreadStorage<QIconvCodec::IconvState *> *ts = toUnicodeState();
+        if (!qt_locale_initialized || !ts) {
+            // we're running after the Q_GLOBAL_STATIC has been deleted
+            // or before the QCoreApplication initialization
+            // bad programmer, no cookie for you
+            return QString::fromLatin1(chars, len);
+        }
+
+        // stateless conversion -- use thread-local data
+        pstate = &toUnicodeState()->localData();
+    }
+
+    if (!*pstate) {
+        // first time, create the state
+        iconv_t cd = QIconvCodec::createIconv_t(UTF16, 0);
+        if (cd == reinterpret_cast<iconv_t>(-1)) {
+            static int reported = 0;
+            if (!reported++) {
+                fprintf(stderr,
+                        "QIconvCodec::convertToUnicode: using ASCII for conversion, iconv_open failed\n");
+            }
+            return QString::fromAscii(chars, len);
+        }
+
+        *pstate = new IconvState(cd);
+    }
+
+    IconvState *state = *pstate;
+    size_t inBytesLeft = len;
+#ifdef GNU_LIBICONV
+    // GNU doesn't disagree with POSIX :/
+    const char *inBytes = chars;
+#else
+    char *inBytes = const_cast<char *>(chars);
+#endif
+
+    QByteArray in;
+    if (remainingCount) {
+        // we have to prepend the remaining bytes from the previous conversion
+        inBytesLeft += remainingCount;
+        in.resize(inBytesLeft);
+        inBytes = in.data();
+
+        memcpy(in.data(), remainingBuffer, remainingCount);
+        memcpy(in.data() + remainingCount, chars, len);
+
+        remainingCount = 0;
+    }
+
+    // best case assumption, each byte is converted into one UTF-16 character,
+    // plus 2 bytes for the BOM; the E2BIG branch below grows the buffer
+    QByteArray ba;
+    size_t outBytesLeft = len * 2 + 2;
+    ba.resize(outBytesLeft);
+    char *outBytes = ba.data();
+    do {
+        size_t ret = iconv(state->cd, &inBytes, &inBytesLeft, &outBytes, &outBytesLeft);
+        if (ret == (size_t) -1) {
+            if (errno == E2BIG) {
+                // output buffer full: double it and carry on where we stopped
+                int offset = ba.size() - outBytesLeft;
+                ba.resize(ba.size() * 2);
+                outBytes = ba.data() + offset;
+                outBytesLeft = ba.size() - offset;
+
+                continue;
+            }
+
+            if (errno == EILSEQ) {
+                // conversion stopped because of an invalid character in the sequence
+                ++invalidCount;
+            } else if (errno == EINVAL && convState) {
+                // conversion stopped because the remaining inBytesLeft make up
+                // an incomplete multi-byte sequence; save them for later
+                state->saveChars(inBytes, inBytesLeft);
+                remainingCount = inBytesLeft;
+                break;
+            }
+
+            if (errno == EILSEQ || errno == EINVAL) {
+                // skip the next character
+                ++inBytes;
+                --inBytesLeft;
+                continue;
+            }
+
+            // some other error
+            // note, cannot use qWarning() since we are implementing the codecForLocale :)
+            perror("QIconvCodec::convertToUnicode: using ASCII for conversion, iconv failed");
+
+            if (!convState) {
+                // reset state
+                iconv(state->cd, 0, &inBytesLeft, 0, &outBytesLeft);
+            }
+
+            return QString::fromAscii(chars, len);
+        }
+    } while (inBytesLeft != 0);
+
+    QString s = utf16Codec->toUnicode(ba.constData(), ba.size() - outBytesLeft);
+
+    if (convState) {
+        convState->invalidChars = invalidCount;
+        convState->remainingChars = remainingCount;
+    } else {
+        // reset state
+        iconv(state->cd, 0, &inBytesLeft, 0, &outBytesLeft);
+    }
+
+    return s;
+}
+
+Q_GLOBAL_STATIC(QThreadStorage<QIconvCodec::IconvState *>, fromUnicodeState)
+
+// Converts \a len QChars at \a uc to the locale encoding with iconv().
+//
+// The iconv descriptor is always the per-thread one; \a convState is only
+// used to carry a trailing surrogate over to the next call and to report the
+// number of invalid characters. Falls back to toLatin1()/toAscii() when no
+// converter is available.
+QByteArray QIconvCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *convState) const
+{
+    char *inBytes;
+    char *outBytes;
+    size_t inBytesLeft;
+
+#if defined(GNU_LIBICONV)
+    const char **inBytesPtr = const_cast<const char **>(&inBytes);
+#else
+    char **inBytesPtr = &inBytes;
+#endif
+
+    QThreadStorage<QIconvCodec::IconvState *> *ts = fromUnicodeState();
+    if (!qt_locale_initialized || !ts) {
+        // we're running after the Q_GLOBAL_STATIC has been deleted
+        // or before the QCoreApplication initialization
+        // bad programmer, no cookie for you
+        if (!len)
+            // this is a special case - zero-sized string should be
+            // translated to empty but not-null QByteArray.
+            return QByteArray("");
+        return QString::fromRawData(uc, len).toLatin1();
+    }
+    IconvState *&state = ts->localData();
+    if (!state) {
+        state = new IconvState(QIconvCodec::createIconv_t(0, UTF16));
+        if (state->cd != reinterpret_cast<iconv_t>(-1)) {
+            size_t outBytesLeft = len + 3; // +3 for the BOM
+            QByteArray ba;
+            ba.resize(outBytesLeft);
+            outBytes = ba.data();
+
+#if !defined(NO_BOM)
+            // give iconv() a BOM
+            QChar bom[] = { QChar(QChar::ByteOrderMark) };
+            inBytes = reinterpret_cast<char *>(bom);
+            inBytesLeft = sizeof(bom);
+            if (iconv(state->cd, inBytesPtr, &inBytesLeft, &outBytes, &outBytesLeft) == (size_t) -1) {
+                perror("QIconvCodec::convertFromUnicode: using ASCII for conversion, iconv failed for BOM");
+
+                iconv_close(state->cd);
+                state->cd = reinterpret_cast<iconv_t>(-1);
+
+                return QString(uc, len).toAscii();
+            }
+#endif // NO_BOM
+        }
+    }
+    if (state->cd == reinterpret_cast<iconv_t>(-1)) {
+        static int reported = 0;
+        if (!reported++) {
+            fprintf(stderr,
+                    "QIconvCodec::convertFromUnicode: using ASCII for conversion, iconv_open failed\n");
+        }
+        return QString(uc, len).toAscii();
+    }
+
+    size_t outBytesLeft = len;
+    QByteArray ba;
+    ba.resize(outBytesLeft);
+    outBytes = ba.data();
+
+    // now feed iconv() the real data
+    inBytes = const_cast<char *>(reinterpret_cast<const char *>(uc));
+    inBytesLeft = len * sizeof(QChar);
+
+    QByteArray in;
+    if (convState && convState->remainingChars) {
+        // we have one surrogate char to be prepended
+        // (the buffer must hold the surrogate plus all len QChars in *bytes*;
+        // sizing it with len alone made the second memcpy overflow it)
+        in.resize(int(sizeof(QChar) + inBytesLeft));
+        inBytes = in.data();
+
+        QChar remaining = convState->state_data[0];
+        memcpy(in.data(), &remaining, sizeof(QChar));
+        memcpy(in.data() + sizeof(QChar), uc, inBytesLeft);
+
+        inBytesLeft += sizeof(QChar);
+        convState->remainingChars = 0;
+    }
+
+    int invalidCount = 0;
+    do {
+        if (iconv(state->cd, inBytesPtr, &inBytesLeft, &outBytes, &outBytesLeft) == (size_t) -1) {
+            if (errno == EINVAL && convState) {
+                // buffer ends in a surrogate
+                Q_ASSERT(inBytesLeft == 2);
+                // take the unit from where iconv() stopped rather than from
+                // uc[len - 1]: when len is 0 the pending unit is the one we
+                // prepended above and uc[len - 1] would be out of bounds
+                ushort pending;
+                memcpy(&pending, inBytes, sizeof(pending));
+                convState->remainingChars = 1;
+                convState->state_data[0] = pending;
+                break;
+            }
+
+            switch (errno) {
+            case EILSEQ:
+                ++invalidCount;
+                // fall through
+            case EINVAL:
+                {
+                    // skip the offending UTF-16 code unit
+                    inBytes += sizeof(QChar);
+                    inBytesLeft -= sizeof(QChar);
+                    break;
+                }
+            case E2BIG:
+                {
+                    // output buffer full: double it and carry on where we stopped
+                    int offset = ba.size() - outBytesLeft;
+                    ba.resize(ba.size() * 2);
+                    outBytes = ba.data() + offset;
+                    outBytesLeft = ba.size() - offset;
+                    break;
+                }
+            default:
+                {
+                    // note, cannot use qWarning() since we are implementing the codecForLocale :)
+                    perror("QIconvCodec::convertFromUnicode: using ASCII for conversion, iconv failed");
+
+                    // reset to initial state
+                    iconv(state->cd, 0, &inBytesLeft, 0, &outBytesLeft);
+
+                    return QString(uc, len).toAscii();
+                }
+            }
+        }
+    } while (inBytesLeft != 0);
+
+    // reset to initial state
+    iconv(state->cd, 0, &inBytesLeft, 0, &outBytesLeft);
+
+    ba.resize(ba.size() - outBytesLeft);
+
+    if (convState)
+        convState->invalidChars = invalidCount;
+
+    return ba;
+}
+
+QByteArray QIconvCodec::name() const
+{
+    return "System";
+}
+
+int QIconvCodec::mibEnum() const
+{
+    return 0;
+}
+
+// Opens an iconv descriptor between the locale encoding and an explicit one.
+// Exactly one of \a to and \a from must be null; the null side is replaced by
+// the locale's codeset, which is guessed through a chain of fallbacks.
+// Returns (iconv_t) -1 if every attempt fails.
+iconv_t QIconvCodec::createIconv_t(const char *to, const char *from)
+{
+    Q_ASSERT((to == 0 && from != 0) || (to != 0 && from == 0));
+
+    iconv_t cd = (iconv_t) -1;
+#if defined(__GLIBC__) || defined(GNU_LIBICONV)
+    // both GLIBC and libgnuiconv will use the locale's encoding if from or to is an empty string
+    static const char empty_codeset[] = "";
+    const char *codeset = empty_codeset;
+    cd = iconv_open(to ? to : codeset, from ? from : codeset);
+#else
+    char *codeset = 0;
+#endif
+
+#if defined(_XOPEN_UNIX) && !defined(Q_OS_QNX6) && !defined(Q_OS_OSF)
+    if (cd == (iconv_t) -1) {
+        codeset = nl_langinfo(CODESET);
+        if (codeset)
+            cd = iconv_open(to ? to : codeset, from ? from : codeset);
+    }
+#endif
+
+    if (cd == (iconv_t) -1) {
+        // Very poorly defined and followed standards causes lots of
+        // code to try to get all the cases... This logic is
+        // duplicated in QTextCodec, so if you change it here, change
+        // it there too.
+
+        // Try to determine locale codeset from locale name assigned to
+        // LC_CTYPE category.
+
+        // First part is getting that locale name. First try setlocale() which
+        // definitely knows it, but since we cannot fully trust it, get ready
+        // to fall back to environment variables.
+        char * ctype = qstrdup(setlocale(LC_CTYPE, 0));
+
+        // Get the first nonempty value from $LC_ALL, $LC_CTYPE, and $LANG
+        // environment variables.
+        char * lang = qstrdup(qgetenv("LC_ALL").constData());
+        if (!lang || lang[0] == 0 || strcmp(lang, "C") == 0) {
+            if (lang) delete [] lang;
+            lang = qstrdup(qgetenv("LC_CTYPE").constData());
+        }
+        if (!lang || lang[0] == 0 || strcmp(lang, "C") == 0) {
+            if (lang) delete [] lang;
+            lang = qstrdup(qgetenv("LANG").constData());
+        }
+
+        // Now try these in order:
+        // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15)
+        // 2. CODESET from lang if it contains a .CODESET part
+        // 3. ctype (maybe the locale is named "ISO-8859-1" or something)
+        // 4. locale (ditto)
+        // 5. check for "@euro"
+
+        // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15)
+        codeset = ctype ? strchr(ctype, '.') : 0;
+        if (codeset && *codeset == '.') {
+            ++codeset;
+            cd = iconv_open(to ? to : codeset, from ? from : codeset);
+        }
+
+        // 2. CODESET from lang if it contains a .CODESET part
+        codeset = lang ? strchr(lang, '.') : 0;
+        if (cd == (iconv_t) -1 && codeset && *codeset == '.') {
+            ++codeset;
+            cd = iconv_open(to ? to : codeset, from ? from : codeset);
+        }
+
+        // 3. ctype (maybe the locale is named "ISO-8859-1" or something)
+        if (cd == (iconv_t) -1 && ctype && *ctype != 0 && strcmp (ctype, "C") != 0)
+            cd = iconv_open(to ? to : ctype, from ? from : ctype);
+
+
+        // 4. locale (ditto)
+        if (cd == (iconv_t) -1 && lang && *lang != 0)
+            cd = iconv_open(to ? to : lang, from ? from : lang);
+
+        // 5. "@euro"
+        // The "still no descriptor" test must guard both operands of the ||;
+        // otherwise a lang containing "@euro" reopens and leaks a valid cd.
+        if (cd == (iconv_t) -1
+            && ((ctype && strstr(ctype, "@euro")) || (lang && strstr(lang, "@euro"))))
+            cd = iconv_open(to ? to : "ISO8859-15", from ? from : "ISO8859-15");
+
+        delete [] ctype;
+        delete [] lang;
+    }
+
+    return cd;
+}
+
+QT_END_NAMESPACE
diff --git a/src/corelib/codecs/qiconvcodec_p.h b/src/corelib/codecs/qiconvcodec_p.h
new file mode 100644
index 0000000..839bee7
--- /dev/null
+++ b/src/corelib/codecs/qiconvcodec_p.h
@@ -0,0 +1,104 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain
+** additional rights. These rights are described in the Nokia Qt LGPL
+** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
+** package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#ifndef QICONVCODEC_P_H
+#define QICONVCODEC_P_H
+
+//
+// W A R N I N G
+// -------------
+//
+// This file is not part of the Qt API. It exists for the convenience
+// of the QLibrary class. This header file may change from
+// version to version without notice, or even be removed.
+//
+// We mean it.
+//
+
+#include "qtextcodec.h"
+
+#if defined(Q_OS_UNIX) && !defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED)
+
+#ifdef Q_OS_MAC
+// <iconv.h> is not included on Mac; iconv_t is declared as an opaque pointer here
+typedef void * iconv_t;
+#else
+#include <iconv.h>
+#endif
+
+QT_BEGIN_NAMESPACE
+
+// QTextCodec implemented on top of iconv(); its name() is "System" and its
+// mibEnum() is 0 (see qiconvcodec.cpp).
+class QIconvCodec: public QTextCodec
+{
+private:
+    // UTF-16 codec looked up in the constructor; holds the sentinel value ~0
+    // when that lookup failed
+    mutable QTextCodec *utf16Codec;
+
+public:
+    QIconvCodec();
+    ~QIconvCodec();
+
+    QString convertToUnicode(const char *, int, ConverterState *) const;
+    QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const;
+
+    QByteArray name() const;
+    int mibEnum() const;
+
+    // Exactly one of to/from must be null (asserted in the implementation);
+    // the null side stands for the locale encoding. Returns (iconv_t) -1 on failure.
+    static iconv_t createIconv_t(const char *to, const char *from);
+
+    // Conversion state: owns the iconv descriptor and a buffer for bytes
+    // that could not be converted yet.
+    class IconvState
+    {
+    public:
+        IconvState(iconv_t x);
+        ~IconvState();
+        char *buffer;       // points at array until saveChars() needs more room
+        int bufferLen;      // capacity of buffer in bytes
+        iconv_t cd;         // closed by the destructor unless it is -1
+
+        char array[8];      // initial in-object storage for buffer
+
+        // copies count bytes from c into buffer, reallocating if count > bufferLen
+        void saveChars(const char *c, int count);
+    };
+};
+
+QT_END_NAMESPACE
+
+#endif // Q_OS_UNIX && !QT_NO_ICONV && !QT_BOOTSTRAPPED
+
+#endif // QICONVCODEC_P_H
diff --git a/src/corelib/codecs/qisciicodec.cpp b/src/corelib/codecs/qisciicodec.cpp
new
file mode 100644 index 0000000..dd2bc8d --- /dev/null +++ b/src/corelib/codecs/qisciicodec.cpp @@ -0,0 +1,288 @@ +/**************************************************************************** +** +** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). +** Contact: Qt Software Information (qt-info@nokia.com) +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** No Commercial Usage +** This file contains pre-release code and may not be distributed. +** You may use this file in accordance with the terms and conditions +** contained in the either Technology Preview License Agreement or the +** Beta Release License Agreement. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain +** additional rights. These rights are described in the Nokia Qt LGPL +** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this +** package. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 3.0 as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU General Public License version 3.0 requirements will be +** met: http://www.gnu.org/copyleft/gpl.html. +** +** If you are unsure which license is appropriate for your use, please +** contact the sales department at qt-sales@nokia.com. 
+** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include "qisciicodec_p.h" +#include "qlist.h" + +#ifndef QT_NO_CODECS + +QT_BEGIN_NAMESPACE + +/*! + \class QIsciiCodec + \brief The QIsciiCodec class provides conversion to and from the ISCII encoding. + + \internal +*/ + + +struct Codecs { + const char *name; + ushort base; +}; + +static const Codecs codecs [] = { + { "Iscii-Dev", 0x900 }, + { "Iscii-Bng", 0x980 }, + { "Iscii-Pnj", 0xa00 }, + { "Iscii-Gjr", 0xa80 }, + { "Iscii-Ori", 0xb00 }, + { "Iscii-Tml", 0xb80 }, + { "Iscii-Tlg", 0xc00 }, + { "Iscii-Knd", 0xc80 }, + { "Iscii-Mlm", 0xd00 } +}; + +QIsciiCodec::~QIsciiCodec() +{ +} + +QByteArray QIsciiCodec::name() const +{ + return codecs[idx].name; +} + +int QIsciiCodec::mibEnum() const +{ + /* There is no MIBEnum for Iscii */ + return -3000-idx; +} + +static const uchar inv = 0xFF; + +/* iscii range from 0xa0 - 0xff */ +static const uchar iscii_to_uni_table[0x60] = { + 0x00, 0x01, 0x02, 0x03, + 0x05, 0x06, 0x07, 0x08, + 0x09, 0x0a, 0x0b, 0x0e, + 0x0f, 0x20, 0x0d, 0x12, + + 0x13, 0x14, 0x11, 0x15, + 0x16, 0x17, 0x18, 0x19, + 0x1a, 0x1b, 0x1c, 0x1d, + 0x1e, 0x1f, 0x20, 0x21, + + 0x22, 0x23, 0x24, 0x25, + 0x26, 0x27, 0x28, 0x29, + 0x2a, 0x2b, 0x2c, 0x2d, + 0x2e, 0x2f, 0x5f, 0x30, + + 0x31, 0x32, 0x33, 0x34, + 0x35, 0x36, 0x37, 0x38, + 0x39, inv, 0x3e, 0x3f, + 0x40, 0x41, 0x42, 0x43, + + 0x46, 0x47, 0x48, 0x45, + 0x4a, 0x4b, 0x4c, 0x49, + 0x4d, 0x3c, 0x64, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x66, 0x67, 0x68, + 0x69, 0x6a, 0x6b, 0x6c, + 0x6d, 0x6e, 0x6f, 0x00, + 0x00, 0x00, 0x00, 0x00 +}; + +static const uchar uni_to_iscii_table[0x80] = { + 0x00, 0xa1, 0xa2, 0xa3, + 0x00, 0xa4, 0xa5, 0xa6, + 0xa7, 0xa8, 0xa9, 0xaa, + 0x00, 0xae, 0xab, 0xac, + + 0xad, 0xb2, 0xaf, 0xb0, + 0xb1, 0xb3, 0xb4, 0xb5, + 0xb6, 0xb7, 0xb8, 0xb9, + 0xba, 0xbb, 0xbc, 0xbd, + + 0xbe, 0xbf, 0xc0, 0xc1, + 0xc2, 0xc3, 0xc4, 0xc5, + 0xc6, 0xc7, 0xc8, 0xc9, + 0xca, 0xcb, 0xcc, 0xcd, + 
+ 0xcf, 0xd0, 0xd1, 0xd2, + 0xd3, 0xd4, 0xd5, 0xd6, + 0xd7, 0xd8, 0x00, 0x00, + 0xe9, 0x00, 0xda, 0xdb, + + 0xdc, 0xdd, 0xde, 0xdf, + 0x00, 0xe3, 0xe0, 0xe1, + 0xe2, 0xe7, 0xe4, 0xe5, + 0xe6, 0xe8, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x01, 0x02, 0x03, 0x04, // decomposable into the uc codes listed here + nukta + 0x05, 0x06, 0x07, 0xce, + + 0x00, 0x00, 0x00, 0x00, + 0xea, 0x08, 0xf1, 0xf2, + 0xf3, 0xf4, 0xf5, 0xf6, + 0xf7, 0xf8, 0xf9, 0xfa, + + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 +}; + +static const uchar uni_to_iscii_pairs[] = { + 0x00, 0x00, + 0x15, 0x3c, // 0x958 + 0x16, 0x3c, // 0x959 + 0x17, 0x3c, // 0x95a + 0x1c, 0x3c, // 0x95b + 0x21, 0x3c, // 0x95c + 0x22, 0x3c, // 0x95d + 0x2b, 0x3c, // 0x95e + 0x64, 0x64 // 0x965 +}; + + +QByteArray QIsciiCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *state) const +{ + char replacement = '?'; + bool halant = false; + if (state) { + if (state->flags & ConvertInvalidToNull) + replacement = 0; + halant = state->state_data[0]; + } + int invalid = 0; + + QByteArray result; + result.resize(2*len); //worst case + + uchar *ch = reinterpret_cast<uchar *>(result.data()); + + const int base = codecs[idx].base; + + for (int i =0; i < len; ++i) { + const ushort codePoint = uc[i].unicode(); + + /* The low 7 bits of ISCII is plain ASCII. However, we go all the + * way up to 0xA0 such that we can roundtrip with convertToUnicode()'s + * behavior. 
*/ + if(codePoint < 0xA0) { + *ch++ = static_cast<uchar>(codePoint); + continue; + } + + const int pos = codePoint - base; + if (pos > 0 && pos < 0x80) { + uchar iscii = uni_to_iscii_table[pos]; + if (iscii > 0x80) { + *ch++ = iscii; + } else if (iscii) { + const uchar *pair = uni_to_iscii_pairs + 2*iscii; + *ch++ = *pair++; + *ch++ = *pair++; + } else { + *ch++ = replacement; + ++invalid; + } + } else { + if (uc[i].unicode() == 0x200c) { // ZWNJ + if (halant) + // Consonant Halant ZWNJ -> Consonant Halant Halant + *ch++ = 0xe8; + } else if (uc[i].unicode() == 0x200d) { // ZWJ + if (halant) + // Consonant Halant ZWJ -> Consonant Halant Nukta + *ch++ = 0xe9; + } else { + *ch++ = replacement; + ++invalid; + } + } + halant = (pos == 0x4d); + } + result.truncate(ch - (uchar *)result.data()); + + if (state) { + state->invalidChars += invalid; + state->state_data[0] = halant; + } + return result; +} + +QString QIsciiCodec::convertToUnicode(const char* chars, int len, ConverterState *state) const +{ + bool halant = false; + if (state) { + halant = state->state_data[0]; + } + + QString result; + result.resize(len); + QChar *uc = result.data(); + + const int base = codecs[idx].base; + + for (int i = 0; i < len; ++i) { + ushort ch = (uchar) chars[i]; + if (ch < 0xa0) + *uc++ = ch; + else { + ushort c = iscii_to_uni_table[ch - 0xa0]; + if (halant && (c == inv || c == 0xe9)) { + // Consonant Halant inv -> Consonant Halant ZWJ + // Consonant Halant Nukta -> Consonant Halant ZWJ + *uc++ = QChar(0x200d); + } else if (halant && c == 0xe8) { + // Consonant Halant Halant -> Consonant Halant ZWNJ + *uc++ = QChar(0x200c); + } else { + *uc++ = QChar(c+base); + } + } + halant = ((uchar)chars[i] == 0xe8); + } + result.resize(uc - result.unicode()); + + if (state) { + state->state_data[0] = halant; + } + return result; +} + +QT_END_NAMESPACE + +#endif // QT_NO_CODECS diff --git a/src/corelib/codecs/qisciicodec_p.h b/src/corelib/codecs/qisciicodec_p.h new file mode 100644 index 
0000000..0477ca6 --- /dev/null +++ b/src/corelib/codecs/qisciicodec_p.h @@ -0,0 +1,81 @@ +/**************************************************************************** +** +** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). +** Contact: Qt Software Information (qt-info@nokia.com) +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** No Commercial Usage +** This file contains pre-release code and may not be distributed. +** You may use this file in accordance with the terms and conditions +** contained in the either Technology Preview License Agreement or the +** Beta Release License Agreement. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain +** additional rights. These rights are described in the Nokia Qt LGPL +** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this +** package. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 3.0 as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU General Public License version 3.0 requirements will be +** met: http://www.gnu.org/copyleft/gpl.html. +** +** If you are unsure which license is appropriate for your use, please +** contact the sales department at qt-sales@nokia.com. 
+** $QT_END_LICENSE$ +** +****************************************************************************/ + +#ifndef QISCIICODEC_P_H +#define QISCIICODEC_P_H + +// +// W A R N I N G +// ------------- +// +// This file is not part of the Qt API. It exists purely as an +// implementation detail. This header file may change from version to +// version without notice, or even be removed. +// +// We mean it. +// + +#include "QtCore/qtextcodec.h" + +QT_BEGIN_NAMESPACE + +#ifndef QT_NO_CODECS + +class QIsciiCodec : public QTextCodec { +public: + explicit QIsciiCodec(int i) : idx(i) {} + ~QIsciiCodec(); + + QByteArray name() const; + int mibEnum() const; + + QString convertToUnicode(const char *, int, ConverterState *) const; + QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const; + +private: + int idx; +}; + +#endif // QT_NO_CODECS + +QT_END_NAMESPACE + +#endif // QISCIICODEC_P_H diff --git a/src/corelib/codecs/qlatincodec.cpp b/src/corelib/codecs/qlatincodec.cpp new file mode 100644 index 0000000..aae436e --- /dev/null +++ b/src/corelib/codecs/qlatincodec.cpp @@ -0,0 +1,246 @@ +/**************************************************************************** +** +** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). +** Contact: Qt Software Information (qt-info@nokia.com) +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** No Commercial Usage +** This file contains pre-release code and may not be distributed. +** You may use this file in accordance with the terms and conditions +** contained in the either Technology Preview License Agreement or the +** Beta Release License Agreement. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. 
Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain +** additional rights. These rights are described in the Nokia Qt LGPL +** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this +** package. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 3.0 as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU General Public License version 3.0 requirements will be +** met: http://www.gnu.org/copyleft/gpl.html. +** +** If you are unsure which license is appropriate for your use, please +** contact the sales department at qt-sales@nokia.com. +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include "qlatincodec_p.h" +#include "qlist.h" + +#ifndef QT_NO_TEXTCODEC + +QT_BEGIN_NAMESPACE + +QLatin1Codec::~QLatin1Codec() +{ +} + +QString QLatin1Codec::convertToUnicode(const char *chars, int len, ConverterState *) const +{ + if (chars == 0) + return QString(); + + return QString::fromLatin1(chars, len); +} + + +QByteArray QLatin1Codec::convertFromUnicode(const QChar *ch, int len, ConverterState *state) const +{ + const char replacement = (state && state->flags & ConvertInvalidToNull) ? 
0 : '?'; + QByteArray r; + r.resize(len); + char *d = r.data(); + int invalid = 0; + for (int i = 0; i < len; ++i) { + if (ch[i] > 0xff) { + d[i] = replacement; + ++invalid; + } else { + d[i] = (char)ch[i].cell(); + } + } + if (state) { + state->invalidChars += invalid; + } + return r; +} + +QByteArray QLatin1Codec::name() const +{ + return "ISO-8859-1"; +} + +QList<QByteArray> QLatin1Codec::aliases() const +{ + QList<QByteArray> list; + list << "latin1" + << "CP819" + << "IBM819" + << "iso-ir-100" + << "csISOLatin1"; + return list; +} + + +int QLatin1Codec::mibEnum() const +{ + return 4; +} + + +QLatin15Codec::~QLatin15Codec() +{ +} + +QString QLatin15Codec::convertToUnicode(const char* chars, int len, ConverterState *) const +{ + if (chars == 0) + return QString(); + + QString str = QString::fromLatin1(chars, len); + QChar *uc = str.data(); + while(len--) { + switch(uc->unicode()) { + case 0xa4: + *uc = 0x20ac; + break; + case 0xa6: + *uc = 0x0160; + break; + case 0xa8: + *uc = 0x0161; + break; + case 0xb4: + *uc = 0x017d; + break; + case 0xb8: + *uc = 0x017e; + break; + case 0xbc: + *uc = 0x0152; + break; + case 0xbd: + *uc = 0x0153; + break; + case 0xbe: + *uc = 0x0178; + break; + default: + break; + } + uc++; + } + return str; +} + +QByteArray QLatin15Codec::convertFromUnicode(const QChar *in, int length, ConverterState *state) const +{ + const char replacement = (state && state->flags & ConvertInvalidToNull) ? 
0 : '?'; + QByteArray r; + r.resize(length); + char *d = r.data(); + int invalid = 0; + for (int i = 0; i < length; ++i) { + uchar c; + ushort uc = in[i].unicode(); + if (uc < 0x0100) { + if (uc > 0xa3) { + switch(uc) { + case 0xa4: + case 0xa6: + case 0xa8: + case 0xb4: + case 0xb8: + case 0xbc: + case 0xbd: + case 0xbe: + c = replacement; + ++invalid; + break; + default: + c = (unsigned char) uc; + break; + } + } else { + c = (unsigned char) uc; + } + } else { + if (uc == 0x20ac) + c = 0xa4; + else if ((uc & 0xff00) == 0x0100) { + switch(uc) { + case 0x0160: + c = 0xa6; + break; + case 0x0161: + c = 0xa8; + break; + case 0x017d: + c = 0xb4; + break; + case 0x017e: + c = 0xb8; + break; + case 0x0152: + c = 0xbc; + break; + case 0x0153: + c = 0xbd; + break; + case 0x0178: + c = 0xbe; + break; + default: + c = replacement; + ++invalid; + } + } else { + c = replacement; + ++invalid; + } + } + d[i] = (char)c; + } + if (state) { + state->remainingChars = 0; + state->invalidChars += invalid; + } + return r; +} + + +QByteArray QLatin15Codec::name() const +{ + return "ISO-8859-15"; +} + +QList<QByteArray> QLatin15Codec::aliases() const +{ + QList<QByteArray> list; + list << "latin9"; + return list; +} + +int QLatin15Codec::mibEnum() const +{ + return 111; +} + +QT_END_NAMESPACE + +#endif // QT_NO_TEXTCODEC diff --git a/src/corelib/codecs/qlatincodec_p.h b/src/corelib/codecs/qlatincodec_p.h new file mode 100644 index 0000000..676647a --- /dev/null +++ b/src/corelib/codecs/qlatincodec_p.h @@ -0,0 +1,94 @@ +/**************************************************************************** +** +** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). +** Contact: Qt Software Information (qt-info@nokia.com) +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** No Commercial Usage +** This file contains pre-release code and may not be distributed. 
+** You may use this file in accordance with the terms and conditions +** contained in the either Technology Preview License Agreement or the +** Beta Release License Agreement. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain +** additional rights. These rights are described in the Nokia Qt LGPL +** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this +** package. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 3.0 as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU General Public License version 3.0 requirements will be +** met: http://www.gnu.org/copyleft/gpl.html. +** +** If you are unsure which license is appropriate for your use, please +** contact the sales department at qt-sales@nokia.com. +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#ifndef QLATINCODEC_P_H +#define QLATINCODEC_P_H + +// +// W A R N I N G +// ------------- +// +// This file is not part of the Qt API. It exists purely as an +// implementation detail. This header file may change from version to +// version without notice, or even be removed. +// +// We mean it. 
+// + +#include "QtCore/qtextcodec.h" + +QT_BEGIN_NAMESPACE + +#ifndef QT_NO_TEXTCODEC + +class QLatin1Codec : public QTextCodec +{ +public: + ~QLatin1Codec(); + + QString convertToUnicode(const char *, int, ConverterState *) const; + QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const; + + QByteArray name() const; + QList<QByteArray> aliases() const; + int mibEnum() const; +}; + + + +class QLatin15Codec: public QTextCodec +{ +public: + ~QLatin15Codec(); + + QString convertToUnicode(const char *, int, ConverterState *) const; + QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const; + + QByteArray name() const; + QList<QByteArray> aliases() const; + int mibEnum() const; +}; + +#endif // QT_NO_TEXTCODEC + +QT_END_NAMESPACE + +#endif // QLATINCODEC_P_H diff --git a/src/corelib/codecs/qsimplecodec.cpp b/src/corelib/codecs/qsimplecodec.cpp new file mode 100644 index 0000000..0184a5a --- /dev/null +++ b/src/corelib/codecs/qsimplecodec.cpp @@ -0,0 +1,733 @@ +/**************************************************************************** +** +** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). +** Contact: Qt Software Information (qt-info@nokia.com) +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** No Commercial Usage +** This file contains pre-release code and may not be distributed. +** You may use this file in accordance with the terms and conditions +** contained in the either Technology Preview License Agreement or the +** Beta Release License Agreement. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. 
Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain +** additional rights. These rights are described in the Nokia Qt LGPL +** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this +** package. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 3.0 as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU General Public License version 3.0 requirements will be +** met: http://www.gnu.org/copyleft/gpl.html. +** +** If you are unsure which license is appropriate for your use, please +** contact the sales department at qt-sales@nokia.com. +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include "qsimplecodec_p.h" +#include "qlist.h" + +#ifndef QT_NO_TEXTCODEC + +QT_BEGIN_NAMESPACE + +#define LAST_MIB 2004 + +static const struct { + const char *mime; + const char *aliases[7]; + int mib; + quint16 values[128]; +} unicodevalues[QSimpleTextCodec::numSimpleCodecs] = { + // from RFC 1489, ftp://ftp.isi.edu/in-notes/rfc1489.txt + { "KOI8-R", { "csKOI8R", 0 }, 2084, + { 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524, + 0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590, + 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219/**/, 0x221A, 0x2248, + 0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7, + 0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556, + 0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x255C, 0x255D, 0x255E, + 0x255F, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565, + 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x256B, 
0x256C, 0x00A9, + 0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433, + 0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, + 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, + 0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A, + 0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, + 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, + 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, + 0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A } }, + // /**/ - The BULLET OPERATOR is confused. Some people think + // it should be 0x2022 (BULLET). + + // from RFC 2319, ftp://ftp.isi.edu/in-notes/rfc2319.txt + { "KOI8-U", { "KOI8-RU", 0 }, 2088, + { 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524, + 0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590, + 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219, 0x221A, 0x2248, + 0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7, + 0x2550, 0x2551, 0x2552, 0x0451, 0x0454, 0x2554, 0x0456, 0x0457, + 0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x0491, 0x255D, 0x255E, + 0x255F, 0x2560, 0x2561, 0x0401, 0x0404, 0x2563, 0x0406, 0x0407, + 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x0490, 0x256C, 0x00A9, + 0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433, + 0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, + 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, + 0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A, + 0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, + 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, + 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, + 0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A } }, + + // next bits generated from tables on the Unicode 2.0 CD. we can + // use these tables since this is part of the transition to using + // unicode everywhere in qt. 
+ + // $ for A in 8 9 A B C D E F ; do for B in 0 1 2 3 4 5 6 7 8 9 A B C D E F ; do echo 0x${A}${B} 0xFFFD ; done ; done > /tmp/digits ; for a in 8859-* ; do (awk '/^0x[89ABCDEF]/{ print $1, $2 }' < $a ; cat /tmp/digits) | sort | uniq -w4 | cut -c6- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/$a ; done + + // then I inserted the files manually. + { "ISO-8859-2", {"latin2", "iso-ir-101", "csISOLatin2", 0 }, 5, + { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, + 0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7, + 0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B, + 0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7, + 0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C, + 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7, + 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E, + 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7, + 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF, + 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7, + 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F, + 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7, + 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9} }, + { "ISO-8859-3", { "latin3", "iso-ir-109", "csISOLatin3", 0 }, 6, + { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, + 0x00A0, 0x0126, 0x02D8, 0x00A3, 0x00A4, 0xFFFD, 0x0124, 0x00A7, + 0x00A8, 0x0130, 0x015E, 0x011E, 0x0134, 0x00AD, 0xFFFD, 0x017B, 
+ 0x00B0, 0x0127, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x0125, 0x00B7, + 0x00B8, 0x0131, 0x015F, 0x011F, 0x0135, 0x00BD, 0xFFFD, 0x017C, + 0x00C0, 0x00C1, 0x00C2, 0xFFFD, 0x00C4, 0x010A, 0x0108, 0x00C7, + 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, + 0xFFFD, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x0120, 0x00D6, 0x00D7, + 0x011C, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x016C, 0x015C, 0x00DF, + 0x00E0, 0x00E1, 0x00E2, 0xFFFD, 0x00E4, 0x010B, 0x0109, 0x00E7, + 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, + 0xFFFD, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x0121, 0x00F6, 0x00F7, + 0x011D, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x016D, 0x015D, 0x02D9} }, + { "ISO-8859-4", { "latin4", "iso-ir-110", "csISOLatin4", 0 }, 7, + { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, + 0x00A0, 0x0104, 0x0138, 0x0156, 0x00A4, 0x0128, 0x013B, 0x00A7, + 0x00A8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00AD, 0x017D, 0x00AF, + 0x00B0, 0x0105, 0x02DB, 0x0157, 0x00B4, 0x0129, 0x013C, 0x02C7, + 0x00B8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014A, 0x017E, 0x014B, + 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E, + 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x012A, + 0x0110, 0x0145, 0x014C, 0x0136, 0x00D4, 0x00D5, 0x00D6, 0x00D7, + 0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x0168, 0x016A, 0x00DF, + 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F, + 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x012B, + 0x0111, 0x0146, 0x014D, 0x0137, 0x00F4, 0x00F5, 0x00F6, 0x00F7, + 0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x0169, 0x016B, 0x02D9} }, + { "ISO-8859-5", { "cyrillic", "iso-ir-144", "csISOLatinCyrillic", 0 }, 8, + { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 
0x008F, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, + 0x00A0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, + 0x0408, 0x0409, 0x040A, 0x040B, 0x040C, 0x00AD, 0x040E, 0x040F, + 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, + 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F, + 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, + 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F, + 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, + 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, + 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, + 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F, + 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, + 0x0458, 0x0459, 0x045A, 0x045B, 0x045C, 0x00A7, 0x045E, 0x045F} }, + { "ISO-8859-6", { "ISO-8859-6-I", "ECMA-114", "ASMO-708", "arabic", "iso-ir-127", "csISOLatinArabic", 0 }, 82, + { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, + 0x00A0, 0xFFFD, 0xFFFD, 0xFFFD, 0x00A4, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x060C, 0x00AD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0x061B, 0xFFFD, 0xFFFD, 0xFFFD, 0x061F, + 0xFFFD, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627, + 0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F, + 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637, + 0x0638, 0x0639, 0x063A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647, + 0x0648, 0x0649, 0x064A, 0x064B, 0x064C, 0x064D, 0x064E, 0x064F, + 0x0650, 0x0651, 0x0652, 
0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} }, + { "ISO-8859-7", { "ECMA-118", "greek", "iso-ir-126", "csISOLatinGreek", 0 }, 10, + { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, + 0x00A0, 0x2018, 0x2019, 0x00A3, 0xFFFD, 0xFFFD, 0x00A6, 0x00A7, + 0x00A8, 0x00A9, 0xFFFD, 0x00AB, 0x00AC, 0x00AD, 0xFFFD, 0x2015, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x0385, 0x0386, 0x00B7, + 0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F, + 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, + 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F, + 0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7, + 0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF, + 0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7, + 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF, + 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7, + 0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD} }, + { "ISO-8859-8", { "ISO 8859-8-I", "iso-ir-138", "hebrew", "csISOLatinHebrew", 0 }, 85, + { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, + 0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, + 0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x203E, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, + 0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 
0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2017, + 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, + 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF, + 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7, + 0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} }, + { "ISO-8859-9", { "iso-ir-148", "latin5", "csISOLatin5", 0 }, 12, + { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, + 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, + 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, + 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, + 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, + 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, + 0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, + 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF, + 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, + 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, + 0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, + 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF} }, + { "ISO-8859-10", { "iso-ir-157", "latin6", "ISO-8859-10:1992", "csISOLatin6", 0 }, 13, + { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, + 0x00A0, 0x0104, 0x0112, 0x0122, 0x012A, 0x0128, 0x0136, 0x00A7, + 0x013B, 0x0110, 0x0160, 0x0166, 0x017D, 
0x00AD, 0x016A, 0x014A, + 0x00B0, 0x0105, 0x0113, 0x0123, 0x012B, 0x0129, 0x0137, 0x00B7, + 0x013C, 0x0111, 0x0161, 0x0167, 0x017E, 0x2015, 0x016B, 0x014B, + 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E, + 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x00CF, + 0x00D0, 0x0145, 0x014C, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x0168, + 0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, + 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F, + 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x00EF, + 0x00F0, 0x0146, 0x014D, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x0169, + 0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x0138} }, + { "ISO-8859-13", { 0 }, 109, + { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, + 0x00A0, 0x201D, 0x00A2, 0x00A3, 0x00A4, 0x201E, 0x00A6, 0x00A7, + 0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x201C, 0x00B5, 0x00B6, 0x00B7, + 0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6, + 0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112, + 0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B, + 0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7, + 0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF, + 0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113, + 0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C, + 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7, + 0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x2019} }, + { "ISO-8859-14", { "iso-ir-199", "latin8", "iso-celtic", 0 }, 110, + { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, + 0x0090, 0x0091, 
0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, + 0x00A0, 0x1E02, 0x1E03, 0x00A3, 0x010A, 0x010B, 0x1E0A, 0x00A7, + 0x1E80, 0x00A9, 0x1E82, 0x1E0B, 0x1EF2, 0x00AD, 0x00AE, 0x0178, + 0x1E1E, 0x1E1F, 0x0120, 0x0121, 0x1E40, 0x1E41, 0x00B6, 0x1E56, + 0x1E81, 0x1E57, 0x1E83, 0x1E60, 0x1EF3, 0x1E84, 0x1E85, 0x1E61, + 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, + 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, + 0x0174, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x1E6A, + 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x0176, 0x00DF, + 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, + 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, + 0x0175, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x1E6B, + 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x0177, 0x00FF} }, + { "ISO-8859-16", { "iso-ir-226", "latin10", 0 }, 112, + { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, + 0x00A0, 0x0104, 0x0105, 0x0141, 0x20AC, 0x201E, 0x0160, 0x00A7, + 0x0161, 0x00A9, 0x0218, 0x00AB, 0x0179, 0x00AD, 0x017A, 0x017B, + 0x00B0, 0x00B1, 0x010C, 0x0142, 0x017D, 0x201D, 0x00B6, 0x00B7, + 0x017E, 0x010D, 0x0219, 0x00BB, 0x0152, 0x0153, 0x0178, 0x017C, + 0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0106, 0x00C6, 0x00C7, + 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, + 0x0110, 0x0143, 0x00D2, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x015A, + 0x0170, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0118, 0x021A, 0x00DF, + 0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x0107, 0x00E6, 0x00E7, + 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, + 0x0111, 0x0144, 0x00F2, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x015B, + 0x0171, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 
0x0119, 0x021B, 0x00FF} }, + + // next bits generated again from tables on the Unicode 3.0 CD. + + // $ for a in CP* ; do (awk '/^0x[89ABCDEF]/{ print $1, $2 }' < $a) | sort | sed -e 's/#UNDEF.*$/0xFFFD/' | cut -c6- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/$a ; done + + { "IBM850", { "CP850", "csPC850Multilingual", 0 }, 2009, + { 0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7, + 0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5, + 0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9, + 0x00FF, 0x00D6, 0x00DC, 0x00F8, 0x00A3, 0x00D8, 0x00D7, 0x0192, + 0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA, + 0x00BF, 0x00AE, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB, + 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00C1, 0x00C2, 0x00C0, + 0x00A9, 0x2563, 0x2551, 0x2557, 0x255D, 0x00A2, 0x00A5, 0x2510, + 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x00E3, 0x00C3, + 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x00A4, + 0x00F0, 0x00D0, 0x00CA, 0x00CB, 0x00C8, 0x0131, 0x00CD, 0x00CE, + 0x00CF, 0x2518, 0x250C, 0x2588, 0x2584, 0x00A6, 0x00CC, 0x2580, + 0x00D3, 0x00DF, 0x00D4, 0x00D2, 0x00F5, 0x00D5, 0x00B5, 0x00FE, + 0x00DE, 0x00DA, 0x00DB, 0x00D9, 0x00FD, 0x00DD, 0x00AF, 0x00B4, + 0x00AD, 0x00B1, 0x2017, 0x00BE, 0x00B6, 0x00A7, 0x00F7, 0x00B8, + 0x00B0, 0x00A8, 0x00B7, 0x00B9, 0x00B3, 0x00B2, 0x25A0, 0x00A0} }, + { "IBM874", { "CP874", 0 }, -874, //### what is the mib? 
+ { 0x20AC, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2026, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0x00A0, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07, + 0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F, + 0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17, + 0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F, + 0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27, + 0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F, + 0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37, + 0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F, + 0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47, + 0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F, + 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57, + 0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} }, + { "IBM866", { "CP866", "csIBM866", 0 }, 2086, + { 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, + 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F, + 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, + 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F, + 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, + 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, + 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, + 0x2555, 0x2563, 0x2551, 0x2557, 0x255D, 0x255C, 0x255B, 0x2510, + 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F, + 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2567, + 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B, + 0x256A, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580, + 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, + 0x0448, 0x0449, 0x044A, 
0x044B, 0x044C, 0x044D, 0x044E, 0x044F, + 0x0401, 0x0451, 0x0404, 0x0454, 0x0407, 0x0457, 0x040E, 0x045E, + 0x00B0, 0x2219, 0x00B7, 0x221A, 0x2116, 0x00A4, 0x25A0, 0x00A0} }, + + { "windows-1250", { "CP1250", 0 }, 2250, + { 0x20AC, 0xFFFD, 0x201A, 0xFFFD, 0x201E, 0x2026, 0x2020, 0x2021, + 0xFFFD, 0x2030, 0x0160, 0x2039, 0x015A, 0x0164, 0x017D, 0x0179, + 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, + 0xFFFD, 0x2122, 0x0161, 0x203A, 0x015B, 0x0165, 0x017E, 0x017A, + 0x00A0, 0x02C7, 0x02D8, 0x0141, 0x00A4, 0x0104, 0x00A6, 0x00A7, + 0x00A8, 0x00A9, 0x015E, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x017B, + 0x00B0, 0x00B1, 0x02DB, 0x0142, 0x00B4, 0x00B5, 0x00B6, 0x00B7, + 0x00B8, 0x0105, 0x015F, 0x00BB, 0x013D, 0x02DD, 0x013E, 0x017C, + 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7, + 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E, + 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7, + 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF, + 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7, + 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F, + 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7, + 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9} }, + { "windows-1251", { "CP1251", 0 }, 2251, + { 0x0402, 0x0403, 0x201A, 0x0453, 0x201E, 0x2026, 0x2020, 0x2021, + 0x20AC, 0x2030, 0x0409, 0x2039, 0x040A, 0x040C, 0x040B, 0x040F, + 0x0452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, + 0xFFFD, 0x2122, 0x0459, 0x203A, 0x045A, 0x045C, 0x045B, 0x045F, + 0x00A0, 0x040E, 0x045E, 0x0408, 0x00A4, 0x0490, 0x00A6, 0x00A7, + 0x0401, 0x00A9, 0x0404, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x0407, + 0x00B0, 0x00B1, 0x0406, 0x0456, 0x0491, 0x00B5, 0x00B6, 0x00B7, + 0x0451, 0x2116, 0x0454, 0x00BB, 0x0458, 0x0405, 0x0455, 0x0457, + 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, + 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F, + 0x0420, 
0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, + 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F, + 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, + 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, + 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, + 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F} }, + { "windows-1252", { "CP1252", 0 }, 2252, + { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, + 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD, + 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, + 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178, + 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, + 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, + 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, + 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, + 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, + 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, + 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, + 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, + 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, + 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, + 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF} }, + { "windows-1253", {"CP1253", 0 }, 2253, + { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, + 0xFFFD, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, + 0xFFFD, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0x00A0, 0x0385, 0x0386, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, + 0x00A8, 0x00A9, 0xFFFD, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x2015, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x00B5, 0x00B6, 0x00B7, 
+ 0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F, + 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, + 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F, + 0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7, + 0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF, + 0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7, + 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF, + 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7, + 0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD} }, + { "windows-1254", { "CP1254", 0 }, 2254, + { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, + 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, + 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178, + 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, + 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, + 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, + 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, + 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, + 0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, + 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF, + 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, + 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, + 0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, + 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF} }, + { "windows-1255", { "CP1255", 0 }, 2255, + { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, + 0x02C6, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, + 0x02DC, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0xFFFD, 
0xFFFD, 0xFFFD, + 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AA, 0x00A5, 0x00A6, 0x00A7, + 0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, + 0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, + 0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7, + 0x05B8, 0x05B9, 0xFFFD, 0x05BB, 0x05BC, 0x05BD, 0x05BE, 0x05BF, + 0x05C0, 0x05C1, 0x05C2, 0x05C3, 0x05F0, 0x05F1, 0x05F2, 0x05F3, + 0x05F4, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, + 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF, + 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7, + 0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0x200E, 0x200F, 0xFFFD} }, + { "windows-1256", { "CP1256", 0 }, 2256, + { 0x20AC, 0x067E, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, + 0x02C6, 0x2030, 0x0679, 0x2039, 0x0152, 0x0686, 0x0698, 0x0688, + 0x06AF, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, + 0x06A9, 0x2122, 0x0691, 0x203A, 0x0153, 0x200C, 0x200D, 0x06BA, + 0x00A0, 0x060C, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, + 0x00A8, 0x00A9, 0x06BE, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, + 0x00B8, 0x00B9, 0x061B, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x061F, + 0x06C1, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627, + 0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F, + 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x00D7, + 0x0637, 0x0638, 0x0639, 0x063A, 0x0640, 0x0641, 0x0642, 0x0643, + 0x00E0, 0x0644, 0x00E2, 0x0645, 0x0646, 0x0647, 0x0648, 0x00E7, + 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0649, 0x064A, 0x00EE, 0x00EF, + 0x064B, 0x064C, 0x064D, 0x064E, 0x00F4, 0x064F, 0x0650, 0x00F7, + 0x0651, 0x00F9, 0x0652, 0x00FB, 0x00FC, 0x200E, 0x200F, 0x06D2} }, + { "windows-1257", { "CP1257", 0 }, 2257, + { 0x20AC, 0xFFFD, 0x201A, 0xFFFD, 
0x201E, 0x2026, 0x2020, 0x2021, + 0xFFFD, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0x00A8, 0x02C7, 0x00B8, + 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, + 0xFFFD, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0x00AF, 0x02DB, 0xFFFD, + 0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0xFFFD, 0x00A6, 0x00A7, + 0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, + 0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6, + 0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112, + 0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B, + 0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7, + 0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF, + 0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113, + 0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C, + 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7, + 0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x02D9} }, + { "windows-1258", { "CP1258", 0 }, 2258, + { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, + 0x02C6, 0x2030, 0xFFFD, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, + 0x02DC, 0x2122, 0xFFFD, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178, + 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, + 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, + 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, + 0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x00C5, 0x00C6, 0x00C7, + 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x0300, 0x00CD, 0x00CE, 0x00CF, + 0x0110, 0x00D1, 0x0309, 0x00D3, 0x00D4, 0x01A0, 0x00D6, 0x00D7, + 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x01AF, 0x0303, 0x00DF, + 0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x00E5, 0x00E6, 0x00E7, + 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0301, 0x00ED, 0x00EE, 0x00EF, + 
0x0111, 0x00F1, 0x0323, 0x00F3, 0x00F4, 0x01A1, 0x00F6, 0x00F7, + 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x01B0, 0x20AB, 0x00FF} }, + + { "Apple Roman", { "macintosh", "MacRoman", 0 }, -168, + { 0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1, + 0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8, + 0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3, + 0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC, + 0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF, + 0x00AE, 0x00A9, 0x2122, 0x00B4, 0x00A8, 0x2260, 0x00C6, 0x00D8, + 0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211, + 0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8, + 0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB, + 0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3, 0x00D5, 0x0152, 0x0153, + 0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA, + 0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02, + 0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1, + 0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC, 0x00D3, 0x00D4, + 0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC, + 0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7} }, + + + + // This one is based on the charmap file + // /usr/share/i18n/charmaps/SAMI-WS2.gz, which is manually adapted + // to this format by Boerre Gaup <boerre@subdimension.com> + { "WINSAMI2", { "WS2", 0 }, -165, + { 0x20AC, 0xFFFD, 0x010C, 0x0192, 0x010D, 0x01B7, 0x0292, 0x01EE, + 0x01EF, 0x0110, 0x0160, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, + 0x0111, 0x01E6, 0x0161, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178, + 0x00A0, 0x01E7, 0x01E4, 0x00A3, 0x00A4, 0x01E5, 0x00A6, 0x00A7, + 0x00A8, 0x00A9, 0x021E, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x021F, + 0x00B0, 0x00B1, 0x01E8, 0x01E9, 0x00B4, 0x00B5, 0x00B6, 0x00B7, + 0x014A, 0x014B, 0x0166, 0x00BB, 0x0167, 0x00BD, 0x017D, 0x017E, + 
0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, + 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, + 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, + 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, + 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, + 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, + 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, + 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF} }, + + + // this one is generated from the charmap file located in /usr/share/i18n/charmaps + // on most Linux distributions. The thai character set tis620 is byte by byte equivalent + // to iso8859-11, so we name it 8859-11 here, but recognise the name tis620 too. + + // $ for A in 8 9 A B C D E F ; do for B in 0 1 2 3 4 5 6 7 8 9 A B C D E F ; do echo x${A}${B} 0xFFFD ; done ; done > /tmp/digits ; (cut -c25- < TIS-620 ; cat /tmp/digits) | awk '/^x[89ABCDEF]/{ print $1, $2 }' | sed -e 's/<U/0x/' -e 's/>//' | sort | uniq -w4 | cut -c5- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/tis-620 + { "TIS-620", { "ISO 8859-11", 0 }, 2259, // Thai character set mib enum taken from tis620 (which is byte by byte equivalent) + { 0x20AC, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2026, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07, + 0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F, + 0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17, + 0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F, + 0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27, + 0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F, + 0x0E30, 0x0E31, 0x0E32, 
0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37, + 0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F, + 0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47, + 0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F, + 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57, + 0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD } }, + + /* + Name: hp-roman8 [HP-PCL5,RFC1345,KXS2] + MIBenum: 2004 + Source: LaserJet IIP Printer User's Manual, + HP part no 33471-90901, Hewlet-Packard, June 1989. + Alias: roman8 + Alias: r8 + Alias: csHPRoman8 + */ + { "roman8", { "hp-roman8", "csHPRoman8", 0 }, 2004, + { 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0x00A0, 0x00C0, 0x00C2, 0x00C8, 0x00CA, 0x00CB, 0x00CE, 0x00CF, + 0x00B4, 0x02CB, 0x02C6, 0x00A8, 0x02DC, 0x00D9, 0x00DB, 0x20A4, + 0x00AF, 0x00DD, 0x00FD, 0x00B0, 0x00C7, 0x00E7, 0x00D1, 0x00F1, + 0x00A1, 0x00BF, 0x00A4, 0x00A3, 0x00A5, 0x00A7, 0x0192, 0x00A2, + 0x00E2, 0x00EA, 0x00F4, 0x00FB, 0x00E1, 0x00E9, 0x00F3, 0x00FA, + 0x00E0, 0x00E8, 0x00F2, 0x00F9, 0x00E4, 0x00EB, 0x00F6, 0x00FC, + 0x00C5, 0x00EE, 0x00D8, 0x00C6, 0x00E5, 0x00ED, 0x00F8, 0x00E6, + 0x00C4, 0x00EC, 0x00D6, 0x00DC, 0x00C9, 0x00EF, 0x00DF, 0x00D4, + 0x00C1, 0x00C3, 0x00E3, 0x00D0, 0x00F0, 0x00CD, 0x00CC, 0x00D3, + 0x00D2, 0x00D5, 0x00F5, 0x0160, 0x0161, 0x00DA, 0x0178, 0x00FF, + 0x00DE, 0x00FE, 0x00B7, 0x00B5, 0x00B6, 0x00BE, 0x2014, 0x00BC, + 0x00BD, 0x00AA, 0x00BA, 0x00AB, 0x25A0, 0x00BB, 0x00B1, 0xFFFD } } + + // if you add more chacater sets at the end, change LAST_MIB above +}; + +QSimpleTextCodec::QSimpleTextCodec(int i) : forwardIndex(i), reverseMap(0) +{ +} + + +QSimpleTextCodec::~QSimpleTextCodec() +{ + delete reverseMap; +} + +static QByteArray *buildReverseMap(int forwardIndex) +{ + 
QByteArray *map = new QByteArray(); + int m = 0; + int i = 0; + while(i < 128) { + if (unicodevalues[forwardIndex].values[i] > m && + unicodevalues[forwardIndex].values[i] < 0xfffd) + m = unicodevalues[forwardIndex].values[i]; + i++; + } + m++; + map->resize(m); + for(i = 0; i < 128 && i < m; i++) + (*map)[i] = (char)i; + for(;i < m; i++) + (*map)[i] = 0; + for(i=128; i<256; i++) { + int u = unicodevalues[forwardIndex].values[i-128]; + if (u < m) + (*map)[u] = (char)(unsigned char)(i); + } + return map; +} + +QString QSimpleTextCodec::convertToUnicode(const char* chars, int len, ConverterState *) const +{ + if (len <= 0 || chars == 0) + return QString(); + + const unsigned char * c = (const unsigned char *)chars; + + QString r; + r.resize(len); + QChar* uc = r.data(); + + for (int i = 0; i < len; i++) { + if (c[i] > 127) + uc[i] = unicodevalues[forwardIndex].values[c[i]-128]; + else + uc[i] = QLatin1Char(c[i]); + } + return r; +} + +QByteArray QSimpleTextCodec::convertFromUnicode(const QChar *in, int length, ConverterState *state) const +{ + const char replacement = (state && state->flags & ConvertInvalidToNull) ? 0 : '?'; + int invalid = 0; + + if (!reverseMap){ + QByteArray *tmp = buildReverseMap(this->forwardIndex); + if (!reverseMap.testAndSetOrdered(0, tmp)) + delete tmp; + } + + QByteArray r; + r.resize(length); + int i = length; + int u; + const QChar* ucp = in; + unsigned char* rp = (unsigned char *)r.data(); + const unsigned char* rmp = (const unsigned char *)reverseMap->data(); + int rmsize = (int) reverseMap->size(); + while(i--) + { + u = ucp->unicode(); + if (u < 128) { + *rp = (char)u; + } else { + *rp = ((u < rmsize) ? 
(*(rmp+u)) : 0); + if (*rp == 0) { + *rp = replacement; + ++invalid; + } + } + rp++; + ucp++; + } + + if (state) { + state->invalidChars += invalid; + } + return r; +} + +QByteArray QSimpleTextCodec::name() const +{ + return unicodevalues[forwardIndex].mime; +} + +QList<QByteArray> QSimpleTextCodec::aliases() const +{ + QList<QByteArray> list; + const char * const*a = unicodevalues[forwardIndex].aliases; + while (*a) { + list << *a; + ++a; + } + return list; +} + +int QSimpleTextCodec::mibEnum() const +{ + return unicodevalues[forwardIndex].mib; +} + +QT_END_NAMESPACE + +#endif // QT_NO_TEXTCODEC diff --git a/src/corelib/codecs/qsimplecodec_p.h b/src/corelib/codecs/qsimplecodec_p.h new file mode 100644 index 0000000..0fa874a --- /dev/null +++ b/src/corelib/codecs/qsimplecodec_p.h @@ -0,0 +1,87 @@ +/**************************************************************************** +** +** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). +** Contact: Qt Software Information (qt-info@nokia.com) +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** No Commercial Usage +** This file contains pre-release code and may not be distributed. +** You may use this file in accordance with the terms and conditions +** contained in the either Technology Preview License Agreement or the +** Beta Release License Agreement. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain +** additional rights. 
These rights are described in the Nokia Qt LGPL +** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this +** package. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 3.0 as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU General Public License version 3.0 requirements will be +** met: http://www.gnu.org/copyleft/gpl.html. +** +** If you are unsure which license is appropriate for your use, please +** contact the sales department at qt-sales@nokia.com. +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#ifndef QSIMPLECODEC_P_H +#define QSIMPLECODEC_P_H + +// +// W A R N I N G +// ------------- +// +// This file is not part of the Qt API. It exists purely as an +// implementation detail. This header file may change from version to +// version without notice, or even be removed. +// +// We mean it. 
+//
+
+#include "QtCore/qtextcodec.h"
+
+QT_BEGIN_NAMESPACE
+
+#ifndef QT_NO_TEXTCODEC
+
+template <typename T> class QAtomicPointer;
+
+// QTextCodec implementation driven by one entry of the unicodevalues table
+// in qsimplecodec.cpp. Bytes and code points below 128 pass through
+// unchanged; bytes from 128 up are translated through the table entry.
+class QSimpleTextCodec: public QTextCodec
+{
+public:
+    // NOTE(review): presumably the number of entries in unicodevalues -
+    // confirm against the table (its comment refers to LAST_MIB).
+    enum { numSimpleCodecs = 30 };
+    // The argument is stored as the index into unicodevalues.
+    explicit QSimpleTextCodec(int);
+    ~QSimpleTextCodec();
+
+    // Byte string -> Unicode; the ConverterState argument is ignored.
+    QString convertToUnicode(const char *, int, ConverterState *) const;
+    // Unicode -> byte string; unmappable characters are replaced and counted
+    // in the ConverterState when one is passed.
+    QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const;
+
+    // Name, aliases and MIB enum are read from the table entry.
+    QByteArray name() const;
+    QList<QByteArray> aliases() const;
+    int mibEnum() const;
+
+private:
+    // index of this codec's entry in unicodevalues
+    int forwardIndex;
+    // Unicode -> byte table; null until convertFromUnicode() first needs it,
+    // hence mutable. Deleted by the destructor.
+    mutable QAtomicPointer<QByteArray> reverseMap;
+};
+
+#endif // QT_NO_TEXTCODEC
+
+QT_END_NAMESPACE
+
+#endif // QSIMPLECODEC_P_H
diff --git a/src/corelib/codecs/qtextcodec.cpp b/src/corelib/codecs/qtextcodec.cpp
new file mode 100644
index 0000000..6e8ffa1
--- /dev/null
+++ b/src/corelib/codecs/qtextcodec.cpp
@@ -0,0 +1,1598 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+** +** In addition, as a special exception, Nokia gives you certain +** additional rights. These rights are described in the Nokia Qt LGPL +** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this +** package. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 3.0 as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU General Public License version 3.0 requirements will be +** met: http://www.gnu.org/copyleft/gpl.html. +** +** If you are unsure which license is appropriate for your use, please +** contact the sales department at qt-sales@nokia.com. +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include "qplatformdefs.h" +#include "qtextcodec.h" +#include "qtextcodec_p.h" + +#ifndef QT_NO_TEXTCODEC + +#include "qlist.h" +#include "qfile.h" +#ifndef QT_NO_LIBRARY +# include "qcoreapplication.h" +# include "qtextcodecplugin.h" +# include "private/qfactoryloader_p.h" +#endif +#include "qstringlist.h" + +#ifdef Q_OS_UNIX +# include "qiconvcodec_p.h" +#endif + +#include "qutfcodec_p.h" +#include "qsimplecodec_p.h" +#include "qlatincodec_p.h" +#ifndef QT_NO_CODECS +# include "qtsciicodec_p.h" +# include "qisciicodec_p.h" +# if defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED) +// no iconv(3) support, must build all codecs into the library +# include "../../plugins/codecs/cn/qgb18030codec.h" +# include "../../plugins/codecs/jp/qeucjpcodec.h" +# include "../../plugins/codecs/jp/qjiscodec.h" +# include "../../plugins/codecs/jp/qsjiscodec.h" +# include "../../plugins/codecs/kr/qeuckrcodec.h" +# include "../../plugins/codecs/tw/qbig5codec.h" +# endif // QT_NO_ICONV +# if defined(Q_WS_X11) && !defined(QT_BOOTSTRAPPED) +# include "qfontlaocodec_p.h" +# include 
"../../plugins/codecs/jp/qfontjpcodec.h" +# endif +#endif // QT_NO_CODECS +#include "qlocale.h" +#include "private/qmutexpool_p.h" + +#include <stdlib.h> +#include <ctype.h> +#include <locale.h> +#if defined (_XOPEN_UNIX) && !defined(Q_OS_QNX6) && !defined(Q_OS_OSF) +#include <langinfo.h> +#endif + +#if defined(Q_OS_WINCE) +# define QT_NO_SETLOCALE +#endif + +QT_BEGIN_NAMESPACE + +#ifndef QT_NO_TEXTCODECPLUGIN +Q_GLOBAL_STATIC_WITH_ARGS(QFactoryLoader, loader, + (QTextCodecFactoryInterface_iid, QLatin1String("/codecs"))) +#endif + + +static bool nameMatch(const QByteArray &name, const QByteArray &test) +{ + // if they're the same, return a perfect score + if (qstricmp(name, test) == 0) + return true; + + const char *n = name.constData(); + const char *h = test.constData(); + + // if the letters and numbers are the same, we have a match + while (*n != '\0') { + if (isalnum((uchar)*n)) { + for (;;) { + if (*h == '\0') + return false; + if (isalnum((uchar)*h)) + break; + ++h; + } + if (tolower((uchar)*n) != tolower((uchar)*h)) + return false; + ++h; + } + ++n; + } + while (*h && !isalnum((uchar)*h)) + ++h; + return (*h == '\0'); +} + + +static QTextCodec *createForName(const QByteArray &name) +{ +#ifndef QT_NO_TEXTCODECPLUGIN + QFactoryLoader *l = loader(); + QStringList keys = l->keys(); + for (int i = 0; i < keys.size(); ++i) { + if (nameMatch(name, keys.at(i).toLatin1())) { + QString realName = keys.at(i); + if (QTextCodecFactoryInterface *factory + = qobject_cast<QTextCodecFactoryInterface*>(l->instance(realName))) { + return factory->create(realName); + } + } + } +#else + Q_UNUSED(name); +#endif + return 0; +} + +static QTextCodec *createForMib(int mib) +{ +#ifndef QT_NO_TEXTCODECPLUGIN + QString name = QLatin1String("MIB: ") + QString::number(mib); + if (QTextCodecFactoryInterface *factory + = qobject_cast<QTextCodecFactoryInterface*>(loader()->instance(name))) + return factory->create(name); +#else + Q_UNUSED(mib); +#endif + return 0; +} + +static 
QList<QTextCodec*> *all = 0; +static bool destroying_is_ok = false; + +static QTextCodec *localeMapper = 0; +QTextCodec *QTextCodec::cftr = 0; + + +class QTextCodecCleanup +{ +public: + ~QTextCodecCleanup(); +}; + +/* + Deletes all the created codecs. This destructor is called just + before exiting to delete any QTextCodec objects that may be lying + around. +*/ +QTextCodecCleanup::~QTextCodecCleanup() +{ + if (!all) + return; + + destroying_is_ok = true; + + while (all->size()) + delete all->takeFirst(); + delete all; + all = 0; + localeMapper = 0; + + destroying_is_ok = false; +} + +Q_GLOBAL_STATIC(QTextCodecCleanup, createQTextCodecCleanup) + +#if defined(Q_OS_WIN32) || defined(Q_OS_WINCE) +class QWindowsLocalCodec: public QTextCodec +{ +public: + QWindowsLocalCodec(); + ~QWindowsLocalCodec(); + + QString convertToUnicode(const char *, int, ConverterState *) const; + QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const; + QString convertToUnicodeCharByChar(const char *chars, int length, ConverterState *state) const; + + QByteArray name() const; + int mibEnum() const; + +}; + +QWindowsLocalCodec::QWindowsLocalCodec() +{ +} + +QWindowsLocalCodec::~QWindowsLocalCodec() +{ +} + +QString QWindowsLocalCodec::convertToUnicode(const char *chars, int length, ConverterState *state) const +{ + const char *mb = chars; + int mblen = length; + + if (!mb || !mblen) + return QString(); + + const int wclen_auto = 4096; + WCHAR wc_auto[wclen_auto]; + int wclen = wclen_auto; + WCHAR *wc = wc_auto; + int len; + QString sp; + bool prepend = false; + char state_data = 0; + int remainingChars = 0; + + //save the current state information + if (state) { + state_data = (char)state->state_data[0]; + remainingChars = state->remainingChars; + } + + //convert the pending charcter (if available) + if (state && remainingChars) { + char prev[3] = {0}; + prev[0] = state_data; + prev[1] = mb[0]; + remainingChars = 0; + len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, + 
prev, 2, wc, wclen); + if (len) { + prepend = true; + sp.append(QChar(wc[0])); + mb++; + mblen--; + wc[0] = 0; + } + } + + while (!(len=MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS, + mb, mblen, wc, wclen))) { + int r = GetLastError(); + if (r == ERROR_INSUFFICIENT_BUFFER) { + if (wc != wc_auto) { + qWarning("MultiByteToWideChar: Size changed"); + break; + } else { + wclen = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, + mb, mblen, 0, 0); + wc = new WCHAR[wclen]; + // and try again... + } + } else if (r == ERROR_NO_UNICODE_TRANSLATION) { + //find the last non NULL character + while (mblen > 1 && !(mb[mblen-1])) + mblen--; + //check whether, we hit an invalid character in the middle + if ((mblen <= 1) || (remainingChars && state_data)) + return convertToUnicodeCharByChar(chars, length, state); + //Remove the last character and try again... + state_data = mb[mblen-1]; + remainingChars = 1; + mblen--; + } else { + // Fail. + qWarning("MultiByteToWideChar: Cannot convert multibyte text"); + break; + } + } + if (len <= 0) + return QString(); + if (wc[len-1] == 0) // len - 1: we don't want terminator + --len; + + //save the new state information + if (state) { + state->state_data[0] = (char)state_data; + state->remainingChars = remainingChars; + } + QString s((QChar*)wc, len); + if (wc != wc_auto) + delete [] wc; + if (prepend) { + return sp+s; + } + return s; +} + +QString QWindowsLocalCodec::convertToUnicodeCharByChar(const char *chars, int length, ConverterState *state) const +{ + if (!chars || !length) + return QString(); + + int copyLocation = 0; + int extra = 2; + if (state && state->remainingChars) { + copyLocation = state->remainingChars; + extra += copyLocation; + } + int newLength = length + extra; + char *mbcs = new char[newLength]; + //ensure that we have a NULL terminated string + mbcs[newLength-1] = 0; + mbcs[newLength-2] = 0; + memcpy(&(mbcs[copyLocation]), chars, length); + if (copyLocation) { + //copy the last character from the state + 
mbcs[0] = (char)state->state_data[0]; + state->remainingChars = 0; + } + const char *mb = mbcs; +#ifndef Q_OS_WINCE + const char *next = 0; + QString s; + while((next = CharNextExA(CP_ACP, mb, 0)) != mb) { + WCHAR wc[2] ={0}; + int charlength = next - mb; + int len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS, mb, charlength, wc, 2); + if (len>0) { + s.append(QChar(wc[0])); + } else { + int r = GetLastError(); + //check if the character being dropped is the last character + if (r == ERROR_NO_UNICODE_TRANSLATION && mb == (mbcs+newLength -3) && state) { + state->remainingChars = 1; + state->state_data[0] = (char)*mb; + } + } + mb = next; + } +#else + QString s; + int size = mbstowcs(NULL, mb, length); + if (size < 0) { + Q_ASSERT("Error in CE TextCodec"); + return QString(); + } + wchar_t* ws = new wchar_t[size + 2]; + ws[size +1] = 0; + ws[size] = 0; + size = mbstowcs(ws, mb, length); + for (int i=0; i< size; i++) + s.append(QChar(ws[i])); + delete [] ws; +#endif + delete mbcs; + return s; +} + +QByteArray QWindowsLocalCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *) const +{ + return qt_winQString2MB(uc, len); +} + + +QByteArray QWindowsLocalCodec::name() const +{ + return "System"; +} + +int QWindowsLocalCodec::mibEnum() const +{ + return 0; +} + +#else + +/* locale names mostly copied from XFree86 */ +static const char * const iso8859_2locales[] = { + "croatian", "cs", "cs_CS", "cs_CZ","cz", "cz_CZ", "czech", "hr", + "hr_HR", "hu", "hu_HU", "hungarian", "pl", "pl_PL", "polish", "ro", + "ro_RO", "rumanian", "serbocroatian", "sh", "sh_SP", "sh_YU", "sk", + "sk_SK", "sl", "sl_CS", "sl_SI", "slovak", "slovene", "sr_SP", 0 }; + +static const char * const iso8859_3locales[] = { + "eo", 0 }; + +static const char * const iso8859_4locales[] = { + "ee", "ee_EE", 0 }; + +static const char * const iso8859_5locales[] = { + "mk", "mk_MK", "sp", "sp_YU", 0 }; + +static const char * const cp_1251locales[] = { + "be", "be_BY", "bg", 
"bg_BG", "bulgarian", 0 }; + +static const char * const pt_154locales[] = { + "ba_RU", "ky", "ky_KG", "kk", "kk_KZ", 0 }; + +static const char * const iso8859_6locales[] = { + "ar_AA", "ar_SA", "arabic", 0 }; + +static const char * const iso8859_7locales[] = { + "el", "el_GR", "greek", 0 }; + +static const char * const iso8859_8locales[] = { + "hebrew", "he", "he_IL", "iw", "iw_IL", 0 }; + +static const char * const iso8859_9locales[] = { + "tr", "tr_TR", "turkish", 0 }; + +static const char * const iso8859_13locales[] = { + "lt", "lt_LT", "lv", "lv_LV", 0 }; + +static const char * const iso8859_15locales[] = { + "et", "et_EE", + // Euro countries + "br_FR", "ca_ES", "de", "de_AT", "de_BE", "de_DE", "de_LU", "en_IE", + "es", "es_ES", "eu_ES", "fi", "fi_FI", "finnish", "fr", "fr_FR", + "fr_BE", "fr_LU", "french", "ga_IE", "gl_ES", "it", "it_IT", "oc_FR", + "nl", "nl_BE", "nl_NL", "pt", "pt_PT", "sv_FI", "wa_BE", + 0 }; + +static const char * const koi8_ulocales[] = { + "uk", "uk_UA", "ru_UA", "ukrainian", 0 }; + +static const char * const tis_620locales[] = { + "th", "th_TH", "thai", 0 }; + +// static const char * const tcvnlocales[] = { +// "vi", "vi_VN", 0 }; + +static bool try_locale_list(const char * const locale[], const char * lang) +{ + int i; + for(i=0; locale[i] && *locale[i] && strcmp(locale[i], lang); i++) + ; + return locale[i] != 0; +} + +// For the probably_koi8_locales we have to look. the standard says +// these are 8859-5, but almost all Russian users use KOI8-R and +// incorrectly set $LANG to ru_RU. We'll check tolower() to see what +// it thinks ru_RU means. + +// If you read the history, it seems that many Russians blame ISO and +// Perestroika for the confusion. +// +// The real bug is that some programs break if the user specifies +// ru_RU.KOI8-R. 
+ +static const char * const probably_koi8_rlocales[] = { + "ru", "ru_SU", "ru_RU", "russian", 0 }; + +static QTextCodec * ru_RU_hack(const char * i) { + QTextCodec * ru_RU_codec = 0; + +#if !defined(QT_NO_SETLOCALE) + QByteArray origlocale(setlocale(LC_CTYPE, i)); +#else + QByteArray origlocale(i); +#endif + // unicode koi8r latin5 name + // 0x044E 0xC0 0xEE CYRILLIC SMALL LETTER YU + // 0x042E 0xE0 0xCE CYRILLIC CAPITAL LETTER YU + int latin5 = tolower(0xCE); + int koi8r = tolower(0xE0); + if (koi8r == 0xC0 && latin5 != 0xEE) { + ru_RU_codec = QTextCodec::codecForName("KOI8-R"); + } else if (koi8r != 0xC0 && latin5 == 0xEE) { + ru_RU_codec = QTextCodec::codecForName("ISO 8859-5"); + } else { + // something else again... let's assume... *throws dice* + ru_RU_codec = QTextCodec::codecForName("KOI8-R"); + qWarning("QTextCodec: Using KOI8-R, probe failed (%02x %02x %s)", + koi8r, latin5, i); + } +#if !defined(QT_NO_SETLOCALE) + setlocale(LC_CTYPE, origlocale); +#endif + + return ru_RU_codec; +} + +#endif + +#if !defined(Q_OS_WIN32) && !defined(Q_OS_WINCE) +static QTextCodec *checkForCodec(const char *name) { + QTextCodec *c = QTextCodec::codecForName(name); + if (!c) { + const char *at = strchr(name, '@'); + if (at) { + QByteArray n(name, at - name); + c = QTextCodec::codecForName(n.data()); + } + } + return c; +} +#endif + +/* the next two functions are implicitely thread safe, + as they are only called by setup() which uses a mutex. 
+*/ +static void setupLocaleMapper() +{ +#if defined(Q_OS_WIN32) || defined(Q_OS_WINCE) + localeMapper = QTextCodec::codecForName("System"); +#else + +#ifndef QT_NO_ICONV + localeMapper = QTextCodec::codecForName("System"); +#endif + +#if defined (_XOPEN_UNIX) && !defined(Q_OS_QNX6) && !defined(Q_OS_OSF) + if (!localeMapper) { + char *charset = nl_langinfo (CODESET); + if (charset) + localeMapper = QTextCodec::codecForName(charset); + } +#endif + + if (!localeMapper) { + // Very poorly defined and followed standards causes lots of + // code to try to get all the cases... This logic is + // duplicated in QIconvCodec, so if you change it here, change + // it there too. + + // Try to determine locale codeset from locale name assigned to + // LC_CTYPE category. + + // First part is getting that locale name. First try setlocale() which + // definitely knows it, but since we cannot fully trust it, get ready + // to fall back to environment variables. +#if !defined(QT_NO_SETLOCALE) + char * ctype = qstrdup(setlocale(LC_CTYPE, 0)); +#else + char * ctype = qstrdup(""); +#endif + + // Get the first nonempty value from $LC_ALL, $LC_CTYPE, and $LANG + // environment variables. + char * lang = qstrdup(qgetenv("LC_ALL").constData()); + if (!lang || lang[0] == 0 || strcmp(lang, "C") == 0) { + if (lang) delete [] lang; + lang = qstrdup(qgetenv("LC_CTYPE").constData()); + } + if (!lang || lang[0] == 0 || strcmp(lang, "C") == 0) { + if (lang) delete [] lang; + lang = qstrdup(qgetenv("LANG").constData()); + } + + // Now try these in order: + // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15) + // 2. CODESET from lang if it contains a .CODESET part + // 3. ctype (maybe the locale is named "ISO-8859-1" or something) + // 4. locale (ditto) + // 5. check for "@euro" + // 6. guess locale from ctype unless ctype is "C" + // 7. guess locale from lang + + // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15) + char * codeset = ctype ? 
strchr(ctype, '.') : 0; + if (codeset && *codeset == '.') + localeMapper = checkForCodec(codeset + 1); + + // 2. CODESET from lang if it contains a .CODESET part + codeset = lang ? strchr(lang, '.') : 0; + if (!localeMapper && codeset && *codeset == '.') + localeMapper = checkForCodec(codeset + 1); + + // 3. ctype (maybe the locale is named "ISO-8859-1" or something) + if (!localeMapper && ctype && *ctype != 0 && strcmp (ctype, "C") != 0) + localeMapper = checkForCodec(ctype); + + // 4. locale (ditto) + if (!localeMapper && lang && *lang != 0) + localeMapper = checkForCodec(lang); + + // 5. "@euro" + if ((!localeMapper && ctype && strstr(ctype, "@euro")) || (lang && strstr(lang, "@euro"))) + localeMapper = checkForCodec("ISO 8859-15"); + + // 6. guess locale from ctype unless ctype is "C" + // 7. guess locale from lang + char * try_by_name = ctype; + if (ctype && *ctype != 0 && strcmp (ctype, "C") != 0) + try_by_name = lang; + + // Now do the guessing. + if (lang && *lang && !localeMapper && try_by_name && *try_by_name) { + if (try_locale_list(iso8859_15locales, lang)) + localeMapper = QTextCodec::codecForName("ISO 8859-15"); + else if (try_locale_list(iso8859_2locales, lang)) + localeMapper = QTextCodec::codecForName("ISO 8859-2"); + else if (try_locale_list(iso8859_3locales, lang)) + localeMapper = QTextCodec::codecForName("ISO 8859-3"); + else if (try_locale_list(iso8859_4locales, lang)) + localeMapper = QTextCodec::codecForName("ISO 8859-4"); + else if (try_locale_list(iso8859_5locales, lang)) + localeMapper = QTextCodec::codecForName("ISO 8859-5"); + else if (try_locale_list(iso8859_6locales, lang)) + localeMapper = QTextCodec::codecForName("ISO 8859-6"); + else if (try_locale_list(iso8859_7locales, lang)) + localeMapper = QTextCodec::codecForName("ISO 8859-7"); + else if (try_locale_list(iso8859_8locales, lang)) + localeMapper = QTextCodec::codecForName("ISO 8859-8-I"); + else if (try_locale_list(iso8859_9locales, lang)) + localeMapper = 
QTextCodec::codecForName("ISO 8859-9"); + else if (try_locale_list(iso8859_13locales, lang)) + localeMapper = QTextCodec::codecForName("ISO 8859-13"); + else if (try_locale_list(tis_620locales, lang)) + localeMapper = QTextCodec::codecForName("ISO 8859-11"); + else if (try_locale_list(koi8_ulocales, lang)) + localeMapper = QTextCodec::codecForName("KOI8-U"); + else if (try_locale_list(cp_1251locales, lang)) + localeMapper = QTextCodec::codecForName("CP 1251"); + else if (try_locale_list(pt_154locales, lang)) + localeMapper = QTextCodec::codecForName("PT 154"); + else if (try_locale_list(probably_koi8_rlocales, lang)) + localeMapper = ru_RU_hack(lang); + } + + delete [] ctype; + delete [] lang; + } + + // If everything failed, we default to 8859-1 + // We could perhaps default to 8859-15. + if (!localeMapper) + localeMapper = QTextCodec::codecForName("ISO 8859-1"); +#endif +} + + +static void setup() +{ +#ifndef QT_NO_THREAD + QMutexLocker locker(QMutexPool::globalInstanceGet(&all)); +#endif + + if (all) + return; + + if (destroying_is_ok) + qWarning("QTextCodec: Creating new codec during codec cleanup"); + all = new QList<QTextCodec*>; + // create the cleanup object to cleanup all codecs on exit + (void) createQTextCodecCleanup(); + +#ifndef QT_NO_CODECS +# if defined(Q_WS_X11) && !defined(QT_BOOTSTRAPPED) + // no font codecs when bootstrapping + (void)new QFontLaoCodec; +# if defined(QT_NO_ICONV) + // no iconv(3) support, must build all codecs into the library + (void)new QFontGb2312Codec; + (void)new QFontGbkCodec; + (void)new QFontGb18030_0Codec; + (void)new QFontJis0208Codec; + (void)new QFontJis0201Codec; + (void)new QFontKsc5601Codec; + (void)new QFontBig5hkscsCodec; + (void)new QFontBig5Codec; +# endif // QT_NO_ICONV && !QT_BOOTSTRAPPED +# endif // Q_WS_X11 + + (void)new QTsciiCodec; + + for (int i = 0; i < 9; ++i) + (void)new QIsciiCodec(i); + + +# if defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED) + // no asian codecs when bootstrapping, sorry + 
(void)new QGb18030Codec; + (void)new QGbkCodec; + (void)new QGb2312Codec; + (void)new QEucJpCodec; + (void)new QJisCodec; + (void)new QSjisCodec; + (void)new QEucKrCodec; + (void)new QBig5Codec; + (void)new QBig5hkscsCodec; +# endif // QT_NO_ICONV && !QT_BOOTSTRAPPED +#endif // QT_NO_CODECS + +#if defined(Q_OS_WIN32) || defined(Q_OS_WINCE) + (void) new QWindowsLocalCodec; +#endif // Q_OS_WIN32 + + (void)new QUtf16Codec; + (void)new QUtf16BECodec; + (void)new QUtf16LECodec; + (void)new QUtf32Codec; + (void)new QUtf32BECodec; + (void)new QUtf32LECodec; + (void)new QLatin15Codec; + (void)new QLatin1Codec; + (void)new QUtf8Codec; + + for (int i = 0; i < QSimpleTextCodec::numSimpleCodecs; ++i) + (void)new QSimpleTextCodec(i); + +#if defined(Q_OS_UNIX) && !defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED) + // QIconvCodec depends on the UTF-16 codec, so it needs to be created last + (void) new QIconvCodec(); +#endif + + if (!localeMapper) + setupLocaleMapper(); +} + +QTextCodec::ConverterState::~ConverterState() +{ + if (flags & FreeFunction) + (QTextCodecUnalignedPointer::decode(state_data))(this); + else if (d) + qFree(d); +} + +/*! + \class QTextCodec + \brief The QTextCodec class provides conversions between text encodings. + \reentrant + \ingroup i18n + + Qt uses Unicode to store, draw and manipulate strings. In many + situations you may wish to deal with data that uses a different + encoding. For example, most Japanese documents are still stored + in Shift-JIS or ISO 2022-JP, while Russian users often have their + documents in KOI8-R or Windows-1251. + + Qt provides a set of QTextCodec classes to help with converting + non-Unicode formats to and from Unicode. You can also create your + own codec classes. 
+ + The supported encodings are: + + \list + \o Apple Roman + \o \l{Big5 Text Codec}{Big5} + \o \l{Big5-HKSCS Text Codec}{Big5-HKSCS} + \o CP949 + \o \l{EUC-JP Text Codec}{EUC-JP} + \o \l{EUC-KR Text Codec}{EUC-KR} + \o \l{GBK Text Codec}{GB18030-0} + \o IBM 850 + \o IBM 866 + \o IBM 874 + \o \l{ISO 2022-JP (JIS) Text Codec}{ISO 2022-JP} + \o ISO 8859-1 to 10 + \o ISO 8859-13 to 16 + \o Iscii-Bng, Dev, Gjr, Knd, Mlm, Ori, Pnj, Tlg, and Tml + \o JIS X 0201 + \o JIS X 0208 + \o KOI8-R + \o KOI8-U + \o MuleLao-1 + \o ROMAN8 + \o \l{Shift-JIS Text Codec}{Shift-JIS} + \o TIS-620 + \o \l{TSCII Text Codec}{TSCII} + \o UTF-8 + \o UTF-16 + \o UTF-16BE + \o UTF-16LE + \o UTF-32 + \o UTF-32BE + \o UTF-32LE + \o Windows-1250 to 1258 + \o WINSAMI2 + \endlist + + QTextCodecs can be used as follows to convert some locally encoded + string to Unicode. Suppose you have some string encoded in Russian + KOI8-R encoding, and want to convert it to Unicode. The simple way + to do it is like this: + + \snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 0 + + After this, \c string holds the text converted to Unicode. + Converting a string from Unicode to the local encoding is just as + easy: + + \snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 1 + + To read or write files in various encodings, use QTextStream and + its \l{QTextStream::setCodec()}{setCodec()} function. See the + \l{tools/codecs}{Codecs} example for an application of QTextCodec + to file I/O. + + Some care must be taken when trying to convert the data in chunks, + for example, when receiving it over a network. In such cases it is + possible that a multi-byte character will be split over two + chunks. At best this might result in the loss of a character and + at worst cause the entire conversion to fail. 
+ + The approach to use in these situations is to create a QTextDecoder + object for the codec and use this QTextDecoder for the whole + decoding process, as shown below: + + \snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 2 + + The QTextDecoder object maintains state between chunks and therefore + works correctly even if a multi-byte character is split between + chunks. + + \section1 Creating Your Own Codec Class + + Support for new text encodings can be added to Qt by creating + QTextCodec subclasses. + + The pure virtual functions describe the encoder to the system and + the coder is used as required in the different text file formats + supported by QTextStream, and under X11, for the locale-specific + character input and output. + + To add support for another encoding to Qt, make a subclass of + QTextCodec and implement the functions listed in the table below. + + \table + \header \o Function \o Description + + \row \o name() + \o Returns the official name for the encoding. If the + encoding is listed in the + \l{IANA character-sets encoding file}, the name + should be the preferred MIME name for the encoding. + + \row \o aliases() + \o Returns a list of alternative names for the encoding. + QTextCodec provides a default implementation that returns + an empty list. For example, "ISO-8859-1" has "latin1", + "CP819", "IBM819", and "iso-ir-100" as aliases. + + \row \o mibEnum() + \o Return the MIB enum for the encoding if it is listed in + the \l{IANA character-sets encoding file}. + + \row \o convertToUnicode() + \o Converts an 8-bit character string to Unicode. + + \row \o convertFromUnicode() + \o Converts a Unicode string to an 8-bit character string. + \endtable + + You may find it more convenient to make your codec class + available as a plugin; see \l{How to Create Qt Plugins} for + details. + + \sa QTextStream, QTextDecoder, QTextEncoder, {Codecs Example} +*/ + +/*! 
+ \enum QTextCodec::ConversionFlag + + \value DefaultConversion No flag is set. + \value ConvertInvalidToNull If this flag is set, each invalid input + character is output as a null character. + \value IgnoreHeader Ignore any Unicode byte-order mark and don't generate any. + + \omitvalue FreeFunction +*/ + +/*! + \fn QTextCodec::ConverterState::ConverterState(ConversionFlags flags) + + Constructs a ConverterState object initialized with the given \a flags. +*/ + +/*! + \fn QTextCodec::ConverterState::~ConverterState() + + Destroys the ConverterState object. +*/ + +/*! + \nonreentrant + + Constructs a QTextCodec, and gives it the highest precedence. The + QTextCodec should always be constructed on the heap (i.e. with \c + new). Qt takes ownership and will delete it when the application + terminates. +*/ +QTextCodec::QTextCodec() +{ + setup(); + all->prepend(this); +} + + +/*! + \nonreentrant + + Destroys the QTextCodec. Note that you should not delete codecs + yourself: once created they become Qt's responsibility. +*/ +QTextCodec::~QTextCodec() +{ + if (!destroying_is_ok) + qWarning("QTextCodec::~QTextCodec: Called by application"); + if (all) + all->removeAll(this); +} + +/*! + \fn QTextCodec *QTextCodec::codecForName(const char *name) + + Searches all installed QTextCodec objects and returns the one + which best matches \a name; the match is case-insensitive. Returns + 0 if no codec matching the name \a name could be found. +*/ + +/*! + Searches all installed QTextCodec objects and returns the one + which best matches \a name; the match is case-insensitive. Returns + 0 if no codec matching the name \a name could be found. 
+*/ +QTextCodec *QTextCodec::codecForName(const QByteArray &name) +{ + if (name.isEmpty()) + return 0; + + setup(); + + for (int i = 0; i < all->size(); ++i) { + QTextCodec *cursor = all->at(i); + if (nameMatch(cursor->name(), name)) + return cursor; + QList<QByteArray> aliases = cursor->aliases(); + for (int i = 0; i < aliases.size(); ++i) + if (nameMatch(aliases.at(i), name)) + return cursor; + } + + return createForName(name); +} + + +/*! + Returns the QTextCodec which matches the \link + QTextCodec::mibEnum() MIBenum\endlink \a mib. +*/ +QTextCodec* QTextCodec::codecForMib(int mib) +{ + setup(); + + // Qt 3 used 1000 (mib for UCS2) as its identifier for the utf16 codec. Map + // this correctly for compatibility. + if (mib == 1000) + mib = 1015; + + QList<QTextCodec*>::ConstIterator i; + for (int i = 0; i < all->size(); ++i) { + QTextCodec *cursor = all->at(i); + if (cursor->mibEnum() == mib) + return cursor; + } + + return createForMib(mib); +} + +/*! + Returns the list of all available codecs, by name. Call + QTextCodec::codecForName() to obtain the QTextCodec for the name. + + The list may contain many mentions of the same codec + if the codec has aliases. + + \sa availableMibs(), name(), aliases() +*/ +QList<QByteArray> QTextCodec::availableCodecs() +{ + setup(); + + QList<QByteArray> codecs; + for (int i = 0; i < all->size(); ++i) { + codecs += all->at(i)->name(); + codecs += all->at(i)->aliases(); + } +#ifndef QT_NO_TEXTCODECPLUGIN + QFactoryLoader *l = loader(); + QStringList keys = l->keys(); + for (int i = 0; i < keys.size(); ++i) { + if (!keys.at(i).startsWith(QLatin1String("MIB: "))) { + QByteArray name = keys.at(i).toLatin1(); + if (!codecs.contains(name)) + codecs += name; + } + } +#endif + + return codecs; +} + +/*! + Returns the list of MIBs for all available codecs. Call + QTextCodec::codecForMib() to obtain the QTextCodec for the MIB. 
+ + \sa availableCodecs(), mibEnum() +*/ +QList<int> QTextCodec::availableMibs() +{ + setup(); + + QList<int> codecs; + for (int i = 0; i < all->size(); ++i) + codecs += all->at(i)->mibEnum(); +#ifndef QT_NO_TEXTCODECPLUGIN + QFactoryLoader *l = loader(); + QStringList keys = l->keys(); + for (int i = 0; i < keys.size(); ++i) { + if (keys.at(i).startsWith(QLatin1String("MIB: "))) { + int mib = keys.at(i).mid(5).toInt(); + if (!codecs.contains(mib)) + codecs += mib; + } + } +#endif + + return codecs; +} + +/*! + Set the codec to \a c; this will be returned by + codecForLocale(). If \a c is a null pointer, the codec is reset to + the default. + + This might be needed for some applications that want to use their + own mechanism for setting the locale. + + Setting this codec is not supported on DOS based Windows. + + \sa codecForLocale() +*/ +void QTextCodec::setCodecForLocale(QTextCodec *c) +{ +#ifdef Q_WS_WIN + if (QSysInfo::WindowsVersion& QSysInfo::WV_DOS_based) + return; +#endif + localeMapper = c; + if (!localeMapper) + setupLocaleMapper(); +} + +/*! + Returns a pointer to the codec most suitable for this locale. + + On Windows, the codec will be based on a system locale. On Unix + systems, starting with Qt 4.2, the codec will be using the \e + iconv library. Note that in both cases the codec's name will be + "System". +*/ + +QTextCodec* QTextCodec::codecForLocale() +{ + if (localeMapper) + return localeMapper; + + setup(); + + return localeMapper; +} + + +/*! + \fn QByteArray QTextCodec::name() const + + QTextCodec subclasses must reimplement this function. It returns + the name of the encoding supported by the subclass. + + If the codec is registered as a character set in the + \l{IANA character-sets encoding file} this method should + return the preferred mime name for the codec if defined, + otherwise its name. +*/ + +/*! + \fn int QTextCodec::mibEnum() const + + Subclasses of QTextCodec must reimplement this function. 
It + returns the MIBenum (see \l{IANA character-sets encoding file} + for more information). It is important that each QTextCodec + subclass returns the correct unique value for this function. +*/ + +/*! + Subclasses can return a number of aliases for the codec in question. + + Standard aliases for codecs can be found in the + \l{IANA character-sets encoding file}. +*/ +QList<QByteArray> QTextCodec::aliases() const +{ + return QList<QByteArray>(); +} + +/*! + \fn QString QTextCodec::convertToUnicode(const char *chars, int len, + ConverterState *state) const + + QTextCodec subclasses must reimplement this function. + + Converts the first \a len characters of \a chars from the + encoding of the subclass to Unicode, and returns the result in a + QString. + + \a state can be 0, in which case the conversion is stateless and + default conversion rules should be used. If state is not 0, the + codec should save the state after the conversion in \a state, and + adjust the remainingChars and invalidChars members of the struct. +*/ + +/*! + \fn QByteArray QTextCodec::convertFromUnicode(const QChar *input, int number, + ConverterState *state) const + + QTextCodec subclasses must reimplement this function. + + Converts the first \a number of characters from the \a input array + from Unicode to the encoding of the subclass, and returns the result + in a QByteArray. + + \a state can be 0 in which case the conversion is stateless and + default conversion rules should be used. If state is not 0, the + codec should save the state after the conversion in \a state, and + adjust the remainingChars and invalidChars members of the struct. +*/ + +/*! + Creates a QTextDecoder which stores enough state to decode chunks + of \c{char *} data to create chunks of Unicode data. + + The caller is responsible for deleting the returned object. +*/ +QTextDecoder* QTextCodec::makeDecoder() const +{ + return new QTextDecoder(this); +} + + +/*! 
+ Creates a QTextEncoder which stores enough state to encode chunks + of Unicode data as \c{char *} data. + + The caller is responsible for deleting the returned object. +*/ +QTextEncoder* QTextCodec::makeEncoder() const +{ + return new QTextEncoder(this); +} + +/*! + \fn QByteArray QTextCodec::fromUnicode(const QChar *input, int number, + ConverterState *state) const + + Converts the first \a number of characters from the \a input array + from Unicode to the encoding of this codec, and returns the result + in a QByteArray. + + The \a state of the convertor used is updated. +*/ + +/*! + Converts \a str from Unicode to the encoding of this codec, and + returns the result in a QByteArray. +*/ +QByteArray QTextCodec::fromUnicode(const QString& str) const +{ + return convertFromUnicode(str.constData(), str.length(), 0); +} + +/*! + \fn QString QTextCodec::toUnicode(const char *input, int size, + ConverterState *state) const + + Converts the first \a size characters from the \a input from the + encoding of this codec to Unicode, and returns the result in a + QString. + + The \a state of the convertor used is updated. +*/ + +/*! + Converts \a a from the encoding of this codec to Unicode, and + returns the result in a QString. +*/ +QString QTextCodec::toUnicode(const QByteArray& a) const +{ + return convertToUnicode(a.constData(), a.length(), 0); +} + +/*! + Returns true if the Unicode character \a ch can be fully encoded + with this codec; otherwise returns false. +*/ +bool QTextCodec::canEncode(QChar ch) const +{ + ConverterState state; + state.flags = ConvertInvalidToNull; + convertFromUnicode(&ch, 1, &state); + return (state.invalidChars == 0); +} + +/*! + \overload + + \a s contains the string being tested for encode-ability. +*/ +bool QTextCodec::canEncode(const QString& s) const +{ + ConverterState state; + state.flags = ConvertInvalidToNull; + convertFromUnicode(s.constData(), s.length(), &state); + return (state.invalidChars == 0); +} + +#ifdef QT3_SUPPORT +/*! 
+ Returns a string representing the current language and + sublanguage, e.g. "pt" for Portuguese, or "pt_br" for Portuguese/Brazil. + + \sa QLocale +*/ +const char *QTextCodec::locale() +{ + static char locale[6]; + QByteArray l = QLocale::system().name().toLatin1(); + int len = qMin(l.length(), 5); + memcpy(locale, l.constData(), len); + locale[len] = '\0'; + + return locale; +} + +/*! + \overload +*/ + +QByteArray QTextCodec::fromUnicode(const QString& uc, int& lenInOut) const +{ + QByteArray result = convertFromUnicode(uc.constData(), lenInOut, 0); + lenInOut = result.length(); + return result; +} + +/*! + \overload + + \a a contains the source characters; \a len contains the number of + characters in \a a to use. +*/ +QString QTextCodec::toUnicode(const QByteArray& a, int len) const +{ + len = qMin(a.size(), len); + return convertToUnicode(a.constData(), len, 0); +} +#endif + +/*! + \overload + + \a chars contains the source characters. +*/ +QString QTextCodec::toUnicode(const char *chars) const +{ + int len = qstrlen(chars); + return convertToUnicode(chars, len, 0); +} + + +/*! + \class QTextEncoder + \brief The QTextEncoder class provides a state-based encoder. + \reentrant + \ingroup i18n + + A text encoder converts text from Unicode into an encoded text format + using a specific codec. + + The encoder converts Unicode into another format, remembering any + state that is required between calls. + + \sa QTextCodec::makeEncoder(), QTextDecoder +*/ + +/*! + \fn QTextEncoder::QTextEncoder(const QTextCodec *codec) + + Constructs a text encoder for the given \a codec. +*/ + +/*! + Destroys the encoder. +*/ +QTextEncoder::~QTextEncoder() +{ +} + +/*! \internal + \since 4.5 + Determines whether the eecoder encountered a failure while decoding the input. If + an error was encountered, the produced result is undefined, and gets converted as according + to the conversion flags. + */ +bool QTextEncoder::hasFailure() const +{ + return state.invalidChars != 0; +} + +/*! 
+ Converts the Unicode string \a str into an encoded QByteArray. +*/ +QByteArray QTextEncoder::fromUnicode(const QString& str) +{ + QByteArray result = c->fromUnicode(str.constData(), str.length(), &state); + return result; +} + +/*! + \overload + + Converts \a len characters (not bytes) from \a uc, and returns the + result in a QByteArray. +*/ +QByteArray QTextEncoder::fromUnicode(const QChar *uc, int len) +{ + QByteArray result = c->fromUnicode(uc, len, &state); + return result; +} + +#ifdef QT3_SUPPORT +/*! + \overload + + Converts \a lenInOut characters (not bytes) from \a uc, and returns the + result in a QByteArray. The number of characters read is returned in + the \a lenInOut parameter. +*/ +QByteArray QTextEncoder::fromUnicode(const QString& uc, int& lenInOut) +{ + QByteArray result = c->fromUnicode(uc.constData(), lenInOut, &state); + lenInOut = result.length(); + return result; +} +#endif + +/*! + \class QTextDecoder + \brief The QTextDecoder class provides a state-based decoder. + \reentrant + \ingroup i18n + + A text decoder converts text from an encoded text format into Unicode + using a specific codec. + + The decoder converts text in this format into Unicode, remembering any + state that is required between calls. + + \sa QTextCodec::makeDecoder(), QTextEncoder +*/ + +/*! + \fn QTextDecoder::QTextDecoder(const QTextCodec *codec) + + Constructs a text decoder for the given \a codec. +*/ + +/*! + Destroys the decoder. +*/ +QTextDecoder::~QTextDecoder() +{ +} + +/*! + \fn QString QTextDecoder::toUnicode(const char *chars, int len) + + Converts the first \a len bytes in \a chars to Unicode, returning + the result. + + If not all characters are used (e.g. if only part of a multi-byte + encoding is at the end of the characters), the decoder remembers + enough state to continue with the next call to this function. +*/ +QString QTextDecoder::toUnicode(const char *chars, int len) +{ + return c->toUnicode(chars, len, &state); +} + + +/*! 
\overload + + The converted string is returned in \a target. + */ +void QTextDecoder::toUnicode(QString *target, const char *chars, int len) +{ + Q_ASSERT(target); + switch (c->mibEnum()) { + case 106: // utf8 + static_cast<const QUtf8Codec*>(c)->convertToUnicode(target, chars, len, &state); + break; + case 4: { // latin1 + target->resize(len); + ushort *data = (ushort*)target->data(); + for (int i = len; i >=0; --i) + data[i] = (uchar) chars[i]; + } break; + default: + *target = c->toUnicode(chars, len, &state); + } +} + + +/*! + \overload + + Converts the bytes in the byte array specified by \a ba to Unicode + and returns the result. +*/ +QString QTextDecoder::toUnicode(const QByteArray &ba) +{ + return c->toUnicode(ba.constData(), ba.length(), &state); +} + + +/*! + \fn QTextCodec* QTextCodec::codecForTr() + + Returns the codec used by QObject::tr() on its argument. If this + function returns 0 (the default), tr() assumes Latin-1. + + \sa setCodecForTr() +*/ + +/*! + \fn void QTextCodec::setCodecForTr(QTextCodec *c) + \nonreentrant + + Sets the codec used by QObject::tr() on its argument to \a c. If + \a c is 0 (the default), tr() assumes Latin-1. + + If the literal quoted text in the program is not in the Latin-1 + encoding, this function can be used to set the appropriate + encoding. For example, software developed by Korean programmers + might use eucKR for all the text in the program, in which case the + main() function might look like this: + + \snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 3 + + Note that this is not the way to select the encoding that the \e + user has chosen. For example, to convert an application containing + literal English strings to Korean, all that is needed is for the + English strings to be passed through tr() and for translation + files to be loaded. For details of internationalization, see + \l{Internationalization with Qt}. + + \sa codecForTr(), setCodecForCStrings() +*/ + + +/*! 
+ \fn QTextCodec* QTextCodec::codecForCStrings() + + Returns the codec used by QString to convert to and from \c{const + char *} and QByteArrays. If this function returns 0 (the default), + QString assumes Latin-1. + + \sa setCodecForCStrings() +*/ + +/*! + \fn void QTextCodec::setCodecForCStrings(QTextCodec *codec) + \nonreentrant + + Sets the codec used by QString to convert to and from \c{const + char *} and QByteArrays. If the \a codec is 0 (the default), + QString assumes Latin-1. + + \warning Some codecs do not preserve the characters in the ASCII + range (0x00 to 0x7F). For example, the Japanese Shift-JIS + encoding maps the backslash character (0x5C) to the Yen + character. To avoid undesirable side-effects, we recommend + avoiding such codecs with setCodecForCStrings(). + + \sa codecForCStrings(), setCodecForTr() +*/ + +/*! + \since 4.4 + + Tries to detect the encoding of the provided snippet of HTML in the given byte array, \a ba, + and returns a QTextCodec instance that is capable of decoding the html to unicode. + If the codec cannot be detected from the content provided, \a defaultCodec is returned. 
+*/ +QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba, QTextCodec *defaultCodec) +{ + // determine charset + int pos; + QTextCodec *c = 0; + + if (ba.size() > 1 && (((uchar)ba[0] == 0xfe && (uchar)ba[1] == 0xff) + || ((uchar)ba[0] == 0xff && (uchar)ba[1] == 0xfe))) { + c = QTextCodec::codecForMib(1015); // utf16 + } else if (ba.size() > 2 + && (uchar)ba[0] == 0xef + && (uchar)ba[1] == 0xbb + && (uchar)ba[2] == 0xbf) { + c = QTextCodec::codecForMib(106); // utf-8 + } else { + QByteArray header = ba.left(512).toLower(); + if ((pos = header.indexOf("http-equiv=")) != -1) { + pos = header.indexOf("charset=", pos) + int(strlen("charset=")); + if (pos != -1) { + int pos2 = header.indexOf('\"', pos+1); + QByteArray cs = header.mid(pos, pos2-pos); + // qDebug("found charset: %s", cs.data()); + c = QTextCodec::codecForName(cs); + } + } + } + if (!c) + c = defaultCodec; + + return c; +} + +/*! + \overload + + If the codec cannot be detected, this overload returns a Latin-1 QTextCodec. +*/ +QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba) +{ + return codecForHtml(ba, QTextCodec::codecForMib(/*Latin 1*/ 4)); +} + + +/*! \internal + \since 4.3 + Determines whether the decoder encountered a failure while decoding the input. If + an error was encountered, the produced result is undefined, and gets converted as according + to the conversion flags. + */ +bool QTextDecoder::hasFailure() const +{ + return state.invalidChars != 0; +} + +/*! + \fn QTextCodec *QTextCodec::codecForContent(const char *str, int size) + + This functionality is no longer provided by Qt. This + compatibility function always returns a null pointer. +*/ + +/*! + \fn QTextCodec *QTextCodec::codecForName(const char *hint, int accuracy) + + Use the codecForName(const QByteArray &) overload instead. +*/ + +/*! + \fn QTextCodec *QTextCodec::codecForIndex(int i) + + Use availableCodecs() or availableMibs() instead and iterate + through the resulting list. +*/ + + +/*! 
+ \fn QByteArray QTextCodec::mimeName() const + + Use name() instead. +*/ + +QT_END_NAMESPACE + +#endif // QT_NO_TEXTCODEC diff --git a/src/corelib/codecs/qtextcodec.h b/src/corelib/codecs/qtextcodec.h new file mode 100644 index 0000000..e32650f --- /dev/null +++ b/src/corelib/codecs/qtextcodec.h @@ -0,0 +1,189 @@ +/**************************************************************************** +** +** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). +** Contact: Qt Software Information (qt-info@nokia.com) +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** No Commercial Usage +** This file contains pre-release code and may not be distributed. +** You may use this file in accordance with the terms and conditions +** contained in the either Technology Preview License Agreement or the +** Beta Release License Agreement. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain +** additional rights. These rights are described in the Nokia Qt LGPL +** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this +** package. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 3.0 as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL included in the +** packaging of this file. 
Please review the following information to +** ensure the GNU General Public License version 3.0 requirements will be +** met: http://www.gnu.org/copyleft/gpl.html. +** +** If you are unsure which license is appropriate for your use, please +** contact the sales department at qt-sales@nokia.com. +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#ifndef QTEXTCODEC_H +#define QTEXTCODEC_H + +#include <QtCore/qstring.h> +#include <QtCore/qlist.h> + +QT_BEGIN_HEADER + +QT_BEGIN_NAMESPACE + +QT_MODULE(Core) + +#ifndef QT_NO_TEXTCODEC + +class QTextCodec; +class QIODevice; + +class QTextDecoder; +class QTextEncoder; + +class Q_CORE_EXPORT QTextCodec +{ + Q_DISABLE_COPY(QTextCodec) +public: + static QTextCodec* codecForName(const QByteArray &name); + static QTextCodec* codecForName(const char *name) { return codecForName(QByteArray(name)); } + static QTextCodec* codecForMib(int mib); + + static QList<QByteArray> availableCodecs(); + static QList<int> availableMibs(); + + static QTextCodec* codecForLocale(); + static void setCodecForLocale(QTextCodec *c); + + static QTextCodec* codecForTr(); + static void setCodecForTr(QTextCodec *c); + + static QTextCodec* codecForCStrings(); + static void setCodecForCStrings(QTextCodec *c); + + static QTextCodec *codecForHtml(const QByteArray &ba); + static QTextCodec *codecForHtml(const QByteArray &ba, QTextCodec *defaultCodec); + + QTextDecoder* makeDecoder() const; + QTextEncoder* makeEncoder() const; + + bool canEncode(QChar) const; + bool canEncode(const QString&) const; + + QString toUnicode(const QByteArray&) const; + QString toUnicode(const char* chars) const; + QByteArray fromUnicode(const QString& uc) const; + enum ConversionFlag { + DefaultConversion, + ConvertInvalidToNull = 0x80000000, + IgnoreHeader = 0x1, + FreeFunction = 0x2 + }; + Q_DECLARE_FLAGS(ConversionFlags, ConversionFlag) + + struct Q_CORE_EXPORT ConverterState { + ConverterState(ConversionFlags f = 
DefaultConversion) + : flags(f), remainingChars(0), invalidChars(0), d(0) { state_data[0] = state_data[1] = state_data[2] = 0; } + ~ConverterState(); + ConversionFlags flags; + int remainingChars; + int invalidChars; + uint state_data[3]; + void *d; + private: + Q_DISABLE_COPY(ConverterState) + }; + + QString toUnicode(const char *in, int length, ConverterState *state = 0) const + { return convertToUnicode(in, length, state); } + QByteArray fromUnicode(const QChar *in, int length, ConverterState *state = 0) const + { return convertFromUnicode(in, length, state); } + + virtual QByteArray name() const = 0; + virtual QList<QByteArray> aliases() const; + virtual int mibEnum() const = 0; + +protected: + virtual QString convertToUnicode(const char *in, int length, ConverterState *state) const = 0; + virtual QByteArray convertFromUnicode(const QChar *in, int length, ConverterState *state) const = 0; + + QTextCodec(); + virtual ~QTextCodec(); + +public: +#ifdef QT3_SUPPORT + static QT3_SUPPORT QTextCodec* codecForContent(const char*, int) { return 0; } + static QT3_SUPPORT const char* locale(); + static QT3_SUPPORT QTextCodec* codecForName(const char* hint, int) { return codecForName(QByteArray(hint)); } + QT3_SUPPORT QByteArray fromUnicode(const QString& uc, int& lenInOut) const; + QT3_SUPPORT QString toUnicode(const QByteArray&, int len) const; + QT3_SUPPORT QByteArray mimeName() const { return name(); } + static QT3_SUPPORT QTextCodec *codecForIndex(int i) { return codecForName(availableCodecs().value(i)); } +#endif + +private: + friend class QTextCodecCleanup; + static QTextCodec *cftr; +}; +Q_DECLARE_OPERATORS_FOR_FLAGS(QTextCodec::ConversionFlags) + +inline QTextCodec* QTextCodec::codecForTr() { return cftr; } +inline void QTextCodec::setCodecForTr(QTextCodec *c) { cftr = c; } +inline QTextCodec* QTextCodec::codecForCStrings() { return QString::codecForCStrings; } +inline void QTextCodec::setCodecForCStrings(QTextCodec *c) { QString::codecForCStrings = c; } + +class 
Q_CORE_EXPORT QTextEncoder { + Q_DISABLE_COPY(QTextEncoder) +public: + explicit QTextEncoder(const QTextCodec *codec) : c(codec), state() {} + ~QTextEncoder(); + QByteArray fromUnicode(const QString& str); + QByteArray fromUnicode(const QChar *uc, int len); +#ifdef QT3_SUPPORT + QByteArray fromUnicode(const QString& uc, int& lenInOut); +#endif + bool hasFailure() const; +private: + const QTextCodec *c; + QTextCodec::ConverterState state; +}; + +class Q_CORE_EXPORT QTextDecoder { + Q_DISABLE_COPY(QTextDecoder) +public: + explicit QTextDecoder(const QTextCodec *codec) : c(codec), state() {} + ~QTextDecoder(); + QString toUnicode(const char* chars, int len); + QString toUnicode(const QByteArray &ba); + void toUnicode(QString *target, const char *chars, int len); + bool hasFailure() const; +private: + const QTextCodec *c; + QTextCodec::ConverterState state; +}; + +#endif // QT_NO_TEXTCODEC + +QT_END_NAMESPACE + +QT_END_HEADER + +#endif // QTEXTCODEC_H diff --git a/src/corelib/codecs/qtextcodec_p.h b/src/corelib/codecs/qtextcodec_p.h new file mode 100644 index 0000000..39f643d --- /dev/null +++ b/src/corelib/codecs/qtextcodec_p.h @@ -0,0 +1,84 @@ +/**************************************************************************** +** +** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). +** Contact: Qt Software Information (qt-info@nokia.com) +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** No Commercial Usage +** This file contains pre-release code and may not be distributed. +** You may use this file in accordance with the terms and conditions +** contained in the either Technology Preview License Agreement or the +** Beta Release License Agreement. 
+** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain +** additional rights. These rights are described in the Nokia Qt LGPL +** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this +** package. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 3.0 as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU General Public License version 3.0 requirements will be +** met: http://www.gnu.org/copyleft/gpl.html. +** +** If you are unsure which license is appropriate for your use, please +** contact the sales department at qt-sales@nokia.com. +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#ifndef QTEXTCODEC_P_H +#define QTEXTCODEC_P_H + +// +// W A R N I N G +// ------------- +// +// This file is not part of the Qt API. It exists for the convenience +// of the QTextCodec class. This header file may change from +// version to version without notice, or even be removed. +// +// We mean it. 
+// + +#include "qtextcodec.h" +#include <string.h> + +QT_BEGIN_NAMESPACE + +#ifndef QT_NO_TEXTCODEC + +typedef void (*QTextCodecStateFreeFunction)(QTextCodec::ConverterState*); + +struct QTextCodecUnalignedPointer +{ + static inline QTextCodecStateFreeFunction decode(const uint *src) + { + quintptr data; + memcpy(&data, src, sizeof(data)); + return reinterpret_cast<QTextCodecStateFreeFunction>(data); + } + static inline void encode(uint *dst, QTextCodecStateFreeFunction fn) + { + quintptr data = reinterpret_cast<quintptr>(fn); + memcpy(dst, &data, sizeof(data)); + } +}; + +#endif //QT_NO_TEXTCODEC + +QT_END_NAMESPACE + +#endif diff --git a/src/corelib/codecs/qtextcodecplugin.cpp b/src/corelib/codecs/qtextcodecplugin.cpp new file mode 100644 index 0000000..7342b45 --- /dev/null +++ b/src/corelib/codecs/qtextcodecplugin.cpp @@ -0,0 +1,161 @@ +/**************************************************************************** +** +** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). +** Contact: Qt Software Information (qt-info@nokia.com) +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** No Commercial Usage +** This file contains pre-release code and may not be distributed. +** You may use this file in accordance with the terms and conditions +** contained in the either Technology Preview License Agreement or the +** Beta Release License Agreement. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. 
+** +** In addition, as a special exception, Nokia gives you certain +** additional rights. These rights are described in the Nokia Qt LGPL +** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this +** package. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 3.0 as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU General Public License version 3.0 requirements will be +** met: http://www.gnu.org/copyleft/gpl.html. +** +** If you are unsure which license is appropriate for your use, please +** contact the sales department at qt-sales@nokia.com. +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include "qtextcodecplugin.h" +#include "qstringlist.h" + +#ifndef QT_NO_TEXTCODECPLUGIN + +QT_BEGIN_NAMESPACE + +/*! + \class QTextCodecPlugin + \brief The QTextCodecPlugin class provides an abstract base for custom QTextCodec plugins. + \reentrant + \ingroup plugins + + The text codec plugin is a simple plugin interface that makes it + easy to create custom text codecs that can be loaded dynamically + into applications. + + Writing a text codec plugin is achieved by subclassing this base + class, reimplementing the pure virtual functions names(), + aliases(), createForName(), mibEnums() and createForMib(), and + exporting the class with the Q_EXPORT_PLUGIN2() macro. See \l{How + to Create Qt Plugins} for details. + + See the \l{http://www.iana.org/assignments/character-sets}{IANA + character-sets encoding file} for more information on mime + names and mib enums. +*/ + +/*! + \fn QStringList QTextCodecPlugin::names() const + + Returns the list of MIME names supported by this plugin. + + If a codec has several names, the extra names are returned by aliases(). 
+ + \sa createForName(), aliases() +*/ + +/*! + \fn QList<QByteArray> QTextCodecPlugin::aliases() const + + Returns the list of aliases supported by this plugin. +*/ + +/*! + \fn QTextCodec *QTextCodecPlugin::createForName(const QByteArray &name) + + Creates a QTextCodec object for the codec called \a name. The \a name + must come from the list of encodings returned by names(). Encoding + names are case sensitive. + + Example: + + \snippet doc/src/snippets/code/src_corelib_codecs_qtextcodecplugin.cpp 0 + + \sa names() +*/ + + +/*! + \fn QList<int> QTextCodecPlugin::mibEnums() const + + Returns the list of mib enums supported by this plugin. + + \sa createForMib() +*/ + +/*! + \fn QTextCodec *QTextCodecPlugin::createForMib(int mib); + + Creates a QTextCodec object for the mib enum \a mib. + + See \l{http://www.iana.org/assignments/character-sets}{the + IANA character-sets encoding file} for more information. + + \sa mibEnums() +*/ + +/*! + Constructs a text codec plugin with the given \a parent. This is + invoked automatically by the Q_EXPORT_PLUGIN2() macro. +*/ +QTextCodecPlugin::QTextCodecPlugin(QObject *parent) + : QObject(parent) +{ +} + +/*! + Destroys the text codec plugin. + + You never have to call this explicitly. Qt destroys a plugin + automatically when it is no longer used. 
+*/ +QTextCodecPlugin::~QTextCodecPlugin() +{ +} + +QStringList QTextCodecPlugin::keys() const +{ + QStringList keys; + QList<QByteArray> list = names(); + list += aliases(); + for (int i = 0; i < list.size(); ++i) + keys += QString::fromLatin1(list.at(i)); + QList<int> mibs = mibEnums(); + for (int i = 0; i < mibs.count(); ++i) + keys += QLatin1String("MIB: ") + QString::number(mibs.at(i)); + return keys; +} + +QTextCodec *QTextCodecPlugin::create(const QString &name) +{ + if (name.startsWith(QLatin1String("MIB: "))) + return createForMib(name.mid(4).toInt()); + return createForName(name.toLatin1()); +} + +QT_END_NAMESPACE + +#endif // QT_NO_TEXTCODECPLUGIN diff --git a/src/corelib/codecs/qtextcodecplugin.h b/src/corelib/codecs/qtextcodecplugin.h new file mode 100644 index 0000000..4600fec --- /dev/null +++ b/src/corelib/codecs/qtextcodecplugin.h @@ -0,0 +1,96 @@ +/**************************************************************************** +** +** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). +** Contact: Qt Software Information (qt-info@nokia.com) +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** No Commercial Usage +** This file contains pre-release code and may not be distributed. +** You may use this file in accordance with the terms and conditions +** contained in the either Technology Preview License Agreement or the +** Beta Release License Agreement. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. 
+** +** In addition, as a special exception, Nokia gives you certain +** additional rights. These rights are described in the Nokia Qt LGPL +** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this +** package. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 3.0 as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU General Public License version 3.0 requirements will be +** met: http://www.gnu.org/copyleft/gpl.html. +** +** If you are unsure which license is appropriate for your use, please +** contact the sales department at qt-sales@nokia.com. +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#ifndef QTEXTCODECPLUGIN_H +#define QTEXTCODECPLUGIN_H + +#include <QtCore/qplugin.h> +#include <QtCore/qfactoryinterface.h> +#include <QtCore/qlist.h> +#include <QtCore/qbytearray.h> + +QT_BEGIN_HEADER + +QT_BEGIN_NAMESPACE + +QT_MODULE(Core) + +#ifndef QT_NO_TEXTCODECPLUGIN + +class QTextCodec; + +struct Q_CORE_EXPORT QTextCodecFactoryInterface : public QFactoryInterface +{ + virtual QTextCodec *create(const QString &key) = 0; +}; + +#define QTextCodecFactoryInterface_iid "com.trolltech.Qt.QTextCodecFactoryInterface" + +Q_DECLARE_INTERFACE(QTextCodecFactoryInterface, QTextCodecFactoryInterface_iid) + + +class Q_CORE_EXPORT QTextCodecPlugin : public QObject, public QTextCodecFactoryInterface +{ + Q_OBJECT + Q_INTERFACES(QTextCodecFactoryInterface:QFactoryInterface) +public: + explicit QTextCodecPlugin(QObject *parent = 0); + ~QTextCodecPlugin(); + + virtual QList<QByteArray> names() const = 0; + virtual QList<QByteArray> aliases() const = 0; + virtual QTextCodec *createForName(const QByteArray &name) = 0; + + virtual QList<int> mibEnums() const = 0; + virtual QTextCodec 
*createForMib(int mib) = 0; + +private: + QStringList keys() const; + QTextCodec *create(const QString &name); +}; + +#endif // QT_NO_TEXTCODECPLUGIN + +QT_END_NAMESPACE + +QT_END_HEADER + +#endif // QTEXTCODECPLUGIN_H diff --git a/src/corelib/codecs/qtsciicodec.cpp b/src/corelib/codecs/qtsciicodec.cpp new file mode 100644 index 0000000..14d2c9c --- /dev/null +++ b/src/corelib/codecs/qtsciicodec.cpp @@ -0,0 +1,500 @@ +/**************************************************************************** +** +** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). +** Contact: Qt Software Information (qt-info@nokia.com) +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** No Commercial Usage +** This file contains pre-release code and may not be distributed. +** You may use this file in accordance with the terms and conditions +** contained in the either Technology Preview License Agreement or the +** Beta Release License Agreement. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain +** additional rights. These rights are described in the Nokia Qt LGPL +** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this +** package. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 3.0 as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL included in the +** packaging of this file. 
Please review the following information to +** ensure the GNU General Public License version 3.0 requirements will be +** met: http://www.gnu.org/copyleft/gpl.html. +** +** If you are unsure which license is appropriate for your use, please +** contact the sales department at qt-sales@nokia.com. +** $QT_END_LICENSE$ +** +****************************************************************************/ + +// Most of the code here was originally written by Hans Petter Bieker, +// and is included in Qt with the author's permission, and the grateful +// thanks of the Trolltech team. + +#include "qtsciicodec_p.h" +#include "qlist.h" + +#ifndef QT_NO_CODECS + +QT_BEGIN_NAMESPACE + +static unsigned char qt_UnicodeToTSCII(ushort u1, ushort u2, ushort u3); +static unsigned int qt_TSCIIToUnicode(unsigned int code, uint *s); + +#define IsTSCIIChar(c) (((c) >= 0x80) && ((c) <= 0xfd)) + +/*! \class QTsciiCodec + \reentrant + \internal +*/ + +/*! + Destroys the text codec object. +*/ +QTsciiCodec::~QTsciiCodec() +{ +} + +/*! + Converts the first \a len characters in \a uc from Unicode to this + encoding, and returns the result in a byte array. The \a state contains + some conversion flags, and is used by the codec to maintain state + information. +*/ +QByteArray QTsciiCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *state) const +{ + char replacement = '?'; + if (state) { + if (state->flags & ConvertInvalidToNull) + replacement = 0; + } + int invalid = 0; + + QByteArray rstr; + rstr.resize(len); + uchar* cursor = (uchar*)rstr.data(); + for (int i = 0; i < len; i++) { + QChar ch = uc[i]; + uchar j; + if (ch.row() == 0x00 && ch.cell() < 0x80) { + // ASCII + j = ch.cell(); + } else if ((j = qt_UnicodeToTSCII(uc[i].unicode(), + uc[i + 1].unicode(), + uc[i + 2].unicode()))) { + // We have to check the combined chars first! 
+ i += 2; + } else if ((j = qt_UnicodeToTSCII(uc[i].unicode(), + uc[i + 1].unicode(), 0))) { + i++; + } else if ((j = qt_UnicodeToTSCII(uc[i].unicode(), 0, 0))) { + } else { + // Error + j = replacement; + ++invalid; + } + *cursor++ = j; + } + rstr.resize(cursor - (const uchar*)rstr.constData()); + + if (state) { + state->invalidChars += invalid; + } + return rstr; +} + +/*! + Converts the first \a len characters in \a chars from this encoding + to Unicode, and returns the result in a QString. The \a state contains + some conversion flags, and is used by the codec to maintain state + information. +*/ +QString QTsciiCodec::convertToUnicode(const char* chars, int len, ConverterState *state) const +{ + QChar replacement = QChar::ReplacementCharacter; + if (state) { + if (state->flags & ConvertInvalidToNull) + replacement = QChar::Null; + } + int invalid = 0; + + QString result; + for (int i = 0; i < len; i++) { + uchar ch = chars[i]; + if (ch < 0x80) { + // ASCII + result += QLatin1Char(ch); + } else if (IsTSCIIChar(ch)) { + // TSCII + uint s[3]; + uint u = qt_TSCIIToUnicode(ch, s); + uint *p = s; + while (u--) { + uint c = *p++; + if (c) + result += QChar(c); + else { + result += replacement; + ++invalid; + } + } + } else { + // Invalid + result += replacement; + ++invalid; + } + } + + if (state) { + state->invalidChars += invalid; + } + return result; +} + +/*! + Returns the official name for the encoding that is handled by the codec. + + \sa QTextCodec::name() +*/ +QByteArray QTsciiCodec::name() const +{ + return "TSCII"; +} + +/*! + Returns the MIB enum for the encoding. 
+ + \sa QTextCodec::mibEnum() +*/ +int QTsciiCodec::mibEnum() const +{ + /* There is no MIBEnum for TSCII now */ + return -3197; +} + +static const int UnToTsLast = 124; // 125 items -- so the last will be 124 +static const ushort UnToTs [][4] = { + // *Sorted* list of TSCII maping for unicode chars + //FIRST SECOND THIRD TSCII + {0x00A0, 0x0000, 0x0000, 0xA0}, + {0x00A9, 0x0000, 0x0000, 0xA9}, + {0x0B83, 0x0000, 0x0000, 0xB7}, + {0x0B85, 0x0000, 0x0000, 0xAB}, + {0x0B86, 0x0000, 0x0000, 0xAC}, + {0x0B87, 0x0000, 0x0000, 0xAD}, + {0x0B88, 0x0000, 0x0000, 0xAE}, + {0x0B89, 0x0000, 0x0000, 0xAF}, + {0x0B8A, 0x0000, 0x0000, 0xB0}, + {0x0B8E, 0x0000, 0x0000, 0xB1}, + {0x0B8F, 0x0000, 0x0000, 0xB2}, + {0x0B90, 0x0000, 0x0000, 0xB3}, + {0x0B92, 0x0000, 0x0000, 0xB4}, + {0x0B93, 0x0000, 0x0000, 0xB5}, + {0x0B94, 0x0000, 0x0000, 0xB6}, + {0x0B95, 0x0000, 0x0000, 0xB8}, + {0x0B95, 0x0B82, 0x0000, 0xEC}, + {0x0B95, 0x0BC1, 0x0000, 0xCC}, + {0x0B95, 0x0BC2, 0x0000, 0xDC}, + {0x0B99, 0x0000, 0x0000, 0xB9}, + {0x0B99, 0x0B82, 0x0000, 0xED}, + {0x0B99, 0x0BC1, 0x0000, 0x99}, + {0x0B99, 0x0BC2, 0x0000, 0x9B}, + {0x0B9A, 0x0000, 0x0000, 0xBA}, + {0x0B9A, 0x0B82, 0x0000, 0xEE}, + {0x0B9A, 0x0BC1, 0x0000, 0xCD}, + {0x0B9A, 0x0BC2, 0x0000, 0xDD}, + {0x0B9C, 0x0000, 0x0000, 0x83}, + {0x0B9C, 0x0B82, 0x0000, 0x88}, + {0x0B9E, 0x0000, 0x0000, 0xBB}, + {0x0B9E, 0x0B82, 0x0000, 0xEF}, + {0x0B9E, 0x0BC1, 0x0000, 0x9A}, + {0x0B9E, 0x0BC2, 0x0000, 0x9C}, + {0x0B9F, 0x0000, 0x0000, 0xBC}, + {0x0B9F, 0x0B82, 0x0000, 0xF0}, + {0x0B9F, 0x0BBF, 0x0000, 0xCA}, + {0x0B9F, 0x0BC0, 0x0000, 0xCB}, + {0x0B9F, 0x0BC1, 0x0000, 0xCE}, + {0x0B9F, 0x0BC2, 0x0000, 0xDE}, + {0x0BA1, 0x0B82, 0x0000, 0xF2}, + {0x0BA3, 0x0000, 0x0000, 0xBD}, + {0x0BA3, 0x0B82, 0x0000, 0xF1}, + {0x0BA3, 0x0BC1, 0x0000, 0xCF}, + {0x0BA3, 0x0BC2, 0x0000, 0xDF}, + {0x0BA4, 0x0000, 0x0000, 0xBE}, + {0x0BA4, 0x0BC1, 0x0000, 0xD0}, + {0x0BA4, 0x0BC2, 0x0000, 0xE0}, + {0x0BA8, 0x0000, 0x0000, 0xBF}, + {0x0BA8, 0x0B82, 0x0000, 0xF3}, + 
{0x0BA8, 0x0BC1, 0x0000, 0xD1}, + {0x0BA8, 0x0BC2, 0x0000, 0xE1}, + {0x0BA9, 0x0000, 0x0000, 0xC9}, + {0x0BA9, 0x0B82, 0x0000, 0xFD}, + {0x0BA9, 0x0BC1, 0x0000, 0xDB}, + {0x0BA9, 0x0BC2, 0x0000, 0xEB}, + {0x0BAA, 0x0000, 0x0000, 0xC0}, + {0x0BAA, 0x0B82, 0x0000, 0xF4}, + {0x0BAA, 0x0BC1, 0x0000, 0xD2}, + {0x0BAA, 0x0BC2, 0x0000, 0xE2}, + {0x0BAE, 0x0000, 0x0000, 0xC1}, + {0x0BAE, 0x0B82, 0x0000, 0xF5}, + {0x0BAE, 0x0BC1, 0x0000, 0xD3}, + {0x0BAE, 0x0BC2, 0x0000, 0xE3}, + {0x0BAF, 0x0000, 0x0000, 0xC2}, + {0x0BAF, 0x0B82, 0x0000, 0xF6}, + {0x0BAF, 0x0BC1, 0x0000, 0xD4}, + {0x0BAF, 0x0BC2, 0x0000, 0xE4}, + {0x0BB0, 0x0000, 0x0000, 0xC3}, + {0x0BB0, 0x0B82, 0x0000, 0xF7}, + {0x0BB0, 0x0BC1, 0x0000, 0xD5}, + {0x0BB0, 0x0BC2, 0x0000, 0xE5}, + {0x0BB1, 0x0000, 0x0000, 0xC8}, + {0x0BB1, 0x0B82, 0x0000, 0xFC}, + {0x0BB1, 0x0BC1, 0x0000, 0xDA}, + {0x0BB1, 0x0BC2, 0x0000, 0xEA}, + {0x0BB2, 0x0000, 0x0000, 0xC4}, + {0x0BB2, 0x0B82, 0x0000, 0xF8}, + {0x0BB2, 0x0BC1, 0x0000, 0xD6}, + {0x0BB2, 0x0BC2, 0x0000, 0xE6}, + {0x0BB3, 0x0000, 0x0000, 0xC7}, + {0x0BB3, 0x0B82, 0x0000, 0xFB}, + {0x0BB3, 0x0BC1, 0x0000, 0xD9}, + {0x0BB3, 0x0BC2, 0x0000, 0xE9}, + {0x0BB4, 0x0000, 0x0000, 0xC6}, + {0x0BB4, 0x0B82, 0x0000, 0xFA}, + {0x0BB4, 0x0BC1, 0x0000, 0xD8}, + {0x0BB4, 0x0BC2, 0x0000, 0xE8}, + {0x0BB5, 0x0000, 0x0000, 0xC5}, + {0x0BB5, 0x0B82, 0x0000, 0xF9}, + {0x0BB5, 0x0BC1, 0x0000, 0xD7}, + {0x0BB5, 0x0BC2, 0x0000, 0xE7}, + {0x0BB7, 0x0000, 0x0000, 0x84}, + {0x0BB7, 0x0B82, 0x0000, 0x89}, + {0x0BB8, 0x0000, 0x0000, 0x85}, + {0x0BB8, 0x0B82, 0x0000, 0x8A}, + {0x0BB9, 0x0000, 0x0000, 0x86}, + {0x0BB9, 0x0B82, 0x0000, 0x8B}, + {0x0BBE, 0x0000, 0x0000, 0xA1}, + {0x0BBF, 0x0000, 0x0000, 0xA2}, + {0x0BC0, 0x0000, 0x0000, 0xA3}, + {0x0BC1, 0x0000, 0x0000, 0xA4}, + {0x0BC2, 0x0000, 0x0000, 0xA5}, + {0x0BC6, 0x0000, 0x0000, 0xA6}, + {0x0BC7, 0x0000, 0x0000, 0xA7}, + {0x0BC8, 0x0000, 0x0000, 0xA8}, + {0x0BCC, 0x0000, 0x0000, 0xAA}, + {0x0BE6, 0x0000, 0x0000, 0x80}, + {0x0BE7, 0x0000, 0x0000, 
0x81}, + {0x0BE7, 0x0BB7, 0x0000, 0x87}, + {0x0BE7, 0x0BB7, 0x0B82, 0x8C}, + {0x0BE8, 0x0000, 0x0000, 0x8D}, + {0x0BE9, 0x0000, 0x0000, 0x8E}, + {0x0BEA, 0x0000, 0x0000, 0x8F}, + {0x0BEB, 0x0000, 0x0000, 0x90}, + {0x0BEC, 0x0000, 0x0000, 0x95}, + {0x0BED, 0x0000, 0x0000, 0x96}, + {0x0BEE, 0x0000, 0x0000, 0x97}, + {0x0BEF, 0x0000, 0x0000, 0x98}, + {0x0BF0, 0x0000, 0x0000, 0x9D}, + {0x0BF1, 0x0000, 0x0000, 0x9E}, + {0x0BF2, 0x0000, 0x0000, 0x9F}, + {0x2018, 0x0000, 0x0000, 0x91}, + {0x2019, 0x0000, 0x0000, 0x92}, + {0x201C, 0x0000, 0x0000, 0x93}, + {0x201C, 0x0000, 0x0000, 0x94} +}; + +static const ushort TsToUn [][3] = { + // Starting at 0x80 + {0x0BE6, 0x0000, 0x0000}, + {0x0BE7, 0x0000, 0x0000}, + {0x0000, 0x0000, 0x0000}, // unknown + {0x0B9C, 0x0000, 0x0000}, + {0x0BB7, 0x0000, 0x0000}, + {0x0BB8, 0x0000, 0x0000}, + {0x0BB9, 0x0000, 0x0000}, + {0x0BE7, 0x0BB7, 0x0000}, + {0x0B9C, 0x0B82, 0x0000}, + {0x0BB7, 0x0B82, 0x0000}, + {0x0BB8, 0x0B82, 0x0000}, + {0x0BB9, 0x0B82, 0x0000}, + {0x0BE7, 0x0BB7, 0x0B82}, + {0x0BE8, 0x0000, 0x0000}, + {0x0BE9, 0x0000, 0x0000}, + {0x0BEA, 0x0000, 0x0000}, + {0x0BEB, 0x0000, 0x0000}, + {0x2018, 0x0000, 0x0000}, + {0x2019, 0x0000, 0x0000}, + {0x201C, 0x0000, 0x0000}, + {0x201C, 0x0000, 0x0000}, // two of the same?? 
+ {0x0BEC, 0x0000, 0x0000}, + {0x0BED, 0x0000, 0x0000}, + {0x0BEE, 0x0000, 0x0000}, + {0x0BEF, 0x0000, 0x0000}, + {0x0B99, 0x0BC1, 0x0000}, + {0x0B9E, 0x0BC1, 0x0000}, + {0x0B99, 0x0BC2, 0x0000}, + {0x0B9E, 0x0BC2, 0x0000}, + {0x0BF0, 0x0000, 0x0000}, + {0x0BF1, 0x0000, 0x0000}, + {0x0BF2, 0x0000, 0x0000}, + {0x00A0, 0x0000, 0x0000}, + {0x0BBE, 0x0000, 0x0000}, + {0x0BBF, 0x0000, 0x0000}, + {0x0BC0, 0x0000, 0x0000}, + {0x0BC1, 0x0000, 0x0000}, + {0x0BC2, 0x0000, 0x0000}, + {0x0BC6, 0x0000, 0x0000}, + {0x0BC7, 0x0000, 0x0000}, + {0x0BC8, 0x0000, 0x0000}, + {0x00A9, 0x0000, 0x0000}, + {0x0BCC, 0x0000, 0x0000}, + {0x0B85, 0x0000, 0x0000}, + {0x0B86, 0x0000, 0x0000}, + {0x0B87, 0x0000, 0x0000}, + {0x0B88, 0x0000, 0x0000}, + {0x0B89, 0x0000, 0x0000}, + {0x0B8A, 0x0000, 0x0000}, + {0x0B8E, 0x0000, 0x0000}, + {0x0B8F, 0x0000, 0x0000}, + {0x0B90, 0x0000, 0x0000}, + {0x0B92, 0x0000, 0x0000}, + {0x0B93, 0x0000, 0x0000}, + {0x0B94, 0x0000, 0x0000}, + {0x0B83, 0x0000, 0x0000}, + {0x0B95, 0x0000, 0x0000}, + {0x0B99, 0x0000, 0x0000}, + {0x0B9A, 0x0000, 0x0000}, + {0x0B9E, 0x0000, 0x0000}, + {0x0B9F, 0x0000, 0x0000}, + {0x0BA3, 0x0000, 0x0000}, + {0x0BA4, 0x0000, 0x0000}, + {0x0BA8, 0x0000, 0x0000}, + {0x0BAA, 0x0000, 0x0000}, + {0x0BAE, 0x0000, 0x0000}, + {0x0BAF, 0x0000, 0x0000}, + {0x0BB0, 0x0000, 0x0000}, + {0x0BB2, 0x0000, 0x0000}, + {0x0BB5, 0x0000, 0x0000}, + {0x0BB4, 0x0000, 0x0000}, + {0x0BB3, 0x0000, 0x0000}, + {0x0BB1, 0x0000, 0x0000}, + {0x0BA9, 0x0000, 0x0000}, + {0x0B9F, 0x0BBF, 0x0000}, + {0x0B9F, 0x0BC0, 0x0000}, + {0x0B95, 0x0BC1, 0x0000}, + {0x0B9A, 0x0BC1, 0x0000}, + {0x0B9F, 0x0BC1, 0x0000}, + {0x0BA3, 0x0BC1, 0x0000}, + {0x0BA4, 0x0BC1, 0x0000}, + {0x0BA8, 0x0BC1, 0x0000}, + {0x0BAA, 0x0BC1, 0x0000}, + {0x0BAE, 0x0BC1, 0x0000}, + {0x0BAF, 0x0BC1, 0x0000}, + {0x0BB0, 0x0BC1, 0x0000}, + {0x0BB2, 0x0BC1, 0x0000}, + {0x0BB5, 0x0BC1, 0x0000}, + {0x0BB4, 0x0BC1, 0x0000}, + {0x0BB3, 0x0BC1, 0x0000}, + {0x0BB1, 0x0BC1, 0x0000}, + {0x0BA9, 0x0BC1, 0x0000}, + {0x0B95, 
0x0BC2, 0x0000}, + {0x0B9A, 0x0BC2, 0x0000}, + {0x0B9F, 0x0BC2, 0x0000}, + {0x0BA3, 0x0BC2, 0x0000}, + {0x0BA4, 0x0BC2, 0x0000}, + {0x0BA8, 0x0BC2, 0x0000}, + {0x0BAA, 0x0BC2, 0x0000}, + {0x0BAE, 0x0BC2, 0x0000}, + {0x0BAF, 0x0BC2, 0x0000}, + {0x0BB0, 0x0BC2, 0x0000}, + {0x0BB2, 0x0BC2, 0x0000}, + {0x0BB5, 0x0BC2, 0x0000}, + {0x0BB4, 0x0BC2, 0x0000}, + {0x0BB3, 0x0BC2, 0x0000}, + {0x0BB1, 0x0BC2, 0x0000}, + {0x0BA9, 0x0BC2, 0x0000}, + {0x0B95, 0x0B82, 0x0000}, + {0x0B99, 0x0B82, 0x0000}, + {0x0B9A, 0x0B82, 0x0000}, + {0x0B9E, 0x0B82, 0x0000}, + {0x0B9F, 0x0B82, 0x0000}, + {0x0BA3, 0x0B82, 0x0000}, + {0x0BA1, 0x0B82, 0x0000}, + {0x0BA8, 0x0B82, 0x0000}, + {0x0BAA, 0x0B82, 0x0000}, + {0x0BAE, 0x0B82, 0x0000}, + {0x0BAF, 0x0B82, 0x0000}, + {0x0BB0, 0x0B82, 0x0000}, + {0x0BB2, 0x0B82, 0x0000}, + {0x0BB5, 0x0B82, 0x0000}, + {0x0BB4, 0x0B82, 0x0000}, + {0x0BB3, 0x0B82, 0x0000}, + {0x0BB1, 0x0B82, 0x0000}, + {0x0BA9, 0x0B82, 0x0000} +}; + +static int cmp(const ushort *s1, const ushort *s2, size_t len) +{ + int diff = 0; + + while (len-- && (diff = *s1++ - *s2++) == 0) + ; + + return diff; +} + +static unsigned char qt_UnicodeToTSCII(ushort u1, ushort u2, ushort u3) +{ + ushort s[3]; + s[0] = u1; + s[1] = u2; + s[2] = u3; + + int a = 0; // start pos + int b = UnToTsLast; // end pos + + // do a binary search for the composed unicode in the list + while (a <= b) { + int w = (a + b) / 2; + int j = cmp(UnToTs[w], s, 3); + + if (j == 0) + // found it + return UnToTs[w][3]; + + if (j < 0) + a = w + 1; + else + b = w - 1; + } + + return 0; +} + +static unsigned int qt_TSCIIToUnicode(uint code, uint *s) +{ + int len = 0; + for (int i = 0; i < 3; i++) { + uint u = TsToUn[code & 0x7f][i]; + s[i] = u; + if (s[i]) len = i + 1; + } + + return len; +} + +QT_END_NAMESPACE + +#endif // QT_NO_CODECS diff --git a/src/corelib/codecs/qtsciicodec_p.h b/src/corelib/codecs/qtsciicodec_p.h new file mode 100644 index 0000000..8f11e48 --- /dev/null +++ b/src/corelib/codecs/qtsciicodec_p.h @@ -0,0 
+1,106 @@ +/**************************************************************************** +** +** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). +** Contact: Qt Software Information (qt-info@nokia.com) +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** No Commercial Usage +** This file contains pre-release code and may not be distributed. +** You may use this file in accordance with the terms and conditions +** contained in the either Technology Preview License Agreement or the +** Beta Release License Agreement. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain +** additional rights. These rights are described in the Nokia Qt LGPL +** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this +** package. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 3.0 as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU General Public License version 3.0 requirements will be +** met: http://www.gnu.org/copyleft/gpl.html. +** +** If you are unsure which license is appropriate for your use, please +** contact the sales department at qt-sales@nokia.com. 
+** $QT_END_LICENSE$ +** +****************************************************************************/ + +// Most of the code here was originally written by Hans Petter Bieker, +// and is included in Qt with the author's permission, and the grateful +// thanks of the Trolltech team. + +/* + * Copyright (C) 2000 Hans Petter Bieker. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef QTSCIICODEC_P_H +#define QTSCIICODEC_P_H + +// +// W A R N I N G +// ------------- +// +// This file is not part of the Qt API. It exists for the convenience +// of other Qt classes. This header file may change from version to +// version without notice, or even be removed. +// +// We mean it. 
+// + +#include "QtCore/qtextcodec.h" + +QT_BEGIN_NAMESPACE + +#ifndef QT_NO_CODECS + +class Q_CORE_EXPORT QTsciiCodec : public QTextCodec { +public: + ~QTsciiCodec(); + + QByteArray name() const; + int mibEnum() const; + + QString convertToUnicode(const char *, int, ConverterState *) const; + QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const; +}; + +#endif // QT_NO_CODECS + +QT_END_NAMESPACE + +#endif // QTSCIICODEC_P_H diff --git a/src/corelib/codecs/qutfcodec.cpp b/src/corelib/codecs/qutfcodec.cpp new file mode 100644 index 0000000..281bf75 --- /dev/null +++ b/src/corelib/codecs/qutfcodec.cpp @@ -0,0 +1,634 @@ +/**************************************************************************** +** +** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). +** Contact: Qt Software Information (qt-info@nokia.com) +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** No Commercial Usage +** This file contains pre-release code and may not be distributed. +** You may use this file in accordance with the terms and conditions +** contained in the either Technology Preview License Agreement or the +** Beta Release License Agreement. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain +** additional rights. These rights are described in the Nokia Qt LGPL +** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this +** package. 
+** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 3.0 as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU General Public License version 3.0 requirements will be +** met: http://www.gnu.org/copyleft/gpl.html. +** +** If you are unsure which license is appropriate for your use, please +** contact the sales department at qt-sales@nokia.com. +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include "qutfcodec_p.h" +#include "qlist.h" +#include "qendian.h" +#include "qchar.h" + +#ifndef QT_NO_TEXTCODEC + +QT_BEGIN_NAMESPACE + +QUtf8Codec::~QUtf8Codec() +{ +} + +QByteArray QUtf8Codec::convertFromUnicode(const QChar *uc, int len, ConverterState *state) const +{ + uchar replacement = '?'; + int rlen = 3*len; + int surrogate_high = -1; + if (state) { + if (state->flags & ConvertInvalidToNull) + replacement = 0; + if (!(state->flags & IgnoreHeader)) + rlen += 3; + if (state->remainingChars) + surrogate_high = state->state_data[0]; + } + + QByteArray rstr; + rstr.resize(rlen); + uchar* cursor = (uchar*)rstr.data(); + const QChar *ch = uc; + int invalid = 0; + if (state && !(state->flags & IgnoreHeader)) { + *cursor++ = 0xef; + *cursor++ = 0xbb; + *cursor++ = 0xbf; + } + + const QChar *end = ch + len; + while (ch < end) { + uint u = ch->unicode(); + if (surrogate_high >= 0) { + if (u >= 0xdc00 && u < 0xe000) { + u = (surrogate_high - 0xd800)*0x400 + (u - 0xdc00) + 0x10000; + surrogate_high = -1; + } else { + // high surrogate without low + *cursor = replacement; + ++ch; + ++invalid; + surrogate_high = -1; + continue; + } + } else if (u >= 0xdc00 && u < 0xe000) { + // low surrogate without high + *cursor = replacement; + ++ch; + ++invalid; + continue; + } else if (u >= 0xd800 && u < 
0xdc00) { + surrogate_high = u; + ++ch; + continue; + } + + if (u < 0x80) { + *cursor++ = (uchar)u; + } else { + if (u < 0x0800) { + *cursor++ = 0xc0 | ((uchar) (u >> 6)); + } else { + if (u > 0xffff) { + // see QString::fromUtf8() and QString::utf8() for explanations + if (u > 0x10fe00 && u < 0x10ff00) { + *cursor++ = (u - 0x10fe00); + ++ch; + continue; + } else { + *cursor++ = 0xf0 | ((uchar) (u >> 18)); + *cursor++ = 0x80 | (((uchar) (u >> 12)) & 0x3f); + } + } else { + *cursor++ = 0xe0 | (((uchar) (u >> 12)) & 0x3f); + } + *cursor++ = 0x80 | (((uchar) (u >> 6)) & 0x3f); + } + *cursor++ = 0x80 | ((uchar) (u&0x3f)); + } + ++ch; + } + + rstr.resize(cursor - (const uchar*)rstr.constData()); + if (state) { + state->invalidChars += invalid; + state->flags |= IgnoreHeader; + state->remainingChars = 0; + if (surrogate_high >= 0) { + state->remainingChars = 1; + state->state_data[0] = surrogate_high; + } + } + return rstr; +} + +void QUtf8Codec::convertToUnicode(QString *target, const char *chars, int len, ConverterState *state) const +{ + bool headerdone = false; + QChar replacement = QChar::ReplacementCharacter; + int need = 0; + int error = -1; + uint uc = 0; + uint min_uc = 0; + if (state) { + if (state->flags & IgnoreHeader) + headerdone = true; + if (state->flags & ConvertInvalidToNull) + replacement = QChar::Null; + need = state->remainingChars; + if (need) { + uc = state->state_data[0]; + min_uc = state->state_data[1]; + } + } + if (!headerdone && len > 3 + && (uchar)chars[0] == 0xef && (uchar)chars[1] == 0xbb && (uchar)chars[2] == 0xbf) { + // starts with a byte order mark + chars += 3; + len -= 3; + headerdone = true; + } + + int originalLength = target->length(); + QString &result = *target; + result.resize(originalLength + len + 1); // worst case + QChar *qch = result.data() + originalLength; + uchar ch; + int invalid = 0; + + for (int i=0; i<len; i++) { + ch = chars[i]; + if (need) { + if ((ch&0xc0) == 0x80) { + uc = (uc << 6) | (ch & 0x3f); + need--; + if 
(!need) { + if (uc > 0xffff && uc < 0x110000) { + // surrogate pair + uc -= 0x10000; + unsigned short high = uc/0x400 + 0xd800; + unsigned short low = uc%0x400 + 0xdc00; + + // resize if necessary + long where = qch - result.unicode(); + if (where + 2 >= result.length()) { + result.resize(where + 2); + qch = result.data() + where; + } + + *qch++ = QChar(high); + *qch++ = QChar(low); + } else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || (uc >= 0xfffe)) { + // error + *qch++ = replacement; + ++invalid; + } else { + *qch++ = uc; + } + } + } else { + // error + i = error; + *qch++ = replacement; + ++invalid; + need = 0; + } + } else { + if (ch < 128) { + *qch++ = QLatin1Char(ch); + } else if ((ch & 0xe0) == 0xc0) { + uc = ch & 0x1f; + need = 1; + error = i; + min_uc = 0x80; + } else if ((ch & 0xf0) == 0xe0) { + uc = ch & 0x0f; + need = 2; + error = i; + min_uc = 0x800; + } else if ((ch&0xf8) == 0xf0) { + uc = ch & 0x07; + need = 3; + error = i; + min_uc = 0x10000; + } else { + // error + *qch++ = replacement; + ++invalid; + } + } + } + if (!state && need > 0) { + // unterminated UTF sequence + for (int i = error; i < len; ++i) { + *qch++ = replacement; + ++invalid; + } + } + result.truncate(qch - result.unicode()); + if (state) { + state->invalidChars += invalid; + state->remainingChars = need; + if (headerdone) + state->flags |= IgnoreHeader; + state->state_data[0] = need ? uc : 0; + state->state_data[1] = need ? 
min_uc : 0; + } +} + +QString QUtf8Codec::convertToUnicode(const char *chars, int len, ConverterState *state) const +{ + QString result; + convertToUnicode(&result, chars, len, state); + return result; +} + +QByteArray QUtf8Codec::name() const +{ + return "UTF-8"; +} + +int QUtf8Codec::mibEnum() const +{ + return 106; +} + +enum { Endian = 0, Data = 1 }; + +QUtf16Codec::~QUtf16Codec() +{ +} + +QByteArray QUtf16Codec::convertFromUnicode(const QChar *uc, int len, ConverterState *state) const +{ + Endianness endian = e; + int length = 2*len; + if (!state || (!(state->flags & IgnoreHeader))) { + length += 2; + } + if (e == Detect) { + endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BE : LE; + } + + QByteArray d; + d.resize(length); + char *data = d.data(); + if (!state || !(state->flags & IgnoreHeader)) { + QChar bom(QChar::ByteOrderMark); + if (endian == BE) { + data[0] = bom.row(); + data[1] = bom.cell(); + } else { + data[0] = bom.cell(); + data[1] = bom.row(); + } + data += 2; + } + if (endian == BE) { + for (int i = 0; i < len; ++i) { + *(data++) = uc[i].row(); + *(data++) = uc[i].cell(); + } + } else { + for (int i = 0; i < len; ++i) { + *(data++) = uc[i].cell(); + *(data++) = uc[i].row(); + } + } + + if (state) { + state->remainingChars = 0; + state->flags |= IgnoreHeader; + } + return d; +} + +QString QUtf16Codec::convertToUnicode(const char *chars, int len, ConverterState *state) const +{ + Endianness endian = e; + bool half = false; + uchar buf = 0; + bool headerdone = false; + if (state) { + headerdone = state->flags & IgnoreHeader; + if (endian == Detect) + endian = (Endianness)state->state_data[Endian]; + if (state->remainingChars) { + half = true; + buf = state->state_data[Data]; + } + } + if (headerdone && endian == Detect) + endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? 
BE : LE; + + QString result; + result.resize(len); // worst case + QChar *qch = (QChar *)result.unicode(); + while (len--) { + if (half) { + QChar ch; + if (endian == LE) { + ch.setRow(*chars++); + ch.setCell(buf); + } else { + ch.setRow(buf); + ch.setCell(*chars++); + } + if (!headerdone) { + if (endian == Detect) { + if (ch == QChar::ByteOrderSwapped && endian != BE) { + endian = LE; + } else if (ch == QChar::ByteOrderMark && endian != LE) { + // ignore BOM + endian = BE; + } else { + if (QSysInfo::ByteOrder == QSysInfo::BigEndian) { + endian = BE; + } else { + endian = LE; + ch = QChar((ch.unicode() >> 8) | ((ch.unicode() & 0xff) << 8)); + } + *qch++ = ch; + } + } else if (ch != QChar::ByteOrderMark) { + *qch++ = ch; + } + headerdone = true; + } else { + *qch++ = ch; + } + half = false; + } else { + buf = *chars++; + half = true; + } + } + result.truncate(qch - result.unicode()); + + if (state) { + if (endian != Detect) + state->flags |= IgnoreHeader; + state->state_data[Endian] = endian; + if (half) { + state->remainingChars = 1; + state->state_data[Data] = buf; + } else { + state->remainingChars = 0; + state->state_data[Data] = 0; + } + } + return result; +} + +int QUtf16Codec::mibEnum() const +{ + return 1015; +} + +QByteArray QUtf16Codec::name() const +{ + return "UTF-16"; +} + +QList<QByteArray> QUtf16Codec::aliases() const +{ + QList<QByteArray> list; + list << "ISO-10646-UCS-2"; + return list; +} + +int QUtf16BECodec::mibEnum() const +{ + return 1013; +} + +QByteArray QUtf16BECodec::name() const +{ + return "UTF-16BE"; +} + +QList<QByteArray> QUtf16BECodec::aliases() const +{ + QList<QByteArray> list; + return list; +} + +int QUtf16LECodec::mibEnum() const +{ + return 1014; +} + +QByteArray QUtf16LECodec::name() const +{ + return "UTF-16LE"; +} + +QList<QByteArray> QUtf16LECodec::aliases() const +{ + QList<QByteArray> list; + return list; +} + +QUtf32Codec::~QUtf32Codec() +{ +} + +QByteArray QUtf32Codec::convertFromUnicode(const QChar *uc, int len, 
ConverterState *state) const +{ + Endianness endian = e; + int length = 4*len; + if (!state || (!(state->flags & IgnoreHeader))) { + length += 4; + } + if (e == Detect) { + endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BE : LE; + } + + QByteArray d; + d.resize(length); + char *data = d.data(); + if (!state || !(state->flags & IgnoreHeader)) { + if (endian == BE) { + data[0] = 0; + data[1] = 0; + data[2] = (char)0xfe; + data[3] = (char)0xff; + } else { + data[0] = (char)0xff; + data[1] = (char)0xfe; + data[2] = 0; + data[3] = 0; + } + data += 2; + } + if (endian == BE) { + for (int i = 0; i < len; ++i) { + uint cp = uc[i].unicode(); + if (uc[i].isHighSurrogate() && i < len - 1) + cp = QChar::surrogateToUcs4(cp, uc[++i].unicode()); + *(data++) = cp >> 24; + *(data++) = (cp >> 16) & 0xff; + *(data++) = (cp >> 8) & 0xff; + *(data++) = cp & 0xff; + } + } else { + for (int i = 0; i < len; ++i) { + uint cp = uc[i].unicode(); + if (uc[i].isHighSurrogate() && i < len - 1) + cp = QChar::surrogateToUcs4(cp, uc[++i].unicode()); + *(data++) = cp & 0xff; + *(data++) = (cp >> 8) & 0xff; + *(data++) = (cp >> 16) & 0xff; + *(data++) = cp >> 24; + } + } + + if (state) { + state->remainingChars = 0; + state->flags |= IgnoreHeader; + } + return d; +} + +QString QUtf32Codec::convertToUnicode(const char *chars, int len, ConverterState *state) const +{ + Endianness endian = e; + uchar tuple[4]; + int num = 0; + bool headerdone = false; + if (state) { + headerdone = state->flags & IgnoreHeader; + if (endian == Detect) { + endian = (Endianness)state->state_data[Endian]; + } + num = state->remainingChars; + memcpy(tuple, &state->state_data[Data], 4); + } + if (headerdone && endian == Detect) + endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? 
BE : LE; + + QString result; + result.resize((num + len) >> 2 << 1); // worst case + QChar *qch = (QChar *)result.unicode(); + + const char *end = chars + len; + while (chars < end) { + tuple[num++] = *chars++; + if (num == 4) { + if (!headerdone) { + if (endian == Detect) { + if (endian == Detect) { + if (tuple[0] == 0xff && tuple[1] == 0xfe && tuple[2] == 0 && tuple[3] == 0 && endian != BE) { + endian = LE; + num = 0; + continue; + } else if (tuple[0] == 0 && tuple[1] == 0 && tuple[2] == 0xfe && tuple[3] == 0xff && endian != LE) { + endian = BE; + num = 0; + continue; + } else if (QSysInfo::ByteOrder == QSysInfo::BigEndian) { + endian = BE; + } else { + endian = LE; + } + } + } else if (((endian == BE) ? qFromBigEndian<quint32>(tuple) : qFromLittleEndian<quint32>(tuple)) == QChar::ByteOrderMark) { + num = 0; + continue; + } + } + uint code = (endian == BE) ? qFromBigEndian<quint32>(tuple) : qFromLittleEndian<quint32>(tuple); + if (code >= 0x10000) { + *qch++ = QChar::highSurrogate(code); + *qch++ = QChar::lowSurrogate(code); + } else { + *qch++ = code; + } + num = 0; + } + } + result.truncate(qch - result.unicode()); + + if (state) { + if (endian != Detect) + state->flags |= IgnoreHeader; + state->state_data[Endian] = endian; + state->remainingChars = num; + memcpy(&state->state_data[Data], tuple, 4); + } + return result; +} + +int QUtf32Codec::mibEnum() const +{ + return 1017; +} + +QByteArray QUtf32Codec::name() const +{ + return "UTF-32"; +} + +QList<QByteArray> QUtf32Codec::aliases() const +{ + QList<QByteArray> list; + return list; +} + +int QUtf32BECodec::mibEnum() const +{ + return 1018; +} + +QByteArray QUtf32BECodec::name() const +{ + return "UTF-32BE"; +} + +QList<QByteArray> QUtf32BECodec::aliases() const +{ + QList<QByteArray> list; + return list; +} + +int QUtf32LECodec::mibEnum() const +{ + return 1019; +} + +QByteArray QUtf32LECodec::name() const +{ + return "UTF-32LE"; +} + +QList<QByteArray> QUtf32LECodec::aliases() const +{ + QList<QByteArray> 
list; + return list; +} + + +QT_END_NAMESPACE + +#endif //QT_NO_TEXTCODEC diff --git a/src/corelib/codecs/qutfcodec_p.h b/src/corelib/codecs/qutfcodec_p.h new file mode 100644 index 0000000..0abcfaf --- /dev/null +++ b/src/corelib/codecs/qutfcodec_p.h @@ -0,0 +1,155 @@ +/**************************************************************************** +** +** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). +** Contact: Qt Software Information (qt-info@nokia.com) +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** No Commercial Usage +** This file contains pre-release code and may not be distributed. +** You may use this file in accordance with the terms and conditions +** contained in the either Technology Preview License Agreement or the +** Beta Release License Agreement. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain +** additional rights. These rights are described in the Nokia Qt LGPL +** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this +** package. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 3.0 as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL included in the +** packaging of this file. 
Please review the following information to +** ensure the GNU General Public License version 3.0 requirements will be +** met: http://www.gnu.org/copyleft/gpl.html. +** +** If you are unsure which license is appropriate for your use, please +** contact the sales department at qt-sales@nokia.com. +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#ifndef QUTFCODEC_P_H +#define QUTFCODEC_P_H + +// +// W A R N I N G +// ------------- +// +// This file is not part of the Qt API. It exists purely as an +// implementation detail. This header file may change from version to +// version without notice, or even be removed. +// +// We mean it. +// + +#include "QtCore/qtextcodec.h" + +QT_BEGIN_NAMESPACE + +#ifndef QT_NO_TEXTCODEC + +class QUtf8Codec : public QTextCodec { +public: + ~QUtf8Codec(); + + QByteArray name() const; + int mibEnum() const; + + QString convertToUnicode(const char *, int, ConverterState *) const; + QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const; + void convertToUnicode(QString *target, const char *, int, ConverterState *) const; +}; + +class QUtf16Codec : public QTextCodec { +protected: + enum Endianness { + Detect, + BE, + LE + }; +public: + QUtf16Codec() { e = Detect; } + ~QUtf16Codec(); + + QByteArray name() const; + QList<QByteArray> aliases() const; + int mibEnum() const; + + QString convertToUnicode(const char *, int, ConverterState *) const; + QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const; + +protected: + Endianness e; +}; + +class QUtf16BECodec : public QUtf16Codec { +public: + QUtf16BECodec() : QUtf16Codec() { e = BE; } + QByteArray name() const; + QList<QByteArray> aliases() const; + int mibEnum() const; +}; + +class QUtf16LECodec : public QUtf16Codec { +public: + QUtf16LECodec() : QUtf16Codec() { e = LE; } + QByteArray name() const; + QList<QByteArray> aliases() const; + int mibEnum() const; +}; + +class QUtf32Codec : public 
QTextCodec { +protected: + enum Endianness { + Detect, + BE, + LE + }; +public: + QUtf32Codec() { e = Detect; } + ~QUtf32Codec(); + + QByteArray name() const; + QList<QByteArray> aliases() const; + int mibEnum() const; + + QString convertToUnicode(const char *, int, ConverterState *) const; + QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const; + +protected: + Endianness e; +}; + +class QUtf32BECodec : public QUtf32Codec { +public: + QUtf32BECodec() : QUtf32Codec() { e = BE; } + QByteArray name() const; + QList<QByteArray> aliases() const; + int mibEnum() const; +}; + +class QUtf32LECodec : public QUtf32Codec { +public: + QUtf32LECodec() : QUtf32Codec() { e = LE; } + QByteArray name() const; + QList<QByteArray> aliases() const; + int mibEnum() const; +}; + + +#endif // QT_NO_TEXTCODEC + +QT_END_NAMESPACE + +#endif // QUTFCODEC_P_H |