diff options
Diffstat (limited to 'src/plugins/codecs/jp/qjiscodec.cpp')
-rw-r--r-- | src/plugins/codecs/jp/qjiscodec.cpp | 367 |
1 files changed, 367 insertions, 0 deletions
diff --git a/src/plugins/codecs/jp/qjiscodec.cpp b/src/plugins/codecs/jp/qjiscodec.cpp new file mode 100644 index 0000000..6dd90da --- /dev/null +++ b/src/plugins/codecs/jp/qjiscodec.cpp @@ -0,0 +1,367 @@ +/**************************************************************************** +** +** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). +** Contact: Qt Software Information (qt-info@nokia.com) +** +** This file is part of the plugins of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** No Commercial Usage +** This file contains pre-release code and may not be distributed. +** You may use this file in accordance with the terms and conditions +** contained in the either Technology Preview License Agreement or the +** Beta Release License Agreement. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain +** additional rights. These rights are described in the Nokia Qt LGPL +** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this +** package. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 3.0 as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU General Public License version 3.0 requirements will be +** met: http://www.gnu.org/copyleft/gpl.html. +** +** If you are unsure which license is appropriate for your use, please +** contact the sales department at qt-sales@nokia.com. +** $QT_END_LICENSE$ +** +****************************************************************************/ + +// Most of the code here was originally written by Serika Kurusugawa, +// a.k.a. Junji Takagi, and is included in Qt with the author's permission +// and the grateful thanks of the Trolltech team. + +/*! \class QJisCodec + \reentrant + \internal +*/ + +#include "qjiscodec.h" +#include "qlist.h" + +QT_BEGIN_NAMESPACE + +#ifndef QT_NO_TEXTCODEC +enum { + Esc = 0x1b, + So = 0x0e, // Shift Out + Si = 0x0f, // Shift In + + ReverseSolidus = 0x5c, + YenSign = 0x5c, + Tilde = 0x7e, + Overline = 0x7e +}; + +#define IsKana(c) (((c) >= 0xa1) && ((c) <= 0xdf)) +#define IsJisChar(c) (((c) >= 0x21) && ((c) <= 0x7e)) + +#define QValidChar(u) ((u) ? QChar((ushort)(u)) : QChar(QChar::ReplacementCharacter)) + +enum Iso2022State{ Ascii, MinState = Ascii, + JISX0201_Latin, JISX0201_Kana, + JISX0208_1978, JISX0208_1983, + JISX0212, MaxState = JISX0212, + UnknownState }; + +static const char Esc_CHARS[] = "()*+-./"; + +static const char Esc_Ascii[] = {Esc, '(', 'B', 0 }; +static const char Esc_JISX0201_Latin[] = {Esc, '(', 'J', 0 }; +static const char Esc_JISX0201_Kana[] = {Esc, '(', 'I', 0 }; +static const char Esc_JISX0208_1978[] = {Esc, '$', '@', 0 }; +static const char Esc_JISX0208_1983[] = {Esc, '$', 'B', 0 }; +static const char Esc_JISX0212[] = {Esc, '$', '(', 'D', 0 }; +static const char * const Esc_SEQ[] = { Esc_Ascii, + Esc_JISX0201_Latin, + Esc_JISX0201_Kana, + Esc_JISX0208_1978, + Esc_JISX0208_1983, + Esc_JISX0212 }; + +/*! \internal */ +QJisCodec::QJisCodec() : conv(QJpUnicodeConv::newConverter(QJpUnicodeConv::Default)) +{ +} + + +/*! \internal */ +QJisCodec::~QJisCodec() +{ + delete (QJpUnicodeConv*)conv; + conv = 0; +} + +QByteArray QJisCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *cs) const +{ + char replacement = '?'; + if (cs) { + if (cs->flags & ConvertInvalidToNull) + replacement = 0; + } + int invalid = 0; + + QByteArray result; + Iso2022State state = Ascii; + Iso2022State prev = Ascii; + for (int i = 0; i < len; i++) { + QChar ch = uc[i]; + uint j; + if (ch.row() == 0x00 && ch.cell() < 0x80) { + // Ascii + if (state != JISX0201_Latin || + ch.cell() == ReverseSolidus || ch.cell() == Tilde) { + state = Ascii; + } + j = ch.cell(); + } else if ((j = conv->unicodeToJisx0201(ch.row(), ch.cell())) != 0) { + if (j < 0x80) { + // JIS X 0201 Latin + if (state != Ascii || + ch.cell() == YenSign || ch.cell() == Overline) { + state = JISX0201_Latin; + } + } else { + // JIS X 0201 Kana + state = JISX0201_Kana; + j &= 0x7f; + } + } else if ((j = conv->unicodeToJisx0208(ch.row(), ch.cell())) != 0) { + // JIS X 0208 + state = JISX0208_1983; + } else if ((j = conv->unicodeToJisx0212(ch.row(), ch.cell())) != 0) { + // JIS X 0212 + state = JISX0212; + } else { + // Invalid + state = UnknownState; + j = replacement; + ++invalid; + } + if (state != prev) { + if (state == UnknownState) { + result += Esc_Ascii; + } else { + result += Esc_SEQ[state - MinState]; + } + prev = state; + } + if (j < 0x0100) { + result += j & 0xff; + } else { + result += (j >> 8) & 0xff; + result += j & 0xff; + } + } + if (prev != Ascii) { + result += Esc_Ascii; + } + + if (cs) { + cs->invalidChars += invalid; + } + return result; +} + +QString QJisCodec::convertToUnicode(const char* chars, int len, ConverterState *cs) const +{ + uchar buf[4] = {0, 0, 0, 0}; + int nbuf = 0; + Iso2022State state = Ascii, prev = Ascii; + bool esc = false; + QChar replacement = QChar::ReplacementCharacter; + if (cs) { + if (cs->flags & ConvertInvalidToNull) + replacement = QChar::Null; + nbuf = cs->remainingChars; + buf[0] = (cs->state_data[0] >> 24) & 0xff; + buf[1] = (cs->state_data[0] >> 16) & 0xff; + buf[2] = (cs->state_data[0] >> 8) & 0xff; + buf[3] = (cs->state_data[0] >> 0) & 0xff; + state = (Iso2022State)((cs->state_data[1] >> 0) & 0xff); + prev = (Iso2022State)((cs->state_data[1] >> 8) & 0xff); + esc = cs->state_data[2]; + } + int invalid = 0; + + QString result; + for (int i=0; i<len; i++) { + uchar ch = chars[i]; + if (esc) { + // Escape sequence + state = UnknownState; + switch (nbuf) { + case 0: + if (ch == '$' || strchr(Esc_CHARS, ch)) { + buf[nbuf++] = ch; + } else { + nbuf = 0; + esc = false; + } + break; + case 1: + if (buf[0] == '$') { + if (strchr(Esc_CHARS, ch)) { + buf[nbuf++] = ch; + } else { + switch (ch) { + case '@': + state = JISX0208_1978; // Esc $ @ + break; + case 'B': + state = JISX0208_1983; // Esc $ B + break; + } + nbuf = 0; + esc = false; + } + } else { + if (buf[0] == '(') { + switch (ch) { + case 'B': + state = Ascii; // Esc (B + break; + case 'I': + state = JISX0201_Kana; // Esc (I + break; + case 'J': + state = JISX0201_Latin; // Esc (J + break; + } + } + nbuf = 0; + esc = false; + } + break; + case 2: + if (buf[1] == '(') { + switch (ch) { + case 'D': + state = JISX0212; // Esc $ (D + break; + } + } + nbuf = 0; + esc = false; + break; + } + } else { + if (ch == Esc) { + // Escape sequence + nbuf = 0; + esc = true; + } else if (ch == So) { + // Shift out + prev = state; + state = JISX0201_Kana; + nbuf = 0; + } else if (ch == Si) { + // Shift in + if (prev == Ascii || prev == JISX0201_Latin) { + state = prev; + } else { + state = Ascii; + } + nbuf = 0; + } else { + uint u; + switch (nbuf) { + case 0: + switch (state) { + case Ascii: + if (ch < 0x80) { + result += QLatin1Char(ch); + break; + } + /* fall through */ + case JISX0201_Latin: + u = conv->jisx0201ToUnicode(ch); + result += QValidChar(u); + break; + case JISX0201_Kana: + u = conv->jisx0201ToUnicode(ch | 0x80); + result += QValidChar(u); + break; + case JISX0208_1978: + case JISX0208_1983: + case JISX0212: + buf[nbuf++] = ch; + break; + default: + result += QChar::ReplacementCharacter; + break; + } + break; + case 1: + switch (state) { + case JISX0208_1978: + case JISX0208_1983: + u = conv->jisx0208ToUnicode(buf[0] & 0x7f, ch & 0x7f); + result += QValidChar(u); + break; + case JISX0212: + u = conv->jisx0212ToUnicode(buf[0] & 0x7f, ch & 0x7f); + result += QValidChar(u); + break; + default: + result += replacement; + ++invalid; + break; + } + nbuf = 0; + break; + } + } + } + } + + if (cs) { + cs->remainingChars = nbuf; + cs->invalidChars += invalid; + cs->state_data[0] = (buf[0] << 24) + (buf[1] << 16) + (buf[2] << 8) + buf[3]; + cs->state_data[1] = (prev << 8) + state; + cs->state_data[2] = esc; + } + + return result; +} + + + +/*! \internal */ +int QJisCodec::_mibEnum() +{ + return 39; +} + +/*! \internal */ +QByteArray QJisCodec::_name() +{ + return "ISO-2022-JP"; +} + +/*! + Returns the codec's mime name. +*/ +QList<QByteArray> QJisCodec::_aliases() +{ + QList<QByteArray> list; + list << "JIS7"; // Qt 3 compat + return list; +} + +#endif // QT_NO_TEXTCODEC + +QT_END_NAMESPACE |