diff options
author | Denis Dzyubenko <denis.dzyubenko@nokia.com> | 2009-06-11 12:59:23 (GMT) |
---|---|---|
committer | Denis Dzyubenko <denis.dzyubenko@nokia.com> | 2009-06-11 19:04:15 (GMT) |
commit | f6aa5d8cfbec4f4ffacf20a94a1653c1a8ee2134 (patch) | |
tree | 2ac49d8f20eb83067932bb655939d6bb044a0fb0 /src/corelib/codecs | |
parent | 52392292c8fa096e3b0bb692dedce66924ab3305 (diff) | |
download | Qt-f6aa5d8cfbec4f4ffacf20a94a1653c1a8ee2134.zip Qt-f6aa5d8cfbec4f4ffacf20a94a1653c1a8ee2134.tar.gz Qt-f6aa5d8cfbec4f4ffacf20a94a1653c1a8ee2134.tar.bz2 |
The UTF-8 text codec should be able to convert data when it is fed one byte at a time.
When the input data was fed to the UTF-8 codec one byte at a time, the codec could not parse
the BOM correctly. So we now wait until the BOM is composed into a code point and check it afterwards.
Reviewed-by: Olivier Goffart
Diffstat (limited to 'src/corelib/codecs')
-rw-r--r-- | src/corelib/codecs/qutfcodec.cpp | 11 |
1 files changed, 10 insertions, 1 deletions
diff --git a/src/corelib/codecs/qutfcodec.cpp b/src/corelib/codecs/qutfcodec.cpp index 6611315..27c0572 100644 --- a/src/corelib/codecs/qutfcodec.cpp +++ b/src/corelib/codecs/qutfcodec.cpp @@ -184,7 +184,10 @@ void QUtf8Codec::convertToUnicode(QString *target, const char *chars, int len, C uc = (uc << 6) | (ch & 0x3f); need--; if (!need) { - if (uc > 0xffff && uc < 0x110000) { + // utf-8 bom composes into 0xfeff code point + if (!headerdone && uc == 0xfeff) { + // dont do anything, just skip the BOM + } else if (uc > 0xffff && uc < 0x110000) { // surrogate pair uc -= 0x10000; unsigned short high = uc/0x400 + 0xd800; @@ -206,6 +209,7 @@ void QUtf8Codec::convertToUnicode(QString *target, const char *chars, int len, C } else { *qch++ = uc; } + headerdone = true; } } else { // error @@ -213,15 +217,18 @@ void QUtf8Codec::convertToUnicode(QString *target, const char *chars, int len, C *qch++ = replacement; ++invalid; need = 0; + headerdone = true; } } else { if (ch < 128) { *qch++ = QLatin1Char(ch); + headerdone = true; } else if ((ch & 0xe0) == 0xc0) { uc = ch & 0x1f; need = 1; error = i; min_uc = 0x80; + headerdone = true; } else if ((ch & 0xf0) == 0xe0) { uc = ch & 0x0f; need = 2; @@ -232,10 +239,12 @@ void QUtf8Codec::convertToUnicode(QString *target, const char *chars, int len, C need = 3; error = i; min_uc = 0x10000; + headerdone = true; } else { // error *qch++ = replacement; ++invalid; + headerdone = true; } } } |