diff options
author | Lars Knoll <lars.knoll@nokia.com> | 2009-03-23 09:34:13 (GMT) |
---|---|---|
committer | Simon Hausmann <simon.hausmann@nokia.com> | 2009-03-23 09:34:13 (GMT) |
commit | 67ad0519fd165acee4a4d2a94fa502e9e4847bd0 (patch) | |
tree | 1dbf50b3dff8d5ca7e9344733968c72704eb15ff /src/3rdparty/webkit/JavaScriptCore/wtf/unicode | |
download | Qt-67ad0519fd165acee4a4d2a94fa502e9e4847bd0.zip Qt-67ad0519fd165acee4a4d2a94fa502e9e4847bd0.tar.gz Qt-67ad0519fd165acee4a4d2a94fa502e9e4847bd0.tar.bz2 |
Long live Qt!
Diffstat (limited to 'src/3rdparty/webkit/JavaScriptCore/wtf/unicode')
8 files changed, 1446 insertions, 0 deletions
diff --git a/src/3rdparty/webkit/JavaScriptCore/wtf/unicode/Collator.h b/src/3rdparty/webkit/JavaScriptCore/wtf/unicode/Collator.h new file mode 100644 index 0000000..f04779d --- /dev/null +++ b/src/3rdparty/webkit/JavaScriptCore/wtf/unicode/Collator.h @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2008 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef WTF_Collator_h +#define WTF_Collator_h + +#include <memory> +#include <wtf/Noncopyable.h> +#include <wtf/unicode/Unicode.h> + +#if USE(ICU_UNICODE) && !UCONFIG_NO_COLLATION +struct UCollator; +#endif + +namespace WTF { + + class Collator : Noncopyable { + public: + enum Result { Equal = 0, Greater = 1, Less = -1 }; + + Collator(const char* locale); // Parsing is lenient; e.g. language identifiers (such as "en-US") are accepted, too. + ~Collator(); + void setOrderLowerFirst(bool); + + static std::auto_ptr<Collator> userDefault(); + + Result collate(const ::UChar*, size_t, const ::UChar*, size_t) const; + + private: +#if USE(ICU_UNICODE) && !UCONFIG_NO_COLLATION + void createCollator() const; + void releaseCollator(); + mutable UCollator* m_collator; +#endif + char* m_locale; + bool m_lowerFirst; + }; +} + +using WTF::Collator; + +#endif diff --git a/src/3rdparty/webkit/JavaScriptCore/wtf/unicode/CollatorDefault.cpp b/src/3rdparty/webkit/JavaScriptCore/wtf/unicode/CollatorDefault.cpp new file mode 100644 index 0000000..eddbe53 --- /dev/null +++ b/src/3rdparty/webkit/JavaScriptCore/wtf/unicode/CollatorDefault.cpp @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2008 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "Collator.h" + +#if !USE(ICU_UNICODE) || UCONFIG_NO_COLLATION + +namespace WTF { + +Collator::Collator(const char*) +{ +} + +Collator::~Collator() +{ +} + +void Collator::setOrderLowerFirst(bool) +{ +} + +std::auto_ptr<Collator> Collator::userDefault() +{ + return std::auto_ptr<Collator>(new Collator(0)); +} + +// A default implementation for platforms that lack Unicode-aware collation. +Collator::Result Collator::collate(const UChar* lhs, size_t lhsLength, const UChar* rhs, size_t rhsLength) const +{ + int lmin = lhsLength < rhsLength ? lhsLength : rhsLength; + int l = 0; + while (l < lmin && *lhs == *rhs) { + lhs++; + rhs++; + l++; + } + + if (l < lmin) + return (*lhs > *rhs) ? Greater : Less; + + if (lhsLength == rhsLength) + return Equal; + + return (lhsLength > rhsLength) ? Greater : Less; +} + +} + +#endif diff --git a/src/3rdparty/webkit/JavaScriptCore/wtf/unicode/UTF8.cpp b/src/3rdparty/webkit/JavaScriptCore/wtf/unicode/UTF8.cpp new file mode 100644 index 0000000..9e713fe --- /dev/null +++ b/src/3rdparty/webkit/JavaScriptCore/wtf/unicode/UTF8.cpp @@ -0,0 +1,303 @@ +/* + * Copyright (C) 2007 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "UTF8.h" + +namespace WTF { +namespace Unicode { + +inline int inlineUTF8SequenceLengthNonASCII(char b0) +{ + if ((b0 & 0xC0) != 0xC0) + return 0; + if ((b0 & 0xE0) == 0xC0) + return 2; + if ((b0 & 0xF0) == 0xE0) + return 3; + if ((b0 & 0xF8) == 0xF0) + return 4; + return 0; +} + +inline int inlineUTF8SequenceLength(char b0) +{ + return (b0 & 0x80) == 0 ? 1 : inlineUTF8SequenceLengthNonASCII(b0); +} + +int UTF8SequenceLength(char b0) +{ + return (b0 & 0x80) == 0 ? 1 : inlineUTF8SequenceLengthNonASCII(b0); +} + +int decodeUTF8Sequence(const char* sequence) +{ + // Handle 0-byte sequences (never valid). + const unsigned char b0 = sequence[0]; + const int length = inlineUTF8SequenceLength(b0); + if (length == 0) + return -1; + + // Handle 1-byte sequences (plain ASCII). + const unsigned char b1 = sequence[1]; + if (length == 1) { + if (b1) + return -1; + return b0; + } + + // Handle 2-byte sequences. + if ((b1 & 0xC0) != 0x80) + return -1; + const unsigned char b2 = sequence[2]; + if (length == 2) { + if (b2) + return -1; + const int c = ((b0 & 0x1F) << 6) | (b1 & 0x3F); + if (c < 0x80) + return -1; + return c; + } + + // Handle 3-byte sequences. + if ((b2 & 0xC0) != 0x80) + return -1; + const unsigned char b3 = sequence[3]; + if (length == 3) { + if (b3) + return -1; + const int c = ((b0 & 0xF) << 12) | ((b1 & 0x3F) << 6) | (b2 & 0x3F); + if (c < 0x800) + return -1; + // UTF-16 surrogates should never appear in UTF-8 data. + if (c >= 0xD800 && c <= 0xDFFF) + return -1; + return c; + } + + // Handle 4-byte sequences. + if ((b3 & 0xC0) != 0x80) + return -1; + const unsigned char b4 = sequence[4]; + if (length == 4) { + if (b4) + return -1; + const int c = ((b0 & 0x7) << 18) | ((b1 & 0x3F) << 12) | ((b2 & 0x3F) << 6) | (b3 & 0x3F); + if (c < 0x10000 || c > 0x10FFFF) + return -1; + return c; + } + + return -1; +} + +// Once the bits are split out into bytes of UTF-8, this is a mask OR-ed +// into the first byte, depending on how many bytes follow. There are +// as many entries in this table as there are UTF-8 sequence types. +// (I.e., one byte sequence, two byte... etc.). Remember that sequencs +// for *legal* UTF-8 will be 4 or fewer bytes total. +static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; + +ConversionResult convertUTF16ToUTF8( + const UChar** sourceStart, const UChar* sourceEnd, + char** targetStart, char* targetEnd, bool strict) +{ + ConversionResult result = conversionOK; + const UChar* source = *sourceStart; + char* target = *targetStart; + while (source < sourceEnd) { + UChar32 ch; + unsigned short bytesToWrite = 0; + const UChar32 byteMask = 0xBF; + const UChar32 byteMark = 0x80; + const UChar* oldSource = source; // In case we have to back up because of target overflow. + ch = static_cast<unsigned short>(*source++); + // If we have a surrogate pair, convert to UChar32 first. + if (ch >= 0xD800 && ch <= 0xDBFF) { + // If the 16 bits following the high surrogate are in the source buffer... + if (source < sourceEnd) { + UChar32 ch2 = static_cast<unsigned short>(*source); + // If it's a low surrogate, convert to UChar32. + if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) { + ch = ((ch - 0xD800) << 10) + (ch2 - 0xDC00) + 0x0010000; + ++source; + } else if (strict) { // it's an unpaired high surrogate + --source; // return to the illegal value itself + result = sourceIllegal; + break; + } + } else { // We don't have the 16 bits following the high surrogate. + --source; // return to the high surrogate + result = sourceExhausted; + break; + } + } else if (strict) { + // UTF-16 surrogate values are illegal in UTF-32 + if (ch >= 0xDC00 && ch <= 0xDFFF) { + --source; // return to the illegal value itself + result = sourceIllegal; + break; + } + } + // Figure out how many bytes the result will require + if (ch < (UChar32)0x80) { + bytesToWrite = 1; + } else if (ch < (UChar32)0x800) { + bytesToWrite = 2; + } else if (ch < (UChar32)0x10000) { + bytesToWrite = 3; + } else if (ch < (UChar32)0x110000) { + bytesToWrite = 4; + } else { + bytesToWrite = 3; + ch = 0xFFFD; + } + + target += bytesToWrite; + if (target > targetEnd) { + source = oldSource; // Back up source pointer! + target -= bytesToWrite; + result = targetExhausted; + break; + } + switch (bytesToWrite) { // note: everything falls through. + case 4: *--target = (char)((ch | byteMark) & byteMask); ch >>= 6; + case 3: *--target = (char)((ch | byteMark) & byteMask); ch >>= 6; + case 2: *--target = (char)((ch | byteMark) & byteMask); ch >>= 6; + case 1: *--target = (char)(ch | firstByteMark[bytesToWrite]); + } + target += bytesToWrite; + } + *sourceStart = source; + *targetStart = target; + return result; +} + +// This must be called with the length pre-determined by the first byte. +// If presented with a length > 4, this returns false. The Unicode +// definition of UTF-8 goes up to 4-byte sequences. +static bool isLegalUTF8(const unsigned char* source, int length) +{ + unsigned char a; + const unsigned char* srcptr = source + length; + switch (length) { + default: return false; + // Everything else falls through when "true"... + case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; + case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; + case 2: if ((a = (*--srcptr)) > 0xBF) return false; + + switch (*source) { + // no fall-through in this inner switch + case 0xE0: if (a < 0xA0) return false; break; + case 0xED: if (a > 0x9F) return false; break; + case 0xF0: if (a < 0x90) return false; break; + case 0xF4: if (a > 0x8F) return false; break; + default: if (a < 0x80) return false; + } + + case 1: if (*source >= 0x80 && *source < 0xC2) return false; + } + if (*source > 0xF4) + return false; + return true; +} + +// Magic values subtracted from a buffer value during UTF8 conversion. +// This table contains as many values as there might be trailing bytes +// in a UTF-8 sequence. +static const UChar32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, + 0x03C82080UL, 0xFA082080UL, 0x82082080UL }; + +ConversionResult convertUTF8ToUTF16( + const char** sourceStart, const char* sourceEnd, + UChar** targetStart, UChar* targetEnd, bool strict) +{ + ConversionResult result = conversionOK; + const char* source = *sourceStart; + UChar* target = *targetStart; + while (source < sourceEnd) { + UChar32 ch = 0; + int extraBytesToRead = UTF8SequenceLength(*source) - 1; + if (source + extraBytesToRead >= sourceEnd) { + result = sourceExhausted; + break; + } + // Do this check whether lenient or strict + if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(source), extraBytesToRead + 1)) { + result = sourceIllegal; + break; + } + // The cases all fall through. + switch (extraBytesToRead) { + case 5: ch += static_cast<unsigned char>(*source++); ch <<= 6; // remember, illegal UTF-8 + case 4: ch += static_cast<unsigned char>(*source++); ch <<= 6; // remember, illegal UTF-8 + case 3: ch += static_cast<unsigned char>(*source++); ch <<= 6; + case 2: ch += static_cast<unsigned char>(*source++); ch <<= 6; + case 1: ch += static_cast<unsigned char>(*source++); ch <<= 6; + case 0: ch += static_cast<unsigned char>(*source++); + } + ch -= offsetsFromUTF8[extraBytesToRead]; + + if (target >= targetEnd) { + source -= (extraBytesToRead + 1); // Back up source pointer! + result = targetExhausted; break; + } + if (ch <= 0xFFFF) { + // UTF-16 surrogate values are illegal in UTF-32 + if (ch >= 0xD800 && ch <= 0xDFFF) { + if (strict) { + source -= (extraBytesToRead + 1); // return to the illegal value itself + result = sourceIllegal; + break; + } else + *target++ = 0xFFFD; + } else + *target++ = (UChar)ch; // normal case + } else if (ch > 0x10FFFF) { + if (strict) { + result = sourceIllegal; + source -= (extraBytesToRead + 1); // return to the start + break; // Bail out; shouldn't continue + } else + *target++ = 0xFFFD; + } else { + // target is a character in range 0xFFFF - 0x10FFFF + if (target + 1 >= targetEnd) { + source -= (extraBytesToRead + 1); // Back up source pointer! + result = targetExhausted; + break; + } + ch -= 0x0010000UL; + *target++ = (UChar)((ch >> 10) + 0xD800); + *target++ = (UChar)((ch & 0x03FF) + 0xDC00); + } + } + *sourceStart = source; + *targetStart = target; + return result; +} + +} +} diff --git a/src/3rdparty/webkit/JavaScriptCore/wtf/unicode/UTF8.h b/src/3rdparty/webkit/JavaScriptCore/wtf/unicode/UTF8.h new file mode 100644 index 0000000..a5ed93e --- /dev/null +++ b/src/3rdparty/webkit/JavaScriptCore/wtf/unicode/UTF8.h @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2007 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef WTF_UTF8_h +#define WTF_UTF8_h + +#include "Unicode.h" + +namespace WTF { + namespace Unicode { + + // Given a first byte, gives the length of the UTF-8 sequence it begins. + // Returns 0 for bytes that are not legal starts of UTF-8 sequences. + // Only allows sequences of up to 4 bytes, since that works for all Unicode characters (U-00000000 to U-0010FFFF). + int UTF8SequenceLength(char); + + // Takes a null-terminated C-style string with a UTF-8 sequence in it and converts it to a character. + // Only allows Unicode characters (U-00000000 to U-0010FFFF). + // Returns -1 if the sequence is not valid (including presence of extra bytes). + int decodeUTF8Sequence(const char*); + + typedef enum { + conversionOK, // conversion successful + sourceExhausted, // partial character in source, but hit end + targetExhausted, // insuff. room in target for conversion + sourceIllegal // source sequence is illegal/malformed + } ConversionResult; + + // These conversion functions take a "strict" argument. When this + // flag is set to strict, both irregular sequences and isolated surrogates + // will cause an error. When the flag is set to lenient, both irregular + // sequences and isolated surrogates are converted. + // + // Whether the flag is strict or lenient, all illegal sequences will cause + // an error return. This includes sequences such as: <F4 90 80 80>, <C0 80>, + // or <A0> in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code + // must check for illegal sequences. + // + // When the flag is set to lenient, characters over 0x10FFFF are converted + // to the replacement character; otherwise (when the flag is set to strict) + // they constitute an error. + + ConversionResult convertUTF8ToUTF16( + const char** sourceStart, const char* sourceEnd, + UChar** targetStart, UChar* targetEnd, bool strict = true); + + ConversionResult convertUTF16ToUTF8( + const UChar** sourceStart, const UChar* sourceEnd, + char** targetStart, char* targetEnd, bool strict = true); + } +} + +#endif // WTF_UTF8_h diff --git a/src/3rdparty/webkit/JavaScriptCore/wtf/unicode/Unicode.h b/src/3rdparty/webkit/JavaScriptCore/wtf/unicode/Unicode.h new file mode 100644 index 0000000..e6e8f23 --- /dev/null +++ b/src/3rdparty/webkit/JavaScriptCore/wtf/unicode/Unicode.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2006 George Staikos <staikos@kde.org> + * Copyright (C) 2006, 2008, 2009 Apple Inc. All rights reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#ifndef WTF_UNICODE_H +#define WTF_UNICODE_H + +#include <wtf/Assertions.h> + +#if USE(QT4_UNICODE) +#include "qt4/UnicodeQt4.h" +#elif USE(ICU_UNICODE) +#include <wtf/unicode/icu/UnicodeIcu.h> +#else +#error "Unknown Unicode implementation" +#endif + +COMPILE_ASSERT(sizeof(UChar) == 2, UCharIsTwoBytes); + +#endif // WTF_UNICODE_H diff --git a/src/3rdparty/webkit/JavaScriptCore/wtf/unicode/icu/CollatorICU.cpp b/src/3rdparty/webkit/JavaScriptCore/wtf/unicode/icu/CollatorICU.cpp new file mode 100644 index 0000000..79dec79 --- /dev/null +++ b/src/3rdparty/webkit/JavaScriptCore/wtf/unicode/icu/CollatorICU.cpp @@ -0,0 +1,144 @@ +/* + * Copyright (C) 2008 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "Collator.h" + +#if USE(ICU_UNICODE) && !UCONFIG_NO_COLLATION + +#include "Assertions.h" +#include "Threading.h" +#include <unicode/ucol.h> +#include <string.h> + +#if PLATFORM(DARWIN) +#include <CoreFoundation/CoreFoundation.h> +#endif + +namespace WTF { + +static UCollator* cachedCollator; +static Mutex& cachedCollatorMutex() +{ + AtomicallyInitializedStatic(Mutex&, mutex = *new Mutex); + return mutex; +} + +Collator::Collator(const char* locale) + : m_collator(0) + , m_locale(locale ? strdup(locale) : 0) + , m_lowerFirst(false) +{ +} + +std::auto_ptr<Collator> Collator::userDefault() +{ +#if PLATFORM(DARWIN) && PLATFORM(CF) + // Mac OS X doesn't set UNIX locale to match user-selected one, so ICU default doesn't work. + CFStringRef collationOrder = (CFStringRef)CFPreferencesCopyValue(CFSTR("AppleCollationOrder"), kCFPreferencesAnyApplication, kCFPreferencesCurrentUser, kCFPreferencesAnyHost); + char buf[256]; + if (collationOrder) { + CFStringGetCString(collationOrder, buf, sizeof(buf), kCFStringEncodingASCII); + CFRelease(collationOrder); + return std::auto_ptr<Collator>(new Collator(buf)); + } else + return std::auto_ptr<Collator>(new Collator("")); +#else + return std::auto_ptr<Collator>(new Collator(0)); +#endif +} + +Collator::~Collator() +{ + releaseCollator(); + free(m_locale); +} + +void Collator::setOrderLowerFirst(bool lowerFirst) +{ + m_lowerFirst = lowerFirst; +} + +Collator::Result Collator::collate(const UChar* lhs, size_t lhsLength, const UChar* rhs, size_t rhsLength) const +{ + if (!m_collator) + createCollator(); + + return static_cast<Result>(ucol_strcoll(m_collator, lhs, lhsLength, rhs, rhsLength)); +} + +void Collator::createCollator() const +{ + ASSERT(!m_collator); + UErrorCode status = U_ZERO_ERROR; + + { + Locker<Mutex> lock(cachedCollatorMutex()); + if (cachedCollator) { + const char* cachedCollatorLocale = ucol_getLocaleByType(cachedCollator, ULOC_REQUESTED_LOCALE, &status); + ASSERT(U_SUCCESS(status)); + ASSERT(cachedCollatorLocale); + + UColAttributeValue cachedCollatorLowerFirst = ucol_getAttribute(cachedCollator, UCOL_CASE_FIRST, &status); + ASSERT(U_SUCCESS(status)); + + // FIXME: default locale is never matched, because ucol_getLocaleByType returns the actual one used, not 0. + if (m_locale && 0 == strcmp(cachedCollatorLocale, m_locale) + && ((UCOL_LOWER_FIRST == cachedCollatorLowerFirst && m_lowerFirst) || (UCOL_UPPER_FIRST == cachedCollatorLowerFirst && !m_lowerFirst))) { + m_collator = cachedCollator; + cachedCollator = 0; + return; + } + } + } + + m_collator = ucol_open(m_locale, &status); + if (U_FAILURE(status)) { + status = U_ZERO_ERROR; + m_collator = ucol_open("", &status); // Fallback to Unicode Collation Algorithm. + } + ASSERT(U_SUCCESS(status)); + + ucol_setAttribute(m_collator, UCOL_CASE_FIRST, m_lowerFirst ? UCOL_LOWER_FIRST : UCOL_UPPER_FIRST, &status); + ASSERT(U_SUCCESS(status)); +} + +void Collator::releaseCollator() +{ + { + Locker<Mutex> lock(cachedCollatorMutex()); + if (cachedCollator) + ucol_close(cachedCollator); + cachedCollator = m_collator; + m_collator = 0; + } +} + +} + +#endif diff --git a/src/3rdparty/webkit/JavaScriptCore/wtf/unicode/icu/UnicodeIcu.h b/src/3rdparty/webkit/JavaScriptCore/wtf/unicode/icu/UnicodeIcu.h new file mode 100644 index 0000000..608aea6 --- /dev/null +++ b/src/3rdparty/webkit/JavaScriptCore/wtf/unicode/icu/UnicodeIcu.h @@ -0,0 +1,219 @@ +/* + * Copyright (C) 2006 George Staikos <staikos@kde.org> + * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> + * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#ifndef WTF_UNICODE_ICU_H +#define WTF_UNICODE_ICU_H + +#include <stdlib.h> +#include <unicode/uchar.h> +#include <unicode/ustring.h> +#include <unicode/utf16.h> + +namespace WTF { +namespace Unicode { + +enum Direction { + LeftToRight = U_LEFT_TO_RIGHT, + RightToLeft = U_RIGHT_TO_LEFT, + EuropeanNumber = U_EUROPEAN_NUMBER, + EuropeanNumberSeparator = U_EUROPEAN_NUMBER_SEPARATOR, + EuropeanNumberTerminator = U_EUROPEAN_NUMBER_TERMINATOR, + ArabicNumber = U_ARABIC_NUMBER, + CommonNumberSeparator = U_COMMON_NUMBER_SEPARATOR, + BlockSeparator = U_BLOCK_SEPARATOR, + SegmentSeparator = U_SEGMENT_SEPARATOR, + WhiteSpaceNeutral = U_WHITE_SPACE_NEUTRAL, + OtherNeutral = U_OTHER_NEUTRAL, + LeftToRightEmbedding = U_LEFT_TO_RIGHT_EMBEDDING, + LeftToRightOverride = U_LEFT_TO_RIGHT_OVERRIDE, + RightToLeftArabic = U_RIGHT_TO_LEFT_ARABIC, + RightToLeftEmbedding = U_RIGHT_TO_LEFT_EMBEDDING, + RightToLeftOverride = U_RIGHT_TO_LEFT_OVERRIDE, + PopDirectionalFormat = U_POP_DIRECTIONAL_FORMAT, + NonSpacingMark = U_DIR_NON_SPACING_MARK, + BoundaryNeutral = U_BOUNDARY_NEUTRAL +}; + +enum DecompositionType { + DecompositionNone = U_DT_NONE, + DecompositionCanonical = U_DT_CANONICAL, + DecompositionCompat = U_DT_COMPAT, + DecompositionCircle = U_DT_CIRCLE, + DecompositionFinal = U_DT_FINAL, + DecompositionFont = U_DT_FONT, + DecompositionFraction = U_DT_FRACTION, + DecompositionInitial = U_DT_INITIAL, + DecompositionIsolated = U_DT_ISOLATED, + DecompositionMedial = U_DT_MEDIAL, + DecompositionNarrow = U_DT_NARROW, + DecompositionNoBreak = U_DT_NOBREAK, + DecompositionSmall = U_DT_SMALL, + DecompositionSquare = U_DT_SQUARE, + DecompositionSub = U_DT_SUB, + DecompositionSuper = U_DT_SUPER, + DecompositionVertical = U_DT_VERTICAL, + DecompositionWide = U_DT_WIDE, +}; + +enum CharCategory { + NoCategory = 0, + Other_NotAssigned = U_MASK(U_GENERAL_OTHER_TYPES), + Letter_Uppercase = U_MASK(U_UPPERCASE_LETTER), + Letter_Lowercase = U_MASK(U_LOWERCASE_LETTER), + Letter_Titlecase = U_MASK(U_TITLECASE_LETTER), + Letter_Modifier = U_MASK(U_MODIFIER_LETTER), + Letter_Other = U_MASK(U_OTHER_LETTER), + + Mark_NonSpacing = U_MASK(U_NON_SPACING_MARK), + Mark_Enclosing = U_MASK(U_ENCLOSING_MARK), + Mark_SpacingCombining = U_MASK(U_COMBINING_SPACING_MARK), + + Number_DecimalDigit = U_MASK(U_DECIMAL_DIGIT_NUMBER), + Number_Letter = U_MASK(U_LETTER_NUMBER), + Number_Other = U_MASK(U_OTHER_NUMBER), + + Separator_Space = U_MASK(U_SPACE_SEPARATOR), + Separator_Line = U_MASK(U_LINE_SEPARATOR), + Separator_Paragraph = U_MASK(U_PARAGRAPH_SEPARATOR), + + Other_Control = U_MASK(U_CONTROL_CHAR), + Other_Format = U_MASK(U_FORMAT_CHAR), + Other_PrivateUse = U_MASK(U_PRIVATE_USE_CHAR), + Other_Surrogate = U_MASK(U_SURROGATE), + + Punctuation_Dash = U_MASK(U_DASH_PUNCTUATION), + Punctuation_Open = U_MASK(U_START_PUNCTUATION), + Punctuation_Close = U_MASK(U_END_PUNCTUATION), + Punctuation_Connector = U_MASK(U_CONNECTOR_PUNCTUATION), + Punctuation_Other = U_MASK(U_OTHER_PUNCTUATION), + + Symbol_Math = U_MASK(U_MATH_SYMBOL), + Symbol_Currency = U_MASK(U_CURRENCY_SYMBOL), + Symbol_Modifier = U_MASK(U_MODIFIER_SYMBOL), + Symbol_Other = U_MASK(U_OTHER_SYMBOL), + + Punctuation_InitialQuote = U_MASK(U_INITIAL_PUNCTUATION), + Punctuation_FinalQuote = U_MASK(U_FINAL_PUNCTUATION) +}; + +inline UChar32 foldCase(UChar32 c) +{ + return u_foldCase(c, U_FOLD_CASE_DEFAULT); +} + +inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) +{ + UErrorCode status = U_ZERO_ERROR; + int realLength = u_strFoldCase(result, resultLength, src, srcLength, U_FOLD_CASE_DEFAULT, &status); + *error = !U_SUCCESS(status); + return realLength; +} + +inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) +{ + UErrorCode status = U_ZERO_ERROR; + int realLength = u_strToLower(result, resultLength, src, srcLength, "", &status); + *error = !!U_FAILURE(status); + return realLength; +} + +inline UChar32 toLower(UChar32 c) +{ + return u_tolower(c); +} + +inline UChar32 toUpper(UChar32 c) +{ + return u_toupper(c); +} + +inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) +{ + UErrorCode status = U_ZERO_ERROR; + int realLength = u_strToUpper(result, resultLength, src, srcLength, "", &status); + *error = !!U_FAILURE(status); + return realLength; +} + +inline UChar32 toTitleCase(UChar32 c) +{ + return u_totitle(c); +} + +inline bool isArabicChar(UChar32 c) +{ + return ublock_getCode(c) == UBLOCK_ARABIC; +} + +inline bool isSeparatorSpace(UChar32 c) +{ + return u_charType(c) == U_SPACE_SEPARATOR; +} + +inline bool isPrintableChar(UChar32 c) +{ + return !!u_isprint(c); +} + +inline bool isPunct(UChar32 c) +{ + return !!u_ispunct(c); +} + +inline UChar32 mirroredChar(UChar32 c) +{ + return u_charMirror(c); +} + +inline CharCategory category(UChar32 c) +{ + return static_cast<CharCategory>(U_GET_GC_MASK(c)); +} + +inline Direction direction(UChar32 c) +{ + return static_cast<Direction>(u_charDirection(c)); +} + +inline bool isLower(UChar32 c) +{ + return !!u_islower(c); +} + +inline uint8_t combiningClass(UChar32 c) +{ + return u_getCombiningClass(c); +} + +inline DecompositionType decompositionType(UChar32 c) +{ + return static_cast<DecompositionType>(u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE)); +} + +inline int umemcasecmp(const UChar* a, const UChar* b, int len) +{ + return u_memcasecmp(a, b, len, U_FOLD_CASE_DEFAULT); +} + +} } + +#endif // WTF_UNICODE_ICU_H diff --git a/src/3rdparty/webkit/JavaScriptCore/wtf/unicode/qt4/UnicodeQt4.h b/src/3rdparty/webkit/JavaScriptCore/wtf/unicode/qt4/UnicodeQt4.h new file mode 100644 index 0000000..d7d78ce --- /dev/null +++ b/src/3rdparty/webkit/JavaScriptCore/wtf/unicode/qt4/UnicodeQt4.h @@ -0,0 +1,526 @@ +/* + * Copyright (C) 2006 George Staikos <staikos@kde.org> + * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> + * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#ifndef WTF_UNICODE_QT4_H +#define WTF_UNICODE_QT4_H + +#include <QChar> +#include <QString> + +#include <config.h> + +#include <stdint.h> + +#if QT_VERSION >= 0x040300 +QT_BEGIN_NAMESPACE +namespace QUnicodeTables { + struct Properties { + ushort category : 8; + ushort line_break_class : 8; + ushort direction : 8; + ushort combiningClass :8; + ushort joining : 2; + signed short digitValue : 6; /* 5 needed */ + ushort unicodeVersion : 4; + ushort lowerCaseSpecial : 1; + ushort upperCaseSpecial : 1; + ushort titleCaseSpecial : 1; + ushort caseFoldSpecial : 1; /* currently unused */ + signed short mirrorDiff : 16; + signed short lowerCaseDiff : 16; + signed short upperCaseDiff : 16; + signed short titleCaseDiff : 16; + signed short caseFoldDiff : 16; + }; + Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4); + Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2); +} +QT_END_NAMESPACE +#endif + +// ugly hack to make UChar compatible with JSChar in API/JSStringRef.h +#if defined(Q_OS_WIN) +typedef wchar_t UChar; +#else +typedef uint16_t UChar; +#endif +typedef uint32_t UChar32; + +// some defines from ICU + +#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800) +#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00) +#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) +#define U16_GET_SUPPLEMENTARY(lead, trail) \ + (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET) + +#define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0) +#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00) + +#define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800) +#define U16_IS_SINGLE(c) !U_IS_SURROGATE(c) +#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c) +#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0) + +#define U16_NEXT(s, i, length, c) { \ + (c)=(s)[(i)++]; \ + if(U16_IS_LEAD(c)) { \ + uint16_t __c2; \ + if((i)<(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \ + ++(i); \ + (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ + } \ + } \ +} + +#define U_MASK(x) ((uint32_t)1<<(x)) + +namespace WTF { +namespace Unicode { + +enum Direction { + LeftToRight = QChar::DirL, + RightToLeft = QChar::DirR, + EuropeanNumber = QChar::DirEN, + EuropeanNumberSeparator = QChar::DirES, + EuropeanNumberTerminator = QChar::DirET, + ArabicNumber = QChar::DirAN, + CommonNumberSeparator = QChar::DirCS, + BlockSeparator = QChar::DirB, + SegmentSeparator = QChar::DirS, + WhiteSpaceNeutral = QChar::DirWS, + OtherNeutral = QChar::DirON, + LeftToRightEmbedding = QChar::DirLRE, + LeftToRightOverride = QChar::DirLRO, + RightToLeftArabic = QChar::DirAL, + RightToLeftEmbedding = QChar::DirRLE, + RightToLeftOverride = QChar::DirRLO, + PopDirectionalFormat = QChar::DirPDF, + NonSpacingMark = QChar::DirNSM, + BoundaryNeutral = QChar::DirBN +}; + +enum DecompositionType { + DecompositionNone = QChar::NoDecomposition, + DecompositionCanonical = QChar::Canonical, + DecompositionCompat = QChar::Compat, + DecompositionCircle = QChar::Circle, + DecompositionFinal = QChar::Final, + DecompositionFont = QChar::Font, + DecompositionFraction = QChar::Fraction, + DecompositionInitial = QChar::Initial, + DecompositionIsolated = QChar::Isolated, + DecompositionMedial = QChar::Medial, + DecompositionNarrow = QChar::Narrow, + DecompositionNoBreak = QChar::NoBreak, + DecompositionSmall = QChar::Small, + DecompositionSquare = QChar::Square, + DecompositionSub = QChar::Sub, + DecompositionSuper = QChar::Super, + DecompositionVertical = QChar::Vertical, + DecompositionWide = QChar::Wide +}; + +enum CharCategory { + NoCategory = 0, + Mark_NonSpacing = U_MASK(QChar::Mark_NonSpacing), + Mark_SpacingCombining = U_MASK(QChar::Mark_SpacingCombining), + Mark_Enclosing = U_MASK(QChar::Mark_Enclosing), + Number_DecimalDigit = U_MASK(QChar::Number_DecimalDigit), + Number_Letter = U_MASK(QChar::Number_Letter), + Number_Other = U_MASK(QChar::Number_Other), + Separator_Space = U_MASK(QChar::Separator_Space), + Separator_Line = U_MASK(QChar::Separator_Line), + Separator_Paragraph = U_MASK(QChar::Separator_Paragraph), + Other_Control = U_MASK(QChar::Other_Control), + Other_Format = U_MASK(QChar::Other_Format), + Other_Surrogate = U_MASK(QChar::Other_Surrogate), + Other_PrivateUse = U_MASK(QChar::Other_PrivateUse), + Other_NotAssigned = U_MASK(QChar::Other_NotAssigned), + Letter_Uppercase = U_MASK(QChar::Letter_Uppercase), + Letter_Lowercase = U_MASK(QChar::Letter_Lowercase), + Letter_Titlecase = U_MASK(QChar::Letter_Titlecase), + Letter_Modifier = U_MASK(QChar::Letter_Modifier), + Letter_Other = U_MASK(QChar::Letter_Other), + Punctuation_Connector = U_MASK(QChar::Punctuation_Connector), + Punctuation_Dash = U_MASK(QChar::Punctuation_Dash), + Punctuation_Open = U_MASK(QChar::Punctuation_Open), + Punctuation_Close = U_MASK(QChar::Punctuation_Close), + Punctuation_InitialQuote = U_MASK(QChar::Punctuation_InitialQuote), + Punctuation_FinalQuote = U_MASK(QChar::Punctuation_FinalQuote), + Punctuation_Other = U_MASK(QChar::Punctuation_Other), + Symbol_Math = U_MASK(QChar::Symbol_Math), + Symbol_Currency = U_MASK(QChar::Symbol_Currency), + Symbol_Modifier = U_MASK(QChar::Symbol_Modifier), + Symbol_Other = U_MASK(QChar::Symbol_Other) +}; + + +#if QT_VERSION >= 0x040300 + +// FIXME: handle surrogates correctly in all methods + +inline UChar32 toLower(UChar32 ch) +{ + return QChar::toLower(ch); +} + +inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) +{ + const UChar *e = src + srcLength; + const UChar *s = src; + UChar *r = result; + uint rindex = 0; + + // this avoids one out of bounds check in the loop + if (s < e && QChar(*s).isLowSurrogate()) { + if (r) + r[rindex] = *s++; + ++rindex; + } + + int needed = 0; + while (s < e && (rindex < uint(resultLength) || !r)) { + uint c = *s; + if (QChar(c).isLowSurrogate() && QChar(*(s - 1)).isHighSurrogate()) + c = QChar::surrogateToUcs4(*(s - 1), c); + const QUnicodeTables::Properties *prop = QUnicodeTables::properties(c); + if (prop->lowerCaseSpecial) { + QString qstring; + if (c < 0x10000) { + qstring += QChar(c); + } else { + qstring += QChar(*(s-1)); + qstring += QChar(*s); + } + qstring = qstring.toLower(); + for (int i = 0; i < qstring.length(); ++i) { + if (rindex >= uint(resultLength)) { + needed += qstring.length() - i; + break; + } + if (r) + r[rindex] = qstring.at(i).unicode(); + ++rindex; + } + } else { + if (r) + r[rindex] = *s + prop->lowerCaseDiff; + ++rindex; + } + ++s; + } + if (s < e) + needed += e - s; + *error = (needed != 0); + if (rindex < uint(resultLength)) + r[rindex] = 0; + return rindex + needed; +} + +inline UChar32 toUpper(UChar32 ch) +{ + return QChar::toUpper(ch); +} + +inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) +{ + const UChar *e = src + srcLength; + const UChar *s = src; + UChar *r = result; + int rindex = 0; + + // this avoids one out of bounds check in the loop + if (s < e && QChar(*s).isLowSurrogate()) { + if (r) + r[rindex] = *s++; + ++rindex; + } + + int needed = 0; + while (s < e && (rindex < resultLength || !r)) { + uint c = *s; + if (QChar(c).isLowSurrogate() && QChar(*(s - 1)).isHighSurrogate()) + c = QChar::surrogateToUcs4(*(s - 1), c); + const QUnicodeTables::Properties *prop = QUnicodeTables::properties(c); + if (prop->upperCaseSpecial) { + QString qstring; + if (c < 0x10000) { + qstring += QChar(c); + } else { + qstring += QChar(*(s-1)); + qstring += QChar(*s); + } + qstring = qstring.toUpper(); + for (int i = 0; i < qstring.length(); ++i) { + if (rindex >= resultLength) { + needed += qstring.length() - i; + break; + } + if (r) + r[rindex] = qstring.at(i).unicode(); + ++rindex; + } + } else { + if (r) + r[rindex] = *s + prop->upperCaseDiff; + ++rindex; + } + ++s; + } + if (s < e) + needed += e - s; + *error = (needed != 0); + if (rindex < resultLength) + r[rindex] = 0; + return rindex + needed; +} + +inline int toTitleCase(UChar32 c) +{ + return QChar::toTitleCase(c); +} + +inline UChar32 foldCase(UChar32 c) +{ + return QChar::toCaseFolded(c); +} + +inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) +{ + // FIXME: handle special casing. Easiest with some low level API in Qt + *error = false; + if (resultLength < srcLength) { + *error = true; + return srcLength; + } + for (int i = 0; i < srcLength; ++i) + result[i] = QChar::toCaseFolded(ushort(src[i])); + return srcLength; +} + +inline bool isArabicChar(UChar32 c) +{ + return c >= 0x0600 && c <= 0x06FF; +} + +inline bool isPrintableChar(UChar32 c) +{ + const uint test = U_MASK(QChar::Other_Control) | + U_MASK(QChar::Other_NotAssigned); + return !(U_MASK(QChar::category(c)) & test); +} + +inline bool isSeparatorSpace(UChar32 c) +{ + return QChar::category(c) == QChar::Separator_Space; +} + +inline bool isPunct(UChar32 c) +{ + const uint test = U_MASK(QChar::Punctuation_Connector) | + U_MASK(QChar::Punctuation_Dash) | + U_MASK(QChar::Punctuation_Open) | + U_MASK(QChar::Punctuation_Close) | + U_MASK(QChar::Punctuation_InitialQuote) | + U_MASK(QChar::Punctuation_FinalQuote) | + U_MASK(QChar::Punctuation_Other); + return U_MASK(QChar::category(c)) & test; +} + +inline bool isLower(UChar32 c) +{ + return QChar::category(c) == QChar::Letter_Lowercase; +} + +inline UChar32 mirroredChar(UChar32 c) +{ + return QChar::mirroredChar(c); +} + +inline uint8_t combiningClass(UChar32 c) +{ + return QChar::combiningClass(c); +} + +inline DecompositionType decompositionType(UChar32 c) +{ + return (DecompositionType)QChar::decompositionTag(c); +} + +inline int umemcasecmp(const UChar* a, const UChar* b, int len) +{ + // handle surrogates correctly + for (int i = 0; i < len; ++i) { + uint c1 = QChar::toCaseFolded(ushort(a[i])); + uint c2 = QChar::toCaseFolded(ushort(b[i])); + if (c1 != c2) + return c1 - c2; + } + return 0; +} + +inline Direction direction(UChar32 c) +{ + return (Direction)QChar::direction(c); +} + +inline CharCategory category(UChar32 c) +{ + return (CharCategory) U_MASK(QChar::category(c)); +} + +#else + +inline UChar32 toLower(UChar32 ch) +{ + if (ch > 0xffff) + return ch; + return QChar((unsigned short)ch).toLower().unicode(); +} + +inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) +{ + *error = false; + if (resultLength < srcLength) { + *error = true; + return srcLength; + } + for (int i = 0; i < srcLength; ++i) + result[i] = QChar(src[i]).toLower().unicode(); + return srcLength; +} + +inline UChar32 toUpper(UChar32 ch) +{ + if (ch > 0xffff) + return ch; + return QChar((unsigned short)ch).toUpper().unicode(); +} + +inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) +{ + *error = false; + if (resultLength < srcLength) { + *error = true; + return srcLength; + } + for (int i = 0; i < srcLength; ++i) + result[i] = QChar(src[i]).toUpper().unicode(); + return srcLength; +} + +inline int toTitleCase(UChar32 c) +{ + if (c > 0xffff) + return c; + return QChar((unsigned short)c).toUpper().unicode(); +} + +inline UChar32 foldCase(UChar32 c) +{ + if (c > 0xffff) + return c; + return QChar((unsigned short)c).toLower().unicode(); +} + +inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) +{ + return toLower(result, resultLength, src, srcLength, error); +} + +inline bool isPrintableChar(UChar32 c) +{ + return (c & 0xffff0000) == 0 && QChar((unsigned short)c).isPrint(); +} + +inline bool isArabicChar(UChar32 c) +{ + return c >= 0x0600 && c <= 0x06FF; +} + +inline bool isSeparatorSpace(UChar32 c) +{ + return (c & 0xffff0000) == 0 && QChar((unsigned short)c).category() == QChar::Separator_Space; +} + +inline bool isPunct(UChar32 c) +{ + return (c & 0xffff0000) == 0 && QChar((unsigned short)c).isPunct(); +} + +inline bool isLower(UChar32 c) +{ + return (c & 0xffff0000) == 0 && QChar((unsigned short)c).category() == QChar::Letter_Lowercase; +} + +inline UChar32 mirroredChar(UChar32 c) +{ + if (c > 0xffff) + return c; + return QChar(c).mirroredChar().unicode(); +} + +inline uint8_t combiningClass(UChar32 c) +{ + if (c > 0xffff) + return 0; + return QChar((unsigned short)c).combiningClass(); +} + +inline DecompositionType decompositionType(UChar32 c) +{ + if (c > 0xffff) + return DecompositionNone; + return (DecompositionType)QChar(c).decompositionTag(); +} + +inline int umemcasecmp(const UChar* a, const UChar* b, int len) +{ + for (int i = 0; i < len; ++i) { + QChar c1 = QChar(a[i]).toLower(); + QChar c2 = QChar(b[i]).toLower(); + if (c1 != c2) + return c1.unicode() - c2.unicode(); + } + return 0; +} + +inline Direction direction(UChar32 c) +{ + if (c > 0xffff) + return LeftToRight; + return (Direction)QChar(c).direction(); +} + +inline CharCategory category(UChar32 c) +{ + if (c > 0xffff) + return NoCategory; + return (CharCategory) U_MASK(QChar(c).category()); +} + +#endif + +} } + +#endif // WTF_UNICODE_QT4_H |