diff options
author | Dimitri van Heesch <dimitri@stack.nl> | 2013-09-14 15:11:20 (GMT) |
---|---|---|
committer | Dimitri van Heesch <dimitri@stack.nl> | 2013-09-15 18:12:34 (GMT) |
commit | bca6baee6685b489c36abec5a3b550921294e228 (patch) | |
tree | a9f2f6d2622e8aba8beed7477e87d7c9c7a468e1 /src/util.cpp | |
parent | f6bc941e73bd562b15705f7bc3c958267f75f842 (diff) | |
download | Doxygen-bca6baee6685b489c36abec5a3b550921294e228.zip Doxygen-bca6baee6685b489c36abec5a3b550921294e228.tar.gz Doxygen-bca6baee6685b489c36abec5a3b550921294e228.tar.bz2 |
Bug 705910 - Indexing and searching cannot treat non ASCII identifiers
Diffstat (limited to 'src/util.cpp')
-rw-r--r-- | src/util.cpp | 69 |
1 files changed, 69 insertions, 0 deletions
diff --git a/src/util.cpp b/src/util.cpp index 5abe4ed..60a0fe1 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -7919,3 +7919,72 @@ void addDocCrossReference(MemberDef *src,MemberDef *dst) } } +//-------------------------------------------------------------------------------------- + +/*! @brief Get one unicode character as an unsigned integer from utf-8 string + * + * @param s utf-8 encoded string + * @param idx byte position of given string \a s. + * @return the unicode codepoint, 0 - MAX_UNICODE_CODEPOINT + * @see getUtf8CodeToLower() + * @see getUtf8CodeToUpper() + */ +uint getUtf8Code( const QCString& s, int idx ) +{ + const int length = s.length(); + if (idx >= length) { return 0; } + const uint c0 = (uchar)s.at(idx); + if ( c0 < 0xC2 || c0 >= 0xF8 ) // 1 byte character + { + return c0; + } + if (idx+1 >= length) { return 0; } + const uint c1 = ((uchar)s.at(idx+1)) & 0x3f; + if ( c0 < 0xE0 ) // 2 byte character + { + return ((c0 & 0x1f) << 6) | c1; + } + if (idx+2 >= length) { return 0; } + const uint c2 = ((uchar)s.at(idx+2)) & 0x3f; + if ( c0 < 0xF0 ) // 3 byte character + { + return ((c0 & 0x0f) << 12) | (c1 << 6) | c2; + } + if (idx+3 >= length) { return 0; } + // 4 byte character + const uint c3 = ((uchar)s.at(idx+3)) & 0x3f; + return ((c0 & 0x07) << 18) | (c1 << 12) | (c2 << 6) | c3; +} + + +/*! @brief Returns one unicode character as an unsigned integer + * from utf-8 string, making the character lower case if it was upper case. + * + * @param s utf-8 encoded string + * @param idx byte position of given string \a s. + * @return the unicode codepoint, 0 - MAX_UNICODE_CODEPOINT, excludes 'A'-'Z' + * @see getUtf8Code() +*/ +uint getUtf8CodeToLower( const QCString& s, int idx ) +{ + const uint v = getUtf8Code( s, idx ); + return v < 0x7f ? tolower( v ) : v; +} + + +/*! @brief Returns one unicode character as an unsigned integer + * from utf-8 string, making the character upper case if it was lower case. 
+ * + * @param s utf-8 encoded string + * @param idx byte position of given string \a s. + * @return the unicode codepoint, 0 - MAX_UNICODE_CODEPOINT, excludes 'a'-'z' + * @see getUtf8Code() + */ +uint getUtf8CodeToUpper( const QCString& s, int idx ) +{ + const uint v = getUtf8Code( s, idx ); + return v < 0x7f ? toupper( v ) : v; +} + +//-------------------------------------------------------------------------------------- + |