summary refs log tree commit diff stats
path: root/src/utf8.cpp
diff options
context:
space:
mode:
author Dimitri van Heesch <doxygen@gmail.com> 2021-04-27 18:27:56 (GMT)
committer Dimitri van Heesch <doxygen@gmail.com> 2021-04-27 18:27:56 (GMT)
commit 4910e7500060284d815092d8058bbc3e30b925c8 (patch)
tree 7162dfe4ad92c84787307e19e726b9f35d2c494d /src/utf8.cpp
parent 55e86052e0522ac7b51743449055572cc8bc7823 (diff)
download Doxygen-4910e7500060284d815092d8058bbc3e30b925c8.zip
Doxygen-4910e7500060284d815092d8058bbc3e30b925c8.tar.gz
Doxygen-4910e7500060284d815092d8058bbc3e30b925c8.tar.bz2
Optimize UTF8 lower/upper case conversion for ASCII
Diffstat (limited to 'src/utf8.cpp')
-rw-r--r-- src/utf8.cpp 43
1 files changed, 34 insertions, 9 deletions
diff --git a/src/utf8.cpp b/src/utf8.cpp
index a00f615..e7108f4 100644
--- a/src/utf8.cpp
+++ b/src/utf8.cpp
@@ -74,6 +74,11 @@ static inline uint32_t convertUTF8CharToUnicode(const char *s,size_t bytesLeft,i
return 0;
}
unsigned char uc = static_cast<unsigned char>(*s);
+ if (uc<128) // ASCII case
+ {
+ len=1;
+ return uc;
+ }
switch (bytesLeft)
{
default:
@@ -134,39 +139,59 @@ uint32_t getUnicodeForUTF8CharAt(const std::string &input,size_t pos)
return convertUTF8CharToUnicode(charS.c_str(),charS.length(),len);
}
+static inline char asciiToLower(uint32_t code)
+{
+ return code>='A' && code<='Z' ? (char)(code+'a'-'A') : (char)code;
+}
+
+static inline char asciiToUpper(uint32_t code)
+{
+ return code>='a' && code<='z' ? (char)(code+'A'-'a') : (char)code;
+}
+
static inline std::string caseConvert(const std::string &input,
+ char (*asciiConversionFunc)(uint32_t code),
const char *(*conversionFunc)(uint32_t code))
{
uint32_t code;
- TextStream result;
+ std::string result;
+ result.reserve(input.length()); // assume all ASCII characters
int len;
size_t bytesLeft = input.length();
const char *p = input.c_str();
while ((code=convertUTF8CharToUnicode(p,bytesLeft,len)))
{
- const char *conv = conversionFunc(code);
- if (conv==nullptr) // no difference between lower and upper case
+ if (code<128) // ASCII case
{
- result.write(p,len);
+ char c = asciiConversionFunc(code);
+ result+=c;
}
- else // replace the input character with the conversion result
+ else // generic case
{
- result << conv;
+ const char *conv = conversionFunc(code);
+ if (conv==nullptr) // no difference between lower and upper case
+ {
+ result.append(p,len);
+ }
+ else // replace the input character with the conversion result
+ {
+ result.append(conv);
+ }
}
p+=len;
bytesLeft-=len;
}
- return result.str();
+ return result;
}
std::string convertUTF8ToLower(const std::string &input)
{
- return caseConvert(input,convertUnicodeToLower);
+ return caseConvert(input,asciiToLower,convertUnicodeToLower);
}
std::string convertUTF8ToUpper(const std::string &input)
{
- return caseConvert(input,convertUnicodeToUpper);
+ return caseConvert(input,asciiToUpper,convertUnicodeToUpper);
}
const char *writeUTF8Char(TextStream &t,const char *s)