diff options
author | Dimitri van Heesch <doxygen@gmail.com> | 2021-03-26 18:17:11 (GMT) |
---|---|---|
committer | Dimitri van Heesch <doxygen@gmail.com> | 2021-03-26 18:17:11 (GMT) |
commit | 30d9199b4ae662a8d2094f60e4dd4190718dd7c6 (patch) | |
tree | 721eb9a70085deaa8d63dcad4c9ee225779c623d | |
parent | f12f22e568febe0f403fb9018103f266f3cacd38 (diff) | |
download | Doxygen-30d9199b4ae662a8d2094f60e4dd4190718dd7c6.zip Doxygen-30d9199b4ae662a8d2094f60e4dd4190718dd7c6.tar.gz Doxygen-30d9199b4ae662a8d2094f60e4dd4190718dd7c6.tar.bz2 |
Avoid using std::isspace and friends on potentially multibyte characters
-rw-r--r-- | src/regex.cpp | 37 |
1 files changed, 29 insertions, 8 deletions
diff --git a/src/regex.cpp b/src/regex.cpp
index 62678b7..ae5476e 100644
--- a/src/regex.cpp
+++ b/src/regex.cpp
@@ -30,6 +30,27 @@
 namespace reg
 {
 
+static inline bool isspace(char c)
+{
+  return c==' ' || c=='\t' || c=='\n' || c=='\r';
+}
+
+static inline bool isalpha(char c)
+{
+  return c<0 || (c>='a' && c<='z') || (c>='A' && c<='Z');
+}
+
+static inline bool isdigit(char c)
+{
+  return c>='0' && c<='9';
+}
+
+static inline bool isalnum(char c)
+{
+  return isalpha(c) || isdigit(c);
+}
+
+
 /** Class representing a token in the compiled regular expression token stream.
  * A token has a kind and an optional value whose meaning depends on the kind.
  * It is also possible to store a (from,to) character range in a token.
@@ -417,8 +438,8 @@ void Ex::Private::dump()
 bool Ex::Private::matchAt(size_t tokenPos,const std::string &str,Match &match,const size_t pos,int level) const
 {
   DBG("%d:matchAt(tokenPos=%zu, str='%s', pos=%zu)\n",level,tokenPos,str.c_str(),pos);
-  auto isStartIdChar = [](char c) { return std::isalpha(c) || c=='_' || c<0; };
-  auto isIdChar = [](char c) { return std::isalnum(c) || c=='_' || c<0; };
+  auto isStartIdChar = [](char c) { return isalpha(c) || c=='_'; };
+  auto isIdChar = [](char c) { return isalnum(c) || c=='_'; };
   auto matchCharClass = [this,isStartIdChar,isIdChar](size_t tp,char c) -> bool
   {
     PToken tok = data[tp];
@@ -431,8 +452,8 @@ bool Ex::Private::matchAt(size_t tokenPos,const std::string &str,Match &match,co
       // first check for built-in ranges
       if ((tok.kind()==PToken::Kind::Alpha && isStartIdChar(c)) ||
           (tok.kind()==PToken::Kind::AlphaNum && isIdChar(c)) ||
-          (tok.kind()==PToken::Kind::WhiteSpace && std::isspace(c)) ||
-          (tok.kind()==PToken::Kind::Digit && std::isdigit(c))
+          (tok.kind()==PToken::Kind::WhiteSpace && isspace(c)) ||
+          (tok.kind()==PToken::Kind::Digit && isdigit(c))
          )
       {
         found=true;
@@ -481,12 +502,12 @@ bool Ex::Private::matchAt(size_t tokenPos,const std::string &str,Match &match,co
       }
       else if (tok.kind()==PToken::Kind::WhiteSpace) // '\s*' -> eat spaces
       {
-        while (index<=str.length() && std::isspace(str[index])) { index++; if (type==Optional) break; }
+        while (index<=str.length() && isspace(str[index])) { index++; if (type==Optional) break; }
         tokenPos++;
       }
       else if (tok.kind()==PToken::Kind::Digit) // '\d*' -> eat digits
       {
-        while (index<=str.length() && std::isdigit(str[index])) { index++; if (type==Optional) break; }
+        while (index<=str.length() && isdigit(str[index])) { index++; if (type==Optional) break; }
         tokenPos++;
       }
       else if (tok.kind()==PToken::Kind::Any) // '.*' -> eat all
@@ -537,11 +558,11 @@ bool Ex::Private::matchAt(size_t tokenPos,const std::string &str,Match &match,co
           index++;
           break;
         case PToken::Kind::WhiteSpace:
-          if (index>=str.length() || !std::isspace(str[index])) return false;
+          if (index>=str.length() || !isspace(str[index])) return false;
           index++;
           break;
         case PToken::Kind::Digit:
-          if (index>=str.length() || !std::isdigit(str[index])) return false;
+          if (index>=str.length() || !isdigit(str[index])) return false;
           index++;
           break;
         case PToken::Kind::BeginOfLine: