summary | refs | log | tree | commit | diff | stats
path: root/src/regex.cpp
diff options
context:
space:
mode:
author: Dimitri van Heesch <doxygen@gmail.com> 2021-03-26 18:17:11 (GMT)
committer: Dimitri van Heesch <doxygen@gmail.com> 2021-03-26 18:17:11 (GMT)
commit: 30d9199b4ae662a8d2094f60e4dd4190718dd7c6 (patch)
tree: 721eb9a70085deaa8d63dcad4c9ee225779c623d /src/regex.cpp
parent: f12f22e568febe0f403fb9018103f266f3cacd38 (diff)
download: Doxygen-30d9199b4ae662a8d2094f60e4dd4190718dd7c6.zip
Doxygen-30d9199b4ae662a8d2094f60e4dd4190718dd7c6.tar.gz
Doxygen-30d9199b4ae662a8d2094f60e4dd4190718dd7c6.tar.bz2
Avoid using std::isspace and friends on potentially multibyte characters
Diffstat (limited to 'src/regex.cpp')
-rw-r--r-- src/regex.cpp 37
1 files changed, 29 insertions, 8 deletions
diff --git a/src/regex.cpp b/src/regex.cpp
index 62678b7..ae5476e 100644
--- a/src/regex.cpp
+++ b/src/regex.cpp
@@ -30,6 +30,27 @@
namespace reg
{
+/** Returns true if \a c is a space, horizontal tab, line feed or carriage return.
+ *  Only these four characters are recognized; every other value yields false.
+ */
+static inline bool isspace(char c)
+{
+  switch (c)
+  {
+    case ' ':
+    case '\t':
+    case '\n':
+    case '\r':
+      return true;
+    default:
+      return false;
+  }
+}
+
+/** Returns true if \a c should be treated as an alphabetic character.
+ *
+ *  ASCII letters qualify, and so does every byte with the high bit set, so
+ *  that the bytes of a multibyte character are accepted as well.
+ *  The high-bit test goes through unsigned char because the signedness of
+ *  plain char is implementation-defined: where char is unsigned, a test such
+ *  as c<0 is never true and those bytes would be rejected.
+ */
+static inline bool isalpha(char c)
+{
+  return static_cast<unsigned char>(c)>=0x80 || (c>='a' && c<='z') || (c>='A' && c<='Z');
+}
+
+/** Returns true if \a c is one of the ASCII decimal digits '0' through '9'. */
+static inline bool isdigit(char c)
+{
+  if (c<'0') return false; // below the digit range
+  return c<='9';
+}
+
+/** Returns true if \a c is accepted by either isalpha() or isdigit(). */
+static inline bool isalnum(char c)
+{
+  if (isalpha(c)) return true;
+  return isdigit(c);
+}
+
+
/** Class representing a token in the compiled regular expression token stream.
* A token has a kind and an optional value whose meaning depends on the kind.
* It is also possible to store a (from,to) character range in a token.
@@ -417,8 +438,8 @@ void Ex::Private::dump()
bool Ex::Private::matchAt(size_t tokenPos,const std::string &str,Match &match,const size_t pos,int level) const
{
DBG("%d:matchAt(tokenPos=%zu, str='%s', pos=%zu)\n",level,tokenPos,str.c_str(),pos);
- auto isStartIdChar = [](char c) { return std::isalpha(c) || c=='_' || c<0; };
- auto isIdChar = [](char c) { return std::isalnum(c) || c=='_' || c<0; };
+ auto isStartIdChar = [](char c) { return isalpha(c) || c=='_'; };
+ auto isIdChar = [](char c) { return isalnum(c) || c=='_'; };
auto matchCharClass = [this,isStartIdChar,isIdChar](size_t tp,char c) -> bool
{
PToken tok = data[tp];
@@ -431,8 +452,8 @@ bool Ex::Private::matchAt(size_t tokenPos,const std::string &str,Match &match,co
// first check for built-in ranges
if ((tok.kind()==PToken::Kind::Alpha && isStartIdChar(c)) ||
(tok.kind()==PToken::Kind::AlphaNum && isIdChar(c)) ||
- (tok.kind()==PToken::Kind::WhiteSpace && std::isspace(c)) ||
- (tok.kind()==PToken::Kind::Digit && std::isdigit(c))
+ (tok.kind()==PToken::Kind::WhiteSpace && isspace(c)) ||
+ (tok.kind()==PToken::Kind::Digit && isdigit(c))
)
{
found=true;
@@ -481,12 +502,12 @@ bool Ex::Private::matchAt(size_t tokenPos,const std::string &str,Match &match,co
}
else if (tok.kind()==PToken::Kind::WhiteSpace) // '\s*' -> eat spaces
{
- while (index<=str.length() && std::isspace(str[index])) { index++; if (type==Optional) break; }
+ while (index<=str.length() && isspace(str[index])) { index++; if (type==Optional) break; }
tokenPos++;
}
else if (tok.kind()==PToken::Kind::Digit) // '\d*' -> eat digits
{
- while (index<=str.length() && std::isdigit(str[index])) { index++; if (type==Optional) break; }
+ while (index<=str.length() && isdigit(str[index])) { index++; if (type==Optional) break; }
tokenPos++;
}
else if (tok.kind()==PToken::Kind::Any) // '.*' -> eat all
@@ -537,11 +558,11 @@ bool Ex::Private::matchAt(size_t tokenPos,const std::string &str,Match &match,co
index++;
break;
case PToken::Kind::WhiteSpace:
- if (index>=str.length() || !std::isspace(str[index])) return false;
+ if (index>=str.length() || !isspace(str[index])) return false;
index++;
break;
case PToken::Kind::Digit:
- if (index>=str.length() || !std::isdigit(str[index])) return false;
+ if (index>=str.length() || !isdigit(str[index])) return false;
index++;
break;
case PToken::Kind::BeginOfLine: