diff options
author | Dimitri van Heesch <doxygen@gmail.com> | 2021-02-21 12:42:24 (GMT) |
---|---|---|
committer | Dimitri van Heesch <doxygen@gmail.com> | 2021-02-21 12:42:24 (GMT) |
commit | ee3dd13f2ccd4f128a97ea2da1a089656452dd10 (patch) | |
tree | 65e1ab4715729d3e212cbaf2e7cc7d455456d7c8 | |
parent | e43aaccb144ad800db4ee72137bdebc735217a31 (diff) | |
download | Doxygen-ee3dd13f2ccd4f128a97ea2da1a089656452dd10.zip Doxygen-ee3dd13f2ccd4f128a97ea2da1a089656452dd10.tar.gz Doxygen-ee3dd13f2ccd4f128a97ea2da1a089656452dd10.tar.bz2 |
Regression: specifying character ranges for FILE_PATTERNS didn't work
-rw-r--r-- | src/util.cpp | 73 |
1 files changed, 19 insertions, 54 deletions
diff --git a/src/util.cpp b/src/util.cpp index 168fdf0..80ea950 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -1279,70 +1279,35 @@ int filterCRLF(char *buf,int len) return dest; // length of the valid part of the buf } -template<class StringType> -static bool isMatchingWildcard(const StringType &input,size_t input_pos, - const StringType &pattern,size_t pattern_pos, - bool caseSensitive) +static std::string wildcard2regex(const std::string &pattern) { - // end of pattern reached - if (pattern_pos==pattern.length()) - { - // match iff also at the end of the input string - return input_pos==input.length(); - } - - // if we are at the end of the input string - if (input_pos==input.length()) + std::string result="^"; // match start of input + char c; + const char *p = pattern.c_str(); + while ((c=*p++)) { - // match iff the remainer of the pattern is '*'s - for (size_t i=pattern_pos; i<pattern.size();i++) + switch(c) { - if (pattern[i]!='*') return false; + case '*': result+=".*"; break; // '*' => '.*' + case '?': result+='.'; break; // '?' => '.' + case '.': case '+': case '\\': case '$': case '^': result+='\\'; result+=c; break; // escape + case '[': if (*p=='^') { result+="[^"; p++; } else result+=c; break; // don't escape ^ after [ + default: result+=c; break; // just copy } - return true; - } - - auto input_char = input[input_pos]; - auto pattern_char = pattern[pattern_pos]; - if (!caseSensitive) - { - input_char = (typename StringType::value_type)std::tolower(input_char); - pattern_char = (typename StringType::value_type)std::tolower(pattern_char); - } - // if current character matches against '?' pattern or literally - if (pattern[pattern_pos]=='?' || input_char==pattern_char) - { - // then continue with the next one - return isMatchingWildcard(input,input_pos+1,pattern,pattern_pos+1,caseSensitive); } - - // current character in the pattern is '*' - if (pattern[pattern_pos]=='*') - { - // try the same match against the next character in the input (current char is eaten by '*') - return isMatchingWildcard(input,input_pos+1,pattern,pattern_pos ,caseSensitive) || - // or try to match against the next character in the pattern ('*' matches an empty string) - isMatchingWildcard(input,input_pos ,pattern,pattern_pos+1,caseSensitive); - } - - // found a mismatch - return false; + result+='$'; // match end of input + return result; } static bool isMatchingWildcard(const std::string &input,const std::string &pattern, bool caseSensitive=false) { - if (!caseSensitive) // to properly match input 'FÓÓ' against pattern 'fóó*' we need - // to convert the std::string to a std::wstring so std::tolower works - // on multi-byte characters like Ó and not one individual bytes. - { - std::wstring_convert< std::codecvt_utf8<wchar_t> > conv; - return isMatchingWildcard(conv.from_bytes(input),0,conv.from_bytes(pattern),0,caseSensitive); - } - else // simple case were we can do byte matching for characters. - { - return isMatchingWildcard(input,0,pattern,0,caseSensitive); - } + + std::regex::flag_type flags = std::regex::ECMAScript; + if (!caseSensitive) flags |= std::regex::icase; + std::string re_str = wildcard2regex(pattern); + std::regex rePattern(wildcard2regex(pattern),flags); + return std::regex_match(input,rePattern); } static QCString getFilterFromList(const char *name,const StringVector &filterList,bool &found) |