Merge branch 'master' into feature/bug_lex

author: Dimitri van Heesch <doxygen@gmail.com> 2021-03-03 18:54:18 (GMT)
committer: GitHub <noreply@github.com> 2021-03-03 18:54:18 (GMT)
commit: 23a88bee679fb545a4d22ac10f10203cb575056b (patch)
tree: a6943dfa74967be46a5cd65d2c4361afe2d3f0a4 /src/regex.cpp
parent: 789625caed4097a075819b7d7299ab1a808fcf08 (diff)
parent: cc78ebc6ad3d24d25ac779eeed9fb5fa3c89ee27 (diff)
download: Doxygen-23a88bee679fb545a4d22ac10f10203cb575056b.zip
Doxygen-23a88bee679fb545a4d22ac10f10203cb575056b.tar.gz
Doxygen-23a88bee679fb545a4d22ac10f10203cb575056b.tar.bz2
1 files changed, 736 insertions, 0 deletions
diff --git a/src/regex.cpp b/src/regex.cpp
new file mode 100644
index 0000000..62678b7
--- /dev/null
+++ b/src/regex.cpp
@@ -0,0 +1,736 @@
+/******************************************************************************
+ *
+ * Copyright (C) 1997-2021 by Dimitri van Heesch.
+ *
+ * Permission to use, copy, modify, and distribute this software and its
+ * documentation under the terms of the GNU General Public License is hereby
+ * granted. No representations are made about the suitability of this software
+ * for any purpose. It is provided "as is" without express or implied warranty.
+ * See the GNU General Public License for more details.
+ *
+ * Documents produced by Doxygen are derivative works derived from the
+ * input used in their production; they are not affected by this license.
+ *
+ */
+
+#include "regex.h"
+#include <cstdint>
+#include <vector>
+#include <cctype>
+#include <cassert>
+#include <algorithm>
+
+#define ENABLE_DEBUG 0
+#if ENABLE_DEBUG
+#define DBG(fmt,...) do { fprintf(stderr,fmt,__VA_ARGS__); } while(0)
+#else
+#define DBG(fmt,...) do {} while(0)
+#endif
+
+namespace reg
+{
+
+/** Class representing a token in the compiled regular expression token stream.
+ *  A token has a kind and an optional value whose meaning depends on the kind.
+ *  It is also possible to store a (from,to) character range in a token.
+ */
+class PToken
+{
+  public:
+    /** The kind of token.
+     *
+     *  Ranges per bit mask:
+     *  - `0x00FF` from part of a range, except for `0x0000` which is the End marker
+     *  - `0x1FFF` built-in ranges
+     *  - `0x2FFF` user defined ranges
+     *  - `0x4FFF` special operations
+     *  - `0x8000` literal character
+     */
+    enum class Kind : uint16_t
+    {
+      End            = 0x0000,
+      WhiteSpace     = 0x1001,    // \s    range [ \t\r\n]
+      Digit          = 0x1002,    // \d    range [0-9]
+      Alpha          = 0x1003,    // \a    range [a-z_A-Z\x80-\xFF]
+      AlphaNum       = 0x1004,    // \w    range [a-Z_A-Z0-9\x80-\xFF]
+      CharClass      = 0x2001,    // []
+      NegCharClass   = 0x2002,    // [^]
+      BeginOfLine    = 0x4001,    // ^
+      EndOfLine      = 0x4002,    // $
+      BeginOfWord    = 0x4003,    // \<
+      EndOfWord      = 0x4004,    // \>
+      BeginCapture   = 0x4005,    // (
+      EndCapture     = 0x4006,    // )
+      Any            = 0x4007,    // .
+      Star           = 0x4008,    // *
+      Optional       = 0x4009,    // ?
+      Character      = 0x8000     // c
+    };
+
+    /** returns a string representation of the tokens kind (useful for debugging). */
+    const char *kindStr() const
+    {
+      if ((m_rep>>16)>=0x1000 || m_rep==0)
+      {
+        switch(static_cast<Kind>((m_rep>>16)))
+        {
+          case Kind::End:           return "End";
+          case Kind::Alpha:         return "Alpha";
+          case Kind::AlphaNum:      return "AlphaNum";
+          case Kind::WhiteSpace:    return "WhiteSpace";
+          case Kind::Digit:         return "Digit";
+          case Kind::CharClass:     return "CharClass";
+          case Kind::NegCharClass:  return "NegCharClass";
+          case Kind::Character:     return "Character";
+          case Kind::BeginOfLine:   return "BeginOfLine";
+          case Kind::EndOfLine:     return "EndOfLine";
+          case Kind::BeginOfWord:   return "BeginOfWord";
+          case Kind::EndOfWord:     return "EndOfWord";
+          case Kind::BeginCapture:  return "BeginCapture";
+          case Kind::EndCapture:    return "EndCapture";
+          case Kind::Any:           return "Any";
+          case Kind::Star:          return "Star";
+          case Kind::Optional:      return "Optional";
+        }
+      }
+      else
+      {
+        return "Range";
+      }
+    }
+
+    /** Creates a token of kind 'End' */
+    PToken() : m_rep(0) {}
+
+    /** Creates a token of the given kind \a k */
+    explicit PToken(Kind k)    : m_rep(static_cast<uint32_t>(k)<<16) {}
+
+    /** Create a token for an ASCII character */
+    PToken(char c)             : m_rep((static_cast<uint32_t>(Kind::Character)<<16) |
+                                        static_cast<uint32_t>(c)) {}
+
+    /** Create a token for a byte of an UTF-8 character */
+    PToken(uint16_t v)         : m_rep((static_cast<uint32_t>(Kind::Character)<<16) |
+                                        static_cast<uint32_t>(v)) {}
+
+    /** Create a token representing a range from one character \a from to another character \a to */
+    PToken(uint16_t from,uint16_t to) : m_rep(static_cast<uint32_t>(from)<<16 | to) {}
+
+    /** Sets the value for a token */
+    void setValue(uint16_t value) { m_rep = (m_rep & 0xFFFF0000) | value; }
+
+    /** Returns the kind of the token */
+    Kind kind() const             { return static_cast<Kind>(m_rep>>16); }
+
+    /** Returns the 'from' part of the character range. Only valid if this token represents a range */
+    uint16_t from() const         { return m_rep>>16; }
+
+    /** Returns the 'to' part of the character range. Only valid if this token represents a range */
+    uint16_t to() const           { return m_rep & 0xFFFF; }
+
+    /** Returns the value for this token */
+    uint16_t value() const        { return m_rep & 0xFFFF; }
+
+    /** Returns the value for this token as a ASCII character */
+    char asciiValue() const       { return static_cast<char>(m_rep); }
+
+    /** Returns true iff this token represents a range of characters */
+    bool isRange() const          { return m_rep!=0 && from()<=to(); }
+
+    /** Returns true iff this token is a positive or negative character class */
+    bool isCharClass() const      { return kind()==Kind::CharClass || kind()==Kind::NegCharClass; }
+
+  private:
+    uint32_t m_rep;
+};
+
+/** Private members of a regular expression */
+class Ex::Private
+{
+  public:
+    /** Creates the private part */
+    Private(const std::string &pat) : pattern(pat)
+    {
+      data.reserve(100);
+    }
+    void compile();
+#if ENABLE_DEBUG
+    void dump();
+#endif
+    bool matchAt(size_t tokenPos,const std::string &str,Match &match,size_t pos,int level) const;
+
+    /** Flag indicating the expression was successfully compiled */
+    bool error = false;
+
+    /** The token stream representing the compiled regular expression. */
+    std::vector<PToken> data; // compiled pattern
+
+    /** The pattern string as passed by the user */
+    std::string pattern;
+};
+
+/** Compiles a regular expression passed as a string into a stream of tokens that can be used for
+ *  efficient searching.
+ */
+void Ex::Private::compile()
+{
+  error = false;
+  data.clear();
+  if (pattern.empty()) return;
+  const char *start = pattern.c_str();
+  const char *ps = start;
+  char c;
+
+  int prevTokenPos=-1;
+  int tokenPos=0;
+
+  auto addToken = [&](PToken tok)
+  {
+    tokenPos++;
+    data.emplace_back(tok);
+  };
+
+  auto getNextCharacter = [&]() -> PToken
+  {
+    char cs=*ps;
+    PToken result = PToken(cs);
+    if (cs=='\\') // escaped character
+    {
+      ps++;
+      cs=*ps;
+      switch (cs)
+      {
+        case 'n': result = PToken('\n');                      break;
+        case 'r': result = PToken('\r');                      break;
+        case 't': result = PToken('\t');                      break;
+        case 's': result = PToken(PToken::Kind::WhiteSpace);  break;
+        case 'a': result = PToken(PToken::Kind::Alpha);       break;
+        case 'w': result = PToken(PToken::Kind::AlphaNum);    break;
+        case 'd': result = PToken(PToken::Kind::Digit);       break;
+        case '<': result = PToken(PToken::Kind::BeginOfWord); break;
+        case '>': result = PToken(PToken::Kind::EndOfWord);   break;
+        case 'x':
+        case 'X':
+          {
+            uint16_t v=0;
+            for (int i=0;i<2 && (cs=(*(ps+1)));i++) // 2 hex digits
+            {
+              int d = (cs>='a' && cs<='f') ? cs-'a'+10 :
+                      (cs>='A' && cs<='F') ? cs-'A'+10 :
+                      (cs>='0' && cs<='9') ? cs-'0'    :
+                      -1;
+              if (d>=0) { v<<=4; v|=d; ps++; } else break;
+            }
+            result = PToken(v);
+          }
+          break;
+        case '\0': ps--; break; // backslash at the end of the pattern
+        default:
+          result = PToken(cs);
+          break;
+      }
+    }
+    return result;
+  };
+
+  while ((c=*ps))
+  {
+    switch (c)
+    {
+      case '^': // beginning of line (if first character of the pattern)
+        prevTokenPos = tokenPos;
+        addToken(ps==start ? PToken(PToken::Kind::BeginOfLine) :
+                            PToken(c));
+        break;
+      case '$': // end of the line (if last character of the pattern)
+        prevTokenPos = tokenPos;
+        addToken(*(ps+1)=='\0' ? PToken(PToken::Kind::EndOfLine) :
+                                PToken(c));
+        break;
+      case '.': // any character
+        prevTokenPos = tokenPos;
+        addToken(PToken(PToken::Kind::Any));
+        break;
+      case '(': // begin of capture group
+        prevTokenPos = tokenPos;
+        addToken(PToken(PToken::Kind::BeginCapture));
+        break;
+      case ')': // end of capture group
+        prevTokenPos = tokenPos;
+        addToken(PToken(PToken::Kind::EndCapture));
+        break;
+      case '[': // character class
+        {
+          prevTokenPos = tokenPos;
+          ps++;
+          if (*ps==0) { error=true; return; }
+          bool esc = *ps=='\\';
+          PToken tok = getNextCharacter();
+          ps++;
+          if (!esc && tok.kind()==PToken::Kind::Character &&
+                      tok.asciiValue()=='^') // negated character class
+          {
+            addToken(PToken(PToken::Kind::NegCharClass));
+            if (*ps==0) { error=true; return; }
+            tok = getNextCharacter();
+            ps++;
+          }
+          else
+          {
+            addToken(PToken(PToken::Kind::CharClass));
+          }
+          uint16_t numTokens=0;
+          while ((c=*ps))
+          {
+            if (c=='-' && *(ps+1)!=']' && *(ps+1)!=0) // range
+            {
+              getNextCharacter();
+              ps++;
+              PToken endTok = getNextCharacter();
+              ps++;
+              if (tok.value()>endTok.value())
+              {
+                addToken(PToken(endTok.value(),tok.value())); // swap start and end
+              }
+              else
+              {
+                addToken(PToken(tok.value(),endTok.value()));
+              }
+              numTokens++;
+            }
+            else // single char, from==to
+            {
+              if (tok.kind()==PToken::Kind::Character)
+              {
+                addToken(PToken(tok.value(),tok.value()));
+              }
+              else // special token, add as-is since from>to
+              {
+                addToken(tok);
+              }
+              numTokens++;
+            }
+            if (*ps==0) { error=true; return; } // expected at least a ]
+            esc = *ps=='\\';
+            tok = getNextCharacter();
+            if (!esc && tok.kind()==PToken::Kind::Character &&
+                        tok.value()==static_cast<uint16_t>(']'))
+            {
+              break; // end of character class
+            }
+            if (*ps==0) { error=true; return; } // no ] found
+            ps++;
+          }
+          // set the value of either NegCharClass or CharClass
+          data[prevTokenPos].setValue(numTokens);
+        }
+        break;
+      case '*': // 0 or more
+      case '+': // 1 or more
+      case '?': // optional: 0 or 1
+        {
+          if (prevTokenPos==-1)
+          {
+            error=true;
+            return;
+          }
+          switch (data[prevTokenPos].kind())
+          {
+            case PToken::Kind::BeginOfLine:  // $*  or  $+ or  $?
+            case PToken::Kind::BeginOfWord:  // \<* or \<+ or \<?
+            case PToken::Kind::EndOfWord:    // \>* or \>+ or \>?
+            case PToken::Kind::Star:         // **  or  *+ or  *?
+            case PToken::Kind::Optional:     // ?*  or  ?+ or  ??
+              error=true;
+              return;
+            default: // ok
+              break;
+          }
+          int ddiff = static_cast<int>(tokenPos-prevTokenPos);
+          if (*ps=='+') // convert <pat>+ -> <pat><pat>*
+          {
+            // turn a sequence of token [T1...Tn] followed by '+' into [T1..Tn T1..Tn T*]
+            //                          ddiff=n                                ^prevTokenPos
+            data.resize(data.size()+ddiff);
+            std::copy_n(data.begin()+prevTokenPos,ddiff,data.begin()+tokenPos);
+            prevTokenPos+=ddiff;
+            tokenPos+=ddiff;
+          }
+          data.insert(data.begin()+prevTokenPos,
+                      c=='?' ? PToken(PToken::Kind::Optional) : PToken(PToken::Kind::Star));
+          tokenPos++;
+          addToken(PToken(PToken::Kind::End));
+          // turn a sequence of tokens [T1 T2 T3] followed by 'T*' or into [T* T1 T2 T3 TEND]
+          //                            ^prevTokenPos
+          // same for 'T?'.
+        }
+        break;
+      default:
+        prevTokenPos = tokenPos;
+        addToken(getNextCharacter());
+        break;
+    }
+    ps++;
+  }
+  //addToken(PToken(PToken::Kind::End));
+}
+
+#if ENABLE_DEBUG
+/** Dump the compiled token stream for this regular expression. For debugging purposes. */
+void Ex::Private::dump()
+{
+  size_t l = data.size();
+  size_t i =0;
+  DBG("==== compiled token stream for pattern '%s' ===\n",pattern.c_str());
+  while (i<l)
+  {
+    DBG("[%s:%04x]\n",data[i].kindStr(),data[i].value());
+    if (data[i].kind()==PToken::Kind::CharClass || data[i].kind()==PToken::Kind::NegCharClass)
+    {
+      uint16_t num = data[i].value();
+      while (num>0 && i<l)
+      {
+        i++;
+        if (data[i].isRange()) // from-to range
+        {
+          DBG("[%04x(%c)-%04x(%c)]\n",data[i].from(),data[i].from(),data[i].to(),data[i].to());
+        }
+        else // special character like \n or \s
+        {
+          DBG("[%s:%04x]\n",data[i].kindStr(),data[i].value());
+        }
+        num--;
+      }
+    }
+    i++;
+  }
+}
+#endif
+
+/** Internal matching routine.
+ *  @param tokenPos Offset into the token stream.
+ *  @param str      The input string to match against.
+ *  @param match    The object used to store the matching results.
+ *  @param pos      The position in the input string to start with matching
+ *  @param level    Recursion level (used for debugging)
+ */
+bool Ex::Private::matchAt(size_t tokenPos,const std::string &str,Match &match,const size_t pos,int level) const
+{
+  DBG("%d:matchAt(tokenPos=%zu, str='%s', pos=%zu)\n",level,tokenPos,str.c_str(),pos);
+  auto isStartIdChar = [](char c) { return std::isalpha(c) || c=='_' || c<0; };
+  auto isIdChar      = [](char c) { return std::isalnum(c) || c=='_' || c<0; };
+  auto matchCharClass = [this,isStartIdChar,isIdChar](size_t tp,char c) -> bool
+  {
+    PToken tok = data[tp];
+    bool negate = tok.kind()==PToken::Kind::NegCharClass;
+    uint16_t numFields = tok.value();
+    bool found = false;
+    for (uint16_t i=0;i<numFields;i++)
+    {
+      tok = data[++tp];
+      // first check for built-in ranges
+      if ((tok.kind()==PToken::Kind::Alpha      && isStartIdChar(c)) ||
+          (tok.kind()==PToken::Kind::AlphaNum   && isIdChar(c))      ||
+          (tok.kind()==PToken::Kind::WhiteSpace && std::isspace(c))  ||
+          (tok.kind()==PToken::Kind::Digit      && std::isdigit(c))
+         )
+      {
+        found=true;
+        break;
+      }
+      else // user specified range
+      {
+        uint16_t v = static_cast<uint16_t>(c);
+        if (tok.from()<=v && v<=tok.to())
+        {
+          found=true;
+          break;
+        }
+      }
+    }
+    DBG("matchCharClass(tp=%zu,c=%c (x%02x))=%d\n",tp,c,c,negate?!found:found);
+    return negate ? !found : found;
+  };
+  size_t index = pos;
+  enum SequenceType { Star, Optional };
+  auto processSequence = [this,&tokenPos,&index,&str,&matchCharClass,
+                          &isStartIdChar,&isIdChar,&match,&level,&pos](SequenceType type) -> bool
+  {
+    size_t startIndex = index;
+    PToken tok = data[++tokenPos];
+    if (tok.kind()==PToken::Kind::Character) // 'x*' -> eat x's
+    {
+      char c_tok = tok.asciiValue();
+      while (index<=str.length() && str[index]==c_tok) { index++; if (type==Optional) break; }
+      tokenPos++;
+    }
+    else if (tok.isCharClass()) // '[a-f0-4]* -> eat matching characters
+    {
+      while (index<=str.length() && matchCharClass(tokenPos,str[index])) { index++; if (type==Optional) break; }
+      tokenPos+=tok.value()+1; // skip over character ranges + end token
+    }
+    else if (tok.kind()==PToken::Kind::Alpha) // '\a*' -> eat start id characters
+    {
+      while (index<=str.length() && isStartIdChar(str[index])) { index++; if (type==Optional) break; }
+      tokenPos++;
+    }
+    else if (tok.kind()==PToken::Kind::AlphaNum) // '\w*' -> eat id characters
+    {
+      while (index<=str.length() && isIdChar(str[index])) { index++; if (type==Optional) break; }
+      tokenPos++;
+    }
+    else if (tok.kind()==PToken::Kind::WhiteSpace) // '\s*' -> eat spaces
+    {
+      while (index<=str.length() && std::isspace(str[index])) { index++; if (type==Optional) break; }
+      tokenPos++;
+    }
+    else if (tok.kind()==PToken::Kind::Digit) // '\d*' -> eat digits
+    {
+      while (index<=str.length() && std::isdigit(str[index])) { index++; if (type==Optional) break; }
+      tokenPos++;
+    }
+    else if (tok.kind()==PToken::Kind::Any) // '.*' -> eat all
+    {
+      if (type==Optional) index++; else index = str.length();
+      tokenPos++;
+    }
+    tokenPos++; // skip over end marker
+    while ((int)index>=(int)startIndex)
+    {
+      // pattern 'x*xy' should match 'xy' and 'xxxxy'
+      bool found = matchAt(tokenPos,str,match,index,level+1);
+      if (found)
+      {
+        match.setMatch(pos,index-pos+match.length());
+        return true;
+      }
+      index--;
+    }
+    return false;
+  };
+
+  while (tokenPos<data.size())
+  {
+    PToken tok = data[tokenPos];
+    //DBG("loop tokenPos=%zu token=%s\n",tokenPos,tok.kindStr());
+    if (tok.kind()==PToken::Kind::Character) // match literal character
+    {
+      char c_tok = tok.asciiValue();
+      if (index>=str.length() || str[index]!=c_tok) return false; // end of string, or non matching char
+      index++,tokenPos++;
+    }
+    else if (tok.isCharClass())
+    {
+      if (index>=str.length() || !matchCharClass(tokenPos,str[index])) return false;
+      index++,tokenPos+=tok.value()+1; // skip over character ranges + end token
+    }
+    else
+    {
+      switch (tok.kind())
+      {
+        case PToken::Kind::Alpha:
+          if (index>=str.length() || !isStartIdChar(str[index])) return false;
+          index++;
+          break;
+        case PToken::Kind::AlphaNum:
+          if (index>=str.length() || !isIdChar(str[index])) return false;
+          index++;
+          break;
+        case PToken::Kind::WhiteSpace:
+          if (index>=str.length() || !std::isspace(str[index])) return false;
+          index++;
+          break;
+        case PToken::Kind::Digit:
+          if (index>=str.length() || !std::isdigit(str[index])) return false;
+          index++;
+          break;
+        case PToken::Kind::BeginOfLine:
+          if (index!=pos) return false;
+          break;
+        case PToken::Kind::EndOfLine:
+          if (index<str.length()) return false;
+          break;
+        case PToken::Kind::BeginOfWord:
+          DBG("BeginOfWord: index=%zu isIdChar(%c)=%d prev.isIdChar(%c)=%d\n",
+              index,str[index],isIdChar(str[index]),
+              index>0?str[index]-1:0,
+              index>0?isIdChar(str[index-1]):-1);
+          if (index>=str.length() ||
+              !isIdChar(str[index]) ||
+              (index>0 && isIdChar(str[index-1]))) return false;
+          break;
+        case PToken::Kind::EndOfWord:
+          DBG("EndOfWord: index=%zu pos=%zu idIdChar(%c)=%d  prev.isIsChar(%c)=%d\n",
+              index,pos,str[index],isIdChar(str[index]),
+              index==0 ? 0 : str[index-1],
+              index==0 ? -1 : isIdChar(str[index-1]));
+          if (index<str.length() &&
+              (isIdChar(str[index]) || index==0 || !isIdChar(str[index-1]))) return false;
+          break;
+        case PToken::Kind::BeginCapture:
+          DBG("BeginCapture(%zu)\n",index);
+          match.startCapture(index);
+          break;
+        case PToken::Kind::EndCapture:
+          DBG("EndCapture(%zu)\n",index);
+          match.endCapture(index);
+          break;
+        case PToken::Kind::Any:
+          if (index>=str.length()) return false;
+          index++;
+          break;
+        case PToken::Kind::Star:
+          return processSequence(Star);
+        case PToken::Kind::Optional:
+          return processSequence(Optional);
+        default:
+          return false;
+      }
+      tokenPos++;
+    }
+  }
+  match.setMatch(pos,index-pos);
+  return true;
+}
+
+static std::string wildcard2regex(const std::string &pattern)
+{
+  std::string result="^"; // match start of input
+  char c;
+  const char *p = pattern.c_str();
+  while ((c=*p++))
+  {
+    switch(c)
+    {
+      case '*':
+        result+=".*";
+        break; // '*' => '.*'
+      case '?':
+        result+='.';
+        break; // '?' => '.'
+      case '.':
+      case '+':
+      case '\\':
+      case '$':
+      case '^':
+      case '(':
+      case ')':
+         result+='\\'; result+=c;  // escape
+         break;
+      case '[':
+         if (*p=='^') // don't escape ^ after [
+         {
+           result+="[^";
+           p++;
+         }
+         else
+         {
+           result+=c;
+         }
+        break;
+      default:  // just copy
+        result+=c;
+        break;
+    }
+  }
+  result+='$'; // match end of input
+  return result;
+}
+
+
+Ex::Ex(const std::string &pattern, Mode mode)
+  : p(std::make_unique<Private>(mode==Mode::RegEx ? pattern : wildcard2regex(pattern)))
+{
+  p->compile();
+#if ENABLE_DEBUG
+  p->dump();
+  assert(!p->error);
+#endif
+}
+
+Ex::~Ex()
+{
+}
+
+bool Ex::match(const std::string &str,Match &match,size_t pos) const
+{
+  bool found=false;
+  if (p->data.size()==0 || p->error) return found;
+  match.init(&str);
+
+  PToken tok = p->data[0];
+  if (tok.kind()==PToken::Kind::BeginOfLine) // only test match at the given position
+  {
+    found = p->matchAt(0,str,match,pos,0);
+  }
+  else
+  {
+    if (tok.kind()==PToken::Kind::Character) // search for the start character
+    {
+      size_t index = str.find(tok.asciiValue(),pos);
+      if (index==std::string::npos)
+      {
+        DBG("Ex::match(str='%s',pos=%zu)=false (no start char '%c')\n",str.c_str(),pos,tok.asciiValue());
+        return false;
+      }
+      DBG("pos=%zu str='%s' char='%c' index=%zu\n",index,str.c_str(),tok.asciiValue(),index);
+      pos=index;
+    }
+    while (pos<str.length()) // search for a match starting at pos
+    {
+      found = p->matchAt(0,str,match,pos,0);
+      if (found) break;
+      pos++;
+    }
+  }
+  DBG("Ex::match(str='%s',pos=%zu)=%d\n",str.c_str(),pos,found);
+  return found;
+}
+
+bool Ex::isValid() const
+{
+  return !p->pattern.empty() && !p->error;
+}
+
+//----------------------------------------------------------------------------------------
+
+bool search(const std::string &str,Match &match,const Ex &re,size_t pos)
+{
+  return re.match(str,match,pos);
+}
+
+bool search(const std::string &str,const Ex &re,size_t pos)
+{
+  Match match;
+  return re.match(str,match,pos);
+}
+
+bool match(const std::string &str,Match &match,const Ex &re)
+{
+  return re.match(str,match,0) && match.position()==0 && match.length()==str.length();
+}
+
+bool match(const std::string &str,const Ex &re)
+{
+  Match match;
+  return re.match(str,match,0) && match.position()==0 && match.length()==str.length();
+}
+
+std::string replace(const std::string &str,const Ex &re,const std::string &replacement)
+{
+  std::string result;
+  Match match;
+  size_t p=0;
+  while (re.match(str,match,p))
+  {
+    size_t i=match.position();
+    size_t l=match.length();
+    if (i>p) result+=str.substr(p,i-p);
+    result+=replacement;
+    p=i+l;
+  }
+  if (p<str.length()) result+=str.substr(p);
+  return result;
+}
+
+}
author	Dimitri van Heesch <doxygen@gmail.com>	2021-03-03 18:54:18 (GMT)
committer	GitHub <noreply@github.com>	2021-03-03 18:54:18 (GMT)
commit	23a88bee679fb545a4d22ac10f10203cb575056b (patch)
tree	a6943dfa74967be46a5cd65d2c4361afe2d3f0a4 /src/regex.cpp
parent	789625caed4097a075819b7d7299ab1a808fcf08 (diff)
parent	cc78ebc6ad3d24d25ac779eeed9fb5fa3c89ee27 (diff)
download	Doxygen-23a88bee679fb545a4d22ac10f10203cb575056b.zip Doxygen-23a88bee679fb545a4d22ac10f10203cb575056b.tar.gz Doxygen-23a88bee679fb545a4d22ac10f10203cb575056b.tar.bz2