diff options
author | Brad King <brad.king@kitware.com> | 2016-11-09 14:23:18 (GMT) |
---|---|---|
committer | Brad King <brad.king@kitware.com> | 2016-11-09 14:23:18 (GMT) |
commit | 95a97a40e48a0db2b206c4560d6962be3666812c (patch) | |
tree | e7f6f0e6ffca54ee18f8c276735e8a60e45f8833 /Source/kwsys/RegularExpression.cxx | |
parent | 666bb0e3fa0d212c03f785bd9f6bc05d29925e1f (diff) | |
parent | 773b36e5d4af3ac040625e0ea16bcfd30fcdeb6d (diff) | |
download | CMake-95a97a40e48a0db2b206c4560d6962be3666812c.zip CMake-95a97a40e48a0db2b206c4560d6962be3666812c.tar.gz CMake-95a97a40e48a0db2b206c4560d6962be3666812c.tar.bz2 |
Merge branch 'upstream-KWSys' into update-kwsys
* upstream-KWSys:
KWSys 2016-11-09 (18c65411)
Diffstat (limited to 'Source/kwsys/RegularExpression.cxx')
-rw-r--r-- | Source/kwsys/RegularExpression.cxx | 1666 |
1 files changed, 818 insertions, 848 deletions
diff --git a/Source/kwsys/RegularExpression.cxx b/Source/kwsys/RegularExpression.cxx index 22593b4..6d7f832 100644 --- a/Source/kwsys/RegularExpression.cxx +++ b/Source/kwsys/RegularExpression.cxx @@ -1,14 +1,5 @@ -/*============================================================================ - KWSys - Kitware System Library - Copyright 2000-2009 Kitware, Inc., Insight Software Consortium - - Distributed under the OSI-approved BSD License (the "License"); - see accompanying file Copyright.txt for details. - - This software is distributed WITHOUT ANY WARRANTY; without even the - implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the License for more information. -============================================================================*/ +/* Distributed under the OSI-approved BSD 3-Clause License. See accompanying + file Copyright.txt or https://cmake.org/licensing#kwsys for details. */ // // Copyright (C) 1991 Texas Instruments Incorporated. // @@ -34,30 +25,29 @@ // Work-around CMake dependency scanning limitation. This must // duplicate the above list of headers. #if 0 -# include "RegularExpression.hxx.in" +#include "RegularExpression.hxx.in" #endif #include <stdio.h> #include <string.h> -namespace KWSYS_NAMESPACE -{ +namespace KWSYS_NAMESPACE { // RegularExpression -- Copies the given regular expression. -RegularExpression::RegularExpression (const RegularExpression& rxp) { - if ( !rxp.program ) - { +RegularExpression::RegularExpression(const RegularExpression& rxp) +{ + if (!rxp.program) { this->program = 0; return; - } + } int ind; - this->progsize = rxp.progsize; // Copy regular expression size - this->program = new char[this->progsize]; // Allocate storage - for(ind=this->progsize; ind-- != 0;) // Copy regular expresion + this->progsize = rxp.progsize; // Copy regular expression size + this->program = new char[this->progsize]; // Allocate storage + for (ind = this->progsize; ind-- != 0;) // Copy regular expresion this->program[ind] = rxp.program[ind]; - this->startp[0] = rxp.startp[0]; // Copy pointers into last - this->endp[0] = rxp.endp[0]; // Successful "find" operation - this->regmust = rxp.regmust; // Copy field + this->startp[0] = rxp.startp[0]; // Copy pointers into last + this->endp[0] = rxp.endp[0]; // Successful "find" operation + this->regmust = rxp.regmust; // Copy field if (rxp.regmust != 0) { char* dum = rxp.program; ind = 0; @@ -67,32 +57,30 @@ RegularExpression::RegularExpression (const RegularExpression& rxp) { } this->regmust = this->program + ind; } - this->regstart = rxp.regstart; // Copy starting index - this->reganch = rxp.reganch; // Copy remaining private data - this->regmlen = rxp.regmlen; // Copy remaining private data + this->regstart = rxp.regstart; // Copy starting index + this->reganch = rxp.reganch; // Copy remaining private data + this->regmlen = rxp.regmlen; // Copy remaining private data } // operator= -- Copies the given regular expression. -RegularExpression& RegularExpression::operator= (const RegularExpression& rxp) +RegularExpression& RegularExpression::operator=(const RegularExpression& rxp) { - if(this == &rxp) - { + if (this == &rxp) { return *this; - } - if ( !rxp.program ) - { + } + if (!rxp.program) { this->program = 0; return *this; - } + } int ind; - this->progsize = rxp.progsize; // Copy regular expression size - delete [] this->program; - this->program = new char[this->progsize]; // Allocate storage - for(ind=this->progsize; ind-- != 0;) // Copy regular expresion + this->progsize = rxp.progsize; // Copy regular expression size + delete[] this->program; + this->program = new char[this->progsize]; // Allocate storage + for (ind = this->progsize; ind-- != 0;) // Copy regular expresion this->program[ind] = rxp.program[ind]; - this->startp[0] = rxp.startp[0]; // Copy pointers into last - this->endp[0] = rxp.endp[0]; // Successful "find" operation - this->regmust = rxp.regmust; // Copy field + this->startp[0] = rxp.startp[0]; // Copy pointers into last + this->endp[0] = rxp.endp[0]; // Successful "find" operation + this->regmust = rxp.regmust; // Copy field if (rxp.regmust != 0) { char* dum = rxp.program; ind = 0; @@ -102,37 +90,38 @@ RegularExpression& RegularExpression::operator= (const RegularExpression& rxp) } this->regmust = this->program + ind; } - this->regstart = rxp.regstart; // Copy starting index - this->reganch = rxp.reganch; // Copy remaining private data - this->regmlen = rxp.regmlen; // Copy remaining private data + this->regstart = rxp.regstart; // Copy starting index + this->reganch = rxp.reganch; // Copy remaining private data + this->regmlen = rxp.regmlen; // Copy remaining private data return *this; } // operator== -- Returns true if two regular expressions have the same // compiled program for pattern matching. -bool RegularExpression::operator== (const RegularExpression& rxp) const { - if (this != &rxp) { // Same address? - int ind = this->progsize; // Get regular expression size - if (ind != rxp.progsize) // If different size regexp - return false; // Return failure - while(ind-- != 0) // Else while still characters - if(this->program[ind] != rxp.program[ind]) // If regexp are different - return false; // Return failure +bool RegularExpression::operator==(const RegularExpression& rxp) const +{ + if (this != &rxp) { // Same address? + int ind = this->progsize; // Get regular expression size + if (ind != rxp.progsize) // If different size regexp + return false; // Return failure + while (ind-- != 0) // Else while still characters + if (this->program[ind] != rxp.program[ind]) // If regexp are different + return false; // Return failure } - return true; // Else same, return success + return true; // Else same, return success } - // deep_equal -- Returns true if have the same compiled regular expressions // and the same start and end pointers. -bool RegularExpression::deep_equal (const RegularExpression& rxp) const { +bool RegularExpression::deep_equal(const RegularExpression& rxp) const +{ int ind = this->progsize; // Get regular expression size if (ind != rxp.progsize) // If different size regexp return false; // Return failure - while(ind-- != 0) // Else while still characters - if(this->program[ind] != rxp.program[ind]) // If regexp are different + while (ind-- != 0) // Else while still characters + if (this->program[ind] != rxp.program[ind]) // If regexp are different return false; // Return failure return (this->startp[0] == rxp.startp[0] && // Else if same start/end ptrs, this->endp[0] == rxp.endp[0]); // Return true @@ -204,26 +193,26 @@ bool RegularExpression::deep_equal (const RegularExpression& rxp) const { */ // definition number opnd? meaning -#define END 0 // no End of program. -#define BOL 1 // no Match "" at beginning of line. -#define EOL 2 // no Match "" at end of line. -#define ANY 3 // no Match any one character. -#define ANYOF 4 // str Match any character in this string. -#define ANYBUT 5 // str Match any character not in this - // string. -#define BRANCH 6 // node Match this alternative, or the - // next... -#define BACK 7 // no Match "", "next" ptr points backward. -#define EXACTLY 8 // str Match this string. -#define NOTHING 9 // no Match empty string. -#define STAR 10 // node Match this (simple) thing 0 or more - // times. -#define PLUS 11 // node Match this (simple) thing 1 or more - // times. -#define OPEN 20 // no Mark this point in input as start of - // #n. +#define END 0 // no End of program. +#define BOL 1 // no Match "" at beginning of line. +#define EOL 2 // no Match "" at end of line. +#define ANY 3 // no Match any one character. +#define ANYOF 4 // str Match any character in this string. +#define ANYBUT 5 // str Match any character not in this + // string. +#define BRANCH 6 // node Match this alternative, or the + // next... +#define BACK 7 // no Match "", "next" ptr points backward. +#define EXACTLY 8 // str Match this string. +#define NOTHING 9 // no Match empty string. +#define STAR 10 // node Match this (simple) thing 0 or more + // times. +#define PLUS 11 // node Match this (simple) thing 1 or more + // times. +#define OPEN 20 // no Mark this point in input as start of + // #n. // OPEN+1 is number 1, etc. -#define CLOSE 30 // no Analogous to OPEN. +#define CLOSE 30 // no Analogous to OPEN. /* * Opcode notes: @@ -258,32 +247,32 @@ bool RegularExpression::deep_equal (const RegularExpression& rxp) const { * but allows patterns to get big without disasters. */ -#define OP(p) (*(p)) -#define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377)) -#define OPERAND(p) ((p) + 3) +#define OP(p) (*(p)) +#define NEXT(p) (((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377)) +#define OPERAND(p) ((p) + 3) const unsigned char MAGIC = 0234; /* * Utility definitions. */ -#define UCHARAT(p) (reinterpret_cast<const unsigned char*>(p))[0] - - -#define FAIL(m) { regerror(m); return(0); } -#define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?') -#define META "^$.[()|?+*\\" +#define UCHARAT(p) (reinterpret_cast<const unsigned char*>(p))[0] +#define FAIL(m) \ + { \ + regerror(m); \ + return (0); \ + } +#define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?') +#define META "^$.[()|?+*\\" /* * Flags to be passed up and down. */ -#define HASWIDTH 01 // Known never to match null string. -#define SIMPLE 02 // Simple enough to be STAR/PLUS operand. -#define SPSTART 04 // Starts with * or +. -#define WORST 0 // Worst case. - - +#define HASWIDTH 01 // Known never to match null string. +#define SIMPLE 02 // Simple enough to be STAR/PLUS operand. +#define SPSTART 04 // Starts with * or +. +#define WORST 0 // Worst case. ///////////////////////////////////////////////////////////////////////// // @@ -291,15 +280,14 @@ const unsigned char MAGIC = 0234; // ///////////////////////////////////////////////////////////////////////// - /* * Global work variables for compile(). */ -static const char* regparse; // Input-scan pointer. -static int regnpar; // () count. -static char regdummy; -static char* regcode; // Code-emit pointer; ®dummy = don't. -static long regsize; // Code size. +static const char* regparse; // Input-scan pointer. +static int regnpar; // () count. +static char regdummy; +static char* regcode; // Code-emit pointer; ®dummy = don't. +static long regsize; // Code size. /* * Forward declarations for compile()'s friends. @@ -307,24 +295,22 @@ static long regsize; // Code size. // #ifndef static // #define static static // #endif -static char* reg (int, int*); -static char* regbranch (int*); -static char* regpiece (int*); -static char* regatom (int*); -static char* regnode (char); -static const char* regnext (const char*); -static char* regnext (char*); -static void regc (char); -static void reginsert (char, char*); -static void regtail (char*, const char*); -static void regoptail (char*, const char*); +static char* reg(int, int*); +static char* regbranch(int*); +static char* regpiece(int*); +static char* regatom(int*); +static char* regnode(char); +static const char* regnext(const char*); +static char* regnext(char*); +static void regc(char); +static void reginsert(char, char*); +static void regtail(char*, const char*); +static void regoptail(char*, const char*); #ifdef STRCSPN -static int strcspn (); +static int strcspn(); #endif - - /* * We can't allocate space until we know how big the compiled form will be, * but we can't compile it (and thus know how big it is) until we've got a @@ -339,101 +325,100 @@ static int strcspn (); * of the structure of the compiled regexp. */ - // compile -- compile a regular expression into internal code // for later pattern matching. -bool RegularExpression::compile (const char* exp) { - const char* scan; - const char* longest; - size_t len; - int flags; +bool RegularExpression::compile(const char* exp) +{ + const char* scan; + const char* longest; + size_t len; + int flags; + + if (exp == 0) { + // RAISE Error, SYM(RegularExpression), SYM(No_Expr), + printf("RegularExpression::compile(): No expression supplied.\n"); + return false; + } - if (exp == 0) { - //RAISE Error, SYM(RegularExpression), SYM(No_Expr), - printf ("RegularExpression::compile(): No expression supplied.\n"); - return false; - } + // First pass: determine size, legality. + regparse = exp; + regnpar = 1; + regsize = 0L; + regcode = ®dummy; + regc(static_cast<char>(MAGIC)); + if (!reg(0, &flags)) { + printf("RegularExpression::compile(): Error in compile.\n"); + return false; + } + this->startp[0] = this->endp[0] = this->searchstring = 0; - // First pass: determine size, legality. - regparse = exp; - regnpar = 1; - regsize = 0L; - regcode = ®dummy; - regc(static_cast<char>(MAGIC)); - if(!reg(0, &flags)) - { - printf ("RegularExpression::compile(): Error in compile.\n"); - return false; - } - this->startp[0] = this->endp[0] = this->searchstring = 0; + // Small enough for pointer-storage convention? + if (regsize >= 32767L) { // Probably could be 65535L. + // RAISE Error, SYM(RegularExpression), SYM(Expr_Too_Big), + printf("RegularExpression::compile(): Expression too big.\n"); + return false; + } - // Small enough for pointer-storage convention? - if (regsize >= 32767L) { // Probably could be 65535L. - //RAISE Error, SYM(RegularExpression), SYM(Expr_Too_Big), - printf ("RegularExpression::compile(): Expression too big.\n"); - return false; - } + // Allocate space. + //#ifndef _WIN32 + if (this->program != 0) + delete[] this->program; + //#endif + this->program = new char[regsize]; + this->progsize = static_cast<int>(regsize); + + if (this->program == 0) { + // RAISE Error, SYM(RegularExpression), SYM(Out_Of_Memory), + printf("RegularExpression::compile(): Out of memory.\n"); + return false; + } - // Allocate space. -//#ifndef _WIN32 - if (this->program != 0) delete [] this->program; -//#endif - this->program = new char[regsize]; - this->progsize = static_cast<int>(regsize); - - if (this->program == 0) { - //RAISE Error, SYM(RegularExpression), SYM(Out_Of_Memory), - printf ("RegularExpression::compile(): Out of memory.\n"); - return false; - } + // Second pass: emit code. + regparse = exp; + regnpar = 1; + regcode = this->program; + regc(static_cast<char>(MAGIC)); + reg(0, &flags); + + // Dig out information for optimizations. + this->regstart = '\0'; // Worst-case defaults. + this->reganch = 0; + this->regmust = 0; + this->regmlen = 0; + scan = this->program + 1; // First BRANCH. + if (OP(regnext(scan)) == END) { // Only one top-level choice. + scan = OPERAND(scan); + + // Starting-point info. + if (OP(scan) == EXACTLY) + this->regstart = *OPERAND(scan); + else if (OP(scan) == BOL) + this->reganch++; - // Second pass: emit code. - regparse = exp; - regnpar = 1; - regcode = this->program; - regc(static_cast<char>(MAGIC)); - reg(0, &flags); - - // Dig out information for optimizations. - this->regstart = '\0'; // Worst-case defaults. - this->reganch = 0; - this->regmust = 0; - this->regmlen = 0; - scan = this->program + 1; // First BRANCH. - if (OP(regnext(scan)) == END) { // Only one top-level choice. - scan = OPERAND(scan); - - // Starting-point info. - if (OP(scan) == EXACTLY) - this->regstart = *OPERAND(scan); - else if (OP(scan) == BOL) - this->reganch++; - - // - // If there's something expensive in the r.e., find the longest - // literal string that must appear and make it the regmust. Resolve - // ties in favor of later strings, since the regstart check works - // with the beginning of the r.e. and avoiding duplication - // strengthens checking. Not a strong reason, but sufficient in the - // absence of others. - // - if (flags & SPSTART) { - longest = 0; - len = 0; - for (; scan != 0; scan = regnext(scan)) - if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) { - longest = OPERAND(scan); - len = strlen(OPERAND(scan)); - } - this->regmust = longest; - this->regmlen = len; + // + // If there's something expensive in the r.e., find the longest + // literal string that must appear and make it the regmust. Resolve + // ties in favor of later strings, since the regstart check works + // with the beginning of the r.e. and avoiding duplication + // strengthens checking. Not a strong reason, but sufficient in the + // absence of others. + // + if (flags & SPSTART) { + longest = 0; + len = 0; + for (; scan != 0; scan = regnext(scan)) + if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) { + longest = OPERAND(scan); + len = strlen(OPERAND(scan)); } + this->regmust = longest; + this->regmlen = len; } - return true; + } + return true; } - /* - reg - regular expression, i.e. main body or parenthesized thing * @@ -443,115 +428,112 @@ bool RegularExpression::compile (const char* exp) { * is a trifle forced, but the need to tie the tails of the branches to what * follows makes it hard to avoid. */ -static char* reg (int paren, int *flagp) { - char* ret; - char* br; - char* ender; - int parno =0; - int flags; - - *flagp = HASWIDTH; // Tentatively. - - // Make an OPEN node, if parenthesized. - if (paren) { - if (regnpar >= RegularExpression::NSUBEXP) { - //RAISE Error, SYM(RegularExpression), SYM(Too_Many_Parens), - printf ("RegularExpression::compile(): Too many parentheses.\n"); - return 0; - } - parno = regnpar; - regnpar++; - ret = regnode(static_cast<char>(OPEN + parno)); +static char* reg(int paren, int* flagp) +{ + char* ret; + char* br; + char* ender; + int parno = 0; + int flags; + + *flagp = HASWIDTH; // Tentatively. + + // Make an OPEN node, if parenthesized. + if (paren) { + if (regnpar >= RegularExpression::NSUBEXP) { + // RAISE Error, SYM(RegularExpression), SYM(Too_Many_Parens), + printf("RegularExpression::compile(): Too many parentheses.\n"); + return 0; } - else - ret = 0; - - // Pick up the branches, linking them together. + parno = regnpar; + regnpar++; + ret = regnode(static_cast<char>(OPEN + parno)); + } else + ret = 0; + + // Pick up the branches, linking them together. + br = regbranch(&flags); + if (br == 0) + return (0); + if (ret != 0) + regtail(ret, br); // OPEN -> first. + else + ret = br; + if (!(flags & HASWIDTH)) + *flagp &= ~HASWIDTH; + *flagp |= flags & SPSTART; + while (*regparse == '|') { + regparse++; br = regbranch(&flags); if (br == 0) - return (0); - if (ret != 0) - regtail(ret, br); // OPEN -> first. - else - ret = br; + return (0); + regtail(ret, br); // BRANCH -> BRANCH. if (!(flags & HASWIDTH)) - *flagp &= ~HASWIDTH; + *flagp &= ~HASWIDTH; *flagp |= flags & SPSTART; - while (*regparse == '|') { - regparse++; - br = regbranch(&flags); - if (br == 0) - return (0); - regtail(ret, br); // BRANCH -> BRANCH. - if (!(flags & HASWIDTH)) - *flagp &= ~HASWIDTH; - *flagp |= flags & SPSTART; - } - - // Make a closing node, and hook it on the end. - ender = regnode(static_cast<char>((paren) ? CLOSE + parno : END)); - regtail(ret, ender); - - // Hook the tails of the branches to the closing node. - for (br = ret; br != 0; br = regnext(br)) - regoptail(br, ender); + } - // Check for proper termination. - if (paren && *regparse++ != ')') { - //RAISE Error, SYM(RegularExpression), SYM(Unmatched_Parens), - printf ("RegularExpression::compile(): Unmatched parentheses.\n"); - return 0; - } - else if (!paren && *regparse != '\0') { - if (*regparse == ')') { - //RAISE Error, SYM(RegularExpression), SYM(Unmatched_Parens), - printf ("RegularExpression::compile(): Unmatched parentheses.\n"); - return 0; - } - else { - //RAISE Error, SYM(RegularExpression), SYM(Internal_Error), - printf ("RegularExpression::compile(): Internal error.\n"); - return 0; - } - // NOTREACHED + // Make a closing node, and hook it on the end. + ender = regnode(static_cast<char>((paren) ? CLOSE + parno : END)); + regtail(ret, ender); + + // Hook the tails of the branches to the closing node. + for (br = ret; br != 0; br = regnext(br)) + regoptail(br, ender); + + // Check for proper termination. + if (paren && *regparse++ != ')') { + // RAISE Error, SYM(RegularExpression), SYM(Unmatched_Parens), + printf("RegularExpression::compile(): Unmatched parentheses.\n"); + return 0; + } else if (!paren && *regparse != '\0') { + if (*regparse == ')') { + // RAISE Error, SYM(RegularExpression), SYM(Unmatched_Parens), + printf("RegularExpression::compile(): Unmatched parentheses.\n"); + return 0; + } else { + // RAISE Error, SYM(RegularExpression), SYM(Internal_Error), + printf("RegularExpression::compile(): Internal error.\n"); + return 0; } - return (ret); + // NOTREACHED + } + return (ret); } - /* - regbranch - one alternative of an | operator * * Implements the concatenation operator. */ -static char* regbranch (int *flagp) { - char* ret; - char* chain; - char* latest; - int flags; - - *flagp = WORST; // Tentatively. - - ret = regnode(BRANCH); - chain = 0; - while (*regparse != '\0' && *regparse != '|' && *regparse != ')') { - latest = regpiece(&flags); - if (latest == 0) - return (0); - *flagp |= flags & HASWIDTH; - if (chain == 0) // First piece. - *flagp |= flags & SPSTART; - else - regtail(chain, latest); - chain = latest; - } - if (chain == 0) // Loop ran zero times. - regnode(NOTHING); +static char* regbranch(int* flagp) +{ + char* ret; + char* chain; + char* latest; + int flags; + + *flagp = WORST; // Tentatively. + + ret = regnode(BRANCH); + chain = 0; + while (*regparse != '\0' && *regparse != '|' && *regparse != ')') { + latest = regpiece(&flags); + if (latest == 0) + return (0); + *flagp |= flags & HASWIDTH; + if (chain == 0) // First piece. + *flagp |= flags & SPSTART; + else + regtail(chain, latest); + chain = latest; + } + if (chain == 0) // Loop ran zero times. + regnode(NOTHING); - return (ret); + return (ret); } - /* - regpiece - something followed by possible [*+?] * @@ -561,66 +543,64 @@ static char* regbranch (int *flagp) { * It might seem that this node could be dispensed with entirely, but the * endmarker role is not redundant. */ -static char* regpiece (int *flagp) { - char* ret; - char op; - char* next; - int flags; - - ret = regatom(&flags); - if (ret == 0) - return (0); +static char* regpiece(int* flagp) +{ + char* ret; + char op; + char* next; + int flags; - op = *regparse; - if (!ISMULT(op)) { - *flagp = flags; - return (ret); - } + ret = regatom(&flags); + if (ret == 0) + return (0); - if (!(flags & HASWIDTH) && op != '?') { - //RAISE Error, SYM(RegularExpression), SYM(Empty_Operand), - printf ("RegularExpression::compile() : *+ operand could be empty.\n"); - return 0; - } - *flagp = (op != '+') ? (WORST | SPSTART) : (WORST | HASWIDTH); - - if (op == '*' && (flags & SIMPLE)) - reginsert(STAR, ret); - else if (op == '*') { - // Emit x* as (x&|), where & means "self". - reginsert(BRANCH, ret); // Either x - regoptail(ret, regnode(BACK)); // and loop - regoptail(ret, ret); // back - regtail(ret, regnode(BRANCH)); // or - regtail(ret, regnode(NOTHING)); // null. - } - else if (op == '+' && (flags & SIMPLE)) - reginsert(PLUS, ret); - else if (op == '+') { - // Emit x+ as x(&|), where & means "self". - next = regnode(BRANCH); // Either - regtail(ret, next); - regtail(regnode(BACK), ret); // loop back - regtail(next, regnode(BRANCH)); // or - regtail(ret, regnode(NOTHING)); // null. - } - else if (op == '?') { - // Emit x? as (x|) - reginsert(BRANCH, ret); // Either x - regtail(ret, regnode(BRANCH)); // or - next = regnode(NOTHING);// null. - regtail(ret, next); - regoptail(ret, next); - } - regparse++; - if (ISMULT(*regparse)) { - //RAISE Error, SYM(RegularExpression), SYM(Nested_Operand), - printf ("RegularExpression::compile(): Nested *?+.\n"); - return 0; - } + op = *regparse; + if (!ISMULT(op)) { + *flagp = flags; return (ret); -} + } + if (!(flags & HASWIDTH) && op != '?') { + // RAISE Error, SYM(RegularExpression), SYM(Empty_Operand), + printf("RegularExpression::compile() : *+ operand could be empty.\n"); + return 0; + } + *flagp = (op != '+') ? (WORST | SPSTART) : (WORST | HASWIDTH); + + if (op == '*' && (flags & SIMPLE)) + reginsert(STAR, ret); + else if (op == '*') { + // Emit x* as (x&|), where & means "self". + reginsert(BRANCH, ret); // Either x + regoptail(ret, regnode(BACK)); // and loop + regoptail(ret, ret); // back + regtail(ret, regnode(BRANCH)); // or + regtail(ret, regnode(NOTHING)); // null. + } else if (op == '+' && (flags & SIMPLE)) + reginsert(PLUS, ret); + else if (op == '+') { + // Emit x+ as x(&|), where & means "self". + next = regnode(BRANCH); // Either + regtail(ret, next); + regtail(regnode(BACK), ret); // loop back + regtail(next, regnode(BRANCH)); // or + regtail(ret, regnode(NOTHING)); // null. + } else if (op == '?') { + // Emit x? as (x|) + reginsert(BRANCH, ret); // Either x + regtail(ret, regnode(BRANCH)); // or + next = regnode(NOTHING); // null. + regtail(ret, next); + regoptail(ret, next); + } + regparse++; + if (ISMULT(*regparse)) { + // RAISE Error, SYM(RegularExpression), SYM(Nested_Operand), + printf("RegularExpression::compile(): Nested *?+.\n"); + return 0; + } + return (ret); +} /* - regatom - the lowest level @@ -630,349 +610,340 @@ static char* regpiece (int *flagp) { * faster to run. Backslashed characters are exceptions, each becoming a * separate node; the code is simpler that way and it's not worth fixing. */ -static char* regatom (int *flagp) { - char* ret; - int flags; - - *flagp = WORST; // Tentatively. - - switch (*regparse++) { - case '^': - ret = regnode(BOL); - break; - case '$': - ret = regnode(EOL); - break; - case '.': - ret = regnode(ANY); - *flagp |= HASWIDTH | SIMPLE; - break; - case '[':{ - int rxpclass; - int rxpclassend; - - if (*regparse == '^') { // Complement of range. - ret = regnode(ANYBUT); - regparse++; - } - else - ret = regnode(ANYOF); - if (*regparse == ']' || *regparse == '-') - regc(*regparse++); - while (*regparse != '\0' && *regparse != ']') { - if (*regparse == '-') { - regparse++; - if (*regparse == ']' || *regparse == '\0') - regc('-'); - else { - rxpclass = UCHARAT(regparse - 2) + 1; - rxpclassend = UCHARAT(regparse); - if (rxpclass > rxpclassend + 1) { - //RAISE Error, SYM(RegularExpression), SYM(Invalid_Range), - printf ("RegularExpression::compile(): Invalid range in [].\n"); - return 0; - } - for (; rxpclass <= rxpclassend; rxpclass++) - regc(static_cast<char>(rxpclass)); - regparse++; - } - } - else - regc(*regparse++); - } - regc('\0'); - if (*regparse != ']') { - //RAISE Error, SYM(RegularExpression), SYM(Unmatched_Bracket), - printf ("RegularExpression::compile(): Unmatched [].\n"); - return 0; - } - regparse++; - *flagp |= HASWIDTH | SIMPLE; - } - break; - case '(': - ret = reg(1, &flags); - if (ret == 0) - return (0); - *flagp |= flags & (HASWIDTH | SPSTART); - break; - case '\0': - case '|': - case ')': - //RAISE Error, SYM(RegularExpression), SYM(Internal_Error), - printf ("RegularExpression::compile(): Internal error.\n"); // Never here - return 0; - case '?': - case '+': - case '*': - //RAISE Error, SYM(RegularExpression), SYM(No_Operand), - printf ("RegularExpression::compile(): ?+* follows nothing.\n"); - return 0; - case '\\': - if (*regparse == '\0') { - //RAISE Error, SYM(RegularExpression), SYM(Trailing_Backslash), - printf ("RegularExpression::compile(): Trailing backslash.\n"); - return 0; - } - ret = regnode(EXACTLY); - regc(*regparse++); - regc('\0'); - *flagp |= HASWIDTH | SIMPLE; - break; - default:{ - int len; - char ender; - - regparse--; - len = int(strcspn(regparse, META)); - if (len <= 0) { - //RAISE Error, SYM(RegularExpression), SYM(Internal_Error), - printf ("RegularExpression::compile(): Internal error.\n"); - return 0; - } - ender = *(regparse + len); - if (len > 1 && ISMULT(ender)) - len--; // Back off clear of ?+* operand. - *flagp |= HASWIDTH; - if (len == 1) - *flagp |= SIMPLE; - ret = regnode(EXACTLY); - while (len > 0) { - regc(*regparse++); - len--; - } - regc('\0'); +static char* regatom(int* flagp) +{ + char* ret; + int flags; + + *flagp = WORST; // Tentatively. + + switch (*regparse++) { + case '^': + ret = regnode(BOL); + break; + case '$': + ret = regnode(EOL); + break; + case '.': + ret = regnode(ANY); + *flagp |= HASWIDTH | SIMPLE; + break; + case '[': { + int rxpclass; + int rxpclassend; + + if (*regparse == '^') { // Complement of range. + ret = regnode(ANYBUT); + regparse++; + } else + ret = regnode(ANYOF); + if (*regparse == ']' || *regparse == '-') + regc(*regparse++); + while (*regparse != '\0' && *regparse != ']') { + if (*regparse == '-') { + regparse++; + if (*regparse == ']' || *regparse == '\0') + regc('-'); + else { + rxpclass = UCHARAT(regparse - 2) + 1; + rxpclassend = UCHARAT(regparse); + if (rxpclass > rxpclassend + 1) { + // RAISE Error, SYM(RegularExpression), SYM(Invalid_Range), + printf("RegularExpression::compile(): Invalid range in [].\n"); + return 0; } - break; - } - return (ret); + for (; rxpclass <= rxpclassend; rxpclass++) + regc(static_cast<char>(rxpclass)); + regparse++; + } + } else + regc(*regparse++); + } + regc('\0'); + if (*regparse != ']') { + // RAISE Error, SYM(RegularExpression), SYM(Unmatched_Bracket), + printf("RegularExpression::compile(): Unmatched [].\n"); + return 0; + } + regparse++; + *flagp |= HASWIDTH | SIMPLE; + } break; + case '(': + ret = reg(1, &flags); + if (ret == 0) + return (0); + *flagp |= flags & (HASWIDTH | SPSTART); + break; + case '\0': + case '|': + case ')': + // RAISE Error, SYM(RegularExpression), SYM(Internal_Error), + printf("RegularExpression::compile(): Internal error.\n"); // Never here + return 0; + case '?': + case '+': + case '*': + // RAISE Error, SYM(RegularExpression), SYM(No_Operand), + printf("RegularExpression::compile(): ?+* follows nothing.\n"); + return 0; + case '\\': + if (*regparse == '\0') { + // RAISE Error, SYM(RegularExpression), SYM(Trailing_Backslash), + printf("RegularExpression::compile(): Trailing backslash.\n"); + return 0; + } + ret = regnode(EXACTLY); + regc(*regparse++); + regc('\0'); + *flagp |= HASWIDTH | SIMPLE; + break; + default: { + int len; + char ender; + + regparse--; + len = int(strcspn(regparse, META)); + if (len <= 0) { + // RAISE Error, SYM(RegularExpression), SYM(Internal_Error), + printf("RegularExpression::compile(): Internal error.\n"); + return 0; + } + ender = *(regparse + len); + if (len > 1 && ISMULT(ender)) + len--; // Back off clear of ?+* operand. + *flagp |= HASWIDTH; + if (len == 1) + *flagp |= SIMPLE; + ret = regnode(EXACTLY); + while (len > 0) { + regc(*regparse++); + len--; + } + regc('\0'); + } break; + } + return (ret); } - /* - regnode - emit a node Location. */ -static char* regnode (char op) { - char* ret; - char* ptr; - - ret = regcode; - if (ret == ®dummy) { - regsize += 3; - return (ret); - } - - ptr = ret; - *ptr++ = op; - *ptr++ = '\0'; // Null "next" pointer. - *ptr++ = '\0'; - regcode = ptr; +static char* regnode(char op) +{ + char* ret; + char* ptr; + ret = regcode; + if (ret == ®dummy) { + regsize += 3; return (ret); -} + } + ptr = ret; + *ptr++ = op; + *ptr++ = '\0'; // Null "next" pointer. + *ptr++ = '\0'; + regcode = ptr; + + return (ret); +} /* - regc - emit (if appropriate) a byte of code */ -static void regc (char b) { - if (regcode != ®dummy) - *regcode++ = b; - else - regsize++; +static void regc(char b) +{ + if (regcode != ®dummy) + *regcode++ = b; + else + regsize++; } - /* - reginsert - insert an operator in front of already-emitted operand * * Means relocating the operand. */ -static void reginsert (char op, char* opnd) { - char* src; - char* dst; - char* place; - - if (regcode == ®dummy) { - regsize += 3; - return; - } +static void reginsert(char op, char* opnd) +{ + char* src; + char* dst; + char* place; + + if (regcode == ®dummy) { + regsize += 3; + return; + } - src = regcode; - regcode += 3; - dst = regcode; - while (src > opnd) - *--dst = *--src; + src = regcode; + regcode += 3; + dst = regcode; + while (src > opnd) + *--dst = *--src; - place = opnd; // Op node, where operand used to be. - *place++ = op; - *place++ = '\0'; - *place = '\0'; + place = opnd; // Op node, where operand used to be. + *place++ = op; + *place++ = '\0'; + *place = '\0'; } - /* - regtail - set the next-pointer at the end of a node chain */ -static void regtail (char* p, const char* val) { - char* scan; - char* temp; - int offset; - - if (p == ®dummy) - return; - - // Find last node. - scan = p; - for (;;) { - temp = regnext(scan); - if (temp == 0) - break; - scan = temp; - } +static void regtail(char* p, const char* val) +{ + char* scan; + char* temp; + int offset; - if (OP(scan) == BACK) - offset = int(scan - val); - else - offset = int(val - scan); - *(scan + 1) = static_cast<char>((offset >> 8) & 0377); - *(scan + 2) = static_cast<char>(offset & 0377); -} + if (p == ®dummy) + return; + + // Find last node. + scan = p; + for (;;) { + temp = regnext(scan); + if (temp == 0) + break; + scan = temp; + } + if (OP(scan) == BACK) + offset = int(scan - val); + else + offset = int(val - scan); + *(scan + 1) = static_cast<char>((offset >> 8) & 0377); + *(scan + 2) = static_cast<char>(offset & 0377); +} /* - regoptail - regtail on operand of first argument; nop if operandless */ -static void regoptail (char* p, const char* val) { - // "Operandless" and "op != BRANCH" are synonymous in practice. - if (p == 0 || p == ®dummy || OP(p) != BRANCH) - return; - regtail(OPERAND(p), val); +static void regoptail(char* p, const char* val) +{ + // "Operandless" and "op != BRANCH" are synonymous in practice. + if (p == 0 || p == ®dummy || OP(p) != BRANCH) + return; + regtail(OPERAND(p), val); } - - //////////////////////////////////////////////////////////////////////// -// +// // find and friends -// +// //////////////////////////////////////////////////////////////////////// - /* * Global work variables for find(). */ -static const char* reginput; // String-input pointer. -static const char* regbol; // Beginning of input, for ^ check. -static const char* *regstartp; // Pointer to startp array. -static const char* *regendp; // Ditto for endp. +static const char* reginput; // String-input pointer. +static const char* regbol; // Beginning of input, for ^ check. +static const char** regstartp; // Pointer to startp array. +static const char** regendp; // Ditto for endp. /* * Forwards. */ -static int regtry (const char*, const char* *, - const char* *, const char*); -static int regmatch (const char*); -static int regrepeat (const char*); +static int regtry(const char*, const char**, const char**, const char*); +static int regmatch(const char*); +static int regrepeat(const char*); #ifdef DEBUG -int regnarrate = 0; -void regdump (); -static char* regprop (); +int regnarrate = 0; +void regdump(); +static char* regprop(); #endif // find -- Matches the regular expression to the given string. // Returns true if found, and sets start and end indexes accordingly. -bool RegularExpression::find (const char* string) { - const char* s; +bool RegularExpression::find(const char* string) +{ + const char* s; - this->searchstring = string; + this->searchstring = string; - if (!this->program) - { - return false; - } + if (!this->program) { + return false; + } - // Check validity of program. - if (UCHARAT(this->program) != MAGIC) { - //RAISE Error, SYM(RegularExpression), SYM(Internal_Error), - printf ("RegularExpression::find(): Compiled regular expression corrupted.\n"); - return 0; - } + // Check validity of program. + if (UCHARAT(this->program) != MAGIC) { + // RAISE Error, SYM(RegularExpression), SYM(Internal_Error), + printf( + "RegularExpression::find(): Compiled regular expression corrupted.\n"); + return 0; + } - // If there is a "must appear" string, look for it. - if (this->regmust != 0) { - s = string; - while ((s = strchr(s, this->regmust[0])) != 0) { - if (strncmp(s, this->regmust, this->regmlen) == 0) - break; // Found it. - s++; - } - if (s == 0) // Not present. - return (0); + // If there is a "must appear" string, look for it. + if (this->regmust != 0) { + s = string; + while ((s = strchr(s, this->regmust[0])) != 0) { + if (strncmp(s, this->regmust, this->regmlen) == 0) + break; // Found it. + s++; } + if (s == 0) // Not present. + return (0); + } - // Mark beginning of line for ^ . - regbol = string; - - // Simplest case: anchored match need be tried only once. - if (this->reganch) - return (regtry(string, this->startp, this->endp, this->program) != 0); + // Mark beginning of line for ^ . + regbol = string; - // Messy cases: unanchored match. - s = string; - if (this->regstart != '\0') - // We know what char it must start with. - while ((s = strchr(s, this->regstart)) != 0) { - if (regtry(s, this->startp, this->endp, this->program)) - return (1); - s++; + // Simplest case: anchored match need be tried only once. + if (this->reganch) + return (regtry(string, this->startp, this->endp, this->program) != 0); - } - else - // We don't -- general case. - do { - if (regtry(s, this->startp, this->endp, this->program)) - return (1); - } while (*s++ != '\0'); + // Messy cases: unanchored match. + s = string; + if (this->regstart != '\0') + // We know what char it must start with. + while ((s = strchr(s, this->regstart)) != 0) { + if (regtry(s, this->startp, this->endp, this->program)) + return (1); + s++; + } + else + // We don't -- general case. + do { + if (regtry(s, this->startp, this->endp, this->program)) + return (1); + } while (*s++ != '\0'); - // Failure. - return (0); + // Failure. + return (0); } - /* - regtry - try match at specific point 0 failure, 1 success */ -static int regtry (const char* string, const char* *start, - const char* *end, const char* prog) { - int i; - const char* *sp1; - const char* *ep; - - reginput = string; - regstartp = start; - regendp = end; - - sp1 = start; - ep = end; - for (i = RegularExpression::NSUBEXP; i > 0; i--) { - *sp1++ = 0; - *ep++ = 0; - } - if (regmatch(prog + 1)) { - start[0] = string; - end[0] = reginput; - return (1); - } - else - return (0); +static int regtry(const char* string, const char** start, const char** end, + const char* prog) +{ + int i; + const char** sp1; + const char** ep; + + reginput = string; + regstartp = start; + regendp = end; + + sp1 = start; + ep = end; + for (i = RegularExpression::NSUBEXP; i > 0; i--) { + *sp1++ = 0; + *ep++ = 0; + } + if (regmatch(prog + 1)) { + start[0] = string; + end[0] = reginput; + return (1); + } else + return (0); } - /* - regmatch - main matching routine * @@ -984,261 +955,260 @@ static int regtry (const char* string, const char* *start, * by recursion. * 0 failure, 1 success */ -static int regmatch (const char* prog) { - const char* scan; // Current node. - const char* next; // Next node. - - scan = prog; - - while (scan != 0) { - - next = regnext(scan); - - switch (OP(scan)) { - case BOL: - if (reginput != regbol) - return (0); - break; - case EOL: - if (*reginput != '\0') - return (0); - break; - case ANY: - if (*reginput == '\0') - return (0); - reginput++; - break; - case EXACTLY:{ - size_t len; - const char* opnd; - - opnd = OPERAND(scan); - // Inline the first character, for speed. - if (*opnd != *reginput) - return (0); - len = strlen(opnd); - if (len > 1 && strncmp(opnd, reginput, len) != 0) - return (0); - reginput += len; - } - break; - case ANYOF: - if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) == 0) - return (0); - reginput++; - break; - case ANYBUT: - if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) != 0) - return (0); - reginput++; - break; - case NOTHING: - break; - case BACK: - break; - case OPEN + 1: - case OPEN + 2: - case OPEN + 3: - case OPEN + 4: - case OPEN + 5: - case OPEN + 6: - case OPEN + 7: - case OPEN + 8: - case OPEN + 9:{ - int no; - const char* save; - - no = OP(scan) - OPEN; - save = reginput; - - if (regmatch(next)) { - - // - // Don't set startp if some later invocation of the - // same parentheses already has. - // - if (regstartp[no] == 0) - regstartp[no] = save; - return (1); - } - else - return (0); - } -// break; - case CLOSE + 1: - case CLOSE + 2: - case CLOSE + 3: - case CLOSE + 4: - case CLOSE + 5: - case CLOSE + 6: - case CLOSE + 7: - case CLOSE + 8: - case CLOSE + 9:{ - int no; - const char* save; - - no = OP(scan) - CLOSE; - save = reginput; - - if (regmatch(next)) { - - // - // Don't set endp if some later invocation of the - // same parentheses already has. - // - if (regendp[no] == 0) - regendp[no] = save; - return (1); - } - else - return (0); - } -// break; - case BRANCH:{ - - const char* save; - - if (OP(next) != BRANCH) // No choice. - next = OPERAND(scan); // Avoid recursion. - else { - do { - save = reginput; - if (regmatch(OPERAND(scan))) - return (1); - reginput = save; - scan = regnext(scan); - } while (scan != 0 && OP(scan) == BRANCH); - return (0); - // NOTREACHED - } - } - break; - case STAR: - case PLUS:{ - char nextch; - int no; - const char* save; - int min_no; - - // - // Lookahead to avoid useless match attempts when we know - // what character comes next. - // - nextch = '\0'; - if (OP(next) == EXACTLY) - nextch = *OPERAND(next); - min_no = (OP(scan) == STAR) ? 0 : 1; - save = reginput; - no = regrepeat(OPERAND(scan)); - while (no >= min_no) { - // If it could work, try it. - if (nextch == '\0' || *reginput == nextch) - if (regmatch(next)) - return (1); - // Couldn't or didn't -- back up. - no--; - reginput = save + no; - } - return (0); - } -// break; - case END: - return (1); // Success! - - default: - //RAISE Error, SYM(RegularExpression), SYM(Internal_Error), - printf ("RegularExpression::find(): Internal error -- memory corrupted.\n"); - return 0; +static int regmatch(const char* prog) +{ + const char* scan; // Current node. + const char* next; // Next node. + + scan = prog; + + while (scan != 0) { + + next = regnext(scan); + + switch (OP(scan)) { + case BOL: + if (reginput != regbol) + return (0); + break; + case EOL: + if (*reginput != '\0') + return (0); + break; + case ANY: + if (*reginput == '\0') + return (0); + reginput++; + break; + case EXACTLY: { + size_t len; + const char* opnd; + + opnd = OPERAND(scan); + // Inline the first character, for speed. + if (*opnd != *reginput) + return (0); + len = strlen(opnd); + if (len > 1 && strncmp(opnd, reginput, len) != 0) + return (0); + reginput += len; + } break; + case ANYOF: + if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) == 0) + return (0); + reginput++; + break; + case ANYBUT: + if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) != 0) + return (0); + reginput++; + break; + case NOTHING: + break; + case BACK: + break; + case OPEN + 1: + case OPEN + 2: + case OPEN + 3: + case OPEN + 4: + case OPEN + 5: + case OPEN + 6: + case OPEN + 7: + case OPEN + 8: + case OPEN + 9: { + int no; + const char* save; + + no = OP(scan) - OPEN; + save = reginput; + + if (regmatch(next)) { + + // + // Don't set startp if some later invocation of the + // same parentheses already has. + // + if (regstartp[no] == 0) + regstartp[no] = save; + return (1); + } else + return (0); + } + // break; + case CLOSE + 1: + case CLOSE + 2: + case CLOSE + 3: + case CLOSE + 4: + case CLOSE + 5: + case CLOSE + 6: + case CLOSE + 7: + case CLOSE + 8: + case CLOSE + 9: { + int no; + const char* save; + + no = OP(scan) - CLOSE; + save = reginput; + + if (regmatch(next)) { + + // + // Don't set endp if some later invocation of the + // same parentheses already has. + // + if (regendp[no] == 0) + regendp[no] = save; + return (1); + } else + return (0); + } + // break; + case BRANCH: { + + const char* save; + + if (OP(next) != BRANCH) // No choice. + next = OPERAND(scan); // Avoid recursion. + else { + do { + save = reginput; + if (regmatch(OPERAND(scan))) + return (1); + reginput = save; + scan = regnext(scan); + } while (scan != 0 && OP(scan) == BRANCH); + return (0); + // NOTREACHED + } + } break; + case STAR: + case PLUS: { + char nextch; + int no; + const char* save; + int min_no; + + // + // Lookahead to avoid useless match attempts when we know + // what character comes next. + // + nextch = '\0'; + if (OP(next) == EXACTLY) + nextch = *OPERAND(next); + min_no = (OP(scan) == STAR) ? 0 : 1; + save = reginput; + no = regrepeat(OPERAND(scan)); + while (no >= min_no) { + // If it could work, try it. + if (nextch == '\0' || *reginput == nextch) + if (regmatch(next)) + return (1); + // Couldn't or didn't -- back up. + no--; + reginput = save + no; } - scan = next; + return (0); + } + // break; + case END: + return (1); // Success! + + default: + // RAISE Error, SYM(RegularExpression), SYM(Internal_Error), + printf( + "RegularExpression::find(): Internal error -- memory corrupted.\n"); + return 0; } + scan = next; + } - // - // We get here only if there's trouble -- normally "case END" is the - // terminating point. - // - //RAISE Error, SYM(RegularExpression), SYM(Internal_Error), - printf ("RegularExpression::find(): Internal error -- corrupted pointers.\n"); - return (0); + // + // We get here only if there's trouble -- normally "case END" is the + // terminating point. + // + // RAISE Error, SYM(RegularExpression), SYM(Internal_Error), + printf("RegularExpression::find(): Internal error -- corrupted pointers.\n"); + return (0); } - /* - regrepeat - repeatedly match something simple, report how many */ -static int regrepeat (const char* p) { - int count = 0; - const char* scan; - const char* opnd; - - scan = reginput; - opnd = OPERAND(p); - switch (OP(p)) { - case ANY: - count = int(strlen(scan)); - scan += count; - break; - case EXACTLY: - while (*opnd == *scan) { - count++; - scan++; - } - break; - case ANYOF: - while (*scan != '\0' && strchr(opnd, *scan) != 0) { - count++; - scan++; - } - break; - case ANYBUT: - while (*scan != '\0' && strchr(opnd, *scan) == 0) { - count++; - scan++; - } - break; - default: // Oh dear. Called inappropriately. - //RAISE Error, SYM(RegularExpression), SYM(Internal_Error), - printf ("cm RegularExpression::find(): Internal error.\n"); - return 0; - } - reginput = scan; - return (count); +static int regrepeat(const char* p) +{ + int count = 0; + const char* scan; + const char* opnd; + + scan = reginput; + opnd = OPERAND(p); + switch (OP(p)) { + case ANY: + count = int(strlen(scan)); + scan += count; + break; + case EXACTLY: + while (*opnd == *scan) { + count++; + scan++; + } + break; + case ANYOF: + while (*scan != '\0' && strchr(opnd, *scan) != 0) { + count++; + scan++; + } + break; + case ANYBUT: + while (*scan != '\0' && strchr(opnd, *scan) == 0) { + count++; + scan++; + } + break; + default: // Oh dear. Called inappropriately. + // RAISE Error, SYM(RegularExpression), SYM(Internal_Error), + printf("cm RegularExpression::find(): Internal error.\n"); + return 0; + } + reginput = scan; + return (count); } - /* - regnext - dig the "next" pointer out of a node */ -static const char* regnext (const char* p) { - int offset; +static const char* regnext(const char* p) +{ + int offset; - if (p == ®dummy) - return (0); + if (p == ®dummy) + return (0); - offset = NEXT(p); - if (offset == 0) - return (0); + offset = NEXT(p); + if (offset == 0) + return (0); - if (OP(p) == BACK) - return (p - offset); - else - return (p + offset); + if (OP(p) == BACK) + return (p - offset); + else + return (p + offset); } -static char* regnext (char* p) { - int offset; +static char* regnext(char* p) +{ + int offset; - if (p == ®dummy) - return (0); + if (p == ®dummy) + return (0); - offset = NEXT(p); - if (offset == 0) - return (0); + offset = NEXT(p); + if (offset == 0) + return (0); - if (OP(p) == BACK) - return (p - offset); - else - return (p + offset); + if (OP(p) == BACK) + return (p - offset); + else + return (p + offset); } } // namespace KWSYS_NAMESPACE |