diff options
Diffstat (limited to 'Source/kwsys/RegularExpression.cxx')
-rw-r--r-- | Source/kwsys/RegularExpression.cxx | 189 |
1 files changed, 97 insertions, 92 deletions
diff --git a/Source/kwsys/RegularExpression.cxx b/Source/kwsys/RegularExpression.cxx index 26e84e0..fa3551c 100644 --- a/Source/kwsys/RegularExpression.cxx +++ b/Source/kwsys/RegularExpression.cxx @@ -45,9 +45,9 @@ RegularExpression::RegularExpression(const RegularExpression& rxp) this->program = new char[this->progsize]; // Allocate storage for (ind = this->progsize; ind-- != 0;) // Copy regular expresion this->program[ind] = rxp.program[ind]; - this->startp[0] = rxp.startp[0]; // Copy pointers into last - this->endp[0] = rxp.endp[0]; // Successful "find" operation - this->regmust = rxp.regmust; // Copy field + // Copy pointers into last successful "find" operation + this->regmatch = rxp.regmatch; + this->regmust = rxp.regmust; // Copy field if (rxp.regmust != 0) { char* dum = rxp.program; ind = 0; @@ -78,9 +78,9 @@ RegularExpression& RegularExpression::operator=(const RegularExpression& rxp) this->program = new char[this->progsize]; // Allocate storage for (ind = this->progsize; ind-- != 0;) // Copy regular expresion this->program[ind] = rxp.program[ind]; - this->startp[0] = rxp.startp[0]; // Copy pointers into last - this->endp[0] = rxp.endp[0]; // Successful "find" operation - this->regmust = rxp.regmust; // Copy field + // Copy pointers into last successful "find" operation + this->regmatch = rxp.regmatch; + this->regmust = rxp.regmust; // Copy field if (rxp.regmust != 0) { char* dum = rxp.program; ind = 0; @@ -123,8 +123,9 @@ bool RegularExpression::deep_equal(const RegularExpression& rxp) const while (ind-- != 0) // Else while still characters if (this->program[ind] != rxp.program[ind]) // If regexp are different return false; // Return failure - return (this->startp[0] == rxp.startp[0] && // Else if same start/end ptrs, - this->endp[0] == rxp.endp[0]); // Return true + // Else if same start/end ptrs, return true + return (this->regmatch.start() == rxp.regmatch.start() && + this->regmatch.end() == rxp.regmatch.end()); } // The remaining code in this file is derived from the regular expression code @@ -276,31 +277,35 @@ const unsigned char MAGIC = 0234; ///////////////////////////////////////////////////////////////////////// /* - * Global work variables for compile(). + * Read only utility variables. */ -static const char* regparse; // Input-scan pointer. -static int regnpar; // () count. static char regdummy; -static char* regcode; // Code-emit pointer; ®dummy = don't. -static long regsize; // Code size. +static char* const regdummyptr = ®dummy; /* - * Forward declarations for compile()'s friends. + * Utility class for RegularExpression::compile(). */ -// #ifndef static -// #define static static -// #endif -static char* reg(int, int*); -static char* regbranch(int*); -static char* regpiece(int*); -static char* regatom(int*); -static char* regnode(char); +class RegExpCompile +{ +public: + const char* regparse; // Input-scan pointer. + int regnpar; // () count. + char* regcode; // Code-emit pointer; regdummyptr = don't. + long regsize; // Code size. + + char* reg(int, int*); + char* regbranch(int*); + char* regpiece(int*); + char* regatom(int*); + char* regnode(char); + void regc(char); + void reginsert(char, char*); + static void regtail(char*, const char*); + static void regoptail(char*, const char*); +}; + static const char* regnext(const char*); static char* regnext(char*); -static void regc(char); -static void reginsert(char, char*); -static void regtail(char*, const char*); -static void regoptail(char*, const char*); #ifdef STRCSPN static int strcspn(); @@ -337,19 +342,20 @@ bool RegularExpression::compile(const char* exp) } // First pass: determine size, legality. - regparse = exp; - regnpar = 1; - regsize = 0L; - regcode = ®dummy; - regc(static_cast<char>(MAGIC)); - if (!reg(0, &flags)) { + RegExpCompile comp; + comp.regparse = exp; + comp.regnpar = 1; + comp.regsize = 0L; + comp.regcode = regdummyptr; + comp.regc(static_cast<char>(MAGIC)); + if (!comp.reg(0, &flags)) { printf("RegularExpression::compile(): Error in compile.\n"); return false; } - this->startp[0] = this->endp[0] = this->searchstring = 0; + this->regmatch.clear(); // Small enough for pointer-storage convention? - if (regsize >= 32767L) { // Probably could be 65535L. + if (comp.regsize >= 32767L) { // Probably could be 65535L. // RAISE Error, SYM(RegularExpression), SYM(Expr_Too_Big), printf("RegularExpression::compile(): Expression too big.\n"); return false; @@ -360,8 +366,8 @@ bool RegularExpression::compile(const char* exp) if (this->program != 0) delete[] this->program; //#endif - this->program = new char[regsize]; - this->progsize = static_cast<int>(regsize); + this->program = new char[comp.regsize]; + this->progsize = static_cast<int>(comp.regsize); if (this->program == 0) { // RAISE Error, SYM(RegularExpression), SYM(Out_Of_Memory), @@ -370,11 +376,11 @@ bool RegularExpression::compile(const char* exp) } // Second pass: emit code. - regparse = exp; - regnpar = 1; - regcode = this->program; - regc(static_cast<char>(MAGIC)); - reg(0, &flags); + comp.regparse = exp; + comp.regnpar = 1; + comp.regcode = this->program; + comp.regc(static_cast<char>(MAGIC)); + comp.reg(0, &flags); // Dig out information for optimizations. this->regstart = '\0'; // Worst-case defaults. @@ -423,7 +429,7 @@ bool RegularExpression::compile(const char* exp) * is a trifle forced, but the need to tie the tails of the branches to what * follows makes it hard to avoid. */ -static char* reg(int paren, int* flagp) +char* RegExpCompile::reg(int paren, int* flagp) { char* ret; char* br; @@ -435,7 +441,7 @@ static char* reg(int paren, int* flagp) // Make an OPEN node, if parenthesized. if (paren) { - if (regnpar >= RegularExpression::NSUBEXP) { + if (regnpar >= RegularExpressionMatch::NSUBEXP) { // RAISE Error, SYM(RegularExpression), SYM(Too_Many_Parens), printf("RegularExpression::compile(): Too many parentheses.\n"); return 0; @@ -501,7 +507,7 @@ static char* reg(int paren, int* flagp) * * Implements the concatenation operator. */ -static char* regbranch(int* flagp) +char* RegExpCompile::regbranch(int* flagp) { char* ret; char* chain; @@ -538,7 +544,7 @@ static char* regbranch(int* flagp) * It might seem that this node could be dispensed with entirely, but the * endmarker role is not redundant. */ -static char* regpiece(int* flagp) +char* RegExpCompile::regpiece(int* flagp) { char* ret; char op; @@ -605,7 +611,7 @@ static char* regpiece(int* flagp) * faster to run. Backslashed characters are exceptions, each becoming a * separate node; the code is simpler that way and it's not worth fixing. */ -static char* regatom(int* flagp) +char* RegExpCompile::regatom(int* flagp) { char* ret; int flags; @@ -724,13 +730,13 @@ static char* regatom(int* flagp) - regnode - emit a node Location. */ -static char* regnode(char op) +char* RegExpCompile::regnode(char op) { char* ret; char* ptr; ret = regcode; - if (ret == ®dummy) { + if (ret == regdummyptr) { regsize += 3; return (ret); } @@ -747,9 +753,9 @@ static char* regnode(char op) /* - regc - emit (if appropriate) a byte of code */ -static void regc(char b) +void RegExpCompile::regc(char b) { - if (regcode != ®dummy) + if (regcode != regdummyptr) *regcode++ = b; else regsize++; @@ -760,13 +766,13 @@ static void regc(char b) * * Means relocating the operand. */ -static void reginsert(char op, char* opnd) +void RegExpCompile::reginsert(char op, char* opnd) { char* src; char* dst; char* place; - if (regcode == ®dummy) { + if (regcode == regdummyptr) { regsize += 3; return; } @@ -786,13 +792,13 @@ static void reginsert(char op, char* opnd) /* - regtail - set the next-pointer at the end of a node chain */ -static void regtail(char* p, const char* val) +void RegExpCompile::regtail(char* p, const char* val) { char* scan; char* temp; int offset; - if (p == ®dummy) + if (p == regdummyptr) return; // Find last node. @@ -815,10 +821,10 @@ static void regtail(char* p, const char* val) /* - regoptail - regtail on operand of first argument; nop if operandless */ -static void regoptail(char* p, const char* val) +void RegExpCompile::regoptail(char* p, const char* val) { // "Operandless" and "op != BRANCH" are synonymous in practice. - if (p == 0 || p == ®dummy || OP(p) != BRANCH) + if (p == 0 || p == regdummyptr || OP(p) != BRANCH) return; regtail(OPERAND(p), val); } @@ -830,34 +836,30 @@ static void regoptail(char* p, const char* val) //////////////////////////////////////////////////////////////////////// /* - * Global work variables for find(). + * Utility class for RegularExpression::find(). */ -static const char* reginput; // String-input pointer. -static const char* regbol; // Beginning of input, for ^ check. -static const char** regstartp; // Pointer to startp array. -static const char** regendp; // Ditto for endp. +class RegExpFind +{ +public: + const char* reginput; // String-input pointer. + const char* regbol; // Beginning of input, for ^ check. + const char** regstartp; // Pointer to startp array. + const char** regendp; // Ditto for endp. -/* - * Forwards. - */ -static int regtry(const char*, const char**, const char**, const char*); -static int regmatch(const char*); -static int regrepeat(const char*); - -#ifdef DEBUG -int regnarrate = 0; -void regdump(); -static char* regprop(); -#endif + int regtry(const char*, const char**, const char**, const char*); + int regmatch(const char*); + int regrepeat(const char*); +}; // find -- Matches the regular expression to the given string. // Returns true if found, and sets start and end indexes accordingly. - -bool RegularExpression::find(const char* string) +bool RegularExpression::find(char const* string, + RegularExpressionMatch& rmatch) const { const char* s; - this->searchstring = string; + rmatch.clear(); + rmatch.searchstring = string; if (!this->program) { return false; @@ -868,7 +870,7 @@ bool RegularExpression::find(const char* string) // RAISE Error, SYM(RegularExpression), SYM(Internal_Error), printf( "RegularExpression::find(): Compiled regular expression corrupted.\n"); - return 0; + return false; } // If there is a "must appear" string, look for it. @@ -880,42 +882,45 @@ bool RegularExpression::find(const char* string) s++; } if (s == 0) // Not present. - return (0); + return false; } + RegExpFind regFind; + // Mark beginning of line for ^ . - regbol = string; + regFind.regbol = string; // Simplest case: anchored match need be tried only once. if (this->reganch) - return (regtry(string, this->startp, this->endp, this->program) != 0); + return ( + regFind.regtry(string, rmatch.startp, rmatch.endp, this->program) != 0); // Messy cases: unanchored match. s = string; if (this->regstart != '\0') // We know what char it must start with. while ((s = strchr(s, this->regstart)) != 0) { - if (regtry(s, this->startp, this->endp, this->program)) - return (1); + if (regFind.regtry(s, rmatch.startp, rmatch.endp, this->program)) + return true; s++; } else // We don't -- general case. do { - if (regtry(s, this->startp, this->endp, this->program)) - return (1); + if (regFind.regtry(s, rmatch.startp, rmatch.endp, this->program)) + return true; } while (*s++ != '\0'); // Failure. - return (0); + return false; } /* - regtry - try match at specific point 0 failure, 1 success */ -static int regtry(const char* string, const char** start, const char** end, - const char* prog) +int RegExpFind::regtry(const char* string, const char** start, + const char** end, const char* prog) { int i; const char** sp1; @@ -927,7 +932,7 @@ static int regtry(const char* string, const char** start, const char** end, sp1 = start; ep = end; - for (i = RegularExpression::NSUBEXP; i > 0; i--) { + for (i = RegularExpressionMatch::NSUBEXP; i > 0; i--) { *sp1++ = 0; *ep++ = 0; } @@ -950,7 +955,7 @@ static int regtry(const char* string, const char** start, const char** end, * by recursion. * 0 failure, 1 success */ -static int regmatch(const char* prog) +int RegExpFind::regmatch(const char* prog) { const char* scan; // Current node. const char* next; // Next node. @@ -1129,7 +1134,7 @@ static int regmatch(const char* prog) /* - regrepeat - repeatedly match something simple, report how many */ -static int regrepeat(const char* p) +int RegExpFind::regrepeat(const char* p) { int count = 0; const char* scan; @@ -1176,7 +1181,7 @@ static const char* regnext(const char* p) { int offset; - if (p == ®dummy) + if (p == regdummyptr) return (0); offset = NEXT(p); @@ -1193,7 +1198,7 @@ static char* regnext(char* p) { int offset; - if (p == ®dummy) + if (p == regdummyptr) return (0); offset = NEXT(p); |