summaryrefslogtreecommitdiffstats
path: root/Source/kwsys/RegularExpression.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'Source/kwsys/RegularExpression.cxx')
-rw-r--r--Source/kwsys/RegularExpression.cxx189
1 files changed, 97 insertions, 92 deletions
diff --git a/Source/kwsys/RegularExpression.cxx b/Source/kwsys/RegularExpression.cxx
index 26e84e0..fa3551c 100644
--- a/Source/kwsys/RegularExpression.cxx
+++ b/Source/kwsys/RegularExpression.cxx
@@ -45,9 +45,9 @@ RegularExpression::RegularExpression(const RegularExpression& rxp)
this->program = new char[this->progsize]; // Allocate storage
for (ind = this->progsize; ind-- != 0;) // Copy regular expresion
this->program[ind] = rxp.program[ind];
- this->startp[0] = rxp.startp[0]; // Copy pointers into last
- this->endp[0] = rxp.endp[0]; // Successful "find" operation
- this->regmust = rxp.regmust; // Copy field
+ // Copy pointers into last successful "find" operation
+ this->regmatch = rxp.regmatch;
+ this->regmust = rxp.regmust; // Copy field
if (rxp.regmust != 0) {
char* dum = rxp.program;
ind = 0;
@@ -78,9 +78,9 @@ RegularExpression& RegularExpression::operator=(const RegularExpression& rxp)
this->program = new char[this->progsize]; // Allocate storage
for (ind = this->progsize; ind-- != 0;) // Copy regular expresion
this->program[ind] = rxp.program[ind];
- this->startp[0] = rxp.startp[0]; // Copy pointers into last
- this->endp[0] = rxp.endp[0]; // Successful "find" operation
- this->regmust = rxp.regmust; // Copy field
+ // Copy pointers into last successful "find" operation
+ this->regmatch = rxp.regmatch;
+ this->regmust = rxp.regmust; // Copy field
if (rxp.regmust != 0) {
char* dum = rxp.program;
ind = 0;
@@ -123,8 +123,9 @@ bool RegularExpression::deep_equal(const RegularExpression& rxp) const
while (ind-- != 0) // Else while still characters
if (this->program[ind] != rxp.program[ind]) // If regexp are different
return false; // Return failure
- return (this->startp[0] == rxp.startp[0] && // Else if same start/end ptrs,
- this->endp[0] == rxp.endp[0]); // Return true
+ // Else if same start/end ptrs, return true
+ return (this->regmatch.start() == rxp.regmatch.start() &&
+ this->regmatch.end() == rxp.regmatch.end());
}
// The remaining code in this file is derived from the regular expression code
@@ -276,31 +277,35 @@ const unsigned char MAGIC = 0234;
/////////////////////////////////////////////////////////////////////////
/*
- * Global work variables for compile().
+ * Read only utility variables.
*/
-static const char* regparse; // Input-scan pointer.
-static int regnpar; // () count.
static char regdummy;
-static char* regcode; // Code-emit pointer; &regdummy = don't.
-static long regsize; // Code size.
+static char* const regdummyptr = &regdummy;
/*
- * Forward declarations for compile()'s friends.
+ * Utility class for RegularExpression::compile().
*/
-// #ifndef static
-// #define static static
-// #endif
-static char* reg(int, int*);
-static char* regbranch(int*);
-static char* regpiece(int*);
-static char* regatom(int*);
-static char* regnode(char);
+class RegExpCompile
+{
+public:
+ const char* regparse; // Input-scan pointer.
+ int regnpar; // () count.
+ char* regcode; // Code-emit pointer; regdummyptr = don't.
+ long regsize; // Code size.
+
+ char* reg(int, int*);
+ char* regbranch(int*);
+ char* regpiece(int*);
+ char* regatom(int*);
+ char* regnode(char);
+ void regc(char);
+ void reginsert(char, char*);
+ static void regtail(char*, const char*);
+ static void regoptail(char*, const char*);
+};
+
static const char* regnext(const char*);
static char* regnext(char*);
-static void regc(char);
-static void reginsert(char, char*);
-static void regtail(char*, const char*);
-static void regoptail(char*, const char*);
#ifdef STRCSPN
static int strcspn();
@@ -337,19 +342,20 @@ bool RegularExpression::compile(const char* exp)
}
// First pass: determine size, legality.
- regparse = exp;
- regnpar = 1;
- regsize = 0L;
- regcode = &regdummy;
- regc(static_cast<char>(MAGIC));
- if (!reg(0, &flags)) {
+ RegExpCompile comp;
+ comp.regparse = exp;
+ comp.regnpar = 1;
+ comp.regsize = 0L;
+ comp.regcode = regdummyptr;
+ comp.regc(static_cast<char>(MAGIC));
+ if (!comp.reg(0, &flags)) {
printf("RegularExpression::compile(): Error in compile.\n");
return false;
}
- this->startp[0] = this->endp[0] = this->searchstring = 0;
+ this->regmatch.clear();
// Small enough for pointer-storage convention?
- if (regsize >= 32767L) { // Probably could be 65535L.
+ if (comp.regsize >= 32767L) { // Probably could be 65535L.
// RAISE Error, SYM(RegularExpression), SYM(Expr_Too_Big),
printf("RegularExpression::compile(): Expression too big.\n");
return false;
@@ -360,8 +366,8 @@ bool RegularExpression::compile(const char* exp)
if (this->program != 0)
delete[] this->program;
//#endif
- this->program = new char[regsize];
- this->progsize = static_cast<int>(regsize);
+ this->program = new char[comp.regsize];
+ this->progsize = static_cast<int>(comp.regsize);
if (this->program == 0) {
// RAISE Error, SYM(RegularExpression), SYM(Out_Of_Memory),
@@ -370,11 +376,11 @@ bool RegularExpression::compile(const char* exp)
}
// Second pass: emit code.
- regparse = exp;
- regnpar = 1;
- regcode = this->program;
- regc(static_cast<char>(MAGIC));
- reg(0, &flags);
+ comp.regparse = exp;
+ comp.regnpar = 1;
+ comp.regcode = this->program;
+ comp.regc(static_cast<char>(MAGIC));
+ comp.reg(0, &flags);
// Dig out information for optimizations.
this->regstart = '\0'; // Worst-case defaults.
@@ -423,7 +429,7 @@ bool RegularExpression::compile(const char* exp)
* is a trifle forced, but the need to tie the tails of the branches to what
* follows makes it hard to avoid.
*/
-static char* reg(int paren, int* flagp)
+char* RegExpCompile::reg(int paren, int* flagp)
{
char* ret;
char* br;
@@ -435,7 +441,7 @@ static char* reg(int paren, int* flagp)
// Make an OPEN node, if parenthesized.
if (paren) {
- if (regnpar >= RegularExpression::NSUBEXP) {
+ if (regnpar >= RegularExpressionMatch::NSUBEXP) {
// RAISE Error, SYM(RegularExpression), SYM(Too_Many_Parens),
printf("RegularExpression::compile(): Too many parentheses.\n");
return 0;
@@ -501,7 +507,7 @@ static char* reg(int paren, int* flagp)
*
* Implements the concatenation operator.
*/
-static char* regbranch(int* flagp)
+char* RegExpCompile::regbranch(int* flagp)
{
char* ret;
char* chain;
@@ -538,7 +544,7 @@ static char* regbranch(int* flagp)
* It might seem that this node could be dispensed with entirely, but the
* endmarker role is not redundant.
*/
-static char* regpiece(int* flagp)
+char* RegExpCompile::regpiece(int* flagp)
{
char* ret;
char op;
@@ -605,7 +611,7 @@ static char* regpiece(int* flagp)
* faster to run. Backslashed characters are exceptions, each becoming a
* separate node; the code is simpler that way and it's not worth fixing.
*/
-static char* regatom(int* flagp)
+char* RegExpCompile::regatom(int* flagp)
{
char* ret;
int flags;
@@ -724,13 +730,13 @@ static char* regatom(int* flagp)
- regnode - emit a node
Location.
*/
-static char* regnode(char op)
+char* RegExpCompile::regnode(char op)
{
char* ret;
char* ptr;
ret = regcode;
- if (ret == &regdummy) {
+ if (ret == regdummyptr) {
regsize += 3;
return (ret);
}
@@ -747,9 +753,9 @@ static char* regnode(char op)
/*
- regc - emit (if appropriate) a byte of code
*/
-static void regc(char b)
+void RegExpCompile::regc(char b)
{
- if (regcode != &regdummy)
+ if (regcode != regdummyptr)
*regcode++ = b;
else
regsize++;
@@ -760,13 +766,13 @@ static void regc(char b)
*
* Means relocating the operand.
*/
-static void reginsert(char op, char* opnd)
+void RegExpCompile::reginsert(char op, char* opnd)
{
char* src;
char* dst;
char* place;
- if (regcode == &regdummy) {
+ if (regcode == regdummyptr) {
regsize += 3;
return;
}
@@ -786,13 +792,13 @@ static void reginsert(char op, char* opnd)
/*
- regtail - set the next-pointer at the end of a node chain
*/
-static void regtail(char* p, const char* val)
+void RegExpCompile::regtail(char* p, const char* val)
{
char* scan;
char* temp;
int offset;
- if (p == &regdummy)
+ if (p == regdummyptr)
return;
// Find last node.
@@ -815,10 +821,10 @@ static void regtail(char* p, const char* val)
/*
- regoptail - regtail on operand of first argument; nop if operandless
*/
-static void regoptail(char* p, const char* val)
+void RegExpCompile::regoptail(char* p, const char* val)
{
// "Operandless" and "op != BRANCH" are synonymous in practice.
- if (p == 0 || p == &regdummy || OP(p) != BRANCH)
+ if (p == 0 || p == regdummyptr || OP(p) != BRANCH)
return;
regtail(OPERAND(p), val);
}
@@ -830,34 +836,30 @@ static void regoptail(char* p, const char* val)
////////////////////////////////////////////////////////////////////////
/*
- * Global work variables for find().
+ * Utility class for RegularExpression::find().
*/
-static const char* reginput; // String-input pointer.
-static const char* regbol; // Beginning of input, for ^ check.
-static const char** regstartp; // Pointer to startp array.
-static const char** regendp; // Ditto for endp.
+class RegExpFind
+{
+public:
+ const char* reginput; // String-input pointer.
+ const char* regbol; // Beginning of input, for ^ check.
+ const char** regstartp; // Pointer to startp array.
+ const char** regendp; // Ditto for endp.
-/*
- * Forwards.
- */
-static int regtry(const char*, const char**, const char**, const char*);
-static int regmatch(const char*);
-static int regrepeat(const char*);
-
-#ifdef DEBUG
-int regnarrate = 0;
-void regdump();
-static char* regprop();
-#endif
+ int regtry(const char*, const char**, const char**, const char*);
+ int regmatch(const char*);
+ int regrepeat(const char*);
+};
// find -- Matches the regular expression to the given string.
// Returns true if found, and sets start and end indexes accordingly.
-
-bool RegularExpression::find(const char* string)
+bool RegularExpression::find(char const* string,
+ RegularExpressionMatch& rmatch) const
{
const char* s;
- this->searchstring = string;
+ rmatch.clear();
+ rmatch.searchstring = string;
if (!this->program) {
return false;
@@ -868,7 +870,7 @@ bool RegularExpression::find(const char* string)
// RAISE Error, SYM(RegularExpression), SYM(Internal_Error),
printf(
"RegularExpression::find(): Compiled regular expression corrupted.\n");
- return 0;
+ return false;
}
// If there is a "must appear" string, look for it.
@@ -880,42 +882,45 @@ bool RegularExpression::find(const char* string)
s++;
}
if (s == 0) // Not present.
- return (0);
+ return false;
}
+ RegExpFind regFind;
+
// Mark beginning of line for ^ .
- regbol = string;
+ regFind.regbol = string;
// Simplest case: anchored match need be tried only once.
if (this->reganch)
- return (regtry(string, this->startp, this->endp, this->program) != 0);
+ return (
+ regFind.regtry(string, rmatch.startp, rmatch.endp, this->program) != 0);
// Messy cases: unanchored match.
s = string;
if (this->regstart != '\0')
// We know what char it must start with.
while ((s = strchr(s, this->regstart)) != 0) {
- if (regtry(s, this->startp, this->endp, this->program))
- return (1);
+ if (regFind.regtry(s, rmatch.startp, rmatch.endp, this->program))
+ return true;
s++;
}
else
// We don't -- general case.
do {
- if (regtry(s, this->startp, this->endp, this->program))
- return (1);
+ if (regFind.regtry(s, rmatch.startp, rmatch.endp, this->program))
+ return true;
} while (*s++ != '\0');
// Failure.
- return (0);
+ return false;
}
/*
- regtry - try match at specific point
0 failure, 1 success
*/
-static int regtry(const char* string, const char** start, const char** end,
- const char* prog)
+int RegExpFind::regtry(const char* string, const char** start,
+ const char** end, const char* prog)
{
int i;
const char** sp1;
@@ -927,7 +932,7 @@ static int regtry(const char* string, const char** start, const char** end,
sp1 = start;
ep = end;
- for (i = RegularExpression::NSUBEXP; i > 0; i--) {
+ for (i = RegularExpressionMatch::NSUBEXP; i > 0; i--) {
*sp1++ = 0;
*ep++ = 0;
}
@@ -950,7 +955,7 @@ static int regtry(const char* string, const char** start, const char** end,
* by recursion.
* 0 failure, 1 success
*/
-static int regmatch(const char* prog)
+int RegExpFind::regmatch(const char* prog)
{
const char* scan; // Current node.
const char* next; // Next node.
@@ -1129,7 +1134,7 @@ static int regmatch(const char* prog)
/*
- regrepeat - repeatedly match something simple, report how many
*/
-static int regrepeat(const char* p)
+int RegExpFind::regrepeat(const char* p)
{
int count = 0;
const char* scan;
@@ -1176,7 +1181,7 @@ static const char* regnext(const char* p)
{
int offset;
- if (p == &regdummy)
+ if (p == regdummyptr)
return (0);
offset = NEXT(p);
@@ -1193,7 +1198,7 @@ static char* regnext(char* p)
{
int offset;
- if (p == &regdummy)
+ if (p == regdummyptr)
return (0);
offset = NEXT(p);