summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Source/kwsys/RegularExpression.cxx189
-rw-r--r--Source/kwsys/RegularExpression.hxx.in227
2 files changed, 272 insertions, 144 deletions
diff --git a/Source/kwsys/RegularExpression.cxx b/Source/kwsys/RegularExpression.cxx
index 26e84e0..fa3551c 100644
--- a/Source/kwsys/RegularExpression.cxx
+++ b/Source/kwsys/RegularExpression.cxx
@@ -45,9 +45,9 @@ RegularExpression::RegularExpression(const RegularExpression& rxp)
this->program = new char[this->progsize]; // Allocate storage
for (ind = this->progsize; ind-- != 0;) // Copy regular expresion
this->program[ind] = rxp.program[ind];
- this->startp[0] = rxp.startp[0]; // Copy pointers into last
- this->endp[0] = rxp.endp[0]; // Successful "find" operation
- this->regmust = rxp.regmust; // Copy field
+ // Copy pointers into last successful "find" operation
+ this->regmatch = rxp.regmatch;
+ this->regmust = rxp.regmust; // Copy field
if (rxp.regmust != 0) {
char* dum = rxp.program;
ind = 0;
@@ -78,9 +78,9 @@ RegularExpression& RegularExpression::operator=(const RegularExpression& rxp)
this->program = new char[this->progsize]; // Allocate storage
for (ind = this->progsize; ind-- != 0;) // Copy regular expresion
this->program[ind] = rxp.program[ind];
- this->startp[0] = rxp.startp[0]; // Copy pointers into last
- this->endp[0] = rxp.endp[0]; // Successful "find" operation
- this->regmust = rxp.regmust; // Copy field
+ // Copy pointers into last successful "find" operation
+ this->regmatch = rxp.regmatch;
+ this->regmust = rxp.regmust; // Copy field
if (rxp.regmust != 0) {
char* dum = rxp.program;
ind = 0;
@@ -123,8 +123,9 @@ bool RegularExpression::deep_equal(const RegularExpression& rxp) const
while (ind-- != 0) // Else while still characters
if (this->program[ind] != rxp.program[ind]) // If regexp are different
return false; // Return failure
- return (this->startp[0] == rxp.startp[0] && // Else if same start/end ptrs,
- this->endp[0] == rxp.endp[0]); // Return true
+ // Else if same start/end ptrs, return true
+ return (this->regmatch.start() == rxp.regmatch.start() &&
+ this->regmatch.end() == rxp.regmatch.end());
}
// The remaining code in this file is derived from the regular expression code
@@ -276,31 +277,35 @@ const unsigned char MAGIC = 0234;
/////////////////////////////////////////////////////////////////////////
/*
- * Global work variables for compile().
+ * Read only utility variables.
*/
-static const char* regparse; // Input-scan pointer.
-static int regnpar; // () count.
static char regdummy;
-static char* regcode; // Code-emit pointer; &regdummy = don't.
-static long regsize; // Code size.
+static char* const regdummyptr = &regdummy;
/*
- * Forward declarations for compile()'s friends.
+ * Utility class for RegularExpression::compile().
*/
-// #ifndef static
-// #define static static
-// #endif
-static char* reg(int, int*);
-static char* regbranch(int*);
-static char* regpiece(int*);
-static char* regatom(int*);
-static char* regnode(char);
+class RegExpCompile
+{
+public:
+ const char* regparse; // Input-scan pointer.
+ int regnpar; // () count.
+ char* regcode; // Code-emit pointer; regdummyptr = don't.
+ long regsize; // Code size.
+
+ char* reg(int, int*);
+ char* regbranch(int*);
+ char* regpiece(int*);
+ char* regatom(int*);
+ char* regnode(char);
+ void regc(char);
+ void reginsert(char, char*);
+ static void regtail(char*, const char*);
+ static void regoptail(char*, const char*);
+};
+
static const char* regnext(const char*);
static char* regnext(char*);
-static void regc(char);
-static void reginsert(char, char*);
-static void regtail(char*, const char*);
-static void regoptail(char*, const char*);
#ifdef STRCSPN
static int strcspn();
@@ -337,19 +342,20 @@ bool RegularExpression::compile(const char* exp)
}
// First pass: determine size, legality.
- regparse = exp;
- regnpar = 1;
- regsize = 0L;
- regcode = &regdummy;
- regc(static_cast<char>(MAGIC));
- if (!reg(0, &flags)) {
+ RegExpCompile comp;
+ comp.regparse = exp;
+ comp.regnpar = 1;
+ comp.regsize = 0L;
+ comp.regcode = regdummyptr;
+ comp.regc(static_cast<char>(MAGIC));
+ if (!comp.reg(0, &flags)) {
printf("RegularExpression::compile(): Error in compile.\n");
return false;
}
- this->startp[0] = this->endp[0] = this->searchstring = 0;
+ this->regmatch.clear();
// Small enough for pointer-storage convention?
- if (regsize >= 32767L) { // Probably could be 65535L.
+ if (comp.regsize >= 32767L) { // Probably could be 65535L.
// RAISE Error, SYM(RegularExpression), SYM(Expr_Too_Big),
printf("RegularExpression::compile(): Expression too big.\n");
return false;
@@ -360,8 +366,8 @@ bool RegularExpression::compile(const char* exp)
if (this->program != 0)
delete[] this->program;
//#endif
- this->program = new char[regsize];
- this->progsize = static_cast<int>(regsize);
+ this->program = new char[comp.regsize];
+ this->progsize = static_cast<int>(comp.regsize);
if (this->program == 0) {
// RAISE Error, SYM(RegularExpression), SYM(Out_Of_Memory),
@@ -370,11 +376,11 @@ bool RegularExpression::compile(const char* exp)
}
// Second pass: emit code.
- regparse = exp;
- regnpar = 1;
- regcode = this->program;
- regc(static_cast<char>(MAGIC));
- reg(0, &flags);
+ comp.regparse = exp;
+ comp.regnpar = 1;
+ comp.regcode = this->program;
+ comp.regc(static_cast<char>(MAGIC));
+ comp.reg(0, &flags);
// Dig out information for optimizations.
this->regstart = '\0'; // Worst-case defaults.
@@ -423,7 +429,7 @@ bool RegularExpression::compile(const char* exp)
* is a trifle forced, but the need to tie the tails of the branches to what
* follows makes it hard to avoid.
*/
-static char* reg(int paren, int* flagp)
+char* RegExpCompile::reg(int paren, int* flagp)
{
char* ret;
char* br;
@@ -435,7 +441,7 @@ static char* reg(int paren, int* flagp)
// Make an OPEN node, if parenthesized.
if (paren) {
- if (regnpar >= RegularExpression::NSUBEXP) {
+ if (regnpar >= RegularExpressionMatch::NSUBEXP) {
// RAISE Error, SYM(RegularExpression), SYM(Too_Many_Parens),
printf("RegularExpression::compile(): Too many parentheses.\n");
return 0;
@@ -501,7 +507,7 @@ static char* reg(int paren, int* flagp)
*
* Implements the concatenation operator.
*/
-static char* regbranch(int* flagp)
+char* RegExpCompile::regbranch(int* flagp)
{
char* ret;
char* chain;
@@ -538,7 +544,7 @@ static char* regbranch(int* flagp)
* It might seem that this node could be dispensed with entirely, but the
* endmarker role is not redundant.
*/
-static char* regpiece(int* flagp)
+char* RegExpCompile::regpiece(int* flagp)
{
char* ret;
char op;
@@ -605,7 +611,7 @@ static char* regpiece(int* flagp)
* faster to run. Backslashed characters are exceptions, each becoming a
* separate node; the code is simpler that way and it's not worth fixing.
*/
-static char* regatom(int* flagp)
+char* RegExpCompile::regatom(int* flagp)
{
char* ret;
int flags;
@@ -724,13 +730,13 @@ static char* regatom(int* flagp)
- regnode - emit a node
Location.
*/
-static char* regnode(char op)
+char* RegExpCompile::regnode(char op)
{
char* ret;
char* ptr;
ret = regcode;
- if (ret == &regdummy) {
+ if (ret == regdummyptr) {
regsize += 3;
return (ret);
}
@@ -747,9 +753,9 @@ static char* regnode(char op)
/*
- regc - emit (if appropriate) a byte of code
*/
-static void regc(char b)
+void RegExpCompile::regc(char b)
{
- if (regcode != &regdummy)
+ if (regcode != regdummyptr)
*regcode++ = b;
else
regsize++;
@@ -760,13 +766,13 @@ static void regc(char b)
*
* Means relocating the operand.
*/
-static void reginsert(char op, char* opnd)
+void RegExpCompile::reginsert(char op, char* opnd)
{
char* src;
char* dst;
char* place;
- if (regcode == &regdummy) {
+ if (regcode == regdummyptr) {
regsize += 3;
return;
}
@@ -786,13 +792,13 @@ static void reginsert(char op, char* opnd)
/*
- regtail - set the next-pointer at the end of a node chain
*/
-static void regtail(char* p, const char* val)
+void RegExpCompile::regtail(char* p, const char* val)
{
char* scan;
char* temp;
int offset;
- if (p == &regdummy)
+ if (p == regdummyptr)
return;
// Find last node.
@@ -815,10 +821,10 @@ static void regtail(char* p, const char* val)
/*
- regoptail - regtail on operand of first argument; nop if operandless
*/
-static void regoptail(char* p, const char* val)
+void RegExpCompile::regoptail(char* p, const char* val)
{
// "Operandless" and "op != BRANCH" are synonymous in practice.
- if (p == 0 || p == &regdummy || OP(p) != BRANCH)
+ if (p == 0 || p == regdummyptr || OP(p) != BRANCH)
return;
regtail(OPERAND(p), val);
}
@@ -830,34 +836,30 @@ static void regoptail(char* p, const char* val)
////////////////////////////////////////////////////////////////////////
/*
- * Global work variables for find().
+ * Utility class for RegularExpression::find().
*/
-static const char* reginput; // String-input pointer.
-static const char* regbol; // Beginning of input, for ^ check.
-static const char** regstartp; // Pointer to startp array.
-static const char** regendp; // Ditto for endp.
+class RegExpFind
+{
+public:
+ const char* reginput; // String-input pointer.
+ const char* regbol; // Beginning of input, for ^ check.
+ const char** regstartp; // Pointer to startp array.
+ const char** regendp; // Ditto for endp.
-/*
- * Forwards.
- */
-static int regtry(const char*, const char**, const char**, const char*);
-static int regmatch(const char*);
-static int regrepeat(const char*);
-
-#ifdef DEBUG
-int regnarrate = 0;
-void regdump();
-static char* regprop();
-#endif
+ int regtry(const char*, const char**, const char**, const char*);
+ int regmatch(const char*);
+ int regrepeat(const char*);
+};
// find -- Matches the regular expression to the given string.
// Returns true if found, and sets start and end indexes accordingly.
-
-bool RegularExpression::find(const char* string)
+bool RegularExpression::find(char const* string,
+ RegularExpressionMatch& rmatch) const
{
const char* s;
- this->searchstring = string;
+ rmatch.clear();
+ rmatch.searchstring = string;
if (!this->program) {
return false;
@@ -868,7 +870,7 @@ bool RegularExpression::find(const char* string)
// RAISE Error, SYM(RegularExpression), SYM(Internal_Error),
printf(
"RegularExpression::find(): Compiled regular expression corrupted.\n");
- return 0;
+ return false;
}
// If there is a "must appear" string, look for it.
@@ -880,42 +882,45 @@ bool RegularExpression::find(const char* string)
s++;
}
if (s == 0) // Not present.
- return (0);
+ return false;
}
+ RegExpFind regFind;
+
// Mark beginning of line for ^ .
- regbol = string;
+ regFind.regbol = string;
// Simplest case: anchored match need be tried only once.
if (this->reganch)
- return (regtry(string, this->startp, this->endp, this->program) != 0);
+ return (
+ regFind.regtry(string, rmatch.startp, rmatch.endp, this->program) != 0);
// Messy cases: unanchored match.
s = string;
if (this->regstart != '\0')
// We know what char it must start with.
while ((s = strchr(s, this->regstart)) != 0) {
- if (regtry(s, this->startp, this->endp, this->program))
- return (1);
+ if (regFind.regtry(s, rmatch.startp, rmatch.endp, this->program))
+ return true;
s++;
}
else
// We don't -- general case.
do {
- if (regtry(s, this->startp, this->endp, this->program))
- return (1);
+ if (regFind.regtry(s, rmatch.startp, rmatch.endp, this->program))
+ return true;
} while (*s++ != '\0');
// Failure.
- return (0);
+ return false;
}
/*
- regtry - try match at specific point
0 failure, 1 success
*/
-static int regtry(const char* string, const char** start, const char** end,
- const char* prog)
+int RegExpFind::regtry(const char* string, const char** start,
+ const char** end, const char* prog)
{
int i;
const char** sp1;
@@ -927,7 +932,7 @@ static int regtry(const char* string, const char** start, const char** end,
sp1 = start;
ep = end;
- for (i = RegularExpression::NSUBEXP; i > 0; i--) {
+ for (i = RegularExpressionMatch::NSUBEXP; i > 0; i--) {
*sp1++ = 0;
*ep++ = 0;
}
@@ -950,7 +955,7 @@ static int regtry(const char* string, const char** start, const char** end,
* by recursion.
* 0 failure, 1 success
*/
-static int regmatch(const char* prog)
+int RegExpFind::regmatch(const char* prog)
{
const char* scan; // Current node.
const char* next; // Next node.
@@ -1129,7 +1134,7 @@ static int regmatch(const char* prog)
/*
- regrepeat - repeatedly match something simple, report how many
*/
-static int regrepeat(const char* p)
+int RegExpFind::regrepeat(const char* p)
{
int count = 0;
const char* scan;
@@ -1176,7 +1181,7 @@ static const char* regnext(const char* p)
{
int offset;
- if (p == &regdummy)
+ if (p == regdummyptr)
return (0);
offset = NEXT(p);
@@ -1193,7 +1198,7 @@ static char* regnext(char* p)
{
int offset;
- if (p == &regdummy)
+ if (p == regdummyptr)
return (0);
offset = NEXT(p);
diff --git a/Source/kwsys/RegularExpression.hxx.in b/Source/kwsys/RegularExpression.hxx.in
index 763fdab..a3fe72d 100644
--- a/Source/kwsys/RegularExpression.hxx.in
+++ b/Source/kwsys/RegularExpression.hxx.in
@@ -34,6 +34,115 @@
namespace @KWSYS_NAMESPACE@ {
+// Forward declaration
+class RegularExpression;
+
+/** \class RegularExpressionMatch
+ * \brief Stores the pattern matches of a RegularExpression
+ */
+class @KWSYS_NAMESPACE@_EXPORT RegularExpressionMatch
+{
+public:
+ RegularExpressionMatch();
+
+ bool isValid() const;
+ void clear();
+
+ std::string::size_type start() const;
+ std::string::size_type end() const;
+ std::string::size_type start(int n) const;
+ std::string::size_type end(int n) const;
+ std::string match(int n) const;
+
+ enum
+ {
+ NSUBEXP = 10
+ };
+
+private:
+ friend class RegularExpression;
+ const char* startp[NSUBEXP];
+ const char* endp[NSUBEXP];
+ const char* searchstring;
+};
+
+/**
+ * \brief Creates an invalid match object
+ */
+inline RegularExpressionMatch::RegularExpressionMatch()
+{
+ startp[0] = 0;
+ endp[0] = 0;
+ searchstring = 0;
+}
+
+/**
+ * \brief Returns true if the match pointers are valid
+ */
+inline bool RegularExpressionMatch::isValid() const
+{
+ return (this->startp[0] != 0);
+}
+
+/**
+ * \brief Resets to the (invalid) construction state.
+ */
+inline void RegularExpressionMatch::clear()
+{
+ startp[0] = 0;
+ endp[0] = 0;
+ searchstring = 0;
+}
+
+/**
+ * \brief Returns the start index of the full match.
+ */
+inline std::string::size_type RegularExpressionMatch::start() const
+{
+ return static_cast<std::string::size_type>(this->startp[0] - searchstring);
+}
+
+/**
+ * \brief Returns the end index of the full match.
+ */
+inline std::string::size_type RegularExpressionMatch::end() const
+{
+ return static_cast<std::string::size_type>(this->endp[0] - searchstring);
+}
+
+/**
+ * \brief Returns the start index of nth submatch.
+ * start(0) is the start of the full match.
+ */
+inline std::string::size_type RegularExpressionMatch::start(int n) const
+{
+ return static_cast<std::string::size_type>(this->startp[n] -
+ this->searchstring);
+}
+
+/**
+ * \brief Returns the end index of nth submatch.
+ * end(0) is the end of the full match.
+ */
+inline std::string::size_type RegularExpressionMatch::end(int n) const
+{
+ return static_cast<std::string::size_type>(this->endp[n] -
+ this->searchstring);
+}
+
+/**
+ * \brief Returns the nth submatch as a string.
+ */
+inline std::string RegularExpressionMatch::match(int n) const
+{
+ if (this->startp[n] == 0) {
+ return std::string();
+ } else {
+ return std::string(this->startp[n], static_cast<std::string::size_type>(
+ this->endp[n] - this->startp[n]));
+ }
+}
+
/** \class RegularExpression
* \brief Implements pattern matching with regular expressions.
*
@@ -170,6 +279,9 @@ namespace @KWSYS_NAMESPACE@ {
* the same as the two characters before the first p encounterd in
* the line. It would match "drepa qrepb" in "rep drepa qrepb".
*
+ * All methods of RegularExpression can be called simultaneously from
+ * different threads but only if each invocation uses an own instance of
+ * RegularExpression.
*/
class @KWSYS_NAMESPACE@_EXPORT RegularExpression
{
@@ -213,9 +325,19 @@ public:
/**
* Matches the regular expression to the given string.
+ * Returns true if found, and sets start and end indexes
+ * in the RegularExpressionMatch instance accordingly.
+ *
+ * This method is thread safe when called with different
+ * RegularExpressionMatch instances.
+ */
+ bool find(char const*, RegularExpressionMatch&) const;
+
+ /**
+ * Matches the regular expression to the given string.
* Returns true if found, and sets start and end indexes accordingly.
*/
- bool find(char const*);
+ inline bool find(char const*);
/**
* Matches the regular expression to the given std string.
@@ -224,14 +346,18 @@ public:
inline bool find(std::string const&);
/**
- * Index to start of first find.
+ * Match indices
*/
+ inline RegularExpressionMatch const& regMatch() const;
inline std::string::size_type start() const;
+ inline std::string::size_type end() const;
+ inline std::string::size_type start(int n) const;
+ inline std::string::size_type end(int n) const;
/**
- * Index to end of first find.
+ * Match strings
*/
- inline std::string::size_type end() const;
+ inline std::string match(int n) const;
/**
* Copy the given regular expression.
@@ -266,29 +392,14 @@ public:
*/
inline void set_invalid();
- /**
- * Destructor.
- */
- // awf added
- std::string::size_type start(int n) const;
- std::string::size_type end(int n) const;
- std::string match(int n) const;
-
- enum
- {
- NSUBEXP = 10
- };
-
private:
- const char* startp[NSUBEXP];
- const char* endp[NSUBEXP];
+ RegularExpressionMatch regmatch;
char regstart; // Internal use only
char reganch; // Internal use only
const char* regmust; // Internal use only
std::string::size_type regmlen; // Internal use only
char* program;
int progsize;
- const char* searchstring;
};
/**
@@ -344,51 +455,42 @@ inline bool RegularExpression::compile(std::string const& s)
* Matches the regular expression to the given std string.
* Returns true if found, and sets start and end indexes accordingly.
*/
-inline bool RegularExpression::find(std::string const& s)
+inline bool RegularExpression::find(const char* s)
{
- return this->find(s.c_str());
+ return this->find(s, this->regmatch);
}
/**
- * Set the start position for the regular expression.
+ * Matches the regular expression to the given std string.
+ * Returns true if found, and sets start and end indexes accordingly.
*/
-inline std::string::size_type RegularExpression::start() const
+inline bool RegularExpression::find(std::string const& s)
{
- return static_cast<std::string::size_type>(this->startp[0] - searchstring);
+ return this->find(s.c_str());
}
/**
- * Returns the start/end index of the last item found.
+ * Returns the internal match object
*/
-inline std::string::size_type RegularExpression::end() const
+inline RegularExpressionMatch const& RegularExpression::regMatch() const
{
- return static_cast<std::string::size_type>(this->endp[0] - searchstring);
+ return this->regmatch;
}
/**
- * Returns true if two regular expressions have different
- * compiled program for pattern matching.
+ * Returns the start index of the full match.
*/
-inline bool RegularExpression::operator!=(const RegularExpression& r) const
+inline std::string::size_type RegularExpression::start() const
{
- return (!(*this == r));
+ return regmatch.start();
}
/**
- * Returns true if a valid regular expression is compiled
- * and ready for pattern matching.
+ * Returns the end index of the full match.
*/
-inline bool RegularExpression::is_valid() const
-{
- return (this->program != 0);
-}
-
-inline void RegularExpression::set_invalid()
+inline std::string::size_type RegularExpression::end() const
{
- //#ifndef _WIN32
- delete[] this->program;
- //#endif
- this->program = 0;
+ return regmatch.end();
}
/**
@@ -396,7 +498,7 @@ inline void RegularExpression::set_invalid()
*/
inline std::string::size_type RegularExpression::start(int n) const
{
- return static_cast<std::string::size_type>(this->startp[n] - searchstring);
+ return regmatch.start(n);
}
/**
@@ -404,7 +506,7 @@ inline std::string::size_type RegularExpression::start(int n) const
*/
inline std::string::size_type RegularExpression::end(int n) const
{
- return static_cast<std::string::size_type>(this->endp[n] - searchstring);
+ return regmatch.end(n);
}
/**
@@ -412,12 +514,33 @@ inline std::string::size_type RegularExpression::end(int n) const
*/
inline std::string RegularExpression::match(int n) const
{
- if (this->startp[n] == 0) {
- return std::string("");
- } else {
- return std::string(this->startp[n], static_cast<std::string::size_type>(
- this->endp[n] - this->startp[n]));
- }
+ return regmatch.match(n);
+}
+
+/**
+ * Returns true if two regular expressions have different
+ * compiled program for pattern matching.
+ */
+inline bool RegularExpression::operator!=(const RegularExpression& r) const
+{
+ return (!(*this == r));
+}
+
+/**
+ * Returns true if a valid regular expression is compiled
+ * and ready for pattern matching.
+ */
+inline bool RegularExpression::is_valid() const
+{
+ return (this->program != 0);
+}
+
+inline void RegularExpression::set_invalid()
+{
+ //#ifndef _WIN32
+ delete[] this->program;
+ //#endif
+ this->program = 0;
}
} // namespace @KWSYS_NAMESPACE@