summaryrefslogtreecommitdiffstats
path: root/generic/tclRegexp.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclRegexp.c')
-rw-r--r--generic/tclRegexp.c193
1 files changed, 120 insertions, 73 deletions
diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c
index b5e3bec..6348e4a 100644
--- a/generic/tclRegexp.c
+++ b/generic/tclRegexp.c
@@ -9,8 +9,6 @@
*
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
- *
- * RCS: @(#) $Id: tclRegexp.c,v 1.21 2005/11/02 00:55:06 dkf Exp $
*/
#include "tclInt.h"
@@ -55,8 +53,8 @@
*
* *** NOTE: this code has been altered slightly for use in Tcl: ***
* *** 1. Names have been changed, e.g. from re_comp to ***
- * *** TclRegComp, to avoid clashes with other ***
- * *** regexp implementations used by applications. ***
+ * *** TclRegComp, to avoid clashes with other ***
+ * *** regexp implementations used by applications. ***
*/
/*
@@ -85,7 +83,7 @@ static Tcl_ThreadDataKey dataKey;
* Declarations for functions used only in this file.
*/
-static TclRegexp * CompileRegexp(Tcl_Interp *interp, CONST char *pattern,
+static TclRegexp * CompileRegexp(Tcl_Interp *interp, const char *pattern,
int length, int flags);
static void DupRegexpInternalRep(Tcl_Obj *srcPtr,
Tcl_Obj *copyPtr);
@@ -93,7 +91,7 @@ static void FinalizeRegexp(ClientData clientData);
static void FreeRegexp(TclRegexp *regexpPtr);
static void FreeRegexpInternalRep(Tcl_Obj *objPtr);
static int RegExpExecUniChar(Tcl_Interp *interp, Tcl_RegExp re,
- CONST Tcl_UniChar *uniString, int numChars,
+ const Tcl_UniChar *uniString, int numChars,
int nmatches, int flags);
static int SetRegexpFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr);
@@ -102,7 +100,7 @@ static int SetRegexpFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr);
* compiled form of the regular expression.
*/
-Tcl_ObjType tclRegexpType = {
+const Tcl_ObjType tclRegexpType = {
"regexp", /* name */
FreeRegexpInternalRep, /* freeIntRepProc */
DupRegexpInternalRep, /* dupIntRepProc */
@@ -136,7 +134,7 @@ Tcl_RegExp
Tcl_RegExpCompile(
Tcl_Interp *interp, /* For use in error reporting and to access
* the interp regexp cache. */
- CONST char *pattern) /* String for which to produce compiled
+ const char *pattern) /* String for which to produce compiled
* regular expression. */
{
return (Tcl_RegExp) CompileRegexp(interp, pattern, (int) strlen(pattern),
@@ -169,15 +167,15 @@ Tcl_RegExpExec(
Tcl_RegExp re, /* Compiled regular expression; must have been
* returned by previous call to
* Tcl_GetRegExpFromObj. */
- CONST char *text, /* Text against which to match re. */
- CONST char *start) /* If text is part of a larger string, this
+ const char *text, /* Text against which to match re. */
+ const char *start) /* If text is part of a larger string, this
* identifies beginning of larger string, so
* that "^" won't match. */
{
int flags, result, numChars;
- TclRegexp *regexp = (TclRegexp *)re;
+ TclRegexp *regexp = (TclRegexp *) re;
Tcl_DString ds;
- CONST Tcl_UniChar *ustr;
+ const Tcl_UniChar *ustr;
/*
* If the starting point is offset from the beginning of the buffer, then
@@ -237,13 +235,13 @@ Tcl_RegExpRange(
int index, /* 0 means give the range of the entire match,
* > 0 means give the range of a matching
* subrange. */
- CONST char **startPtr, /* Store address of first character in
+ const char **startPtr, /* Store address of first character in
* (sub-)range here. */
- CONST char **endPtr) /* Store address of character just after last
+ const char **endPtr) /* Store address of character just after last
* in (sub-)range here. */
{
TclRegexp *regexpPtr = (TclRegexp *) re;
- CONST char *string;
+ const char *string;
if ((size_t) index > regexpPtr->re.re_nsub) {
*startPtr = *endPtr = NULL;
@@ -251,7 +249,7 @@ Tcl_RegExpRange(
*startPtr = *endPtr = NULL;
} else {
if (regexpPtr->objPtr) {
- string = Tcl_GetString(regexpPtr->objPtr);
+ string = TclGetString(regexpPtr->objPtr);
} else {
string = regexpPtr->string;
}
@@ -285,7 +283,7 @@ RegExpExecUniChar(
Tcl_Interp *interp, /* Interpreter to use for error reporting. */
Tcl_RegExp re, /* Compiled regular expression; returned by a
* previous call to Tcl_GetRegExpFromObj */
- CONST Tcl_UniChar *wString, /* String against which to match re. */
+ const Tcl_UniChar *wString, /* String against which to match re. */
int numChars, /* Length of Tcl_UniChar string (must be
* >=0). */
int nmatches, /* How many subexpression matches (counting
@@ -390,12 +388,11 @@ TclRegExpRangeUniChar(
int
Tcl_RegExpMatch(
Tcl_Interp *interp, /* Used for error reporting. May be NULL. */
- CONST char *text, /* Text to search for pattern matches. */
- CONST char *pattern) /* Regular expression to match against text. */
+ const char *text, /* Text to search for pattern matches. */
+ const char *pattern) /* Regular expression to match against text. */
{
- Tcl_RegExp re;
+ Tcl_RegExp re = Tcl_RegExpCompile(interp, pattern);
- re = Tcl_RegExpCompile(interp, pattern);
if (re == NULL) {
return -1;
}
@@ -437,6 +434,28 @@ Tcl_RegExpExecObj(
TclRegexp *regexpPtr = (TclRegexp *) re;
Tcl_UniChar *udata;
int length;
+ int reflags = regexpPtr->flags;
+#define TCL_REG_GLOBOK_FLAGS \
+ (TCL_REG_ADVANCED | TCL_REG_NOSUB | TCL_REG_NOCASE)
+
+ /*
+ * Take advantage of the equivalent glob pattern, if one exists.
+ * This is possible based only on the right mix of incoming flags (0)
+ * and regexp compile flags.
+ */
+ if ((offset == 0) && (nmatches == 0) && (flags == 0)
+ && !(reflags & ~TCL_REG_GLOBOK_FLAGS)
+ && (regexpPtr->globObjPtr != NULL)) {
+ int nocase = (reflags & TCL_REG_NOCASE) ? TCL_MATCH_NOCASE : 0;
+
+ /*
+ * Pass to TclStringMatchObj for obj-specific handling.
+ * XXX: Currently doesn't take advantage of exact-ness that
+ * XXX: TclReToGlob tells us about
+ */
+
+ return TclStringMatchObj(textObj, regexpPtr->globObjPtr, nocase);
+ }
/*
* Save the target object so we can extract strings from it later.
@@ -552,17 +571,17 @@ Tcl_GetRegExpFromObj(
{
int length;
TclRegexp *regexpPtr;
- char *pattern;
+ const char *pattern;
/*
* This is OK because we only actually interpret this value properly as a
* TclRegexp* when the type is tclRegexpType.
*/
- regexpPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;
+ regexpPtr = objPtr->internalRep.twoPtrValue.ptr1;
if ((objPtr->typePtr != &tclRegexpType) || (regexpPtr->flags != flags)) {
- pattern = Tcl_GetStringFromObj(objPtr, &length);
+ pattern = TclGetStringFromObj(objPtr, &length);
regexpPtr = CompileRegexp(interp, pattern, length, flags);
if (regexpPtr == NULL) {
@@ -582,7 +601,7 @@ Tcl_GetRegExpFromObj(
*/
TclFreeIntRep(objPtr);
- objPtr->internalRep.otherValuePtr = (void *) regexpPtr;
+ objPtr->internalRep.twoPtrValue.ptr1 = regexpPtr;
objPtr->typePtr = &tclRegexpType;
}
return (Tcl_RegExp) regexpPtr;
@@ -612,12 +631,12 @@ TclRegAbout(
Tcl_Interp *interp, /* For use in variable assignment. */
Tcl_RegExp re) /* The compiled regular expression. */
{
- TclRegexp *regexpPtr = (TclRegexp *)re;
- char buf[TCL_INTEGER_SPACE];
- static struct infoname {
+ TclRegexp *regexpPtr = (TclRegexp *) re;
+ struct infoname {
int bit;
- char *text;
- } infonames[] = {
+ const char *text;
+ };
+ static const struct infoname infonames[] = {
{REG_UBACKREF, "REG_UBACKREF"},
{REG_ULOOKAHEAD, "REG_ULOOKAHEAD"},
{REG_UBOUNDS, "REG_UBOUNDS"},
@@ -632,38 +651,42 @@ TclRegAbout(
{REG_UEMPTYMATCH, "REG_UEMPTYMATCH"},
{REG_UIMPOSSIBLE, "REG_UIMPOSSIBLE"},
{REG_USHORTEST, "REG_USHORTEST"},
- {0, ""}
+ {0, NULL}
};
- struct infoname *inf;
- int n;
+ const struct infoname *inf;
+ Tcl_Obj *infoObj, *resultObj;
+
+ /*
+ * The reset here guarantees that the interpreter result is empty and
+ * unshared. This means that we can use Tcl_ListObjAppendElement on the
+ * result object quite safely.
+ */
Tcl_ResetResult(interp);
- sprintf(buf, "%u", (unsigned)(regexpPtr->re.re_nsub));
- Tcl_AppendElement(interp, buf);
+ /*
+ * Assume that there will never be more than INT_MAX subexpressions. This
+ * is a pretty reasonable assumption; the RE engine doesn't scale _that_
+ * well and Tcl has other limits that constrain things as well...
+ */
+
+ resultObj = Tcl_NewObj();
+ Tcl_ListObjAppendElement(NULL, resultObj,
+ Tcl_NewIntObj((int) regexpPtr->re.re_nsub));
/*
- * Must count bits before generating list, because we must know whether {}
- * are needed before we start appending names.
+ * Now append a list of all the bit-flags set for the RE.
*/
- n = 0;
- for (inf = infonames; inf->bit != 0; inf++) {
- if (regexpPtr->re.re_info&inf->bit) {
- n++;
+ TclNewObj(infoObj);
+ for (inf=infonames ; inf->bit != 0 ; inf++) {
+ if (regexpPtr->re.re_info & inf->bit) {
+ Tcl_ListObjAppendElement(NULL, infoObj,
+ Tcl_NewStringObj(inf->text, -1));
}
}
- if (n != 1) {
- Tcl_AppendResult(interp, " {", NULL);
- }
- for (inf = infonames; inf->bit != 0; inf++) {
- if (regexpPtr->re.re_info&inf->bit) {
- Tcl_AppendElement(interp, inf->text);
- }
- }
- if (n != 1) {
- Tcl_AppendResult(interp, "}", NULL);
- }
+ Tcl_ListObjAppendElement(NULL, resultObj, infoObj);
+ Tcl_SetObjResult(interp, resultObj);
return 0;
}
@@ -687,18 +710,18 @@ TclRegAbout(
void
TclRegError(
Tcl_Interp *interp, /* Interpreter for error reporting. */
- CONST char *msg, /* Message to prepend to error. */
+ const char *msg, /* Message to prepend to error. */
int status) /* Status code to report. */
{
char buf[100]; /* ample in practice */
- char cbuf[100]; /* lots in practice */
+ char cbuf[TCL_INTEGER_SPACE];
size_t n;
- char *p;
+ const char *p;
Tcl_ResetResult(interp);
n = TclReError(status, NULL, buf, sizeof(buf));
p = (n > sizeof(buf)) ? "..." : "";
- Tcl_AppendResult(interp, msg, buf, p, NULL);
+ Tcl_SetObjResult(interp, Tcl_ObjPrintf("%s%s%s", msg, buf, p));
sprintf(cbuf, "%d", status);
(void) TclReError(REG_ITOA, NULL, cbuf, sizeof(cbuf));
@@ -726,7 +749,7 @@ static void
FreeRegexpInternalRep(
Tcl_Obj *objPtr) /* Regexp object with internal rep to free. */
{
- TclRegexp *regexpRepPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;
+ TclRegexp *regexpRepPtr = objPtr->internalRep.twoPtrValue.ptr1;
/*
* If this is the last reference to the regexp, free it.
@@ -735,6 +758,7 @@ FreeRegexpInternalRep(
if (--(regexpRepPtr->refCount) <= 0) {
FreeRegexp(regexpRepPtr);
}
+ objPtr->typePtr = NULL;
}
/*
@@ -759,10 +783,10 @@ DupRegexpInternalRep(
Tcl_Obj *srcPtr, /* Object with internal rep to copy. */
Tcl_Obj *copyPtr) /* Object with internal rep to set. */
{
- TclRegexp *regexpPtr = (TclRegexp *) srcPtr->internalRep.otherValuePtr;
+ TclRegexp *regexpPtr = srcPtr->internalRep.twoPtrValue.ptr1;
regexpPtr->refCount++;
- copyPtr->internalRep.otherValuePtr = srcPtr->internalRep.otherValuePtr;
+ copyPtr->internalRep.twoPtrValue.ptr1 = srcPtr->internalRep.twoPtrValue.ptr1;
copyPtr->typePtr = &tclRegexpType;
}
@@ -822,15 +846,14 @@ SetRegexpFromAny(
static TclRegexp *
CompileRegexp(
Tcl_Interp *interp, /* Used for error reporting if not NULL. */
- CONST char *string, /* The regexp to compile (UTF-8). */
+ const char *string, /* The regexp to compile (UTF-8). */
int length, /* The length of the string in bytes. */
int flags) /* Compilation flags. */
{
TclRegexp *regexpPtr;
- CONST Tcl_UniChar *uniString;
- int numChars;
+ const Tcl_UniChar *uniString;
+ int numChars, status, i, exact;
Tcl_DString stringBuf;
- int status, i;
ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey);
if (!tsdPtr->initialized) {
@@ -882,7 +905,7 @@ CompileRegexp(
* This is a new expression, so compile it and add it to the cache.
*/
- regexpPtr = (TclRegexp *) ckalloc(sizeof(TclRegexp));
+ regexpPtr = ckalloc(sizeof(TclRegexp));
regexpPtr->objPtr = NULL;
regexpPtr->string = NULL;
regexpPtr->details.rm_extend.rm_so = -1;
@@ -909,22 +932,34 @@ CompileRegexp(
* Clean up and report errors in the interpreter, if possible.
*/
- ckfree((char *)regexpPtr);
+ ckfree(regexpPtr);
if (interp) {
TclRegError(interp,
- "couldn't compile regular expression pattern: ",
- status);
+ "couldn't compile regular expression pattern: ", status);
}
return NULL;
}
/*
+ * Convert RE to a glob pattern equivalent, if any, and cache it. If this
+ * is not possible, then globObjPtr will be NULL. This is used by
+ * Tcl_RegExpExecObj to optionally do a fast match (avoids RE engine).
+ */
+
+ if (TclReToGlob(NULL, string, length, &stringBuf, &exact) == TCL_OK) {
+ regexpPtr->globObjPtr = TclDStringToObj(&stringBuf);
+ Tcl_IncrRefCount(regexpPtr->globObjPtr);
+ } else {
+ regexpPtr->globObjPtr = NULL;
+ }
+
+ /*
* Allocate enough space for all of the subexpressions, plus one extra for
* the entire pattern.
*/
- regexpPtr->matches = (regmatch_t *) ckalloc(
- sizeof(regmatch_t) * (regexpPtr->re.re_nsub + 1));
+ regexpPtr->matches =
+ ckalloc(sizeof(regmatch_t) * (regexpPtr->re.re_nsub + 1));
/*
* Initialize the refcount to one initially, since it is in the cache.
@@ -939,6 +974,7 @@ CompileRegexp(
if (tsdPtr->patterns[NUM_REGEXPS-1] != NULL) {
TclRegexp *oldRegexpPtr = tsdPtr->regexps[NUM_REGEXPS-1];
+
if (--(oldRegexpPtr->refCount) <= 0) {
FreeRegexp(oldRegexpPtr);
}
@@ -949,8 +985,8 @@ CompileRegexp(
tsdPtr->patLengths[i+1] = tsdPtr->patLengths[i];
tsdPtr->regexps[i+1] = tsdPtr->regexps[i];
}
- tsdPtr->patterns[0] = (char *) ckalloc((unsigned) (length+1));
- strcpy(tsdPtr->patterns[0], string);
+ tsdPtr->patterns[0] = ckalloc(length + 1);
+ memcpy(tsdPtr->patterns[0], string, (unsigned) length + 1);
tsdPtr->patLengths[0] = length;
tsdPtr->regexps[0] = regexpPtr;
@@ -978,10 +1014,13 @@ FreeRegexp(
TclRegexp *regexpPtr) /* Compiled regular expression to free. */
{
TclReFree(&regexpPtr->re);
+ if (regexpPtr->globObjPtr) {
+ TclDecrRefCount(regexpPtr->globObjPtr);
+ }
if (regexpPtr->matches) {
- ckfree((char *) regexpPtr->matches);
+ ckfree(regexpPtr->matches);
}
- ckfree((char *) regexpPtr);
+ ckfree(regexpPtr);
}
/*
@@ -1014,7 +1053,15 @@ FinalizeRegexp(
FreeRegexp(regexpPtr);
}
ckfree(tsdPtr->patterns[i]);
+ tsdPtr->patterns[i] = NULL;
}
+
+ /*
+ * We may find ourselves reinitialized if another finalization routine
+ * invokes regexps.
+ */
+
+ tsdPtr->initialized = 0;
}
/*