summaryrefslogtreecommitdiffstats
path: root/generic/tclRegexp.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclRegexp.c')
-rw-r--r--generic/tclRegexp.c101
1 files changed, 71 insertions, 30 deletions
diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c
index a5a8095..6348e4a 100644
--- a/generic/tclRegexp.c
+++ b/generic/tclRegexp.c
@@ -9,8 +9,6 @@
*
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
- *
- * RCS: @(#) $Id: tclRegexp.c,v 1.24 2007/03/07 09:35:42 dkf Exp $
*/
#include "tclInt.h"
@@ -55,8 +53,8 @@
*
* *** NOTE: this code has been altered slightly for use in Tcl: ***
* *** 1. Names have been changed, e.g. from re_comp to ***
- * *** TclRegComp, to avoid clashes with other ***
- * *** regexp implementations used by applications. ***
+ * *** TclRegComp, to avoid clashes with other ***
+ * *** regexp implementations used by applications. ***
*/
/*
@@ -102,7 +100,7 @@ static int SetRegexpFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr);
* compiled form of the regular expression.
*/
-Tcl_ObjType tclRegexpType = {
+const Tcl_ObjType tclRegexpType = {
"regexp", /* name */
FreeRegexpInternalRep, /* freeIntRepProc */
DupRegexpInternalRep, /* dupIntRepProc */
@@ -175,7 +173,7 @@ Tcl_RegExpExec(
* that "^" won't match. */
{
int flags, result, numChars;
- TclRegexp *regexp = (TclRegexp *)re;
+ TclRegexp *regexp = (TclRegexp *) re;
Tcl_DString ds;
const Tcl_UniChar *ustr;
@@ -251,7 +249,7 @@ Tcl_RegExpRange(
*startPtr = *endPtr = NULL;
} else {
if (regexpPtr->objPtr) {
- string = Tcl_GetString(regexpPtr->objPtr);
+ string = TclGetString(regexpPtr->objPtr);
} else {
string = regexpPtr->string;
}
@@ -393,9 +391,8 @@ Tcl_RegExpMatch(
const char *text, /* Text to search for pattern matches. */
const char *pattern) /* Regular expression to match against text. */
{
- Tcl_RegExp re;
+ Tcl_RegExp re = Tcl_RegExpCompile(interp, pattern);
- re = Tcl_RegExpCompile(interp, pattern);
if (re == NULL) {
return -1;
}
@@ -437,6 +434,28 @@ Tcl_RegExpExecObj(
TclRegexp *regexpPtr = (TclRegexp *) re;
Tcl_UniChar *udata;
int length;
+ int reflags = regexpPtr->flags;
+#define TCL_REG_GLOBOK_FLAGS \
+ (TCL_REG_ADVANCED | TCL_REG_NOSUB | TCL_REG_NOCASE)
+
+ /*
+ * Take advantage of the equivalent glob pattern, if one exists.
+ * This is possible based only on the right mix of incoming flags (0)
+ * and regexp compile flags.
+ */
+ if ((offset == 0) && (nmatches == 0) && (flags == 0)
+ && !(reflags & ~TCL_REG_GLOBOK_FLAGS)
+ && (regexpPtr->globObjPtr != NULL)) {
+ int nocase = (reflags & TCL_REG_NOCASE) ? TCL_MATCH_NOCASE : 0;
+
+ /*
+ * Pass to TclStringMatchObj for obj-specific handling.
+ * XXX: Currently doesn't take advantage of exact-ness that
+ * XXX: TclReToGlob tells us about
+ */
+
+ return TclStringMatchObj(textObj, regexpPtr->globObjPtr, nocase);
+ }
/*
* Save the target object so we can extract strings from it later.
@@ -552,17 +571,17 @@ Tcl_GetRegExpFromObj(
{
int length;
TclRegexp *regexpPtr;
- char *pattern;
+ const char *pattern;
/*
* This is OK because we only actually interpret this value properly as a
* TclRegexp* when the type is tclRegexpType.
*/
- regexpPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;
+ regexpPtr = objPtr->internalRep.twoPtrValue.ptr1;
if ((objPtr->typePtr != &tclRegexpType) || (regexpPtr->flags != flags)) {
- pattern = Tcl_GetStringFromObj(objPtr, &length);
+ pattern = TclGetStringFromObj(objPtr, &length);
regexpPtr = CompileRegexp(interp, pattern, length, flags);
if (regexpPtr == NULL) {
@@ -582,7 +601,7 @@ Tcl_GetRegExpFromObj(
*/
TclFreeIntRep(objPtr);
- objPtr->internalRep.otherValuePtr = (void *) regexpPtr;
+ objPtr->internalRep.twoPtrValue.ptr1 = regexpPtr;
objPtr->typePtr = &tclRegexpType;
}
return (Tcl_RegExp) regexpPtr;
@@ -635,7 +654,7 @@ TclRegAbout(
{0, NULL}
};
const struct infoname *inf;
- Tcl_Obj *infoObj;
+ Tcl_Obj *infoObj, *resultObj;
/*
* The reset here guarantees that the interpreter result is empty and
@@ -651,7 +670,8 @@ TclRegAbout(
* well and Tcl has other limits that constrain things as well...
*/
- Tcl_ListObjAppendElement(NULL, Tcl_GetObjResult(interp),
+ resultObj = Tcl_NewObj();
+ Tcl_ListObjAppendElement(NULL, resultObj,
Tcl_NewIntObj((int) regexpPtr->re.re_nsub));
/*
@@ -665,7 +685,8 @@ TclRegAbout(
Tcl_NewStringObj(inf->text, -1));
}
}
- Tcl_ListObjAppendElement(NULL, Tcl_GetObjResult(interp), infoObj);
+ Tcl_ListObjAppendElement(NULL, resultObj, infoObj);
+ Tcl_SetObjResult(interp, resultObj);
return 0;
}
@@ -693,14 +714,14 @@ TclRegError(
int status) /* Status code to report. */
{
char buf[100]; /* ample in practice */
- char cbuf[100]; /* lots in practice */
+ char cbuf[TCL_INTEGER_SPACE];
size_t n;
const char *p;
Tcl_ResetResult(interp);
n = TclReError(status, NULL, buf, sizeof(buf));
p = (n > sizeof(buf)) ? "..." : "";
- Tcl_AppendResult(interp, msg, buf, p, NULL);
+ Tcl_SetObjResult(interp, Tcl_ObjPrintf("%s%s%s", msg, buf, p));
sprintf(cbuf, "%d", status);
(void) TclReError(REG_ITOA, NULL, cbuf, sizeof(cbuf));
@@ -728,7 +749,7 @@ static void
FreeRegexpInternalRep(
Tcl_Obj *objPtr) /* Regexp object with internal rep to free. */
{
- TclRegexp *regexpRepPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;
+ TclRegexp *regexpRepPtr = objPtr->internalRep.twoPtrValue.ptr1;
/*
* If this is the last reference to the regexp, free it.
@@ -737,6 +758,7 @@ FreeRegexpInternalRep(
if (--(regexpRepPtr->refCount) <= 0) {
FreeRegexp(regexpRepPtr);
}
+ objPtr->typePtr = NULL;
}
/*
@@ -761,10 +783,10 @@ DupRegexpInternalRep(
Tcl_Obj *srcPtr, /* Object with internal rep to copy. */
Tcl_Obj *copyPtr) /* Object with internal rep to set. */
{
- TclRegexp *regexpPtr = (TclRegexp *) srcPtr->internalRep.otherValuePtr;
+ TclRegexp *regexpPtr = srcPtr->internalRep.twoPtrValue.ptr1;
regexpPtr->refCount++;
- copyPtr->internalRep.otherValuePtr = srcPtr->internalRep.otherValuePtr;
+ copyPtr->internalRep.twoPtrValue.ptr1 = srcPtr->internalRep.twoPtrValue.ptr1;
copyPtr->typePtr = &tclRegexpType;
}
@@ -830,7 +852,7 @@ CompileRegexp(
{
TclRegexp *regexpPtr;
const Tcl_UniChar *uniString;
- int numChars, status, i;
+ int numChars, status, i, exact;
Tcl_DString stringBuf;
ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey);
@@ -883,7 +905,7 @@ CompileRegexp(
* This is a new expression, so compile it and add it to the cache.
*/
- regexpPtr = (TclRegexp *) ckalloc(sizeof(TclRegexp));
+ regexpPtr = ckalloc(sizeof(TclRegexp));
regexpPtr->objPtr = NULL;
regexpPtr->string = NULL;
regexpPtr->details.rm_extend.rm_so = -1;
@@ -910,7 +932,7 @@ CompileRegexp(
* Clean up and report errors in the interpreter, if possible.
*/
- ckfree((char *)regexpPtr);
+ ckfree(regexpPtr);
if (interp) {
TclRegError(interp,
"couldn't compile regular expression pattern: ", status);
@@ -919,12 +941,25 @@ CompileRegexp(
}
/*
+ * Convert RE to a glob pattern equivalent, if any, and cache it. If this
+ * is not possible, then globObjPtr will be NULL. This is used by
+ * Tcl_RegExpExecObj to optionally do a fast match (avoids RE engine).
+ */
+
+ if (TclReToGlob(NULL, string, length, &stringBuf, &exact) == TCL_OK) {
+ regexpPtr->globObjPtr = TclDStringToObj(&stringBuf);
+ Tcl_IncrRefCount(regexpPtr->globObjPtr);
+ } else {
+ regexpPtr->globObjPtr = NULL;
+ }
+
+ /*
* Allocate enough space for all of the subexpressions, plus one extra for
* the entire pattern.
*/
- regexpPtr->matches = (regmatch_t *) ckalloc(
- sizeof(regmatch_t) * (regexpPtr->re.re_nsub + 1));
+ regexpPtr->matches =
+ ckalloc(sizeof(regmatch_t) * (regexpPtr->re.re_nsub + 1));
/*
* Initialize the refcount to one initially, since it is in the cache.
@@ -939,6 +974,7 @@ CompileRegexp(
if (tsdPtr->patterns[NUM_REGEXPS-1] != NULL) {
TclRegexp *oldRegexpPtr = tsdPtr->regexps[NUM_REGEXPS-1];
+
if (--(oldRegexpPtr->refCount) <= 0) {
FreeRegexp(oldRegexpPtr);
}
@@ -949,8 +985,8 @@ CompileRegexp(
tsdPtr->patLengths[i+1] = tsdPtr->patLengths[i];
tsdPtr->regexps[i+1] = tsdPtr->regexps[i];
}
- tsdPtr->patterns[0] = (char *) ckalloc((unsigned) (length+1));
- strcpy(tsdPtr->patterns[0], string);
+ tsdPtr->patterns[0] = ckalloc(length + 1);
+ memcpy(tsdPtr->patterns[0], string, (unsigned) length + 1);
tsdPtr->patLengths[0] = length;
tsdPtr->regexps[0] = regexpPtr;
@@ -978,10 +1014,13 @@ FreeRegexp(
TclRegexp *regexpPtr) /* Compiled regular expression to free. */
{
TclReFree(&regexpPtr->re);
+ if (regexpPtr->globObjPtr) {
+ TclDecrRefCount(regexpPtr->globObjPtr);
+ }
if (regexpPtr->matches) {
- ckfree((char *) regexpPtr->matches);
+ ckfree(regexpPtr->matches);
}
- ckfree((char *) regexpPtr);
+ ckfree(regexpPtr);
}
/*
@@ -1016,10 +1055,12 @@ FinalizeRegexp(
ckfree(tsdPtr->patterns[i]);
tsdPtr->patterns[i] = NULL;
}
+
/*
* We may find ourselves reinitialized if another finalization routine
* invokes regexps.
*/
+
tsdPtr->initialized = 0;
}