diff options
Diffstat (limited to 'generic/tclRegexp.c')
-rw-r--r-- | generic/tclRegexp.c | 101 |
1 files changed, 71 insertions, 30 deletions
diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c index a5a8095..6348e4a 100644 --- a/generic/tclRegexp.c +++ b/generic/tclRegexp.c @@ -9,8 +9,6 @@ * * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. - * - * RCS: @(#) $Id: tclRegexp.c,v 1.24 2007/03/07 09:35:42 dkf Exp $ */ #include "tclInt.h" @@ -55,8 +53,8 @@ * * *** NOTE: this code has been altered slightly for use in Tcl: *** * *** 1. Names have been changed, e.g. from re_comp to *** - * *** TclRegComp, to avoid clashes with other *** - * *** regexp implementations used by applications. *** + * *** TclRegComp, to avoid clashes with other *** + * *** regexp implementations used by applications. *** */ /* @@ -102,7 +100,7 @@ static int SetRegexpFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr); * compiled form of the regular expression. */ -Tcl_ObjType tclRegexpType = { +const Tcl_ObjType tclRegexpType = { "regexp", /* name */ FreeRegexpInternalRep, /* freeIntRepProc */ DupRegexpInternalRep, /* dupIntRepProc */ @@ -175,7 +173,7 @@ Tcl_RegExpExec( * that "^" won't match. */ { int flags, result, numChars; - TclRegexp *regexp = (TclRegexp *)re; + TclRegexp *regexp = (TclRegexp *) re; Tcl_DString ds; const Tcl_UniChar *ustr; @@ -251,7 +249,7 @@ Tcl_RegExpRange( *startPtr = *endPtr = NULL; } else { if (regexpPtr->objPtr) { - string = Tcl_GetString(regexpPtr->objPtr); + string = TclGetString(regexpPtr->objPtr); } else { string = regexpPtr->string; } @@ -393,9 +391,8 @@ Tcl_RegExpMatch( const char *text, /* Text to search for pattern matches. */ const char *pattern) /* Regular expression to match against text. */ { - Tcl_RegExp re; + Tcl_RegExp re = Tcl_RegExpCompile(interp, pattern); - re = Tcl_RegExpCompile(interp, pattern); if (re == NULL) { return -1; } @@ -437,6 +434,28 @@ Tcl_RegExpExecObj( TclRegexp *regexpPtr = (TclRegexp *) re; Tcl_UniChar *udata; int length; + int reflags = regexpPtr->flags; +#define TCL_REG_GLOBOK_FLAGS \ + (TCL_REG_ADVANCED | TCL_REG_NOSUB | TCL_REG_NOCASE) + + /* + * Take advantage of the equivalent glob pattern, if one exists. + * This is possible based only on the right mix of incoming flags (0) + * and regexp compile flags. + */ + if ((offset == 0) && (nmatches == 0) && (flags == 0) + && !(reflags & ~TCL_REG_GLOBOK_FLAGS) + && (regexpPtr->globObjPtr != NULL)) { + int nocase = (reflags & TCL_REG_NOCASE) ? TCL_MATCH_NOCASE : 0; + + /* + * Pass to TclStringMatchObj for obj-specific handling. + * XXX: Currently doesn't take advantage of exact-ness that + * XXX: TclReToGlob tells us about + */ + + return TclStringMatchObj(textObj, regexpPtr->globObjPtr, nocase); + } /* * Save the target object so we can extract strings from it later. @@ -552,17 +571,17 @@ Tcl_GetRegExpFromObj( { int length; TclRegexp *regexpPtr; - char *pattern; + const char *pattern; /* * This is OK because we only actually interpret this value properly as a * TclRegexp* when the type is tclRegexpType. */ - regexpPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr; + regexpPtr = objPtr->internalRep.twoPtrValue.ptr1; if ((objPtr->typePtr != &tclRegexpType) || (regexpPtr->flags != flags)) { - pattern = Tcl_GetStringFromObj(objPtr, &length); + pattern = TclGetStringFromObj(objPtr, &length); regexpPtr = CompileRegexp(interp, pattern, length, flags); if (regexpPtr == NULL) { @@ -582,7 +601,7 @@ Tcl_GetRegExpFromObj( */ TclFreeIntRep(objPtr); - objPtr->internalRep.otherValuePtr = (void *) regexpPtr; + objPtr->internalRep.twoPtrValue.ptr1 = regexpPtr; objPtr->typePtr = &tclRegexpType; } return (Tcl_RegExp) regexpPtr; @@ -635,7 +654,7 @@ TclRegAbout( {0, NULL} }; const struct infoname *inf; - Tcl_Obj *infoObj; + Tcl_Obj *infoObj, *resultObj; /* * The reset here guarantees that the interpreter result is empty and @@ -651,7 +670,8 @@ TclRegAbout( * well and Tcl has other limits that constrain things as well... */ - Tcl_ListObjAppendElement(NULL, Tcl_GetObjResult(interp), + resultObj = Tcl_NewObj(); + Tcl_ListObjAppendElement(NULL, resultObj, Tcl_NewIntObj((int) regexpPtr->re.re_nsub)); /* @@ -665,7 +685,8 @@ TclRegAbout( Tcl_NewStringObj(inf->text, -1)); } } - Tcl_ListObjAppendElement(NULL, Tcl_GetObjResult(interp), infoObj); + Tcl_ListObjAppendElement(NULL, resultObj, infoObj); + Tcl_SetObjResult(interp, resultObj); return 0; } @@ -693,14 +714,14 @@ TclRegError( int status) /* Status code to report. */ { char buf[100]; /* ample in practice */ - char cbuf[100]; /* lots in practice */ + char cbuf[TCL_INTEGER_SPACE]; size_t n; const char *p; Tcl_ResetResult(interp); n = TclReError(status, NULL, buf, sizeof(buf)); p = (n > sizeof(buf)) ? "..." : ""; - Tcl_AppendResult(interp, msg, buf, p, NULL); + Tcl_SetObjResult(interp, Tcl_ObjPrintf("%s%s%s", msg, buf, p)); sprintf(cbuf, "%d", status); (void) TclReError(REG_ITOA, NULL, cbuf, sizeof(cbuf)); @@ -728,7 +749,7 @@ static void FreeRegexpInternalRep( Tcl_Obj *objPtr) /* Regexp object with internal rep to free. */ { - TclRegexp *regexpRepPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr; + TclRegexp *regexpRepPtr = objPtr->internalRep.twoPtrValue.ptr1; /* * If this is the last reference to the regexp, free it. @@ -737,6 +758,7 @@ FreeRegexpInternalRep( if (--(regexpRepPtr->refCount) <= 0) { FreeRegexp(regexpRepPtr); } + objPtr->typePtr = NULL; } /* @@ -761,10 +783,10 @@ DupRegexpInternalRep( Tcl_Obj *srcPtr, /* Object with internal rep to copy. */ Tcl_Obj *copyPtr) /* Object with internal rep to set. */ { - TclRegexp *regexpPtr = (TclRegexp *) srcPtr->internalRep.otherValuePtr; + TclRegexp *regexpPtr = srcPtr->internalRep.twoPtrValue.ptr1; regexpPtr->refCount++; - copyPtr->internalRep.otherValuePtr = srcPtr->internalRep.otherValuePtr; + copyPtr->internalRep.twoPtrValue.ptr1 = srcPtr->internalRep.twoPtrValue.ptr1; copyPtr->typePtr = &tclRegexpType; } @@ -830,7 +852,7 @@ CompileRegexp( { TclRegexp *regexpPtr; const Tcl_UniChar *uniString; - int numChars, status, i; + int numChars, status, i, exact; Tcl_DString stringBuf; ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey); @@ -883,7 +905,7 @@ CompileRegexp( * This is a new expression, so compile it and add it to the cache. */ - regexpPtr = (TclRegexp *) ckalloc(sizeof(TclRegexp)); + regexpPtr = ckalloc(sizeof(TclRegexp)); regexpPtr->objPtr = NULL; regexpPtr->string = NULL; regexpPtr->details.rm_extend.rm_so = -1; @@ -910,7 +932,7 @@ CompileRegexp( * Clean up and report errors in the interpreter, if possible. */ - ckfree((char *)regexpPtr); + ckfree(regexpPtr); if (interp) { TclRegError(interp, "couldn't compile regular expression pattern: ", status); @@ -919,12 +941,25 @@ CompileRegexp( } /* + * Convert RE to a glob pattern equivalent, if any, and cache it. If this + * is not possible, then globObjPtr will be NULL. This is used by + * Tcl_RegExpExecObj to optionally do a fast match (avoids RE engine). + */ + + if (TclReToGlob(NULL, string, length, &stringBuf, &exact) == TCL_OK) { + regexpPtr->globObjPtr = TclDStringToObj(&stringBuf); + Tcl_IncrRefCount(regexpPtr->globObjPtr); + } else { + regexpPtr->globObjPtr = NULL; + } + + /* * Allocate enough space for all of the subexpressions, plus one extra for * the entire pattern. */ - regexpPtr->matches = (regmatch_t *) ckalloc( - sizeof(regmatch_t) * (regexpPtr->re.re_nsub + 1)); + regexpPtr->matches = + ckalloc(sizeof(regmatch_t) * (regexpPtr->re.re_nsub + 1)); /* * Initialize the refcount to one initially, since it is in the cache. @@ -939,6 +974,7 @@ CompileRegexp( if (tsdPtr->patterns[NUM_REGEXPS-1] != NULL) { TclRegexp *oldRegexpPtr = tsdPtr->regexps[NUM_REGEXPS-1]; + if (--(oldRegexpPtr->refCount) <= 0) { FreeRegexp(oldRegexpPtr); } @@ -949,8 +985,8 @@ CompileRegexp( tsdPtr->patLengths[i+1] = tsdPtr->patLengths[i]; tsdPtr->regexps[i+1] = tsdPtr->regexps[i]; } - tsdPtr->patterns[0] = (char *) ckalloc((unsigned) (length+1)); - strcpy(tsdPtr->patterns[0], string); + tsdPtr->patterns[0] = ckalloc(length + 1); + memcpy(tsdPtr->patterns[0], string, (unsigned) length + 1); tsdPtr->patLengths[0] = length; tsdPtr->regexps[0] = regexpPtr; @@ -978,10 +1014,13 @@ FreeRegexp( TclRegexp *regexpPtr) /* Compiled regular expression to free. */ { TclReFree(®expPtr->re); + if (regexpPtr->globObjPtr) { + TclDecrRefCount(regexpPtr->globObjPtr); + } if (regexpPtr->matches) { - ckfree((char *) regexpPtr->matches); + ckfree(regexpPtr->matches); } - ckfree((char *) regexpPtr); + ckfree(regexpPtr); } /* @@ -1016,10 +1055,12 @@ FinalizeRegexp( ckfree(tsdPtr->patterns[i]); tsdPtr->patterns[i] = NULL; } + /* * We may find ourselves reinitialized if another finalization routine * invokes regexps. */ + tsdPtr->initialized = 0; } |