/* * tclRegexp.c -- * * This file contains the public interfaces to the Tcl regular expression * mechanism. * * Copyright © 1998 Sun Microsystems, Inc. * Copyright © 1998-1999 Scriptics Corporation. * * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. */ #include "tclInt.h" #include "tclRegexp.h" #include /* *---------------------------------------------------------------------- * The routines in this file use Henry Spencer's regular expression package * contained in the following additional source files: * * regc_color.c regc_cvec.c regc_lex.c * regc_nfa.c regcomp.c regcustom.h * rege_dfa.c regerror.c regerrs.h * regex.h regexec.c regfree.c * regfronts.c regguts.h * * Copyright © 1998 Henry Spencer. All rights reserved. * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * Corporation, none of whom are responsible for the results. The author * thanks all of them. * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. * * I'd appreciate being given credit for this package in the documentation of * software which uses it, but that is not a requirement. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * *** NOTE: this code has been altered slightly for use in Tcl: *** * *** 1. Names have been changed, e.g. from re_comp to *** * *** TclRegComp, to avoid clashes with other *** * *** regexp implementations used by applications. *** */ /* * Thread local storage used to maintain a per-thread cache of compiled * regular expressions. */ #define NUM_REGEXPS 30 typedef struct { int initialized; /* Set to 1 when the module is initialized. */ char *patterns[NUM_REGEXPS];/* Strings corresponding to compiled regular * expression patterns. NULL means that this * slot isn't used. Malloc-ed. */ int patLengths[NUM_REGEXPS];/* Number of non-null characters in * corresponding entry in patterns. -1 means * entry isn't used. */ struct TclRegexp *regexps[NUM_REGEXPS]; /* Compiled forms of above strings. Also * malloc-ed, or NULL if not in use yet. */ } ThreadSpecificData; static Tcl_ThreadDataKey dataKey; /* * Declarations for functions used only in this file. */ static TclRegexp * CompileRegexp(Tcl_Interp *interp, const char *pattern, int length, int flags); static void DupRegexpInternalRep(Tcl_Obj *srcPtr, Tcl_Obj *copyPtr); static void FinalizeRegexp(ClientData clientData); static void FreeRegexp(TclRegexp *regexpPtr); static void FreeRegexpInternalRep(Tcl_Obj *objPtr); static int RegExpExecUniChar(Tcl_Interp *interp, Tcl_RegExp re, const Tcl_UniChar *uniString, int numChars, int nmatches, int flags); static int SetRegexpFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr); /* * The regular expression Tcl object type. This serves as a cache of the * compiled form of the regular expression. */ const Tcl_ObjType tclRegexpType = { "regexp", /* name */ FreeRegexpInternalRep, /* freeIntRepProc */ DupRegexpInternalRep, /* dupIntRepProc */ NULL, /* updateStringProc */ SetRegexpFromAny /* setFromAnyProc */ }; #define RegexpSetInternalRep(objPtr, rePtr) \ do { \ Tcl_ObjInternalRep ir; \ (rePtr)->refCount++; \ ir.twoPtrValue.ptr1 = (rePtr); \ ir.twoPtrValue.ptr2 = NULL; \ Tcl_StoreInternalRep((objPtr), &tclRegexpType, &ir); \ } while (0) #define RegexpGetInternalRep(objPtr, rePtr) \ do { \ const Tcl_ObjInternalRep *irPtr; \ irPtr = TclFetchInternalRep((objPtr), &tclRegexpType); \ (rePtr) = irPtr ? (TclRegexp *)irPtr->twoPtrValue.ptr1 : NULL; \ } while (0) /* *---------------------------------------------------------------------- * * Tcl_RegExpCompile -- * * Compile a regular expression into a form suitable for fast matching. * This function is DEPRECATED in favor of the object version of the * command. * * Results: * The return value is a pointer to the compiled form of string, suitable * for passing to Tcl_RegExpExec. This compiled form is only valid up * until the next call to this function, so don't keep these around for a * long time! If an error occurred while compiling the pattern, then NULL * is returned and an error message is left in the interp's result. * * Side effects: * Updates the cache of compiled regexps. * *---------------------------------------------------------------------- */ Tcl_RegExp Tcl_RegExpCompile( Tcl_Interp *interp, /* For use in error reporting and to access * the interp regexp cache. */ const char *pattern) /* String for which to produce compiled * regular expression. */ { return (Tcl_RegExp) CompileRegexp(interp, pattern, (int) strlen(pattern), REG_ADVANCED); } /* *---------------------------------------------------------------------- * * Tcl_RegExpExec -- * * Execute the regular expression matcher using a compiled form of a * regular expression and save information about any match that is found. * * Results: * If an error occurs during the matching operation then -1 is returned * and the interp's result contains an error message. Otherwise the * return value is 1 if a matching range is found and 0 if there is no * matching range. * * Side effects: * None. * *---------------------------------------------------------------------- */ int Tcl_RegExpExec( Tcl_Interp *interp, /* Interpreter to use for error reporting. */ Tcl_RegExp re, /* Compiled regular expression; must have been * returned by previous call to * Tcl_GetRegExpFromObj. */ const char *text, /* Text against which to match re. */ const char *start) /* If text is part of a larger string, this * identifies beginning of larger string, so * that "^" won't match. */ { int flags, result, numChars; TclRegexp *regexp = (TclRegexp *) re; Tcl_DString ds; const Tcl_UniChar *ustr; /* * If the starting point is offset from the beginning of the buffer, then * we need to tell the regexp engine not to match "^". */ if (text > start) { flags = REG_NOTBOL; } else { flags = 0; } /* * Remember the string for use by Tcl_RegExpRange(). */ regexp->string = text; regexp->objPtr = NULL; /* * Convert the string to Unicode and perform the match. */ Tcl_DStringInit(&ds); ustr = Tcl_UtfToUniCharDString(text, -1, &ds); numChars = Tcl_DStringLength(&ds) / sizeof(Tcl_UniChar); result = RegExpExecUniChar(interp, re, ustr, numChars, -1 /* nmatches */, flags); Tcl_DStringFree(&ds); return result; } /* *--------------------------------------------------------------------------- * * Tcl_RegExpRange -- * * Returns pointers describing the range of a regular expression match, * or one of the subranges within the match. * * Results: * The variables at *startPtr and *endPtr are modified to hold the * addresses of the endpoints of the range given by index. If the * specified range doesn't exist then NULLs are returned. * * Side effects: * None. * *--------------------------------------------------------------------------- */ void Tcl_RegExpRange( Tcl_RegExp re, /* Compiled regular expression that has been * passed to Tcl_RegExpExec. */ int index, /* 0 means give the range of the entire match, * > 0 means give the range of a matching * subrange. */ const char **startPtr, /* Store address of first character in * (sub-)range here. */ const char **endPtr) /* Store address of character just after last * in (sub-)range here. */ { TclRegexp *regexpPtr = (TclRegexp *) re; const char *string; if ((size_t) index > regexpPtr->re.re_nsub) { *startPtr = *endPtr = NULL; } else if (regexpPtr->matches[index].rm_so == TCL_INDEX_NONE) { *startPtr = *endPtr = NULL; } else { if (regexpPtr->objPtr) { string = TclGetString(regexpPtr->objPtr); } else { string = regexpPtr->string; } *startPtr = TclUtfAtIndex(string, regexpPtr->matches[index].rm_so); *endPtr = TclUtfAtIndex(string, regexpPtr->matches[index].rm_eo); } } /* *--------------------------------------------------------------------------- * * RegExpExecUniChar -- * * Execute the regular expression matcher using a compiled form of a * regular expression and save information about any match that is found. * * Results: * If an error occurs during the matching operation then -1 is returned * and an error message is left in interp's result. Otherwise the return * value is 1 if a matching range was found or 0 if there was no matching * range. * * Side effects: * None. * *---------------------------------------------------------------------- */ static int RegExpExecUniChar( Tcl_Interp *interp, /* Interpreter to use for error reporting. */ Tcl_RegExp re, /* Compiled regular expression; returned by a * previous call to Tcl_GetRegExpFromObj */ const Tcl_UniChar *wString, /* String against which to match re. */ int numChars, /* Length of Tcl_UniChar string (must be * >=0). */ int nmatches, /* How many subexpression matches (counting * the whole match as subexpression 0) are of * interest. -1 means "don't know". */ int flags) /* Regular expression flags. */ { int status; TclRegexp *regexpPtr = (TclRegexp *) re; size_t last = regexpPtr->re.re_nsub + 1; size_t nm = last; if (nmatches >= 0 && (size_t) nmatches < nm) { nm = (size_t) nmatches; } status = TclReExec(®expPtr->re, wString, (size_t) numChars, ®expPtr->details, nm, regexpPtr->matches, flags); /* * Check for errors. */ if (status != REG_OKAY) { if (status == REG_NOMATCH) { return 0; } if (interp != NULL) { TclRegError(interp, "error while matching regular expression: ", status); } return -1; } return 1; } /* *--------------------------------------------------------------------------- * * TclRegExpRangeUniChar -- * * Returns pointers describing the range of a regular expression match, * or one of the subranges within the match, or the hypothetical range * represented by the rm_extend field of the rm_detail_t. * * Results: * The variables at *startPtr and *endPtr are modified to hold the * offsets of the endpoints of the range given by index. If the specified * range doesn't exist then -1s are supplied. * * Side effects: * None. * *--------------------------------------------------------------------------- */ void TclRegExpRangeUniChar( Tcl_RegExp re, /* Compiled regular expression that has been * passed to Tcl_RegExpExec. */ int index, /* 0 means give the range of the entire match, * > 0 means give the range of a matching * subrange, TCL_INDEX_NONE means the range of the * rm_extend field. */ int *startPtr, /* Store address of first character in * (sub-)range here. */ int *endPtr) /* Store address of character just after last * in (sub-)range here. */ { TclRegexp *regexpPtr = (TclRegexp *) re; if ((regexpPtr->flags®_EXPECT) && (index == TCL_INDEX_NONE)) { *startPtr = regexpPtr->details.rm_extend.rm_so; *endPtr = regexpPtr->details.rm_extend.rm_eo; } else if ((size_t) index > regexpPtr->re.re_nsub) { *startPtr = TCL_INDEX_NONE; *endPtr = TCL_INDEX_NONE; } else { *startPtr = regexpPtr->matches[index].rm_so; *endPtr = regexpPtr->matches[index].rm_eo; } } /* *---------------------------------------------------------------------- * * Tcl_RegExpMatch -- * * See if a string matches a regular expression. * * Results: * If an error occurs during the matching operation then -1 is returned * and the interp's result contains an error message. Otherwise the * return value is 1 if "text" matches "pattern" and 0 otherwise. * * Side effects: * None. * *---------------------------------------------------------------------- */ int Tcl_RegExpMatch( Tcl_Interp *interp, /* Used for error reporting. May be NULL. */ const char *text, /* Text to search for pattern matches. */ const char *pattern) /* Regular expression to match against text. */ { Tcl_RegExp re = Tcl_RegExpCompile(interp, pattern); if (re == NULL) { return -1; } return Tcl_RegExpExec(interp, re, text, text); } /* *---------------------------------------------------------------------- * * Tcl_RegExpExecObj -- * * Execute a precompiled regexp against the given object. * * Results: * If an error occurs during the matching operation then -1 is returned * and the interp's result contains an error message. Otherwise the * return value is 1 if "string" matches "pattern" and 0 otherwise. * * Side effects: * Converts the object to a Unicode object. * *---------------------------------------------------------------------- */ int Tcl_RegExpExecObj( Tcl_Interp *interp, /* Interpreter to use for error reporting. */ Tcl_RegExp re, /* Compiled regular expression; must have been * returned by previous call to * Tcl_GetRegExpFromObj. */ Tcl_Obj *textObj, /* Text against which to match re. */ int offset, /* Character index that marks where matching * should begin. */ int nmatches, /* How many subexpression matches (counting * the whole match as subexpression 0) are of * interest. -1 means all of them. */ int flags) /* Regular expression execution flags. */ { TclRegexp *regexpPtr = (TclRegexp *) re; Tcl_UniChar *udata; int length; int reflags = regexpPtr->flags; #define TCL_REG_GLOBOK_FLAGS \ (TCL_REG_ADVANCED | TCL_REG_NOSUB | TCL_REG_NOCASE) /* * Take advantage of the equivalent glob pattern, if one exists. * This is possible based only on the right mix of incoming flags (0) * and regexp compile flags. */ if ((offset == 0) && (nmatches == 0) && (flags == 0) && !(reflags & ~TCL_REG_GLOBOK_FLAGS) && (regexpPtr->globObjPtr != NULL)) { int nocase = (reflags & TCL_REG_NOCASE) ? TCL_MATCH_NOCASE : 0; /* * Pass to TclStringMatchObj for obj-specific handling. * XXX: Currently doesn't take advantage of exact-ness that * XXX: TclReToGlob tells us about */ return TclStringMatchObj(textObj, regexpPtr->globObjPtr, nocase); } /* * Save the target object so we can extract strings from it later. */ regexpPtr->string = NULL; regexpPtr->objPtr = textObj; udata = TclGetUnicodeFromObj(textObj, &length); if (offset > length) { offset = length; } udata += offset; length -= offset; return RegExpExecUniChar(interp, re, udata, length, nmatches, flags); } /* *---------------------------------------------------------------------- * * Tcl_RegExpMatchObj -- * * See if an object matches a regular expression. * * Results: * If an error occurs during the matching operation then -1 is returned * and the interp's result contains an error message. Otherwise the * return value is 1 if "text" matches "pattern" and 0 otherwise. * * Side effects: * Changes the internal rep of the pattern and string objects. * *---------------------------------------------------------------------- */ int Tcl_RegExpMatchObj( Tcl_Interp *interp, /* Used for error reporting. May be NULL. */ Tcl_Obj *textObj, /* Object containing the String to search. */ Tcl_Obj *patternObj) /* Regular expression to match against * string. */ { Tcl_RegExp re; /* * For performance reasons, first try compiling the RE without support for * subexpressions. On failure, try again without TCL_REG_NOSUB in case the * RE has backreferences in it. Closely related to [Bug 1366683]. If this * still fails, an error message will be left in the interpreter. */ if (!(re = Tcl_GetRegExpFromObj(interp, patternObj, TCL_REG_ADVANCED | TCL_REG_NOSUB)) && !(re = Tcl_GetRegExpFromObj(interp, patternObj, TCL_REG_ADVANCED))) { return -1; } return Tcl_RegExpExecObj(interp, re, textObj, 0 /* offset */, 0 /* nmatches */, 0 /* flags */); } /* *---------------------------------------------------------------------- * * Tcl_RegExpGetInfo -- * * Retrieve information about the current match. * * Results: * None. * * Side effects: * None. * *---------------------------------------------------------------------- */ void Tcl_RegExpGetInfo( Tcl_RegExp regexp, /* Pattern from which to get subexpressions. */ Tcl_RegExpInfo *infoPtr) /* Match information is stored here. */ { TclRegexp *regexpPtr = (TclRegexp *) regexp; infoPtr->nsubs = regexpPtr->re.re_nsub; infoPtr->matches = (Tcl_RegExpIndices *) regexpPtr->matches; infoPtr->extendStart = regexpPtr->details.rm_extend.rm_so; } /* *---------------------------------------------------------------------- * * Tcl_GetRegExpFromObj -- * * Compile a regular expression into a form suitable for fast matching. * This function caches the result in a Tcl_Obj. * * Results: * The return value is a pointer to the compiled form of string, suitable * for passing to Tcl_RegExpExec. If an error occurred while compiling * the pattern, then NULL is returned and an error message is left in the * interp's result. * * Side effects: * Updates the native rep of the Tcl_Obj. * *---------------------------------------------------------------------- */ Tcl_RegExp Tcl_GetRegExpFromObj( Tcl_Interp *interp, /* For use in error reporting, and to access * the interp regexp cache. */ Tcl_Obj *objPtr, /* Object whose string rep contains regular * expression pattern. Internal rep will be * changed to compiled form of this regular * expression. */ int flags) /* Regular expression compilation flags. */ { int length; TclRegexp *regexpPtr; const char *pattern; RegexpGetInternalRep(objPtr, regexpPtr); if ((regexpPtr == NULL) || (regexpPtr->flags != flags)) { pattern = TclGetStringFromObj(objPtr, &length); regexpPtr = CompileRegexp(interp, pattern, length, flags); if (regexpPtr == NULL) { return NULL; } RegexpSetInternalRep(objPtr, regexpPtr); } return (Tcl_RegExp) regexpPtr; } /* *---------------------------------------------------------------------- * * TclRegAbout -- * * Return information about a compiled regular expression. * * Results: * The return value is -1 for failure, 0 for success, although at the * moment there's nothing that could fail. On success, a list is left in * the interp's result: first element is the subexpression count, second * is a list of re_info bit names. * * Side effects: * None. * *---------------------------------------------------------------------- */ int TclRegAbout( Tcl_Interp *interp, /* For use in variable assignment. */ Tcl_RegExp re) /* The compiled regular expression. */ { TclRegexp *regexpPtr = (TclRegexp *) re; struct infoname { int bit; const char *text; }; static const struct infoname infonames[] = { {REG_UBACKREF, "REG_UBACKREF"}, {REG_ULOOKAHEAD, "REG_ULOOKAHEAD"}, {REG_UBOUNDS, "REG_UBOUNDS"}, {REG_UBRACES, "REG_UBRACES"}, {REG_UBSALNUM, "REG_UBSALNUM"}, {REG_UPBOTCH, "REG_UPBOTCH"}, {REG_UBBS, "REG_UBBS"}, {REG_UNONPOSIX, "REG_UNONPOSIX"}, {REG_UUNSPEC, "REG_UUNSPEC"}, {REG_UUNPORT, "REG_UUNPORT"}, {REG_ULOCALE, "REG_ULOCALE"}, {REG_UEMPTYMATCH, "REG_UEMPTYMATCH"}, {REG_UIMPOSSIBLE, "REG_UIMPOSSIBLE"}, {REG_USHORTEST, "REG_USHORTEST"}, {0, NULL} }; const struct infoname *inf; Tcl_Obj *infoObj, *resultObj; /* * The reset here guarantees that the interpreter result is empty and * unshared. This means that we can use Tcl_ListObjAppendElement on the * result object quite safely. */ Tcl_ResetResult(interp); /* * Assume that there will never be more than INT_MAX subexpressions. This * is a pretty reasonable assumption; the RE engine doesn't scale _that_ * well and Tcl has other limits that constrain things as well... */ TclNewObj(resultObj); TclNewIndexObj(infoObj, regexpPtr->re.re_nsub); Tcl_ListObjAppendElement(NULL, resultObj, infoObj); /* * Now append a list of all the bit-flags set for the RE. */ TclNewObj(infoObj); for (inf=infonames ; inf->bit != 0 ; inf++) { if (regexpPtr->re.re_info & inf->bit) { Tcl_ListObjAppendElement(NULL, infoObj, Tcl_NewStringObj(inf->text, -1)); } } Tcl_ListObjAppendElement(NULL, resultObj, infoObj); Tcl_SetObjResult(interp, resultObj); return 0; } /* *---------------------------------------------------------------------- * * TclRegError -- * * Generate an error message based on the regexp status code. * * Results: * Places an error in the interpreter. * * Side effects: * Sets errorCode as well. * *---------------------------------------------------------------------- */ void TclRegError( Tcl_Interp *interp, /* Interpreter for error reporting. */ const char *msg, /* Message to prepend to error. */ int status) /* Status code to report. */ { char buf[100]; /* ample in practice */ char cbuf[TCL_INTEGER_SPACE]; size_t n; const char *p; Tcl_ResetResult(interp); n = TclReError(status, buf, sizeof(buf)); p = (n > sizeof(buf)) ? "..." : ""; Tcl_SetObjResult(interp, Tcl_ObjPrintf("%s%s%s", msg, buf, p)); snprintf(cbuf, sizeof(cbuf), "%d", status); (void) TclReError(REG_ITOA, cbuf, sizeof(cbuf)); Tcl_SetErrorCode(interp, "REGEXP", cbuf, buf, (void *)NULL); } /* *---------------------------------------------------------------------- * * FreeRegexpInternalRep -- * * Deallocate the storage associated with a regexp object's internal * representation. * * Results: * None. * * Side effects: * Frees the compiled regular expression. * *---------------------------------------------------------------------- */ static void FreeRegexpInternalRep( Tcl_Obj *objPtr) /* Regexp object with internal rep to free. */ { TclRegexp *regexpRepPtr; RegexpGetInternalRep(objPtr, regexpRepPtr); assert(regexpRepPtr != NULL); /* * If this is the last reference to the regexp, free it. */ if (regexpRepPtr->refCount-- <= 1) { FreeRegexp(regexpRepPtr); } } /* *---------------------------------------------------------------------- * * DupRegexpInternalRep -- * * We copy the reference to the compiled regexp and bump its reference * count. * * Results: * None. * * Side effects: * Increments the reference count of the regexp. * *---------------------------------------------------------------------- */ static void DupRegexpInternalRep( Tcl_Obj *srcPtr, /* Object with internal rep to copy. */ Tcl_Obj *copyPtr) /* Object with internal rep to set. */ { TclRegexp *regexpPtr; RegexpGetInternalRep(srcPtr, regexpPtr); assert(regexpPtr != NULL); RegexpSetInternalRep(copyPtr, regexpPtr); } /* *---------------------------------------------------------------------- * * SetRegexpFromAny -- * * Attempt to generate a compiled regular expression for the Tcl object * "objPtr". * * Results: * The return value is TCL_OK or TCL_ERROR. If an error occurs during * conversion, an error message is left in the interpreter's result * unless "interp" is NULL. * * Side effects: * If no error occurs, a regular expression is stored as "objPtr"s * internal representation. * *---------------------------------------------------------------------- */ static int SetRegexpFromAny( Tcl_Interp *interp, /* Used for error reporting if not NULL. */ Tcl_Obj *objPtr) /* The object to convert. */ { if (Tcl_GetRegExpFromObj(interp, objPtr, REG_ADVANCED) == NULL) { return TCL_ERROR; } return TCL_OK; } /* *--------------------------------------------------------------------------- * * CompileRegexp -- * * Attempt to compile the given regexp pattern. If the compiled regular * expression can be found in the per-thread cache, it will be used * instead of compiling a new copy. * * Results: * The return value is a pointer to a newly allocated TclRegexp that * represents the compiled pattern, or NULL if the pattern could not be * compiled. If NULL is returned, an error message is left in the * interp's result. * * Side effects: * The thread-local regexp cache is updated and a new TclRegexp may be * allocated. * *---------------------------------------------------------------------- */ static TclRegexp * CompileRegexp( Tcl_Interp *interp, /* Used for error reporting if not NULL. */ const char *string, /* The regexp to compile (UTF-8). */ int length, /* The length of the string in bytes. */ int flags) /* Compilation flags. */ { TclRegexp *regexpPtr; const Tcl_UniChar *uniString; int numChars, status, i, exact; Tcl_DString stringBuf; ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey); if (!tsdPtr->initialized) { tsdPtr->initialized = 1; Tcl_CreateThreadExitHandler(FinalizeRegexp, NULL); } /* * This routine maintains a second-level regular expression cache in * addition to the per-object regexp cache. The per-thread cache is needed * to handle the case where for various reasons the object is lost between * invocations of the regexp command, but the literal pattern is the same. */ /* * Check the per-thread compiled regexp cache. We can only reuse a regexp * if it has the same pattern and the same flags. */ for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) { if ((length == tsdPtr->patLengths[i]) && (tsdPtr->regexps[i]->flags == flags) && (strcmp(string, tsdPtr->patterns[i]) == 0)) { /* * Move the matched pattern to the first slot in the cache and * shift the other patterns down one position. */ if (i != 0) { int j; char *cachedString; cachedString = tsdPtr->patterns[i]; regexpPtr = tsdPtr->regexps[i]; for (j = i-1; j >= 0; j--) { tsdPtr->patterns[j+1] = tsdPtr->patterns[j]; tsdPtr->patLengths[j+1] = tsdPtr->patLengths[j]; tsdPtr->regexps[j+1] = tsdPtr->regexps[j]; } tsdPtr->patterns[0] = cachedString; tsdPtr->patLengths[0] = length; tsdPtr->regexps[0] = regexpPtr; } return tsdPtr->regexps[0]; } } /* * This is a new expression, so compile it and add it to the cache. */ regexpPtr = (TclRegexp*)ckalloc(sizeof(TclRegexp)); regexpPtr->objPtr = NULL; regexpPtr->string = NULL; regexpPtr->details.rm_extend.rm_so = -1; regexpPtr->details.rm_extend.rm_eo = -1; /* * Get the up-to-date string representation and map to unicode. */ Tcl_DStringInit(&stringBuf); uniString = Tcl_UtfToUniCharDString(string, length, &stringBuf); numChars = Tcl_DStringLength(&stringBuf) / sizeof(Tcl_UniChar); /* * Compile the string and check for errors. */ regexpPtr->flags = flags; status = TclReComp(®expPtr->re, uniString, (size_t) numChars, flags); Tcl_DStringFree(&stringBuf); if (status != REG_OKAY) { /* * Clean up and report errors in the interpreter, if possible. */ ckfree(regexpPtr); if (interp) { TclRegError(interp, "couldn't compile regular expression pattern: ", status); } return NULL; } /* * Convert RE to a glob pattern equivalent, if any, and cache it. If this * is not possible, then globObjPtr will be NULL. This is used by * Tcl_RegExpExecObj to optionally do a fast match (avoids RE engine). */ if (TclReToGlob(NULL, string, length, &stringBuf, &exact, NULL) == TCL_OK) { regexpPtr->globObjPtr = Tcl_DStringToObj(&stringBuf); Tcl_IncrRefCount(regexpPtr->globObjPtr); } else { regexpPtr->globObjPtr = NULL; } /* * Allocate enough space for all of the subexpressions, plus one extra for * the entire pattern. */ regexpPtr->matches = (regmatch_t*)ckalloc(sizeof(regmatch_t) * (regexpPtr->re.re_nsub + 1)); /* * Initialize the refcount to one initially, since it is in the cache. */ regexpPtr->refCount = 1; /* * Free the last regexp, if necessary, and make room at the head of the * list for the new regexp. */ if (tsdPtr->patterns[NUM_REGEXPS-1] != NULL) { TclRegexp *oldRegexpPtr = tsdPtr->regexps[NUM_REGEXPS-1]; if (oldRegexpPtr->refCount-- <= 1) { FreeRegexp(oldRegexpPtr); } ckfree(tsdPtr->patterns[NUM_REGEXPS-1]); } for (i = NUM_REGEXPS - 2; i >= 0; i--) { tsdPtr->patterns[i+1] = tsdPtr->patterns[i]; tsdPtr->patLengths[i+1] = tsdPtr->patLengths[i]; tsdPtr->regexps[i+1] = tsdPtr->regexps[i]; } tsdPtr->patterns[0] = (char *)ckalloc(length + 1); memcpy(tsdPtr->patterns[0], string, length + 1); tsdPtr->patLengths[0] = length; tsdPtr->regexps[0] = regexpPtr; return regexpPtr; } /* *---------------------------------------------------------------------- * * FreeRegexp -- * * Release the storage associated with a TclRegexp. * * Results: * None. * * Side effects: * None. * *---------------------------------------------------------------------- */ static void FreeRegexp( TclRegexp *regexpPtr) /* Compiled regular expression to free. */ { TclReFree(®expPtr->re); if (regexpPtr->globObjPtr) { TclDecrRefCount(regexpPtr->globObjPtr); } if (regexpPtr->matches) { ckfree(regexpPtr->matches); } ckfree(regexpPtr); } /* *---------------------------------------------------------------------- * * FinalizeRegexp -- * * Release the storage associated with the per-thread regexp cache. * * Results: * None. * * Side effects: * None. * *---------------------------------------------------------------------- */ static void FinalizeRegexp( TCL_UNUSED(ClientData)) { int i; TclRegexp *regexpPtr; ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey); for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) { regexpPtr = tsdPtr->regexps[i]; if (regexpPtr->refCount-- <= 1) { FreeRegexp(regexpPtr); } ckfree(tsdPtr->patterns[i]); tsdPtr->patterns[i] = NULL; } /* * We may find ourselves reinitialized if another finalization routine * invokes regexps. */ tsdPtr->initialized = 0; } /* * Local Variables: * mode: c * c-basic-offset: 4 * fill-column: 78 * End: */