diff options
Diffstat (limited to 'generic/tclRegexp.c')
| -rw-r--r-- | generic/tclRegexp.c | 169 |
1 files changed, 77 insertions, 92 deletions
diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c index a823af5..dac6aba 100644 --- a/generic/tclRegexp.c +++ b/generic/tclRegexp.c @@ -4,8 +4,8 @@ * This file contains the public interfaces to the Tcl regular expression * mechanism. * - * Copyright © 1998 Sun Microsystems, Inc. - * Copyright © 1998-1999 Scriptics Corporation. + * Copyright (c) 1998 by Sun Microsystems, Inc. + * Copyright (c) 1998-1999 by Scriptics Corporation. * * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. @@ -13,7 +13,6 @@ #include "tclInt.h" #include "tclRegexp.h" -#include <assert.h> /* *---------------------------------------------------------------------- @@ -26,7 +25,7 @@ * regex.h regexec.c regfree.c * regfronts.c regguts.h * - * Copyright © 1998 Henry Spencer. All rights reserved. + * Copyright (c) 1998 Henry Spencer. All rights reserved. * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics @@ -54,8 +53,8 @@ * * *** NOTE: this code has been altered slightly for use in Tcl: *** * *** 1. Names have been changed, e.g. from re_comp to *** - * *** TclRegComp, to avoid clashes with other *** - * *** regexp implementations used by applications. *** + * *** TclRegComp, to avoid clashes with other *** + * *** regexp implementations used by applications. *** */ /* @@ -65,7 +64,7 @@ #define NUM_REGEXPS 30 -typedef struct { +typedef struct ThreadSpecificData { int initialized; /* Set to 1 when the module is initialized. */ char *patterns[NUM_REGEXPS];/* Strings corresponding to compiled regular * expression patterns. NULL means that this @@ -101,30 +100,13 @@ static int SetRegexpFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr); * compiled form of the regular expression. */ -const Tcl_ObjType tclRegexpType = { +Tcl_ObjType tclRegexpType = { "regexp", /* name */ FreeRegexpInternalRep, /* freeIntRepProc */ DupRegexpInternalRep, /* dupIntRepProc */ NULL, /* updateStringProc */ SetRegexpFromAny /* setFromAnyProc */ }; - -#define RegexpSetInternalRep(objPtr, rePtr) \ - do { \ - Tcl_ObjInternalRep ir; \ - (rePtr)->refCount++; \ - ir.twoPtrValue.ptr1 = (rePtr); \ - ir.twoPtrValue.ptr2 = NULL; \ - Tcl_StoreInternalRep((objPtr), &tclRegexpType, &ir); \ - } while (0) - -#define RegexpGetInternalRep(objPtr, rePtr) \ - do { \ - const Tcl_ObjInternalRep *irPtr; \ - irPtr = TclFetchInternalRep((objPtr), &tclRegexpType); \ - (rePtr) = irPtr ? (TclRegexp *)irPtr->twoPtrValue.ptr1 : NULL; \ - } while (0) - /* *---------------------------------------------------------------------- @@ -191,7 +173,7 @@ Tcl_RegExpExec( * that "^" won't match. */ { int flags, result, numChars; - TclRegexp *regexp = (TclRegexp *) re; + TclRegexp *regexp = (TclRegexp *)re; Tcl_DString ds; const Tcl_UniChar *ustr; @@ -263,7 +245,7 @@ Tcl_RegExpRange( if ((size_t) index > regexpPtr->re.re_nsub) { *startPtr = *endPtr = NULL; - } else if (regexpPtr->matches[index].rm_so == TCL_INDEX_NONE) { + } else if (regexpPtr->matches[index].rm_so < 0) { *startPtr = *endPtr = NULL; } else { if (regexpPtr->objPtr) { @@ -271,8 +253,8 @@ Tcl_RegExpRange( } else { string = regexpPtr->string; } - *startPtr = TclUtfAtIndex(string, regexpPtr->matches[index].rm_so); - *endPtr = TclUtfAtIndex(string, regexpPtr->matches[index].rm_eo); + *startPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_so); + *endPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_eo); } } @@ -364,7 +346,7 @@ TclRegExpRangeUniChar( * passed to Tcl_RegExpExec. */ int index, /* 0 means give the range of the entire match, * > 0 means give the range of a matching - * subrange, TCL_INDEX_NONE means the range of the + * subrange, -1 means the range of the * rm_extend field. */ int *startPtr, /* Store address of first character in * (sub-)range here. */ @@ -373,12 +355,12 @@ TclRegExpRangeUniChar( { TclRegexp *regexpPtr = (TclRegexp *) re; - if ((regexpPtr->flags®_EXPECT) && (index == TCL_INDEX_NONE)) { + if ((regexpPtr->flags®_EXPECT) && index == -1) { *startPtr = regexpPtr->details.rm_extend.rm_so; *endPtr = regexpPtr->details.rm_extend.rm_eo; } else if ((size_t) index > regexpPtr->re.re_nsub) { - *startPtr = TCL_INDEX_NONE; - *endPtr = TCL_INDEX_NONE; + *startPtr = -1; + *endPtr = -1; } else { *startPtr = regexpPtr->matches[index].rm_so; *endPtr = regexpPtr->matches[index].rm_eo; @@ -409,8 +391,9 @@ Tcl_RegExpMatch( const char *text, /* Text to search for pattern matches. */ const char *pattern) /* Regular expression to match against text. */ { - Tcl_RegExp re = Tcl_RegExpCompile(interp, pattern); + Tcl_RegExp re; + re = Tcl_RegExpCompile(interp, pattern); if (re == NULL) { return -1; } @@ -453,8 +436,7 @@ Tcl_RegExpExecObj( Tcl_UniChar *udata; int length; int reflags = regexpPtr->flags; -#define TCL_REG_GLOBOK_FLAGS \ - (TCL_REG_ADVANCED | TCL_REG_NOSUB | TCL_REG_NOCASE) +#define TCL_REG_GLOBOK_FLAGS (TCL_REG_ADVANCED | TCL_REG_NOSUB | TCL_REG_NOCASE) /* * Take advantage of the equivalent glob pattern, if one exists. @@ -482,7 +464,7 @@ Tcl_RegExpExecObj( regexpPtr->string = NULL; regexpPtr->objPtr = textObj; - udata = TclGetUnicodeFromObj(textObj, &length); + udata = Tcl_GetUnicodeFromObj(textObj, &length); if (offset > length) { offset = length; @@ -520,16 +502,9 @@ Tcl_RegExpMatchObj( { Tcl_RegExp re; - /* - * For performance reasons, first try compiling the RE without support for - * subexpressions. On failure, try again without TCL_REG_NOSUB in case the - * RE has backreferences in it. Closely related to [Bug 1366683]. If this - * still fails, an error message will be left in the interpreter. - */ - - if (!(re = Tcl_GetRegExpFromObj(interp, patternObj, - TCL_REG_ADVANCED | TCL_REG_NOSUB)) - && !(re = Tcl_GetRegExpFromObj(interp, patternObj, TCL_REG_ADVANCED))) { + re = Tcl_GetRegExpFromObj(interp, patternObj, + TCL_REG_ADVANCED | TCL_REG_NOSUB); + if (re == NULL) { return -1; } return Tcl_RegExpExecObj(interp, re, textObj, 0 /* offset */, @@ -596,11 +571,16 @@ Tcl_GetRegExpFromObj( { int length; TclRegexp *regexpPtr; - const char *pattern; + char *pattern; - RegexpGetInternalRep(objPtr, regexpPtr); + /* + * This is OK because we only actually interpret this value properly as a + * TclRegexp* when the type is tclRegexpType. + */ + + regexpPtr = (TclRegexp *) objPtr->internalRep.twoPtrValue.ptr1; - if ((regexpPtr == NULL) || (regexpPtr->flags != flags)) { + if ((objPtr->typePtr != &tclRegexpType) || (regexpPtr->flags != flags)) { pattern = TclGetStringFromObj(objPtr, &length); regexpPtr = CompileRegexp(interp, pattern, length, flags); @@ -608,7 +588,21 @@ Tcl_GetRegExpFromObj( return NULL; } - RegexpSetInternalRep(objPtr, regexpPtr); + /* + * Add a reference to the regexp so it will persist even if it is + * pushed out of the current thread's regexp cache. This reference + * will be removed when the object's internal rep is freed. + */ + + regexpPtr->refCount++; + + /* + * Free the old representation and set our type. + */ + + TclFreeIntRep(objPtr); + objPtr->internalRep.twoPtrValue.ptr1 = (void *) regexpPtr; + objPtr->typePtr = &tclRegexpType; } return (Tcl_RegExp) regexpPtr; } @@ -660,7 +654,7 @@ TclRegAbout( {0, NULL} }; const struct infoname *inf; - Tcl_Obj *infoObj, *resultObj; + Tcl_Obj *infoObj; /* * The reset here guarantees that the interpreter result is empty and @@ -676,9 +670,8 @@ TclRegAbout( * well and Tcl has other limits that constrain things as well... */ - TclNewObj(resultObj); - TclNewIndexObj(infoObj, regexpPtr->re.re_nsub); - Tcl_ListObjAppendElement(NULL, resultObj, infoObj); + Tcl_ListObjAppendElement(NULL, Tcl_GetObjResult(interp), + Tcl_NewIntObj((int) regexpPtr->re.re_nsub)); /* * Now append a list of all the bit-flags set for the RE. @@ -691,8 +684,7 @@ TclRegAbout( Tcl_NewStringObj(inf->text, -1)); } } - Tcl_ListObjAppendElement(NULL, resultObj, infoObj); - Tcl_SetObjResult(interp, resultObj); + Tcl_ListObjAppendElement(NULL, Tcl_GetObjResult(interp), infoObj); return 0; } @@ -720,18 +712,18 @@ TclRegError( int status) /* Status code to report. */ { char buf[100]; /* ample in practice */ - char cbuf[TCL_INTEGER_SPACE]; + char cbuf[100]; /* lots in practice */ size_t n; const char *p; Tcl_ResetResult(interp); - n = TclReError(status, buf, sizeof(buf)); + n = TclReError(status, NULL, buf, sizeof(buf)); p = (n > sizeof(buf)) ? "..." : ""; - Tcl_SetObjResult(interp, Tcl_ObjPrintf("%s%s%s", msg, buf, p)); + Tcl_AppendResult(interp, msg, buf, p, NULL); - snprintf(cbuf, sizeof(cbuf), "%d", status); - (void) TclReError(REG_ITOA, cbuf, sizeof(cbuf)); - Tcl_SetErrorCode(interp, "REGEXP", cbuf, buf, (void *)NULL); + sprintf(cbuf, "%d", status); + (void) TclReError(REG_ITOA, NULL, cbuf, sizeof(cbuf)); + Tcl_SetErrorCode(interp, "REGEXP", cbuf, buf, NULL); } /* @@ -755,19 +747,16 @@ static void FreeRegexpInternalRep( Tcl_Obj *objPtr) /* Regexp object with internal rep to free. */ { - TclRegexp *regexpRepPtr; - - RegexpGetInternalRep(objPtr, regexpRepPtr); - - assert(regexpRepPtr != NULL); + TclRegexp *regexpRepPtr = (TclRegexp *) objPtr->internalRep.twoPtrValue.ptr1; /* * If this is the last reference to the regexp, free it. */ - if (regexpRepPtr->refCount-- <= 1) { + if (--(regexpRepPtr->refCount) <= 0) { FreeRegexp(regexpRepPtr); } + objPtr->typePtr = NULL; } /* @@ -792,13 +781,11 @@ DupRegexpInternalRep( Tcl_Obj *srcPtr, /* Object with internal rep to copy. */ Tcl_Obj *copyPtr) /* Object with internal rep to set. */ { - TclRegexp *regexpPtr; - - RegexpGetInternalRep(srcPtr, regexpPtr); - - assert(regexpPtr != NULL); + TclRegexp *regexpPtr = (TclRegexp *) srcPtr->internalRep.twoPtrValue.ptr1; - RegexpSetInternalRep(copyPtr, regexpPtr); + regexpPtr->refCount++; + copyPtr->internalRep.twoPtrValue.ptr1 = srcPtr->internalRep.twoPtrValue.ptr1; + copyPtr->typePtr = &tclRegexpType; } /* @@ -916,7 +903,7 @@ CompileRegexp( * This is a new expression, so compile it and add it to the cache. */ - regexpPtr = (TclRegexp*)ckalloc(sizeof(TclRegexp)); + regexpPtr = (TclRegexp *) ckalloc(sizeof(TclRegexp)); regexpPtr->objPtr = NULL; regexpPtr->string = NULL; regexpPtr->details.rm_extend.rm_so = -1; @@ -943,7 +930,7 @@ CompileRegexp( * Clean up and report errors in the interpreter, if possible. */ - ckfree(regexpPtr); + ckfree((char *)regexpPtr); if (interp) { TclRegError(interp, "couldn't compile regular expression pattern: ", status); @@ -957,10 +944,11 @@ CompileRegexp( * Tcl_RegExpExecObj to optionally do a fast match (avoids RE engine). */ - if (TclReToGlob(NULL, string, length, &stringBuf, &exact, - NULL) == TCL_OK) { - regexpPtr->globObjPtr = Tcl_DStringToObj(&stringBuf); + if (TclReToGlob(NULL, string, length, &stringBuf, &exact) == TCL_OK) { + regexpPtr->globObjPtr = Tcl_NewStringObj(Tcl_DStringValue(&stringBuf), + Tcl_DStringLength(&stringBuf)); Tcl_IncrRefCount(regexpPtr->globObjPtr); + Tcl_DStringFree(&stringBuf); } else { regexpPtr->globObjPtr = NULL; } @@ -970,8 +958,8 @@ CompileRegexp( * the entire pattern. */ - regexpPtr->matches = - (regmatch_t*)ckalloc(sizeof(regmatch_t) * (regexpPtr->re.re_nsub + 1)); + regexpPtr->matches = (regmatch_t *) ckalloc( + sizeof(regmatch_t) * (regexpPtr->re.re_nsub + 1)); /* * Initialize the refcount to one initially, since it is in the cache. @@ -986,8 +974,7 @@ CompileRegexp( if (tsdPtr->patterns[NUM_REGEXPS-1] != NULL) { TclRegexp *oldRegexpPtr = tsdPtr->regexps[NUM_REGEXPS-1]; - - if (oldRegexpPtr->refCount-- <= 1) { + if (--(oldRegexpPtr->refCount) <= 0) { FreeRegexp(oldRegexpPtr); } ckfree(tsdPtr->patterns[NUM_REGEXPS-1]); @@ -997,8 +984,8 @@ CompileRegexp( tsdPtr->patLengths[i+1] = tsdPtr->patLengths[i]; tsdPtr->regexps[i+1] = tsdPtr->regexps[i]; } - tsdPtr->patterns[0] = (char *)ckalloc(length + 1); - memcpy(tsdPtr->patterns[0], string, length + 1); + tsdPtr->patterns[0] = (char *) ckalloc((unsigned) (length+1)); + strcpy(tsdPtr->patterns[0], string); tsdPtr->patLengths[0] = length; tsdPtr->regexps[0] = regexpPtr; @@ -1030,9 +1017,9 @@ FreeRegexp( TclDecrRefCount(regexpPtr->globObjPtr); } if (regexpPtr->matches) { - ckfree(regexpPtr->matches); + ckfree((char *) regexpPtr->matches); } - ckfree(regexpPtr); + ckfree((char *) regexpPtr); } /* @@ -1053,7 +1040,7 @@ FreeRegexp( static void FinalizeRegexp( - TCL_UNUSED(ClientData)) + ClientData clientData) /* Not used. */ { int i; TclRegexp *regexpPtr; @@ -1061,18 +1048,16 @@ FinalizeRegexp( for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) { regexpPtr = tsdPtr->regexps[i]; - if (regexpPtr->refCount-- <= 1) { + if (--(regexpPtr->refCount) <= 0) { FreeRegexp(regexpPtr); } ckfree(tsdPtr->patterns[i]); tsdPtr->patterns[i] = NULL; } - /* * We may find ourselves reinitialized if another finalization routine * invokes regexps. */ - tsdPtr->initialized = 0; } |
