summaryrefslogtreecommitdiffstats
path: root/generic/tclRegexp.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclRegexp.c')
-rw-r--r--generic/tclRegexp.c169
1 files changed, 77 insertions, 92 deletions
diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c
index a823af5..dac6aba 100644
--- a/generic/tclRegexp.c
+++ b/generic/tclRegexp.c
@@ -4,8 +4,8 @@
* This file contains the public interfaces to the Tcl regular expression
* mechanism.
*
- * Copyright © 1998 Sun Microsystems, Inc.
- * Copyright © 1998-1999 Scriptics Corporation.
+ * Copyright (c) 1998 by Sun Microsystems, Inc.
+ * Copyright (c) 1998-1999 by Scriptics Corporation.
*
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
@@ -13,7 +13,6 @@
#include "tclInt.h"
#include "tclRegexp.h"
-#include <assert.h>
/*
*----------------------------------------------------------------------
@@ -26,7 +25,7 @@
* regex.h regexec.c regfree.c
* regfronts.c regguts.h
*
- * Copyright © 1998 Henry Spencer. All rights reserved.
+ * Copyright (c) 1998 Henry Spencer. All rights reserved.
*
* Development of this software was funded, in part, by Cray Research Inc.,
* UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
@@ -54,8 +53,8 @@
*
* *** NOTE: this code has been altered slightly for use in Tcl: ***
* *** 1. Names have been changed, e.g. from re_comp to ***
- * *** TclRegComp, to avoid clashes with other ***
- * *** regexp implementations used by applications. ***
+ * *** TclRegComp, to avoid clashes with other ***
+ * *** regexp implementations used by applications. ***
*/
/*
@@ -65,7 +64,7 @@
#define NUM_REGEXPS 30
-typedef struct {
+typedef struct ThreadSpecificData {
int initialized; /* Set to 1 when the module is initialized. */
char *patterns[NUM_REGEXPS];/* Strings corresponding to compiled regular
* expression patterns. NULL means that this
@@ -101,30 +100,13 @@ static int SetRegexpFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr);
* compiled form of the regular expression.
*/
-const Tcl_ObjType tclRegexpType = {
+Tcl_ObjType tclRegexpType = {
"regexp", /* name */
FreeRegexpInternalRep, /* freeIntRepProc */
DupRegexpInternalRep, /* dupIntRepProc */
NULL, /* updateStringProc */
SetRegexpFromAny /* setFromAnyProc */
};
-
-#define RegexpSetInternalRep(objPtr, rePtr) \
- do { \
- Tcl_ObjInternalRep ir; \
- (rePtr)->refCount++; \
- ir.twoPtrValue.ptr1 = (rePtr); \
- ir.twoPtrValue.ptr2 = NULL; \
- Tcl_StoreInternalRep((objPtr), &tclRegexpType, &ir); \
- } while (0)
-
-#define RegexpGetInternalRep(objPtr, rePtr) \
- do { \
- const Tcl_ObjInternalRep *irPtr; \
- irPtr = TclFetchInternalRep((objPtr), &tclRegexpType); \
- (rePtr) = irPtr ? (TclRegexp *)irPtr->twoPtrValue.ptr1 : NULL; \
- } while (0)
-
/*
*----------------------------------------------------------------------
@@ -191,7 +173,7 @@ Tcl_RegExpExec(
* that "^" won't match. */
{
int flags, result, numChars;
- TclRegexp *regexp = (TclRegexp *) re;
+ TclRegexp *regexp = (TclRegexp *)re;
Tcl_DString ds;
const Tcl_UniChar *ustr;
@@ -263,7 +245,7 @@ Tcl_RegExpRange(
if ((size_t) index > regexpPtr->re.re_nsub) {
*startPtr = *endPtr = NULL;
- } else if (regexpPtr->matches[index].rm_so == TCL_INDEX_NONE) {
+ } else if (regexpPtr->matches[index].rm_so < 0) {
*startPtr = *endPtr = NULL;
} else {
if (regexpPtr->objPtr) {
@@ -271,8 +253,8 @@ Tcl_RegExpRange(
} else {
string = regexpPtr->string;
}
- *startPtr = TclUtfAtIndex(string, regexpPtr->matches[index].rm_so);
- *endPtr = TclUtfAtIndex(string, regexpPtr->matches[index].rm_eo);
+ *startPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_so);
+ *endPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_eo);
}
}
@@ -364,7 +346,7 @@ TclRegExpRangeUniChar(
* passed to Tcl_RegExpExec. */
int index, /* 0 means give the range of the entire match,
* > 0 means give the range of a matching
- * subrange, TCL_INDEX_NONE means the range of the
+ * subrange, -1 means the range of the
* rm_extend field. */
int *startPtr, /* Store address of first character in
* (sub-)range here. */
@@ -373,12 +355,12 @@ TclRegExpRangeUniChar(
{
TclRegexp *regexpPtr = (TclRegexp *) re;
- if ((regexpPtr->flags&REG_EXPECT) && (index == TCL_INDEX_NONE)) {
+ if ((regexpPtr->flags&REG_EXPECT) && index == -1) {
*startPtr = regexpPtr->details.rm_extend.rm_so;
*endPtr = regexpPtr->details.rm_extend.rm_eo;
} else if ((size_t) index > regexpPtr->re.re_nsub) {
- *startPtr = TCL_INDEX_NONE;
- *endPtr = TCL_INDEX_NONE;
+ *startPtr = -1;
+ *endPtr = -1;
} else {
*startPtr = regexpPtr->matches[index].rm_so;
*endPtr = regexpPtr->matches[index].rm_eo;
@@ -409,8 +391,9 @@ Tcl_RegExpMatch(
const char *text, /* Text to search for pattern matches. */
const char *pattern) /* Regular expression to match against text. */
{
- Tcl_RegExp re = Tcl_RegExpCompile(interp, pattern);
+ Tcl_RegExp re;
+ re = Tcl_RegExpCompile(interp, pattern);
if (re == NULL) {
return -1;
}
@@ -453,8 +436,7 @@ Tcl_RegExpExecObj(
Tcl_UniChar *udata;
int length;
int reflags = regexpPtr->flags;
-#define TCL_REG_GLOBOK_FLAGS \
- (TCL_REG_ADVANCED | TCL_REG_NOSUB | TCL_REG_NOCASE)
+#define TCL_REG_GLOBOK_FLAGS (TCL_REG_ADVANCED | TCL_REG_NOSUB | TCL_REG_NOCASE)
/*
* Take advantage of the equivalent glob pattern, if one exists.
@@ -482,7 +464,7 @@ Tcl_RegExpExecObj(
regexpPtr->string = NULL;
regexpPtr->objPtr = textObj;
- udata = TclGetUnicodeFromObj(textObj, &length);
+ udata = Tcl_GetUnicodeFromObj(textObj, &length);
if (offset > length) {
offset = length;
@@ -520,16 +502,9 @@ Tcl_RegExpMatchObj(
{
Tcl_RegExp re;
- /*
- * For performance reasons, first try compiling the RE without support for
- * subexpressions. On failure, try again without TCL_REG_NOSUB in case the
- * RE has backreferences in it. Closely related to [Bug 1366683]. If this
- * still fails, an error message will be left in the interpreter.
- */
-
- if (!(re = Tcl_GetRegExpFromObj(interp, patternObj,
- TCL_REG_ADVANCED | TCL_REG_NOSUB))
- && !(re = Tcl_GetRegExpFromObj(interp, patternObj, TCL_REG_ADVANCED))) {
+ re = Tcl_GetRegExpFromObj(interp, patternObj,
+ TCL_REG_ADVANCED | TCL_REG_NOSUB);
+ if (re == NULL) {
return -1;
}
return Tcl_RegExpExecObj(interp, re, textObj, 0 /* offset */,
@@ -596,11 +571,16 @@ Tcl_GetRegExpFromObj(
{
int length;
TclRegexp *regexpPtr;
- const char *pattern;
+ char *pattern;
- RegexpGetInternalRep(objPtr, regexpPtr);
+ /*
+ * This is OK because we only actually interpret this value properly as a
+ * TclRegexp* when the type is tclRegexpType.
+ */
+
+ regexpPtr = (TclRegexp *) objPtr->internalRep.twoPtrValue.ptr1;
- if ((regexpPtr == NULL) || (regexpPtr->flags != flags)) {
+ if ((objPtr->typePtr != &tclRegexpType) || (regexpPtr->flags != flags)) {
pattern = TclGetStringFromObj(objPtr, &length);
regexpPtr = CompileRegexp(interp, pattern, length, flags);
@@ -608,7 +588,21 @@ Tcl_GetRegExpFromObj(
return NULL;
}
- RegexpSetInternalRep(objPtr, regexpPtr);
+ /*
+ * Add a reference to the regexp so it will persist even if it is
+ * pushed out of the current thread's regexp cache. This reference
+ * will be removed when the object's internal rep is freed.
+ */
+
+ regexpPtr->refCount++;
+
+ /*
+ * Free the old representation and set our type.
+ */
+
+ TclFreeIntRep(objPtr);
+ objPtr->internalRep.twoPtrValue.ptr1 = (void *) regexpPtr;
+ objPtr->typePtr = &tclRegexpType;
}
return (Tcl_RegExp) regexpPtr;
}
@@ -660,7 +654,7 @@ TclRegAbout(
{0, NULL}
};
const struct infoname *inf;
- Tcl_Obj *infoObj, *resultObj;
+ Tcl_Obj *infoObj;
/*
* The reset here guarantees that the interpreter result is empty and
@@ -676,9 +670,8 @@ TclRegAbout(
* well and Tcl has other limits that constrain things as well...
*/
- TclNewObj(resultObj);
- TclNewIndexObj(infoObj, regexpPtr->re.re_nsub);
- Tcl_ListObjAppendElement(NULL, resultObj, infoObj);
+ Tcl_ListObjAppendElement(NULL, Tcl_GetObjResult(interp),
+ Tcl_NewIntObj((int) regexpPtr->re.re_nsub));
/*
* Now append a list of all the bit-flags set for the RE.
@@ -691,8 +684,7 @@ TclRegAbout(
Tcl_NewStringObj(inf->text, -1));
}
}
- Tcl_ListObjAppendElement(NULL, resultObj, infoObj);
- Tcl_SetObjResult(interp, resultObj);
+ Tcl_ListObjAppendElement(NULL, Tcl_GetObjResult(interp), infoObj);
return 0;
}
@@ -720,18 +712,18 @@ TclRegError(
int status) /* Status code to report. */
{
char buf[100]; /* ample in practice */
- char cbuf[TCL_INTEGER_SPACE];
+ char cbuf[100]; /* lots in practice */
size_t n;
const char *p;
Tcl_ResetResult(interp);
- n = TclReError(status, buf, sizeof(buf));
+ n = TclReError(status, NULL, buf, sizeof(buf));
p = (n > sizeof(buf)) ? "..." : "";
- Tcl_SetObjResult(interp, Tcl_ObjPrintf("%s%s%s", msg, buf, p));
+ Tcl_AppendResult(interp, msg, buf, p, NULL);
- snprintf(cbuf, sizeof(cbuf), "%d", status);
- (void) TclReError(REG_ITOA, cbuf, sizeof(cbuf));
- Tcl_SetErrorCode(interp, "REGEXP", cbuf, buf, (void *)NULL);
+ sprintf(cbuf, "%d", status);
+ (void) TclReError(REG_ITOA, NULL, cbuf, sizeof(cbuf));
+ Tcl_SetErrorCode(interp, "REGEXP", cbuf, buf, NULL);
}
/*
@@ -755,19 +747,16 @@ static void
FreeRegexpInternalRep(
Tcl_Obj *objPtr) /* Regexp object with internal rep to free. */
{
- TclRegexp *regexpRepPtr;
-
- RegexpGetInternalRep(objPtr, regexpRepPtr);
-
- assert(regexpRepPtr != NULL);
+ TclRegexp *regexpRepPtr = (TclRegexp *) objPtr->internalRep.twoPtrValue.ptr1;
/*
* If this is the last reference to the regexp, free it.
*/
- if (regexpRepPtr->refCount-- <= 1) {
+ if (--(regexpRepPtr->refCount) <= 0) {
FreeRegexp(regexpRepPtr);
}
+ objPtr->typePtr = NULL;
}
/*
@@ -792,13 +781,11 @@ DupRegexpInternalRep(
Tcl_Obj *srcPtr, /* Object with internal rep to copy. */
Tcl_Obj *copyPtr) /* Object with internal rep to set. */
{
- TclRegexp *regexpPtr;
-
- RegexpGetInternalRep(srcPtr, regexpPtr);
-
- assert(regexpPtr != NULL);
+ TclRegexp *regexpPtr = (TclRegexp *) srcPtr->internalRep.twoPtrValue.ptr1;
- RegexpSetInternalRep(copyPtr, regexpPtr);
+ regexpPtr->refCount++;
+ copyPtr->internalRep.twoPtrValue.ptr1 = srcPtr->internalRep.twoPtrValue.ptr1;
+ copyPtr->typePtr = &tclRegexpType;
}
/*
@@ -916,7 +903,7 @@ CompileRegexp(
* This is a new expression, so compile it and add it to the cache.
*/
- regexpPtr = (TclRegexp*)ckalloc(sizeof(TclRegexp));
+ regexpPtr = (TclRegexp *) ckalloc(sizeof(TclRegexp));
regexpPtr->objPtr = NULL;
regexpPtr->string = NULL;
regexpPtr->details.rm_extend.rm_so = -1;
@@ -943,7 +930,7 @@ CompileRegexp(
* Clean up and report errors in the interpreter, if possible.
*/
- ckfree(regexpPtr);
+ ckfree((char *)regexpPtr);
if (interp) {
TclRegError(interp,
"couldn't compile regular expression pattern: ", status);
@@ -957,10 +944,11 @@ CompileRegexp(
* Tcl_RegExpExecObj to optionally do a fast match (avoids RE engine).
*/
- if (TclReToGlob(NULL, string, length, &stringBuf, &exact,
- NULL) == TCL_OK) {
- regexpPtr->globObjPtr = Tcl_DStringToObj(&stringBuf);
+ if (TclReToGlob(NULL, string, length, &stringBuf, &exact) == TCL_OK) {
+ regexpPtr->globObjPtr = Tcl_NewStringObj(Tcl_DStringValue(&stringBuf),
+ Tcl_DStringLength(&stringBuf));
Tcl_IncrRefCount(regexpPtr->globObjPtr);
+ Tcl_DStringFree(&stringBuf);
} else {
regexpPtr->globObjPtr = NULL;
}
@@ -970,8 +958,8 @@ CompileRegexp(
* the entire pattern.
*/
- regexpPtr->matches =
- (regmatch_t*)ckalloc(sizeof(regmatch_t) * (regexpPtr->re.re_nsub + 1));
+ regexpPtr->matches = (regmatch_t *) ckalloc(
+ sizeof(regmatch_t) * (regexpPtr->re.re_nsub + 1));
/*
* Initialize the refcount to one initially, since it is in the cache.
@@ -986,8 +974,7 @@ CompileRegexp(
if (tsdPtr->patterns[NUM_REGEXPS-1] != NULL) {
TclRegexp *oldRegexpPtr = tsdPtr->regexps[NUM_REGEXPS-1];
-
- if (oldRegexpPtr->refCount-- <= 1) {
+ if (--(oldRegexpPtr->refCount) <= 0) {
FreeRegexp(oldRegexpPtr);
}
ckfree(tsdPtr->patterns[NUM_REGEXPS-1]);
@@ -997,8 +984,8 @@ CompileRegexp(
tsdPtr->patLengths[i+1] = tsdPtr->patLengths[i];
tsdPtr->regexps[i+1] = tsdPtr->regexps[i];
}
- tsdPtr->patterns[0] = (char *)ckalloc(length + 1);
- memcpy(tsdPtr->patterns[0], string, length + 1);
+ tsdPtr->patterns[0] = (char *) ckalloc((unsigned) (length+1));
+ strcpy(tsdPtr->patterns[0], string);
tsdPtr->patLengths[0] = length;
tsdPtr->regexps[0] = regexpPtr;
@@ -1030,9 +1017,9 @@ FreeRegexp(
TclDecrRefCount(regexpPtr->globObjPtr);
}
if (regexpPtr->matches) {
- ckfree(regexpPtr->matches);
+ ckfree((char *) regexpPtr->matches);
}
- ckfree(regexpPtr);
+ ckfree((char *) regexpPtr);
}
/*
@@ -1053,7 +1040,7 @@ FreeRegexp(
static void
FinalizeRegexp(
- TCL_UNUSED(ClientData))
+ ClientData clientData) /* Not used. */
{
int i;
TclRegexp *regexpPtr;
@@ -1061,18 +1048,16 @@ FinalizeRegexp(
for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) {
regexpPtr = tsdPtr->regexps[i];
- if (regexpPtr->refCount-- <= 1) {
+ if (--(regexpPtr->refCount) <= 0) {
FreeRegexp(regexpPtr);
}
ckfree(tsdPtr->patterns[i]);
tsdPtr->patterns[i] = NULL;
}
-
/*
* We may find ourselves reinitialized if another finalization routine
* invokes regexps.
*/
-
tsdPtr->initialized = 0;
}