summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorhobbs <hobbs>2007-11-12 02:07:18 (GMT)
committerhobbs <hobbs>2007-11-12 02:07:18 (GMT)
commitcf8a7199f105edc95e59373e098af6eb47d22a16 (patch)
treec7f005156cbe08f8e11d6845d4a71f991dc5b488
parent094b6f7ae513ec561543276d7659f3a8b2a5b853 (diff)
downloadtcl-cf8a7199f105edc95e59373e098af6eb47d22a16.zip
tcl-cf8a7199f105edc95e59373e098af6eb47d22a16.tar.gz
tcl-cf8a7199f105edc95e59373e098af6eb47d22a16.tar.bz2
* generic/tclCompCmds.c, generic/tclCompile.c, generic/tclCompile.h:
* generic/tclExecute.c, generic/tclInt.decls, generic/tclIntDecls.h: * generic/tclRegexp.c, generic/tclRegexp.h: Add INST_REGEXP and fully * generic/tclStubInit.c, generic/tclUtil.c: compiled [regexp] for the * tests/regexpComp.test: [Bug 1830166] simple cases. Also added TclReToGlob function to convert RE to glob patterns and use these in the possible cases.
-rw-r--r--ChangeLog10
-rw-r--r--generic/tclCompCmds.c128
-rw-r--r--generic/tclCompile.c5
-rw-r--r--generic/tclCompile.h8
-rw-r--r--generic/tclExecute.c35
-rw-r--r--generic/tclInt.decls10
-rw-r--r--generic/tclIntDecls.h14
-rw-r--r--generic/tclRegexp.c61
-rw-r--r--generic/tclRegexp.h3
-rw-r--r--generic/tclStubInit.c3
-rw-r--r--generic/tclUtil.c186
-rw-r--r--tests/regexpComp.test76
12 files changed, 436 insertions, 103 deletions
diff --git a/ChangeLog b/ChangeLog
index 5e8bffb..aaf06cd 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2007-11-11 Jeff Hobbs <jeffh@ActiveState.com>
+
+ * generic/tclCompCmds.c, generic/tclCompile.c, generic/tclCompile.h:
+ * generic/tclExecute.c, generic/tclInt.decls, generic/tclIntDecls.h:
+ * generic/tclRegexp.c, generic/tclRegexp.h: Add INST_REGEXP and fully
+ * generic/tclStubInit.c, generic/tclUtil.c: compiled [regexp] for the
+ * tests/regexpComp.test: [Bug 1830166] simple cases. Also
+ added TclReToGlob function to convert RE to glob patterns and use
+ these in the possible cases.
+
2007-11-10 Miguel Sofer <msofer@users.sf.net>
* generic/tclResult.c (ResetObjResult): clarify the logic.
diff --git a/generic/tclCompCmds.c b/generic/tclCompCmds.c
index 035dd24..6179190 100644
--- a/generic/tclCompCmds.c
+++ b/generic/tclCompCmds.c
@@ -12,7 +12,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclCompCmds.c,v 1.122 2007/11/11 19:32:14 msofer Exp $
+ * RCS: @(#) $Id: tclCompCmds.c,v 1.123 2007/11/12 02:07:19 hobbs Exp $
*/
#include "tclInt.h"
@@ -2883,7 +2883,7 @@ TclCompileRegexpCmd(
{
Tcl_Token *varTokenPtr; /* Pointer to the Tcl_Token representing the
* parse of the RE or string. */
- int i, len, nocase, anchorLeft, anchorRight, start;
+ int i, len, nocase, exact, sawLast, simple;
char *str;
DefineLineInformation; /* TIP #280 */
@@ -2898,7 +2898,9 @@ TclCompileRegexpCmd(
return TCL_ERROR;
}
+ simple = 0;
nocase = 0;
+ sawLast = 0;
varTokenPtr = parsePtr->tokenPtr;
/*
@@ -2919,6 +2921,7 @@ TclCompileRegexpCmd(
str = (char *) varTokenPtr[1].start;
len = varTokenPtr[1].size;
if ((len == 2) && (str[0] == '-') && (str[1] == '-')) {
+ sawLast++;
i++;
break;
} else if ((len > 1) && (strncmp(str,"-nocase",(unsigned)len) == 0)) {
@@ -2946,102 +2949,41 @@ TclCompileRegexpCmd(
*/
varTokenPtr = TokenAfter(varTokenPtr);
- str = (char *) varTokenPtr[1].start;
- len = varTokenPtr[1].size;
- if ((varTokenPtr->type != TCL_TOKEN_SIMPLE_WORD) || (*str == '-')) {
- return TCL_ERROR;
- }
- if (len == 0) {
- /*
- * The semantics of regexp are always match on re == "".
- */
+ if (varTokenPtr->type == TCL_TOKEN_SIMPLE_WORD) {
+ Tcl_DString ds;
- PushLiteral(envPtr, "1", 1);
- return TCL_OK;
- }
-
- /*
- * Make a copy of the string that is null-terminated for checks which
- * require such.
- */
-
- str = (char *) TclStackAlloc(interp, (unsigned) len + 1);
- strncpy(str, varTokenPtr[1].start, (size_t) len);
- str[len] = '\0';
- start = 0;
-
- /*
- * Check for anchored REs (ie ^foo$), so we can use string equal if
- * possible. Do not alter the start of str so we can free it correctly.
- */
-
- if (str[0] == '^') {
- start++;
- anchorLeft = 1;
- } else {
- anchorLeft = 0;
- }
- if ((str[len-1] == '$') && ((len == 1) || (str[len-2] != '\\'))) {
- anchorRight = 1;
- str[--len] = '\0';
- } else {
- anchorRight = 0;
- }
-
- /*
- * On the first (pattern) arg, check to see if any RE special characters
- * are in the word. If not, this is the same as 'string equal'.
- */
-
- if ((len > 1+start) && (str[start] == '.') && (str[start+1] == '*')) {
- start += 2;
- anchorLeft = 0;
- }
- if ((len > 2+start) && (str[len-3] != '\\')
- && (str[len-2] == '.') && (str[len-1] == '*')) {
- len -= 2;
- str[len] = '\0';
- anchorRight = 0;
- }
+ simple = 1;
+ str = (char *) varTokenPtr[1].start;
+ len = varTokenPtr[1].size;
+ if ((*str == '-') && !sawLast) {
+ return TCL_ERROR;
+ }
- /*
- * Don't do anything with REs with other special chars. Also check if this
- * is a bad RE (do this at the end because it can be expensive). If so,
- * let it complain at runtime.
- */
+ if (len == 0) {
+ /*
+ * The semantics of regexp are always match on re == "".
+ */
- if ((strpbrk(str + start, "*+?{}()[].\\|^$") != NULL)
- || (Tcl_RegExpCompile(NULL, str) == NULL)) {
- TclStackFree(interp, str);
- return TCL_ERROR;
- }
+ PushLiteral(envPtr, "1", 1);
+ return TCL_OK;
+ }
- if (anchorLeft && anchorRight) {
- PushLiteral(envPtr, str+start, len-start);
- } else {
/*
- * This needs to find the substring anywhere in the string, so use
- * [string match] and *foo*, with appropriate anchoring.
+ * Attempt to convert pattern to glob. If successful, push the
+ * converted pattern.
*/
- char *newStr = TclStackAlloc(interp, (unsigned) len + 3);
-
- len -= start;
- if (anchorLeft) {
- strncpy(newStr, str + start, (size_t) len);
- } else {
- newStr[0] = '*';
- strncpy(newStr + 1, str + start, (size_t) len++);
- }
- if (!anchorRight) {
- newStr[len++] = '*';
+ if (TclReToGlob(NULL, varTokenPtr[1].start, len, &ds, &exact)
+ != TCL_OK) {
+ return TCL_ERROR;
}
- newStr[len] = '\0';
- PushLiteral(envPtr, newStr, len);
- TclStackFree(interp, newStr);
+
+ PushLiteral(envPtr, Tcl_DStringValue(&ds), Tcl_DStringLength(&ds));
+ Tcl_DStringFree(&ds);
+ } else {
+ CompileWord(envPtr, varTokenPtr, interp, parsePtr->numWords-2);
}
- TclStackFree(interp, str);
/*
* Push the string arg.
@@ -3050,10 +2992,14 @@ TclCompileRegexpCmd(
varTokenPtr = TokenAfter(varTokenPtr);
CompileWord(envPtr, varTokenPtr, interp, parsePtr->numWords-1);
- if (anchorLeft && anchorRight && !nocase) {
- TclEmitOpcode(INST_STR_EQ, envPtr);
+ if (simple) {
+ if (exact && !nocase) {
+ TclEmitOpcode(INST_STR_EQ, envPtr);
+ } else {
+ TclEmitInstInt1(INST_STR_MATCH, nocase, envPtr);
+ }
} else {
- TclEmitInstInt1(INST_STR_MATCH, nocase, envPtr);
+ TclEmitInstInt1(INST_REGEXP, nocase, envPtr);
}
return TCL_OK;
diff --git a/generic/tclCompile.c b/generic/tclCompile.c
index d66f356..ce9f7e1 100644
--- a/generic/tclCompile.c
+++ b/generic/tclCompile.c
@@ -11,7 +11,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclCompile.c,v 1.137 2007/11/11 19:32:14 msofer Exp $
+ * RCS: @(#) $Id: tclCompile.c,v 1.138 2007/11/12 02:07:19 hobbs Exp $
*/
#include "tclInt.h"
@@ -385,6 +385,9 @@ InstructionDesc tclInstructionTable[] = {
/* Compiled bytecodes to signal syntax error. */
{"reverse", 5, 0, 1, {OPERAND_UINT4}},
/* Reverse the order of the arg elements at the top of stack */
+
+ {"regexp", 2, -1, 1, {OPERAND_INT1}},
+ /* Regexp: push (regexp stknext stktop) opnd == nocase */
{0}
};
diff --git a/generic/tclCompile.h b/generic/tclCompile.h
index 3e02b1d..618f704 100644
--- a/generic/tclCompile.h
+++ b/generic/tclCompile.h
@@ -9,7 +9,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclCompile.h,v 1.82 2007/10/27 13:15:58 msofer Exp $
+ * RCS: @(#) $Id: tclCompile.h,v 1.83 2007/11/12 02:07:19 hobbs Exp $
*/
#ifndef _TCLCOMPILATION
@@ -636,8 +636,12 @@ typedef struct ByteCode {
#define INST_REVERSE 126
+/* regexp instruction */
+
+#define INST_REGEXP 127
+
/* The last opcode */
-#define LAST_INST_OPCODE 126
+#define LAST_INST_OPCODE 127
/*
* Table describing the Tcl bytecode instructions: their name (for displaying
diff --git a/generic/tclExecute.c b/generic/tclExecute.c
index cb72097..80247a6 100644
--- a/generic/tclExecute.c
+++ b/generic/tclExecute.c
@@ -13,7 +13,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclExecute.c,v 1.345 2007/11/11 19:32:14 msofer Exp $
+ * RCS: @(#) $Id: tclExecute.c,v 1.346 2007/11/12 02:07:19 hobbs Exp $
*/
#include "tclInt.h"
@@ -4098,6 +4098,39 @@ TclExecuteByteCode(
NEXT_INST_F(2, 2, 1);
}
+ case INST_REGEXP: {
+ int nocase, match;
+ Tcl_Obj *valuePtr, *value2Ptr;
+ Tcl_RegExp regExpr;
+
+ nocase = TclGetInt1AtPtr(pc+1);
+ valuePtr = OBJ_AT_TOS; /* String */
+ value2Ptr = OBJ_UNDER_TOS; /* Pattern */
+
+ regExpr = Tcl_GetRegExpFromObj(interp, value2Ptr,
+ TCL_REG_ADVANCED | (nocase ? TCL_REG_NOCASE : 0));
+ if (regExpr == NULL) {
+ match = -1;
+ } else {
+ match = Tcl_RegExpExecObj(interp, regExpr, valuePtr, 0, 0, 0);
+ }
+
+ /*
+ * Adjustment is 2 due to the nocase byte
+ */
+
+ if (match < 0) {
+ objResultPtr = Tcl_GetObjResult(interp);
+ TRACE_WITH_OBJ(("%.20s %.20s => ERROR: ", O2S(valuePtr), O2S(value2Ptr)), objResultPtr);
+ result = TCL_ERROR;
+ goto checkForCatch;
+ } else {
+ TRACE(("%.20s %.20s => %d\n", O2S(valuePtr), O2S(value2Ptr), match));
+ objResultPtr = constants[match];
+ NEXT_INST_F(2, 2, 1);
+ }
+ }
+
case INST_EQ:
case INST_NEQ:
case INST_LT:
diff --git a/generic/tclInt.decls b/generic/tclInt.decls
index c3118df..97a1269 100644
--- a/generic/tclInt.decls
+++ b/generic/tclInt.decls
@@ -12,7 +12,7 @@
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
-# RCS: @(#) $Id: tclInt.decls,v 1.116 2007/11/09 18:55:15 hobbs Exp $
+# RCS: @(#) $Id: tclInt.decls,v 1.117 2007/11/12 02:07:19 hobbs Exp $
library tcl
@@ -945,7 +945,13 @@ declare 236 generic {
# Added for 8.5b3 to improve binary glob match case
declare 237 generic {
int TclByteArrayMatch(const unsigned char *string, int strLen,
- const unsigned char *pattern, int ptnLen)
+ const unsigned char *pattern, int ptnLen)
+}
+
+# Added for 8.5b3 to generalize check for RE to glob pattern conversion
+declare 238 generic {
+ int TclReToGlob(Tcl_Interp *interp, const char *reStr, int reStrLen,
+ Tcl_DString *dsPtr, int *exactPtr)
}
##############################################################################
diff --git a/generic/tclIntDecls.h b/generic/tclIntDecls.h
index 05daefb..ca01f4f 100644
--- a/generic/tclIntDecls.h
+++ b/generic/tclIntDecls.h
@@ -11,7 +11,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclIntDecls.h,v 1.107 2007/11/09 18:55:15 hobbs Exp $
+ * RCS: @(#) $Id: tclIntDecls.h,v 1.108 2007/11/12 02:07:19 hobbs Exp $
*/
#ifndef _TCLINTDECLS
@@ -1064,6 +1064,13 @@ EXTERN int TclByteArrayMatch (const unsigned char * string,
int strLen, const unsigned char * pattern,
int ptnLen);
#endif
+#ifndef TclReToGlob_TCL_DECLARED
+#define TclReToGlob_TCL_DECLARED
+/* 238 */
+EXTERN int TclReToGlob (Tcl_Interp * interp, const char * reStr,
+ int reStrLen, Tcl_DString * dsPtr,
+ int * exactPtr);
+#endif
typedef struct TclIntStubs {
int magic;
@@ -1322,6 +1329,7 @@ typedef struct TclIntStubs {
void (*tclInitVarHashTable) (TclVarHashTable * tablePtr, Namespace * nsPtr); /* 235 */
void (*tclBackgroundException) (Tcl_Interp * interp, int code); /* 236 */
int (*tclByteArrayMatch) (const unsigned char * string, int strLen, const unsigned char * pattern, int ptnLen); /* 237 */
+ int (*tclReToGlob) (Tcl_Interp * interp, const char * reStr, int reStrLen, Tcl_DString * dsPtr, int * exactPtr); /* 238 */
} TclIntStubs;
#ifdef __cplusplus
@@ -2059,6 +2067,10 @@ extern TclIntStubs *tclIntStubsPtr;
#define TclByteArrayMatch \
(tclIntStubsPtr->tclByteArrayMatch) /* 237 */
#endif
+#ifndef TclReToGlob
+#define TclReToGlob \
+ (tclIntStubsPtr->tclReToGlob) /* 238 */
+#endif
#endif /* defined(USE_TCL_STUBS) && !defined(USE_TCL_STUB_PROCS) */
diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c
index 89e2061..d24e9a8 100644
--- a/generic/tclRegexp.c
+++ b/generic/tclRegexp.c
@@ -10,7 +10,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclRegexp.c,v 1.25 2007/11/11 19:32:17 msofer Exp $
+ * RCS: @(#) $Id: tclRegexp.c,v 1.26 2007/11/12 02:07:20 hobbs Exp $
*/
#include "tclInt.h"
@@ -437,6 +437,45 @@ Tcl_RegExpExecObj(
TclRegexp *regexpPtr = (TclRegexp *) re;
Tcl_UniChar *udata;
int length;
+ int reflags = regexpPtr->flags;
+#define TCL_REG_GLOBOK_FLAGS (TCL_REG_ADVANCED | TCL_REG_NOSUB | TCL_REG_NOCASE)
+
+ /*
+ * Take advantage of the equivalent glob pattern, if one exists.
+ * This is possible based only on the right mix of incoming flags (0)
+ * and regexp compile flags.
+ */
+ if ((offset == 0) && (nmatches == 0) && (flags == 0)
+ && !(reflags & ~TCL_REG_GLOBOK_FLAGS)
+ && (regexpPtr->globObjPtr != NULL)) {
+ int match, nocase = (reflags & TCL_REG_NOCASE);
+
+ /*
+ * Promote based on the type of incoming object.
+ * XXX: Currently doesn't take advantage of exact-ness that
+ * XXX: TclReToGlob tells us about
+ */
+
+ if (textObj->typePtr == &tclStringType) {
+ Tcl_UniChar *uptn;
+ int plen;
+
+ udata = Tcl_GetUnicodeFromObj(textObj, &length);
+ uptn = Tcl_GetUnicodeFromObj(regexpPtr->globObjPtr, &plen);
+ match = TclUniCharMatch(udata, length, uptn, plen, nocase);
+ } else if ((textObj->typePtr == &tclByteArrayType) && !nocase) {
+ unsigned char *data, *ptn;
+ int plen;
+
+ data = Tcl_GetByteArrayFromObj(textObj, &length);
+ ptn = Tcl_GetByteArrayFromObj(regexpPtr->globObjPtr, &plen);
+ match = TclByteArrayMatch(data, length, ptn, plen);
+ } else {
+ match = Tcl_StringCaseMatch(TclGetString(textObj),
+ TclGetString(regexpPtr->globObjPtr), nocase);
+ }
+ return match;
+ }
/*
* Save the target object so we can extract strings from it later.
@@ -830,7 +869,7 @@ CompileRegexp(
{
TclRegexp *regexpPtr;
const Tcl_UniChar *uniString;
- int numChars, status, i;
+ int numChars, status, i, exact;
Tcl_DString stringBuf;
ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey);
@@ -919,6 +958,21 @@ CompileRegexp(
}
/*
+ * Convert RE to a glob pattern equivalent, if any, and cache it. If this
+ * is not possible, then globObjPtr will be NULL. This is used by
+ * Tcl_RegExpExecObj to optionally do a fast match (avoids RE engine).
+ */
+
+ if (TclReToGlob(NULL, string, length, &stringBuf, &exact) == TCL_OK) {
+ regexpPtr->globObjPtr = Tcl_NewStringObj(Tcl_DStringValue(&stringBuf),
+ Tcl_DStringLength(&stringBuf));
+ Tcl_IncrRefCount(regexpPtr->globObjPtr);
+ Tcl_DStringFree(&stringBuf);
+ } else {
+ regexpPtr->globObjPtr = NULL;
+ }
+
+ /*
* Allocate enough space for all of the subexpressions, plus one extra for
* the entire pattern.
*/
@@ -978,6 +1032,9 @@ FreeRegexp(
TclRegexp *regexpPtr) /* Compiled regular expression to free. */
{
TclReFree(&regexpPtr->re);
+ if (regexpPtr->globObjPtr) {
+ TclDecrRefCount(regexpPtr->globObjPtr);
+ }
if (regexpPtr->matches) {
ckfree((char *) regexpPtr->matches);
}
diff --git a/generic/tclRegexp.h b/generic/tclRegexp.h
index 1515225..cf95445 100644
--- a/generic/tclRegexp.h
+++ b/generic/tclRegexp.h
@@ -10,7 +10,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclRegexp.h,v 1.13 2005/10/12 23:55:25 dkf Exp $
+ * RCS: @(#) $Id: tclRegexp.h,v 1.14 2007/11/12 02:07:20 hobbs Exp $
*/
#ifndef _TCLREGEXP
@@ -32,6 +32,7 @@ typedef struct TclRegexp {
* subexpressions. */
CONST char *string; /* Last string passed to Tcl_RegExpExec. */
Tcl_Obj *objPtr; /* Last object passed to Tcl_RegExpExecObj. */
+ Tcl_Obj *globObjPtr; /* Glob pattern rep of RE or NULL if none. */
regmatch_t *matches; /* Array of indices into the Tcl_UniChar
* representation of the last string matched
* with this regexp to indicate the location
diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c
index bfb17fd..ff088fc 100644
--- a/generic/tclStubInit.c
+++ b/generic/tclStubInit.c
@@ -8,7 +8,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclStubInit.c,v 1.145 2007/11/08 00:50:32 hobbs Exp $
+ * RCS: @(#) $Id: tclStubInit.c,v 1.146 2007/11/12 02:07:20 hobbs Exp $
*/
#include "tclInt.h"
@@ -327,6 +327,7 @@ TclIntStubs tclIntStubs = {
TclInitVarHashTable, /* 235 */
TclBackgroundException, /* 236 */
TclByteArrayMatch, /* 237 */
+ TclReToGlob, /* 238 */
};
TclIntPlatStubs tclIntPlatStubs = {
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 1bcd957..d6bf7f1 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -11,7 +11,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclUtil.c,v 1.87 2007/11/11 19:32:17 msofer Exp $
+ * RCS: @(#) $Id: tclUtil.c,v 1.88 2007/11/12 02:07:20 hobbs Exp $
*/
#include "tclInt.h"
@@ -3183,6 +3183,190 @@ TclGetPlatform(void)
}
/*
+ *----------------------------------------------------------------------
+ *
+ * TclReToGlob --
+ *
+ * Attempt to convert a regular expression to an equivalent glob pattern.
+ *
+ * Results:
+ * Returns TCL_OK on success, TCL_ERROR on failure.
+ * If interp is not NULL, an error message is placed in the result.
+ * On success, the DString will contain an exact equivalent glob pattern.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclReToGlob(Tcl_Interp *interp,
+ const char *reStr,
+ int reStrLen,
+ Tcl_DString *dsPtr,
+ int *exactPtr)
+{
+ int anchorLeft, anchorRight;
+ char *dsStr, *dsStrStart, *msg;
+ const char *p, *strEnd;
+
+ strEnd = reStr + reStrLen;
+ Tcl_DStringInit(dsPtr);
+
+ /*
+ * "***=xxx" == "*xxx*"
+ */
+
+ if ((reStrLen >= 4) && (memcmp("***=", reStr, 4) == 0)) {
+ *exactPtr = 1;
+ Tcl_DStringAppend(dsPtr, reStr + 4, reStrLen - 4);
+ return TCL_OK;
+ }
+
+ /*
+ * Write to the ds directly without the function overhead.
+ * An equivalent glob pattern can be no more than reStrLen+2 in size.
+ */
+
+ Tcl_DStringSetLength(dsPtr, reStrLen + 2);
+ dsStrStart = Tcl_DStringValue(dsPtr);
+
+ /*
+ * Check for anchored REs (ie ^foo$), so we can use string equal if
+ * possible. Do not alter the start of str so we can free it correctly.
+ */
+
+ msg = NULL;
+ p = reStr;
+ anchorRight = 0;
+ dsStr = dsStrStart;
+ if (*p == '^') {
+ anchorLeft = 1;
+ p++;
+ } else {
+ anchorLeft = 0;
+ *dsStr++ = '*';
+ }
+
+ for ( ; p < strEnd; p++) {
+ switch (*p) {
+ case '\\':
+ p++;
+ switch (*p) {
+ case 'a':
+ *dsStr++ = '\a';
+ break;
+ case 'b':
+ *dsStr++ = '\b';
+ break;
+ case 'f':
+ *dsStr++ = '\f';
+ break;
+ case 'n':
+ *dsStr++ = '\n';
+ break;
+ case 'r':
+ *dsStr++ = '\r';
+ break;
+ case 't':
+ *dsStr++ = '\t';
+ break;
+ case 'v':
+ *dsStr++ = '\v';
+ break;
+ case 'B':
+ *dsStr++ = '\\';
+ *dsStr++ = '\\';
+ anchorLeft = 0; /* prevent exact match */
+ break;
+ case '\\': case '*': case '+': case '?':
+ case '{': case '}': case '(': case ')': case '[': case ']':
+ case '.': case '|': case '^': case '$':
+ *dsStr++ = '\\';
+ *dsStr++ = *p;
+ anchorLeft = 0; /* prevent exact match */
+ break;
+ default:
+ msg = "invalid escape sequence";
+ goto invalidGlob;
+ }
+ break;
+ case '.':
+ anchorLeft = 0; /* prevent exact match */
+ if (p+1 < strEnd) {
+ if (p[1] == '*') {
+ p++;
+ if ((dsStr == dsStrStart) || (dsStr[-1] != '*')) {
+ *dsStr++ = '*';
+ }
+ continue;
+ } else if (p[1] == '+') {
+ p++;
+ *dsStr++ = '?';
+ *dsStr++ = '*';
+ continue;
+ }
+ }
+ *dsStr++ = '?';
+ break;
+ case '$':
+ if (p+1 != strEnd) {
+ msg = "$ not anchor";
+ goto invalidGlob;
+ }
+ anchorRight = 1;
+ break;
+ case '*': case '+': case '?': case '|': case '^':
+ case '{': case '}': case '(': case ')': case '[': case ']':
+ msg = "unhandled RE special char";
+ goto invalidGlob;
+ break;
+ default:
+ *dsStr++ = *p;
+ break;
+ }
+ }
+ if (!anchorRight && ((dsStr == dsStrStart) || (dsStr[-1] != '*'))) {
+ *dsStr++ = '*';
+ }
+ Tcl_DStringSetLength(dsPtr, dsStr - dsStrStart);
+
+#ifdef TCL_MEM_DEBUG
+ /*
+ * Check if this is a bad RE (do this at the end because it can be
+ * expensive).
+ * XXX: Is it possible that we can have a bad RE make it through the
+ * XXX: above checks?
+ */
+
+ if (Tcl_RegExpCompile(NULL, reStr) == NULL) {
+ msg = "couldn't compile RE";
+ goto invalidGlob;
+ }
+#endif
+
+ *exactPtr = (anchorLeft && anchorRight);
+
+#if 0
+ fprintf(stderr, "INPUT RE '%.*s' OUTPUT GLOB '%s' anchor %d:%d \n",
+ reStrLen, reStr,
+ Tcl_DStringValue(dsPtr), anchorLeft, anchorRight);
+ fflush(stderr);
+#endif
+ return TCL_OK;
+
+ invalidGlob:
+#if 0
+ fprintf(stderr, "INPUT RE '%.*s' NO OUTPUT GLOB %s (%c)\n",
+ reStrLen, reStr, msg, *p);
+ fflush(stderr);
+#endif
+ Tcl_DStringFree(dsPtr);
+ return TCL_ERROR;
+}
+
+/*
* Local Variables:
* mode: c
* c-basic-offset: 4
diff --git a/tests/regexpComp.test b/tests/regexpComp.test
index a84099e..8460006 100644
--- a/tests/regexpComp.test
+++ b/tests/regexpComp.test
@@ -822,6 +822,82 @@ foreach {str exp result} {
[subst {evalInProc {set a "$str"; regexp {$exp} \$a}}] $result
}
+set i 0
+foreach {str exp result} {
+ foo ^foo 1
+ foobar ^foobar$ 1
+ foobar bar$ 1
+ foobar ^$ 0
+ "" ^$ 1
+ anything $ 1
+ anything ^.*$ 1
+ anything ^.*a$ 0
+ anything ^.*a.*$ 1
+ anything ^.*.*$ 1
+ anything ^.*..*$ 1
+ anything ^.*b$ 0
+ anything ^a.*$ 1
+} {
+ test regexpComp-23.[incr i] {regexp command compiling tests INST_REGEXP} \
+ [subst {evalInProc {set a "$str"; set re "$exp"; regexp \$re \$a}}] $result
+}
+
+test regexpComp-24.1 {regexp command compiling tests} {
+ evalInProc {
+ set re foo
+ regexp -nocase $re bar
+ }
+} 0
+test regexpComp-24.2 {regexp command compiling tests} {
+ evalInProc {
+ set re {^foo$}
+ regexp $re dogfood
+ }
+} 0
+test regexpComp-24.3 {regexp command compiling tests} {
+ evalInProc {
+ set a foo
+ set re {^foo$}
+ regexp $re $a
+ }
+} 1
+test regexpComp-24.4 {regexp command compiling tests} {
+ evalInProc {
+ set re foo
+ regexp $re dogfood
+ }
+} 1
+test regexpComp-24.5 {regexp command compiling tests} {
+ evalInProc {
+ set re FOO
+ regexp -nocase $re dogfod
+ }
+} 0
+test regexpComp-24.6 {regexp command compiling tests} {
+ evalInProc {
+ set re foo
+ regexp -n $re dogfoOd
+ }
+} 1
+test regexpComp-24.7 {regexp command compiling tests} {
+ evalInProc {
+ set re FoO
+ regexp -no -- $re dogfood
+ }
+} 1
+test regexpComp-24.8 {regexp command compiling tests} {
+ evalInProc {
+ set re foo
+ regexp -- $re dogfod
+ }
+} 0
+test regexpComp-24.9 {regexp command compiling tests} {
+ evalInProc {
+ set re "("
+ list [catch {regexp -- $re dogfod} msg] $msg
+ }
+} {1 {couldn't compile regular expression pattern: parentheses () not balanced}}
+
# cleanup
::tcltest::cleanupTests
return