diff options
author | hobbs <hobbs> | 2007-11-12 02:07:18 (GMT) |
---|---|---|
committer | hobbs <hobbs> | 2007-11-12 02:07:18 (GMT) |
commit | cf8a7199f105edc95e59373e098af6eb47d22a16 (patch) | |
tree | c7f005156cbe08f8e11d6845d4a71f991dc5b488 | |
parent | 094b6f7ae513ec561543276d7659f3a8b2a5b853 (diff) | |
download | tcl-cf8a7199f105edc95e59373e098af6eb47d22a16.zip tcl-cf8a7199f105edc95e59373e098af6eb47d22a16.tar.gz tcl-cf8a7199f105edc95e59373e098af6eb47d22a16.tar.bz2 |
* generic/tclCompCmds.c, generic/tclCompile.c, generic/tclCompile.h:
* generic/tclExecute.c, generic/tclInt.decls, generic/tclIntDecls.h:
* generic/tclRegexp.c, generic/tclRegexp.h: Add INST_REGEXP and fully
* generic/tclStubInit.c, generic/tclUtil.c: compiled [regexp] for the
* tests/regexpComp.test: [Bug 1830166] simple cases. Also
added TclReToGlob function to convert RE to glob patterns and use
these in the possible cases.
-rw-r--r-- | ChangeLog | 10 | ||||
-rw-r--r-- | generic/tclCompCmds.c | 128 | ||||
-rw-r--r-- | generic/tclCompile.c | 5 | ||||
-rw-r--r-- | generic/tclCompile.h | 8 | ||||
-rw-r--r-- | generic/tclExecute.c | 35 | ||||
-rw-r--r-- | generic/tclInt.decls | 10 | ||||
-rw-r--r-- | generic/tclIntDecls.h | 14 | ||||
-rw-r--r-- | generic/tclRegexp.c | 61 | ||||
-rw-r--r-- | generic/tclRegexp.h | 3 | ||||
-rw-r--r-- | generic/tclStubInit.c | 3 | ||||
-rw-r--r-- | generic/tclUtil.c | 186 | ||||
-rw-r--r-- | tests/regexpComp.test | 76 |
12 files changed, 436 insertions, 103 deletions
@@ -1,3 +1,13 @@ +2007-11-11 Jeff Hobbs <jeffh@ActiveState.com> + + * generic/tclCompCmds.c, generic/tclCompile.c, generic/tclCompile.h: + * generic/tclExecute.c, generic/tclInt.decls, generic/tclIntDecls.h: + * generic/tclRegexp.c, generic/tclRegexp.h: Add INST_REGEXP and fully + * generic/tclStubInit.c, generic/tclUtil.c: compiled [regexp] for the + * tests/regexpComp.test: [Bug 1830166] simple cases. Also + added TclReToGlob function to convert RE to glob patterns and use + these in the possible cases. + 2007-11-10 Miguel Sofer <msofer@users.sf.net> * generic/tclResult.c (ResetObjResult): clarify the logic. diff --git a/generic/tclCompCmds.c b/generic/tclCompCmds.c index 035dd24..6179190 100644 --- a/generic/tclCompCmds.c +++ b/generic/tclCompCmds.c @@ -12,7 +12,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCompCmds.c,v 1.122 2007/11/11 19:32:14 msofer Exp $ + * RCS: @(#) $Id: tclCompCmds.c,v 1.123 2007/11/12 02:07:19 hobbs Exp $ */ #include "tclInt.h" @@ -2883,7 +2883,7 @@ TclCompileRegexpCmd( { Tcl_Token *varTokenPtr; /* Pointer to the Tcl_Token representing the * parse of the RE or string. */ - int i, len, nocase, anchorLeft, anchorRight, start; + int i, len, nocase, exact, sawLast, simple; char *str; DefineLineInformation; /* TIP #280 */ @@ -2898,7 +2898,9 @@ TclCompileRegexpCmd( return TCL_ERROR; } + simple = 0; nocase = 0; + sawLast = 0; varTokenPtr = parsePtr->tokenPtr; /* @@ -2919,6 +2921,7 @@ TclCompileRegexpCmd( str = (char *) varTokenPtr[1].start; len = varTokenPtr[1].size; if ((len == 2) && (str[0] == '-') && (str[1] == '-')) { + sawLast++; i++; break; } else if ((len > 1) && (strncmp(str,"-nocase",(unsigned)len) == 0)) { @@ -2946,102 +2949,41 @@ TclCompileRegexpCmd( */ varTokenPtr = TokenAfter(varTokenPtr); - str = (char *) varTokenPtr[1].start; - len = varTokenPtr[1].size; - if ((varTokenPtr->type != TCL_TOKEN_SIMPLE_WORD) || (*str == '-')) { - return TCL_ERROR; - } - if (len == 0) { - /* - * The semantics of regexp are always match on re == "". - */ + if (varTokenPtr->type == TCL_TOKEN_SIMPLE_WORD) { + Tcl_DString ds; - PushLiteral(envPtr, "1", 1); - return TCL_OK; - } - - /* - * Make a copy of the string that is null-terminated for checks which - * require such. - */ - - str = (char *) TclStackAlloc(interp, (unsigned) len + 1); - strncpy(str, varTokenPtr[1].start, (size_t) len); - str[len] = '\0'; - start = 0; - - /* - * Check for anchored REs (ie ^foo$), so we can use string equal if - * possible. Do not alter the start of str so we can free it correctly. - */ - - if (str[0] == '^') { - start++; - anchorLeft = 1; - } else { - anchorLeft = 0; - } - if ((str[len-1] == '$') && ((len == 1) || (str[len-2] != '\\'))) { - anchorRight = 1; - str[--len] = '\0'; - } else { - anchorRight = 0; - } - - /* - * On the first (pattern) arg, check to see if any RE special characters - * are in the word. If not, this is the same as 'string equal'. - */ - - if ((len > 1+start) && (str[start] == '.') && (str[start+1] == '*')) { - start += 2; - anchorLeft = 0; - } - if ((len > 2+start) && (str[len-3] != '\\') - && (str[len-2] == '.') && (str[len-1] == '*')) { - len -= 2; - str[len] = '\0'; - anchorRight = 0; - } + simple = 1; + str = (char *) varTokenPtr[1].start; + len = varTokenPtr[1].size; + if ((*str == '-') && !sawLast) { + return TCL_ERROR; + } - /* - * Don't do anything with REs with other special chars. Also check if this - * is a bad RE (do this at the end because it can be expensive). If so, - * let it complain at runtime. - */ + if (len == 0) { + /* + * The semantics of regexp are always match on re == "". + */ - if ((strpbrk(str + start, "*+?{}()[].\\|^$") != NULL) - || (Tcl_RegExpCompile(NULL, str) == NULL)) { - TclStackFree(interp, str); - return TCL_ERROR; - } + PushLiteral(envPtr, "1", 1); + return TCL_OK; + } - if (anchorLeft && anchorRight) { - PushLiteral(envPtr, str+start, len-start); - } else { /* - * This needs to find the substring anywhere in the string, so use - * [string match] and *foo*, with appropriate anchoring. + * Attempt to convert pattern to glob. If successful, push the + * converted pattern. */ - char *newStr = TclStackAlloc(interp, (unsigned) len + 3); - - len -= start; - if (anchorLeft) { - strncpy(newStr, str + start, (size_t) len); - } else { - newStr[0] = '*'; - strncpy(newStr + 1, str + start, (size_t) len++); - } - if (!anchorRight) { - newStr[len++] = '*'; + if (TclReToGlob(NULL, varTokenPtr[1].start, len, &ds, &exact) + != TCL_OK) { + return TCL_ERROR; } - newStr[len] = '\0'; - PushLiteral(envPtr, newStr, len); - TclStackFree(interp, newStr); + + PushLiteral(envPtr, Tcl_DStringValue(&ds), Tcl_DStringLength(&ds)); + Tcl_DStringFree(&ds); + } else { + CompileWord(envPtr, varTokenPtr, interp, parsePtr->numWords-2); } - TclStackFree(interp, str); /* * Push the string arg. @@ -3050,10 +2992,14 @@ TclCompileRegexpCmd( varTokenPtr = TokenAfter(varTokenPtr); CompileWord(envPtr, varTokenPtr, interp, parsePtr->numWords-1); - if (anchorLeft && anchorRight && !nocase) { - TclEmitOpcode(INST_STR_EQ, envPtr); + if (simple) { + if (exact && !nocase) { + TclEmitOpcode(INST_STR_EQ, envPtr); + } else { + TclEmitInstInt1(INST_STR_MATCH, nocase, envPtr); + } } else { - TclEmitInstInt1(INST_STR_MATCH, nocase, envPtr); + TclEmitInstInt1(INST_REGEXP, nocase, envPtr); } return TCL_OK; diff --git a/generic/tclCompile.c b/generic/tclCompile.c index d66f356..ce9f7e1 100644 --- a/generic/tclCompile.c +++ b/generic/tclCompile.c @@ -11,7 +11,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCompile.c,v 1.137 2007/11/11 19:32:14 msofer Exp $ + * RCS: @(#) $Id: tclCompile.c,v 1.138 2007/11/12 02:07:19 hobbs Exp $ */ #include "tclInt.h" @@ -385,6 +385,9 @@ InstructionDesc tclInstructionTable[] = { /* Compiled bytecodes to signal syntax error. */ {"reverse", 5, 0, 1, {OPERAND_UINT4}}, /* Reverse the order of the arg elements at the top of stack */ + + {"regexp", 2, -1, 1, {OPERAND_INT1}}, + /* Regexp: push (regexp stknext stktop) opnd == nocase */ {0} }; diff --git a/generic/tclCompile.h b/generic/tclCompile.h index 3e02b1d..618f704 100644 --- a/generic/tclCompile.h +++ b/generic/tclCompile.h @@ -9,7 +9,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCompile.h,v 1.82 2007/10/27 13:15:58 msofer Exp $ + * RCS: @(#) $Id: tclCompile.h,v 1.83 2007/11/12 02:07:19 hobbs Exp $ */ #ifndef _TCLCOMPILATION @@ -636,8 +636,12 @@ typedef struct ByteCode { #define INST_REVERSE 126 +/* regexp instruction */ + +#define INST_REGEXP 127 + /* The last opcode */ -#define LAST_INST_OPCODE 126 +#define LAST_INST_OPCODE 127 /* * Table describing the Tcl bytecode instructions: their name (for displaying diff --git a/generic/tclExecute.c b/generic/tclExecute.c index cb72097..80247a6 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -13,7 +13,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclExecute.c,v 1.345 2007/11/11 19:32:14 msofer Exp $ + * RCS: @(#) $Id: tclExecute.c,v 1.346 2007/11/12 02:07:19 hobbs Exp $ */ #include "tclInt.h" @@ -4098,6 +4098,39 @@ TclExecuteByteCode( NEXT_INST_F(2, 2, 1); } + case INST_REGEXP: { + int nocase, match; + Tcl_Obj *valuePtr, *value2Ptr; + Tcl_RegExp regExpr; + + nocase = TclGetInt1AtPtr(pc+1); + valuePtr = OBJ_AT_TOS; /* String */ + value2Ptr = OBJ_UNDER_TOS; /* Pattern */ + + regExpr = Tcl_GetRegExpFromObj(interp, value2Ptr, + TCL_REG_ADVANCED | (nocase ? TCL_REG_NOCASE : 0)); + if (regExpr == NULL) { + match = -1; + } else { + match = Tcl_RegExpExecObj(interp, regExpr, valuePtr, 0, 0, 0); + } + + /* + * Adjustment is 2 due to the nocase byte + */ + + if (match < 0) { + objResultPtr = Tcl_GetObjResult(interp); + TRACE_WITH_OBJ(("%.20s %.20s => ERROR: ", O2S(valuePtr), O2S(value2Ptr)), objResultPtr); + result = TCL_ERROR; + goto checkForCatch; + } else { + TRACE(("%.20s %.20s => %d\n", O2S(valuePtr), O2S(value2Ptr), match)); + objResultPtr = constants[match]; + NEXT_INST_F(2, 2, 1); + } + } + case INST_EQ: case INST_NEQ: case INST_LT: diff --git a/generic/tclInt.decls b/generic/tclInt.decls index c3118df..97a1269 100644 --- a/generic/tclInt.decls +++ b/generic/tclInt.decls @@ -12,7 +12,7 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: tclInt.decls,v 1.116 2007/11/09 18:55:15 hobbs Exp $ +# RCS: @(#) $Id: tclInt.decls,v 1.117 2007/11/12 02:07:19 hobbs Exp $ library tcl @@ -945,7 +945,13 @@ declare 236 generic { # Added for 8.5b3 to improve binary glob match case declare 237 generic { int TclByteArrayMatch(const unsigned char *string, int strLen, - const unsigned char *pattern, int ptnLen) + const unsigned char *pattern, int ptnLen) +} + +# Added for 8.5b3 to generalize check for RE to glob pattern conversion +declare 238 generic { + int TclReToGlob(Tcl_Interp *interp, const char *reStr, int reStrLen, + Tcl_DString *dsPtr, int *exactPtr) } ############################################################################## diff --git a/generic/tclIntDecls.h b/generic/tclIntDecls.h index 05daefb..ca01f4f 100644 --- a/generic/tclIntDecls.h +++ b/generic/tclIntDecls.h @@ -11,7 +11,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclIntDecls.h,v 1.107 2007/11/09 18:55:15 hobbs Exp $ + * RCS: @(#) $Id: tclIntDecls.h,v 1.108 2007/11/12 02:07:19 hobbs Exp $ */ #ifndef _TCLINTDECLS @@ -1064,6 +1064,13 @@ EXTERN int TclByteArrayMatch (const unsigned char * string, int strLen, const unsigned char * pattern, int ptnLen); #endif +#ifndef TclReToGlob_TCL_DECLARED +#define TclReToGlob_TCL_DECLARED +/* 238 */ +EXTERN int TclReToGlob (Tcl_Interp * interp, const char * reStr, + int reStrLen, Tcl_DString * dsPtr, + int * exactPtr); +#endif typedef struct TclIntStubs { int magic; @@ -1322,6 +1329,7 @@ typedef struct TclIntStubs { void (*tclInitVarHashTable) (TclVarHashTable * tablePtr, Namespace * nsPtr); /* 235 */ void (*tclBackgroundException) (Tcl_Interp * interp, int code); /* 236 */ int (*tclByteArrayMatch) (const unsigned char * string, int strLen, const unsigned char * pattern, int ptnLen); /* 237 */ + int (*tclReToGlob) (Tcl_Interp * interp, const char * reStr, int reStrLen, Tcl_DString * dsPtr, int * exactPtr); /* 238 */ } TclIntStubs; #ifdef __cplusplus @@ -2059,6 +2067,10 @@ extern TclIntStubs *tclIntStubsPtr; #define TclByteArrayMatch \ (tclIntStubsPtr->tclByteArrayMatch) /* 237 */ #endif +#ifndef TclReToGlob +#define TclReToGlob \ + (tclIntStubsPtr->tclReToGlob) /* 238 */ +#endif #endif /* defined(USE_TCL_STUBS) && !defined(USE_TCL_STUB_PROCS) */ diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c index 89e2061..d24e9a8 100644 --- a/generic/tclRegexp.c +++ b/generic/tclRegexp.c @@ -10,7 +10,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclRegexp.c,v 1.25 2007/11/11 19:32:17 msofer Exp $ + * RCS: @(#) $Id: tclRegexp.c,v 1.26 2007/11/12 02:07:20 hobbs Exp $ */ #include "tclInt.h" @@ -437,6 +437,45 @@ Tcl_RegExpExecObj( TclRegexp *regexpPtr = (TclRegexp *) re; Tcl_UniChar *udata; int length; + int reflags = regexpPtr->flags; +#define TCL_REG_GLOBOK_FLAGS (TCL_REG_ADVANCED | TCL_REG_NOSUB | TCL_REG_NOCASE) + + /* + * Take advantage of the equivalent glob pattern, if one exists. + * This is possible based only on the right mix of incoming flags (0) + * and regexp compile flags. + */ + if ((offset == 0) && (nmatches == 0) && (flags == 0) + && !(reflags & ~TCL_REG_GLOBOK_FLAGS) + && (regexpPtr->globObjPtr != NULL)) { + int match, nocase = (reflags & TCL_REG_NOCASE); + + /* + * Promote based on the type of incoming object. + * XXX: Currently doesn't take advantage of exact-ness that + * XXX: TclReToGlob tells us about + */ + + if (textObj->typePtr == &tclStringType) { + Tcl_UniChar *uptn; + int plen; + + udata = Tcl_GetUnicodeFromObj(textObj, &length); + uptn = Tcl_GetUnicodeFromObj(regexpPtr->globObjPtr, &plen); + match = TclUniCharMatch(udata, length, uptn, plen, nocase); + } else if ((textObj->typePtr == &tclByteArrayType) && !nocase) { + unsigned char *data, *ptn; + int plen; + + data = Tcl_GetByteArrayFromObj(textObj, &length); + ptn = Tcl_GetByteArrayFromObj(regexpPtr->globObjPtr, &plen); + match = TclByteArrayMatch(data, length, ptn, plen); + } else { + match = Tcl_StringCaseMatch(TclGetString(textObj), + TclGetString(regexpPtr->globObjPtr), nocase); + } + return match; + } /* * Save the target object so we can extract strings from it later. @@ -830,7 +869,7 @@ CompileRegexp( { TclRegexp *regexpPtr; const Tcl_UniChar *uniString; - int numChars, status, i; + int numChars, status, i, exact; Tcl_DString stringBuf; ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey); @@ -919,6 +958,21 @@ CompileRegexp( } /* + * Convert RE to a glob pattern equivalent, if any, and cache it. If this + * is not possible, then globObjPtr will be NULL. This is used by + * Tcl_RegExpExecObj to optionally do a fast match (avoids RE engine). + */ + + if (TclReToGlob(NULL, string, length, &stringBuf, &exact) == TCL_OK) { + regexpPtr->globObjPtr = Tcl_NewStringObj(Tcl_DStringValue(&stringBuf), + Tcl_DStringLength(&stringBuf)); + Tcl_IncrRefCount(regexpPtr->globObjPtr); + Tcl_DStringFree(&stringBuf); + } else { + regexpPtr->globObjPtr = NULL; + } + + /* * Allocate enough space for all of the subexpressions, plus one extra for * the entire pattern. */ @@ -978,6 +1032,9 @@ FreeRegexp( TclRegexp *regexpPtr) /* Compiled regular expression to free. */ { TclReFree(®expPtr->re); + if (regexpPtr->globObjPtr) { + TclDecrRefCount(regexpPtr->globObjPtr); + } if (regexpPtr->matches) { ckfree((char *) regexpPtr->matches); } diff --git a/generic/tclRegexp.h b/generic/tclRegexp.h index 1515225..cf95445 100644 --- a/generic/tclRegexp.h +++ b/generic/tclRegexp.h @@ -10,7 +10,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclRegexp.h,v 1.13 2005/10/12 23:55:25 dkf Exp $ + * RCS: @(#) $Id: tclRegexp.h,v 1.14 2007/11/12 02:07:20 hobbs Exp $ */ #ifndef _TCLREGEXP @@ -32,6 +32,7 @@ typedef struct TclRegexp { * subexpressions. */ CONST char *string; /* Last string passed to Tcl_RegExpExec. */ Tcl_Obj *objPtr; /* Last object passed to Tcl_RegExpExecObj. */ + Tcl_Obj *globObjPtr; /* Glob pattern rep of RE or NULL if none. */ regmatch_t *matches; /* Array of indices into the Tcl_UniChar * representation of the last string matched * with this regexp to indicate the location diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index bfb17fd..ff088fc 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -8,7 +8,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclStubInit.c,v 1.145 2007/11/08 00:50:32 hobbs Exp $ + * RCS: @(#) $Id: tclStubInit.c,v 1.146 2007/11/12 02:07:20 hobbs Exp $ */ #include "tclInt.h" @@ -327,6 +327,7 @@ TclIntStubs tclIntStubs = { TclInitVarHashTable, /* 235 */ TclBackgroundException, /* 236 */ TclByteArrayMatch, /* 237 */ + TclReToGlob, /* 238 */ }; TclIntPlatStubs tclIntPlatStubs = { diff --git a/generic/tclUtil.c b/generic/tclUtil.c index 1bcd957..d6bf7f1 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -11,7 +11,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclUtil.c,v 1.87 2007/11/11 19:32:17 msofer Exp $ + * RCS: @(#) $Id: tclUtil.c,v 1.88 2007/11/12 02:07:20 hobbs Exp $ */ #include "tclInt.h" @@ -3183,6 +3183,190 @@ TclGetPlatform(void) } /* + *---------------------------------------------------------------------- + * + * TclReToGlob -- + * + * Attempt to convert a regular expression to an equivalent glob pattern. + * + * Results: + * Returns TCL_OK on success, TCL_ERROR on failure. + * If interp is not NULL, an error message is placed in the result. + * On success, the DString will contain an exact equivalent glob pattern. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +int +TclReToGlob(Tcl_Interp *interp, + const char *reStr, + int reStrLen, + Tcl_DString *dsPtr, + int *exactPtr) +{ + int anchorLeft, anchorRight; + char *dsStr, *dsStrStart, *msg; + const char *p, *strEnd; + + strEnd = reStr + reStrLen; + Tcl_DStringInit(dsPtr); + + /* + * "***=xxx" == "*xxx*" + */ + + if ((reStrLen >= 4) && (memcmp("***=", reStr, 4) == 0)) { + *exactPtr = 1; + Tcl_DStringAppend(dsPtr, reStr + 4, reStrLen - 4); + return TCL_OK; + } + + /* + * Write to the ds directly without the function overhead. + * An equivalent glob pattern can be no more than reStrLen+2 in size. + */ + + Tcl_DStringSetLength(dsPtr, reStrLen + 2); + dsStrStart = Tcl_DStringValue(dsPtr); + + /* + * Check for anchored REs (ie ^foo$), so we can use string equal if + * possible. Do not alter the start of str so we can free it correctly. + */ + + msg = NULL; + p = reStr; + anchorRight = 0; + dsStr = dsStrStart; + if (*p == '^') { + anchorLeft = 1; + p++; + } else { + anchorLeft = 0; + *dsStr++ = '*'; + } + + for ( ; p < strEnd; p++) { + switch (*p) { + case '\\': + p++; + switch (*p) { + case 'a': + *dsStr++ = '\a'; + break; + case 'b': + *dsStr++ = '\b'; + break; + case 'f': + *dsStr++ = '\f'; + break; + case 'n': + *dsStr++ = '\n'; + break; + case 'r': + *dsStr++ = '\r'; + break; + case 't': + *dsStr++ = '\t'; + break; + case 'v': + *dsStr++ = '\v'; + break; + case 'B': + *dsStr++ = '\\'; + *dsStr++ = '\\'; + anchorLeft = 0; /* prevent exact match */ + break; + case '\\': case '*': case '+': case '?': + case '{': case '}': case '(': case ')': case '[': case ']': + case '.': case '|': case '^': case '$': + *dsStr++ = '\\'; + *dsStr++ = *p; + anchorLeft = 0; /* prevent exact match */ + break; + default: + msg = "invalid escape sequence"; + goto invalidGlob; + } + break; + case '.': + anchorLeft = 0; /* prevent exact match */ + if (p+1 < strEnd) { + if (p[1] == '*') { + p++; + if ((dsStr == dsStrStart) || (dsStr[-1] != '*')) { + *dsStr++ = '*'; + } + continue; + } else if (p[1] == '+') { + p++; + *dsStr++ = '?'; + *dsStr++ = '*'; + continue; + } + } + *dsStr++ = '?'; + break; + case '$': + if (p+1 != strEnd) { + msg = "$ not anchor"; + goto invalidGlob; + } + anchorRight = 1; + break; + case '*': case '+': case '?': case '|': case '^': + case '{': case '}': case '(': case ')': case '[': case ']': + msg = "unhandled RE special char"; + goto invalidGlob; + break; + default: + *dsStr++ = *p; + break; + } + } + if (!anchorRight && ((dsStr == dsStrStart) || (dsStr[-1] != '*'))) { + *dsStr++ = '*'; + } + Tcl_DStringSetLength(dsPtr, dsStr - dsStrStart); + +#ifdef TCL_MEM_DEBUG + /* + * Check if this is a bad RE (do this at the end because it can be + * expensive). + * XXX: Is it possible that we can have a bad RE make it through the + * XXX: above checks? + */ + + if (Tcl_RegExpCompile(NULL, reStr) == NULL) { + msg = "couldn't compile RE"; + goto invalidGlob; + } +#endif + + *exactPtr = (anchorLeft && anchorRight); + +#if 0 + fprintf(stderr, "INPUT RE '%.*s' OUTPUT GLOB '%s' anchor %d:%d \n", + reStrLen, reStr, + Tcl_DStringValue(dsPtr), anchorLeft, anchorRight); + fflush(stderr); +#endif + return TCL_OK; + + invalidGlob: +#if 0 + fprintf(stderr, "INPUT RE '%.*s' NO OUTPUT GLOB %s (%c)\n", + reStrLen, reStr, msg, *p); + fflush(stderr); +#endif + Tcl_DStringFree(dsPtr); + return TCL_ERROR; +} + +/* * Local Variables: * mode: c * c-basic-offset: 4 diff --git a/tests/regexpComp.test b/tests/regexpComp.test index a84099e..8460006 100644 --- a/tests/regexpComp.test +++ b/tests/regexpComp.test @@ -822,6 +822,82 @@ foreach {str exp result} { [subst {evalInProc {set a "$str"; regexp {$exp} \$a}}] $result } +set i 0 +foreach {str exp result} { + foo ^foo 1 + foobar ^foobar$ 1 + foobar bar$ 1 + foobar ^$ 0 + "" ^$ 1 + anything $ 1 + anything ^.*$ 1 + anything ^.*a$ 0 + anything ^.*a.*$ 1 + anything ^.*.*$ 1 + anything ^.*..*$ 1 + anything ^.*b$ 0 + anything ^a.*$ 1 +} { + test regexpComp-23.[incr i] {regexp command compiling tests INST_REGEXP} \ + [subst {evalInProc {set a "$str"; set re "$exp"; regexp \$re \$a}}] $result +} + +test regexpComp-24.1 {regexp command compiling tests} { + evalInProc { + set re foo + regexp -nocase $re bar + } +} 0 +test regexpComp-24.2 {regexp command compiling tests} { + evalInProc { + set re {^foo$} + regexp $re dogfood + } +} 0 +test regexpComp-24.3 {regexp command compiling tests} { + evalInProc { + set a foo + set re {^foo$} + regexp $re $a + } +} 1 +test regexpComp-24.4 {regexp command compiling tests} { + evalInProc { + set re foo + regexp $re dogfood + } +} 1 +test regexpComp-24.5 {regexp command compiling tests} { + evalInProc { + set re FOO + regexp -nocase $re dogfod + } +} 0 +test regexpComp-24.6 {regexp command compiling tests} { + evalInProc { + set re foo + regexp -n $re dogfoOd + } +} 1 +test regexpComp-24.7 {regexp command compiling tests} { + evalInProc { + set re FoO + regexp -no -- $re dogfood + } +} 1 +test regexpComp-24.8 {regexp command compiling tests} { + evalInProc { + set re foo + regexp -- $re dogfod + } +} 0 +test regexpComp-24.9 {regexp command compiling tests} { + evalInProc { + set re "(" + list [catch {regexp -- $re dogfod} msg] $msg + } +} {1 {couldn't compile regular expression pattern: parentheses () not balanced}} + # cleanup ::tcltest::cleanupTests return |