diff options
author | hobbs <hobbs> | 2007-11-12 02:07:18 (GMT) |
---|---|---|
committer | hobbs <hobbs> | 2007-11-12 02:07:18 (GMT) |
commit | cf8a7199f105edc95e59373e098af6eb47d22a16 (patch) | |
tree | c7f005156cbe08f8e11d6845d4a71f991dc5b488 /generic/tclCompCmds.c | |
parent | 094b6f7ae513ec561543276d7659f3a8b2a5b853 (diff) | |
download | tcl-cf8a7199f105edc95e59373e098af6eb47d22a16.zip tcl-cf8a7199f105edc95e59373e098af6eb47d22a16.tar.gz tcl-cf8a7199f105edc95e59373e098af6eb47d22a16.tar.bz2 |
* generic/tclCompCmds.c, generic/tclCompile.c, generic/tclCompile.h:
* generic/tclExecute.c, generic/tclInt.decls, generic/tclIntDecls.h:
* generic/tclRegexp.c, generic/tclRegexp.h: Add INST_REGEXP and fully
* generic/tclStubInit.c, generic/tclUtil.c: compiled [regexp] for the
* tests/regexpComp.test: [Bug 1830166] simple cases. Also
added TclReToGlob function to convert RE to glob patterns and use
these in the possible cases.
Diffstat (limited to 'generic/tclCompCmds.c')
-rw-r--r-- | generic/tclCompCmds.c | 128 |
1 files changed, 37 insertions, 91 deletions
diff --git a/generic/tclCompCmds.c b/generic/tclCompCmds.c index 035dd24..6179190 100644 --- a/generic/tclCompCmds.c +++ b/generic/tclCompCmds.c @@ -12,7 +12,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCompCmds.c,v 1.122 2007/11/11 19:32:14 msofer Exp $ + * RCS: @(#) $Id: tclCompCmds.c,v 1.123 2007/11/12 02:07:19 hobbs Exp $ */ #include "tclInt.h" @@ -2883,7 +2883,7 @@ TclCompileRegexpCmd( { Tcl_Token *varTokenPtr; /* Pointer to the Tcl_Token representing the * parse of the RE or string. */ - int i, len, nocase, anchorLeft, anchorRight, start; + int i, len, nocase, exact, sawLast, simple; char *str; DefineLineInformation; /* TIP #280 */ @@ -2898,7 +2898,9 @@ TclCompileRegexpCmd( return TCL_ERROR; } + simple = 0; nocase = 0; + sawLast = 0; varTokenPtr = parsePtr->tokenPtr; /* @@ -2919,6 +2921,7 @@ TclCompileRegexpCmd( str = (char *) varTokenPtr[1].start; len = varTokenPtr[1].size; if ((len == 2) && (str[0] == '-') && (str[1] == '-')) { + sawLast++; i++; break; } else if ((len > 1) && (strncmp(str,"-nocase",(unsigned)len) == 0)) { @@ -2946,102 +2949,41 @@ TclCompileRegexpCmd( */ varTokenPtr = TokenAfter(varTokenPtr); - str = (char *) varTokenPtr[1].start; - len = varTokenPtr[1].size; - if ((varTokenPtr->type != TCL_TOKEN_SIMPLE_WORD) || (*str == '-')) { - return TCL_ERROR; - } - if (len == 0) { - /* - * The semantics of regexp are always match on re == "". - */ + if (varTokenPtr->type == TCL_TOKEN_SIMPLE_WORD) { + Tcl_DString ds; - PushLiteral(envPtr, "1", 1); - return TCL_OK; - } - - /* - * Make a copy of the string that is null-terminated for checks which - * require such. - */ - - str = (char *) TclStackAlloc(interp, (unsigned) len + 1); - strncpy(str, varTokenPtr[1].start, (size_t) len); - str[len] = '\0'; - start = 0; - - /* - * Check for anchored REs (ie ^foo$), so we can use string equal if - * possible. Do not alter the start of str so we can free it correctly. - */ - - if (str[0] == '^') { - start++; - anchorLeft = 1; - } else { - anchorLeft = 0; - } - if ((str[len-1] == '$') && ((len == 1) || (str[len-2] != '\\'))) { - anchorRight = 1; - str[--len] = '\0'; - } else { - anchorRight = 0; - } - - /* - * On the first (pattern) arg, check to see if any RE special characters - * are in the word. If not, this is the same as 'string equal'. - */ - - if ((len > 1+start) && (str[start] == '.') && (str[start+1] == '*')) { - start += 2; - anchorLeft = 0; - } - if ((len > 2+start) && (str[len-3] != '\\') - && (str[len-2] == '.') && (str[len-1] == '*')) { - len -= 2; - str[len] = '\0'; - anchorRight = 0; - } + simple = 1; + str = (char *) varTokenPtr[1].start; + len = varTokenPtr[1].size; + if ((*str == '-') && !sawLast) { + return TCL_ERROR; + } - /* - * Don't do anything with REs with other special chars. Also check if this - * is a bad RE (do this at the end because it can be expensive). If so, - * let it complain at runtime. - */ + if (len == 0) { + /* + * The semantics of regexp are always match on re == "". + */ - if ((strpbrk(str + start, "*+?{}()[].\\|^$") != NULL) - || (Tcl_RegExpCompile(NULL, str) == NULL)) { - TclStackFree(interp, str); - return TCL_ERROR; - } + PushLiteral(envPtr, "1", 1); + return TCL_OK; + } - if (anchorLeft && anchorRight) { - PushLiteral(envPtr, str+start, len-start); - } else { /* - * This needs to find the substring anywhere in the string, so use - * [string match] and *foo*, with appropriate anchoring. + * Attempt to convert pattern to glob. If successful, push the + * converted pattern. */ - char *newStr = TclStackAlloc(interp, (unsigned) len + 3); - - len -= start; - if (anchorLeft) { - strncpy(newStr, str + start, (size_t) len); - } else { - newStr[0] = '*'; - strncpy(newStr + 1, str + start, (size_t) len++); - } - if (!anchorRight) { - newStr[len++] = '*'; + if (TclReToGlob(NULL, varTokenPtr[1].start, len, &ds, &exact) + != TCL_OK) { + return TCL_ERROR; } - newStr[len] = '\0'; - PushLiteral(envPtr, newStr, len); - TclStackFree(interp, newStr); + + PushLiteral(envPtr, Tcl_DStringValue(&ds), Tcl_DStringLength(&ds)); + Tcl_DStringFree(&ds); + } else { + CompileWord(envPtr, varTokenPtr, interp, parsePtr->numWords-2); } - TclStackFree(interp, str); /* * Push the string arg. @@ -3050,10 +2992,14 @@ TclCompileRegexpCmd( varTokenPtr = TokenAfter(varTokenPtr); CompileWord(envPtr, varTokenPtr, interp, parsePtr->numWords-1); - if (anchorLeft && anchorRight && !nocase) { - TclEmitOpcode(INST_STR_EQ, envPtr); + if (simple) { + if (exact && !nocase) { + TclEmitOpcode(INST_STR_EQ, envPtr); + } else { + TclEmitInstInt1(INST_STR_MATCH, nocase, envPtr); + } } else { - TclEmitInstInt1(INST_STR_MATCH, nocase, envPtr); + TclEmitInstInt1(INST_REGEXP, nocase, envPtr); } return TCL_OK; |