summaryrefslogtreecommitdiffstats
path: root/generic/tclCompCmds.c
diff options
context:
space:
mode:
authorhobbs <hobbs>2007-11-12 02:07:18 (GMT)
committerhobbs <hobbs>2007-11-12 02:07:18 (GMT)
commitcf8a7199f105edc95e59373e098af6eb47d22a16 (patch)
treec7f005156cbe08f8e11d6845d4a71f991dc5b488 /generic/tclCompCmds.c
parent094b6f7ae513ec561543276d7659f3a8b2a5b853 (diff)
downloadtcl-cf8a7199f105edc95e59373e098af6eb47d22a16.zip
tcl-cf8a7199f105edc95e59373e098af6eb47d22a16.tar.gz
tcl-cf8a7199f105edc95e59373e098af6eb47d22a16.tar.bz2
* generic/tclCompCmds.c, generic/tclCompile.c, generic/tclCompile.h:
* generic/tclExecute.c, generic/tclInt.decls, generic/tclIntDecls.h: * generic/tclRegexp.c, generic/tclRegexp.h: Add INST_REGEXP and fully * generic/tclStubInit.c, generic/tclUtil.c: compiled [regexp] for the * tests/regexpComp.test: [Bug 1830166] simple cases. Also added TclReToGlob function to convert RE to glob patterns and use these in the possible cases.
Diffstat (limited to 'generic/tclCompCmds.c')
-rw-r--r--generic/tclCompCmds.c128
1 files changed, 37 insertions, 91 deletions
diff --git a/generic/tclCompCmds.c b/generic/tclCompCmds.c
index 035dd24..6179190 100644
--- a/generic/tclCompCmds.c
+++ b/generic/tclCompCmds.c
@@ -12,7 +12,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclCompCmds.c,v 1.122 2007/11/11 19:32:14 msofer Exp $
+ * RCS: @(#) $Id: tclCompCmds.c,v 1.123 2007/11/12 02:07:19 hobbs Exp $
*/
#include "tclInt.h"
@@ -2883,7 +2883,7 @@ TclCompileRegexpCmd(
{
Tcl_Token *varTokenPtr; /* Pointer to the Tcl_Token representing the
* parse of the RE or string. */
- int i, len, nocase, anchorLeft, anchorRight, start;
+ int i, len, nocase, exact, sawLast, simple;
char *str;
DefineLineInformation; /* TIP #280 */
@@ -2898,7 +2898,9 @@ TclCompileRegexpCmd(
return TCL_ERROR;
}
+ simple = 0;
nocase = 0;
+ sawLast = 0;
varTokenPtr = parsePtr->tokenPtr;
/*
@@ -2919,6 +2921,7 @@ TclCompileRegexpCmd(
str = (char *) varTokenPtr[1].start;
len = varTokenPtr[1].size;
if ((len == 2) && (str[0] == '-') && (str[1] == '-')) {
+ sawLast++;
i++;
break;
} else if ((len > 1) && (strncmp(str,"-nocase",(unsigned)len) == 0)) {
@@ -2946,102 +2949,41 @@ TclCompileRegexpCmd(
*/
varTokenPtr = TokenAfter(varTokenPtr);
- str = (char *) varTokenPtr[1].start;
- len = varTokenPtr[1].size;
- if ((varTokenPtr->type != TCL_TOKEN_SIMPLE_WORD) || (*str == '-')) {
- return TCL_ERROR;
- }
- if (len == 0) {
- /*
- * The semantics of regexp are always match on re == "".
- */
+ if (varTokenPtr->type == TCL_TOKEN_SIMPLE_WORD) {
+ Tcl_DString ds;
- PushLiteral(envPtr, "1", 1);
- return TCL_OK;
- }
-
- /*
- * Make a copy of the string that is null-terminated for checks which
- * require such.
- */
-
- str = (char *) TclStackAlloc(interp, (unsigned) len + 1);
- strncpy(str, varTokenPtr[1].start, (size_t) len);
- str[len] = '\0';
- start = 0;
-
- /*
- * Check for anchored REs (ie ^foo$), so we can use string equal if
- * possible. Do not alter the start of str so we can free it correctly.
- */
-
- if (str[0] == '^') {
- start++;
- anchorLeft = 1;
- } else {
- anchorLeft = 0;
- }
- if ((str[len-1] == '$') && ((len == 1) || (str[len-2] != '\\'))) {
- anchorRight = 1;
- str[--len] = '\0';
- } else {
- anchorRight = 0;
- }
-
- /*
- * On the first (pattern) arg, check to see if any RE special characters
- * are in the word. If not, this is the same as 'string equal'.
- */
-
- if ((len > 1+start) && (str[start] == '.') && (str[start+1] == '*')) {
- start += 2;
- anchorLeft = 0;
- }
- if ((len > 2+start) && (str[len-3] != '\\')
- && (str[len-2] == '.') && (str[len-1] == '*')) {
- len -= 2;
- str[len] = '\0';
- anchorRight = 0;
- }
+ simple = 1;
+ str = (char *) varTokenPtr[1].start;
+ len = varTokenPtr[1].size;
+ if ((*str == '-') && !sawLast) {
+ return TCL_ERROR;
+ }
- /*
- * Don't do anything with REs with other special chars. Also check if this
- * is a bad RE (do this at the end because it can be expensive). If so,
- * let it complain at runtime.
- */
+ if (len == 0) {
+ /*
+ * The semantics of regexp are always match on re == "".
+ */
- if ((strpbrk(str + start, "*+?{}()[].\\|^$") != NULL)
- || (Tcl_RegExpCompile(NULL, str) == NULL)) {
- TclStackFree(interp, str);
- return TCL_ERROR;
- }
+ PushLiteral(envPtr, "1", 1);
+ return TCL_OK;
+ }
- if (anchorLeft && anchorRight) {
- PushLiteral(envPtr, str+start, len-start);
- } else {
/*
- * This needs to find the substring anywhere in the string, so use
- * [string match] and *foo*, with appropriate anchoring.
+ * Attempt to convert pattern to glob. If successful, push the
+ * converted pattern.
*/
- char *newStr = TclStackAlloc(interp, (unsigned) len + 3);
-
- len -= start;
- if (anchorLeft) {
- strncpy(newStr, str + start, (size_t) len);
- } else {
- newStr[0] = '*';
- strncpy(newStr + 1, str + start, (size_t) len++);
- }
- if (!anchorRight) {
- newStr[len++] = '*';
+ if (TclReToGlob(NULL, varTokenPtr[1].start, len, &ds, &exact)
+ != TCL_OK) {
+ return TCL_ERROR;
}
- newStr[len] = '\0';
- PushLiteral(envPtr, newStr, len);
- TclStackFree(interp, newStr);
+
+ PushLiteral(envPtr, Tcl_DStringValue(&ds), Tcl_DStringLength(&ds));
+ Tcl_DStringFree(&ds);
+ } else {
+ CompileWord(envPtr, varTokenPtr, interp, parsePtr->numWords-2);
}
- TclStackFree(interp, str);
/*
* Push the string arg.
@@ -3050,10 +2992,14 @@ TclCompileRegexpCmd(
varTokenPtr = TokenAfter(varTokenPtr);
CompileWord(envPtr, varTokenPtr, interp, parsePtr->numWords-1);
- if (anchorLeft && anchorRight && !nocase) {
- TclEmitOpcode(INST_STR_EQ, envPtr);
+ if (simple) {
+ if (exact && !nocase) {
+ TclEmitOpcode(INST_STR_EQ, envPtr);
+ } else {
+ TclEmitInstInt1(INST_STR_MATCH, nocase, envPtr);
+ }
} else {
- TclEmitInstInt1(INST_STR_MATCH, nocase, envPtr);
+ TclEmitInstInt1(INST_REGEXP, nocase, envPtr);
}
return TCL_OK;