* generic/tclCompCmds.c, generic/tclCompile.c, generic/tclCompile.h:

* generic/tclExecute.c, generic/tclInt.decls, generic/tclIntDecls.h: * generic/tclRegexp.c, generic/tclRegexp.h: Add INST_REGEXP and fully * generic/tclStubInit.c, generic/tclUtil.c: compiled [regexp] for the * tests/regexpComp.test: [Bug 1830166] simple cases. Also added TclReToGlob function to convert RE to glob patterns and use these in the possible cases.
author: hobbs <hobbs> 2007-11-12 02:07:18 (GMT)
committer: hobbs <hobbs> 2007-11-12 02:07:18 (GMT)
commit: cf8a7199f105edc95e59373e098af6eb47d22a16 (patch)
tree: c7f005156cbe08f8e11d6845d4a71f991dc5b488 /generic/tclCompCmds.c
parent: 094b6f7ae513ec561543276d7659f3a8b2a5b853 (diff)
download: tcl-cf8a7199f105edc95e59373e098af6eb47d22a16.zip
tcl-cf8a7199f105edc95e59373e098af6eb47d22a16.tar.gz
tcl-cf8a7199f105edc95e59373e098af6eb47d22a16.tar.bz2
1 files changed, 37 insertions, 91 deletions
diff --git a/generic/tclCompCmds.c b/generic/tclCompCmds.c
index 035dd24..6179190 100644
--- a/generic/tclCompCmds.c
+++ b/generic/tclCompCmds.c
@@ -12,7 +12,7 @@
  * See the file "license.terms" for information on usage and redistribution of
  * this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclCompCmds.c,v 1.122 2007/11/11 19:32:14 msofer Exp $
+ * RCS: @(#) $Id: tclCompCmds.c,v 1.123 2007/11/12 02:07:19 hobbs Exp $
  */
 
 #include "tclInt.h"
@@ -2883,7 +2883,7 @@ TclCompileRegexpCmd(
 {
     Tcl_Token *varTokenPtr;	/* Pointer to the Tcl_Token representing the
 				 * parse of the RE or string. */
-    int i, len, nocase, anchorLeft, anchorRight, start;
+    int i, len, nocase, exact, sawLast, simple;
     char *str;
     DefineLineInformation;	/* TIP #280 */
 
@@ -2898,7 +2898,9 @@ TclCompileRegexpCmd(
 	return TCL_ERROR;
     }
 
+    simple = 0;
     nocase = 0;
+    sawLast = 0;
     varTokenPtr = parsePtr->tokenPtr;
 
     /*
@@ -2919,6 +2921,7 @@ TclCompileRegexpCmd(
 	str = (char *) varTokenPtr[1].start;
 	len = varTokenPtr[1].size;
 	if ((len == 2) && (str[0] == '-') && (str[1] == '-')) {
+	    sawLast++;
 	    i++;
 	    break;
 	} else if ((len > 1) && (strncmp(str,"-nocase",(unsigned)len) == 0)) {
@@ -2946,102 +2949,41 @@ TclCompileRegexpCmd(
      */
 
     varTokenPtr = TokenAfter(varTokenPtr);
-    str = (char *) varTokenPtr[1].start;
-    len = varTokenPtr[1].size;
-    if ((varTokenPtr->type != TCL_TOKEN_SIMPLE_WORD) || (*str == '-')) {
-	return TCL_ERROR;
-    }
 
-    if (len == 0) {
-	/*
-	 * The semantics of regexp are always match on re == "".
-	 */
+    if (varTokenPtr->type == TCL_TOKEN_SIMPLE_WORD) {
+	Tcl_DString ds;
 
-	PushLiteral(envPtr, "1", 1);
-	return TCL_OK;
-    }
-
-    /*
-     * Make a copy of the string that is null-terminated for checks which
-     * require such.
-     */
-
-    str = (char *) TclStackAlloc(interp, (unsigned) len + 1);
-    strncpy(str, varTokenPtr[1].start, (size_t) len);
-    str[len] = '\0';
-    start = 0;
-
-    /*
-     * Check for anchored REs (ie ^foo$), so we can use string equal if
-     * possible. Do not alter the start of str so we can free it correctly.
-     */
-
-    if (str[0] == '^') {
-	start++;
-	anchorLeft = 1;
-    } else {
-	anchorLeft = 0;
-    }
-    if ((str[len-1] == '$') && ((len == 1) || (str[len-2] != '\\'))) {
-	anchorRight = 1;
-	str[--len] = '\0';
-    } else {
-	anchorRight = 0;
-    }
-
-    /*
-     * On the first (pattern) arg, check to see if any RE special characters
-     * are in the word. If not, this is the same as 'string equal'.
-     */
-
-    if ((len > 1+start) && (str[start] == '.') && (str[start+1] == '*')) {
-	start += 2;
-	anchorLeft = 0;
-    }
-    if ((len > 2+start) && (str[len-3] != '\\')
-	    && (str[len-2] == '.') && (str[len-1] == '*')) {
-	len -= 2;
-	str[len] = '\0';
-	anchorRight = 0;
-    }
+	simple = 1;
+	str = (char *) varTokenPtr[1].start;
+	len = varTokenPtr[1].size;
+	if ((*str == '-') && !sawLast) {
+	    return TCL_ERROR;
+	}
 
-    /*
-     * Don't do anything with REs with other special chars. Also check if this
-     * is a bad RE (do this at the end because it can be expensive). If so,
-     * let it complain at runtime.
-     */
+	if (len == 0) {
+	    /*
+	     * The semantics of regexp are always match on re == "".
+	     */
 
-    if ((strpbrk(str + start, "*+?{}()[].\\|^$") != NULL)
-	    || (Tcl_RegExpCompile(NULL, str) == NULL)) {
-	TclStackFree(interp, str);
-	return TCL_ERROR;
-    }
+	    PushLiteral(envPtr, "1", 1);
+	    return TCL_OK;
+	}
 
-    if (anchorLeft && anchorRight) {
-	PushLiteral(envPtr, str+start, len-start);
-    } else {
 	/*
-	 * This needs to find the substring anywhere in the string, so use
-	 * [string match] and *foo*, with appropriate anchoring.
+	 * Attempt to convert pattern to glob.  If successful, push the
+	 * converted pattern.
 	 */
 
-	char *newStr = TclStackAlloc(interp, (unsigned) len + 3);
-
-	len -= start;
-	if (anchorLeft) {
-	    strncpy(newStr, str + start, (size_t) len);
-	} else {
-	    newStr[0] = '*';
-	    strncpy(newStr + 1, str + start, (size_t) len++);
-	}
-	if (!anchorRight) {
-	    newStr[len++] = '*';
+	if (TclReToGlob(NULL, varTokenPtr[1].start, len, &ds, &exact)
+		!= TCL_OK) {
+	    return TCL_ERROR;
 	}
-	newStr[len] = '\0';
-	PushLiteral(envPtr, newStr, len);
-	TclStackFree(interp, newStr);
+
+	PushLiteral(envPtr, Tcl_DStringValue(&ds), Tcl_DStringLength(&ds));
+	Tcl_DStringFree(&ds);
+    } else {
+	CompileWord(envPtr, varTokenPtr, interp, parsePtr->numWords-2);
     }
-    TclStackFree(interp, str);
 
     /*
      * Push the string arg.
@@ -3050,10 +2992,14 @@ TclCompileRegexpCmd(
     varTokenPtr = TokenAfter(varTokenPtr);
     CompileWord(envPtr, varTokenPtr, interp, parsePtr->numWords-1);
 
-    if (anchorLeft && anchorRight && !nocase) {
-	TclEmitOpcode(INST_STR_EQ, envPtr);
+    if (simple) {
+	if (exact && !nocase) {
+	    TclEmitOpcode(INST_STR_EQ, envPtr);
+	} else {
+	    TclEmitInstInt1(INST_STR_MATCH, nocase, envPtr);
+	}
     } else {
-	TclEmitInstInt1(INST_STR_MATCH, nocase, envPtr);
+	TclEmitInstInt1(INST_REGEXP, nocase, envPtr);
     }
 
     return TCL_OK;
author	hobbs <hobbs>	2007-11-12 02:07:18 (GMT)
committer	hobbs <hobbs>	2007-11-12 02:07:18 (GMT)
commit	cf8a7199f105edc95e59373e098af6eb47d22a16 (patch)
tree	c7f005156cbe08f8e11d6845d4a71f991dc5b488 /generic/tclCompCmds.c
parent	094b6f7ae513ec561543276d7659f3a8b2a5b853 (diff)
download	tcl-cf8a7199f105edc95e59373e098af6eb47d22a16.zip tcl-cf8a7199f105edc95e59373e098af6eb47d22a16.tar.gz tcl-cf8a7199f105edc95e59373e098af6eb47d22a16.tar.bz2