From d97d504c5e8a9f2ffe16cd98a47a9136667ec481 Mon Sep 17 00:00:00 2001 From: hobbs Date: Tue, 29 Jan 2002 02:40:49 +0000 Subject: * tests/regexpComp.test (new): * generic/tclInt.h: * generic/tclBasic.c: added TclCompileRegexpCmd entry * generic/tclCompCmds.c (TclCompileStringCmd): corrected to return TCL_OUT_LINE_COMPILE instead of TCL_ERROR for parsing errors, so it only throws the error for runtime compile, in case the user modifies 'string'. (TclCompileRegexpCmd): first try at a byte-compiled regexp command. It handles static strings and ^$ bounded static strings. (TclCompileAppendCmd): made TclPushVarName call always use TCL_CREATE_VAR as numWords is always > 2 at that point. --- ChangeLog | 20 ++ generic/tclBasic.c | 4 +- generic/tclCompCmds.c | 150 ++++++++-- generic/tclInt.h | 9 +- tests/regexpComp.test | 760 ++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 919 insertions(+), 24 deletions(-) create mode 100644 tests/regexpComp.test diff --git a/ChangeLog b/ChangeLog index 780e689..f305801 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,25 @@ 2002-01-28 Jeff Hobbs + * tests/regexpComp.test (new): + * generic/tclInt.h: + * generic/tclBasic.c: added TclCompileRegexpCmd entry + * generic/tclCompCmds.c (TclCompileStringCmd): corrected to return + TCL_OUT_LINE_COMPILE instead of TCL_ERROR for parsing errors, so + it only throws the error for runtime compile, in case the user + modifies 'string'. + (TclCompileRegexpCmd): first try at a byte-compiled regexp + command. It handles static strings and ^$ bounded static strings. + (TclCompileAppendCmd): made TclPushVarName call always use + TCL_CREATE_VAR as numWords is always > 2 at that point. + + * generic/tclExecute.c (TclExecuteByteCode:INST_LIST): correct + possibly dangerous decr in macro call. + + * win/tclWinInit.c (TclpFindVariable): CONSTification touch-up + + * win/tclWinReg.c (OpenSubKey): corrected bug introduced in + CONSTification that dropped pointer reference. + * ChangeLog.2000 (new file): * ChangeLog: broke changes from 2000 into ChangeLog.2000 to reduce size of the main ChangeLog. diff --git a/generic/tclBasic.c b/generic/tclBasic.c index 3ebdfb0..133228a 100644 --- a/generic/tclBasic.c +++ b/generic/tclBasic.c @@ -13,7 +13,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclBasic.c,v 1.44 2002/01/25 21:36:09 dgp Exp $ + * RCS: @(#) $Id: tclBasic.c,v 1.45 2002/01/29 02:40:49 hobbs Exp $ */ #include "tclInt.h" @@ -139,7 +139,7 @@ static CmdInfo builtInCmds[] = { {"proc", (Tcl_CmdProc *) NULL, Tcl_ProcObjCmd, (CompileProc *) NULL, 1}, {"regexp", (Tcl_CmdProc *) NULL, Tcl_RegexpObjCmd, - (CompileProc *) NULL, 1}, + TclCompileRegexpCmd, 1}, {"regsub", (Tcl_CmdProc *) NULL, Tcl_RegsubObjCmd, (CompileProc *) NULL, 1}, {"rename", (Tcl_CmdProc *) NULL, Tcl_RenameObjCmd, diff --git a/generic/tclCompCmds.c b/generic/tclCompCmds.c index 536ad2d..dfab488 100644 --- a/generic/tclCompCmds.c +++ b/generic/tclCompCmds.c @@ -10,7 +10,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCompCmds.c,v 1.21 2002/01/25 20:40:55 dgp Exp $ + * RCS: @(#) $Id: tclCompCmds.c,v 1.22 2002/01/29 02:40:50 hobbs Exp $ */ #include "tclInt.h" @@ -107,8 +107,7 @@ TclCompileAppendCmd(interp, parsePtr, envPtr) varTokenPtr = parsePtr->tokenPtr + (parsePtr->tokenPtr->numComponents + 1); - code = TclPushVarName(interp, varTokenPtr, envPtr, - ((numWords > 2) ? TCL_CREATE_VAR : 0), + code = TclPushVarName(interp, varTokenPtr, envPtr, TCL_CREATE_VAR, &localIndex, &simpleVarName, &isScalar); if (code != TCL_OK) { goto done; @@ -2149,6 +2148,125 @@ TclCompileLsetCmd( interp, parsePtr, envPtr ) /* *---------------------------------------------------------------------- * + * TclCompileRegexpCmd -- + * + * Procedure called to compile the "regexp" command. + * + * Results: + * The return value is a standard Tcl result, which is TCL_OK if + * the compilation was successful. If the "regexp" command is too + * complex for this function, then TCL_OUT_LINE_COMPILE is returned, + * indicating that the command should be compiled "out of line" + * (that is, not byte-compiled). If an error occurs, TCL_ERROR is + * returned, and the interpreter result contains an error message. + * + * Side effects: + * Instructions are added to envPtr to execute the "regexp" command + * at runtime. + * + *---------------------------------------------------------------------- + */ + +int +TclCompileRegexpCmd(interp, parsePtr, envPtr) + Tcl_Interp* interp; /* Tcl interpreter for error reporting */ + Tcl_Parse* parsePtr; /* Points to a parse structure for + * the command */ + CompileEnv* envPtr; /* Holds the resulting instructions */ +{ + Tcl_Token *varTokenPtr; /* Pointer to the Tcl_Token representing + * the parse of the RE or string */ + int length, code, exactMatch; + char c, *str; + + if (parsePtr->numWords != 3) { + /* We are only interested in compiling simple regexp cases. */ + return TCL_OUT_LINE_COMPILE; + } + + varTokenPtr = parsePtr->tokenPtr + (parsePtr->tokenPtr->numComponents + 1); + if (varTokenPtr->type != TCL_TOKEN_SIMPLE_WORD) { + /* Not a simple string - punt to runtime. */ + return TCL_OUT_LINE_COMPILE; + } + str = varTokenPtr[1].start; + length = varTokenPtr[1].size; + if (*str == '-') { + /* + * Looks like it may be an option. With 3 args, this is an + * incorrect call, but we punt on it here. + */ + return TCL_OUT_LINE_COMPILE; + } + + /* + * On the first (pattern) arg, check to see if any RE special characters + * are in the word. If not, this is the same as 'string equal'. + * We can use strchr here because the glob chars are all in the ascii-7 + * range. If -nocase was specified, we can't do this because INST_STR_EQ + * has no support for nocase. + */ + if ((length > 1) && (str[0] == '^') && (str[length-1] == '$')) { + /* + * It appears and exact search was requested (ie ^foo$), so strip + * off the special chars and signal exactMatch. + */ + str++; length -= 2; + exactMatch = 1; + } else { + exactMatch = 0; + } + c = str[length]; + str[length] = '\0'; + if (strpbrk(str, "*+?{}()[].\\|^$") != NULL) { + str[length] = c; + /* We don't do anything with REs with special chars yet. */ + return TCL_OUT_LINE_COMPILE; + } + str[length] = c; + if (exactMatch) { + TclEmitPush(TclRegisterLiteral(envPtr, str, length, 0), envPtr); + } else { + /* + * This needs to find the substring anywhere in the string, so + * use string match and *foo*. + */ + char *newStr = ckalloc((unsigned) length + 3); + newStr[0] = '*'; + strncpy(newStr + 1, str, (size_t) length); + newStr[length+1] = '*'; + newStr[length+2] = '\0'; + TclEmitPush(TclRegisterLiteral(envPtr, newStr, length+2, 0), envPtr); + ckfree((char *) newStr); + } + + /* + * Push the string arg + */ + varTokenPtr = varTokenPtr + (varTokenPtr->numComponents + 1); + if (varTokenPtr->type == TCL_TOKEN_SIMPLE_WORD) { + TclEmitPush(TclRegisterLiteral(envPtr, + varTokenPtr[1].start, varTokenPtr[1].size, 0), envPtr); + } else { + code = TclCompileTokens(interp, varTokenPtr+1, + varTokenPtr->numComponents, envPtr); + if (code != TCL_OK) { + return code; + } + } + + if (exactMatch) { + TclEmitOpcode(INST_STR_EQ, envPtr); + } else { + TclEmitInstInt1(INST_STR_MATCH, 0 /* nocase */, envPtr); + } + + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * * TclCompileReturnCmd -- * * Procedure called to compile the "return" command. @@ -2420,9 +2538,8 @@ TclCompileStringCmd(interp, parsePtr, envPtr) }; if (parsePtr->numWords < 2) { - Tcl_SetResult(interp, "wrong # args: should be \"string option " - "arg ?arg ...?\"", TCL_STATIC); - return TCL_ERROR; + /* Fail at run time, not in compilation */ + return TCL_OUT_LINE_COMPILE; } opTokenPtr = parsePtr->tokenPtr + (parsePtr->tokenPtr->numComponents + 1); @@ -2437,7 +2554,7 @@ TclCompileStringCmd(interp, parsePtr, envPtr) Tcl_DecrRefCount(opObj); varTokenPtr = opTokenPtr + (opTokenPtr->numComponents + 1); - + switch ((enum options) index) { case STR_BYTELENGTH: case STR_FIRST: @@ -2499,9 +2616,8 @@ TclCompileStringCmd(interp, parsePtr, envPtr) int i; if (parsePtr->numWords != 4) { - Tcl_SetResult(interp, "wrong # args: should be " - "\"string index string charIndex\"", TCL_STATIC); - return TCL_ERROR; + /* Fail at run time, not in compilation */ + return TCL_OUT_LINE_COMPILE; } /* @@ -2528,9 +2644,8 @@ TclCompileStringCmd(interp, parsePtr, envPtr) } case STR_LENGTH: { if (parsePtr->numWords != 3) { - Tcl_SetResult(interp, "wrong # args: should be " - "\"string length string\"", TCL_STATIC); - return TCL_ERROR; + /* Fail at run time, not in compilation */ + return TCL_OUT_LINE_COMPILE; } if (varTokenPtr->type == TCL_TOKEN_SIMPLE_WORD) { @@ -2559,10 +2674,8 @@ TclCompileStringCmd(interp, parsePtr, envPtr) char c, *str; if (parsePtr->numWords < 4 || parsePtr->numWords > 5) { - Tcl_SetResult(interp, "wrong # args: should be " - "\"string match ?-nocase? pattern string\"", - TCL_STATIC); - return TCL_ERROR; + /* Fail at run time, not in compilation */ + return TCL_OUT_LINE_COMPILE; } if (parsePtr->numWords == 5) { @@ -2581,7 +2694,8 @@ TclCompileStringCmd(interp, parsePtr, envPtr) "bad option \"", str, "\": must be -nocase", (char *) NULL); str[length] = c; - return TCL_ERROR; + /* Fail at run time, not in compilation */ + return TCL_OUT_LINE_COMPILE; } varTokenPtr = varTokenPtr + (varTokenPtr->numComponents + 1); } diff --git a/generic/tclInt.h b/generic/tclInt.h index f20379c..9388c5d 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -12,7 +12,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclInt.h,v 1.76 2002/01/25 20:40:55 dgp Exp $ + * RCS: @(#) $Id: tclInt.h,v 1.77 2002/01/29 02:40:50 hobbs Exp $ */ #ifndef _TCLINT @@ -2126,9 +2126,10 @@ EXTERN int TclCompileListCmd _ANSI_ARGS_((Tcl_Interp *interp, Tcl_Parse *parsePtr, struct CompileEnv *envPtr)); EXTERN int TclCompileLlengthCmd _ANSI_ARGS_((Tcl_Interp *interp, Tcl_Parse *parsePtr, struct CompileEnv *envPtr)); -EXTERN int TclCompileLsetCmd _ANSI_ARGS_(( Tcl_Interp* interp, - Tcl_Parse* parsePtr, - struct CompileEnv* envPtr )); +EXTERN int TclCompileLsetCmd _ANSI_ARGS_((Tcl_Interp* interp, + Tcl_Parse* parsePtr, struct CompileEnv* envPtr)); +EXTERN int TclCompileRegexpCmd _ANSI_ARGS_((Tcl_Interp* interp, + Tcl_Parse* parsePtr, struct CompileEnv* envPtr)); EXTERN int TclCompileReturnCmd _ANSI_ARGS_((Tcl_Interp *interp, Tcl_Parse *parsePtr, struct CompileEnv *envPtr)); EXTERN int TclCompileSetCmd _ANSI_ARGS_((Tcl_Interp *interp, diff --git a/tests/regexpComp.test b/tests/regexpComp.test new file mode 100644 index 0000000..41e63d6 --- /dev/null +++ b/tests/regexpComp.test @@ -0,0 +1,760 @@ +# Commands covered: regexp, regsub +# +# This file contains a collection of tests for one or more of the Tcl +# built-in commands. Sourcing this file into Tcl runs the tests and +# generates output for errors. No output means no errors were found. +# +# Copyright (c) 1991-1993 The Regents of the University of California. +# Copyright (c) 1998 Sun Microsystems, Inc. +# Copyright (c) 1998-1999 by Scriptics Corporation. +# +# See the file "license.terms" for information on usage and redistribution +# of this file, and for a DISCLAIMER OF ALL WARRANTIES. +# +# RCS: @(#) $Id$ + +if {[lsearch [namespace children] ::tcltest] == -1} { + package require tcltest + namespace import -force ::tcltest::* +} + +# Procedure to evaluate a script within a proc, to test compilation +# functionality + +proc evalInProc { script } { + proc testProc {} $script + set status [catch { + testProc + } result] + rename testProc {} + return $result + #return [list $status $result] +} + +catch {unset foo} +test regexp-1.1 {basic regexp operation} { + evalInProc { + regexp ab*c abbbc + } +} 1 +test regexp-1.2 {basic regexp operation} { + evalInProc { + regexp ab*c ac + } +} 1 +test regexp-1.3 {basic regexp operation} { + evalInProc { + regexp ab*c ab + } +} 0 +test regexp-1.4 {basic regexp operation} { + evalInProc { + regexp -- -gorp abc-gorpxxx + } +} 1 +test regexp-1.5 {basic regexp operation} { + evalInProc { + regexp {^([^ ]*)[ ]*([^ ]*)} "" a + } +} 1 +test regexp-1.6 {basic regexp operation} { + list [catch {regexp {} abc} msg] $msg +} {0 1} +test regexp-1.7 {regexp utf compliance} { + # if not UTF-8 aware, result is "0 1" + evalInProc { + set foo "\u4e4eb q" + regexp "\u4e4eb q" "a\u4e4eb qw\u5e4e\x4e wq" bar + list [string compare $foo $bar] [regexp 4 $bar] + } +} {0 0} + +test regexp-2.1 {getting substrings back from regexp} { + evalInProc { + set foo {} + list [regexp ab*c abbbbc foo] $foo + } +} {1 abbbbc} +test regexp-2.2 {getting substrings back from regexp} { + evalInProc { + set foo {} + set f2 {} + list [regexp a(b*)c abbbbc foo f2] $foo $f2 + } +} {1 abbbbc bbbb} +test regexp-2.3 {getting substrings back from regexp} { + evalInProc { + set foo {} + set f2 {} + list [regexp a(b*)(c) abbbbc foo f2] $foo $f2 + } +} {1 abbbbc bbbb} +test regexp-2.4 {getting substrings back from regexp} { + evalInProc { + set foo {} + set f2 {} + set f3 {} + list [regexp a(b*)(c) abbbbc foo f2 f3] $foo $f2 $f3 + } +} {1 abbbbc bbbb c} +test regexp-2.5 {getting substrings back from regexp} { + evalInProc { + set foo {}; set f1 {}; set f2 {}; set f3 {}; set f4 {}; set f5 {}; + set f6 {}; set f7 {}; set f8 {}; set f9 {}; set fa {}; set fb {}; + list [regexp (1*)(2*)(3*)(4*)(5*)(6*)(7*)(8*)(9*)(a*)(b*) \ + 12223345556789999aabbb \ + foo f1 f2 f3 f4 f5 f6 f7 f8 f9 fa fb] $foo $f1 $f2 $f3 $f4 $f5 \ + $f6 $f7 $f8 $f9 $fa $fb + } +} {1 12223345556789999aabbb 1 222 33 4 555 6 7 8 9999 aa bbb} +test regexp-2.6 {getting substrings back from regexp} { + evalInProc { + set foo 2; set f2 2; set f3 2; set f4 2 + list [regexp (a)(b)? xay foo f2 f3 f4] $foo $f2 $f3 $f4 + } +} {1 a a {} {}} +test regexp-2.7 {getting substrings back from regexp} { + evalInProc { + set foo 1; set f2 1; set f3 1; set f4 1 + list [regexp (a)(b)?(c) xacy foo f2 f3 f4] $foo $f2 $f3 $f4 + } +} {1 ac a {} c} +test regexp-2.8 {getting substrings back from regexp} { + evalInProc { + set match {} + list [regexp {^a*b} aaaab match] $match + } +} {1 aaaab} + +test regexp-3.1 {-indices option to regexp} { + evalInProc { + set foo {} + list [regexp -indices ab*c abbbbc foo] $foo + } +} {1 {0 5}} +test regexp-3.2 {-indices option to regexp} { + evalInProc { + set foo {} + set f2 {} + list [regexp -indices a(b*)c abbbbc foo f2] $foo $f2 + } +} {1 {0 5} {1 4}} +test regexp-3.3 {-indices option to regexp} { + evalInProc { + set foo {} + set f2 {} + list [regexp -indices a(b*)(c) abbbbc foo f2] $foo $f2 + } +} {1 {0 5} {1 4}} +test regexp-3.4 {-indices option to regexp} { + evalInProc { + set foo {} + set f2 {} + set f3 {} + list [regexp -indices a(b*)(c) abbbbc foo f2 f3] $foo $f2 $f3 + } +} {1 {0 5} {1 4} {5 5}} +test regexp-3.5 {-indices option to regexp} { + evalInProc { + set foo {}; set f1 {}; set f2 {}; set f3 {}; set f4 {}; set f5 {}; + set f6 {}; set f7 {}; set f8 {}; set f9 {} + list [regexp -indices (1*)(2*)(3*)(4*)(5*)(6*)(7*)(8*)(9*) \ + 12223345556789999 \ + foo f1 f2 f3 f4 f5 f6 f7 f8 f9] $foo $f1 $f2 $f3 $f4 $f5 \ + $f6 $f7 $f8 $f9 + } +} {1 {0 16} {0 0} {1 3} {4 5} {6 6} {7 9} {10 10} {11 11} {12 12} {13 16}} +test regexp-3.6 {getting substrings back from regexp} { + evalInProc { + set foo 2; set f2 2; set f3 2; set f4 2 + list [regexp -indices (a)(b)? xay foo f2 f3 f4] $foo $f2 $f3 $f4 + } +} {1 {1 1} {1 1} {-1 -1} {-1 -1}} +test regexp-3.7 {getting substrings back from regexp} { + evalInProc { + set foo 1; set f2 1; set f3 1; set f4 1 + list [regexp -indices (a)(b)?(c) xacy foo f2 f3 f4] $foo $f2 $f3 $f4 + } +} {1 {1 2} {1 1} {-1 -1} {2 2}} + +test regexp-4.1 {-nocase option to regexp} { + evalInProc { + regexp -nocase foo abcFOo + } +} 1 +test regexp-4.2 {-nocase option to regexp} { + evalInProc { + set f1 22 + set f2 33 + set f3 44 + list [regexp -nocase {a(b*)([xy]*)z} aBbbxYXxxZ22 f1 f2 f3] $f1 $f2 $f3 + } +} {1 aBbbxYXxxZ Bbb xYXxx} +test regexp-4.3 {-nocase option to regexp} { + evalInProc { + regexp -nocase FOo abcFOo + } +} 1 +set ::x abcdefghijklmnopqrstuvwxyz1234567890 +set ::x $x$x$x$x$x$x$x$x$x$x$x$x +test regexp-4.4 {case conversion in regexp} { + evalInProc { + list [regexp -nocase $::x $::x foo] $foo + } +} "1 $x" +catch {unset ::x} + +test regexp-5.1 {exercise cache of compiled expressions} { + evalInProc { + regexp .*a b + regexp .*b c + regexp .*c d + regexp .*d e + regexp .*e f + regexp .*a bbba + } +} 1 +test regexp-5.2 {exercise cache of compiled expressions} { + evalInProc { + regexp .*a b + regexp .*b c + regexp .*c d + regexp .*d e + regexp .*e f + regexp .*b xxxb + } +} 1 +test regexp-5.3 {exercise cache of compiled expressions} { + evalInProc { + regexp .*a b + regexp .*b c + regexp .*c d + regexp .*d e + regexp .*e f + regexp .*c yyyc + } +} 1 +test regexp-5.4 {exercise cache of compiled expressions} { + evalInProc { + regexp .*a b + regexp .*b c + regexp .*c d + regexp .*d e + regexp .*e f + regexp .*d 1d + } +} 1 +test regexp-5.5 {exercise cache of compiled expressions} { + evalInProc { + regexp .*a b + regexp .*b c + regexp .*c d + regexp .*d e + regexp .*e f + regexp .*e xe + } +} 1 + +test regexp-6.1 {regexp errors} { + evalInProc { + list [catch {regexp a} msg] $msg + } +} {1 {wrong # args: should be "regexp ?switches? exp string ?matchVar? ?subMatchVar subMatchVar ...?"}} +test regexp-6.2 {regexp errors} { + evalInProc { + list [catch {regexp -nocase a} msg] $msg + } +} {1 {wrong # args: should be "regexp ?switches? exp string ?matchVar? ?subMatchVar subMatchVar ...?"}} +test regexp-6.3 {regexp errors} { + evalInProc { + list [catch {regexp -gorp a} msg] $msg + } +} {1 {bad switch "-gorp": must be -all, -about, -indices, -inline, -expanded, -line, -linestop, -lineanchor, -nocase, -start, or --}} +test regexp-6.4 {regexp errors} { + evalInProc { + list [catch {regexp a( b} msg] $msg + } +} {1 {couldn't compile regular expression pattern: parentheses () not balanced}} +test regexp-6.5 {regexp errors} { + evalInProc { + list [catch {regexp a( b} msg] $msg + } +} {1 {couldn't compile regular expression pattern: parentheses () not balanced}} +test regexp-6.6 {regexp errors} { + evalInProc { + list [catch {regexp a a f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1} msg] $msg + } +} {0 1} +test regexp-6.7 {regexp errors} { + evalInProc { + list [catch {regexp (x)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.) xyzzy} msg] $msg + } +} {0 0} +test regexp-6.8 {regexp errors} { + evalInProc { + catch {unset f1} + set f1 44 + list [catch {regexp abc abc f1(f2)} msg] $msg + } +} {1 {couldn't set variable "f1(f2)"}} +test regexp-6.9 {regexp errors, -start bad int check} { + evalInProc { + list [catch {regexp -start bogus {^$} {}} msg] $msg + } +} {1 {expected integer but got "bogus"}} + +test regexp-7.1 {basic regsub operation} { + evalInProc { + list [regsub aa+ xaxaaaxaa 111&222 foo] $foo + } +} {1 xax111aaa222xaa} +test regexp-7.2 {basic regsub operation} { + evalInProc { + list [regsub aa+ aaaxaa &111 foo] $foo + } +} {1 aaa111xaa} +test regexp-7.3 {basic regsub operation} { + evalInProc { + list [regsub aa+ xaxaaa 111& foo] $foo + } +} {1 xax111aaa} +test regexp-7.4 {basic regsub operation} { + evalInProc { + list [regsub aa+ aaa 11&2&333 foo] $foo + } +} {1 11aaa2aaa333} +test regexp-7.5 {basic regsub operation} { + evalInProc { + list [regsub aa+ xaxaaaxaa &2&333 foo] $foo + } +} {1 xaxaaa2aaa333xaa} +test regexp-7.6 {basic regsub operation} { + evalInProc { + list [regsub aa+ xaxaaaxaa 1&22& foo] $foo + } +} {1 xax1aaa22aaaxaa} +test regexp-7.7 {basic regsub operation} { + evalInProc { + list [regsub a(a+) xaxaaaxaa {1\122\1} foo] $foo + } +} {1 xax1aa22aaxaa} +test regexp-7.8 {basic regsub operation} { + evalInProc { + list [regsub a(a+) xaxaaaxaa {1\\\122\1} foo] $foo + } +} "1 {xax1\\aa22aaxaa}" +test regexp-7.9 {basic regsub operation} { + evalInProc { + list [regsub a(a+) xaxaaaxaa {1\\122\1} foo] $foo + } +} "1 {xax1\\122aaxaa}" +test regexp-7.10 {basic regsub operation} { + evalInProc { + list [regsub a(a+) xaxaaaxaa {1\\&\1} foo] $foo + } +} "1 {xax1\\aaaaaxaa}" +test regexp-7.11 {basic regsub operation} { + evalInProc { + list [regsub a(a+) xaxaaaxaa {1\&\1} foo] $foo + } +} {1 xax1&aaxaa} +test regexp-7.12 {basic regsub operation} { + evalInProc { + list [regsub a(a+) xaxaaaxaa {\1\1\1\1&&} foo] $foo + } +} {1 xaxaaaaaaaaaaaaaaxaa} +test regexp-7.13 {basic regsub operation} { + evalInProc { + set foo xxx + list [regsub abc xyz 111 foo] $foo + } +} {0 xyz} +test regexp-7.14 {basic regsub operation} { + evalInProc { + set foo xxx + list [regsub ^ xyz "111 " foo] $foo + } +} {1 {111 xyz}} +test regexp-7.15 {basic regsub operation} { + evalInProc { + set foo xxx + list [regsub -- -foo abc-foodef "111 " foo] $foo + } +} {1 {abc111 def}} +test regexp-7.16 {basic regsub operation} { + evalInProc { + set foo xxx + list [regsub x "" y foo] $foo + } +} {0 {}} +test regexp-7.17 {regsub utf compliance} { + evalInProc { + # if not UTF-8 aware, result is "0 1" + set foo "xyz555ijka\u4e4ebpqr" + regsub a\u4e4eb xyza\u4e4ebijka\u4e4ebpqr 555 bar + list [string compare $foo $bar] [regexp 4 $bar] + } +} {0 0} + +test regexp-8.1 {case conversion in regsub} { + evalInProc { + list [regsub -nocase a(a+) xaAAaAAay & foo] $foo + } +} {1 xaAAaAAay} +test regexp-8.2 {case conversion in regsub} { + evalInProc { + list [regsub -nocase a(a+) xaAAaAAay & foo] $foo + } +} {1 xaAAaAAay} +test regexp-8.3 {case conversion in regsub} { + evalInProc { + set foo 123 + list [regsub a(a+) xaAAaAAay & foo] $foo + } +} {0 xaAAaAAay} +test regexp-8.4 {case conversion in regsub} { + evalInProc { + set foo 123 + list [regsub -nocase a CaDE b foo] $foo + } +} {1 CbDE} +test regexp-8.5 {case conversion in regsub} { + evalInProc { + set foo 123 + list [regsub -nocase XYZ CxYzD b foo] $foo + } +} {1 CbD} +test regexp-8.6 {case conversion in regsub} { + evalInProc { + set x abcdefghijklmnopqrstuvwxyz1234567890 + set x $x$x$x$x$x$x$x$x$x$x$x$x + set foo 123 + list [regsub -nocase $x $x b foo] $foo + } +} {1 b} + +test regexp-9.1 {-all option to regsub} { + evalInProc { + set foo 86 + list [regsub -all x+ axxxbxxcxdx |&| foo] $foo + } +} {4 a|xxx|b|xx|c|x|d|x|} +test regexp-9.2 {-all option to regsub} { + evalInProc { + set foo 86 + list [regsub -nocase -all x+ aXxXbxxcXdx |&| foo] $foo + } +} {4 a|XxX|b|xx|c|X|d|x|} +test regexp-9.3 {-all option to regsub} { + evalInProc { + set foo 86 + list [regsub x+ axxxbxxcxdx |&| foo] $foo + } +} {1 a|xxx|bxxcxdx} +test regexp-9.4 {-all option to regsub} { + evalInProc { + set foo 86 + list [regsub -all bc axxxbxxcxdx |&| foo] $foo + } +} {0 axxxbxxcxdx} +test regexp-9.5 {-all option to regsub} { + evalInProc { + set foo xxx + list [regsub -all node "node node more" yy foo] $foo + } +} {2 {yy yy more}} +test regexp-9.6 {-all option to regsub} { + evalInProc { + set foo xxx + list [regsub -all ^ xxx 123 foo] $foo + } +} {1 123xxx} + +test regexp-10.1 {expanded syntax in regsub} { + evalInProc { + set foo xxx + list [regsub -expanded ". \#comment\n . \#comment2" abc def foo] $foo + } +} {1 defc} +test regexp-10.2 {newline sensitivity in regsub} { + evalInProc { + set foo xxx + list [regsub -line {^a.*b$} "dabc\naxyb\n" 123 foo] $foo + } +} "1 {dabc\n123\n}" +test regexp-10.3 {newline sensitivity in regsub} { + evalInProc { + set foo xxx + list [regsub -line {^a.*b$} "dabc\naxyb\nxb" 123 foo] $foo + } +} "1 {dabc\n123\nxb}" +test regexp-10.4 {partial newline sensitivity in regsub} { + evalInProc { + set foo xxx + list [regsub -lineanchor {^a.*b$} "da\naxyb\nxb" 123 foo] $foo + } +} "1 {da\n123}" +test regexp-10.5 {inverse partial newline sensitivity in regsub} { + evalInProc { + set foo xxx + list [regsub -linestop {a.*b} "da\nbaxyb\nxb" 123 foo] $foo + } +} "1 {da\nb123\nxb}" + +test regexp-11.1 {regsub errors} { + evalInProc { + list [catch {regsub a b c} msg] $msg + } +} {1 {wrong # args: should be "regsub ?switches? exp string subSpec varName"}} +test regexp-11.2 {regsub errors} { + evalInProc { + list [catch {regsub -nocase a b c} msg] $msg + } +} {1 {wrong # args: should be "regsub ?switches? exp string subSpec varName"}} +test regexp-11.3 {regsub errors} { + evalInProc { + list [catch {regsub -nocase -all a b c} msg] $msg + } +} {1 {wrong # args: should be "regsub ?switches? exp string subSpec varName"}} +test regexp-11.4 {regsub errors} { + evalInProc { + list [catch {regsub a b c d e f} msg] $msg + } +} {1 {wrong # args: should be "regsub ?switches? exp string subSpec varName"}} +test regexp-11.5 {regsub errors} { + evalInProc { + list [catch {regsub -gorp a b c} msg] $msg + } +} {1 {bad switch "-gorp": must be -all, -nocase, -expanded, -line, -linestop, -lineanchor, -start, or --}} +test regexp-11.6 {regsub errors} { + evalInProc { + list [catch {regsub -nocase a( b c d} msg] $msg + } +} {1 {couldn't compile regular expression pattern: parentheses () not balanced}} +test regexp-11.7 {regsub errors} { + evalInProc { + catch {unset f1} + set f1 44 + list [catch {regsub -nocase aaa aaa xxx f1(f2)} msg] $msg + } +} {1 {couldn't set variable "f1(f2)"}} +test regexp-11.8 {regsub errors, -start bad int check} { + evalInProc { + list [catch {regsub -start bogus pattern string rep var} msg] $msg + } +} {1 {expected integer but got "bogus"}} + +# This test crashes on the Mac unless you increase the Stack Space to about 1 +# Meg. This is probably bigger than most users want... +# 8.2.3 regexp reduced stack space requirements, but this should be +# tested again +test regexp-12.1 {Tcl_RegExpExec: large number of subexpressions} {macCrash} { + evalInProc { + list [regexp (.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.) abcdefghijklmnopqrstuvwxyz all a b c d e f g h i j k l m n o p q r s t u v w x y z] $all $a $b $c $d $e $f $g $h $i $j $k $l $m $n $o $p $q $r $s $t $u $v $w $x $y $z + } +} {1 abcdefghijklmnopqrstuvwxyz a b c d e f g h i j k l m n o p q r s t u v w x y z} + +test regexp-13.1 {regsub of a very large string} { + # This test is designed to stress the memory subsystem in order + # to catch Bug #933. It only fails if the Tcl memory allocator + # is in use. + + set line {BEGIN_TABLE ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; END_TABLE} + set filedata [string repeat $line 200] + for {set i 1} {$i<10} {incr i} { + regsub -all "BEGIN_TABLE " $filedata "" newfiledata + } + set x done +} {done} + +test regexp-14.1 {CompileRegexp: regexp cache} { + evalInProc { + regexp .*a b + regexp .*b c + regexp .*c d + regexp .*d e + regexp .*e f + set x . + append x *a + regexp $x bbba + } +} 1 +test regexp-14.2 {CompileRegexp: regexp cache, different flags} { + evalInProc { + regexp .*a b + regexp .*b c + regexp .*c d + regexp .*d e + regexp .*e f + set x . + append x *a + regexp -nocase $x bbba + } +} 1 + +# There is no exec on the Mac ... + +test regexp-14.3 {CompileRegexp: regexp cache, empty regexp and empty cache} {unixOrPc} { + makeFile {puts [regexp {} foo]} junk.tcl + exec $::tcltest::tcltest junk.tcl +} 1 + +test regexp-15.1 {regexp -start} { + catch {unset x} + list [regexp -start -10 {\d} 1abc2de3 x] $x +} {1 1} +test regexp-15.2 {regexp -start} { + catch {unset x} + list [regexp -start 2 {\d} 1abc2de3 x] $x +} {1 2} +test regexp-15.3 {regexp -start} { + catch {unset x} + list [regexp -start 4 {\d} 1abc2de3 x] $x +} {1 2} +test regexp-15.4 {regexp -start} { + catch {unset x} + list [regexp -start 5 {\d} 1abc2de3 x] $x +} {1 3} +test regexp-15.5 {regexp -start, over end of string} { + catch {unset x} + list [regexp -start [string length 1abc2de3] {\d} 1abc2de3 x] [info exists x] +} {0 0} +test regexp-15.6 {regexp -start, loss of ^$ behavior} { + list [regexp -start 2 {^$} {}] +} {0} + +test regexp-16.1 {regsub -start} { + catch {unset x} + list [regsub -all -start 2 {\d} a1b2c3d4e5 {/&} x] $x +} {4 a1b/2c/3d/4e/5} +test regexp-16.2 {regsub -start} { + catch {unset x} + list [regsub -all -start -25 {z} hello {/&} x] $x +} {0 hello} +test regexp-16.3 {regsub -start} { + catch {unset x} + list [regsub -all -start 3 {z} hello {/&} x] $x +} {0 hello} +test regexp-16.4 {regsub -start, \A behavior} { + set out {} + lappend out [regsub -start 0 -all {\A(\w)} {abcde} {/\1} x] $x + lappend out [regsub -start 2 -all {\A(\w)} {abcde} {/\1} x] $x +} {5 /a/b/c/d/e 3 ab/c/d/e} + +test regexp-17.1 {regexp -inline} { + regexp -inline b ababa +} {b} +test regexp-17.2 {regexp -inline} { + regexp -inline (b) ababa +} {b b} +test regexp-17.3 {regexp -inline -indices} { + regexp -inline -indices (b) ababa +} {{1 1} {1 1}} +test regexp-17.4 {regexp -inline} { + regexp -inline {\w(\d+)\w} " hello 23 there456def " +} {e456d 456} +test regexp-17.5 {regexp -inline no matches} { + regexp -inline {\w(\d+)\w} "" +} {} +test regexp-17.6 {regexp -inline no matches} { + regexp -inline hello goodbye +} {} +test regexp-17.7 {regexp -inline, no matchvars allowed} { + list [catch {regexp -inline b abc match} msg] $msg +} {1 {regexp match variables not allowed when using -inline}} + +test regexp-18.1 {regexp -all} { + regexp -all b bbbbb +} {5} +test regexp-18.2 {regexp -all} { + regexp -all b abababbabaaaaaaaaaab +} {6} +test regexp-18.3 {regexp -all -inline} { + regexp -all -inline b abababbabaaaaaaaaaab +} {b b b b b b} +test regexp-18.4 {regexp -all -inline} { + regexp -all -inline {\w(\w)} abcdefg +} {ab b cd d ef f} +test regexp-18.5 {regexp -all -inline} { + regexp -all -inline {\w(\w)$} abcdefg +} {fg g} +test regexp-18.6 {regexp -all -inline} { + regexp -all -inline {\d+} 10:20:30:40 +} {10 20 30 40} +test regexp-18.7 {regexp -all -inline} { + list [catch {regexp -all -inline b abc match} msg] $msg +} {1 {regexp match variables not allowed when using -inline}} +test regexp-18.8 {regexp -all} { + # This should not cause an infinite loop + regexp -all -inline {a*} a +} {a} +test regexp-18.9 {regexp -all} { + # Yes, the expected result is {a {}}. Here's why: + # Start at index 0; a* matches the "a" there then stops. + # Go to index 1; a* matches the lambda (or {}) there then stops. Recall + # that a* matches zero or more "a"'s; thus it matches the string "b", as + # there are zero or more "a"'s there. + # Go to index 2; this is past the end of the string, so stop. + regexp -all -inline {a*} ab +} {a {}} +test regexp-18.10 {regexp -all} { + # Yes, the expected result is {a {} a}. Here's why: + # Start at index 0; a* matches the "a" there then stops. + # Go to index 1; a* matches the lambda (or {}) there then stops. Recall + # that a* matches zero or more "a"'s; thus it matches the string "b", as + # there are zero or more "a"'s there. + # Go to index 2; a* matches the "a" there then stops. + # Go to index 3; this is past the end of the string, so stop. + regexp -all -inline {a*} aba +} {a {} a} +test regexp-18.11 {regexp -all} { + regexp -all -inline {^a} aaaa +} {a} +test regexp-18.12 {regexp -all -inline -indices} { + regexp -all -inline -indices a(b(c)d|e(f)g)h abcdhaefgh +} {{0 4} {1 3} {2 2} {-1 -1} {5 9} {6 8} {-1 -1} {7 7}} + +test regexp-19.1 {regsub null replacement} { + regsub -all {@} {@hel@lo@} "\0a\0" result + list $result [string length $result] +} "\0a\0hel\0a\0lo\0a\0 14" + +test regexp-20.1 {regsub shared object shimmering} { + # Bug #461322 + set a abcdefghijklmnopqurstuvwxyz + set b $a + set c abcdefghijklmnopqurstuvwxyz0123456789 + regsub $a $c $b d + list $d [string length $d] [string bytelength $d] +} [list abcdefghijklmnopqurstuvwxyz0123456789 37 37] + +test regexp-21.1 {regexp command compiling tests} { + evalInProc { + regexp foo bar + } +} 0 +test regexp-21.2 {regexp command compiling tests} { + evalInProc { + regexp {^foo$} dogfood + } +} 0 +test regexp-21.3 {regexp command compiling tests} { + evalInProc { + set a foo + regexp {^foo$} $a + } +} 1 +test regexp-21.4 {regexp command compiling tests} { + evalInProc { + regexp foo dogfood + } +} 1 +test regexp-21.5 {regexp command compiling tests} { + evalInProc { + regexp foo dogfod + } +} 0 + +# cleanup +::tcltest::cleanupTests +return -- cgit v0.12