diff options
-rw-r--r-- | ChangeLog | 12 | ||||
-rw-r--r-- | generic/tclCmdMZ.c | 49 | ||||
-rw-r--r-- | tests/regexp.test | 304 |
3 files changed, 320 insertions, 45 deletions
@@ -1,3 +1,15 @@ +2010-02-09 Mo DeJong <mdejong@users.sourceforge.net> + + [Bug 2826551, Patch 2948425]: Assorted regexp bugs related to -all, + -line and -start options and newlines. + * generic/tclCmdMZ.c (Tcl_RegexpObjCmd): If -offset is given, treat it + as the start of the line if the previous character was a newline. Fix + nasty edge case where a zero length match would not advance the index. + * tests/regexp.test: Add regression tests back ported from Jacl. + Checks for a number of issues related to -line and newline handling. A + few of tests were broken before the patch and continue to be broken, + marked as knownBug. + 2010-02-11 Donal K. Fellows <dkf@users.sf.net> * generic/tclOO.c (ObjectRenamedTrace): [Bug 2949397]: Prevent diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 7e42ec3..5cf9761 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -15,7 +15,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCmdMZ.c,v 1.198 2009/12/25 22:45:05 nijtmans Exp $ + * RCS: @(#) $Id: tclCmdMZ.c,v 1.199 2010/02/11 11:14:22 dkf Exp $ */ #include "tclInt.h" @@ -108,7 +108,7 @@ Tcl_RegexpObjCmd( Tcl_Obj *const objv[]) /* Argument objects. */ { int i, indices, match, about, offset, all, doinline, numMatchesSaved; - int cflags, eflags, stringLength; + int cflags, eflags, stringLength, matchLength; Tcl_RegExp regExpr; Tcl_Obj *objPtr, *startIndex = NULL, *resultPtr = NULL; Tcl_RegExpInfo info; @@ -126,7 +126,6 @@ Tcl_RegexpObjCmd( indices = 0; about = 0; cflags = TCL_REG_ADVANCED; - eflags = 0; offset = 0; all = 0; doinline = 0; @@ -250,15 +249,6 @@ Tcl_RegexpObjCmd( return TCL_ERROR; } - if (offset > 0) { - /* - * Add flag if using offset (string is part of a larger string), so - * that "^" won't match. - */ - - eflags |= TCL_REG_NOTBOL; - } - objc -= 2; objv += 2; @@ -286,12 +276,23 @@ Tcl_RegexpObjCmd( */ while (1) { - match = Tcl_RegExpExecObj(interp, regExpr, objPtr, - offset /* offset */, numMatchesSaved, eflags - | ((offset > 0 && - (Tcl_GetUniChar(objPtr,offset-1) != (Tcl_UniChar)'\n')) - ? TCL_REG_NOTBOL : 0)); + /* + * Pass either 0 or TCL_REG_NOTBOL in the eflags. Passing + * TCL_REG_NOTBOL indicates that the character at offset should not be + * considered the start of the line. If for example the pattern {^} is + * passed and -start is positive, then the pattern will not match the + * start of the string unless the previous character is a newline. + */ + if ((offset == 0) || ((offset > 0) && + (Tcl_GetUniChar(objPtr, offset-1) == (Tcl_UniChar) '\n'))) { + eflags = 0; + } else { + eflags = TCL_REG_NOTBOL; + } + + match = Tcl_RegExpExecObj(interp, regExpr, objPtr, offset, + numMatchesSaved, eflags); if (match < 0) { return TCL_ERROR; } @@ -385,6 +386,7 @@ Tcl_RegexpObjCmd( } } else { Tcl_Obj *valuePtr; + valuePtr = Tcl_ObjSetVar2(interp, objv[i], NULL, newPtr, 0); if (valuePtr == NULL) { Tcl_AppendResult(interp, "couldn't set variable \"", @@ -408,12 +410,19 @@ Tcl_RegexpObjCmd( * offset never changes). */ - if (info.matches[0].end == 0) { + matchLength = (info.matches[0].end - info.matches[0].start); + + offset += info.matches[0].end; + + /* + * A match of length zero could happen for {^} {$} or {.*} and in + * these cases we always want to bump the index up one. + */ + + if (matchLength == 0) { offset++; } - offset += info.matches[0].end; all++; - eflags |= TCL_REG_NOTBOL; if (offset >= stringLength) { break; } diff --git a/tests/regexp.test b/tests/regexp.test index 2fe87b9..c77e147 100644 --- a/tests/regexp.test +++ b/tests/regexp.test @@ -11,7 +11,7 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: regexp.test,v 1.34 2009/09/21 21:30:41 mdejong Exp $ +# RCS: @(#) $Id: regexp.test,v 1.35 2010/02/11 11:14:22 dkf Exp $ if {[lsearch [namespace children] ::tcltest] == -1} { package require tcltest 2 @@ -19,6 +19,9 @@ if {[lsearch [namespace children] ::tcltest] == -1} { } catch {unset foo} + +testConstraint exec [llength [info commands exec]] + test regexp-1.1 {basic regexp operation} { regexp ab*c abbbc } 1 @@ -43,7 +46,6 @@ test regexp-1.7 {regexp utf compliance} { regexp "\u4e4eb q" "a\u4e4eb qw\u5e4e\x4e wq" bar list [string compare $foo $bar] [regexp 4 $bar] } {0 0} - test regexp-1.8 {regexp ***= metasyntax} { regexp -- "***=o" "aeiou" } 1 @@ -311,12 +313,39 @@ test regexp-7.17 {regsub utf compliance} { test regexp-7.18 {basic regsub replacement} { list [regsub a+ aaa {&} foo] $foo } {1 aaa} -test regexp-7.19 {basic regsub backslash replacement} { +test regexp-7.19 {basic regsub replacement} { + list [regsub a+ aaa {\&} foo] $foo +} {1 &} +test regexp-7.20 {basic regsub replacement} { + list [regsub a+ aaa {\\&} foo] $foo +} {1 {\aaa}} +test regexp-7.21 {basic regsub replacement} { + list [regsub a+ aaa {\\\&} foo] $foo +} {1 {\&}} +test regexp-7.22 {basic regsub replacement} { list [regsub a+ aaa {\0} foo] $foo } {1 aaa} -test regexp-7.20 {basic regsub backslash replacement} { +test regexp-7.23 {basic regsub replacement} { + list [regsub a+ aaa {\\0} foo] $foo +} {1 {\0}} +test regexp-7.24 {basic regsub replacement} { list [regsub a+ aaa {\\\0} foo] $foo } {1 {\aaa}} +test regexp-7.25 {basic regsub replacement} { + list [regsub a+ aaa {\\\\0} foo] $foo +} {1 {\\0}} +test regexp-7.26 {dollar zero is not a backslash replacement} { + list [regsub a+ aaa {$0} foo] $foo +} {1 {$0}} +test regexp-7.27 {dollar zero is not a backslash replacement} { + list [regsub a+ aaa {\0$0} foo] $foo +} {1 {aaa$0}} +test regexp-7.28 {dollar zero is not a backslash replacement} { + list [regsub a+ aaa {\$0} foo] $foo +} {1 {\$0}} +test regexp-7.29 {dollar zero is not a backslash replacement} { + list [regsub a+ aaa {\\} foo] $foo +} {1 \\} test regexp-8.1 {case conversion in regsub} { list [regsub -nocase a(a+) xaAAaAAay & foo] $foo @@ -437,10 +466,8 @@ test regexp-12.1 {Tcl_RegExpExec: large number of subexpressions} {macCrash} { } {1 abcdefghijklmnopqrstuvwxyz a b c d e f g h i j k l m n o p q r s t u v w x y z} test regexp-13.1 {regsub of a very large string} { - # This test is designed to stress the memory subsystem in order - # to catch Bug #933. It only fails if the Tcl memory allocator - # is in use. - + # This test is designed to stress the memory subsystem in order to catch + # Bug #933. It only fails if the Tcl memory allocator is in use. set line {BEGIN_TABLE ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; END_TABLE} set filedata [string repeat $line 200] for {set i 1} {$i<10} {incr i} { @@ -469,10 +496,8 @@ test regexp-14.2 {CompileRegexp: regexp cache, different flags} { append x *a regexp -nocase $x bbba } 1 - -testConstraint exec [llength [info commands exec]] test regexp-14.3 {CompileRegexp: regexp cache, empty regexp and empty cache} -constraints { - exec + exec } -setup { set junk [makeFile {puts [regexp {} foo]} junk.tcl] } -body { @@ -642,64 +667,293 @@ test regexp-20.2 {regsub shared object shimmering with -about} { test regexp-21.1 {regsub works with empty string} { regsub -- ^ {} foo } {foo} - test regexp-21.2 {regsub works with empty string} { regsub -- \$ {} foo } {foo} - test regexp-21.3 {regsub works with empty string offset} { regsub -start 0 -- ^ {} foo } {foo} - test regexp-21.4 {regsub works with empty string offset} { regsub -start 0 -- \$ {} foo } {foo} - test regexp-21.5 {regsub works with empty string offset} { regsub -start 3 -- \$ {123} foo } {123foo} - test regexp-21.6 {regexp works with empty string} { regexp -- ^ {} } {1} - test regexp-21.7 {regexp works with empty string} { regexp -start 0 -- ^ {} } {1} - test regexp-21.8 {regexp works with empty string offset} { regexp -start 3 -- ^ {123} } {0} - test regexp-21.9 {regexp works with empty string offset} { regexp -start 3 -- \$ {123} } {1} - test regexp-21.10 {multiple matches handle newlines} { regsub -all -lineanchor -- {^#[^\n]*\n} "#one\n#two\n#three\n" foo\n } "foo\nfoo\nfoo\n" - test regexp-21.11 {multiple matches handle newlines} { regsub -all -line -- ^ "a\nb\nc" \# } "\#a\n\#b\n\#c" - test regexp-21.12 {multiple matches handle newlines} { regsub -all -line -- ^ "\n\n" \# } "\#\n\#\n\#" - test regexp-21.13 {multiple matches handle newlines} { regexp -all -inline -indices -line -- ^ "a\nb\nc" } {{0 -1} {2 1} {4 3}} - test regexp-22.1 {Bug 1810038} { regexp ($|^X)* {} } 1 - test regexp-22.2 {regexp compile and backrefs, Bug 1857126} { regexp -- {([bc])\1} bb } 1 +test regexp-23.1 {regexp -all and -line} { + set string "" + list \ + [regexp -all -inline -indices -line -- {^} $string] \ + [regexp -all -inline -indices -line -- {^$} $string] \ + [regexp -all -inline -indices -line -- {$} $string] +} {{{0 -1}} {{0 -1}} {{0 -1}}} +test regexp-23.2 {regexp -all and -line} { + set string "\n" + list \ + [regexp -all -inline -indices -line -- {^} $string] \ + [regexp -all -inline -indices -line -- {^$} $string] \ + [regexp -all -inline -indices -line -- {$} $string] +} {{{0 -1}} {{0 -1}} {{0 -1}}} +test regexp-23.3 {regexp -all and -line} { + set string "\n\n" + list \ + [regexp -all -inline -indices -line -- {^} $string] \ + [regexp -all -inline -indices -line -- {^$} $string] \ + [regexp -all -inline -indices -line -- {$} $string] +} {{{0 -1} {1 0}} {{0 -1} {1 0}} {{0 -1} {1 0}}} +test regexp-23.4 {regexp -all and -line} { + set string "a" + list \ + [regexp -all -inline -indices -line -- {^} $string] \ + [regexp -all -inline -indices -line -- {^.*$} $string] \ + [regexp -all -inline -indices -line -- {$} $string] +} {{{0 -1}} {{0 0}} {{1 0}}} +test regexp-23.5 {regexp -all and -line} {knownBug} { + set string "a\n" + list \ + [regexp -all -inline -indices -line -- {^} $string] \ + [regexp -all -inline -indices -line -- {^.*$} $string] \ + [regexp -all -inline -indices -line -- {$} $string] +} {{{0 -1} {2 1}} {{0 0} {2 1}} {{1 0} {2 1}}} +test regexp-23.6 {regexp -all and -line} { + set string "\na" + list \ + [regexp -all -inline -indices -line -- {^} $string] \ + [regexp -all -inline -indices -line -- {^.*$} $string] \ + [regexp -all -inline -indices -line -- {$} $string] +} {{{0 -1} {1 0}} {{0 -1} {1 1}} {{0 -1} {2 1}}} +test regexp-23.7 {regexp -all and -line} {knownBug} { + set string "ab\n" + list \ + [regexp -all -inline -indices -line -- {^} $string] \ + [regexp -all -inline -indices -line -- {^.*$} $string] \ + [regexp -all -inline -indices -line -- {$} $string] +} {{{0 -1} {3 2}} {{0 1} {3 2}} {{2 1} {3 2}}} +test regexp-23.8 {regexp -all and -line} { + set string "a\nb" + list \ + [regexp -all -inline -indices -line -- {^} $string] \ + [regexp -all -inline -indices -line -- {^.*$} $string] \ + [regexp -all -inline -indices -line -- {$} $string] +} {{{0 -1} {2 1}} {{0 0} {2 2}} {{1 0} {3 2}}} +test regexp-23.9 {regexp -all and -line} {knownBug} { + set string "a\nb\n" + list \ + [regexp -all -inline -indices -line -- {^} $string] \ + [regexp -all -inline -indices -line -- {^.*$} $string] \ + [regexp -all -inline -indices -line -- {$} $string] +} {{{0 -1} {2 1} {4 3}} {{0 0} {2 2} {4 3}} {{1 0} {3 2} {4 3}}} +test regexp-23.10 {regexp -all and -line} { + set string "a\nb\nc" + list \ + [regexp -all -inline -indices -line -- {^} $string] \ + [regexp -all -inline -indices -line -- {^.*$} $string] \ + [regexp -all -inline -indices -line -- {$} $string] +} {{{0 -1} {2 1} {4 3}} {{0 0} {2 2} {4 4}} {{1 0} {3 2} {5 4}}} +test regexp-23.11 {regexp -all and -line} { + regexp -all -inline -indices -line -- {b} "abb\nb" +} {{1 1} {2 2} {4 4}} + +test regexp-24.1 {regsub -all and -line} { + foreach {v1 v2 v3} {{} {} {}} {} + set string "" + list \ + [regsub -line -all {^} $string {<&>} v1] $v1 \ + [regsub -line -all {^$} $string {<&>} v2] $v2 \ + [regsub -line -all {$} $string {<&>} v3] $v3 +} {1 <> 1 <> 1 <>} +test regexp-24.2 {regsub -all and -line} { + foreach {v1 v2 v3} {{} {} {}} {} + set string "\n" + list \ + [regsub -line -all {^} $string {<&>} v1] $v1 \ + [regsub -line -all {^$} $string {<&>} v2] $v2 \ + [regsub -line -all {$} $string {<&>} v3] $v3 +} [list 2 "<>\n<>" 2 "<>\n<>" 2 "<>\n<>"] +test regexp-24.3 {regsub -all and -line} { + foreach {v1 v2 v3} {{} {} {}} {} + set string "\n\n" + list \ + [regsub -line -all {^} $string {<&>} v1] $v1 \ + [regsub -line -all {^$} $string {<&>} v2] $v2 \ + [regsub -line -all {$} $string {<&>} v3] $v3 +} [list 3 "<>\n<>\n<>" 3 "<>\n<>\n<>" 3 "<>\n<>\n<>"] +test regexp-24.4 {regsub -all and -line} { + foreach {v1 v2 v3} {{} {} {}} {} + set string "a" + list \ + [regsub -line -all {^} $string {<&>} v1] $v1 \ + [regsub -line -all {^.*$} $string {<&>} v2] $v2 \ + [regsub -line -all {$} $string {<&>} v3] $v3 +} [list 1 "<>a" 1 "<a>" 1 "a<>"] +test regexp-24.5 {regsub -all and -line} { + foreach {v1 v2 v3} {{} {} {}} {} + set string "a\n" + list \ + [regsub -line -all {^} $string {<&>} v1] $v1 \ + [regsub -line -all {^.*$} $string {<&>} v2] $v2 \ + [regsub -line -all {$} $string {<&>} v3] $v3 +} [list 2 "<>a\n<>" 2 "<a>\n<>" 2 "a<>\n<>"] +test regexp-24.6 {regsub -all and -line} { + foreach {v1 v2 v3} {{} {} {}} {} + set string "\na" + list \ + [regsub -line -all {^} $string {<&>} v1] $v1 \ + [regsub -line -all {^.*$} $string {<&>} v2] $v2 \ + [regsub -line -all {$} $string {<&>} v3] $v3 +} [list 2 "<>\n<>a" 2 "<>\n<a>" 2 "<>\na<>"] +test regexp-24.7 {regsub -all and -line} { + foreach {v1 v2 v3} {{} {} {}} {} + set string "ab\n" + list \ + [regsub -line -all {^} $string {<&>} v1] $v1 \ + [regsub -line -all {^.*$} $string {<&>} v2] $v2 \ + [regsub -line -all {$} $string {<&>} v3] $v3 +} [list 2 "<>ab\n<>" 2 "<ab>\n<>" 2 "ab<>\n<>"] +test regexp-24.8 {regsub -all and -line} { + foreach {v1 v2 v3} {{} {} {}} {} + set string "a\nb" + list \ + [regsub -line -all {^} $string {<&>} v1] $v1 \ + [regsub -line -all {^.*$} $string {<&>} v2] $v2 \ + [regsub -line -all {$} $string {<&>} v3] $v3 +} [list 2 "<>a\n<>b" 2 "<a>\n<b>" 2 "a<>\nb<>"] +test regexp-24.9 {regsub -all and -line} { + foreach {v1 v2 v3} {{} {} {}} {} + set string "a\nb\n" + list \ + [regsub -line -all {^} $string {<&>} v1] $v1 \ + [regsub -line -all {^.*$} $string {<&>} v2] $v2 \ + [regsub -line -all {$} $string {<&>} v3] $v3 +} [list 3 "<>a\n<>b\n<>" 3 "<a>\n<b>\n<>" 3 "a<>\nb<>\n<>"] +test regexp-24.10 {regsub -all and -line} { + foreach {v1 v2 v3} {{} {} {}} {} + set string "a\nb\nc" + list \ + [regsub -line -all {^} $string {<&>} v1] $v1 \ + [regsub -line -all {^.*$} $string {<&>} v2] $v2 \ + [regsub -line -all {$} $string {<&>} v3] $v3 +} [list 3 "<>a\n<>b\n<>c" 3 "<a>\n<b>\n<c>" 3 "a<>\nb<>\nc<>"] +test regexp-24.11 {regsub -all and -line} { + regsub -line -all {b} "abb\nb" {<&>} +} "a<b><b>\n<b>" + +test regexp-25.1 {regexp without -line option} { + set foo "" + list [regexp {a.*b} "dabc\naxyb\n" foo] $foo +} [list 1 abc\naxyb] +test regexp-25.2 {regexp without -line option} { + set foo "" + list [regexp {^a.*b$} "dabc\naxyb\n" foo] $foo +} {0 {}} +test regexp-25.3 {regexp with -line option} { + set foo "" + list [regexp -line {^a.*b$} "dabc\naxyb\n" foo] $foo +} {1 axyb} +test regexp-25.4 {regexp with -line option} { + set foo "" + list [regexp -line {^a.*b$} "dabc\naxyb\nxb" foo] $foo +} {1 axyb} +test regexp-25.5 {regexp without -line option} { + set foo "" + list [regexp {^a.*b$} "dabc\naxyb\nxb" foo] $foo +} {0 {}} +test regexp-25.6 {regexp without -line option} { + set foo "" + list [regexp {a.*b$} "dabc\naxyb\nxb" foo] $foo +} "1 {abc\naxyb\nxb}" +test regexp-25.7 {regexp with -lineanchor option} { + set foo "" + list [regexp -lineanchor {^a.*b$} "dabc\naxyb\nxb" foo] $foo +} "1 {axyb\nxb}" +test regexp-25.8 {regexp with -lineanchor and -linestop option} { + set foo "" + list [regexp -lineanchor -linestop {^a.*b$} "dabc\naxyb\nxb" foo] $foo +} {1 axyb} +test regexp-25.9 {regexp with -linestop option} { + set foo "" + list [regexp -linestop {a.*b} "ab\naxyb\nxb" foo] $foo +} {1 ab} + +test regexp-26.1 {matches start of line 1 time} { + regexp -all -inline -- {^a+} "aab\naaa" +} {aa} +test regexp-26.2 {matches start of line(s) 2 times} { + regexp -all -inline -line -- {^a+} "aab\naaa" +} {aa aaa} +test regexp-26.3 {effect of -line -all and -start} { + list \ + [regexp -all -inline -line -start 0 -- {^a+} "aab\naaa"] \ + [regexp -all -inline -line -start 1 -- {^a+} "aab\naaa"] \ + [regexp -all -inline -line -start 3 -- {^a+} "aab\naaa"] \ + [regexp -all -inline -line -start 4 -- {^a+} "aab\naaa"] \ +} {{aa aaa} aaa aaa aaa} +# No regexp-26.4 +test regexp-26.5 {match length 0, match length 1} { + regexp -all -inline -line -- {^b*} "a\nb" +} {{} b} +test regexp-26.6 {non reporting capture group} { + regexp -all -inline -line -- {^(?:a+|b)} "aab\naaa" +} {aa aaa} +test regexp-26.7 {Tcl bug 2826551: -line sensitive regexp and -start} { + set match1 {} + set match2 {} + list \ + [regexp -start 0 -indices -line {^a} "\nab" match1] $match1 \ + [regexp -start 1 -indices -line {^a} "\nab" match2] $match2 +} {1 {1 1} 1 {1 1}} +test regexp-26.8 {Tcl bug 2826551: diff regexp with -line option} { + set data "@1\n2\n+3\n@4\n-5\n+6\n7\n@8\n9\n" + regexp -all -inline -line {^@.*\n(?:[^@].*\n?)*} $data +} [list "@1\n2\n+3\n" "@4\n-5\n+6\n7\n" "@8\n9\n"] +test regexp-26.9 {Tcl bug 2826551: diff regexp with embedded -line option} { + set data "@1\n2\n+3\n@4\n-5\n+6\n7\n@8\n9\n" + regexp -all -inline {(?n)^@.*\n(?:[^@].*\n?)*} $data +} [list "@1\n2\n+3\n" "@4\n-5\n+6\n7\n" "@8\n9\n"] +test regexp-26.10 {regexp with -line option} { + regexp -all -inline -line -- {a*} "a\n" +} {a {}} +test regexp-26.11 {regexp without -line option} { + regexp -all -inline -- {a*} "a\n" +} {a {}} +test regexp-26.12 {regexp with -line option} { + regexp -all -inline -line -- {a*} "b\n" +} {{} {}} +test regexp-26.13 {regexp without -line option} { + regexp -all -inline -- {a*} "b\n" +} {{} {}} + # cleanup ::tcltest::cleanupTests return |