From 9f5140ba5656acce75cbb9f43602fdd70cf400c6 Mon Sep 17 00:00:00 2001 From: dkf Date: Thu, 12 Aug 2010 08:55:37 +0000 Subject: * generic/tclCmdMZ.c (Tcl_RegexpObjCmd): [Bug 2826551, Patch 2948425]: Backport of updates to make handling of RE line anchors correct. --- ChangeLog | 45 +++++---- generic/tclCmdMZ.c | 46 ++++++---- tests/regexp.test | 264 +++++++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 298 insertions(+), 57 deletions(-) diff --git a/ChangeLog b/ChangeLog index fbf4551..0508bf9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,15 +1,20 @@ +2010-08-12 Donal K. Fellows + + * generic/tclCmdMZ.c (Tcl_RegexpObjCmd): [Bug 2826551, Patch 2948425]: + Backport of updates to make handling of RE line anchors correct. + 2010-08-11 Jeff Hobbs - * unix/ldAix: remove ancient (pre-4.2) AIX support + * unix/ldAix: Remove ancient (pre-4.2) AIX support * unix/configure: regen with ac-2.59 * unix/configure.in, unix/tclConfig.sh.in, unix/Makefile.in: - * unix/tcl.m4 (AIX): remove the need for ldAIX, replace with - -bexpall/-brtl. Remove TCL_EXP_FILE (export file) and other - baggage that went with it. Remove pre-4 AIX build support. + * unix/tcl.m4 (AIX): Remove the need for ldAIX, replace with + -bexpall/-brtl. Remove TCL_EXP_FILE (export file) and other baggage + that went with it. Remove pre-4 AIX build support. 2010-08-10 Jeff Hobbs - * generic/tclUtil.c (TclByteArrayMatch): patterns may not be + * generic/tclUtil.c (TclByteArrayMatch): Patterns may not be null-terminated, so account for that. 
2010-08-05 Don Porter @@ -20,10 +25,10 @@ 2010-08-04 Jeff Hobbs - * unix/tclUnixFCmd.c: adjust license header as per + * unix/tclUnixFCmd.c: Adjust license header as per ftp://ftp.cs.berkeley.edu/pub/4bsd/README.Impt.License.Change - * license.terms: fix DFARs note for number-adjusted rights clause + * license.terms: Fix DFARs note for number-adjusted rights clause * win/tclWin32Dll.c (asciiProcs, unicodeProcs): * win/tclWinLoad.c (TclpDlopen): 'load' use LoadLibraryEx with @@ -54,14 +59,14 @@ 2010-08-03 Andreas Kupries - * tests/var.test (var-19.1): [Bug 3037525]: Added test - demonstrating the local hashtable deletion crash and fix. + * tests/var.test (var-19.1): [Bug 3037525]: Added test demonstrating + the local hashtable deletion crash and fix. - * tests/info.test (info-39.1, test_info_frame): Changed absolute - to relative frame adressing to handle difference between testing - with -singleproc 1 vs. the default -singleproc 0. Plus comment - fix. The test and issue are not relevant to the trunk, forward - porting is not required. + * tests/info.test (info-39.1, test_info_frame): Changed absolute to + relative frame adressing to handle difference between testing with + -singleproc 1 vs. the default -singleproc 0. Plus comment fix. The + test and issue are not relevant to the trunk, forward porting is not + required. 2010-08-03 Don Porter @@ -80,19 +85,19 @@ 2010-07-28 Miguel Sofer - * generic/tclVar.c: fix for crash [Bug 3037525]: lose fickle - optimisation in TclDeleteVars (used for runtime-created locals) + * generic/tclVar.c: [Bug 3037525]: lose fickle optimisation in + TclDeleteVars (used for runtime-created locals) that caused crashes. 
2010-07-25 Jan Nijtmans - * generic/tclInt.h: [Bug 3030870] make itcl 3.x built with pre-8.6 - * generic/tclBasic.c: work in 8.6 revert tclInt.h to what it was + * generic/tclInt.h: [Bug 3030870]: Make itcl 3.x built with pre-8.6 + * generic/tclBasic.c: work in 8.6 revert tclInt.h to what it was before, and relax the relation between Tcl_CallFrame and CallFrame. 2010-07-17 Jan Nijtmans - * generic/tcl.h: [Bug 3030870] make itcl 3.x built with - * generic/tclInt.h: pre-8.6 work in 8.6 + * generic/tcl.h: [Bug 3030870]: Make itcl 3.x built with pre-8.6 + * generic/tclInt.h: work in 8.6 2010-07-02 Donal K. Fellows diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 2011b4b..0c4615a 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -15,7 +15,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCmdMZ.c,v 1.163.2.8 2010/04/06 08:26:02 vasiljevic Exp $ + * RCS: @(#) $Id: tclCmdMZ.c,v 1.163.2.9 2010/08/12 08:55:38 dkf Exp $ */ #include "tclInt.h" @@ -89,7 +89,7 @@ Tcl_RegexpObjCmd( Tcl_Obj *CONST objv[]) /* Argument objects. */ { int i, indices, match, about, offset, all, doinline, numMatchesSaved; - int cflags, eflags, stringLength; + int cflags, eflags, stringLength, matchLength; Tcl_RegExp regExpr; Tcl_Obj *objPtr, *startIndex = NULL, *resultPtr = NULL; Tcl_RegExpInfo info; @@ -231,15 +231,6 @@ Tcl_RegexpObjCmd( return TCL_ERROR; } - if (offset > 0) { - /* - * Add flag if using offset (string is part of a larger string), so - * that "^" won't match. - */ - - eflags |= TCL_REG_NOTBOL; - } - objc -= 2; objv += 2; @@ -267,12 +258,23 @@ Tcl_RegexpObjCmd( */ while (1) { - match = Tcl_RegExpExecObj(interp, regExpr, objPtr, - offset /* offset */, numMatchesSaved, eflags - | ((offset > 0 && - (Tcl_GetUniChar(objPtr,offset-1) != (Tcl_UniChar)'\n')) - ? TCL_REG_NOTBOL : 0)); + /* + * Pass either 0 or TCL_REG_NOTBOL in the eflags. 
Passing + * TCL_REG_NOTBOL indicates that the character at offset should not be + * considered the start of the line. If for example the pattern {^} is + * passed and -start is positive, then the pattern will not match the + * start of the string unless the previous character is a newline. + */ + if ((offset == 0) || ((offset > 0) && + (Tcl_GetUniChar(objPtr, offset-1) == (Tcl_UniChar)'\n'))) { + eflags = 0; + } else { + eflags = TCL_REG_NOTBOL; + } + + match = Tcl_RegExpExecObj(interp, regExpr, objPtr, offset, + numMatchesSaved, eflags); if (match < 0) { return TCL_ERROR; } @@ -389,12 +391,18 @@ Tcl_RegexpObjCmd( * offset never changes). */ - if (info.matches[0].end == 0) { + matchLength = info.matches[0].end - info.matches[0].start; + offset += info.matches[0].end; + + /* + * A match of length zero could happen for {^} {$} or {.*} and in + * these cases we always want to bump the index up one. + */ + + if (matchLength == 0) { offset++; } - offset += info.matches[0].end; all++; - eflags |= TCL_REG_NOTBOL; if (offset >= stringLength) { break; } diff --git a/tests/regexp.test b/tests/regexp.test index 295d83c..37f4442 100644 --- a/tests/regexp.test +++ b/tests/regexp.test @@ -11,13 +11,15 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. 
# -# RCS: @(#) $Id: regexp.test,v 1.30.2.1 2008/08/21 23:19:06 hobbs Exp $ +# RCS: @(#) $Id: regexp.test,v 1.30.2.2 2010/08/12 08:55:39 dkf Exp $ if {[lsearch [namespace children] ::tcltest] == -1} { package require tcltest 2 namespace import -force ::tcltest::* } +testConstraint exec [llength [info commands exec]] + catch {unset foo} test regexp-1.1 {basic regexp operation} { regexp ab*c abbbc @@ -457,8 +459,6 @@ test regexp-14.2 {CompileRegexp: regexp cache, different flags} { append x *a regexp -nocase $x bbba } 1 - -testConstraint exec [llength [info commands exec]] test regexp-14.3 {CompileRegexp: regexp cache, empty regexp and empty cache} -constraints { exec } -setup { @@ -622,7 +622,7 @@ test regexp-20.1 {regsub shared object shimmering} { set c abcdefghijklmnopqurstuvwxyz0123456789 regsub $a $c $b d list $d [string length $d] [string bytelength $d] -} [list abcdefghijklmnopqurstuvwxyz0123456789 37 37] +} {abcdefghijklmnopqurstuvwxyz0123456789 37 37} test regexp-20.2 {regsub shared object shimmering with -about} { eval regexp -about abc } {0 {}} @@ -630,64 +630,292 @@ test regexp-20.2 {regsub shared object shimmering with -about} { test regexp-21.1 {regsub works with empty string} { regsub -- ^ {} foo } {foo} - test regexp-21.2 {regsub works with empty string} { regsub -- \$ {} foo } {foo} - test regexp-21.3 {regsub works with empty string offset} { regsub -start 0 -- ^ {} foo } {foo} - test regexp-21.4 {regsub works with empty string offset} { regsub -start 0 -- \$ {} foo } {foo} - test regexp-21.5 {regsub works with empty string offset} { regsub -start 3 -- \$ {123} foo } {123foo} - test regexp-21.6 {regexp works with empty string} { regexp -- ^ {} } {1} - test regexp-21.7 {regexp works with empty string} { regexp -start 0 -- ^ {} } {1} - test regexp-21.8 {regexp works with empty string offset} { regexp -start 3 -- ^ {123} } {0} - test regexp-21.9 {regexp works with empty string offset} { regexp -start 3 -- \$ {123} } {1} - test regexp-21.10 {multiple 
matches handle newlines} { regsub -all -lineanchor -- {^#[^\n]*\n} "#one\n#two\n#three\n" foo\n } "foo\nfoo\nfoo\n" - test regexp-21.11 {multiple matches handle newlines} { regsub -all -line -- ^ "a\nb\nc" \# } "\#a\n\#b\n\#c" - test regexp-21.12 {multiple matches handle newlines} { regsub -all -line -- ^ "\n\n" \# } "\#\n\#\n\#" - test regexp-21.13 {multiple matches handle newlines} { regexp -all -inline -indices -line -- ^ "a\nb\nc" } {{0 -1} {2 1} {4 3}} - test regexp-22.1 {Bug 1810038} { regexp ($|^X)* {} } 1 - test regexp-22.2 {regexp compile and backrefs, Bug 1857126} { regexp -- {([bc])\1} bb } 1 +test regexp-23.1 {regexp -all and -line} { + set string "" + list \ + [regexp -all -inline -indices -line -- {^} $string] \ + [regexp -all -inline -indices -line -- {^$} $string] \ + [regexp -all -inline -indices -line -- {$} $string] +} {{{0 -1}} {{0 -1}} {{0 -1}}} +test regexp-23.2 {regexp -all and -line} { + set string "\n" + list \ + [regexp -all -inline -indices -line -- {^} $string] \ + [regexp -all -inline -indices -line -- {^$} $string] \ + [regexp -all -inline -indices -line -- {$} $string] +} {{{0 -1}} {{0 -1}} {{0 -1}}} +test regexp-23.3 {regexp -all and -line} { + set string "\n\n" + list \ + [regexp -all -inline -indices -line -- {^} $string] \ + [regexp -all -inline -indices -line -- {^$} $string] \ + [regexp -all -inline -indices -line -- {$} $string] +} {{{0 -1} {1 0}} {{0 -1} {1 0}} {{0 -1} {1 0}}} +test regexp-23.4 {regexp -all and -line} { + set string "a" + list \ + [regexp -all -inline -indices -line -- {^} $string] \ + [regexp -all -inline -indices -line -- {^.*$} $string] \ + [regexp -all -inline -indices -line -- {$} $string] +} {{{0 -1}} {{0 0}} {{1 0}}} +test regexp-23.5 {regexp -all and -line} {knownBug} { + set string "a\n" + list \ + [regexp -all -inline -indices -line -- {^} $string] \ + [regexp -all -inline -indices -line -- {^.*$} $string] \ + [regexp -all -inline -indices -line -- {$} $string] +} {{{0 -1} {2 1}} {{0 0} {2 1}} {{1 0} 
{2 1}}} +test regexp-23.6 {regexp -all and -line} { + set string "\na" + list \ + [regexp -all -inline -indices -line -- {^} $string] \ + [regexp -all -inline -indices -line -- {^.*$} $string] \ + [regexp -all -inline -indices -line -- {$} $string] +} {{{0 -1} {1 0}} {{0 -1} {1 1}} {{0 -1} {2 1}}} +test regexp-23.7 {regexp -all and -line} {knownBug} { + set string "ab\n" + list \ + [regexp -all -inline -indices -line -- {^} $string] \ + [regexp -all -inline -indices -line -- {^.*$} $string] \ + [regexp -all -inline -indices -line -- {$} $string] +} {{{0 -1} {3 2}} {{0 1} {3 2}} {{2 1} {3 2}}} +test regexp-23.8 {regexp -all and -line} { + set string "a\nb" + list \ + [regexp -all -inline -indices -line -- {^} $string] \ + [regexp -all -inline -indices -line -- {^.*$} $string] \ + [regexp -all -inline -indices -line -- {$} $string] +} {{{0 -1} {2 1}} {{0 0} {2 2}} {{1 0} {3 2}}} +test regexp-23.9 {regexp -all and -line} {knownBug} { + set string "a\nb\n" + list \ + [regexp -all -inline -indices -line -- {^} $string] \ + [regexp -all -inline -indices -line -- {^.*$} $string] \ + [regexp -all -inline -indices -line -- {$} $string] +} {{{0 -1} {2 1} {4 3}} {{0 0} {2 2} {4 3}} {{1 0} {3 2} {4 3}}} +test regexp-23.10 {regexp -all and -line} { + set string "a\nb\nc" + list \ + [regexp -all -inline -indices -line -- {^} $string] \ + [regexp -all -inline -indices -line -- {^.*$} $string] \ + [regexp -all -inline -indices -line -- {$} $string] +} {{{0 -1} {2 1} {4 3}} {{0 0} {2 2} {4 4}} {{1 0} {3 2} {5 4}}} +test regexp-23.11 {regexp -all and -line} { + regexp -all -inline -indices -line -- {b} "abb\nb" +} {{1 1} {2 2} {4 4}} + +test regexp-24.1 {regsub -all and -line} { + foreach {v1 v2 v3} {{} {} {}} {} + set string "" + list \ + [regsub -line -all {^} $string {<&>} v1] $v1 \ + [regsub -line -all {^$} $string {<&>} v2] $v2 \ + [regsub -line -all {$} $string {<&>} v3] $v3 +} {1 <> 1 <> 1 <>} +test regexp-24.2 {regsub -all and -line} { + foreach {v1 v2 v3} {{} {} {}} {} + 
set string "\n" + list \ + [regsub -line -all {^} $string {<&>} v1] $v1 \ + [regsub -line -all {^$} $string {<&>} v2] $v2 \ + [regsub -line -all {$} $string {<&>} v3] $v3 +} "2 {<>\n<>} 2 {<>\n<>} 2 {<>\n<>}" +test regexp-24.3 {regsub -all and -line} { + foreach {v1 v2 v3} {{} {} {}} {} + set string "\n\n" + list \ + [regsub -line -all {^} $string {<&>} v1] $v1 \ + [regsub -line -all {^$} $string {<&>} v2] $v2 \ + [regsub -line -all {$} $string {<&>} v3] $v3 +} "3 {<>\n<>\n<>} 3 {<>\n<>\n<>} 3 {<>\n<>\n<>}" +test regexp-24.4 {regsub -all and -line} { + foreach {v1 v2 v3} {{} {} {}} {} + set string "a" + list \ + [regsub -line -all {^} $string {<&>} v1] $v1 \ + [regsub -line -all {^.*$} $string {<&>} v2] $v2 \ + [regsub -line -all {$} $string {<&>} v3] $v3 +} {1 <>a 1 <a> 1 a<>} +test regexp-24.5 {regsub -all and -line} { + foreach {v1 v2 v3} {{} {} {}} {} + set string "a\n" + list \ + [regsub -line -all {^} $string {<&>} v1] $v1 \ + [regsub -line -all {^.*$} $string {<&>} v2] $v2 \ + [regsub -line -all {$} $string {<&>} v3] $v3 +} "2 {<>a\n<>} 2 {<a>\n<>} 2 {a<>\n<>}" +test regexp-24.6 {regsub -all and -line} { + foreach {v1 v2 v3} {{} {} {}} {} + set string "\na" + list \ + [regsub -line -all {^} $string {<&>} v1] $v1 \ + [regsub -line -all {^.*$} $string {<&>} v2] $v2 \ + [regsub -line -all {$} $string {<&>} v3] $v3 +} "2 {<>\n<>a} 2 {<>\n<a>} 2 {<>\na<>}" +test regexp-24.7 {regsub -all and -line} { + foreach {v1 v2 v3} {{} {} {}} {} + set string "ab\n" + list \ + [regsub -line -all {^} $string {<&>} v1] $v1 \ + [regsub -line -all {^.*$} $string {<&>} v2] $v2 \ + [regsub -line -all {$} $string {<&>} v3] $v3 +} "2 {<>ab\n<>} 2 {<ab>\n<>} 2 {ab<>\n<>}" +test regexp-24.8 {regsub -all and -line} { + foreach {v1 v2 v3} {{} {} {}} {} + set string "a\nb" + list \ + [regsub -line -all {^} $string {<&>} v1] $v1 \ + [regsub -line -all {^.*$} $string {<&>} v2] $v2 \ + [regsub -line -all {$} $string {<&>} v3] $v3 +} "2 {<>a\n<>b} 2 {<a>\n<b>} 2 {a<>\nb<>}" +test regexp-24.9 {regsub -all and 
-line} { + foreach {v1 v2 v3} {{} {} {}} {} + set string "a\nb\n" + list \ + [regsub -line -all {^} $string {<&>} v1] $v1 \ + [regsub -line -all {^.*$} $string {<&>} v2] $v2 \ + [regsub -line -all {$} $string {<&>} v3] $v3 +} "3 {<>a\n<>b\n<>} 3 {<a>\n<b>\n<>} 3 {a<>\nb<>\n<>}" +test regexp-24.10 {regsub -all and -line} { + foreach {v1 v2 v3} {{} {} {}} {} + set string "a\nb\nc" + list \ + [regsub -line -all {^} $string {<&>} v1] $v1 \ + [regsub -line -all {^.*$} $string {<&>} v2] $v2 \ + [regsub -line -all {$} $string {<&>} v3] $v3 +} "3 {<>a\n<>b\n<>c} 3 {<a>\n<b>\n<c>} 3 {a<>\nb<>\nc<>}" +test regexp-24.11 {regsub -all and -line} { + regsub -line -all {b} "abb\nb" {<&>} +} "a<b><b>\n<b>" + +test regexp-25.1 {regexp without -line option} { + set foo "" + list [regexp {a.*b} "dabc\naxyb\n" foo] $foo +} "1 {abc\naxyb}" +test regexp-25.2 {regexp without -line option} { + set foo "" + list [regexp {^a.*b$} "dabc\naxyb\n" foo] $foo +} {0 {}} +test regexp-25.3 {regexp with -line option} { + set foo "" + list [regexp -line {^a.*b$} "dabc\naxyb\n" foo] $foo +} {1 axyb} +test regexp-25.4 {regexp with -line option} { + set foo "" + list [regexp -line {^a.*b$} "dabc\naxyb\nxb" foo] $foo +} {1 axyb} +test regexp-25.5 {regexp without -line option} { + set foo "" + list [regexp {^a.*b$} "dabc\naxyb\nxb" foo] $foo +} {0 {}} +test regexp-25.6 {regexp without -line option} { + set foo "" + list [regexp {a.*b$} "dabc\naxyb\nxb" foo] $foo +} "1 {abc\naxyb\nxb}" +test regexp-25.7 {regexp with -lineanchor option} { + set foo "" + list [regexp -lineanchor {^a.*b$} "dabc\naxyb\nxb" foo] $foo +} "1 {axyb\nxb}" +test regexp-25.8 {regexp with -lineanchor and -linestop option} { + set foo "" + list [regexp -lineanchor -linestop {^a.*b$} "dabc\naxyb\nxb" foo] $foo +} {1 axyb} +test regexp-25.9 {regexp with -linestop option} { + set foo "" + list [regexp -linestop {a.*b} "ab\naxyb\nxb" foo] $foo +} {1 ab} + +test regexp-26.1 {matches start of line 1 time} { + regexp -all -inline -- {^a+} "aab\naaa" +} {aa} +test 
regexp-26.2 {matches start of line(s) 2 times} { + regexp -all -inline -line -- {^a+} "aab\naaa" +} {aa aaa} +test regexp-26.3 {effect of -line -all and -start} { + list \ + [regexp -all -inline -line -start 0 -- {^a+} "aab\naaa"] \ + [regexp -all -inline -line -start 1 -- {^a+} "aab\naaa"] \ + [regexp -all -inline -line -start 3 -- {^a+} "aab\naaa"] \ + [regexp -all -inline -line -start 4 -- {^a+} "aab\naaa"] \ +} {{aa aaa} aaa aaa aaa} +test regexp-26.5 {match length 0, match length 1} { + regexp -all -inline -line -- {^b*} "a\nb" +} {{} b} +test regexp-26.6 {non reporting capture group} { + regexp -all -inline -line -- {^(?:a+|b)} "aab\naaa" +} {aa aaa} +test regexp-26.7 {Tcl bug 2826551: -line sensitive regexp and -start} { + set match1 {} + set match2 {} + list \ + [regexp -start 0 -indices -line {^a} "\nab" match1] $match1 \ + [regexp -start 1 -indices -line {^a} "\nab" match2] $match2 +} {1 {1 1} 1 {1 1}} +test regexp-26.8 {Tcl bug 2826551: diff regexp with -line option} { + set data "@1\n2\n+3\n@4\n-5\n+6\n7\n@8\n9\n" + regexp -all -inline -line {^@.*\n(?:[^@].*\n?)*} $data +} "{@1\n2\n+3\n} {@4\n-5\n+6\n7\n} {@8\n9\n}" +test regexp-26.9 {Tcl bug 2826551: diff regexp with embedded -line option} { + set data "@1\n2\n+3\n@4\n-5\n+6\n7\n@8\n9\n" + regexp -all -inline {(?n)^@.*\n(?:[^@].*\n?)*} $data +} "{@1\n2\n+3\n} {@4\n-5\n+6\n7\n} {@8\n9\n}" +test regexp-26.10 {regexp with -line option} { + regexp -all -inline -line -- {a*} "a\n" +} {a {}} +test regexp-26.11 {regexp without -line option} { + regexp -all -inline -- {a*} "a\n" +} {a {}} +test regexp-26.12 {regexp with -line option} { + regexp -all -inline -line -- {a*} "b\n" +} {{} {}} +test regexp-26.13 {regexp without -line option} { + regexp -all -inline -- {a*} "b\n" +} {{} {}} + # cleanup ::tcltest::cleanupTests return -- cgit v0.12