# reg.test -- # # This file contains a collection of tests for one or more of the Tcl # built-in commands. Sourcing this file into Tcl runs the tests and # generates output for errors. No output means no errors were found. # (Don't panic if you are seeing this as part of the reg distribution # and aren't using Tcl -- reg's own regression tester also knows how # to read this file, ignoring the Tcl-isms.) # # Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. # # RCS: @(#) $Id: reg.test,v 1.25.2.1 2009/10/29 17:21:18 dgp Exp $ if {[lsearch [namespace children] ::tcltest] == -1} { package require tcltest 2 } # All tests require the testregexp command, return if this # command doesn't exist ::tcltest::testConstraint testregexp [llength [info commands testregexp]] ::tcltest::testConstraint localeRegexp 0 # This file uses some custom procedures, defined below, for regexp regression # testing. The name of the procedure indicates the general nature of the # test: # expectError compile error expected # expectNomatch match failure expected # expectMatch successful match # expectIndices successful match with -indices (used in checking things # like nonparticipating subexpressions) # expectPartial unsuccessful match with -indices (!!) (used in checking # partial-match reporting) # There is also "doing" which sets up title and major test number for each # block of tests. # The first 3 arguments are constant: a minor number (which often gets # a letter or two suffixed to it internally), some flags, and the RE # itself. For expectError, the remaining argument is the name of the # compile error expected, less the leading "REG_". For the rest, the # next argument is the string to try the match against. Remaining # arguments are the substring expected to be matched, and any # substrings expected to be matched by subexpressions. (For # expectNomatch, these arguments are optional, and if present are # ignored except that they indicate how many subexpressions should be # present in the RE.) It is an error for the number of subexpression # arguments to be wrong. Cases involving nonparticipating # subexpressions, checking where empty substrings are located, # etc. should be done using expectIndices and expectPartial. # The flag characters are complex and a bit eclectic. Generally speaking, # lowercase letters are compile options, uppercase are expected re_info # bits, and nonalphabetics are match options, controls for how the test is # run, or testing options. The one small surprise is that AREs are the # default, and you must explicitly request lesser flavors of RE. The flags # are as follows. It is admitted that some are not very mnemonic. # There are some others which are purely debugging tools and are not # useful in this file. # # - no-op (placeholder) # + provide fake xy equivalence class and ch collating element # % force small state-set cache in matcher (to test cache replace) # ^ beginning of string is not beginning of line # $ end of string is not end of line # * test is Unicode-specific, needs big character set # # & test as both ARE and BRE # b BRE # e ERE # a turn advanced-features bit on (error unless ERE already) # q literal string, no metacharacters at all # # i case-independent matching # o ("opaque") no subexpression capture # p newlines are half-magic, excluded from . and [^ only # w newlines are half-magic, significant to ^ and $ only # n newlines are fully magic, both effects # x expanded RE syntax # t incomplete-match reporting # # A backslash-_a_lphanumeric seen # B ERE/ARE literal-_b_race heuristic used # E backslash (_e_scape) seen within [] # H looka_h_ead constraint seen # I _i_mpossible to match # L _l_ocale-specific construct seen # M unportable (_m_achine-specific) construct seen # N RE can match empty (_n_ull) string # P non-_P_OSIX construct seen # Q {} _q_uantifier seen # R back _r_eference seen # S POSIX-un_s_pecified syntax seen # T prefers shortest (_t_iny) # U saw original-POSIX botch: unmatched right paren in ERE (_u_gh) # The one area we can't easily test is memory-allocation failures (which # are hard to provoke on command). Embedded NULs also are not tested at # the moment, but this is a historical accident which should be fixed. # test procedures and related namespace eval RETest { namespace export doing expect* knownBug variable regBug 0 # re_info abbreviation mapping table variable infonames array set infonames { A REG_UBSALNUM B REG_UBRACES E REG_UBBS H REG_ULOOKAHEAD I REG_UIMPOSSIBLE L REG_ULOCALE M REG_UUNPORT N REG_UEMPTYMATCH P REG_UNONPOSIX Q REG_UBOUNDS R REG_UBACKREF S REG_UUNSPEC T REG_USHORTEST U REG_UPBOTCH } variable infonameorder "RHQBAUEPSMLNIT" ;# must match bit order, lsb first # build test number (internal) proc TestNum {args} { return reg-[join [concat $args] .] } # build description, with possible modifiers (internal) proc TestDesc {args} { variable description set testid [concat $args] set d $description if {[llength $testid] > 1} { set d "$d ([lrange $testid 1 end])" } return $d } # build trailing options and flags argument from a flags string (internal) proc TestFlags {fl} { set args [list] set flags "" foreach f [split $fl ""] { switch -exact -- $f { "i" { lappend args "-nocase" } "x" { lappend args "-expanded" } "n" { lappend args "-line" } "p" { lappend args "-linestop" } "w" { lappend args "-lineanchor" } "-" { } default { append flags $f } } } if {$flags ne ""} { lappend args -xflags $flags } return $args } # build info-flags list from a flags string (internal) proc TestInfoFlags {fl} { variable infonames variable infonameorder set ret [list] foreach f [split $infonameorder ""] { if {[string match *$f* $fl]} { lappend ret $infonames($f) } } return $ret } # match expected, internal routine that does the work # parameters like the "real" routines except they don't have "opts", # which is a possibly-empty list of switches for the regexp match attempt # The ! flag is used to indicate expected match failure (for REG_EXPECT, # which wants argument testing even in the event of failure). proc MatchExpected {opts testid flags re target args} { variable regBug # if &, test as both BRE and ARE if {[string match *&* $flags]} { set f [string map {& {}} $flags] MatchExpected $opts "$testid ARE" ${f} $re $target {*}$args MatchExpected $opts "$testid BRE" ${f}b $re $target {*}$args return } set constraints [list testregexp] if {$regBug} { # This will register as a skipped test lappend constraints knownBug } # Tcl locale stuff doesn't do the ch/xy test fakery yet if {[string match *+* $flags]} { # This will register as a skipped test lappend constraints localeRegexp } set f [TestFlags $flags] set infoflags [TestInfoFlags $flags] set ccmd [list testregexp -about {*}$f $re] set ecmd [list testregexp {*}$opts {*}$f $re $target] set nsub [expr {[llength $args] - 1}] set names [list] set refs "" for {set i 0} {$i < [llength $args]} {incr i} { if {$i == 0} { set name match } else { set name sub$i } lappend names $name append refs " \$$name" set $name "" } if {[string match *o* $flags]} { ;# REG_NOSUB kludge set nsub 0 ;# unsigned value cannot be -1 } if {[string match *t* $flags]} { ;# REG_EXPECT incr nsub -1 ;# the extra does not count } set erun "list \[[concat $ecmd $names]\] $refs" set result [list [expr {![string match *!* $flags]}] {*}$args] set info [list $nsub $infoflags] ::tcltest::test [TestNum $testid compile] [TestDesc $testid compile] \ -constraints $constraints -body $ccmd -result $info ::tcltest::test [TestNum $testid execute] [TestDesc $testid execute] \ -constraints $constraints -body $erun -result $result } # set major test number and description proc doing {major desc} { variable description "RE engine $desc" } # compilation error expected proc expectError {testid flags re err} { # if &, test as both ARE and BRE if {[string match *&* $flags]} { set f [string map {& {}} $flags] expectError "$testid ARE" ${f} $re $err expectError "$testid BRE" ${f}b $re $err return } set constraints [list testregexp] # Tcl locale stuff doesn't do the ch/xy test fakery yet if {[string match *+* $flags]} { # This will register as a skipped test lappend constraints localeRegexp } set cmd [list testregexp -about {*}[TestFlags $flags] $re] ::tcltest::test [TestNum $testid error] [TestDesc $testid error] \ -constraints $constraints -result [list 1 REG_$err] -body \ "list \[catch \{$cmd\}\] \[lindex \$::errorCode 1\]" } # match failure expected proc expectNomatch {testid flags re target args} { # if &, test as both ARE and BRE if {[string match *&* $flags]} { set f [string map {& {}} $flags] expectNomatch "$testid ARE" ${f} $re $target {*}$args expectNomatch "$testid BRE" ${f}b $re $target {*}$args return } set constraints [list testregexp] # Tcl locale stuff doesn't do the ch/xy test fakery yet if {[string match *+* $flags]} { # This will register as a skipped test lappend constraints localeRegexp } set f [TestFlags $flags] set infoflags [TestInfoFlags $flags] set ccmd [list testregexp -about {*}$f $re] set nsub [expr {[llength $args] - 1}] if {$nsub == -1} { # didn't tell us number of subexps set ccmd "lreplace \[$ccmd\] 0 0" set info [list $infoflags] } else { set info [list $nsub $infoflags] } set ecmd [list testregexp {*}$f $re $target] ::tcltest::test [TestNum $testid compile] [TestDesc $testid compile] \ -constraints $constraints -body $ccmd -result $info ::tcltest::test [TestNum $testid execute] [TestDesc $testid execute] \ -constraints $constraints -body $ecmd -result 0 } # match expected (no missing, empty, or ambiguous submatches) # expectMatch testno flags re target mat submat ... proc expectMatch {args} { MatchExpected {} {*}$args } # match expected (full fanciness) # expectIndices testno flags re target mat submat ... proc expectIndices {args} { MatchExpected -indices {*}$args } # partial match expected # expectPartial testno flags re target mat "" ... # Quirk: number of ""s must be one more than number of subREs. proc expectPartial {args} { lset args 1 ![lindex $args 1] ;# add ! flag MatchExpected -indices {*}$args } # test is a knownBug proc knownBug {args} { variable regBug 1 uplevel \#0 $args set regBug 0 } } namespace import RETest::* ######## the tests themselves ######## # support functions and preliminary misc. # This is sensitive to changes in message wording, but we really have to # test the code->message expansion at least once. ::tcltest::test reg-0.1 "regexp error reporting" { list [catch {regexp (*) ign} msg] $msg } {1 {couldn't compile regular expression pattern: quantifier operand invalid}} doing 1 "basic sanity checks" expectMatch 1.1 & abc abc abc expectNomatch 1.2 & abc def expectMatch 1.3 & abc xyabxabce abc doing 2 "invalid option combinations" expectError 2.1 qe a INVARG expectError 2.2 qa a INVARG expectError 2.3 qx a INVARG expectError 2.4 qn a INVARG expectError 2.5 ba a INVARG doing 3 "basic syntax" expectIndices 3.1 &NS "" a {0 -1} expectMatch 3.2 NS a| a a expectMatch 3.3 - a|b a a expectMatch 3.4 - a|b b b expectMatch 3.5 NS a||b b b expectMatch 3.6 & ab ab ab doing 4 "parentheses" expectMatch 4.1 - (a)e ae ae a expectMatch 4.2 o (a)e ae expectMatch 4.3 b {\(a\)b} ab ab a expectMatch 4.4 - a((b)c) abc abc bc b expectMatch 4.5 - a(b)(c) abc abc b c expectError 4.6 - a(b EPAREN expectError 4.7 b {a\(b} EPAREN # sigh, we blew it on the specs here... someday this will be fixed in POSIX, # but meanwhile, it's fixed in AREs expectMatch 4.8 eU a)b a)b a)b expectError 4.9 - a)b EPAREN expectError 4.10 b {a\)b} EPAREN expectMatch 4.11 P a(?:b)c abc abc expectError 4.12 e a(?:b)c BADRPT expectIndices 4.13 S a()b ab {0 1} {1 0} expectMatch 4.14 SP a(?:)b ab ab expectIndices 4.15 S a(|b)c ac {0 1} {1 0} expectMatch 4.16 S a(b|)c abc abc b doing 5 "simple one-char matching" # general case of brackets done later expectMatch 5.1 & a.b axb axb expectNomatch 5.2 &n "a.b" "a\nb" expectMatch 5.3 & {a[bc]d} abd abd expectMatch 5.4 & {a[bc]d} acd acd expectNomatch 5.5 & {a[bc]d} aed expectNomatch 5.6 & {a[^bc]d} abd expectMatch 5.7 & {a[^bc]d} aed aed expectNomatch 5.8 &p "a\[^bc]d" "a\nd" doing 6 "context-dependent syntax" # plus odds and ends expectError 6.1 - * BADRPT expectMatch 6.2 b * * * expectMatch 6.3 b {\(*\)} * * * expectError 6.4 - (*) BADRPT expectMatch 6.5 b ^* * * expectError 6.6 - ^* BADRPT expectNomatch 6.7 & ^b ^b expectMatch 6.8 b x^ x^ x^ expectNomatch 6.9 I x^ x expectMatch 6.10 n "\n^" "x\nb" "\n" expectNomatch 6.11 bS {\(^b\)} ^b expectMatch 6.12 - (^b) b b b expectMatch 6.13 & {x$} x x expectMatch 6.14 bS {\(x$\)} x x x expectMatch 6.15 - {(x$)} x x x expectMatch 6.16 b {x$y} "x\$y" "x\$y" expectNomatch 6.17 I {x$y} xy expectMatch 6.18 n "x\$\n" "x\n" "x\n" expectError 6.19 - + BADRPT expectError 6.20 - ? BADRPT doing 7 "simple quantifiers" expectMatch 7.1 &N a* aa aa expectIndices 7.2 &N a* b {0 -1} expectMatch 7.3 - a+ aa aa expectMatch 7.4 - a?b ab ab expectMatch 7.5 - a?b b b expectError 7.6 - ** BADRPT expectMatch 7.7 bN ** *** *** expectError 7.8 & a** BADRPT expectError 7.9 & a**b BADRPT expectError 7.10 & *** BADRPT expectError 7.11 - a++ BADRPT expectError 7.12 - a?+ BADRPT expectError 7.13 - a?* BADRPT expectError 7.14 - a+* BADRPT expectError 7.15 - a*+ BADRPT doing 8 "braces" expectMatch 8.1 NQ "a{0,1}" "" "" expectMatch 8.2 NQ "a{0,1}" ac a expectError 8.3 - "a{1,0}" BADBR expectError 8.4 - "a{1,2,3}" BADBR expectError 8.5 - "a{257}" BADBR expectError 8.6 - "a{1000}" BADBR expectError 8.7 - "a{1" EBRACE expectError 8.8 - "a{1n}" BADBR expectMatch 8.9 BS "a{b" "a\{b" "a\{b" expectMatch 8.10 BS "a{" "a\{" "a\{" expectMatch 8.11 bQ "a\\{0,1\\}b" cb b expectError 8.12 b "a\\{0,1" EBRACE expectError 8.13 - "a{0,1\\" BADBR expectMatch 8.14 Q "a{0}b" ab b expectMatch 8.15 Q "a{0,0}b" ab b expectMatch 8.16 Q "a{0,1}b" ab ab expectMatch 8.17 Q "a{0,2}b" b b expectMatch 8.18 Q "a{0,2}b" aab aab expectMatch 8.19 Q "a{0,}b" aab aab expectMatch 8.20 Q "a{1,1}b" aab ab expectMatch 8.21 Q "a{1,3}b" aaaab aaab expectNomatch 8.22 Q "a{1,3}b" b expectMatch 8.23 Q "a{1,}b" aab aab expectNomatch 8.24 Q "a{2,3}b" ab expectMatch 8.25 Q "a{2,3}b" aaaab aaab expectNomatch 8.26 Q "a{2,}b" ab expectMatch 8.27 Q "a{2,}b" aaaab aaaab doing 9 "brackets" expectMatch 9.1 & {a[bc]} ac ac expectMatch 9.2 & {a[-]} a- a- expectMatch 9.3 & {a[[.-.]]} a- a- expectMatch 9.4 &L {a[[.zero.]]} a0 a0 expectMatch 9.5 &LM {a[[.zero.]-9]} a2 a2 expectMatch 9.6 &M {a[0-[.9.]]} a2 a2 expectMatch 9.7 &+L {a[[=x=]]} ax ax expectMatch 9.8 &+L {a[[=x=]]} ay ay expectNomatch 9.9 &+L {a[[=x=]]} az expectError 9.10 & {a[0-[=x=]]} ERANGE expectMatch 9.11 &L {a[[:digit:]]} a0 a0 expectError 9.12 & {a[[:woopsie:]]} ECTYPE expectNomatch 9.13 &L {a[[:digit:]]} ab expectError 9.14 & {a[0-[:digit:]]} ERANGE expectMatch 9.15 &LP {[[:<:]]a} a a expectMatch 9.16 &LP {a[[:>:]]} a a expectError 9.17 & {a[[..]]b} ECOLLATE expectError 9.18 & {a[[==]]b} ECOLLATE expectError 9.19 & {a[[::]]b} ECTYPE expectError 9.20 & {a[[.a} EBRACK expectError 9.21 & {a[[=a} EBRACK expectError 9.22 & {a[[:a} EBRACK expectError 9.23 & {a[} EBRACK expectError 9.24 & {a[b} EBRACK expectError 9.25 & {a[b-} EBRACK expectError 9.26 & {a[b-c} EBRACK expectMatch 9.27 &M {a[b-c]} ab ab expectMatch 9.28 & {a[b-b]} ab ab expectMatch 9.29 &M {a[1-2]} a2 a2 expectError 9.30 & {a[c-b]} ERANGE expectError 9.31 & {a[a-b-c]} ERANGE expectMatch 9.32 &M {a[--?]b} a?b a?b expectMatch 9.33 & {a[---]b} a-b a-b expectMatch 9.34 & {a[]b]c} a]c a]c expectMatch 9.35 EP {a[\]]b} a]b a]b expectNomatch 9.36 bE {a[\]]b} a]b expectMatch 9.37 bE {a[\]]b} "a\\]b" "a\\]b" expectMatch 9.38 eE {a[\]]b} "a\\]b" "a\\]b" expectMatch 9.39 EP {a[\\]b} "a\\b" "a\\b" expectMatch 9.40 eE {a[\\]b} "a\\b" "a\\b" expectMatch 9.41 bE {a[\\]b} "a\\b" "a\\b" expectError 9.42 - {a[\Z]b} EESCAPE expectMatch 9.43 & {a[[b]c} "a\[c" "a\[c" expectMatch 9.44 EMP* {a[\u00fe-\u0507][\u00ff-\u0300]b} \ "a\u0102\u02ffb" "a\u0102\u02ffb" doing 10 "anchors and newlines" expectMatch 10.1 & ^a a a expectNomatch 10.2 &^ ^a a expectIndices 10.3 &N ^ a {0 -1} expectIndices 10.4 & {a$} aba {2 2} expectNomatch 10.5 {&$} {a$} a expectIndices 10.6 &N {$} ab {2 1} expectMatch 10.7 &n ^a a a expectMatch 10.8 &n "^a" "b\na" "a" expectIndices 10.9 &w "^a" "a\na" {0 0} expectIndices 10.10 &n^ "^a" "a\na" {2 2} expectMatch 10.11 &n {a$} a a expectMatch 10.12 &n "a\$" "a\nb" "a" expectIndices 10.13 &n "a\$" "a\na" {0 0} expectIndices 10.14 N ^^ a {0 -1} expectMatch 10.15 b ^^ ^ ^ expectIndices 10.16 N {$$} a {1 0} expectMatch 10.17 b {$$} "\$" "\$" expectMatch 10.18 &N {^$} "" "" expectNomatch 10.19 &N {^$} a expectIndices 10.20 &nN "^\$" a\n\nb {2 1} expectMatch 10.21 N {$^} "" "" expectMatch 10.22 b {$^} "\$^" "\$^" expectMatch 10.23 P {\Aa} a a expectMatch 10.24 ^P {\Aa} a a expectNomatch 10.25 ^nP {\Aa} "b\na" expectMatch 10.26 P {a\Z} a a expectMatch 10.27 \$P {a\Z} a a expectNomatch 10.28 \$nP {a\Z} "a\nb" expectError 10.29 - ^* BADRPT expectError 10.30 - {$*} BADRPT expectError 10.31 - {\A*} BADRPT expectError 10.32 - {\Z*} BADRPT doing 11 "boundary constraints" expectMatch 11.1 &LP {[[:<:]]a} a a expectMatch 11.2 &LP {[[:<:]]a} -a a expectNomatch 11.3 &LP {[[:<:]]a} ba expectMatch 11.4 &LP {a[[:>:]]} a a expectMatch 11.5 &LP {a[[:>:]]} a- a expectNomatch 11.6 &LP {a[[:>:]]} ab expectMatch 11.7 bLP {\} a a expectNomatch 11.10 bLP {a\>} ab expectMatch 11.11 LP {\ya} a a expectNomatch 11.12 LP {\ya} ba expectMatch 11.13 LP {a\y} a a expectNomatch 11.14 LP {a\y} ab expectMatch 11.15 LP {a\Y} ab a expectNomatch 11.16 LP {a\Y} a- expectNomatch 11.17 LP {a\Y} a expectNomatch 11.18 LP {-\Y} -a expectMatch 11.19 LP {-\Y} -% - expectNomatch 11.20 LP {\Y-} a- expectError 11.21 - {[[:<:]]*} BADRPT expectError 11.22 - {[[:>:]]*} BADRPT expectError 11.23 b {\<*} BADRPT expectError 11.24 b {\>*} BADRPT expectError 11.25 - {\y*} BADRPT expectError 11.26 - {\Y*} BADRPT expectMatch 11.27 LP {\ma} a a expectNomatch 11.28 LP {\ma} ba expectMatch 11.29 LP {a\M} a a expectNomatch 11.30 LP {a\M} ab expectNomatch 11.31 ILP {\Ma} a expectNomatch 11.32 ILP {a\m} a doing 12 "character classes" expectMatch 12.1 LP {a\db} a0b a0b expectNomatch 12.2 LP {a\db} axb expectNomatch 12.3 LP {a\Db} a0b expectMatch 12.4 LP {a\Db} axb axb expectMatch 12.5 LP "a\\sb" "a b" "a b" expectMatch 12.6 LP "a\\sb" "a\tb" "a\tb" expectMatch 12.7 LP "a\\sb" "a\nb" "a\nb" expectNomatch 12.8 LP {a\sb} axb expectMatch 12.9 LP {a\Sb} axb axb expectNomatch 12.10 LP "a\\Sb" "a b" expectMatch 12.11 LP {a\wb} axb axb expectNomatch 12.12 LP {a\wb} a-b expectNomatch 12.13 LP {a\Wb} axb expectMatch 12.14 LP {a\Wb} a-b a-b expectMatch 12.15 LP {\y\w+z\y} adze-guz guz expectMatch 12.16 LPE {a[\d]b} a1b a1b expectMatch 12.17 LPE "a\[\\s]b" "a b" "a b" expectMatch 12.18 LPE {a[\w]b} axb axb doing 13 "escapes" expectError 13.1 & "a\\" EESCAPE expectMatch 13.2 - {a\]+)>} a } 1 test reg-33.4 {Bug 505048} { regexp {\A\s*([^b]*)b} ab } 1 test reg-33.5 {Bug 505048} { regexp {\A\s*[^b]*(b)} ab } 1 test reg-33.6 {Bug 505048} { regexp {\A(\s*)[^b]*(b)} ab } 1 test reg-33.7 {Bug 505048} { regexp {\A\s*[^b]*b} ab } 1 test reg-33.8 {Bug 505048} { regexp -inline {\A\s*[^b]*b} ab } ab test reg-33.9 {Bug 505048} { regexp -indices -inline {\A\s*[^b]*b} ab } {{0 1}} test reg-33.10 {Bug 840258} -body { regsub {(^|\n)+\.*b} \n.b {} tmp } -cleanup { unset tmp } -result 1 test reg-33.11 {Bug 840258} -body { regsub {(^|[\n\r]+)\.*\?<.*?(\n|\r)+} \ "TQ\r\n.?<5000267>Test already stopped\r\n" {} tmp } -cleanup { unset tmp } -result 1 test reg-33.12 {Bug 1810264 - bad read} { regexp {\3161573148} {\3161573148} } 0 test reg-33.13 {Bug 1810264 - infinite loop} { regexp {($|^)*} {x} } 1 # Some environments have small default stack sizes. [Bug 1905562] test reg-33.14 {Bug 1810264 - super-expensive expression} nonPortable { regexp {(x{200}){200}$y} {x} } 0 # cleanup ::tcltest::cleanupTests return