# reg.test -- # # This file contains a collection of tests for one or more of the Tcl # built-in commands. Sourcing this file into Tcl runs the tests and # generates output for errors. No output means no errors were found. # (Don't panic if you are seeing this as part of the reg distribution # and aren't using Tcl -- reg's own regression tester also knows how # to read this file, ignoring the Tcl-isms.) # # Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. # # RCS: @(#) $Id: reg.test,v 1.11 1999/10/13 02:22:28 hobbs Exp $ if {[lsearch [namespace children] ::tcltest] == -1} { package require tcltest namespace import ::tcltest::* } # All tests require the testregexp command, return if this # command doesn't exist set ::tcltest::testConstraints(testregexp) \ [expr {[info commands testregexp] != {}}] # This file uses some custom procedures, defined below, for regexp regression # testing. The name of the procedure indicates the general nature of the # test: # e compile error expected # f match failure expected # m successful match # i successful match with -indices (used in checking things like # nonparticipating subexpressions) # p unsuccessful match with -indices (!!) (used in checking # partial-match reporting) # There is also "doing" which sets up title and major test number for each # block of tests, and "xx" which ignores its arguments and arranges for the # next "doing" to announce that some tests were bypassed (which is better # than just commenting them out). # The first 3 arguments are constant: a minor number (which often gets # a letter or two suffixed to it internally), some flags, and the RE itself. # For e, the remaining argument is the name of the compile error expected, # less the leading "REG_". For the rest, the next argument is the string # to try the match against. Remaining arguments are the substring expected # to be matched, and any substrings expected to be matched by subexpressions. # (For f, these arguments are optional, and if present are ignored except # that they indicate how many subexpressions should be present in the RE.) # It is an error for the number of subexpression arguments to be wrong. # Cases involving nonparticipating subexpressions, checking where empty # substrings are located, etc. should be done using i and p. # The flag characters are complex and a bit eclectic. Generally speaking, # lowercase letters are compile options, uppercase are expected re_info # bits, and nonalphabetics are match options, controls for how the test is # run, or testing options. The one small surprise is that AREs are the # default, and you must explicitly request lesser flavors of RE. The flags # are as follows. It is admitted that some are not very mnemonic. # There are some others which are purely debugging tools and are not # useful in this file. # # - no-op (placeholder) # + provide fake xy equivalence class and ch collating element # % force small state-set cache in matcher (to test cache replace) # ^ beginning of string is not beginning of line # $ end of string is not end of line # * test is Unicode-specific, needs big character set # # & test as both ARE and BRE # b BRE # e ERE # a turn advanced-features bit on (error unless ERE already) # q literal string, no metacharacters at all # # i case-independent matching # o ("opaque") no subexpression capture # p newlines are half-magic, excluded from . and [^ only # w newlines are half-magic, significant to ^ and $ only # n newlines are fully magic, both effects # x expanded RE syntax # t incomplete-match reporting # # A backslash-_a_lphanumeric seen # B ERE/ARE literal-_b_race heuristic used # E backslash (_e_scape) seen within [] # H looka_h_ead constraint seen # I _i_mpossible to match # L _l_ocale-specific construct seen # M unportable (_m_achine-specific) construct seen # N RE can match empty (_n_ull) string # P non-_P_OSIX construct seen # Q {} _q_uantifier seen # R back _r_eference seen # S POSIX-un_s_pecified syntax seen # T prefers shortest (_t_iny) # U saw original-POSIX botch: unmatched right paren in ERE (_u_gh) # The one area we can't easily test is memory-allocation failures (which # are hard to provoke on command). Embedded NULs also are not tested at # the moment, but this is a historical accident which should be fixed. # test procedures and related set ask "about" set xflags "xflags" set testbypassed 0 # re_info abbreviation mapping table set infonames(A) "REG_UBSALNUM" set infonames(B) "REG_UBRACES" set infonames(E) "REG_UBBS" set infonames(H) "REG_ULOOKAHEAD" set infonames(I) "REG_UIMPOSSIBLE" set infonames(L) "REG_ULOCALE" set infonames(M) "REG_UUNPORT" set infonames(N) "REG_UEMPTYMATCH" set infonames(P) "REG_UNONPOSIX" set infonames(Q) "REG_UBOUNDS" set infonames(R) "REG_UBACKREF" set infonames(S) "REG_UUNSPEC" set infonames(T) "REG_USHORTEST" set infonames(U) "REG_UPBOTCH" set infonameorder "RHQBAUEPSMLNIT" ;# must match bit order, lsb first # set major test number and description proc doing {major desc} { global prefix description testbypassed if {$testbypassed != 0} { puts stdout "!!! bypassed $testbypassed tests in\ $prefix, `$description'" } set prefix reg-$major set description "reg $desc" set testbypassed 0 } # build test number (internal) proc tno {testid} { return [lindex $testid 0] } # build description, with possible modifiers (internal) proc desc {testid} { global description set d $description if {[llength $testid] > 1} { set d "([lreplace $testid 0 0]) $d" } return $d } # build trailing options and flags argument from a flags string (internal) proc flags {fl} { global xflags set args [list] set flags "" foreach f [split $fl ""] { switch -exact -- $f { "i" { lappend args "-nocase" } "x" { lappend args "-expanded" } "n" { lappend args "-line" } "p" { lappend args "-linestop" } "w" { lappend args "-lineanchor" } "-" { } default { append flags $f } } } if {[string compare $flags ""] != 0} { lappend args -$xflags $flags } return $args } # build info-flags list from a flags string (internal) proc infoflags {fl} { global infonames infonameorder set ret [list] foreach f [split $infonameorder ""] { if {[string first $f $fl] >= 0} { lappend ret $infonames($f) } } return $ret } # compilation error expected proc e {testid flags re err} { global prefix ask errorCode # Tcl locale stuff doesn't do the ch/xy test fakery yet if {[string first "+" $flags] >= 0} { xx return } # if &, test as both ARE and BRE set amp [string first "&" $flags] if {$amp >= 0} { set f [string range $flags 0 [expr $amp - 1]] append f [string range $flags [expr $amp + 1] end] e [linsert $testid end ARE] ${f} $re $err e [linsert $testid end BRE] ${f}b $re $err return } set cmd [concat [list testregexp -$ask] [flags $flags] [list $re]] set run "list \[catch \{$cmd\}\] \[lindex \$errorCode 1\]" test $prefix.[tno $testid] [desc $testid] {testregexp} $run [list 1 REG_$err] } # match failure expected proc f {testid flags re target args} { global prefix description ask # Tcl locale stuff doesn't do the ch/xy test fakery yet if {[string first "+" $flags] >= 0} { xx return } # if &, test as both ARE and BRE set amp [string first "&" $flags] if {$amp >= 0} { set f [string range $flags 0 [expr $amp - 1]] append f [string range $flags [expr $amp + 1] end] eval [linsert $args 0 f [linsert $testid end ARE] ${f} $re \ $target] eval [linsert $args 0 f [linsert $testid end BRE] ${f}b $re \ $target] return } set f [flags $flags] set infoflags [infoflags $flags] set ccmd [concat [list testregexp -$ask] $f [list $re]] set nsub [expr [llength $args] - 1] if {$nsub == -1} { # didn't tell us number of subexps set ccmd "lreplace \[$ccmd\] 0 0" set info [list $infoflags] } else { set info [list $nsub $infoflags] } lappend testid "compile" test $prefix.[tno $testid] [desc $testid] {testregexp} $ccmd $info set testid [lreplace $testid end end "execute"] set ecmd [concat [list testregexp] $f [list $re $target]] test $prefix.[tno $testid] [desc $testid] {testregexp} $ecmd 0 } # match expected, internal routine that does the work # parameters like the "real" routines except they don't have "opts", # which is a possibly-empty list of switches for the regexp match attempt # The ! flag is used to indicate expected match failure (for REG_EXPECT, # which wants argument testing even in the event of failure). proc matchexpected {opts testid flags re target args} { global prefix description ask # Tcl locale stuff doesn't do the ch/xy test fakery yet if {[string first "+" $flags] >= 0} { xx return } # if &, test as both BRE and ARE set amp [string first "&" $flags] if {$amp >= 0} { set f [string range $flags 0 [expr $amp - 1]] append f [string range $flags [expr $amp + 1] end] eval [concat [list matchexpected $opts \ [linsert $testid end ARE] ${f} $re $target] $args] eval [concat [list matchexpected $opts \ [linsert $testid end BRE] ${f}b $re $target] $args] return } set f [flags $flags] set infoflags [infoflags $flags] set ccmd [concat [list testregexp -$ask] $f [list $re]] set ecmd [concat [list testregexp] $opts $f [list $re $target]] set nsub [expr [llength $args] - 1] set names [list] set refs "" for {set i 0} {$i <= $nsub} {incr i} { if {$i == 0} { set name match } else { set name sub$i } lappend names $name append refs " \$$name" set $name "" } if {[string first "o" $flags] >= 0} { ;# REG_NOSUB kludge set nsub 0 ;# unsigned value cannot be -1 } if {[string first "t" $flags] >= 0} { ;# REG_EXPECT incr nsub -1 ;# the extra does not count } set ecmd [concat $ecmd $names] set erun "list \[$ecmd\] $refs" set retcode [list 1] if {[string first "!" $flags] >= 0} { set retcode [list 0] } set result [concat $retcode $args] set info [list $nsub $infoflags] lappend testid "compile" test $prefix.[tno $testid] [desc $testid] {testregexp} $ccmd $info set testid [lreplace $testid end end "execute"] test $prefix.[tno $testid] [desc $testid] {testregexp} $erun $result } # match expected (no missing, empty, or ambiguous submatches) # m testno flags re target mat submat ... proc m {args} { eval matchexpected [linsert $args 0 [list]] } # match expected (full fanciness) # i testno flags re target mat submat ... proc i {args} { eval matchexpected [linsert $args 0 [list "-indices"]] } # partial match expected # p testno flags re target mat "" ... # Quirk: number of ""s must be one more than number of subREs. proc p {args} { set f [lindex $args 1] ;# add ! flag set args [lreplace $args 1 1 "!$f"] eval matchexpected [linsert $args 0 [list "-indices"]] } # test temporarily unimplemented proc xx {args} { global testbypassed incr testbypassed } # the tests themselves # support functions and preliminary misc. # This is sensitive to changes in message wording, but we really have to # test the code->message expansion at least once. test reg-0.1 "regexp error reporting" { list [catch {regexp (*) ign} msg] $msg } {1 {couldn't compile regular expression pattern: quantifier operand invalid}} doing 1 "basic sanity checks" m 1 & abc abc abc f 2 & abc def m 3 & abc xyabxabce abc doing 2 "invalid option combinations" e 1 qe a INVARG e 2 qa a INVARG e 3 qx a INVARG e 4 qn a INVARG e 5 ba a INVARG doing 3 "basic syntax" i 1 &NS "" a {0 -1} m 2 NS a| a a m 3 - a|b a a m 4 - a|b b b m 5 NS a||b b b m 6 & ab ab ab doing 4 "parentheses" m 1 - (a)e ae ae a m 2 o (a)e ae m 3 b {\(a\)b} ab ab a m 4 - a((b)c) abc abc bc b m 5 - a(b)(c) abc abc b c e 6 - a(b EPAREN e 7 b {a\(b} EPAREN # sigh, we blew it on the specs here... someday this will be fixed in POSIX, # but meanwhile, it's fixed in AREs m 8 eU a)b a)b a)b e 9 - a)b EPAREN e 10 b {a\)b} EPAREN m 11 P a(?:b)c abc abc e 12 e a(?:b)c BADRPT i 13 S a()b ab {0 1} {1 0} m 14 SP a(?:)b ab ab i 15 S a(|b)c ac {0 1} {1 0} m 16 S a(b|)c abc abc b doing 5 "simple one-char matching" # general case of brackets done later m 1 & a.b axb axb f 2 &n "a.b" "a\nb" m 3 & {a[bc]d} abd abd m 4 & {a[bc]d} acd acd f 5 & {a[bc]d} aed f 6 & {a[^bc]d} abd m 7 & {a[^bc]d} aed aed f 8 &p "a\[^bc]d" "a\nd" doing 6 "context-dependent syntax" # plus odds and ends e 1 - * BADRPT m 2 b * * * m 3 b {\(*\)} * * * e 4 - (*) BADRPT m 5 b ^* * * e 6 - ^* BADRPT f 7 & ^b ^b m 8 b x^ x^ x^ f 9 I x^ x m 10 n "\n^" "x\nb" "\n" f 11 bS {\(^b\)} ^b m 12 - (^b) b b b m 13 & {x$} x x m 14 bS {\(x$\)} x x x m 15 - {(x$)} x x x m 16 b {x$y} "x\$y" "x\$y" f 17 I {x$y} xy m 18 n "x\$\n" "x\n" "x\n" e 19 - + BADRPT e 20 - ? BADRPT doing 7 "simple quantifiers" m 1 &N a* aa aa i 2 &N a* b {0 -1} m 3 - a+ aa aa m 4 - a?b ab ab m 5 - a?b b b e 6 - ** BADRPT m 7 bN ** *** *** e 8 & a** BADRPT e 9 & a**b BADRPT e 10 & *** BADRPT e 11 - a++ BADRPT e 12 - a?+ BADRPT e 13 - a?* BADRPT e 14 - a+* BADRPT e 15 - a*+ BADRPT doing 8 "braces" m 1 NQ "a{0,1}" "" "" m 2 NQ "a{0,1}" ac a e 3 - "a{1,0}" BADBR e 4 - "a{1,2,3}" BADBR e 5 - "a{257}" BADBR e 6 - "a{1000}" BADBR e 7 - "a{1" EBRACE e 8 - "a{1n}" BADBR m 9 BS "a{b" "a\{b" "a\{b" m 10 BS "a{" "a\{" "a\{" m 11 bQ "a\\{0,1\\}b" cb b e 12 b "a\\{0,1" EBRACE e 13 - "a{0,1\\" BADBR m 14 Q "a{0}b" ab b m 15 Q "a{0,0}b" ab b m 16 Q "a{0,1}b" ab ab m 17 Q "a{0,2}b" b b m 18 Q "a{0,2}b" aab aab m 19 Q "a{0,}b" aab aab m 20 Q "a{1,1}b" aab ab m 21 Q "a{1,3}b" aaaab aaab f 22 Q "a{1,3}b" b m 23 Q "a{1,}b" aab aab f 24 Q "a{2,3}b" ab m 25 Q "a{2,3}b" aaaab aaab f 26 Q "a{2,}b" ab m 27 Q "a{2,}b" aaaab aaaab doing 9 "brackets" m 1 & {a[bc]} ac ac m 2 & {a[-]} a- a- m 3 & {a[[.-.]]} a- a- m 4 &L {a[[.zero.]]} a0 a0 m 5 &LM {a[[.zero.]-9]} a2 a2 m 6 &M {a[0-[.9.]]} a2 a2 m 7 &+L {a[[=x=]]} ax ax m 8 &+L {a[[=x=]]} ay ay f 9 &+L {a[[=x=]]} az e 10 & {a[0-[=x=]]} ERANGE m 11 &L {a[[:digit:]]} a0 a0 e 12 & {a[[:woopsie:]]} ECTYPE f 13 &L {a[[:digit:]]} ab e 14 & {a[0-[:digit:]]} ERANGE m 15 &LP {[[:<:]]a} a a m 16 &LP {a[[:>:]]} a a e 17 & {a[[..]]b} ECOLLATE e 18 & {a[[==]]b} ECOLLATE e 19 & {a[[::]]b} ECTYPE e 20 & {a[[.a} EBRACK e 21 & {a[[=a} EBRACK e 22 & {a[[:a} EBRACK e 23 & {a[} EBRACK e 24 & {a[b} EBRACK e 25 & {a[b-} EBRACK e 26 & {a[b-c} EBRACK m 27 &M {a[b-c]} ab ab m 28 & {a[b-b]} ab ab m 29 &M {a[1-2]} a2 a2 e 30 & {a[c-b]} ERANGE e 31 & {a[a-b-c]} ERANGE m 32 &M {a[--?]b} a?b a?b m 33 & {a[---]b} a-b a-b m 34 & {a[]b]c} a]c a]c m 35 EP {a[\]]b} a]b a]b f 36 bE {a[\]]b} a]b m 37 bE {a[\]]b} "a\\]b" "a\\]b" m 38 eE {a[\]]b} "a\\]b" "a\\]b" m 39 EP {a[\\]b} "a\\b" "a\\b" m 40 eE {a[\\]b} "a\\b" "a\\b" m 41 bE {a[\\]b} "a\\b" "a\\b" e 42 - {a[\Z]b} EESCAPE m 43 & {a[[b]c} "a\[c" "a\[c" m 44 EMP* {a[\u00fe-\u0507][\u00ff-\u0300]b} \ "a\u0102\u02ffb" "a\u0102\u02ffb" doing 10 "anchors and newlines" m 1 & ^a a a f 2 &^ ^a a i 3 &N ^ a {0 -1} i 4 & {a$} aba {2 2} f 5 {&$} {a$} a i 6 &N {$} ab {2 1} m 7 &n ^a a a m 8 &n "^a" "b\na" "a" i 9 &w "^a" "a\na" {0 0} i 10 &n^ "^a" "a\na" {2 2} m 11 &n {a$} a a m 12 &n "a\$" "a\nb" "a" i 13 &n "a\$" "a\na" {0 0} i 14 N ^^ a {0 -1} m 15 b ^^ ^ ^ i 16 N {$$} a {1 0} m 17 b {$$} "\$" "\$" m 18 &N {^$} "" "" f 19 &N {^$} a i 20 &nN "^\$" "a\n\nb" {2 1} m 21 N {$^} "" "" m 22 b {$^} "\$^" "\$^" m 23 P {\Aa} a a m 24 ^P {\Aa} a a f 25 ^nP {\Aa} "b\na" m 26 P {a\Z} a a m 27 {$P} {a\Z} a a f 28 {$nP} {a\Z} "a\nb" e 29 - ^* BADRPT e 30 - {$*} BADRPT e 31 - {\A*} BADRPT e 32 - {\Z*} BADRPT doing 11 "boundary constraints" m 1 &LP {[[:<:]]a} a a m 2 &LP {[[:<:]]a} -a a f 3 &LP {[[:<:]]a} ba m 4 &LP {a[[:>:]]} a a m 5 &LP {a[[:>:]]} a- a f 6 &LP {a[[:>:]]} ab m 7 bLP {\<a} a a f 8 bLP {\<a} ba m 9 bLP {a\>} a a f 10 bLP {a\>} ab m 11 LP {\ya} a a f 12 LP {\ya} ba m 13 LP {a\y} a a f 14 LP {a\y} ab m 15 LP {a\Y} ab a f 16 LP {a\Y} a- f 17 LP {a\Y} a f 18 LP {-\Y} -a m 19 LP {-\Y} -% - f 20 LP {\Y-} a- e 21 - {[[:<:]]*} BADRPT e 22 - {[[:>:]]*} BADRPT e 23 b {\<*} BADRPT e 24 b {\>*} BADRPT e 25 - {\y*} BADRPT e 26 - {\Y*} BADRPT m 27 LP {\ma} a a f 28 LP {\ma} ba m 29 LP {a\M} a a f 30 LP {a\M} ab f 31 ILP {\Ma} a f 32 ILP {a\m} a doing 12 "character classes" m 1 LP {a\db} a0b a0b f 2 LP {a\db} axb f 3 LP {a\Db} a0b m 4 LP {a\Db} axb axb m 5 LP "a\\sb" "a b" "a b" m 6 LP "a\\sb" "a\tb" "a\tb" m 7 LP "a\\sb" "a\nb" "a\nb" f 8 LP {a\sb} axb m 9 LP {a\Sb} axb axb f 10 LP "a\\Sb" "a b" m 11 LP {a\wb} axb axb f 12 LP {a\wb} a-b f 13 LP {a\Wb} axb m 14 LP {a\Wb} a-b a-b m 15 LP {\y\w+z\y} adze-guz guz m 16 LPE {a[\d]b} a1b a1b m 17 LPE "a\[\\s]b" "a b" "a b" m 18 LPE {a[\w]b} axb axb doing 13 "escapes" e 1 & "a\\" EESCAPE m 2 - {a\<b} a<b a<b m 3 e {a\<b} a<b a<b m 4 bAS {a\wb} awb awb m 5 eAS {a\wb} awb awb m 6 PL "a\\ab" "a\007b" "a\007b" m 7 P "a\\bb" "a\bb" "a\bb" m 8 P {a\Bb} "a\\b" "a\\b" m 9 MP "a\\chb" "a\bb" "a\bb" m 10 MP "a\\cHb" "a\bb" "a\bb" m 11 LMP "a\\e" "a\033" "a\033" m 12 P "a\\fb" "a\fb" "a\fb" m 13 P "a\\nb" "a\nb" "a\nb" m 14 P "a\\rb" "a\rb" "a\rb" m 15 P "a\\tb" "a\tb" "a\tb" m 16 P "a\\u0008x" "a\bx" "a\bx" e 17 - {a\u008x} EESCAPE m 18 P "a\\u00088x" "a\b8x" "a\b8x" m 19 P "a\\U00000008x" "a\bx" "a\bx" e 20 - {a\U0000008x} EESCAPE m 21 P "a\\vb" "a\vb" "a\vb" m 22 MP "a\\x08x" "a\bx" "a\bx" e 23 - {a\xq} EESCAPE m 24 MP "a\\x0008x" "a\bx" "a\bx" e 25 - {a\z} EESCAPE m 26 MP "a\\010b" "a\bb" "a\bb" doing 14 "back references" # ugh m 1 RP {a(b*)c\1} abbcbb abbcbb bb m 2 RP {a(b*)c\1} ac ac "" f 3 RP {a(b*)c\1} abbcb m 4 RP {a(b*)\1} abbcbb abb b m 5 RP {a(b|bb)\1} abbcbb abb b m 6 RP {a([bc])\1} abb abb b f 7 RP {a([bc])\1} abc m 8 RP {a([bc])\1} abcabb abb b f 9 RP {a([bc])*\1} abc f 10 RP {a([bc])\1} abB m 11 iRP {a([bc])\1} abB abB b m 12 RP {a([bc])\1+} abbb abbb b m 13 QRP "a(\[bc])\\1{3,4}" abbbb abbbb b f 14 QRP "a(\[bc])\\1{3,4}" abbb m 15 RP {a([bc])\1*} abbb abbb b m 16 RP {a([bc])\1*} ab ab b m 17 RP {a([bc])(\1*)} ab ab b "" e 18 - {a((b)\1)} ESUBREG e 19 - {a(b)c\2} ESUBREG m 20 bR {a\(b*\)c\1} abbcbb abbcbb bb doing 15 "octal escapes vs back references" # initial zero is always octal m 1 MP "a\\010b" "a\bb" "a\bb" m 2 MP "a\\0070b" "a\0070b" "a\0070b" m 3 MP "a\\07b" "a\007b" "a\007b" m 4 MP "a(b)(b)(b)(b)(b)(b)(b)(b)(b)(b)\\07c" "abbbbbbbbbb\007c" \ "abbbbbbbbbb\007c" "b" "b" "b" "b" "b" "b" \ "b" "b" "b" "b" # a single digit is always a backref e 5 - {a\7b} ESUBREG # otherwise it's a backref only if within range (barf!) m 6 MP "a\\10b" "a\bb" "a\bb" m 7 MP {a\101b} aAb aAb m 8 RP {a(b)(b)(b)(b)(b)(b)(b)(b)(b)(b)\10c} abbbbbbbbbbbc \ abbbbbbbbbbbc b b b b b b b \ b b b # but we're fussy about border cases -- guys who want octal should use the zero e 9 - {a((((((((((b\10))))))))))c} ESUBREG # BREs don't have octal, EREs don't have backrefs m 10 MP "a\\12b" "a\nb" "a\nb" e 11 b {a\12b} ESUBREG m 12 eAS {a\12b} a12b a12b doing 16 "expanded syntax" m 1 xP "a b c" "abc" "abc" m 2 xP "a b #oops\nc\td" "abcd" "abcd" m 3 x "a\\ b\\\tc" "a b\tc" "a b\tc" m 4 xP "a b\\#c" "ab#c" "ab#c" m 5 xP "a b\[c d]e" "ab e" "ab e" m 6 xP "a b\[c#d]e" "ab#e" "ab#e" m 7 xP "a b\[c#d]e" "abde" "abde" m 8 xSPB "ab{ d" "ab\{d" "ab\{d" m 9 xPQ "ab{ 1 , 2 }c" "abc" "abc" doing 17 "misc syntax" m 1 P a(?#comment)b ab ab doing 18 "unmatchable REs" f 1 I a^b ab doing 19 "case independence" m 1 &i ab Ab Ab m 2 &i {a[bc]} aC aC f 3 &i {a[^bc]} aB m 4 &iM {a[b-d]} aC aC f 5 &iM {a[^b-d]} aC doing 20 "directors and embedded options" e 1 & ***? BADPAT m 2 q ***? ***? ***? m 3 &P ***=a*b a*b a*b m 4 q ***=a*b ***=a*b ***=a*b m 5 bLP {***:\w+} ab ab m 6 eLP {***:\w+} ab ab e 7 & ***:***=a*b BADRPT m 8 &P ***:(?b)a+b a+b a+b m 9 P (?b)a+b a+b a+b e 10 e {(?b)\w+} BADRPT m 11 bAS {(?b)\w+} (?b)w+ (?b)w+ m 12 iP (?c)a a a f 13 iP (?c)a A m 14 APS {(?e)\W+} WW WW m 15 P (?i)a+ Aa Aa f 16 P "(?m)a.b" "a\nb" m 17 P "(?m)^b" "a\nb" "b" f 18 P "(?n)a.b" "a\nb" m 19 P "(?n)^b" "a\nb" "b" f 20 P "(?p)a.b" "a\nb" f 21 P "(?p)^b" "a\nb" m 22 P (?q)a+b a+b a+b m 23 nP "(?s)a.b" "a\nb" "a\nb" m 24 xP "(?t)a b" "a b" "a b" m 25 P "(?w)a.b" "a\nb" "a\nb" m 26 P "(?w)^b" "a\nb" "b" m 27 P "(?x)a b" "ab" "ab" e 28 - (?z)ab BADOPT m 29 P (?ici)a+ Aa Aa e 30 P (?i)(?q)a+ BADRPT m 31 P (?q)(?i)a+ (?i)a+ (?i)a+ m 32 P (?qe)a+ a a m 33 xP "(?q)a b" "a b" "a b" m 34 P "(?qx)a b" "a b" "a b" m 35 P (?qi)ab Ab Ab doing 21 "capturing" m 1 - a(b)c abc abc b m 2 P a(?:b)c xabc abc m 3 - a((b))c xabcy abc b b m 4 P a(?:(b))c abcy abc b m 5 P a((?:b))c abc abc b m 6 P a(?:(?:b))c abc abc i 7 Q "a(b){0}c" ac {0 1} {-1 -1} m 8 - a(b)c(d)e abcde abcde b d m 9 - (b)c(d)e bcde bcde b d m 10 - a(b)(d)e abde abde b d m 11 - a(b)c(d) abcd abcd b d m 12 - (ab)(cd) xabcdy abcd ab cd m 13 - a(b)?c xabcy abc b i 14 - a(b)?c xacy {1 2} {-1 -1} m 15 - a(b)?c(d)?e xabcdey abcde b d i 16 - a(b)?c(d)?e xacdey {1 4} {-1 -1} {3 3} i 17 - a(b)?c(d)?e xabcey {1 4} {2 2} {-1 -1} i 18 - a(b)?c(d)?e xacey {1 3} {-1 -1} {-1 -1} m 19 - a(b)*c xabcy abc b i 20 - a(b)*c xabbbcy {1 5} {4 4} i 21 - a(b)*c xacy {1 2} {-1 -1} m 22 - a(b*)c xabbbcy abbbc bbb m 23 - a(b*)c xacy ac "" f 24 - a(b)+c xacy m 25 - a(b)+c xabcy abc b i 26 - a(b)+c xabbbcy {1 5} {4 4} m 27 - a(b+)c xabbbcy abbbc bbb i 28 Q "a(b){2,3}c" xabbbcy {1 5} {4 4} i 29 Q "a(b){2,3}c" xabbcy {1 4} {3 3} f 30 Q "a(b){2,3}c" xabcy m 31 LP "\\y(\\w+)\\y" "-- abc-" "abc" "abc" m 32 - a((b|c)d+)+ abacdbd acdbd bd b m 33 N (.*).* abc abc abc m 34 N (a*)* bc "" "" doing 22 "multicharacter collating elements" # again ugh m 1 &+L {a[c]e} ace ace f 2 &+IL {a[c]h} ach m 3 &+L {a[[.ch.]]} ach ach f 4 &+L {a[[.ch.]]} ace m 5 &+L {a[c[.ch.]]} ac ac m 6 &+L {a[c[.ch.]]} ace ac m 7 &+L {a[c[.ch.]]} ache ach f 8 &+L {a[^c]e} ace m 9 &+L {a[^c]e} abe abe m 10 &+L {a[^c]e} ache ache f 11 &+L {a[^[.ch.]]} ach m 12 &+L {a[^[.ch.]]} ace ac m 13 &+L {a[^[.ch.]]} ac ac m 14 &+L {a[^[.ch.]]} abe ab f 15 &+L {a[^c[.ch.]]} ach f 16 &+L {a[^c[.ch.]]} ace f 17 &+L {a[^c[.ch.]]} ac m 18 &+L {a[^c[.ch.]]} abe ab m 19 &+L {a[^b]} ac ac m 20 &+L {a[^b]} ace ac m 21 &+L {a[^b]} ach ach f 22 &+L {a[^b]} abe doing 23 "lookahead constraints" m 1 HP a(?=b)b* ab ab f 2 HP a(?=b)b* a m 3 HP a(?=b)b*(?=c)c* abc abc f 4 HP a(?=b)b*(?=c)c* ab f 5 HP a(?!b)b* ab m 6 HP a(?!b)b* a a m 7 HP (?=b)b b b f 8 HP (?=b)b a doing 24 "non-greedy quantifiers" m 1 PT ab+? abb ab m 2 PT ab+?c abbc abbc m 3 PT ab*? abb a m 4 PT ab*?c abbc abbc m 5 PT ab?? ab a m 6 PT ab??c abc abc m 7 PQT "ab{2,4}?" abbbb abb m 8 PQT "ab{2,4}?c" abbbbc abbbbc m 9 - 3z* 123zzzz456 3zzzz m 10 PT 3z*? 123zzzz456 3 m 11 - z*4 123zzzz456 zzzz4 m 12 PT z*?4 123zzzz456 zzzz4 doing 25 "mixed quantifiers" # this is very incomplete as yet # should include | m 1 PNT {^(.*?)(a*)$} xyza xyza xyz a m 2 PNT {^(.*?)(a*)$} xyzaa xyzaa xyz aa m 3 PNT {^(.*?)(a*)$} xyz xyz xyz "" doing 26 "tricky cases" # attempts to trick the matcher into accepting a short match m 1 - (week|wee)(night|knights) weeknights weeknights \ wee knights m 2 RP {a(bc*).*\1} abccbccb abccbccb b m 3 - {a(b.[bc]*)+} abcbd abcbd bd doing 27 "implementation misc." # duplicate arcs are suppressed m 1 P a(?:b|b)c abc abc # make color/subcolor relationship go back and forth m 2 & {[ab][ab][ab]} aba aba m 3 & {[ab][ab][ab][ab][ab][ab][ab]} abababa abababa doing 28 "boundary busters etc." # color-descriptor allocation changes at 10 m 1 & abcdefghijkl abcdefghijkl abcdefghijkl # so does arc allocation m 2 P a(?:b|c|d|e|f|g|h|i|j|k|l|m)n agn agn # subexpression tracking also at 10 m 3 - a(((((((((((((b)))))))))))))c abc abc b b b b b b b b b b b b b # state-set handling changes slightly at unsigned size (might be 64...) # (also stresses arc allocation) m 4 Q "ab{1,100}c" abbc abbc m 5 Q "ab{1,100}c" abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc \ abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc m 6 Q "ab{1,100}c" \ abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc \ abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc # force small cache and bust it, several ways m 7 LP {\w+abcdefgh} xyzabcdefgh xyzabcdefgh m 8 %LP {\w+abcdefgh} xyzabcdefgh xyzabcdefgh m 9 %LP {\w+abcdefghijklmnopqrst} xyzabcdefghijklmnopqrst \ xyzabcdefghijklmnopqrst i 10 %LP {\w+(abcdefgh)?} xyz {0 2} {-1 -1} i 11 %LP {\w+(abcdefgh)?} xyzabcdefg {0 9} {-1 -1} i 12 %LP {\w+(abcdefghijklmnopqrst)?} xyzabcdefghijklmnopqrs \ {0 21} {-1 -1} doing 29 "incomplete matches" p 1 t def abc {3 2} "" p 2 t bcd abc {1 2} "" p 3 t abc abab {0 3} "" p 4 t abc abdab {3 4} "" i 5 t abc abc {0 2} {0 2} i 6 t abc xyabc {2 4} {2 4} p 7 t abc+ xyab {2 3} "" i 8 t abc+ xyabc {2 4} {2 4} xx i 9 t abc+ xyabcd {2 4} {6 5} i 10 t abc+ xyabcdd {2 4} {7 6} p 11 tPT abc+? xyab {2 3} "" # the retain numbers in these two may look wrong, but they aren't i 12 tPT abc+? xyabc {2 4} {5 4} i 13 tPT abc+? xyabcc {2 4} {6 5} i 14 tPT abc+? xyabcd {2 4} {6 5} i 15 tPT abc+? xyabcdd {2 4} {7 6} i 16 t abcd|bc xyabc {3 4} {2 4} p 17 tn .*k "xx\nyyy" {3 5} "" doing 30 "misc. oddities and old bugs" e 1 & *** BADRPT m 2 N a?b* abb abb m 3 N a?b* bb bb m 4 & a*b aab aab m 5 & ^a*b aaaab aaaab m 6 &M {[0-6][1-2][0-3][0-6][1-6][0-6]} 010010 010010 # temporary REG_BOSONLY kludge m 7 s abc abcd abc f 8 s abc xabcd # back to normal stuff m 9 HLP {(?n)^(?![t#])\S+} "tk\n\n#\n#\nit0" it0 # flush any leftover complaints doing 0 "flush" # cleanup ::tcltest::cleanupTests return