wxWidgets/tests/regex/reg.test
Vadim Zeitlin 3f66f6a5b3 Remove all lines containing cvs/svn "$Id$" keyword.
This keyword is not expanded by Git which means it's not replaced with the
correct revision value in the releases made using git-based scripts and it's
confusing to have lines with unexpanded "$Id$" in the released files. As
expanding them with Git is not that simple (it could be done with git archive
and export-subst attribute) and there are not many benefits in having them in
the first place, just remove all these lines.

If nothing else, this will make an eventual transition to Git simpler.

Closes .

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@74602 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
2013-07-26 16:02:46 +00:00

1135 lines
29 KiB
Plaintext

# reg.test --
#
# This file contains a collection of tests for one or more of the Tcl
# built-in commands. Sourcing this file into Tcl runs the tests and
# generates output for errors. No output means no errors were found.
# (Don't panic if you are seeing this as part of the reg distribution
# and aren't using Tcl -- reg's own regression tester also knows how
# to read this file, ignoring the Tcl-isms.)
#
# Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
#
if {[lsearch [namespace children] ::tcltest] == -1} {
package require tcltest 2
namespace import -force ::tcltest::*
}
# All tests require the testregexp command, return if this
# command doesn't exist
::tcltest::testConstraint testregexp \
[expr {[info commands testregexp] != {}}]
::tcltest::testConstraint localeRegexp 0
# This file uses some custom procedures, defined below, for regexp regression
# testing. The name of the procedure indicates the general nature of the
# test:
# e compile error expected
# f match failure expected
# m successful match
# i successful match with -indices (used in checking things like
# nonparticipating subexpressions)
# p unsuccessful match with -indices (!!) (used in checking
# partial-match reporting)
# There is also "doing" which sets up title and major test number for each
# block of tests.
# The first 3 arguments are constant: a minor number (which often gets
# a letter or two suffixed to it internally), some flags, and the RE itself.
# For e, the remaining argument is the name of the compile error expected,
# less the leading "REG_". For the rest, the next argument is the string
# to try the match against. Remaining arguments are the substring expected
# to be matched, and any substrings expected to be matched by subexpressions.
# (For f, these arguments are optional, and if present are ignored except
# that they indicate how many subexpressions should be present in the RE.)
# It is an error for the number of subexpression arguments to be wrong.
# Cases involving nonparticipating subexpressions, checking where empty
# substrings are located, etc. should be done using i and p.
# The flag characters are complex and a bit eclectic. Generally speaking,
# lowercase letters are compile options, uppercase are expected re_info
# bits, and nonalphabetics are match options, controls for how the test is
# run, or testing options. The one small surprise is that AREs are the
# default, and you must explicitly request lesser flavors of RE. The flags
# are as follows. It is admitted that some are not very mnemonic.
# There are some others which are purely debugging tools and are not
# useful in this file.
#
# - no-op (placeholder)
# + provide fake xy equivalence class and ch collating element
# % force small state-set cache in matcher (to test cache replace)
# ^ beginning of string is not beginning of line
# $ end of string is not end of line
# * test is Unicode-specific, needs big character set
#
# & test as both ARE and BRE
# b BRE
# e ERE
# a turn advanced-features bit on (error unless ERE already)
# q literal string, no metacharacters at all
#
# i case-independent matching
# o ("opaque") no subexpression capture
# p newlines are half-magic, excluded from . and [^ only
# w newlines are half-magic, significant to ^ and $ only
# n newlines are fully magic, both effects
# x expanded RE syntax
# t incomplete-match reporting
#
# A backslash-_a_lphanumeric seen
# B ERE/ARE literal-_b_race heuristic used
# E backslash (_e_scape) seen within []
# H looka_h_ead constraint seen
# I _i_mpossible to match
# L _l_ocale-specific construct seen
# M unportable (_m_achine-specific) construct seen
# N RE can match empty (_n_ull) string
# P non-_P_OSIX construct seen
# Q {} _q_uantifier seen
# R back _r_eference seen
# S POSIX-un_s_pecified syntax seen
# T prefers shortest (_t_iny)
# U saw original-POSIX botch: unmatched right paren in ERE (_u_gh)
# The one area we can't easily test is memory-allocation failures (which
# are hard to provoke on command). Embedded NULs also are not tested at
# the moment, but this is a historical accident which should be fixed.
# test procedures and related
set ask "about"
set xflags "xflags"
set testbypassed 0
# re_info abbreviation mapping table
set infonames(A) "REG_UBSALNUM"
set infonames(B) "REG_UBRACES"
set infonames(E) "REG_UBBS"
set infonames(H) "REG_ULOOKAHEAD"
set infonames(I) "REG_UIMPOSSIBLE"
set infonames(L) "REG_ULOCALE"
set infonames(M) "REG_UUNPORT"
set infonames(N) "REG_UEMPTYMATCH"
set infonames(P) "REG_UNONPOSIX"
set infonames(Q) "REG_UBOUNDS"
set infonames(R) "REG_UBACKREF"
set infonames(S) "REG_UUNSPEC"
set infonames(T) "REG_USHORTEST"
set infonames(U) "REG_UPBOTCH"
set infonameorder "RHQBAUEPSMLNIT" ;# must match bit order, lsb first
# set major test number and description
proc doing {major desc} {
global prefix description testbypassed
if {$testbypassed != 0} {
puts stdout "!!! bypassed $testbypassed tests in\
$prefix, `$description'"
}
set prefix reg-$major
set description "reg $desc"
set testbypassed 0
}
# build test number (internal)
proc tno {testid} {
return [join $testid .]
}
# build description, with possible modifiers (internal)
proc desc {testid} {
global description
set d $description
if {[llength $testid] > 1} {
set d "([lreplace $testid 0 0]) $d"
}
return $d
}
# build trailing options and flags argument from a flags string (internal)
proc flags {fl} {
global xflags
set args [list]
set flags ""
foreach f [split $fl ""] {
switch -exact -- $f {
"i" { lappend args "-nocase" }
"x" { lappend args "-expanded" }
"n" { lappend args "-line" }
"p" { lappend args "-linestop" }
"w" { lappend args "-lineanchor" }
"-" { }
default { append flags $f }
}
}
if {[string compare $flags ""] != 0} {
lappend args -$xflags $flags
}
return $args
}
# build info-flags list from a flags string (internal)
proc infoflags {fl} {
global infonames infonameorder
set ret [list]
foreach f [split $infonameorder ""] {
if {[string first $f $fl] >= 0} {
lappend ret $infonames($f)
}
}
return $ret
}
# compilation error expected
proc e {testid flags re err} {
global prefix ask errorCode
# Tcl locale stuff doesn't do the ch/xy test fakery yet
if {[string first "+" $flags] >= 0} {
# This will register as a skipped test
test $prefix.[tno $testid] [desc $testid] localeRegexp {} {}
return
}
# if &, test as both ARE and BRE
set amp [string first "&" $flags]
if {$amp >= 0} {
set f [string range $flags 0 [expr $amp - 1]]
append f [string range $flags [expr $amp + 1] end]
e [linsert $testid end ARE] ${f} $re $err
e [linsert $testid end BRE] ${f}b $re $err
return
}
set cmd [concat [list testregexp -$ask] [flags $flags] [list $re]]
set run "list \[catch \{$cmd\}\] \[lindex \$errorCode 1\]"
test $prefix.[tno $testid] [desc $testid] \
{testregexp} $run [list 1 REG_$err]
}
# match failure expected
proc f {testid flags re target args} {
global prefix description ask
# Tcl locale stuff doesn't do the ch/xy test fakery yet
if {[string first "+" $flags] >= 0} {
# This will register as a skipped test
test $prefix.[tno $testid] [desc $testid] localeRegexp {} {}
return
}
# if &, test as both ARE and BRE
set amp [string first "&" $flags]
if {$amp >= 0} {
set f [string range $flags 0 [expr $amp - 1]]
append f [string range $flags [expr $amp + 1] end]
eval [linsert $args 0 f [linsert $testid end ARE] ${f} $re \
$target]
eval [linsert $args 0 f [linsert $testid end BRE] ${f}b $re \
$target]
return
}
set f [flags $flags]
set infoflags [infoflags $flags]
set ccmd [concat [list testregexp -$ask] $f [list $re]]
set nsub [expr [llength $args] - 1]
if {$nsub == -1} {
# didn't tell us number of subexps
set ccmd "lreplace \[$ccmd\] 0 0"
set info [list $infoflags]
} else {
set info [list $nsub $infoflags]
}
lappend testid "compile"
test $prefix.[tno $testid] [desc $testid] {testregexp} $ccmd $info
set testid [lreplace $testid end end "execute"]
set ecmd [concat [list testregexp] $f [list $re $target]]
test $prefix.[tno $testid] [desc $testid] {testregexp} $ecmd 0
}
# match expected, internal routine that does the work
# parameters like the "real" routines except they don't have "opts",
# which is a possibly-empty list of switches for the regexp match attempt
# The ! flag is used to indicate expected match failure (for REG_EXPECT,
# which wants argument testing even in the event of failure).
proc matchexpected {opts testid flags re target args} {
global prefix description ask regBug
if {[info exists regBug] && $regBug} {
# This will register as a skipped test
test $prefix.[tno $testid] [desc $testid] knownBug {format 0} {1}
return
}
# Tcl locale stuff doesn't do the ch/xy test fakery yet
if {[string first "+" $flags] >= 0} {
# This will register as a skipped test
test $prefix.[tno $testid] [desc $testid] localeRegexp {} {}
return
}
# if &, test as both BRE and ARE
set amp [string first "&" $flags]
if {$amp >= 0} {
set f [string range $flags 0 [expr $amp - 1]]
append f [string range $flags [expr $amp + 1] end]
eval [concat [list matchexpected $opts \
[linsert $testid end ARE] ${f} $re $target] $args]
eval [concat [list matchexpected $opts \
[linsert $testid end BRE] ${f}b $re $target] $args]
return
}
set f [flags $flags]
set infoflags [infoflags $flags]
set ccmd [concat [list testregexp -$ask] $f [list $re]]
set ecmd [concat [list testregexp] $opts $f [list $re $target]]
set nsub [expr [llength $args] - 1]
set names [list]
set refs ""
for {set i 0} {$i <= $nsub} {incr i} {
if {$i == 0} {
set name match
} else {
set name sub$i
}
lappend names $name
append refs " \$$name"
set $name ""
}
if {[string first "o" $flags] >= 0} { ;# REG_NOSUB kludge
set nsub 0 ;# unsigned value cannot be -1
}
if {[string first "t" $flags] >= 0} { ;# REG_EXPECT
incr nsub -1 ;# the extra does not count
}
set ecmd [concat $ecmd $names]
set erun "list \[$ecmd\] $refs"
set retcode [list 1]
if {[string first "!" $flags] >= 0} {
set retcode [list 0]
}
set result [concat $retcode $args]
set info [list $nsub $infoflags]
lappend testid "compile"
test $prefix.[tno $testid] [desc $testid] {testregexp} $ccmd $info
set testid [lreplace $testid end end "execute"]
test $prefix.[tno $testid] [desc $testid] {testregexp} $erun $result
}
# match expected (no missing, empty, or ambiguous submatches)
# m testno flags re target mat submat ...
proc m {args} {
eval matchexpected [linsert $args 0 [list]]
}
# match expected (full fanciness)
# i testno flags re target mat submat ...
proc i {args} {
eval matchexpected [linsert $args 0 [list "-indices"]]
}
# partial match expected
# p testno flags re target mat "" ...
# Quirk: number of ""s must be one more than number of subREs.
proc p {args} {
set f [lindex $args 1] ;# add ! flag
set args [lreplace $args 1 1 "!$f"]
eval matchexpected [linsert $args 0 [list "-indices"]]
}
# test is a knownBug
proc knownBug {args} {
set ::regBug 1
uplevel #0 $args
set ::regBug 0
}
# the tests themselves
# support functions and preliminary misc.
# This is sensitive to changes in message wording, but we really have to
# test the code->message expansion at least once.
test reg-0.1 "regexp error reporting" {
list [catch {regexp (*) ign} msg] $msg
} {1 {couldn't compile regular expression pattern: quantifier operand invalid}}
doing 1 "basic sanity checks"
m 1 & abc abc abc
f 2 & abc def
m 3 & abc xyabxabce abc
doing 2 "invalid option combinations"
e 1 qe a INVARG
e 2 qa a INVARG
e 3 qx a INVARG
e 4 qn a INVARG
e 5 ba a INVARG
doing 3 "basic syntax"
i 1 &NS "" a {0 -1}
m 2 NS a| a a
m 3 - a|b a a
m 4 - a|b b b
m 5 NS a||b b b
m 6 & ab ab ab
doing 4 "parentheses"
m 1 - (a)e ae ae a
m 2 o (a)e ae
m 3 b {\(a\)b} ab ab a
m 4 - a((b)c) abc abc bc b
m 5 - a(b)(c) abc abc b c
e 6 - a(b EPAREN
e 7 b {a\(b} EPAREN
# sigh, we blew it on the specs here... someday this will be fixed in POSIX,
# but meanwhile, it's fixed in AREs
m 8 eU a)b a)b a)b
e 9 - a)b EPAREN
e 10 b {a\)b} EPAREN
m 11 P a(?:b)c abc abc
e 12 e a(?:b)c BADRPT
i 13 S a()b ab {0 1} {1 0}
m 14 SP a(?:)b ab ab
i 15 S a(|b)c ac {0 1} {1 0}
m 16 S a(b|)c abc abc b
doing 5 "simple one-char matching"
# general case of brackets done later
m 1 & a.b axb axb
f 2 &n "a.b" "a\nb"
m 3 & {a[bc]d} abd abd
m 4 & {a[bc]d} acd acd
f 5 & {a[bc]d} aed
f 6 & {a[^bc]d} abd
m 7 & {a[^bc]d} aed aed
f 8 &p "a\[^bc]d" "a\nd"
doing 6 "context-dependent syntax"
# plus odds and ends
e 1 - * BADRPT
m 2 b * * *
m 3 b {\(*\)} * * *
e 4 - (*) BADRPT
m 5 b ^* * *
e 6 - ^* BADRPT
f 7 & ^b ^b
m 8 b x^ x^ x^
f 9 I x^ x
m 10 n "\n^" "x\nb" "\n"
f 11 bS {\(^b\)} ^b
m 12 - (^b) b b b
m 13 & {x$} x x
m 14 bS {\(x$\)} x x x
m 15 - {(x$)} x x x
m 16 b {x$y} "x\$y" "x\$y"
f 17 I {x$y} xy
m 18 n "x\$\n" "x\n" "x\n"
e 19 - + BADRPT
e 20 - ? BADRPT
doing 7 "simple quantifiers"
m 1 &N a* aa aa
i 2 &N a* b {0 -1}
m 3 - a+ aa aa
m 4 - a?b ab ab
m 5 - a?b b b
e 6 - ** BADRPT
m 7 bN ** *** ***
e 8 & a** BADRPT
e 9 & a**b BADRPT
e 10 & *** BADRPT
e 11 - a++ BADRPT
e 12 - a?+ BADRPT
e 13 - a?* BADRPT
e 14 - a+* BADRPT
e 15 - a*+ BADRPT
doing 8 "braces"
m 1 NQ "a{0,1}" "" ""
m 2 NQ "a{0,1}" ac a
e 3 - "a{1,0}" BADBR
e 4 - "a{1,2,3}" BADBR
e 5 - "a{257}" BADBR
e 6 - "a{1000}" BADBR
e 7 - "a{1" EBRACE
e 8 - "a{1n}" BADBR
m 9 BS "a{b" "a\{b" "a\{b"
m 10 BS "a{" "a\{" "a\{"
m 11 bQ "a\\{0,1\\}b" cb b
e 12 b "a\\{0,1" EBRACE
e 13 - "a{0,1\\" BADBR
m 14 Q "a{0}b" ab b
m 15 Q "a{0,0}b" ab b
m 16 Q "a{0,1}b" ab ab
m 17 Q "a{0,2}b" b b
m 18 Q "a{0,2}b" aab aab
m 19 Q "a{0,}b" aab aab
m 20 Q "a{1,1}b" aab ab
m 21 Q "a{1,3}b" aaaab aaab
f 22 Q "a{1,3}b" b
m 23 Q "a{1,}b" aab aab
f 24 Q "a{2,3}b" ab
m 25 Q "a{2,3}b" aaaab aaab
f 26 Q "a{2,}b" ab
m 27 Q "a{2,}b" aaaab aaaab
doing 9 "brackets"
m 1 & {a[bc]} ac ac
m 2 & {a[-]} a- a-
m 3 & {a[[.-.]]} a- a-
m 4 &L {a[[.zero.]]} a0 a0
m 5 &LM {a[[.zero.]-9]} a2 a2
m 6 &M {a[0-[.9.]]} a2 a2
m 7 &+L {a[[=x=]]} ax ax
m 8 &+L {a[[=x=]]} ay ay
f 9 &+L {a[[=x=]]} az
e 10 & {a[0-[=x=]]} ERANGE
m 11 &L {a[[:digit:]]} a0 a0
e 12 & {a[[:woopsie:]]} ECTYPE
f 13 &L {a[[:digit:]]} ab
e 14 & {a[0-[:digit:]]} ERANGE
m 15 &LP {[[:<:]]a} a a
m 16 &LP {a[[:>:]]} a a
e 17 & {a[[..]]b} ECOLLATE
e 18 & {a[[==]]b} ECOLLATE
e 19 & {a[[::]]b} ECTYPE
e 20 & {a[[.a} EBRACK
e 21 & {a[[=a} EBRACK
e 22 & {a[[:a} EBRACK
e 23 & {a[} EBRACK
e 24 & {a[b} EBRACK
e 25 & {a[b-} EBRACK
e 26 & {a[b-c} EBRACK
m 27 &M {a[b-c]} ab ab
m 28 & {a[b-b]} ab ab
m 29 &M {a[1-2]} a2 a2
e 30 & {a[c-b]} ERANGE
e 31 & {a[a-b-c]} ERANGE
m 32 &M {a[--?]b} a?b a?b
m 33 & {a[---]b} a-b a-b
m 34 & {a[]b]c} a]c a]c
m 35 EP {a[\]]b} a]b a]b
f 36 bE {a[\]]b} a]b
m 37 bE {a[\]]b} "a\\]b" "a\\]b"
m 38 eE {a[\]]b} "a\\]b" "a\\]b"
m 39 EP {a[\\]b} "a\\b" "a\\b"
m 40 eE {a[\\]b} "a\\b" "a\\b"
m 41 bE {a[\\]b} "a\\b" "a\\b"
e 42 - {a[\Z]b} EESCAPE
m 43 & {a[[b]c} "a\[c" "a\[c"
m 44 EMP* {a[\u00fe-\u0507][\u00ff-\u0300]b} \
"a\u0102\u02ffb" "a\u0102\u02ffb"
doing 10 "anchors and newlines"
m 1 & ^a a a
f 2 &^ ^a a
i 3 &N ^ a {0 -1}
i 4 & {a$} aba {2 2}
f 5 {&$} {a$} a
i 6 &N {$} ab {2 1}
m 7 &n ^a a a
m 8 &n "^a" "b\na" "a"
i 9 &w "^a" "a\na" {0 0}
i 10 &n^ "^a" "a\na" {2 2}
m 11 &n {a$} a a
m 12 &n "a\$" "a\nb" "a"
i 13 &n "a\$" "a\na" {0 0}
i 14 N ^^ a {0 -1}
m 15 b ^^ ^ ^
i 16 N {$$} a {1 0}
m 17 b {$$} "\$" "\$"
m 18 &N {^$} "" ""
f 19 &N {^$} a
i 20 &nN "^\$" "a\n\nb" {2 1}
m 21 N {$^} "" ""
m 22 b {$^} "\$^" "\$^"
m 23 P {\Aa} a a
m 24 ^P {\Aa} a a
f 25 ^nP {\Aa} "b\na"
m 26 P {a\Z} a a
m 27 {$P} {a\Z} a a
f 28 {$nP} {a\Z} "a\nb"
e 29 - ^* BADRPT
e 30 - {$*} BADRPT
e 31 - {\A*} BADRPT
e 32 - {\Z*} BADRPT
doing 11 "boundary constraints"
m 1 &LP {[[:<:]]a} a a
m 2 &LP {[[:<:]]a} -a a
f 3 &LP {[[:<:]]a} ba
m 4 &LP {a[[:>:]]} a a
m 5 &LP {a[[:>:]]} a- a
f 6 &LP {a[[:>:]]} ab
m 7 bLP {\<a} a a
f 8 bLP {\<a} ba
m 9 bLP {a\>} a a
f 10 bLP {a\>} ab
m 11 LP {\ya} a a
f 12 LP {\ya} ba
m 13 LP {a\y} a a
f 14 LP {a\y} ab
m 15 LP {a\Y} ab a
f 16 LP {a\Y} a-
f 17 LP {a\Y} a
f 18 LP {-\Y} -a
m 19 LP {-\Y} -% -
f 20 LP {\Y-} a-
e 21 - {[[:<:]]*} BADRPT
e 22 - {[[:>:]]*} BADRPT
e 23 b {\<*} BADRPT
e 24 b {\>*} BADRPT
e 25 - {\y*} BADRPT
e 26 - {\Y*} BADRPT
m 27 LP {\ma} a a
f 28 LP {\ma} ba
m 29 LP {a\M} a a
f 30 LP {a\M} ab
f 31 ILP {\Ma} a
f 32 ILP {a\m} a
doing 12 "character classes"
m 1 LP {a\db} a0b a0b
f 2 LP {a\db} axb
f 3 LP {a\Db} a0b
m 4 LP {a\Db} axb axb
m 5 LP "a\\sb" "a b" "a b"
m 6 LP "a\\sb" "a\tb" "a\tb"
m 7 LP "a\\sb" "a\nb" "a\nb"
f 8 LP {a\sb} axb
m 9 LP {a\Sb} axb axb
f 10 LP "a\\Sb" "a b"
m 11 LP {a\wb} axb axb
f 12 LP {a\wb} a-b
f 13 LP {a\Wb} axb
m 14 LP {a\Wb} a-b a-b
m 15 LP {\y\w+z\y} adze-guz guz
m 16 LPE {a[\d]b} a1b a1b
m 17 LPE "a\[\\s]b" "a b" "a b"
m 18 LPE {a[\w]b} axb axb
doing 13 "escapes"
e 1 & "a\\" EESCAPE
m 2 - {a\<b} a<b a<b
m 3 e {a\<b} a<b a<b
m 4 bAS {a\wb} awb awb
m 5 eAS {a\wb} awb awb
m 6 PL "a\\ab" "a\007b" "a\007b"
m 7 P "a\\bb" "a\bb" "a\bb"
m 8 P {a\Bb} "a\\b" "a\\b"
m 9 MP "a\\chb" "a\bb" "a\bb"
m 10 MP "a\\cHb" "a\bb" "a\bb"
m 11 LMP "a\\e" "a\033" "a\033"
m 12 P "a\\fb" "a\fb" "a\fb"
m 13 P "a\\nb" "a\nb" "a\nb"
m 14 P "a\\rb" "a\rb" "a\rb"
m 15 P "a\\tb" "a\tb" "a\tb"
m 16 P "a\\u0008x" "a\bx" "a\bx"
e 17 - {a\u008x} EESCAPE
m 18 P "a\\u00088x" "a\b8x" "a\b8x"
m 19 P "a\\U00000008x" "a\bx" "a\bx"
e 20 - {a\U0000008x} EESCAPE
m 21 P "a\\vb" "a\vb" "a\vb"
m 22 MP "a\\x08x" "a\bx" "a\bx"
e 23 - {a\xq} EESCAPE
m 24 MP "a\\x0008x" "a\bx" "a\bx"
e 25 - {a\z} EESCAPE
m 26 MP "a\\010b" "a\bb" "a\bb"
doing 14 "back references"
# ugh
m 1 RP {a(b*)c\1} abbcbb abbcbb bb
m 2 RP {a(b*)c\1} ac ac ""
f 3 RP {a(b*)c\1} abbcb
m 4 RP {a(b*)\1} abbcbb abb b
m 5 RP {a(b|bb)\1} abbcbb abb b
m 6 RP {a([bc])\1} abb abb b
f 7 RP {a([bc])\1} abc
m 8 RP {a([bc])\1} abcabb abb b
f 9 RP {a([bc])*\1} abc
f 10 RP {a([bc])\1} abB
m 11 iRP {a([bc])\1} abB abB b
m 12 RP {a([bc])\1+} abbb abbb b
m 13 QRP "a(\[bc])\\1{3,4}" abbbb abbbb b
f 14 QRP "a(\[bc])\\1{3,4}" abbb
m 15 RP {a([bc])\1*} abbb abbb b
m 16 RP {a([bc])\1*} ab ab b
m 17 RP {a([bc])(\1*)} ab ab b ""
e 18 - {a((b)\1)} ESUBREG
e 19 - {a(b)c\2} ESUBREG
m 20 bR {a\(b*\)c\1} abbcbb abbcbb bb
doing 15 "octal escapes vs back references"
# initial zero is always octal
m 1 MP "a\\010b" "a\bb" "a\bb"
m 2 MP "a\\0070b" "a\0070b" "a\0070b"
m 3 MP "a\\07b" "a\007b" "a\007b"
m 4 MP "a(b)(b)(b)(b)(b)(b)(b)(b)(b)(b)\\07c" "abbbbbbbbbb\007c" \
"abbbbbbbbbb\007c" "b" "b" "b" "b" "b" "b" \
"b" "b" "b" "b"
# a single digit is always a backref
e 5 - {a\7b} ESUBREG
# otherwise it's a backref only if within range (barf!)
m 6 MP "a\\10b" "a\bb" "a\bb"
m 7 MP {a\101b} aAb aAb
m 8 RP {a(b)(b)(b)(b)(b)(b)(b)(b)(b)(b)\10c} abbbbbbbbbbbc \
abbbbbbbbbbbc b b b b b b b \
b b b
# but we're fussy about border cases -- guys who want octal should use the zero
e 9 - {a((((((((((b\10))))))))))c} ESUBREG
# BREs don't have octal, EREs don't have backrefs
m 10 MP "a\\12b" "a\nb" "a\nb"
e 11 b {a\12b} ESUBREG
m 12 eAS {a\12b} a12b a12b
doing 16 "expanded syntax"
m 1 xP "a b c" "abc" "abc"
m 2 xP "a b #oops\nc\td" "abcd" "abcd"
m 3 x "a\\ b\\\tc" "a b\tc" "a b\tc"
m 4 xP "a b\\#c" "ab#c" "ab#c"
m 5 xP "a b\[c d]e" "ab e" "ab e"
m 6 xP "a b\[c#d]e" "ab#e" "ab#e"
m 7 xP "a b\[c#d]e" "abde" "abde"
m 8 xSPB "ab{ d" "ab\{d" "ab\{d"
m 9 xPQ "ab{ 1 , 2 }c" "abc" "abc"
doing 17 "misc syntax"
m 1 P a(?#comment)b ab ab
doing 18 "unmatchable REs"
f 1 I a^b ab
doing 19 "case independence"
m 1 &i ab Ab Ab
m 2 &i {a[bc]} aC aC
f 3 &i {a[^bc]} aB
m 4 &iM {a[b-d]} aC aC
f 5 &iM {a[^b-d]} aC
doing 20 "directors and embedded options"
e 1 & ***? BADPAT
m 2 q ***? ***? ***?
m 3 &P ***=a*b a*b a*b
m 4 q ***=a*b ***=a*b ***=a*b
m 5 bLP {***:\w+} ab ab
m 6 eLP {***:\w+} ab ab
e 7 & ***:***=a*b BADRPT
m 8 &P ***:(?b)a+b a+b a+b
m 9 P (?b)a+b a+b a+b
e 10 e {(?b)\w+} BADRPT
m 11 bAS {(?b)\w+} (?b)w+ (?b)w+
m 12 iP (?c)a a a
f 13 iP (?c)a A
m 14 APS {(?e)\W+} WW WW
m 15 P (?i)a+ Aa Aa
f 16 P "(?m)a.b" "a\nb"
m 17 P "(?m)^b" "a\nb" "b"
f 18 P "(?n)a.b" "a\nb"
m 19 P "(?n)^b" "a\nb" "b"
f 20 P "(?p)a.b" "a\nb"
f 21 P "(?p)^b" "a\nb"
m 22 P (?q)a+b a+b a+b
m 23 nP "(?s)a.b" "a\nb" "a\nb"
m 24 xP "(?t)a b" "a b" "a b"
m 25 P "(?w)a.b" "a\nb" "a\nb"
m 26 P "(?w)^b" "a\nb" "b"
m 27 P "(?x)a b" "ab" "ab"
e 28 - (?z)ab BADOPT
m 29 P (?ici)a+ Aa Aa
e 30 P (?i)(?q)a+ BADRPT
m 31 P (?q)(?i)a+ (?i)a+ (?i)a+
m 32 P (?qe)a+ a a
m 33 xP "(?q)a b" "a b" "a b"
m 34 P "(?qx)a b" "a b" "a b"
m 35 P (?qi)ab Ab Ab
doing 21 "capturing"
m 1 - a(b)c abc abc b
m 2 P a(?:b)c xabc abc
m 3 - a((b))c xabcy abc b b
m 4 P a(?:(b))c abcy abc b
m 5 P a((?:b))c abc abc b
m 6 P a(?:(?:b))c abc abc
i 7 Q "a(b){0}c" ac {0 1} {-1 -1}
m 8 - a(b)c(d)e abcde abcde b d
m 9 - (b)c(d)e bcde bcde b d
m 10 - a(b)(d)e abde abde b d
m 11 - a(b)c(d) abcd abcd b d
m 12 - (ab)(cd) xabcdy abcd ab cd
m 13 - a(b)?c xabcy abc b
i 14 - a(b)?c xacy {1 2} {-1 -1}
m 15 - a(b)?c(d)?e xabcdey abcde b d
i 16 - a(b)?c(d)?e xacdey {1 4} {-1 -1} {3 3}
i 17 - a(b)?c(d)?e xabcey {1 4} {2 2} {-1 -1}
i 18 - a(b)?c(d)?e xacey {1 3} {-1 -1} {-1 -1}
m 19 - a(b)*c xabcy abc b
i 20 - a(b)*c xabbbcy {1 5} {4 4}
i 21 - a(b)*c xacy {1 2} {-1 -1}
m 22 - a(b*)c xabbbcy abbbc bbb
m 23 - a(b*)c xacy ac ""
f 24 - a(b)+c xacy
m 25 - a(b)+c xabcy abc b
i 26 - a(b)+c xabbbcy {1 5} {4 4}
m 27 - a(b+)c xabbbcy abbbc bbb
i 28 Q "a(b){2,3}c" xabbbcy {1 5} {4 4}
i 29 Q "a(b){2,3}c" xabbcy {1 4} {3 3}
f 30 Q "a(b){2,3}c" xabcy
m 31 LP "\\y(\\w+)\\y" "-- abc-" "abc" "abc"
m 32 - a((b|c)d+)+ abacdbd acdbd bd b
m 33 N (.*).* abc abc abc
m 34 N (a*)* bc "" ""
doing 22 "multicharacter collating elements"
# again ugh
m 1 &+L {a[c]e} ace ace
f 2 &+IL {a[c]h} ach
m 3 &+L {a[[.ch.]]} ach ach
f 4 &+L {a[[.ch.]]} ace
m 5 &+L {a[c[.ch.]]} ac ac
m 6 &+L {a[c[.ch.]]} ace ac
m 7 &+L {a[c[.ch.]]} ache ach
f 8 &+L {a[^c]e} ace
m 9 &+L {a[^c]e} abe abe
m 10 &+L {a[^c]e} ache ache
f 11 &+L {a[^[.ch.]]} ach
m 12 &+L {a[^[.ch.]]} ace ac
m 13 &+L {a[^[.ch.]]} ac ac
m 14 &+L {a[^[.ch.]]} abe ab
f 15 &+L {a[^c[.ch.]]} ach
f 16 &+L {a[^c[.ch.]]} ace
f 17 &+L {a[^c[.ch.]]} ac
m 18 &+L {a[^c[.ch.]]} abe ab
m 19 &+L {a[^b]} ac ac
m 20 &+L {a[^b]} ace ac
m 21 &+L {a[^b]} ach ach
f 22 &+L {a[^b]} abe
doing 23 "lookahead constraints"
m 1 HP a(?=b)b* ab ab
f 2 HP a(?=b)b* a
m 3 HP a(?=b)b*(?=c)c* abc abc
f 4 HP a(?=b)b*(?=c)c* ab
f 5 HP a(?!b)b* ab
m 6 HP a(?!b)b* a a
m 7 HP (?=b)b b b
f 8 HP (?=b)b a
doing 24 "non-greedy quantifiers"
m 1 PT ab+? abb ab
m 2 PT ab+?c abbc abbc
m 3 PT ab*? abb a
m 4 PT ab*?c abbc abbc
m 5 PT ab?? ab a
m 6 PT ab??c abc abc
m 7 PQT "ab{2,4}?" abbbb abb
m 8 PQT "ab{2,4}?c" abbbbc abbbbc
m 9 - 3z* 123zzzz456 3zzzz
m 10 PT 3z*? 123zzzz456 3
m 11 - z*4 123zzzz456 zzzz4
m 12 PT z*?4 123zzzz456 zzzz4
doing 25 "mixed quantifiers"
# this is very incomplete as yet
# should include |
m 1 PNT {^(.*?)(a*)$} xyza xyza xyz a
m 2 PNT {^(.*?)(a*)$} xyzaa xyzaa xyz aa
m 3 PNT {^(.*?)(a*)$} xyz xyz xyz ""
doing 26 "tricky cases"
# attempts to trick the matcher into accepting a short match
m 1 - (week|wee)(night|knights) weeknights weeknights \
wee knights
m 2 RP {a(bc*).*\1} abccbccb abccbccb b
m 3 - {a(b.[bc]*)+} abcbd abcbd bd
doing 27 "implementation misc."
# duplicate arcs are suppressed
m 1 P a(?:b|b)c abc abc
# make color/subcolor relationship go back and forth
m 2 & {[ab][ab][ab]} aba aba
m 3 & {[ab][ab][ab][ab][ab][ab][ab]} abababa abababa
doing 28 "boundary busters etc."
# color-descriptor allocation changes at 10
m 1 & abcdefghijkl abcdefghijkl abcdefghijkl
# so does arc allocation
m 2 P a(?:b|c|d|e|f|g|h|i|j|k|l|m)n agn agn
# subexpression tracking also at 10
m 3 - a(((((((((((((b)))))))))))))c abc abc b b b b b b b b b b b b b
# state-set handling changes slightly at unsigned size (might be 64...)
# (also stresses arc allocation)
m 4 Q "ab{1,100}c" abbc abbc
m 5 Q "ab{1,100}c" abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc \
abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc
m 6 Q "ab{1,100}c" \
abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc \
abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc
# force small cache and bust it, several ways
m 7 LP {\w+abcdefgh} xyzabcdefgh xyzabcdefgh
m 8 %LP {\w+abcdefgh} xyzabcdefgh xyzabcdefgh
m 9 %LP {\w+abcdefghijklmnopqrst} xyzabcdefghijklmnopqrst \
xyzabcdefghijklmnopqrst
i 10 %LP {\w+(abcdefgh)?} xyz {0 2} {-1 -1}
i 11 %LP {\w+(abcdefgh)?} xyzabcdefg {0 9} {-1 -1}
i 12 %LP {\w+(abcdefghijklmnopqrst)?} xyzabcdefghijklmnopqrs \
{0 21} {-1 -1}
doing 29 "incomplete matches"
p 1 t def abc {3 2} ""
p 2 t bcd abc {1 2} ""
p 3 t abc abab {0 3} ""
p 4 t abc abdab {3 4} ""
i 5 t abc abc {0 2} {0 2}
i 6 t abc xyabc {2 4} {2 4}
p 7 t abc+ xyab {2 3} ""
i 8 t abc+ xyabc {2 4} {2 4}
knownBug i 9 t abc+ xyabcd {2 4} {6 5}
i 10 t abc+ xyabcdd {2 4} {7 6}
p 11 tPT abc+? xyab {2 3} ""
# the retain numbers in these two may look wrong, but they aren't
i 12 tPT abc+? xyabc {2 4} {5 4}
i 13 tPT abc+? xyabcc {2 4} {6 5}
i 14 tPT abc+? xyabcd {2 4} {6 5}
i 15 tPT abc+? xyabcdd {2 4} {7 6}
i 16 t abcd|bc xyabc {3 4} {2 4}
p 17 tn .*k "xx\nyyy" {3 5} ""
doing 30 "misc. oddities and old bugs"
e 1 & *** BADRPT
m 2 N a?b* abb abb
m 3 N a?b* bb bb
m 4 & a*b aab aab
m 5 & ^a*b aaaab aaaab
m 6 &M {[0-6][1-2][0-3][0-6][1-6][0-6]} 010010 010010
# temporary REG_BOSONLY kludge
m 7 s abc abcd abc
f 8 s abc xabcd
# back to normal stuff
m 9 HLP {(?n)^(?![t#])\S+} "tk\n\n#\n#\nit0" it0
# flush any leftover complaints
doing 0 "flush"
# Tests resulting from bugs reported by users
test reg-31.1 {[[:xdigit:]] behaves correctly when followed by [[:space:]]} {
set str {2:::DebugWin32}
set re {([[:xdigit:]])([[:space:]]*)}
list [regexp $re $str match xdigit spaces] $match $xdigit $spaces
# Code used to produce {1 2:::DebugWin32 2 :::DebugWin32} !!!
} {1 2 2 {}}
test reg-32.1 {canmatch functionality -- at end} {
set pat {blah}
set line "asd asd"
# can match at the final d, if '%' follows
set res [testregexp -xflags -- c $pat $line resvar]
lappend res $resvar
} {0 7}
test reg-32.2 {canmatch functionality -- at end} {
set pat {s%$}
set line "asd asd"
# can only match after the end of the string
set res [testregexp -xflags -- c $pat $line resvar]
lappend res $resvar
} {0 7}
test reg-32.3 {canmatch functionality -- not last char} {
set pat {[^d]%$}
set line "asd asd"
# can only match after the end of the string
set res [testregexp -xflags -- c $pat $line resvar]
lappend res $resvar
} {0 7}
test reg-32.3.1 {canmatch functionality -- no match} {
set pat {\Zx}
set line "asd asd"
# can match the last char, if followed by x
set res [testregexp -xflags -- c $pat $line resvar]
lappend res $resvar
} {0 -1}
test reg-32.4 {canmatch functionality -- last char} {knownBug} {
set pat {.x}
set line "asd asd"
# can match the last char, if followed by x
set res [testregexp -xflags -- c $pat $line resvar]
lappend res $resvar
} {0 6}
test reg-32.4.1 {canmatch functionality -- last char} {knownBug} {
set pat {.x$}
set line "asd asd"
# can match the last char, if followed by x
set res [testregexp -xflags -- c $pat $line resvar]
lappend res $resvar
} {0 6}
test reg-32.5 {canmatch functionality -- last char} {knownBug} {
set pat {.[^d]x$}
set line "asd asd"
# can match the last char, if followed by not-d and x.
set res [testregexp -xflags -- c $pat $line resvar]
lappend res $resvar
} {0 6}
test reg-32.6 {canmatch functionality -- last char} {knownBug} {
set pat {[^a]%[^\r\n]*$}
set line "asd asd"
# can match at the final d, if '%' follows
set res [testregexp -xflags -- c $pat $line resvar]
lappend res $resvar
} {0 6}
test reg-32.7 {canmatch functionality -- last char} {knownBug} {
set pat {[^a]%$}
set line "asd asd"
# can match at the final d, if '%' follows
set res [testregexp -xflags -- c $pat $line resvar]
lappend res $resvar
} {0 6}
test reg-32.8 {canmatch functionality -- last char} {knownBug} {
set pat {[^x]%$}
set line "asd asd"
# can match at the final d, if '%' follows
set res [testregexp -xflags -- c $pat $line resvar]
lappend res $resvar
} {0 6}
test reg-32.9 {canmatch functionality -- more complex case} {knownBug} {
set pat {((\B\B|\Bh+line)[ \t]*|[^\B]%[^\r\n]*)$}
set line "asd asd"
# can match at the final d, if '%' follows
set res [testregexp -xflags -- c $pat $line resvar]
lappend res $resvar
} {0 6}
# Tests reg-33.*: Checks for bug fixes
test reg-33.1 {Bug 230589} {
regexp {[ ]*(^|[^%])%V} "*%V2" m s
} 1
test reg-33.2 {Bug 504785} {
regexp -inline {([^_.]*)([^.]*)\.(..)(.).*} bbcos_001_c01.q1la
} {bbcos_001_c01.q1la bbcos _001_c01 q1 l}
test reg-33.3 {Bug 505048} {
regexp {\A\s*[^<]*\s*<([^>]+)>} a<a>
} 1
test reg-33.4 {Bug 505048} {
regexp {\A\s*([^b]*)b} ab
} 1
test reg-33.5 {Bug 505048} {
regexp {\A\s*[^b]*(b)} ab
} 1
test reg-33.6 {Bug 505048} {
regexp {\A(\s*)[^b]*(b)} ab
} 1
test reg-33.7 {Bug 505048} {
regexp {\A\s*[^b]*b} ab
} 1
test reg-33.8 {Bug 505048} {
regexp -inline {\A\s*[^b]*b} ab
} ab
test reg-33.9 {Bug 505048} {
regexp -indices -inline {\A\s*[^b]*b} ab
} {{0 1}}
test reg-33.10 {Bug 840258} {
regsub {(^|\n)+\.*b} \n.b {} tmp
} 1
test reg-33.11 {Bug 840258} {
regsub {(^|[\n\r]+)\.*\?<.*?(\n|\r)+} \
"TQ\r\n.?<5000267>Test already stopped\r\n" {} tmp
} 1
# cleanup
::tcltest::cleanupTests
return