Update.

* posix/Makefile: Add rules to build and run tst-rxspencer. (distribute): Add rxspencer/tests and rxspencer/COPYRIGHT. * posix/tst-rxspencer.c: New file. * posix/rxspencer/tests: New file. * posix/rxspencer/COPYRIGHT: New file. Patch mostly by Jakub Jelinek.
2024-11-21 20:40:05 +00:00 · 2003-11-13 20:52:55 +00:00 · 2003-11-13 20:52:55 +00:00 · 78c81ab7b4
commit 78c81ab7b4
parent 78d8b07a44
5 changed files with 1051 additions and 2 deletions
--- a/7
+++ b/7
@ -1,5 +1,12 @@
 2003-11-13  Ulrich Drepper  <drepper@redhat.com>
 	* posix/Makefile: Add rules to build and run tst-rxspencer.
 	(distribute): Add rxspencer/tests and rxspencer/COPYRIGHT.
 	* posix/tst-rxspencer.c: New file.
 	* posix/rxspencer/tests: New file.
 	* posix/rxspencer/COPYRIGHT: New file.
 	Patch mostly by Jakub Jelinek.
 	* posix/regcomp.c (parse_bracket_exp): Don't check for range if
 	this is no option given the first token.
--- a/posix/Makefile
+++ b/posix/Makefile
@ -34,7 +34,7 @@ distribute := confstr.h TESTS TESTS2C.sed testcases.h \
 	      PTESTS PTESTS2C.sed ptestcases.h \
 	      globtest.c globtest.sh wordexp-tst.sh annexc.c fnmatch_loop.c   \
 	      spawn_int.h tst-getconf.sh regcomp.c regexec.c regex_internal.c \
-	      regex_internal.h fork.h
+	      regex_internal.h fork.h rxspencer/tests rxspencer/COPYRIGHT
 routines :=								      \
 	uname								      \
@ -78,7 +78,7 @@ tests		:= tstgetopt testfnm runtests runptests	     \
 		   bug-regex8 bug-regex9 bug-regex10 bug-regex11 bug-regex12 \
 		   bug-regex13 bug-regex14 bug-regex15 bug-regex16 \
 		   bug-regex17 bug-regex18 bug-regex19 bug-regex20 \
-		   tst-nice tst-nanosleep transbug
+		   tst-nice tst-nanosleep transbug tst-rxspencer
 ifeq (yes,$(build-shared))
 test-srcs	:= globtest
 tests           += wordexp-test tst-exec tst-spawn
@ -147,6 +147,7 @@ tst-exec-ARGS = -- $(built-program-cmd)
 tst-spawn-ARGS = -- $(built-program-cmd)
 tst-dir-ARGS = `pwd` `cd $(common-objdir)/$(subdir); pwd` `cd $(common-objdir); pwd` $(objpfx)tst-dir
 tst-chmod-ARGS = `pwd`
 tst-rxspencer-ARGS = rxspencer/tests
 tst-fnmatch-ENV = LOCPATH=$(common-objpfx)localedata
 tst-regexloc-ENV = LOCPATH=$(common-objpfx)localedata
--- a/posix/rxspencer/COPYRIGHT
+++ b/posix/rxspencer/COPYRIGHT
@ -0,0 +1,20 @@
 Copyright 1992, 1993, 1994, 1997 Henry Spencer.  All rights reserved.
 This software is not subject to any license of the American Telephone
 and Telegraph Company or of the Regents of the University of California.
 Permission is granted to anyone to use this software for any purpose on
 any computer system, and to alter it and redistribute it, subject
 to the following restrictions:
 1. The author is not responsible for the consequences of use of this
   software, no matter how awful, even if they arise from flaws in it.
 2. The origin of this software must not be misrepresented, either by
   explicit claim or by omission.  Since few users ever read sources,
   credits must appear in the documentation.
 3. Altered versions must be plainly marked as such, and must not be
   misrepresented as being the original software.  Since few users
   ever read sources, credits must appear in the documentation.
 4. This notice may not be removed or altered.
--- a/posix/rxspencer/tests
+++ b/posix/rxspencer/tests
@ -0,0 +1,506 @@
 # regular expression test set
 # Lines are at least three fields, separated by one or more tabs.  "" stands
 # for an empty field.  First field is an RE.  Second field is flags.  If
 # C flag given, regcomp() is expected to fail, and the third field is the
 # error name (minus the leading REG_).
 #
 # Otherwise it is expected to succeed, and the third field is the string to
 # try matching it against.  If there is no fourth field, the match is
 # expected to fail.  If there is a fourth field, it is the substring that
 # the RE is expected to match.  If there is a fifth field, it is a comma-
 # separated list of what the subexpressions should match, with - indicating
 # no match for that one.  In both the fourth and fifth fields, a (sub)field
 # starting with @ indicates that the (sub)expression is expected to match
 # a null string followed by the stuff after the @; this provides a way to
 # test where null strings match.  The character `N' in REs and strings
 # is newline, `S' is space, `T' is tab, `Z' is NUL.
 #
 # The full list of flags:
 #	-	placeholder, does nothing
 #	b	RE is a BRE, not an ERE
 #	&	try it as both an ERE and a BRE
 #	C	regcomp() error expected, third field is error name
 #	i	REG_ICASE
 #	m	("mundane") REG_NOSPEC
 #	s	REG_NOSUB (not really testable)
 #	n	REG_NEWLINE
 #	^	REG_NOTBOL
 #	$	REG_NOTEOL
 #	#	REG_STARTEND (see below)
 #	p	REG_PEND
 #
 # For REG_STARTEND, the start/end offsets are those of the substring
 # enclosed in ().
 # basics
 a		&	a	a
 abc		&	abc	abc
 abc|de		-	abc	abc
 a|b|c		-	abc	a
 # parentheses and perversions thereof
 a(b)c		-	abc	abc
 a\(b\)c		b	abc	abc
 a(		C	EPAREN
 a(		b	a(	a(
 a\(		-	a(	a(
 a\(		bC	EPAREN
 a\(b		bC	EPAREN
 a(b		C	EPAREN
 a(b		b	a(b	a(b
 # gag me with a right parenthesis -- 1003.2 goofed here (my fault, partly)
 a)		-	a)	a)
 )		-	)	)
 # end gagging (in a just world, those *should* give EPAREN)
 a)		b	a)	a)
 a\)		bC	EPAREN
 \)		bC	EPAREN
 a()b		-	ab	ab
 a\(\)b		b	ab	ab
 # anchoring and REG_NEWLINE
 ^abc$		&	abc	abc
 a^b		-	a^b
 a^b		b	a^b	a^b
 a$b		-	a$b
 a$b		b	a$b	a$b
 ^		&	abc	@abc
 $		&	abc	@
 ^$		&	""	@
 $^		-	""	@
 \($\)\(^\)	b	""	@
 # stop retching, those are legitimate (although disgusting)
 ^^		-	""	@
 $$		-	""	@
 b$		&	abNc
 b$		&n	abNc	b
 ^b$		&	aNbNc
 ^b$		&n	aNbNc	b
 ^$		&n	aNNb	@Nb
 ^$		n	abc
 ^$		n	abcN	@
 $^		n	aNNb	@Nb
 \($\)\(^\)	bn	aNNb	@Nb
 ^^		n^	aNNb	@Nb
 $$		n	aNNb	@NN
 ^a		^	a
 a$		$	a
 ^a		^n	aNb
 ^b		^n	aNb	b
 a$		$n	bNa
 b$		$n	bNa	b
 a*(^b$)c*	-	b	b
 a*\(^b$\)c*	b	b	b
 # certain syntax errors and non-errors
 |		C	EMPTY
 |		b	|	|
 *		C	BADRPT
 *		b	*	*
 +		C	BADRPT
 ?		C	BADRPT
 ""		&C	EMPTY
 ()		-	abc	@abc
 \(\)		b	abc	@abc
 a||b		C	EMPTY
 |ab		C	EMPTY
 ab|		C	EMPTY
 (|a)b		C	EMPTY
 (a|)b		C	EMPTY
 (*a)		C	BADRPT
 (+a)		C	BADRPT
 (?a)		C	BADRPT
 ({1}a)		C	BADRPT
 \(\{1\}a\)	bC	BADRPT
 (a|*b)		C	BADRPT
 (a|+b)		C	BADRPT
 (a|?b)		C	BADRPT
 (a|{1}b)	C	BADRPT
 ^*		C	BADRPT
 ^*		b	*	*
 ^+		C	BADRPT
 ^?		C	BADRPT
 ^{1}		C	BADRPT
 ^\{1\}		bC	BADRPT
 # metacharacters, backslashes
 a.c		&	abc	abc
 a[bc]d		&	abd	abd
 a\*c		&	a*c	a*c
 a\\b		&	a\b	a\b
 a\\\*b		&	a\*b	a\*b
 # The following test is wrong.  Using \b in a BRE or ERE is undefined.
 # a\bc		&	abc	abc
 a\		&C	EESCAPE
 a\\bc		&	a\bc	a\bc
 \{		bC	BADRPT
 a\[b		&	a[b	a[b
 a[b		&C	EBRACK
 # trailing $ is a peculiar special case for the BRE code
 a$		&	a	a
 a$		&	a$
 a\$		&	a
 a\$		&	a$	a$
 a\\$		&	a
 a\\$		&	a$
 a\\$		&	a\$
 a\\$		&	a\	a\
 # back references, ugh
 a\(b\)\2c	bC	ESUBREG
 a\(b\1\)c	bC	ESUBREG
 a\(b*\)c\1d	b	abbcbbd	abbcbbd	bb
 a\(b*\)c\1d	b	abbcbd
 a\(b*\)c\1d	b	abbcbbbd
 ^\(.\)\1	b	abc
 a\([bc]\)\1d	b	abcdabbd	abbd	b
 a\(\([bc]\)\2\)*d	b	abbccd	abbccd
 a\(\([bc]\)\2\)*d	b	abbcbd
 # actually, this next one probably ought to fail, but the spec is unclear
 a\(\(b\)*\2\)*d		b	abbbd	abbbd
 # here is a case that no NFA implementation does right
 \(ab*\)[ab]*\1	b	ababaaa	ababaaa	a
 # check out normal matching in the presence of back refs
 \(a\)\1bcd	b	aabcd	aabcd
 \(a\)\1bc*d	b	aabcd	aabcd
 \(a\)\1bc*d	b	aabd	aabd
 \(a\)\1bc*d	b	aabcccd	aabcccd
 \(a\)\1bc*[ce]d	b	aabcccd	aabcccd
 ^\(a\)\1b\(c\)*cd$	b	aabcccd	aabcccd
 # ordinary repetitions
 ab*c		&	abc	abc
 ab+c		-	abc	abc
 ab?c		-	abc	abc
 a\(*\)b		b	a*b	a*b
 a\(**\)b	b	ab	ab
 a\(***\)b	bC	BADRPT
 *a		b	*a	*a
 **a		b	a	a
 ***a		bC	BADRPT
 # the dreaded bounded repetitions
 # The following two tests are not correct:
 #{		&	{	{
 #{abc		&	{abc	{abc
 # '{' is always a special char outside bracket expressions.  So test only BRE:
 {		b	{	{
 {abc		b	{abc	{abc
 {1		C	BADRPT
 {1}		C	BADRPT
 # Same reason as for the two tests above:
 #a{b		&	a{b	a{b
 a{b		b	a{b	a{b
 a{1}b		-	ab	ab
 a\{1\}b		b	ab	ab
 a{1,}b		-	ab	ab
 a\{1,\}b	b	ab	ab
 a{1,2}b		-	aab	aab
 a\{1,2\}b	b	aab	aab
 a{1		C	EBRACE
 a\{1		bC	EBRACE
 a{1a		C	EBRACE
 a\{1a		bC	EBRACE
 a{1a}		C	BADBR
 a\{1a\}		bC	BADBR
 # These four tests check for undefined behavior.  Our implementation does
 # something different.
 #a{,2}		-	a{,2}	a{,2}
 #a\{,2\}		bC	BADBR
 #a{,}		-	a{,}	a{,}
 #a\{,\}		bC	BADBR
 a{1,x}		C	BADBR
 a\{1,x\}	bC	BADBR
 a{1,x		C	EBRACE
 a\{1,x		bC	EBRACE
 # These two tests probably fail due to an arbitrary limit on the number of
 # repetitions in the other implementation.
 #a{300}		C	BADBR
 #a\{300\}	bC	BADBR
 a{1,0}		C	BADBR
 a\{1,0\}	bC	BADBR
 ab{0,0}c	-	abcac	ac
 ab\{0,0\}c	b	abcac	ac
 ab{0,1}c	-	abcac	abc
 ab\{0,1\}c	b	abcac	abc
 ab{0,3}c	-	abbcac	abbc
 ab\{0,3\}c	b	abbcac	abbc
 ab{1,1}c	-	acabc	abc
 ab\{1,1\}c	b	acabc	abc
 ab{1,3}c	-	acabc	abc
 ab\{1,3\}c	b	acabc	abc
 ab{2,2}c	-	abcabbc	abbc
 ab\{2,2\}c	b	abcabbc	abbc
 ab{2,4}c	-	abcabbc	abbc
 ab\{2,4\}c	b	abcabbc	abbc
 ((a{1,10}){1,10}){1,10}	-	a	a	a,a
 # multiple repetitions
 # Wow, there is serious disconnect here.  The ERE grammar is like this:
 # ERE_expression : one_char_or_coll_elem_ERE
 #                | '^'
 #                | '$'
 #                | '(' extended_reg_exp ')'
 #                | ERE_expression ERE_dupl_symbol
 #                ;
 # where ERE_dupl_symbol is any of the repetition methods.  It is clear from
 # this that consecutive repetition is OK.  On top of this, the one test not
 # marked as failing must fail.  For BREs the situation is different, so we
 # use the four tests.
 #a**		&C	BADRPT
 a**		bC	BADRPT
 #a++		C	BADRPT
 #a??		C	BADRPT
 #a*+		C	BADRPT
 #a*?		C	BADRPT
 #a+*		C	BADRPT
 #a+?		C	BADRPT
 #a?*		C	BADRPT
 #a?+		C	BADRPT
 #a{1}{1}		C	BADRPT
 #a*{1}		C	BADRPT
 #a+{1}		C	BADRPT
 #a?{1}		C	BADRPT
 #a{1}*		C	BADRPT
 #a{1}+		C	BADRPT
 #a{1}?		C	BADRPT
 #a*{b}		-	a{b}	a{b}
 a\{1\}\{1\}	bC	BADRPT
 a*\{1\}		bC	BADRPT
 a\{1\}*		bC	BADRPT
 # brackets, and numerous perversions thereof
 a[b]c		&	abc	abc
 a[ab]c		&	abc	abc
 a[^ab]c		&	adc	adc
 a[]b]c		&	a]c	a]c
 a[[b]c		&	a[c	a[c
 a[-b]c		&	a-c	a-c
 a[^]b]c		&	adc	adc
 a[^-b]c		&	adc	adc
 a[b-]c		&	a-c	a-c
 a[b		&C	EBRACK
 a[]		&C	EBRACK
 a[1-3]c		&	a2c	a2c
 a[3-1]c		&C	ERANGE
 a[1-3-5]c	&C	ERANGE
 a[[.-.]--]c	&	a-c	a-c
 # I don't think the error value should be ERANGE since a[1-] would be
 # valid, too.  Expect EBRACK.
 #a[1-		&C	ERANGE
 a[1-		&C	EBRACK
 a[[.		&C	EBRACK
 a[[.x		&C	EBRACK
 a[[.x.		&C	EBRACK
 a[[.x.]		&C	EBRACK
 a[[.x.]]	&	ax	ax
 a[[.x,.]]	&C	ECOLLATE
 # XXX Doesn't work yet.
 # a[[.one.]]b	&	a1b	a1b
 a[[.notdef.]]b	&C	ECOLLATE
 a[[.].]]b	&	a]b	a]b
 a[[:alpha:]]c	&	abc	abc
 a[[:notdef:]]c	&C	ECTYPE
 a[[:		&C	EBRACK
 a[[:alpha	&C	EBRACK
 a[[:alpha:]	&C	EBRACK
 a[[:alpha,:]	&C	ECTYPE
 a[[:]:]]b	&C	ECTYPE
 a[[:-:]]b	&C	ECTYPE
 a[[:alph:]]	&C	ECTYPE
 a[[:alphabet:]]	&C	ECTYPE
 [[:alnum:]]+	-	-%@a0X-	a0X
 [[:alpha:]]+	-	-%@aX0-	aX
 [[:blank:]]+	-	aSSTb	SST
 [[:cntrl:]]+	-	aNTb	NT
 [[:digit:]]+	-	a019b	019
 [[:graph:]]+	-	Sa%bS	a%b
 [[:lower:]]+	-	AabC	ab
 [[:print:]]+	-	NaSbN	aSb
 [[:punct:]]+	-	S%-&T	%-&
 [[:space:]]+	-	aSNTb	SNT
 [[:upper:]]+	-	aBCd	BC
 [[:xdigit:]]+	-	p0f3Cq	0f3C
 a[[=b=]]c	&	abc	abc
 a[[=		&C	EBRACK
 a[[=b		&C	EBRACK
 a[[=b=		&C	EBRACK
 a[[=b=]		&C	EBRACK
 a[[=b,=]]	&C	ECOLLATE
 # XXX Doesn't work yet.
 #a[[=one=]]b	&	a1b	a1b
 # complexities
 a(((b)))c	-	abc	abc
 a(b|(c))d	-	abd	abd
 a(b*|c)d	-	abbd	abbd
 # just gotta have one DFA-buster, of course
 a[ab]{20}	-	aaaaabaaaabaaaabaaaab	aaaaabaaaabaaaabaaaab
 # and an inline expansion in case somebody gets tricky
 a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab]	-	aaaaabaaaabaaaabaaaab	aaaaabaaaabaaaabaaaab
 # and in case somebody just slips in an NFA...
 a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night)	-	aaaaabaaaabaaaabaaaabweeknights	aaaaabaaaabaaaabaaaabweeknights
 # fish for anomalies as the number of states passes 32
 12345678901234567890123456789	-	a12345678901234567890123456789b	12345678901234567890123456789
 123456789012345678901234567890	-	a123456789012345678901234567890b	123456789012345678901234567890
 1234567890123456789012345678901	-	a1234567890123456789012345678901b	1234567890123456789012345678901
 12345678901234567890123456789012	-	a12345678901234567890123456789012b	12345678901234567890123456789012
 123456789012345678901234567890123	-	a123456789012345678901234567890123b	123456789012345678901234567890123
 # and one really big one, beyond any plausible word width
 1234567890123456789012345678901234567890123456789012345678901234567890	-	a1234567890123456789012345678901234567890123456789012345678901234567890b	1234567890123456789012345678901234567890123456789012345678901234567890
 # fish for problems as brackets go past 8
 [ab][cd][ef][gh][ij][kl][mn]	-	xacegikmoq	acegikm
 [ab][cd][ef][gh][ij][kl][mn][op]	-	xacegikmoq	acegikmo
 [ab][cd][ef][gh][ij][kl][mn][op][qr]	-	xacegikmoqy	acegikmoq
 [ab][cd][ef][gh][ij][kl][mn][op][q]	-	xacegikmoqy	acegikmoq
 # subtleties of matching
 abc		&	xabcy	abc
 a\(b\)?c\1d	b	acd
 aBc		i	Abc	Abc
 a[Bc]*d		i	abBCcd	abBCcd
 0[[:upper:]]1	&i	0a1	0a1
 0[[:lower:]]1	&i	0A1	0A1
 a[^b]c		&i	abc
 a[^b]c		&i	aBc
 a[^b]c		&i	adc	adc
 [a]b[c]		-	abc	abc
 [a]b[a]		-	aba	aba
 [abc]b[abc]	-	abc	abc
 [abc]b[abd]	-	abd	abd
 a(b?c)+d	-	accd	accd
 (wee|week)(knights|night)	-	weeknights	weeknights
 (we|wee|week|frob)(knights|night|day)	-	weeknights	weeknights
 a[bc]d		-	xyzaaabcaababdacd	abd
 a[ab]c		-	aaabc	abc
 abc		s	abc	abc
 a*		&	b	@b
 # Let's have some fun -- try to match a C comment.
 # first the obvious, which looks okay at first glance...
 /\*.*\*/	-	/*x*/	/*x*/
 # but...
 /\*.*\*/	-	/*x*/y/*z*/	/*x*/y/*z*/
 # okay, we must not match */ inside; try to do that...
 /\*([^*]|\*[^/])*\*/	-	/*x*/	/*x*/
 /\*([^*]|\*[^/])*\*/	-	/*x*/y/*z*/	/*x*/
 # but...
 /\*([^*]|\*[^/])*\*/	-	/*x**/y/*z*/	/*x**/y/*z*/
 # and a still fancier version, which does it right (I think)...
 /\*([^*]|\*+[^*/])*\*+/	-	/*x*/	/*x*/
 /\*([^*]|\*+[^*/])*\*+/	-	/*x*/y/*z*/	/*x*/
 /\*([^*]|\*+[^*/])*\*+/	-	/*x**/y/*z*/	/*x**/
 /\*([^*]|\*+[^*/])*\*+/	-	/*x****/y/*z*/	/*x****/
 /\*([^*]|\*+[^*/])*\*+/	-	/*x**x*/y/*z*/	/*x**x*/
 /\*([^*]|\*+[^*/])*\*+/	-	/*x***x/y/*z*/	/*x***x/y/*z*/
 # subexpressions
 .*		-	abc	abc	-
 a(b)(c)d	-	abcd	abcd	b,c
 a(((b)))c	-	abc	abc	b,b,b
 a(b|(c))d	-	abd	abd	b,-
 a(b*|c|e)d	-	abbd	abbd	bb
 a(b*|c|e)d	-	acd	acd	c
 a(b*|c|e)d	-	ad	ad	@d
 a(b?)c		-	abc	abc	b
 a(b?)c		-	ac	ac	@c
 a(b+)c		-	abc	abc	b
 a(b+)c		-	abbbc	abbbc	bbb
 a(b*)c		-	ac	ac	@c
 (a|ab)(bc([de]+)f|cde)	-	abcdef	abcdef	a,bcdef,de
 # the regression tester only asks for 9 subexpressions
 a(b)(c)(d)(e)(f)(g)(h)(i)(j)k	-	abcdefghijk	abcdefghijk	b,c,d,e,f,g,h,i,j
 a(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)l	-	abcdefghijkl	abcdefghijkl	b,c,d,e,f,g,h,i,j,k
 a([bc]?)c	-	abc	abc	b
 a([bc]?)c	-	ac	ac	@c
 a([bc]+)c	-	abc	abc	b
 a([bc]+)c	-	abcc	abcc	bc
 a([bc]+)bc	-	abcbc	abcbc	bc
 a(bb+|b)b	-	abb	abb	b
 a(bbb+|bb+|b)b	-	abb	abb	b
 a(bbb+|bb+|b)b	-	abbb	abbb	bb
 a(bbb+|bb+|b)bb	-	abbb	abbb	b
 (.*).*		-	abcdef	abcdef	abcdef
 (a*)*		-	bc	@b	@b
 # do we get the right subexpression when it is used more than once?
 a(b|c)*d	-	ad	ad	-
 a(b|c)*d	-	abcd	abcd	c
 a(b|c)+d	-	abd	abd	b
 a(b|c)+d	-	abcd	abcd	c
 a(b|c?)+d	-	ad	ad	@d
 a(b|c?)+d	-	abcd	abcd	@d
 a(b|c){0,0}d	-	ad	ad	-
 a(b|c){0,1}d	-	ad	ad	-
 a(b|c){0,1}d	-	abd	abd	b
 a(b|c){0,2}d	-	ad	ad	-
 a(b|c){0,2}d	-	abcd	abcd	c
 a(b|c){0,}d	-	ad	ad	-
 a(b|c){0,}d	-	abcd	abcd	c
 a(b|c){1,1}d	-	abd	abd	b
 a(b|c){1,1}d	-	acd	acd	c
 a(b|c){1,2}d	-	abd	abd	b
 a(b|c){1,2}d	-	abcd	abcd	c
 a(b|c){1,}d	-	abd	abd	b
 a(b|c){1,}d	-	abcd	abcd	c
 a(b|c){2,2}d	-	acbd	acbd	b
 a(b|c){2,2}d	-	abcd	abcd	c
 a(b|c){2,4}d	-	abcd	abcd	c
 a(b|c){2,4}d	-	abcbd	abcbd	b
 a(b|c){2,4}d	-	abcbcd	abcbcd	c
 a(b|c){2,}d	-	abcd	abcd	c
 a(b|c){2,}d	-	abcbd	abcbd	b
 a(b+|((c)*))+d	-	abd	abd	@d,@d,-
 # XXX Needs to be checked.
 #a(b+|((c)*))+d	-	abcd	abcd	@d,@d,-
 # check out the STARTEND option
 [abc]		&#	a(b)c	b
 [abc]		&#	a(d)c
 [abc]		&#	a(bc)d	b
 [abc]		&#	a(dc)d	c
 .		&#	a()c
 b.*c		&#	b(bc)c	bc
 b.*		&#	b(bc)c	bc
 .*c		&#	b(bc)c	bc
 # plain strings, with the NOSPEC flag
 abc		m	abc	abc
 abc		m	xabcy	abc
 abc		m	xyz
 a*b		m	aba*b	a*b
 a*b		m	ab
 ""		mC	EMPTY
 # cases involving NULs
 aZb		&	a	a
 aZb		&p	a
 aZb		&p#	(aZb)	aZb
 aZ*b		&p#	(ab)	ab
 a.b		&#	(aZb)	aZb
 a.*		&#	(aZb)c	aZb
 # word boundaries (ick)
 [[:<:]]a	&	a	a
 [[:<:]]a	&	ba
 [[:<:]]a	&	-a	a
 a[[:>:]]	&	a	a
 a[[:>:]]	&	ab
 a[[:>:]]	&	a-	a
 [[:<:]]a.c[[:>:]]	&	axcd-dayc-dazce-abc	abc
 [[:<:]]a.c[[:>:]]	&	axcd-dayc-dazce-abc-q	abc
 [[:<:]]a.c[[:>:]]	&	axc-dayc-dazce-abc	axc
 [[:<:]]b.c[[:>:]]	&	a_bxc-byc_d-bzc-q	bzc
 [[:<:]].x..[[:>:]]	&	y_xa_-_xb_y-_xc_-axdc	_xc_
 [[:<:]]a_b[[:>:]]	&	x_a_b
 # past problems, and suspected problems
 (A[1])|(A[2])|(A[3])|(A[4])|(A[5])|(A[6])|(A[7])|(A[8])|(A[9])|(A[A])	-	A1	A1
 abcdefghijklmnop	i	abcdefghijklmnop	abcdefghijklmnop
 abcdefghijklmnopqrstuv	i	abcdefghijklmnopqrstuv	abcdefghijklmnopqrstuv
 (ALAK)|(ALT[AB])|(CC[123]1)|(CM[123]1)|(GAMC)|(LC[23][EO ])|(SEM[1234])|(SL[ES][12])|(SLWW)|(SLF )|(SLDT)|(VWH[12])|(WH[34][EW])|(WP1[ESN])	-	CC11	CC11
 CC[13]1|a{21}[23][EO][123][Es][12]a{15}aa[34][EW]aaaaaaa[X]a	-	CC11	CC11
 Char \([a-z0-9_]*\)\[.*	b	Char xyz[k	Char xyz[k	xyz
 a?b	-	ab	ab
 -\{0,1\}[0-9]*$	b	-5	-5
 a*a*a*a*a*a*a*	&	aaaaaa	aaaaaa
--- a/posix/tst-rxspencer.c
+++ b/posix/tst-rxspencer.c
@ -0,0 +1,515 @@
 /* Regular expression tests.
   Copyright (C) 2003 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Jakub Jelinek <jakub@redhat.com>, 2003.
   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.
   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.
   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */
 #include <sys/types.h>
 #include <mcheck.h>
 #include <regex.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <locale.h>
 #include <getopt.h>
 /* Expand the test file's escape letters in STR in place: `N' becomes a
    newline, `T' a tab, `S' a space and `Z' a NUL byte.  Scanning resumes
    directly behind each replaced character, so the bytes following a
    `Z' (up to the next NUL) are translated as well.  */
 static void
 replace_special_chars (char *str)
 {
   char *hit = strpbrk (str, "NTSZ");

   while (hit != NULL)
     {
       if (*hit == 'N')
         *hit = '\n';
       else if (*hit == 'T')
         *hit = '\t';
       else if (*hit == 'S')
         *hit = ' ';
       else if (*hit == 'Z')
         *hit = '\0';

       /* Continue with the byte after the one just rewritten.  */
       hit = strpbrk (hit + 1, "NTSZ");
     }
 }
 /* Rewrite STR in place from the rxspencer word-boundary notation to the
    GNU one: each "[[:<:]]" becomes "\<" and each "[[:>:]]" becomes "\>".
    Any other "[[:" sequence is skipped unchanged.  */
 static void
 glibc_re_syntax (char *str)
 {
   /* One past the terminating NUL, so that moving the tail also moves
      the terminator.  */
   char *limit = str + strlen (str) + 1;
   char *cur = str;

   while ((cur = strstr (cur, "[[:")) != NULL)
     {
       int is_boundary = ((cur[3] == '<' || cur[3] == '>')
                          && strncmp (cur + 4, ":]]", 3) == 0);

       if (!is_boundary)
         {
           cur += 3;
           continue;
         }

       /* Seven bytes shrink to two; close the five byte gap.  */
       cur[1] = cur[3];
       cur[0] = '\\';
       memmove (cur + 2, cur + 7, limit - cur - 7);
       limit -= 5;
       cur += 2;
     }
 }
 /* Store at DST the two byte UTF-8 sequence that stands in for the ASCII
    letter C and return the position behind it.  The mapping is
      a/A -> \'a / \'A,   b/B -> \v{c} / \v{C},
      c/C -> \v{d} / \v{D},   d/D -> \'e / \'E.
    For any other character nothing is written and DST is returned
    unchanged.  */
 static char *
 mb_replace (char *dst, const char c)
 {
   const char *utf8;

   switch (c)
     {
     case 'a': utf8 = "\xc3\xa1"; break;
     case 'A': utf8 = "\xc3\x81"; break;
     case 'b': utf8 = "\xc4\x8d"; break;
     case 'B': utf8 = "\xc4\x8c"; break;
     case 'c': utf8 = "\xc4\x8f"; break;
     case 'C': utf8 = "\xc4\x8e"; break;
     case 'd': utf8 = "\xc3\xa9"; break;
     case 'D': utf8 = "\xc3\x89"; break;
     default:
       return dst;
     }

   dst[0] = utf8[0];
   dst[1] = utf8[1];
   return dst + 2;
 }
 /* Return a freshly malloc'ed copy of STR in which every character that
    occurs in LETTERS has been passed through mb_replace; all other
    characters are copied verbatim.  The buffer is sized for the worst
    case of every byte growing to two.  Returns NULL if STR is NULL or
    the allocation fails.  The caller owns the result.  */
 static char *
 mb_frob_string (const char *str, const char *letters)
 {
   if (str == NULL)
     return NULL;

   char *out = malloc (2 * strlen (str) + 1);
   if (out == NULL)
     return NULL;

   char *wr = out;
   const char *rd = str;
   while (*rd != '\0')
     {
       if (strchr (letters, *rd) != NULL)
         wr = mb_replace (wr, *rd);
       else
         *wr++ = *rd;
       ++rd;
     }

   *wr = '\0';
   return out;
 }
 /* Pattern flavour of mb_frob_string: characters from LETTERS are passed
    through mb_replace, except while inside a bracket sub-construct, i.e.
    between "[:" and ":]", "[." and ".]" or "[=" and "=]", whose contents
    are copied untouched.  Returns a malloc'ed string owned by the
    caller, or NULL if STR is NULL or the allocation fails.  */
 static char *
 mb_frob_pattern (const char *str, const char *letters)
 {
   if (str == NULL)
     return NULL;

   char *out = malloc (2 * strlen (str) + 1);
   if (out == NULL)
     return NULL;

   int in_class = 0;
   char *wr = out;
   const char *rd = str;

   while (*rd != '\0')
     {
       const char ch = *rd;

       if (in_class)
         {
           /* A `]' directly preceded by one of ":.=" closes the
              construct.  */
           if (ch == ']' && strchr (":.=", rd[-1]) != NULL)
             in_class = 0;
           *wr++ = ch;
         }
       else if (strchr (letters, ch) != NULL)
         wr = mb_replace (wr, ch);
       else
         {
           /* A `[' directly followed by one of ":.=" opens one.  */
           if (ch == '[' && strchr (":.=", rd[1]) != NULL)
             in_class = 1;
           *wr++ = ch;
         }

       ++rd;
     }

   *wr = '\0';
   return out;
 }
 /* Compare the match recorded in RM[IDX] (offsets into STRING) with the
    expectation MATCH taken from the test file:
      "-"      the (sub)expression must not have matched,
      "@text"  it must have matched the empty string at a position where
               STRING continues with TEXT,
      other    it must have matched exactly that text.
    Returns 0 when the expectation holds.  Otherwise prints a diagnostic
    prefixed with FAIL and returns 1.  */
 static int
 check_match (regmatch_t *rm, int idx, const char *string,
              const char *match, const char *fail)
 {
   const regoff_t so = rm[idx].rm_so;
   const regoff_t eo = rm[idx].rm_eo;

   if (strcmp (match, "-") == 0)
     {
       if (so != -1 || eo != -1)
         {
           printf ("%s rm[%d] unexpectedly matched\n", fail, idx);
           return 1;
         }
       return 0;
     }

   if (so == -1 || eo == -1)
     {
       printf ("%s rm[%d] unexpectedly did not match\n", fail, idx);
       return 1;
     }

   if (match[0] == '@')
     {
       const char *tail = match + 1;

       if (so != eo)
         {
           printf ("%s rm[%d] not empty\n", fail, idx);
           return 1;
         }
       if (strncmp (string + so, tail, strlen (tail)) != 0)
         {
           printf ("%s rm[%d] not matching %s\n", fail, idx, match);
           return 1;
         }
       return 0;
     }

   /* Plain text: both the length and the bytes have to agree.  */
   if (eo - so != strlen (match)
       || strncmp (string + so, match, eo - so) != 0)
     {
       printf ("%s rm[%d] not matching %s\n", fail, idx, match);
       return 1;
     }

   return 0;
 }
 /* Run one test case.  PATTERN is compiled with CFLAGS.

    If EFLAGS is -1 the compilation is expected to fail and STRING holds
    the expected error name without the REG_ prefix.  A pattern that
    compiles anyway counts as a failure unless the expected error is
    "EMPTY".

    Otherwise PATTERN is matched against STRING with EFLAGS.  EXPECT is
    the expected overall match in check_match notation, or NULL if no
    match is expected.  MATCHES, if not NULL, is a comma separated list
    of expectations for subexpressions 1..9; entries missing from the
    list are treated as "-".  MATCHES is modified temporarily while it is
    split but is restored before returning.

    Returns 0 on success, 1 after printing a diagnostic prefixed with
    FAIL.  */
 static int
 test (const char *pattern, int cflags, const char *string, int eflags,
       char *expect, char *matches, const char *fail)
 {
   regex_t re;
   regmatch_t rm[10];
   int ret = 0;
   int err = regcomp (&re, pattern, cflags);

   if (err != 0 && eflags != -1)
     {
       /* Compilation failed although it was supposed to work.  */
       char buf[500];
       regerror (err, &re, buf, sizeof (buf));
       printf ("%s regcomp failed: %s\n", fail, buf);
       return 1;
     }

   if (err != 0)
     {
       /* An error was expected; check that it is the right one.  */
       static struct { reg_errcode_t code; const char *name; } codes []
 #define C(x) { REG_##x, #x }
         = { C(NOERROR), C(NOMATCH), C(BADPAT), C(ECOLLATE),
             C(ECTYPE), C(EESCAPE), C(ESUBREG), C(EBRACK),
             C(EPAREN), C(EBRACE), C(BADBR), C(ERANGE),
             C(ESPACE), C(BADRPT) };

       for (size_t i = 0; i < sizeof (codes) / sizeof (codes[0]); ++i)
         {
           if (err != codes[i].code)
             continue;
           if (strcmp (string, codes[i].name) == 0)
             return 0;
           printf ("%s regcomp returned REG_%s (expected REG_%s)\n",
                   fail, codes[i].name, string);
           return 1;
         }

       printf ("%s regcomp return value REG_%d\n", fail, err);
       return 1;
     }

   if (eflags == -1)
     {
       regfree (&re);
       /* The test case file relies on rxspencer rejecting empty
          expressions with REG_EMPTY.  This implementation accepts them,
          so that particular expectation is ignored.  */
       if (strcmp (string, "EMPTY") == 0)
         return 0;
       printf ("%s regcomp unexpectedly succeeded\n", fail);
       return 1;
     }

   /* The compiled pattern is not needed once regexec has run.  */
   int status = regexec (&re, string, 10, rm, eflags);
   regfree (&re);

   if (status != 0)
     {
       if (expect == NULL)
         return 0;
       printf ("%s regexec failed\n", fail);
       return 1;
     }

   if (expect == NULL)
     {
       printf ("%s regexec unexpectedly succeeded\n", fail);
       return 1;
     }

   /* With REG_NOSUB no offsets are checked.  */
   if (cflags & REG_NOSUB)
     return 0;

   ret = check_match (rm, 0, string, expect, fail);
   if (matches == NULL)
     return ret;

   char *field = matches;
   for (int n = 1; ret == 0 && n < 10; ++n)
     {
       /* Cut the current list entry off at its comma for the duration
          of the check.  */
       char *comma = field != NULL ? strchr (field, ',') : NULL;
       if (comma != NULL)
         *comma = '\0';

       ret = check_match (rm, n, string, field != NULL ? field : "-", fail);

       if (comma != NULL)
         {
           *comma = ',';
           field = comma + 1;
         }
       else
         field = NULL;
     }

   return ret;
 }
 /* Run test() on multibyte variants of one test case: PATTERN, STRING,
    EXPECT and MATCHES are rewritten so that the characters in LETTERS
    are replaced by two byte UTF-8 sequences.  When EFLAGS is -1 STRING
    is handed through unchanged.  Returns what test() returns, or 1 if
    one of the rewritten strings could not be allocated.  */
 static int
 mb_test (const char *pattern, int cflags, const char *string, int eflags,
          char *expect, const char *matches, const char *letters,
          const char *fail)
 {
   int ret;
   char *pattern_mb = mb_frob_pattern (pattern, letters);
   const char *string_mb = string;
   if (eflags != -1)
     string_mb = mb_frob_string (string, letters);
   char *expect_mb = mb_frob_string (expect, letters);
   char *matches_mb = mb_frob_string (matches, letters);

   /* EXPECT and MATCHES may legitimately be NULL; only a NULL result
      for a non-NULL input indicates a failed allocation.  */
   int alloc_failed = (pattern_mb == NULL
                       || string_mb == NULL
                       || (expect != NULL && expect_mb == NULL)
                       || (matches != NULL && matches_mb == NULL));

   if (alloc_failed)
     {
       printf ("%s %m", fail);
       ret = 1;
     }
   else
     ret = test (pattern_mb, cflags, string_mb, eflags, expect_mb,
                 matches_mb, fail);

   free (pattern_mb);
   /* Only release the string if it is our own copy.  */
   if (string_mb != string)
     free ((char *) string_mb);
   free (expect_mb);
   free (matches_mb);
   return ret;
 }
 /* Run mb_test for each of the 15 non-empty subsets of the letter pairs
    a/A, b/B, c/C and d/D, and OR the results together.  Patterns that
    use [:xdigit:] are skipped (0 is returned): a-d and A-D are hex
    digits while their multibyte replacements are not.  */
 static int
 mb_tests (const char *pattern, int cflags, const char *string, int eflags,
           char *expect, const char *matches)
 {
   static const char pairs[4][2]
     = { { 'a', 'A' }, { 'b', 'B' }, { 'c', 'C' }, { 'd', 'D' } };
   char letters[9], fail[20];
   int ret = 0;

   if (strstr (pattern, "[:xdigit:]") != NULL)
     return 0;

   for (int mask = 1; mask < 16; ++mask)
     {
       /* Bit N of MASK selects the N-th letter pair.  */
       char *end = letters;
       for (int bit = 0; bit < 4; ++bit)
         if (mask & (1 << bit))
           {
             *end++ = pairs[bit][0];
             *end++ = pairs[bit][1];
           }
       *end = '\0';

       /* Longest case: "UTF-8 " + 8 letters + " FAIL" + NUL = 20.  */
       sprintf (fail, "UTF-8 %s FAIL", letters);
       ret |= mb_test (pattern, cflags, string, eflags, expect, matches,
                       letters, fail);
     }

   return ret;
 }
 /* Test driver.  Usage: tst-rxspencer [--utf8 | -u] TESTFILE

    Reads TESTFILE line by line.  Every line that is neither empty nor a
    `#' comment is echoed and split at tabs into pattern, flags, string
    and, optionally, the expected match and the expected subexpression
    matches.  Lines with fewer than three fields, and lines using the
    unsupported flags `m', `p' or `#', are skipped.  Each remaining case
    is run in the C locale; with --utf8 (or -u) it is additionally run in
    the cs_CZ.UTF-8 locale, both as is and with multibyte substitutions.

    Returns 0 if all executed tests passed, 1 otherwise or on a usage or
    file error.  */
 int
 main (int argc, char **argv)
 {
   int ret = 0;
   char *line = NULL;
   size_t line_len = 0;
   ssize_t len;
   FILE *f;
   int opt;
   static int test_utf8 = 0;
   static const struct option options[] =
     {
       {"utf8",	no_argument,	&test_utf8,	1},
       {NULL,	0,		NULL,		0 }
     };

   /* `--utf8' sets test_utf8 through the option table.  The short form
      `-u' is returned to us instead and has to be recorded by hand;
      without this it would be accepted but have no effect.  */
   while ((opt = getopt_long (argc, argv, "u", options, NULL)) >= 0)
     if (opt == 'u')
       test_utf8 = 1;

   if (optind + 1 != argc)
     {
       fprintf (stderr, "Missing test filename\n");
       return 1;
     }

   f = fopen (argv[optind], "r");
   if (f == NULL)
     {
       /* Report the name that was actually tried; argv[1] may be an
          option rather than the file name.  */
       fprintf (stderr, "Couldn't open %s\n", argv[optind]);
       return 1;
     }

   while ((len = getline (&line, &line_len, f)) > 0)
     {
       char *pattern, *flagstr, *string, *expect, *matches, *p;
       int cflags = REG_EXTENDED, eflags = 0, try_bre_ere = 0;

       if (line[len - 1] == '\n')
         line[len - 1] = '\0';

       /* Skip comments and empty lines.  */
       if (*line == '#' || *line == '\0')
         continue;

       puts (line);
       fflush (stdout);

       /* Split off the three mandatory fields.  A field consisting of
          two double quotes denotes the empty string.  */
       pattern = strtok (line, "\t");
       if (pattern == NULL)
         continue;
       if (strcmp (pattern, "\"\"") == 0)
         pattern += 2;

       flagstr = strtok (NULL, "\t");
       if (flagstr == NULL)
         continue;

       string = strtok (NULL, "\t");
       if (string == NULL)
         continue;
       if (strcmp (string, "\"\"") == 0)
         string += 2;

       /* Translate the flag letters.  eflags == -1 marks a case where
          regcomp is expected to fail.  */
       for (p = flagstr; *p; ++p)
         switch (*p)
           {
           case '-':
             break;
           case 'b':
             cflags &= ~REG_EXTENDED;
             break;
           case '&':
             try_bre_ere = 1;
             break;
           case 'C':
             eflags = -1;
             break;
           case 'i':
             cflags |= REG_ICASE;
             break;
           case 's':
             cflags |= REG_NOSUB;
             break;
           case 'n':
             cflags |= REG_NEWLINE;
             break;
           case '^':
             eflags |= REG_NOTBOL;
             break;
           case '$':
             eflags |= REG_NOTEOL;
             break;
           case 'm':
           case 'p':
           case '#':
             /* Not supported.  */
             flagstr = NULL;
             break;
           }

       if (flagstr == NULL)
         continue;

       replace_special_chars (pattern);
       glibc_re_syntax (pattern);
       /* For expected compile errors STRING is an error name and must
          stay untouched.  */
       if (eflags != -1)
         replace_special_chars (string);

       expect = strtok (NULL, "\t");
       matches = NULL;
       if (expect != NULL)
         {
           replace_special_chars (expect);
           matches = strtok (NULL, "\t");
           if (matches != NULL)
             replace_special_chars (matches);
         }

       setlocale (LC_ALL, "C");
       if (test (pattern, cflags, string, eflags, expect, matches, "FAIL")
           || (try_bre_ere
               && test (pattern, cflags & ~REG_EXTENDED, string, eflags,
                        expect, matches, "FAIL")))
         ret = 1;
       else if (test_utf8)
         {
           setlocale (LC_ALL, "cs_CZ.UTF-8");
           if (test (pattern, cflags, string, eflags, expect, matches,
                     "UTF-8 FAIL")
               || (try_bre_ere
                   && test (pattern, cflags & ~REG_EXTENDED, string, eflags,
                            expect, matches, "UTF-8 FAIL")))
             ret = 1;
           else if (mb_tests (pattern, cflags, string, eflags, expect, matches)
                    || (try_bre_ere
                        && mb_tests (pattern, cflags & ~REG_EXTENDED, string,
                                     eflags, expect, matches)))
             ret = 1;
         }
     }

   /* getline allocated the line buffer; release it.  */
   free (line);
   fclose (f);
   return ret;
 }