mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-24 14:00:30 +00:00
Fix test cases tst-fnmatch and tst-regexloc for the new iso14651_t1_common file.
See:

http://pubs.opengroup.org/onlinepubs/7908799/xbd/re.html

> A range expression represents the set of collating elements that fall
> between two elements in the current collation sequence,
> inclusively. It is expressed as the starting point and the ending
> point separated by a hyphen (-).
>
> Range expressions must not be used in portable applications because
> their behaviour is dependent on the collating sequence. Ranges will be
> treated according to the current collating sequence, and include such
> characters that fall within the range based on that collating
> sequence, regardless of character values. This, however, means that
> the interpretation will differ depending on collating sequence. If,
> for instance, one collating sequence defines ä as a variant of a,
> while another defines it as a letter following z, then the expression
> [ä-z] is valid in the first language and invalid in the second.

Therefore, using [a-z] does not make much sense except in the C/POSIX locale.
The new iso14651_t1_common lists upper case and lower case Latin characters
in a different order than the old one which causes surprising results
for example in the de_DE locale: [a-z] now includes A because A comes
after a in iso14651_t1_common but does not include Z because that comes
after z in iso14651_t1_common.

* posix/tst-fnmatch.input: Fix results for range expressions
  for non C locales.
* posix/tst-regexloc.c: Do not use a range expression for
  de_DE.ISO-8859-1 locale.
This commit is contained in:
parent
770cbe147c
commit
ac3a3b4b0d
@ -1,3 +1,10 @@
|
||||
2018-02-27 Mike FABIAN <mfabian@redhat.com>
|
||||
|
||||
* posix/tst-fnmatch.input: Fix results for range expressions
|
||||
for non C locales.
|
||||
* posix/tst-regexloc.c: Do not use a range expression for
|
||||
de_DE.ISO-8859-1 locale.
|
||||
|
||||
2018-02-27 Mike FABIAN <mfabian@redhat.com>
|
||||
|
||||
* posix/bug-regex5.c: Fix test case because with the new
|
||||
|
@ -418,21 +418,47 @@ C "-" "[Z-\\]]" NOMATCH
|
||||
# Following are tests outside the scope of IEEE 2003.2 since they are using
|
||||
# locales other than the C locale. The main focus of the tests is on the
|
||||
# handling of ranges and the recognition of characters (vs. bytes).
|
||||
#
|
||||
# See:
|
||||
#
|
||||
# http://pubs.opengroup.org/onlinepubs/7908799/xbd/re.html
|
||||
#
|
||||
# > A range expression represents the set of collating elements that fall
|
||||
# > between two elements in the current collation sequence,
|
||||
# > inclusively. It is expressed as the starting point and the ending
|
||||
# > point separated by a hyphen (-).
|
||||
# >
|
||||
# > Range expressions must not be used in portable applications because
|
||||
# > their behaviour is dependent on the collating sequence. Ranges will be
|
||||
# > treated according to the current collating sequence, and include such
|
||||
# > characters that fall within the range based on that collating
|
||||
# > sequence, regardless of character values. This, however, means that
|
||||
# > the interpretation will differ depending on collating sequence. If,
|
||||
# > for instance, one collating sequence defines ä as a variant of a,
|
||||
# > while another defines it as a letter following z, then the expression
|
||||
# > [ä-z] is valid in the first language and invalid in the second.
|
||||
#
|
||||
# Therefore, using [a-z] does not make much sense except in the C/POSIX locale.
|
||||
# The new iso14651_t1_common lists upper case and lower case Latin characters
|
||||
# in a different order than the old one, which causes surprising results
|
||||
# for example in the de_DE locale: [a-z] now includes A because A comes
|
||||
# after a in iso14651_t1_common but does not include Z because that comes
|
||||
# after z in iso14651_t1_common.
|
||||
de_DE.ISO-8859-1 "a" "[a-z]" 0
|
||||
de_DE.ISO-8859-1 "z" "[a-z]" 0
|
||||
de_DE.ISO-8859-1 "ä" "[a-z]" 0
|
||||
de_DE.ISO-8859-1 "ö" "[a-z]" 0
|
||||
de_DE.ISO-8859-1 "ü" "[a-z]" 0
|
||||
de_DE.ISO-8859-1 "A" "[a-z]" NOMATCH
|
||||
de_DE.ISO-8859-1 "A" "[a-z]" 0 # surprising but correct!
|
||||
de_DE.ISO-8859-1 "Z" "[a-z]" NOMATCH
|
||||
de_DE.ISO-8859-1 "Ä" "[a-z]" NOMATCH
|
||||
de_DE.ISO-8859-1 "Ö" "[a-z]" NOMATCH
|
||||
de_DE.ISO-8859-1 "Ü" "[a-z]" NOMATCH
|
||||
de_DE.ISO-8859-1 "Ä" "[a-z]" 0 # surprising but correct!
|
||||
de_DE.ISO-8859-1 "Ö" "[a-z]" 0 # surprising but correct!
|
||||
de_DE.ISO-8859-1 "Ü" "[a-z]" 0 # surprising but correct!
|
||||
de_DE.ISO-8859-1 "a" "[A-Z]" NOMATCH
|
||||
de_DE.ISO-8859-1 "z" "[A-Z]" NOMATCH
|
||||
de_DE.ISO-8859-1 "ä" "[A-Z]" NOMATCH
|
||||
de_DE.ISO-8859-1 "ö" "[A-Z]" NOMATCH
|
||||
de_DE.ISO-8859-1 "ü" "[A-Z]" NOMATCH
|
||||
de_DE.ISO-8859-1 "z" "[A-Z]" 0 # surprising but correct!
|
||||
de_DE.ISO-8859-1 "ä" "[A-Z]" 0 # surprising but correct!
|
||||
de_DE.ISO-8859-1 "ö" "[A-Z]" 0 # surprising but correct!
|
||||
de_DE.ISO-8859-1 "ü" "[A-Z]" 0 # surprising but correct!
|
||||
de_DE.ISO-8859-1 "A" "[A-Z]" 0
|
||||
de_DE.ISO-8859-1 "Z" "[A-Z]" 0
|
||||
de_DE.ISO-8859-1 "Ä" "[A-Z]" 0
|
||||
@ -515,16 +541,16 @@ de_DE.UTF-8 "z" "[a-z]" 0
|
||||
de_DE.UTF-8 "ä" "[a-z]" 0
|
||||
de_DE.UTF-8 "ö" "[a-z]" 0
|
||||
de_DE.UTF-8 "ü" "[a-z]" 0
|
||||
de_DE.UTF-8 "A" "[a-z]" NOMATCH
|
||||
de_DE.UTF-8 "A" "[a-z]" 0 # surprising but correct!
|
||||
de_DE.UTF-8 "Z" "[a-z]" NOMATCH
|
||||
de_DE.UTF-8 "Ä" "[a-z]" NOMATCH
|
||||
de_DE.UTF-8 "Ö" "[a-z]" NOMATCH
|
||||
de_DE.UTF-8 "Ü" "[a-z]" NOMATCH
|
||||
de_DE.UTF-8 "Ä" "[a-z]" 0 # surprising but correct!
|
||||
de_DE.UTF-8 "Ö" "[a-z]" 0 # surprising but correct!
|
||||
de_DE.UTF-8 "Ü" "[a-z]" 0 # surprising but correct!
|
||||
de_DE.UTF-8 "a" "[A-Z]" NOMATCH
|
||||
de_DE.UTF-8 "z" "[A-Z]" NOMATCH
|
||||
de_DE.UTF-8 "ä" "[A-Z]" NOMATCH
|
||||
de_DE.UTF-8 "ö" "[A-Z]" NOMATCH
|
||||
de_DE.UTF-8 "ü" "[A-Z]" NOMATCH
|
||||
de_DE.UTF-8 "z" "[A-Z]" 0 # surprising but correct!
|
||||
de_DE.UTF-8 "ä" "[A-Z]" 0 # surprising but correct!
|
||||
de_DE.UTF-8 "ö" "[A-Z]" 0 # surprising but correct!
|
||||
de_DE.UTF-8 "ü" "[A-Z]" 0 # surprising but correct!
|
||||
de_DE.UTF-8 "A" "[A-Z]" 0
|
||||
de_DE.UTF-8 "Z" "[A-Z]" 0
|
||||
de_DE.UTF-8 "Ä" "[A-Z]" 0
|
||||
|
@ -29,8 +29,8 @@ do_test (void)
|
||||
|
||||
if (setlocale (LC_ALL, "de_DE.ISO-8859-1") == NULL)
|
||||
puts ("cannot set locale");
|
||||
else if (regcomp (&re, "[a-f]*", 0) != REG_NOERROR)
|
||||
puts ("cannot compile expression \"[a-f]*\"");
|
||||
else if (regcomp (&re, "[abcdef]*", 0) != REG_NOERROR)
|
||||
puts ("cannot compile expression \"[abcdef]*\"");
|
||||
else if (regexec (&re, "abcdefCDEF", 1, mat, 0) == REG_NOMATCH)
|
||||
puts ("no match");
|
||||
else
|
||||
|
Loading…
Reference in New Issue
Block a user