mirror of
https://sourceware.org/git/glibc.git
synced 2024-12-22 02:40:08 +00:00
Update.
2000-05-24 Ulrich Drepper <drepper@redhat.com> * locale/programs/ld-collate.c (struct element_t): Add mbseqorder and wcseqorder members. (struct locale_collate_t): Likewise. (collate_finish): Assign collation sequence value to each character. Create tables for output. (collate_output): Write out tables with collation sequence information. * locale/C-collate.c: Provide C locale data for collation sequence table. * locale/langinfo.h: Add _NL_COLLATE_COLLSEQMB and _NL_COLLATE_COLLSEQWC. * locale/categories.def: Add entries for _NL_COLLATE_COLLSEQMB and _NL_COLLATE_COLLSEQWC. * posix/fnmatch.c: Define SUFFIX and WIDE_CHAR_VERSION before include fnmatch_loop.c. * posix/fnmatch_loop.c: Don't use strcoll while determining whether character is matched by range expression. Use collation sequence table. Outside glibc fall back on simple character value comparison.
This commit is contained in:
parent
b7cbee1cb0
commit
acb5ee2e56
20
ChangeLog
20
ChangeLog
@ -1,3 +1,23 @@
|
||||
2000-05-24 Ulrich Drepper <drepper@redhat.com>
|
||||
|
||||
* locale/programs/ld-collate.c (struct element_t): Add mbseqorder
|
||||
and wcseqorder members.
|
||||
(struct locale_collate_t): Likewise.
|
||||
(collate_finish): Assign collation sequence value to each character.
|
||||
Create tables for output.
|
||||
(collate_output): Write out tables with collation sequence information.
|
||||
* locale/C-collate.c: Provide C locale data for collation sequence
|
||||
table.
|
||||
* locale/langinfo.h: Add _NL_COLLATE_COLLSEQMB and
|
||||
_NL_COLLATE_COLLSEQWC.
|
||||
* locale/categories.def: Add entries for _NL_COLLATE_COLLSEQMB and
|
||||
_NL_COLLATE_COLLSEQWC.
|
||||
* posix/fnmatch.c: Define SUFFIX and WIDE_CHAR_VERSION before
|
||||
include fnmatch_loop.c.
|
||||
* posix/fnmatch_loop.c: Don't use strcoll while determining whether
|
||||
character is matched by range expression. Use collation sequence
|
||||
table. Outside glibc fall back on simple character value comparison.
|
||||
|
||||
2000-05-24 Andreas Jaeger <aj@suse.de>
|
||||
|
||||
* sysdeps/mips/elf/start.S (ENTRY_POINT): Align stack for double
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 1995, 1996, 1997, 1999 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 1995, 1996, 1997, 1999, 2000 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1995.
|
||||
|
||||
@ -20,12 +20,84 @@
|
||||
#include <endian.h>
|
||||
#include "localeinfo.h"
|
||||
|
||||
/* Collation sequence table of the C locale for single-byte characters.
   It has 256 entries, listed in ascending order from '\x00' to '\xff',
   so the entry at index I simply holds the character value I (an
   identity mapping).  It is installed as the `string' value
   `collseqmb' in the _nl_C_LC_COLLATE initializer in this file.
   NOTE(review): the element type is plain `char', so the entries
   '\x80'..'\xff' are negative where `char' is signed; the user visible
   in this change (fnmatch_loop.c) reads the table through a
   `const UCHAR *' cast -- confirm other users do the same.  */
static const char collseqmb[] =
|
||||
{
|
||||
'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
|
||||
'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
|
||||
'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
|
||||
'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
|
||||
'\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
|
||||
'\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
|
||||
'\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
|
||||
'\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
|
||||
'\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47',
|
||||
'\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f',
|
||||
'\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57',
|
||||
'\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
|
||||
'\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
|
||||
'\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
|
||||
'\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
|
||||
'\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f',
|
||||
'\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',
|
||||
'\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f',
|
||||
'\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97',
|
||||
'\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f',
|
||||
'\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7',
|
||||
'\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf',
|
||||
'\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7',
|
||||
'\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf',
|
||||
'\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7',
|
||||
'\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf',
|
||||
'\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7',
|
||||
'\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf',
|
||||
'\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7',
|
||||
'\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef',
|
||||
'\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7',
|
||||
'\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff'
|
||||
};
|
||||
|
||||
/* Wide-character counterpart of the C locale collation sequence table.
   It has 256 entries of type uint32_t, listed in ascending order from
   L'\x00' to L'\xff', so the entry at index I holds the value I.  It
   is installed as the `wstr' value `collseqwc' in the
   _nl_C_LC_COLLATE initializer in this file.
   NOTE(review): only code points 0..255 are covered here; how wide
   characters above 0xff are meant to be looked up in the C locale is
   not visible from this table -- confirm against the consumer.  */
static const uint32_t collseqwc[] =
|
||||
{
|
||||
L'\x00', L'\x01', L'\x02', L'\x03', L'\x04', L'\x05', L'\x06', L'\x07',
|
||||
L'\x08', L'\x09', L'\x0a', L'\x0b', L'\x0c', L'\x0d', L'\x0e', L'\x0f',
|
||||
L'\x10', L'\x11', L'\x12', L'\x13', L'\x14', L'\x15', L'\x16', L'\x17',
|
||||
L'\x18', L'\x19', L'\x1a', L'\x1b', L'\x1c', L'\x1d', L'\x1e', L'\x1f',
|
||||
L'\x20', L'\x21', L'\x22', L'\x23', L'\x24', L'\x25', L'\x26', L'\x27',
|
||||
L'\x28', L'\x29', L'\x2a', L'\x2b', L'\x2c', L'\x2d', L'\x2e', L'\x2f',
|
||||
L'\x30', L'\x31', L'\x32', L'\x33', L'\x34', L'\x35', L'\x36', L'\x37',
|
||||
L'\x38', L'\x39', L'\x3a', L'\x3b', L'\x3c', L'\x3d', L'\x3e', L'\x3f',
|
||||
L'\x40', L'\x41', L'\x42', L'\x43', L'\x44', L'\x45', L'\x46', L'\x47',
|
||||
L'\x48', L'\x49', L'\x4a', L'\x4b', L'\x4c', L'\x4d', L'\x4e', L'\x4f',
|
||||
L'\x50', L'\x51', L'\x52', L'\x53', L'\x54', L'\x55', L'\x56', L'\x57',
|
||||
L'\x58', L'\x59', L'\x5a', L'\x5b', L'\x5c', L'\x5d', L'\x5e', L'\x5f',
|
||||
L'\x60', L'\x61', L'\x62', L'\x63', L'\x64', L'\x65', L'\x66', L'\x67',
|
||||
L'\x68', L'\x69', L'\x6a', L'\x6b', L'\x6c', L'\x6d', L'\x6e', L'\x6f',
|
||||
L'\x70', L'\x71', L'\x72', L'\x73', L'\x74', L'\x75', L'\x76', L'\x77',
|
||||
L'\x78', L'\x79', L'\x7a', L'\x7b', L'\x7c', L'\x7d', L'\x7e', L'\x7f',
|
||||
L'\x80', L'\x81', L'\x82', L'\x83', L'\x84', L'\x85', L'\x86', L'\x87',
|
||||
L'\x88', L'\x89', L'\x8a', L'\x8b', L'\x8c', L'\x8d', L'\x8e', L'\x8f',
|
||||
L'\x90', L'\x91', L'\x92', L'\x93', L'\x94', L'\x95', L'\x96', L'\x97',
|
||||
L'\x98', L'\x99', L'\x9a', L'\x9b', L'\x9c', L'\x9d', L'\x9e', L'\x9f',
|
||||
L'\xa0', L'\xa1', L'\xa2', L'\xa3', L'\xa4', L'\xa5', L'\xa6', L'\xa7',
|
||||
L'\xa8', L'\xa9', L'\xaa', L'\xab', L'\xac', L'\xad', L'\xae', L'\xaf',
|
||||
L'\xb0', L'\xb1', L'\xb2', L'\xb3', L'\xb4', L'\xb5', L'\xb6', L'\xb7',
|
||||
L'\xb8', L'\xb9', L'\xba', L'\xbb', L'\xbc', L'\xbd', L'\xbe', L'\xbf',
|
||||
L'\xc0', L'\xc1', L'\xc2', L'\xc3', L'\xc4', L'\xc5', L'\xc6', L'\xc7',
|
||||
L'\xc8', L'\xc9', L'\xca', L'\xcb', L'\xcc', L'\xcd', L'\xce', L'\xcf',
|
||||
L'\xd0', L'\xd1', L'\xd2', L'\xd3', L'\xd4', L'\xd5', L'\xd6', L'\xd7',
|
||||
L'\xd8', L'\xd9', L'\xda', L'\xdb', L'\xdc', L'\xdd', L'\xde', L'\xdf',
|
||||
L'\xe0', L'\xe1', L'\xe2', L'\xe3', L'\xe4', L'\xe5', L'\xe6', L'\xe7',
|
||||
L'\xe8', L'\xe9', L'\xea', L'\xeb', L'\xec', L'\xed', L'\xee', L'\xef',
|
||||
L'\xf0', L'\xf1', L'\xf2', L'\xf3', L'\xf4', L'\xf5', L'\xf6', L'\xf7',
|
||||
L'\xf8', L'\xf9', L'\xfa', L'\xfb', L'\xfc', L'\xfd', L'\xfe', L'\xff'
|
||||
};
|
||||
|
||||
const struct locale_data _nl_C_LC_COLLATE =
|
||||
{
|
||||
_nl_C_name,
|
||||
NULL, 0, 0, /* no file mapped */
|
||||
UNDELETABLE,
|
||||
16,
|
||||
18,
|
||||
{
|
||||
{ word: 0 },
|
||||
{ string: NULL },
|
||||
@ -40,8 +112,10 @@ const struct locale_data _nl_C_LC_COLLATE =
|
||||
{ string: NULL },
|
||||
{ string: NULL },
|
||||
{ string: NULL },
|
||||
{ word: 0 },
|
||||
{ string: NULL },
|
||||
{ string: NULL }
|
||||
{ string: NULL },
|
||||
{ string: NULL },
|
||||
{ string: collseqmb },
|
||||
{ wstr: collseqwc }
|
||||
}
|
||||
};
|
||||
|
@ -58,6 +58,8 @@ DEFINE_CATEGORY
|
||||
DEFINE_ELEMENT (_NL_COLLATE_SYMB_HASH_SIZEMB, "collate-symb-hash-sizemb", std, word)
|
||||
DEFINE_ELEMENT (_NL_COLLATE_SYMB_TABLEMB, "collate-symb-tablemb", std, string)
|
||||
DEFINE_ELEMENT (_NL_COLLATE_SYMB_EXTRAMB, "collate-symb-extramb", std, string)
|
||||
DEFINE_ELEMENT (_NL_COLLATE_COLLSEQMB, "collate-collseqmb", std, string)
|
||||
DEFINE_ELEMENT (_NL_COLLATE_COLLSEQWC, "collate-collseqwc", std, string)
|
||||
), NO_POSTLOAD)
|
||||
|
||||
|
||||
|
@ -248,6 +248,8 @@ enum
|
||||
_NL_COLLATE_SYMB_HASH_SIZEMB,
|
||||
_NL_COLLATE_SYMB_TABLEMB,
|
||||
_NL_COLLATE_SYMB_EXTRAMB,
|
||||
_NL_COLLATE_COLLSEQMB,
|
||||
_NL_COLLATE_COLLSEQWC,
|
||||
_NL_NUM_LC_COLLATE,
|
||||
|
||||
/* LC_CTYPE category: character classification.
|
||||
|
@ -1,3 +1,7 @@
|
||||
2000-05-24 Ulrich Drepper <drepper@redhat.com>
|
||||
|
||||
* locales/iso14651_t1: New file.
|
||||
|
||||
2000-05-15 Andreas Jaeger <aj@suse.de>
|
||||
|
||||
* tst-fmon.data: Change testcase following fixes for
|
||||
|
1406
localedata/locales/iso14651_t1
Normal file
1406
localedata/locales/iso14651_t1
Normal file
File diff suppressed because it is too large
Load Diff
@ -48,6 +48,15 @@
|
||||
# include <wctype.h>
|
||||
#endif
|
||||
|
||||
/* We need some of the locale data (the collation sequence information)
|
||||
but there is no interface to get this information in general. Therefore
|
||||
we support a correct implementation only in glibc. */
|
||||
#ifdef _LIBC
|
||||
# include "../locale/localeinfo.h"
|
||||
|
||||
# define CONCAT(a,b) __CONCAT(a,b)
|
||||
#endif
|
||||
|
||||
/* Comment out all this code if we are using the GNU C Library, and are not
|
||||
actually compiling the library itself. This code is part of the GNU C
|
||||
Library, but also included in many other GNU distributions. Compiling
|
||||
@ -192,6 +201,7 @@ __wcschrnul (s, c)
|
||||
# define STRCHR(S, C) strchr (S, C)
|
||||
# define STRCHRNUL(S, C) __strchrnul (S, C)
|
||||
# define STRCOLL(S1, S2) strcoll (S1, S2)
|
||||
# define SUFFIX MB
|
||||
# include "fnmatch_loop.c"
|
||||
|
||||
|
||||
@ -209,7 +219,10 @@ __wcschrnul (s, c)
|
||||
# define BTOWC(C) (C)
|
||||
# define STRCHR(S, C) wcschr (S, C)
|
||||
# define STRCHRNUL(S, C) __wcschrnul (S, C)
|
||||
# define STRCOLL(S1, S2) wcscoll (S1, S2)
|
||||
# define STRCOLL(S1, S2) wcscoll (S1, S2)
|
||||
# define SUFFIX WC
|
||||
# define WIDE_CHAR_VERSION 1
|
||||
|
||||
|
||||
# undef IS_CHAR_CLASS
|
||||
# ifdef _LIBC
|
||||
|
@ -31,6 +31,16 @@ FCT (pattern, string, no_leading_period, flags)
|
||||
{
|
||||
register const CHAR *p = pattern, *n = string;
|
||||
register UCHAR c;
|
||||
#ifdef _LIBC
|
||||
const UCHAR *collseq = (const UCHAR *)
|
||||
_NL_CURRENT(LC_COLLATE, CONCAT(_NL_COLLATE_COLLSEQ,SUFFIX));
|
||||
# ifdef WIDE_CHAR_VERSION
|
||||
const wint_t *names = (const wint_t *)
|
||||
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_NAMES);
|
||||
size_t size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_SIZE);
|
||||
size_t layers = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_LAYERS);
|
||||
# endif
|
||||
#endif
|
||||
|
||||
while ((c = *p++) != L('\0'))
|
||||
{
|
||||
@ -210,9 +220,9 @@ FCT (pattern, string, no_leading_period, flags)
|
||||
/* Leave room for the null. */
|
||||
CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
|
||||
size_t c1 = 0;
|
||||
# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
|
||||
#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
|
||||
wctype_t wt;
|
||||
# endif
|
||||
#endif
|
||||
const CHAR *startp = p;
|
||||
|
||||
for (;;)
|
||||
@ -240,7 +250,7 @@ FCT (pattern, string, no_leading_period, flags)
|
||||
}
|
||||
str[c1] = L('\0');
|
||||
|
||||
# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
|
||||
#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
|
||||
wt = IS_CHAR_CLASS (str);
|
||||
if (wt == 0)
|
||||
/* Invalid character class name. */
|
||||
@ -248,7 +258,7 @@ FCT (pattern, string, no_leading_period, flags)
|
||||
|
||||
if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
|
||||
goto matched;
|
||||
# else
|
||||
#else
|
||||
if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
|
||||
|| (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
|
||||
|| (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
|
||||
@ -262,7 +272,7 @@ FCT (pattern, string, no_leading_period, flags)
|
||||
|| (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
|
||||
|| (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
|
||||
goto matched;
|
||||
# endif
|
||||
#endif
|
||||
}
|
||||
else if (c == L('\0'))
|
||||
/* [ (unterminated) loses. */
|
||||
@ -279,27 +289,117 @@ FCT (pattern, string, no_leading_period, flags)
|
||||
|
||||
if (c == L('-') && *p != L(']'))
|
||||
{
|
||||
/* It is a range. */
|
||||
CHAR lo[2];
|
||||
CHAR fc[2];
|
||||
#if _LIBC
|
||||
/* We have to find the collation sequence
|
||||
value for C. Collation sequence is nothing
|
||||
we can regularly access. The sequence
|
||||
value is defined by the order in which the
|
||||
definitions of the collation values for the
|
||||
various characters appear in the source
|
||||
file. A strange concept, nowhere
|
||||
documented. */
|
||||
int32_t fseqidx;
|
||||
int32_t lseqidx;
|
||||
UCHAR cend = *p++;
|
||||
# ifdef WIDE_CHAR_VERSION
|
||||
size_t cnt;
|
||||
# endif
|
||||
|
||||
if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
|
||||
cend = *p++;
|
||||
if (cend == L('\0'))
|
||||
return FNM_NOMATCH;
|
||||
|
||||
lo[0] = cold;
|
||||
lo[1] = L('\0');
|
||||
fc[0] = fn;
|
||||
fc[1] = L('\0');
|
||||
if (STRCOLL (lo, fc) <= 0)
|
||||
# ifdef WIDE_CHAR_VERSION
|
||||
/* Search in the `names' array for the characters. */
|
||||
fseqidx = fn % size;
|
||||
cnt = 0;
|
||||
while (names[fseqidx] != fn)
|
||||
{
|
||||
CHAR hi[2];
|
||||
hi[0] = FOLD (cend);
|
||||
hi[1] = L('\0');
|
||||
if (STRCOLL (fc, hi) <= 0)
|
||||
if (++cnt == layers)
|
||||
/* XXX We don't know anything about
|
||||
the character we are supposed to
|
||||
match. This means we are failing. */
|
||||
goto range_not_matched;
|
||||
|
||||
fseqidx += size;
|
||||
}
|
||||
lseqidx = cold % size;
|
||||
cnt = 0;
|
||||
while (names[lseqidx] != cold)
|
||||
{
|
||||
if (++cnt == layers)
|
||||
{
|
||||
lseqidx = -1;
|
||||
break;
|
||||
}
|
||||
lseqidx += size;
|
||||
}
|
||||
# else
|
||||
fseqidx = fn;
|
||||
lseqidx = cold;
|
||||
# endif
|
||||
|
||||
/* XXX It is not entirely clear to me how to handle
|
||||
characters which are not mentioned in the
|
||||
collation specification. */
|
||||
if (
|
||||
# ifdef WIDE_CHAR_VERSION
|
||||
lseqidx == -1 ||
|
||||
# endif
|
||||
collseq[lseqidx] <= collseq[fseqidx])
|
||||
{
|
||||
/* We have to look at the upper bound. */
|
||||
int32_t hseqidx;
|
||||
|
||||
cend = FOLD (cend);
|
||||
# ifdef WIDE_CHAR_VERSION
|
||||
hseqidx = cend % size;
|
||||
cnt = 0;
|
||||
while (names[hseqidx] != cend)
|
||||
{
|
||||
if (++cnt == layers)
|
||||
{
|
||||
/* Hum, no information about the upper
|
||||
bound. The matching succeeds if the
|
||||
lower bound is matched exactly. */
|
||||
if (lseqidx == -1 || cold != fn)
|
||||
goto range_not_matched;
|
||||
|
||||
goto matched;
|
||||
}
|
||||
}
|
||||
# else
|
||||
hseqidx = cend;
|
||||
# endif
|
||||
|
||||
if (
|
||||
# ifdef WIDE_CHAR_VERSION
|
||||
(lseqidx == -1
|
||||
&& collseq[fseqidx] == collseq[hseqidx]) ||
|
||||
# endif
|
||||
collseq[fseqidx] <= collseq[hseqidx])
|
||||
goto matched;
|
||||
}
|
||||
# ifdef WIDE_CHAR_VERSION
|
||||
range_not_matched:
|
||||
# endif
|
||||
#else
|
||||
/* We use a boring value comparison of the character
|
||||
values. This is better than comparing using
|
||||
`strcoll' since the latter would have surprising
|
||||
and sometimes fatal consequences. */
|
||||
UCHAR cend = *p++;
|
||||
|
||||
if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
|
||||
cend = *p++;
|
||||
if (cend == L('\0'))
|
||||
return FNM_NOMATCH;
|
||||
|
||||
/* It is a range. */
|
||||
if (cold <= fc && fc <= c)
|
||||
goto matched;
|
||||
#endif
|
||||
|
||||
c = *p++;
|
||||
}
|
||||
@ -371,3 +471,4 @@ FCT (pattern, string, no_leading_period, flags)
|
||||
#undef STRCOLL
|
||||
#undef L
|
||||
#undef BTOWC
|
||||
#undef SUFFIX
|
||||
|
Loading…
Reference in New Issue
Block a user