Check generated locale for non-ASCII 8-bit characters with case conversion.

If a locale has no 8-bit characters whose case conversion differs
from the ASCII conversion (±0x20), then we can perform some
optimizations.  These will follow later.
This commit is contained in:
Ulrich Drepper 2009-07-20 20:04:42 -07:00
parent c3db953c16
commit 8a4494506d
5 changed files with 47 additions and 4 deletions

View File

@ -1,3 +1,16 @@
2009-07-20 Ulrich Drepper <drepper@redhat.com>
* locale/localeinfo.h (LIMAGIC): Update value for LC_CTYPE.
* locale/langinfo.h: Define _NL_CTYPE_NONASCII_CASE.
* locale/C-ctype.c (_nl_C_LC_CTYPE): Add initializer for
_NL_CTYPE_NONASCII_CASE.
* locale/programs/ld-ctype.c (locale_ctype_t): Add nonascii_case
field.
(ctype_finish): Check whether any 8-bit character outside the ASCII
letter ranges has a case mapping, or whether the mapping of the ASCII
letters differs from the ASCII rule (±0x20). Set nonascii_case appropriately.
(ctype_output): Add output handler for nonascii_case.
2009-07-17 Ulrich Drepper <drepper@redhat.com> 2009-07-17 Ulrich Drepper <drepper@redhat.com>
* sysdeps/generic/sysdep.h: Define cfi_personality, cfi_lsda, * sysdeps/generic/sysdep.h: Define cfi_personality, cfi_lsda,

View File

@ -1,4 +1,4 @@
/* Copyright (C) 1995-2002, 2003 Free Software Foundation, Inc. /* Copyright (C) 1995-2002, 2003, 2009 Free Software Foundation, Inc.
This file is part of the GNU C Library. This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1995. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1995.
@ -682,6 +682,8 @@ const struct locale_data _nl_C_LC_CTYPE attribute_hidden =
{ .string = (const char *) _nl_C_LC_CTYPE_class_alnum.header }, { .string = (const char *) _nl_C_LC_CTYPE_class_alnum.header },
/* NR_MAPS wctrans_tables */ /* NR_MAPS wctrans_tables */
{ .string = (const char *) _nl_C_LC_CTYPE_map_toupper.header }, { .string = (const char *) _nl_C_LC_CTYPE_map_toupper.header },
{ .string = (const char *) _nl_C_LC_CTYPE_map_tolower.header } { .string = (const char *) _nl_C_LC_CTYPE_map_tolower.header },
/* _NL_CTYPE_NONASCII_CASE */
{ .word = 0 }
} }
}; };

View File

@ -348,6 +348,7 @@ enum
_NL_CTYPE_EXTRA_MAP_12, _NL_CTYPE_EXTRA_MAP_12,
_NL_CTYPE_EXTRA_MAP_13, _NL_CTYPE_EXTRA_MAP_13,
_NL_CTYPE_EXTRA_MAP_14, _NL_CTYPE_EXTRA_MAP_14,
_NL_CTYPE_NONASCII_CASE,
_NL_NUM_LC_CTYPE, _NL_NUM_LC_CTYPE,
/* LC_MONETARY category: formatting of monetary quantities. /* LC_MONETARY category: formatting of monetary quantities.

View File

@ -1,5 +1,5 @@
/* Declarations for internal libc locale interfaces /* Declarations for internal libc locale interfaces
Copyright (C) 1995-2003, 2005, 2006, 2007, 2008 Copyright (C) 1995-2003, 2005, 2006, 2007, 2008, 2009
Free Software Foundation, Inc. Free Software Foundation, Inc.
This file is part of the GNU C Library. This file is part of the GNU C Library.
@ -35,6 +35,8 @@
#define LIMAGIC(category) \ #define LIMAGIC(category) \
(category == LC_COLLATE \ (category == LC_COLLATE \
? ((unsigned int) (0x20051014 ^ (category))) \ ? ((unsigned int) (0x20051014 ^ (category))) \
: category == LC_CTYPE \
? ((unsigned int) (0x20090720 ^ (category))) \
: ((unsigned int) (0x20031115 ^ (category)))) : ((unsigned int) (0x20031115 ^ (category))))
/* Two special weight constants for the collation data. */ /* Two special weight constants for the collation data. */

View File

@ -1,4 +1,4 @@
/* Copyright (C) 1995-2006, 2007 Free Software Foundation, Inc. /* Copyright (C) 1995-2006, 2007, 2009 Free Software Foundation, Inc.
This file is part of the GNU C Library. This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.org>, 1995. Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
@ -181,6 +181,7 @@ struct locale_ctype_t
size_t default_missing_lineno; size_t default_missing_lineno;
uint32_t to_nonascii; uint32_t to_nonascii;
uint32_t nonascii_case;
/* The arrays for the binary representation. */ /* The arrays for the binary representation. */
char_class_t *ctype_b; char_class_t *ctype_b;
@ -625,6 +626,27 @@ character <SP> not defined in character map")));
else else
ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print); ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
/* Check whether all single-byte characters map to their upper/lowercase
equivalent according to the ASCII rules. */
for (cnt = 'A'; cnt <= 'Z'; ++cnt)
{
uint32_t uppval = ctype->map256_collection[0][cnt];
uint32_t lowval = ctype->map256_collection[1][cnt];
uint32_t lowuppval = ctype->map256_collection[0][lowval];
uint32_t lowlowval = ctype->map256_collection[1][lowval];
if (uppval != cnt
|| lowval != cnt + 0x20
|| lowuppval != cnt
|| lowlowval != cnt + 0x20)
ctype->nonascii_case = 1;
}
for (cnt = 0; cnt < 256; ++cnt)
if (cnt < 'A' || (cnt > 'Z' && cnt < 'a') || cnt > 'z')
if (ctype->map256_collection[0][cnt] != cnt
|| ctype->map256_collection[1][cnt] != cnt)
ctype->nonascii_case = 1;
/* Now that the tests are done make sure the name array contains all /* Now that the tests are done make sure the name array contains all
characters which are handled in the WIDTH section of the characters which are handled in the WIDTH section of the
character set definition file. */ character set definition file. */
@ -1045,6 +1067,9 @@ ctype_output (struct localedef_t *locale, const struct charmap_t *charmap,
CTYPE_DATA (_NL_CTYPE_MAP_TO_NONASCII, CTYPE_DATA (_NL_CTYPE_MAP_TO_NONASCII,
&ctype->to_nonascii, sizeof (uint32_t)); &ctype->to_nonascii, sizeof (uint32_t));
CTYPE_DATA (_NL_CTYPE_NONASCII_CASE,
&ctype->nonascii_case, sizeof (uint32_t));
case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN): case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t)); iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
iov[2 + elem + offset].iov_len = sizeof (uint32_t); iov[2 + elem + offset].iov_len = sizeof (uint32_t);