From 8a4494506d9175a2c205ff8d39dc58abd83682eb Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Mon, 20 Jul 2009 20:04:42 -0700 Subject: [PATCH] Check generated locale for non-ASCII 8-bit characters with case conversion. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a locale does not have 8-bit characters with case conversion which are different from the ASCII conversion (±0x20) then we can perform some optimizations. These will follow later. --- ChangeLog | 13 +++++++++++++ locale/C-ctype.c | 6 ++++-- locale/langinfo.h | 1 + locale/localeinfo.h | 4 +++- locale/programs/ld-ctype.c | 27 ++++++++++++++++++++++++++- 5 files changed, 47 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index da91742d21..9d6b6d3d10 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +2009-07-20 Ulrich Drepper + + * locale/localeinfo.h (LIMAGIC): Update value for LC_CTYPE. + * locale/langinfo.h: Define _NL_CTYPE_NONASCII_CASE. + * locale/C-ctype.c (_nl_C_LC_CTYPE): Add initializer for + _NL_CTYPE_NONASCII_CASE. + * locale/programs/ld-ctype.c (locale_ctype_t): Add nonascii_case + field. + (ctype_finish): Check whether there are any 8-bit characters outside + the range ASCII has or whether the mapping isn't the same as for + ASCII (±0x20). Set nonascii_case appropriately. + (ctype_output): Add output handler for nonascii_case. + 2009-07-17 Ulrich Drepper * sysdeps/generic/sysdep.h: Define cfi_personality, cfi_lsda, diff --git a/locale/C-ctype.c b/locale/C-ctype.c index 85f3d2addb..2044fb77b6 100644 --- a/locale/C-ctype.c +++ b/locale/C-ctype.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1995-2002, 2003 Free Software Foundation, Inc. +/* Copyright (C) 1995-2002, 2003, 2009 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 1995. 
@@ -682,6 +682,8 @@ const struct locale_data _nl_C_LC_CTYPE attribute_hidden = { .string = (const char *) _nl_C_LC_CTYPE_class_alnum.header }, /* NR_MAPS wctrans_tables */ { .string = (const char *) _nl_C_LC_CTYPE_map_toupper.header }, - { .string = (const char *) _nl_C_LC_CTYPE_map_tolower.header } + { .string = (const char *) _nl_C_LC_CTYPE_map_tolower.header }, + /* _NL_CTYPE_NONASCII_CASE */ + { .word = 0 } } }; diff --git a/locale/langinfo.h b/locale/langinfo.h index 59017b31c8..d7ef6f60f4 100644 --- a/locale/langinfo.h +++ b/locale/langinfo.h @@ -348,6 +348,7 @@ enum _NL_CTYPE_EXTRA_MAP_12, _NL_CTYPE_EXTRA_MAP_13, _NL_CTYPE_EXTRA_MAP_14, + _NL_CTYPE_NONASCII_CASE, _NL_NUM_LC_CTYPE, /* LC_MONETARY category: formatting of monetary quantities. diff --git a/locale/localeinfo.h b/locale/localeinfo.h index 3661080bb2..19ea41ae6d 100644 --- a/locale/localeinfo.h +++ b/locale/localeinfo.h @@ -1,5 +1,5 @@ /* Declarations for internal libc locale interfaces - Copyright (C) 1995-2003, 2005, 2006, 2007, 2008 + Copyright (C) 1995-2003, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -35,6 +35,8 @@ #define LIMAGIC(category) \ (category == LC_COLLATE \ ? ((unsigned int) (0x20051014 ^ (category))) \ + : category == LC_CTYPE \ + ? ((unsigned int) (0x20090720 ^ (category))) \ : ((unsigned int) (0x20031115 ^ (category)))) /* Two special weight constants for the collation data. */ diff --git a/locale/programs/ld-ctype.c b/locale/programs/ld-ctype.c index d4474bf1a2..376a02c2f0 100644 --- a/locale/programs/ld-ctype.c +++ b/locale/programs/ld-ctype.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1995-2006, 2007 Free Software Foundation, Inc. +/* Copyright (C) 1995-2006, 2007, 2009 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 1995. 
@@ -181,6 +181,7 @@ struct locale_ctype_t size_t default_missing_lineno; uint32_t to_nonascii; + uint32_t nonascii_case; /* The arrays for the binary representation. */ char_class_t *ctype_b; @@ -625,6 +626,27 @@ character not defined in character map"))); else ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print); + /* Check whether all single-byte characters map to their upper/lowercase + equivalent according to the ASCII rules. */ + for (cnt = 'A'; cnt <= 'Z'; ++cnt) + { + uint32_t uppval = ctype->map256_collection[0][cnt]; + uint32_t lowval = ctype->map256_collection[1][cnt]; + uint32_t lowuppval = ctype->map256_collection[0][lowval]; + uint32_t lowlowval = ctype->map256_collection[1][lowval]; + + if (uppval != cnt + || lowval != cnt + 0x20 + || lowuppval != cnt + || lowlowval != cnt + 0x20) + ctype->nonascii_case = 1; + } + for (cnt = 0; cnt < 256; ++cnt) + if (cnt < 'A' || (cnt > 'Z' && cnt < 'a') || cnt > 'z') + if (ctype->map256_collection[0][cnt] != cnt + || ctype->map256_collection[1][cnt] != cnt) + ctype->nonascii_case = 1; + /* Now that the tests are done make sure the name array contains all characters which are handled in the WIDTH section of the character set definition file. */ @@ -1045,6 +1067,9 @@ ctype_output (struct localedef_t *locale, const struct charmap_t *charmap, CTYPE_DATA (_NL_CTYPE_MAP_TO_NONASCII, &ctype->to_nonascii, sizeof (uint32_t)); + CTYPE_DATA (_NL_CTYPE_NONASCII_CASE, + &ctype->nonascii_case, sizeof (uint32_t)); + case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN): iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t)); iov[2 + elem + offset].iov_len = sizeof (uint32_t);