2000-07-23  Bruno Haible  <haible@clisp.cons.org>

	* wctype/wchar-lookup.h: New file.
	* wctype/iswctype.c: Include "wchar-lookup.h".
	(__iswctype): Support alternate locale format with 3-level tables.
	* wctype/iswctype_l.c (__iswctype_l): Likewise.
	* wctype/towctrans.c (__towctrans): Likewise.
	* wctype/towctrans_l.c (__towctrans_l): Likewise.
	* wctype/wcfuncs.c: Include "wchar-lookup.h".
	(__ctype32_wctype, __ctype32_wctrans): Declare external.
	(__iswalnum, __iswalpha, __iswcntrl, __iswdigit, __iswlower,
	__iswgraph, __iswprint, __iswpunct, __iswspace, __iswupper,
	__iswxdigit, towlower, towupper): Support alternate locale format
	with 3-level tables.
	* wctype/wcextra.c (iswblank): Likewise.
	* wctype/wcfuncs_l.c: Include "wchar-lookup.h".
	(__iswalnum_l, __iswalpha_l, __iswcntrl_l, __iswdigit_l, __iswlower_l,
	__iswgraph_l, __iswprint_l, __iswpunct_l, __iswspace_l, __iswupper_l,
	__iswxdigit_l, __towlower_l, __towupper_l): Support alternate locale
	format with 3-level tables.
	* wctype/wcextra_l.c (__iswblank_l): Likewise.
	* wctype/wctype.c (__wctype): Likewise.  In the alternate locale
	format, return a 3-level table pointer.
	* wctype/wctype_l.c (__wctype_l): Likewise.
	* wctype/wctrans.c (wctrans): Likewise.
	* wctype/wctype.h (__ISwupper, __ISwlower, __ISwalpha, __ISwdigit,
	__ISwxdigit, __ISwspace, __ISwprint, __ISwgraph, __ISwblank,
	__ISwcntrl, __ISwpunct, __ISwalnum): New enum values.
	(iswctype): Remove macro definition.
	* wcsmbs/wcwidth.h: Include "wchar-lookup.h".
	(internal_wcwidth): Support alternate locale format with 3-level
	tables.
	* locale/langinfo.h (_NL_CTYPE_CLASS_OFFSET, _NL_CTYPE_MAP_OFFSET):
	New nl_items.
	* locale/categories.def (_NL_CTYPE_CLASS_OFFSET, _NL_CTYPE_MAP_OFFSET):
	Define them as being type "word".
	* locale/C-ctype.c (_nl_C_LC_CTYPE): Add initializers for them.
	* ctype/ctype-info.c (__ctype32_wctype, __ctype32_wctrans,
	__ctype32_width): New exported variables.
	* locale/lc-ctype.c (_nl_postload_ctype): Initialize them in the
	alternate locale format. Don't initialize __ctype_names and
	__ctype_width in the alternate locale format.
	* locale/programs/localedef.h (oldstyle_tables): New declaration.
	* locale/programs/localedef.c (oldstyle_tables): New variable.
	(OPT_OLDSTYLE): New macro.
	(options): Add --old-style option.
	(parse_opt): Handle --old-style option.
	* locale/programs/ld-ctype.c (locale_ctype_t): Add class_offset,
	map_offset, class_3level, map_3level, width_3level members.
	(ctype_output): Support for alternate locale format: Computation of
	nelems changes. _NL_CTYPE_TOUPPER32, _NL_CTYPE_TOLOWER32 and
	_NL_CTYPE_CLASS32 only 256 characters. _NL_CTYPE_NAMES empty.
	New fields _NL_CTYPE_CLASS_OFFSET, _NL_CTYPE_MAP_OFFSET. Field
	_NL_CTYPE_WIDTH now contains the three-level table. Extra elems
	now contain both class and map tables.
	(struct wctype_table): New type.
	(wctype_table_init, wctype_table_add, wctype_table_finalize): New
	functions.
	(struct wcwidth_table): New type.
	(wcwidth_table_init, wcwidth_table_add, wcwidth_table_finalize): New
	functions.
	(struct wctrans_table): New type.
	(wctrans_table_init, wctrans_table_add, wctrans_table_finalize): New
	functions.
	(allocate_arrays): Support for alternate locale format: Set
	plane_size and plane_cnt to 0. Restrict ctype->ctype32_b to the first
	256 characters. Compute ctype->class_3level. Restrict ctype->map32[idx]
	to the first 256 characters. Compute ctype->map_3level. Set
	ctype->class_offset and ctype->map_offset. Compute ctype->width_3level
	instead of ctype->width.
This commit is contained in:
Ulrich Drepper 2000-07-25 04:37:22 +00:00
parent f79d99cb26
commit ef44614405
26 changed files with 1759 additions and 334 deletions

View File

@ -1,3 +1,74 @@
2000-07-23 Bruno Haible <haible@clisp.cons.org>
* wctype/wchar-lookup.h: New file.
* wctype/iswctype.c: Include "wchar-lookup.h".
(__iswctype): Support alternate locale format with 3-level tables.
* wctype/iswctype_l.c (__iswctype_l): Likewise.
* wctype/towctrans.c (__towctrans): Likewise.
* wctype/towctrans_l.c (__towctrans_l): Likewise.
* wctype/wcfuncs.c: Include "wchar-lookup.h".
(__ctype32_wctype, __ctype32_wctrans): Declare external.
(__iswalnum, __iswalpha, __iswcntrl, __iswdigit, __iswlower,
__iswgraph, __iswprint, __iswpunct, __iswspace, __iswupper,
__iswxdigit, towlower, towupper): Support alternate locale format
with 3-level tables.
* wctype/wcextra.c (iswblank): Likewise.
* wctype/wcfuncs_l.c: Include "wchar-lookup.h".
(__iswalnum_l, __iswalpha_l, __iswcntrl_l, __iswdigit_l, __iswlower_l,
__iswgraph_l, __iswprint_l, __iswpunct_l, __iswspace_l, __iswupper_l,
__iswxdigit_l, __towlower_l, __towupper_l): Support alternate locale
format with 3-level tables.
* wctype/wcextra_l.c (__iswblank_l): Likewise.
* wctype/wctype.c (__wctype): Likewise. In the alternate locale
format, return a 3-level table pointer.
* wctype/wctype_l.c (__wctype_l): Likewise.
* wctype/wctrans.c (wctrans): Likewise.
* wctype/wctype.h (__ISwupper, __ISwlower, __ISwalpha, __ISwdigit,
__ISwxdigit, __ISwspace, __ISwprint, __ISwgraph, __ISwblank,
__ISwcntrl, __ISwpunct, __ISwalnum): New enum values.
(iswctype): Remove macro definition.
* wcsmbs/wcwidth.h: Include "wchar-lookup.h".
(internal_wcwidth): Support alternate locale format with 3-level
tables.
* locale/langinfo.h (_NL_CTYPE_CLASS_OFFSET, _NL_CTYPE_MAP_OFFSET):
New nl_items.
* locale/categories.def (_NL_CTYPE_CLASS_OFFSET, _NL_CTYPE_MAP_OFFSET):
Define them as being type "word".
* locale/C-ctype.c (_nl_C_LC_CTYPE): Add initializers for them.
* ctype/ctype-info.c (__ctype32_wctype, __ctype32_wctrans,
__ctype32_width): New exported variables.
* locale/lc-ctype.c (_nl_postload_ctype): Initialize them in the
alternate locale format. Don't initialize __ctype_names and
__ctype_width in the alternate locale format.
* locale/programs/localedef.h (oldstyle_tables): New declaration.
* locale/programs/localedef.c (oldstyle_tables): New variable.
(OPT_OLDSTYLE): New macro.
(options): Add --old-style option.
(parse_opt): Handle --old-style option.
* locale/programs/ld-ctype.c (locale_ctype_t): Add class_offset,
map_offset, class_3level, map_3level, width_3level members.
(ctype_output): Support for alternate locale format: Computation of
nelems changes. _NL_CTYPE_TOUPPER32, _NL_CTYPE_TOLOWER32 and
_NL_CTYPE_CLASS32 only 256 characters. _NL_CTYPE_NAMES empty.
New fields _NL_CTYPE_CLASS_OFFSET, _NL_CTYPE_MAP_OFFSET. Field
_NL_CTYPE_WIDTH now contains the three-level table. Extra elems
now contain both class and map tables.
(struct wctype_table): New type.
(wctype_table_init, wctype_table_add, wctype_table_finalize): New
functions.
(struct wcwidth_table): New type.
(wcwidth_table_init, wcwidth_table_add, wcwidth_table_finalize): New
functions.
(struct wctrans_table): New type.
(wctrans_table_init, wctrans_table_add, wctrans_table_finalize): New
functions.
(allocate_arrays): Support for alternate locale format: Set
plane_size and plane_cnt to 0. Restrict ctype->ctype32_b to the first
256 characters. Compute ctype->class_3level. Restrict ctype->map32[idx]
to the first 256 characters. Compute ctype->map_3level. Set
ctype->class_offset and ctype->map_offset. Compute ctype->width_3level
instead of ctype->width.
2000-07-24 Ulrich Drepper <drepper@redhat.com>
* libio/iogetwline.c (_IO_getwline_info): Use wide character

View File

@ -37,3 +37,6 @@ const __uint32_t *__ctype32_tolower = b (__uint32_t, tolower, 128);
const __uint32_t *__ctype32_toupper = b (__uint32_t, toupper, 128);
const __uint32_t *__ctype_names = b (__uint32_t, names, 0);
const unsigned char *__ctype_width = b (unsigned char, width, 0);
const char *__ctype32_wctype[12];
const char *__ctype32_wctrans[2];
const char *__ctype32_width;

View File

@ -380,6 +380,8 @@ const struct locale_data _nl_C_LC_CTYPE =
{ string: "ANSI_X3.4-1968" },
{ string: (const char *) &_nl_C_LC_CTYPE_toupper[128] },
{ string: (const char *) &_nl_C_LC_CTYPE_tolower[128] },
{ word: 0 },
{ word: 0 },
{ word: 1 },
{ string: "0" },
{ string: "1" },

View File

@ -84,6 +84,8 @@ DEFINE_CATEGORY
DEFINE_ELEMENT (_NL_CTYPE_CODESET_NAME, "charmap", std, string)
DEFINE_ELEMENT (_NL_CTYPE_TOUPPER32, "ctype-toupper32", std, string)
DEFINE_ELEMENT (_NL_CTYPE_TOLOWER32, "ctype-tolower32", std, string)
DEFINE_ELEMENT (_NL_CTYPE_CLASS_OFFSET, "ctype-class-offset", std, word)
DEFINE_ELEMENT (_NL_CTYPE_MAP_OFFSET, "ctype-map-offset", std, word)
DEFINE_ELEMENT (_NL_CTYPE_INDIGITS_MB_LEN, "ctype-indigits_mb-len", std, word)
DEFINE_ELEMENT (_NL_CTYPE_INDIGITS0_MB, "ctype-indigits0_mb", std, string)
DEFINE_ELEMENT (_NL_CTYPE_INDIGITS1_MB, "ctype-indigits1_mb", std, string)

View File

@ -274,6 +274,8 @@ enum
#define CODESET CODESET
_NL_CTYPE_TOUPPER32,
_NL_CTYPE_TOLOWER32,
_NL_CTYPE_CLASS_OFFSET,
_NL_CTYPE_MAP_OFFSET,
_NL_CTYPE_INDIGITS_MB_LEN,
_NL_CTYPE_INDIGITS0_MB,
_NL_CTYPE_INDIGITS1_MB,

View File

@ -1,5 +1,5 @@
/* Define current locale data for LC_CTYPE category.
Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
Copyright (C) 1995-1999, 2000 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@ -45,6 +45,9 @@ _nl_postload_ctype (void)
extern const unsigned char *__ctype_width;
extern const uint32_t *__ctype32_toupper;
extern const uint32_t *__ctype32_tolower;
extern const char *__ctype32_wctype[12];
extern const char *__ctype32_wctrans[2];
extern const char *__ctype32_width;
__ctype_b = current (uint16_t, CLASS, 128);
__ctype_toupper = current (uint32_t, TOUPPER, 128);
@ -52,6 +55,27 @@ _nl_postload_ctype (void)
__ctype32_b = current (uint32_t, CLASS32, 0);
__ctype32_toupper = current (uint32_t, TOUPPER32, 0);
__ctype32_tolower = current (uint32_t, TOLOWER32, 0);
if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_HASH_SIZE) != 0)
{
/* Old locale format. */
__ctype_names = current (uint32_t, NAMES, 0);
__ctype_width = current (unsigned char, WIDTH, 0);
}
else
{
/* New locale format. */
size_t offset, cnt;
offset = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_CLASS_OFFSET);
for (cnt = 0; cnt < 12; cnt++)
__ctype32_wctype[cnt] =
_nl_current_LC_CTYPE->values[offset + cnt].string;
offset = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_OFFSET);
for (cnt = 0; cnt < 2; cnt++)
__ctype32_wctrans[cnt] =
_nl_current_LC_CTYPE->values[offset + cnt].string;
__ctype32_width = current (char, WIDTH, 0);
}
}

File diff suppressed because it is too large Load Diff

View File

@ -55,6 +55,9 @@ int verbose;
/* If not zero suppress warnings and information messages. */
int be_quiet;
/* If not zero, produce old-style hash table instead of 3-level access tables. */
int oldstyle_tables;
/* If not zero force output even if warning were issued. */
static int force_output;
@ -77,6 +80,7 @@ void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version;
#define OPT_POSIX 1
#define OPT_QUIET 2
#define OPT_OLDSTYLE 3
/* Definitions of arguments for argp functions. */
static const struct argp_option options[] =
@ -91,6 +95,7 @@ static const struct argp_option options[] =
{ NULL, 0, NULL, 0, N_("Output control:") },
{ "force", 'c', NULL, 0,
N_("Create output even if warning messages were issued") },
{ "old-style", OPT_OLDSTYLE, NULL, 0, N_("Create old-style tables") },
{ "posix", OPT_POSIX, NULL, 0, N_("Be strictly POSIX conform") },
{ "quiet", OPT_QUIET, NULL, 0,
N_("Suppress warnings and information messages") },
@ -242,6 +247,9 @@ parse_opt (int key, char *arg, struct argp_state *state)
case OPT_POSIX:
posix_conformance = 1;
break;
case OPT_OLDSTYLE:
oldstyle_tables = 1;
break;
case 'c':
force_output = 1;
break;

View File

@ -111,6 +111,7 @@ struct localedef_t
/* Global variables of the localedef program. */
extern int verbose;
extern int be_quiet;
extern int oldstyle_tables;
extern const char *repertoire_global;

View File

@ -1,5 +1,8 @@
2000-07-24 Ulrich Drepper <drepper@redhat.com>
* tst-wctype.input: Add more input text.
* tst-wctype.c: Test more classes.
* Makefile: Add rules to build, run, and distribute tst-langinfo.
* tst-langinfo.c: New file.
* tst-langinfo.sh: New file.

View File

@ -1,3 +1,23 @@
/* Test program for iswctype() function in ja_JP locale.
Copyright (C) 2000 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#include <error.h>
#include <locale.h>
#include <stdio.h>
@ -36,5 +56,89 @@ main (void)
}
}
wct = wctype ("jkata");
if (wct == 0)
error (EXIT_FAILURE, 0, "jkata: no such character class");
if (fgetws (buf, sizeof (buf) / sizeof (buf[0]), stdin) != NULL)
{
int n;
wprintf (L"buf[] = \"%ls\"\n", buf);
result = 0;
for (n = 0; buf[n] != L'\0'; ++n)
{
wprintf (L"jkata(U%04lx = %lc) = %d\n", (long) buf[n], buf[n],
iswctype (buf[n], wct));
result |= ((buf[n] < 0xff && iswctype (buf[n], wct))
|| (buf[n] > 0xff && !iswctype (buf[n], wct)));
}
}
wct = wctype ("jdigit");
if (wct == 0)
error (EXIT_FAILURE, 0, "jdigit: no such character class");
if (fgetws (buf, sizeof (buf) / sizeof (buf[0]), stdin) != NULL)
{
int n;
wprintf (L"buf[] = \"%ls\"\n", buf);
result = 0;
for (n = 0; buf[n] != L'\0'; ++n)
{
wprintf (L"jdigit(U%04lx = %lc) = %d\n", (long) buf[n], buf[n],
iswctype (buf[n], wct));
result |= ((buf[n] < 0xff && iswctype (buf[n], wct))
|| (buf[n] > 0xff && !iswctype (buf[n], wct)));
}
}
wct = wctype ("jspace");
if (wct == 0)
error (EXIT_FAILURE, 0, "jspace: no such character class");
if (fgetws (buf, sizeof (buf) / sizeof (buf[0]), stdin) != NULL)
{
int n;
wprintf (L"buf[] = \"%ls\"\n", buf);
result = 0;
for (n = 0; buf[n] != L'\0'; ++n)
{
wprintf (L"jspace(U%04lx = %lc) = %d\n", (long) buf[n], buf[n],
iswctype (buf[n], wct));
result |= ((buf[n] < 0xff && iswctype (buf[n], wct))
|| (buf[n] > 0xff && !iswctype (buf[n], wct)));
}
}
wct = wctype ("jkanji");
if (wct == 0)
error (EXIT_FAILURE, 0, "jkanji: no such character class");
if (fgetws (buf, sizeof (buf) / sizeof (buf[0]), stdin) != NULL)
{
int n;
wprintf (L"buf[] = \"%ls\"\n", buf);
result = 0;
for (n = 0; buf[n] != L'\0'; ++n)
{
wprintf (L"jkanji(U%04lx = %lc) = %d\n", (long) buf[n], buf[n],
iswctype (buf[n], wct));
result |= ((buf[n] < 0xff && iswctype (buf[n], wct))
|| (buf[n] > 0xff && !iswctype (buf[n], wct)));
}
}
return result;
}

View File

@ -1 +1,5 @@
ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずabcdefghjklmnoprrstuvwxyz
ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズabcdefghjklmnoprrstuvwxyz
0123456789
  a
亜唖娃阿哀愛挨姶逢葵茜穐悪握渥旭葦芦鯵梓圧斡扱宛姐虻abcdefghjklmnoprrstuvwxyz

View File

@ -21,27 +21,47 @@
#include <wchar.h>
#include <wctype.h>
#include "../wctype/cname-lookup.h"
#include "../wctype/wchar-lookup.h"
/* Array containing width information. */
/* Tables containing character property information. */
extern const char *__ctype32_wctype[12];
/* Tables containing width information. */
extern unsigned char *__ctype_width;
extern const char *__ctype32_width;
/* If the program is compiled without optimization the following declaration
is not visible in the header. */
extern unsigned int *__ctype32_b;
static __inline int
internal_wcwidth (wint_t ch)
internal_wcwidth (wint_t wc)
{
if (wc == L'\0')
return 0;
if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_HASH_SIZE) != 0)
{
/* Old locale format. */
size_t idx;
unsigned char res;
if (ch == L'\0')
return 0;
idx = cname_lookup (ch);
idx = cname_lookup (wc);
if (idx == ~((size_t) 0) || (__ctype32_b[idx] & _ISwprint) == 0)
return -1;
res = __ctype_width[idx];
return res == (unsigned char) '\xff' ? -1 : (int) res;
}
else
{
/* New locale format. */
unsigned char res;
if (wctype_table_lookup (__ctype32_wctype[__ISwprint], wc) == 0)
return -1;
res = wcwidth_table_lookup (__ctype32_width, wc);
return res == (unsigned char) '\xff' ? -1 : (int) res;
}
}

View File

@ -1,4 +1,4 @@
/* Copyright (C) 1996 Free Software Foundation, Inc.
/* Copyright (C) 1996, 2000 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
@ -21,6 +21,7 @@
#include <wctype.h>
#include "cname-lookup.h"
#include "wchar-lookup.h"
extern unsigned int *__ctype32_b;
@ -29,6 +30,9 @@ extern unsigned int *__ctype32_b;
int
__iswctype (wint_t wc, wctype_t desc)
{
if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_HASH_SIZE) != 0)
{
/* Old locale format. */
size_t idx;
idx = cname_lookup (wc);
@ -36,5 +40,16 @@ __iswctype (wint_t wc, wctype_t desc)
return 0;
return __ctype32_b[idx] & desc;
}
else
{
/* If the user passes in an invalid DESC value (the one returned from
`wctype' in case of an error) simply return 0. */
if (desc == (wctype_t) 0)
return 0;
/* New locale format. */
return wctype_table_lookup ((const char *) desc, wc);
}
}
weak_alias (__iswctype, iswctype)

View File

@ -22,11 +22,15 @@
#define USE_IN_EXTENDED_LOCALE_MODEL 1
#include "cname-lookup.h"
#include "wchar-lookup.h"
int
__iswctype_l (wint_t wc, wctype_t desc, __locale_t locale)
{
if (locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_HASH_SIZE)].word != 0)
{
/* Old locale format. */
const uint32_t *class32_b;
size_t idx;
@ -38,4 +42,15 @@ __iswctype_l (wint_t wc, wctype_t desc, __locale_t locale)
locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_CLASS32)].string;
return class32_b[idx] & desc;
}
else
{
/* If the user passes in an invalid DESC value (the one returned from
`__wctype_l' in case of an error) simply return 0. */
if (desc == (wctype_t) 0)
return 0;
/* New locale format. */
return wctype_table_lookup ((const char *) desc, wc);
}
}

View File

@ -21,22 +21,32 @@
/* Define the lookup function. */
#include "cname-lookup.h"
#include "wchar-lookup.h"
wint_t
__towctrans (wint_t wc, wctrans_t desc)
{
size_t idx;
/* If the user passes in an invalid DESC value (the one returned from
`wctrans' in case of an error) simply return the value. */
if (desc == (wctrans_t) 0)
return wc;
if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_HASH_SIZE) != 0)
{
/* Old locale format. */
size_t idx;
idx = cname_lookup (wc);
if (idx == ~((size_t) 0))
/* Character is not known. Default action is to simply return it. */
return wc;
return (wint_t) desc[idx];
}
else
{
/* New locale format. */
return wctrans_table_lookup ((const char *) desc, wc);
}
}
weak_alias (__towctrans, towctrans)

View File

@ -1,5 +1,5 @@
/* Map wide character using given mapping and locale.
Copyright (C) 1996, 1997 Free Software Foundation, Inc.
Copyright (C) 1996, 1997, 2000 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@ -22,10 +22,19 @@
/* Define the lookup function. */
#define USE_IN_EXTENDED_LOCALE_MODEL 1
#include "cname-lookup.h"
#include "wchar-lookup.h"
wint_t
__towctrans_l (wint_t wc, wctrans_t desc, __locale_t locale)
{
/* If the user passes in an invalid DESC value (the one returned from
`__wctrans_l' in case of an error) simply return the value. */
if (desc == (wctrans_t) 0)
return wc;
if (locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_HASH_SIZE)].word != 0)
{
/* Old locale format. */
size_t idx;
idx = cname_lookup (wc, locale);
@ -34,4 +43,10 @@ __towctrans_l (wint_t wc, wctrans_t desc, __locale_t locale)
return wc;
return (wint_t) desc[idx];
}
else
{
/* New locale format. */
return wctrans_table_lookup ((const char *) desc, wc);
}
}

View File

@ -1,5 +1,5 @@
/* Additional non standardized wide character classification functions.
Copyright (C) 1997, 1999 Free Software Foundation, Inc.
Copyright (C) 1997, 1999, 2000 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
@ -23,14 +23,21 @@
#include <wctype.h>
#include "cname-lookup.h"
#include "wchar-lookup.h"
/* If the program is compiled without optimization the following declaration
is not visible in the header. */
extern unsigned int *__ctype32_b;
/* This is not exported. */
extern const char *__ctype32_wctype[12];
int
(iswblank) (wint_t wc)
{
if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_HASH_SIZE) != 0)
{
/* Old locale format. */
size_t idx;
idx = cname_lookup (wc);
@ -38,4 +45,10 @@ int
return 0;
return __ctype32_b[idx] & _ISwblank;
}
else
{
/* New locale format. */
return wctype_table_lookup (__ctype32_wctype[__ISwblank], wc);
}
}

View File

@ -1,5 +1,5 @@
/* Additional non standardized wide character classification functions.
Copyright (C) 1997, 1999 Free Software Foundation, Inc.
Copyright (C) 1997, 1999, 2000 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
@ -24,12 +24,16 @@
#define USE_IN_EXTENDED_LOCALE_MODEL 1
#include "cname-lookup.h"
#include "wchar-lookup.h"
int
(__iswblank_l) (wint_t wc, __locale_t locale)
{
const unsigned int *class32_b;
if (locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_HASH_SIZE)].word != 0)
{
/* Old locale format. */
const uint32_t *class32_b;
size_t idx;
idx = cname_lookup (wc, locale);
@ -39,5 +43,13 @@ int
class32_b = (uint32_t *)
locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_CLASS32)].string;
return class32_b[idx] & _ISwblank;
return class32_b[idx] & _ISwbit (__ISwblank);
}
else
{
/* New locale format. */
size_t i = locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_CLASS_OFFSET)].word + __ISwblank;
const char *desc = locale->__locales[LC_CTYPE]->values[i].string;
return wctype_table_lookup (desc, wc);
}
}

View File

@ -21,6 +21,7 @@
#include <ctype.h> /* For __ctype_tolower and __ctype_toupper. */
#include "cname-lookup.h"
#include "wchar-lookup.h"
/* If the program is compiled without optimization the following declaration
is not visible in the header. */
@ -29,6 +30,8 @@ extern unsigned int *__ctype32_b;
/* These are not exported. */
extern const uint32_t *__ctype32_toupper;
extern const uint32_t *__ctype32_tolower;
extern const char *__ctype32_wctype[12];
extern const char *__ctype32_wctrans[2];
/* Provide real-function versions of all the wctype macros. */
@ -36,43 +39,55 @@ extern const uint32_t *__ctype32_tolower;
int \
__##name (wint_t wc) \
{ \
if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_HASH_SIZE) != 0) \
{ \
/* Old locale format. */ \
size_t idx; \
\
idx = cname_lookup (wc); \
if (idx == ~((size_t) 0)) \
return 0; \
\
return __ctype32_b[idx] & type; \
return __ctype32_b[idx] & _ISwbit (type); \
} \
else \
{ \
/* New locale format. */ \
return wctype_table_lookup (__ctype32_wctype[type], wc); \
} \
} \
weak_alias (__##name, name)
#undef iswalnum
func (iswalnum, _ISwalnum)
func (iswalnum, __ISwalnum)
#undef iswalpha
func (iswalpha, _ISwalpha)
func (iswalpha, __ISwalpha)
#undef iswcntrl
func (iswcntrl, _ISwcntrl)
func (iswcntrl, __ISwcntrl)
#undef iswdigit
func (iswdigit, _ISwdigit)
func (iswdigit, __ISwdigit)
#undef iswlower
func (iswlower, _ISwlower)
func (iswlower, __ISwlower)
#undef iswgraph
func (iswgraph, _ISwgraph)
func (iswgraph, __ISwgraph)
#undef iswprint
func (iswprint, _ISwprint)
func (iswprint, __ISwprint)
#undef iswpunct
func (iswpunct, _ISwpunct)
func (iswpunct, __ISwpunct)
#undef iswspace
func (iswspace, _ISwspace)
func (iswspace, __ISwspace)
#undef iswupper
func (iswupper, _ISwupper)
func (iswupper, __ISwupper)
#undef iswxdigit
func (iswxdigit, _ISwxdigit)
func (iswxdigit, __ISwxdigit)
wint_t
(towlower) (wc)
wint_t wc;
{
if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_HASH_SIZE) != 0)
{
/* Old locale format. */
size_t idx;
idx = cname_lookup (wc);
@ -81,12 +96,21 @@ wint_t
return wc;
return (wint_t) __ctype32_tolower[idx];
}
else
{
/* New locale format. */
return wctrans_table_lookup (__ctype32_wctrans[1], wc);
}
}
wint_t
(towupper) (wc)
wint_t wc;
{
if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_HASH_SIZE) != 0)
{
/* Old locale format. */
size_t idx;
idx = cname_lookup (wc);
@ -95,4 +119,10 @@ wint_t
return wc;
return (wint_t) __ctype32_toupper[idx];
}
else
{
/* New locale format. */
return wctrans_table_lookup (__ctype32_wctrans[0], wc);
}
}

View File

@ -22,28 +22,55 @@
#define USE_IN_EXTENDED_LOCALE_MODEL
#include "cname-lookup.h"
#include "wchar-lookup.h"
/* Provide real-function versions of all the wctype macros. */
#define func(name, type) \
int name (wint_t wc, __locale_t locale) \
{ return __iswctype_l (wc, type, locale); }
{ \
if (locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_HASH_SIZE)].word != 0) \
{ \
/* Old locale format. */ \
const uint32_t *class32_b; \
size_t idx; \
\
idx = cname_lookup (wc, locale); \
if (idx == ~((size_t) 0)) \
return 0; \
\
class32_b = (uint32_t *) \
locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_CLASS32)].string; \
\
return class32_b[idx] & _ISwbit (type); \
} \
else \
{ \
/* New locale format. */ \
size_t i = locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_CLASS_OFFSET)].word + type; \
const char *desc = locale->__locales[LC_CTYPE]->values[i].string; \
return wctype_table_lookup (desc, wc); \
} \
}
func (__iswalnum_l, _ISwalnum)
func (__iswalpha_l, _ISwalpha)
func (__iswcntrl_l, _ISwcntrl)
func (__iswdigit_l, _ISwdigit)
func (__iswlower_l, _ISwlower)
func (__iswgraph_l, _ISwgraph)
func (__iswprint_l, _ISwprint)
func (__iswpunct_l, _ISwpunct)
func (__iswspace_l, _ISwspace)
func (__iswupper_l, _ISwupper)
func (__iswxdigit_l, _ISwxdigit)
func (__iswalnum_l, __ISwalnum)
func (__iswalpha_l, __ISwalpha)
func (__iswcntrl_l, __ISwcntrl)
func (__iswdigit_l, __ISwdigit)
func (__iswlower_l, __ISwlower)
func (__iswgraph_l, __ISwgraph)
func (__iswprint_l, __ISwprint)
func (__iswpunct_l, __ISwpunct)
func (__iswspace_l, __ISwspace)
func (__iswupper_l, __ISwupper)
func (__iswxdigit_l, __ISwxdigit)
wint_t
(__towlower_l) (wint_t wc, __locale_t locale)
{
if (locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_HASH_SIZE)].word != 0)
{
/* Old locale format. */
const int32_t *class32_tolower;
size_t idx;
@ -55,11 +82,22 @@ wint_t
locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_TOLOWER32)].string;
return class32_tolower[idx];
}
else
{
/* New locale format. */
size_t i = locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_MAP_OFFSET)].word + 1;
const char *desc = locale->__locales[LC_CTYPE]->values[i].string;
return wctrans_table_lookup (desc, wc);
}
}
wint_t
(__towupper_l) (wint_t wc, __locale_t locale)
{
if (locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_HASH_SIZE)].word != 0)
{
/* Old locale format. */
const int32_t *class32_toupper;
size_t idx;
@ -71,4 +109,12 @@ wint_t
locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_TOUPPER32)].string;
return class32_toupper[idx];
}
else
{
/* New locale format. */
size_t i = locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_MAP_OFFSET)].word + 0;
const char *desc = locale->__locales[LC_CTYPE]->values[i].string;
return wctrans_table_lookup (desc, wc);
}
}

139
wctype/wchar-lookup.h Normal file
View File

@ -0,0 +1,139 @@
/* Copyright (C) 2000 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Bruno Haible <haible@clisp.cons.org>, 2000.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
/* Tables indexed by a wide character are compressed through the use
of a multi-level lookup. The compression effect comes from blocks
that don't need particular data and from blocks that can share their
data. */
/* Bit tables are accessed by cutting wc in four blocks of bits:
- the high 32-q-p-5 bits,
- the next q bits,
- the next p bits,
- the next 5 bits.
+------------------+-----+-----+-----+
wc = + 32-q-p-5 | q | p | 5 |
+------------------+-----+-----+-----+
p and q are variable. For 16-bit Unicode it is sufficient to
choose p and q such that q+p+5 <= 16.
The table contains the following uint32_t words:
- q+p+5,
- s = upper exclusive bound for wc >> (q+p+5),
- p+5,
- 2^q-1,
- 2^p-1,
- 1st-level table: s offsets, pointing into the 2nd-level table,
- 2nd-level table: k*2^q offsets, pointing into the 3rd-level table,
- 3rd-level table: j*2^p words, each containing 32 bits of data.
*/
/* Test the bit stored for WC in the 3-level bit table TABLE.
   Returns 1 if the bit is set.  Returns 0 if the bit is clear, if WC lies
   beyond the range covered by the 1st level, or if the 1st- or 2nd-level
   entry for WC is 0 (no data stored for that block).  */
static __inline int
wctype_table_lookup (const char *table, uint32_t wc)
{
  /* The table starts with five uint32_t header words followed by the
     1st-level offsets; see the layout description in the comment above.  */
  const uint32_t *header = (const uint32_t *) table;
  const uint32_t *level2;
  const uint32_t *level3;
  uint32_t top = wc >> header[0];
  uint32_t off2;
  uint32_t off3;
  uint32_t bits;

  /* WC is above everything the 1st-level table covers.  */
  if (top >= header[1])
    return 0;

  /* Offsets are counted in bytes from the start of TABLE; 0 means the
     block is absent.  */
  off2 = header[5 + top];
  if (off2 == 0)
    return 0;

  level2 = (const uint32_t *) (table + off2);
  off3 = level2[(wc >> header[2]) & header[3]];
  if (off3 == 0)
    return 0;

  /* Each 3rd-level word holds 32 bits; the low 5 bits of WC pick one.  */
  level3 = (const uint32_t *) (table + off3);
  bits = level3[(wc >> 5) & header[4]];
  return (bits >> (wc & 0x1f)) & 1;
}
/* Byte tables use the same header and two levels of offsets as bit
   tables, but the 3rd level is an array of single bytes indexed by the
   low bits of WC directly (no 5-bit word index is split off).
   Returns the byte stored for WC, or 0xff if WC lies beyond the range
   covered by the 1st level or its 1st- or 2nd-level entry is 0.  */
static __inline int
wcwidth_table_lookup (const char *table, uint32_t wc)
{
  const uint32_t *header = (const uint32_t *) table;
  const uint32_t *level2;
  const uint8_t *level3;
  uint32_t top = wc >> header[0];
  uint32_t off2;
  uint32_t off3;

  /* WC is above everything the 1st-level table covers.  */
  if (top >= header[1])
    return 0xff;

  /* Offsets are counted in bytes from the start of TABLE; 0 means the
     block is absent.  */
  off2 = header[5 + top];
  if (off2 == 0)
    return 0xff;

  level2 = (const uint32_t *) (table + off2);
  off3 = level2[(wc >> header[2]) & header[3]];
  if (off3 == 0)
    return 0xff;

  level3 = (const uint8_t *) (table + off3);
  return level3[wc & header[4]];
}
/* Mapping tables use the same header and two levels of offsets as bit
   tables, but the 3rd level is an array of signed 32-bit words indexed
   by the low bits of WC directly (no 5-bit word index is split off).
   Each word is the difference between the mapped character and WC.
   Returns WC plus that difference, or WC unchanged if WC lies beyond the
   range covered by the 1st level or its 1st- or 2nd-level entry is 0.  */
static __inline uint32_t
wctrans_table_lookup (const char *table, uint32_t wc)
{
  const uint32_t *header = (const uint32_t *) table;
  const uint32_t *level2;
  const int32_t *level3;
  uint32_t top = wc >> header[0];
  uint32_t off2;
  uint32_t off3;
  int32_t delta;

  /* WC is above everything the 1st-level table covers.  */
  if (top >= header[1])
    return wc;

  /* Offsets are counted in bytes from the start of TABLE; 0 means the
     block is absent.  */
  off2 = header[5 + top];
  if (off2 == 0)
    return wc;

  level2 = (const uint32_t *) (table + off2);
  off3 = level2[(wc >> header[2]) & header[3]];
  if (off3 == 0)
    return wc;

  level3 = (const int32_t *) (table + off3);
  delta = level3[wc & header[4]];
  return wc + delta;
}

View File

@ -32,7 +32,6 @@ wctrans (const char *property)
{
const char *names;
size_t cnt;
int32_t *result;
names = _NL_CURRENT (LC_CTYPE, _NL_CTYPE_MAP_NAMES);
cnt = 0;
@ -48,13 +47,21 @@ wctrans (const char *property)
if (names[0] == '\0')
return 0;
if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_HASH_SIZE) != 0)
{
/* Old locale format. */
if (cnt == 0)
return (wctrans_t) __ctype32_toupper;
else if (cnt == 1)
return (wctrans_t) __ctype32_tolower;
/* We have to search the table. */
result = (int32_t *) _NL_CURRENT (LC_CTYPE, _NL_NUM_LC_CTYPE + cnt - 2);
return (wctrans_t) result;
return (wctrans_t) (const int32_t *) _NL_CURRENT (LC_CTYPE, _NL_NUM_LC_CTYPE + cnt - 2);
}
else
{
/* New locale format. */
size_t i = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_OFFSET) + cnt;
return (wctrans_t) _nl_current_LC_CTYPE->values[i].string;
}
}

View File

@ -1,4 +1,4 @@
/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
/* Copyright (C) 1996, 1997, 1998, 2000 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
@ -26,11 +26,11 @@ wctype_t
__wctype (const char *property)
{
const char *names;
wctype_t result;
unsigned int result;
size_t proplen = strlen (property);
names = _NL_CURRENT (LC_CTYPE, _NL_CTYPE_CLASS_NAMES);
for (result = 1; result != 0; result <<= 1)
for (result = 0; ; result++)
{
size_t nameslen = strlen (names);
@ -42,13 +42,22 @@ __wctype (const char *property)
return 0;
}
if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_HASH_SIZE) != 0)
{
/* Old locale format. */
#if __BYTE_ORDER == __BIG_ENDIAN
return result;
return 1 << result;
#else
# define SWAPU32(w) \
(((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))
return SWAPU32 (result);
return 1 << (result ^ 0x18); /* = SWAPU32 (1 << result); */
#endif
}
else
{
/* New locale format. */
size_t i = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_CLASS_OFFSET) + result;
return (wctype_t) _nl_current_LC_CTYPE->values[i].string;
}
}
weak_alias (__wctype, wctype)

View File

@ -78,18 +78,31 @@ typedef unsigned long int wctype_t;
enum
{
_ISwupper = _ISwbit (0), /* UPPERCASE. */
_ISwlower = _ISwbit (1), /* lowercase. */
_ISwalpha = _ISwbit (2), /* Alphabetic. */
_ISwdigit = _ISwbit (3), /* Numeric. */
_ISwxdigit = _ISwbit (4), /* Hexadecimal numeric. */
_ISwspace = _ISwbit (5), /* Whitespace. */
_ISwprint = _ISwbit (6), /* Printing. */
_ISwgraph = _ISwbit (7), /* Graphical. */
_ISwblank = _ISwbit (8), /* Blank (usually SPC and TAB). */
_ISwcntrl = _ISwbit (9), /* Control character. */
_ISwpunct = _ISwbit (10), /* Punctuation. */
_ISwalnum = _ISwbit (11) /* Alphanumeric. */
__ISwupper = 0, /* UPPERCASE. */
__ISwlower = 1, /* lowercase. */
__ISwalpha = 2, /* Alphabetic. */
__ISwdigit = 3, /* Numeric. */
__ISwxdigit = 4, /* Hexadecimal numeric. */
__ISwspace = 5, /* Whitespace. */
__ISwprint = 6, /* Printing. */
__ISwgraph = 7, /* Graphical. */
__ISwblank = 8, /* Blank (usually SPC and TAB). */
__ISwcntrl = 9, /* Control character. */
__ISwpunct = 10, /* Punctuation. */
__ISwalnum = 11, /* Alphanumeric. */
_ISwupper = _ISwbit (__ISwupper), /* UPPERCASE. */
_ISwlower = _ISwbit (__ISwlower), /* lowercase. */
_ISwalpha = _ISwbit (__ISwalpha), /* Alphabetic. */
_ISwdigit = _ISwbit (__ISwdigit), /* Numeric. */
_ISwxdigit = _ISwbit (__ISwxdigit), /* Hexadecimal numeric. */
_ISwspace = _ISwbit (__ISwspace), /* Whitespace. */
_ISwprint = _ISwbit (__ISwprint), /* Printing. */
_ISwgraph = _ISwbit (__ISwgraph), /* Graphical. */
_ISwblank = _ISwbit (__ISwblank), /* Blank (usually SPC and TAB). */
_ISwcntrl = _ISwbit (__ISwcntrl), /* Control character. */
_ISwpunct = _ISwbit (__ISwpunct), /* Punctuation. */
_ISwalnum = _ISwbit (__ISwalnum) /* Alphanumeric. */
};
# endif /* Not _ISwbit */
@ -227,11 +240,6 @@ extern unsigned int *__ctype32_b;
? (int) (__ctype32_b[(wint_t) (wc)] & _ISwblank) : iswblank (wc)))
# endif
# define iswctype(wc, desc) \
(__extension__ \
(__builtin_constant_p (wc) && (wint_t) (wc) <= L'\xff' \
? (int) (__ctype32_b[(wint_t) (wc)] & desc) : iswctype (wc, desc)))
#endif /* gcc && optimizing */
/*

View File

@ -1,4 +1,4 @@
/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
/* Copyright (C) 1996, 1997, 2000 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
@ -26,11 +26,11 @@ wctype_t
__wctype_l (const char *property, __locale_t locale)
{
const char *names;
wctype_t result;
unsigned int result;
size_t proplen = strlen (property);
names = locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES)].string;
for (result = 1; result != 0; result <<= 1)
for (result = 0; ; result++)
{
size_t nameslen = strlen (names);
@ -42,12 +42,21 @@ __wctype_l (const char *property, __locale_t locale)
return 0;
}
/* A non-zero hash size selects the old locale format, as in __wctype and wctrans.  */
if (locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_HASH_SIZE)].word != 0)
{
/* Old locale format. */
#if __BYTE_ORDER == __BIG_ENDIAN
return result;
return 1 << result;
#else
# define SWAPU32(w) \
(((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))
return SWAPU32 (result);
return 1 << (result ^ 0x18); /* = SWAPU32 (1 << result); */
#endif
}
else
{
/* New locale format. */
size_t i = locale->__locales[LC_CTYPE]->values[_NL_ITEM_INDEX (_NL_CTYPE_CLASS_OFFSET)].word + result;
return (wctype_t) locale->__locales[LC_CTYPE]->values[i].string;
}
}