mirror of
https://sourceware.org/git/glibc.git
synced 2024-12-22 19:00:07 +00:00
Update.
* posix/regex_internal.h: Add forward declaration of re_dfa_t. Replace last two parameters of re_string_allocate and re_string_construct with pointer to DFA. (re_dfa_t): Add map_notascii field. * posix/regcomp.c (re_compile_internal): Add call of re_string_construct. (init_dfa): Initialize map_notascii. * posix/regex_internal.c: Adjust definitions of re_string_allocate and re_string_construct. Pass DFA to re_string_construct. Adjust definition. Initialize map_notascii field. (build_wcs_upper_buffer): If map_notascii is zero use simplified method to map ASCII values to upper case. * posix/regex.c: Include localeinfo.h. * posix/regexec.c: Adjust call of re_string_allocate. * locale/langinfo.h: Add _NL_CTYPE_MAP_TO_NONASCII. * locale/localeinfo.h (LIMAGIC): Change value. * locale/categories.def: Add entry for _NL_CTYPE_MAP_TO_NONASCII. * locale/C-ctype.h: Likewise. * locale/programs/ld-ctype.c: Compute whether any mapping maps from ASCII to non-ASCII value. Write out that value.
This commit is contained in:
parent
2def87644d
commit
f0c7c524bb
23
ChangeLog
23
ChangeLog
@ -1,5 +1,28 @@
|
|||||||
2003-11-15 Ulrich Drepper <drepper@redhat.com>
|
2003-11-15 Ulrich Drepper <drepper@redhat.com>
|
||||||
|
|
||||||
|
* posix/regex_internal.h: Add forward declaration of re_dfa_t.
|
||||||
|
Replace last two parameters of re_string_allocate and
|
||||||
|
re_string_construct with pointer to DFA.
|
||||||
|
(re_dfa_t): Add map_notascii field.
|
||||||
|
* posix/regcomp.c (re_compile_internal): Add call of
|
||||||
|
re_string_construct.
|
||||||
|
(init_dfa): Initialize map_notascii.
|
||||||
|
* posix/regex_internal.c: Adjust definitions of re_string_allocate
|
||||||
|
and re_string_construct.
|
||||||
|
Pass DFA to re_string_construct. Adjust definition. Initialize
|
||||||
|
map_notascii field.
|
||||||
|
(build_wcs_upper_buffer): If map_notascii is zero use simplified
|
||||||
|
method to map ASCII values to upper case.
|
||||||
|
* posix/regex.c: Include localeinfo.h.
|
||||||
|
* posix/regexec.c: Adjust call of re_string_allocate.
|
||||||
|
|
||||||
|
* locale/langinfo.h: Add _NL_CTYPE_MAP_TO_NONASCII.
|
||||||
|
* locale/localeinfo.h (LIMAGIC): Change value.
|
||||||
|
* locale/categories.def: Add entry for _NL_CTYPE_MAP_TO_NONASCII.
|
||||||
|
* locale/C-ctype.h: Likewise.
|
||||||
|
* locale/programs/ld-ctype.c: Compute whether any mapping maps from
|
||||||
|
ASCII to non-ASCII value. Write out that value.
|
||||||
|
|
||||||
* wcsmbs/mbsinit.c: Undef mbsinit and __mbsinit.
|
* wcsmbs/mbsinit.c: Undef mbsinit and __mbsinit.
|
||||||
* include/wchar.h: Provide inline versions of mbsinit and __mbsinit.
|
* include/wchar.h: Provide inline versions of mbsinit and __mbsinit.
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/* Copyright (C) 1995-1999, 2000, 2001, 2002 Free Software Foundation, Inc.
|
/* Copyright (C) 1995-2002, 2003 Free Software Foundation, Inc.
|
||||||
This file is part of the GNU C Library.
|
This file is part of the GNU C Library.
|
||||||
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1995.
|
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1995.
|
||||||
|
|
||||||
@ -528,7 +528,7 @@ _nl_C_LC_CTYPE_width attribute_hidden =
|
|||||||
};
|
};
|
||||||
|
|
||||||
/* Number of fields with fixed meanings, starting at 0. */
|
/* Number of fields with fixed meanings, starting at 0. */
|
||||||
#define NR_FIXED 70
|
#define NR_FIXED 71
|
||||||
/* Number of class fields, starting at CLASS_OFFSET. */
|
/* Number of class fields, starting at CLASS_OFFSET. */
|
||||||
#define NR_CLASSES 12
|
#define NR_CLASSES 12
|
||||||
/* Number of map fields, starting at MAP_OFFSET. */
|
/* Number of map fields, starting at MAP_OFFSET. */
|
||||||
@ -665,6 +665,8 @@ const struct locale_data _nl_C_LC_CTYPE attribute_hidden =
|
|||||||
{ .word = 0 },
|
{ .word = 0 },
|
||||||
/* _NL_CTYPE_TRANSLIT_IGNORE */
|
/* _NL_CTYPE_TRANSLIT_IGNORE */
|
||||||
{ .wstr = NULL },
|
{ .wstr = NULL },
|
||||||
|
/* _NL_CTYPE_MAP_TO_NONASCII */
|
||||||
|
{ .word = 0 },
|
||||||
/* NR_CLASSES wctype_tables */
|
/* NR_CLASSES wctype_tables */
|
||||||
{ .string = (const char *) _nl_C_LC_CTYPE_class_upper.header },
|
{ .string = (const char *) _nl_C_LC_CTYPE_class_upper.header },
|
||||||
{ .string = (const char *) _nl_C_LC_CTYPE_class_lower.header },
|
{ .string = (const char *) _nl_C_LC_CTYPE_class_lower.header },
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/* Definition of all available locale categories and their items. -*- C -*-
|
/* Definition of all available locale categories and their items. -*- C -*-
|
||||||
Copyright (C) 1995-2001, 2002 Free Software Foundation, Inc.
|
Copyright (C) 1995-2001, 2002, 2003 Free Software Foundation, Inc.
|
||||||
This file is part of the GNU C Library.
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
The GNU C Library is free software; you can redistribute it and/or
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
@ -133,6 +133,7 @@ DEFINE_CATEGORY
|
|||||||
DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_DEFAULT_MISSING, "ctype-translit-default-missing", std, wstring)
|
DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_DEFAULT_MISSING, "ctype-translit-default-missing", std, wstring)
|
||||||
DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_IGNORE_LEN, "ctype-translit-ignore-len", std, word)
|
DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_IGNORE_LEN, "ctype-translit-ignore-len", std, word)
|
||||||
DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_IGNORE, "ctype-translit-ignore", std, string)
|
DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_IGNORE, "ctype-translit-ignore", std, string)
|
||||||
|
DEFINE_ELEMENT (_NL_CTYPE_MAP_TO_NONASCII, "map-to-nonascii", std, word)
|
||||||
), _nl_postload_ctype)
|
), _nl_postload_ctype)
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/* Access to locale-dependent parameters.
|
/* Access to locale-dependent parameters.
|
||||||
Copyright (C) 1995-99,2000,01,02 Free Software Foundation, Inc.
|
Copyright (C) 1995-2002, 2003 Free Software Foundation, Inc.
|
||||||
This file is part of the GNU C Library.
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
The GNU C Library is free software; you can redistribute it and/or
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
@ -334,6 +334,7 @@ enum
|
|||||||
_NL_CTYPE_TRANSLIT_DEFAULT_MISSING,
|
_NL_CTYPE_TRANSLIT_DEFAULT_MISSING,
|
||||||
_NL_CTYPE_TRANSLIT_IGNORE_LEN,
|
_NL_CTYPE_TRANSLIT_IGNORE_LEN,
|
||||||
_NL_CTYPE_TRANSLIT_IGNORE,
|
_NL_CTYPE_TRANSLIT_IGNORE,
|
||||||
|
_NL_CTYPE_MAP_TO_NONASCII,
|
||||||
_NL_CTYPE_EXTRA_MAP_1,
|
_NL_CTYPE_EXTRA_MAP_1,
|
||||||
_NL_CTYPE_EXTRA_MAP_2,
|
_NL_CTYPE_EXTRA_MAP_2,
|
||||||
_NL_CTYPE_EXTRA_MAP_3,
|
_NL_CTYPE_EXTRA_MAP_3,
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/* Declarations for internal libc locale interfaces
|
/* Declarations for internal libc locale interfaces
|
||||||
Copyright (C) 1995-2001, 2002 Free Software Foundation, Inc.
|
Copyright (C) 1995-2001, 2002, 2003 Free Software Foundation, Inc.
|
||||||
This file is part of the GNU C Library.
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
The GNU C Library is free software; you can redistribute it and/or
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
@ -31,7 +31,7 @@
|
|||||||
#include <intl/loadinfo.h> /* For loaded_l10nfile definition. */
|
#include <intl/loadinfo.h> /* For loaded_l10nfile definition. */
|
||||||
|
|
||||||
/* Magic number at the beginning of a locale data file for CATEGORY. */
|
/* Magic number at the beginning of a locale data file for CATEGORY. */
|
||||||
#define LIMAGIC(category) ((unsigned int) (0x20000828 ^ (category)))
|
#define LIMAGIC(category) ((unsigned int) (0x20031115 ^ (category)))
|
||||||
|
|
||||||
/* Two special weight constants for the collation data. */
|
/* Two special weight constants for the collation data. */
|
||||||
#define IGNORE_CHAR 2
|
#define IGNORE_CHAR 2
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/* Copyright (C) 1995-1999, 2000, 2001, 2002 Free Software Foundation, Inc.
|
/* Copyright (C) 1995-2002, 2003 Free Software Foundation, Inc.
|
||||||
This file is part of the GNU C Library.
|
This file is part of the GNU C Library.
|
||||||
Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
|
Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
|
||||||
|
|
||||||
@ -181,6 +181,8 @@ struct locale_ctype_t
|
|||||||
const char *default_missing_file;
|
const char *default_missing_file;
|
||||||
size_t default_missing_lineno;
|
size_t default_missing_lineno;
|
||||||
|
|
||||||
|
uint32_t to_nonascii;
|
||||||
|
|
||||||
/* The arrays for the binary representation. */
|
/* The arrays for the binary representation. */
|
||||||
char_class_t *ctype_b;
|
char_class_t *ctype_b;
|
||||||
char_class32_t *ctype32_b;
|
char_class32_t *ctype32_b;
|
||||||
@ -1035,6 +1037,10 @@ ctype_output (struct localedef_t *locale, const struct charmap_t *charmap,
|
|||||||
idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
|
idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
|
||||||
|
CTYPE_DATA (_NL_CTYPE_MAP_TO_NONASCII,
|
||||||
|
&ctype->to_nonascii, sizeof (uint32_t));
|
||||||
|
|
||||||
case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
|
case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
|
||||||
iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
|
iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
|
||||||
iov[2 + elem + offset].iov_len = sizeof (uint32_t);
|
iov[2 + elem + offset].iov_len = sizeof (uint32_t);
|
||||||
@ -2706,6 +2712,14 @@ with character code range values one must use the absolute ellipsis `...'"));
|
|||||||
|
|
||||||
if (!ignore_content)
|
if (!ignore_content)
|
||||||
{
|
{
|
||||||
|
/* Check whether the mapping converts from an ASCII value
|
||||||
|
to a non-ASCII value. */
|
||||||
|
if (from_seq != NULL && from_seq->nbytes == 1
|
||||||
|
&& isascii (from_seq->bytes[0])
|
||||||
|
&& to_seq != NULL && (to_seq->nbytes != 1
|
||||||
|
|| !isascii (to_seq->bytes[0])))
|
||||||
|
ctype->to_nonascii = 1;
|
||||||
|
|
||||||
if (mapidx < 2 && from_seq != NULL && to_seq != NULL
|
if (mapidx < 2 && from_seq != NULL && to_seq != NULL
|
||||||
&& from_seq->nbytes == 1 && to_seq->nbytes == 1)
|
&& from_seq->nbytes == 1 && to_seq->nbytes == 1)
|
||||||
/* We can use this value. */
|
/* We can use this value. */
|
||||||
|
@ -1,3 +1,7 @@
|
|||||||
|
2003-11-15 Ulrich Drepper <drepper@redhat.com>
|
||||||
|
|
||||||
|
* Makefile (tst-leaks-ENV): Add LOCPATH.
|
||||||
|
|
||||||
2003-11-11 Jakub Jelinek <jakub@redhat.com>
|
2003-11-11 Jakub Jelinek <jakub@redhat.com>
|
||||||
|
|
||||||
* Makefile (LOCALES): Add tr_TR.UTF-8.
|
* Makefile (LOCALES): Add tr_TR.UTF-8.
|
||||||
|
@ -287,6 +287,7 @@ tst-setlocale-ENV = LOCPATH=$(common-objpfx)localedata LC_ALL=ja_JP.EUC-JP
|
|||||||
|
|
||||||
bug-iconv-trans-ENV = LOCPATH=$(common-objpfx)localedata
|
bug-iconv-trans-ENV = LOCPATH=$(common-objpfx)localedata
|
||||||
|
|
||||||
tst-leaks-ENV = MALLOC_TRACE=$(objpfx)tst-leaks.mtrace
|
tst-leaks-ENV = MALLOC_TRACE=$(objpfx)tst-leaks.mtrace \
|
||||||
|
LOCPATH=$(common-objpfx)localedata
|
||||||
$(objpfx)mtrace-tst-leaks: $(objpfx)tst-leaks.out
|
$(objpfx)mtrace-tst-leaks: $(objpfx)tst-leaks.out
|
||||||
$(common-objpfx)malloc/mtrace $(objpfx)tst-leaks.mtrace > $@
|
$(common-objpfx)malloc/mtrace $(objpfx)tst-leaks.mtrace > $@
|
||||||
|
@ -748,8 +748,7 @@ re_compile_internal (preg, pattern, length, syntax)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
err = re_string_construct (&regexp, pattern, length, preg->translate,
|
err = re_string_construct (&regexp, pattern, length, preg->translate,
|
||||||
syntax & RE_ICASE, dfa->mb_cur_max,
|
syntax & RE_ICASE, dfa);
|
||||||
dfa->is_utf8);
|
|
||||||
if (BE (err != REG_NOERROR, 0))
|
if (BE (err != REG_NOERROR, 0))
|
||||||
{
|
{
|
||||||
re_free (dfa);
|
re_free (dfa);
|
||||||
@ -828,6 +827,8 @@ init_dfa (dfa, pat_len)
|
|||||||
if (dfa->mb_cur_max > 1
|
if (dfa->mb_cur_max > 1
|
||||||
&& strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0)
|
&& strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0)
|
||||||
dfa->is_utf8 = 1;
|
dfa->is_utf8 = 1;
|
||||||
|
dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
|
||||||
|
!= 0);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (BE (dfa->nodes == NULL || dfa->state_table == NULL
|
if (BE (dfa->nodes == NULL || dfa->state_table == NULL
|
||||||
|
@ -20,25 +20,27 @@
|
|||||||
|
|
||||||
#ifdef _LIBC
|
#ifdef _LIBC
|
||||||
/* We have to keep the namespace clean. */
|
/* We have to keep the namespace clean. */
|
||||||
# define regfree(preg) __regfree (preg)
|
# define regfree(preg) __regfree (preg)
|
||||||
# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
|
# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
|
||||||
# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
|
# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
|
||||||
# define regerror(errcode, preg, errbuf, errbuf_size) \
|
# define regerror(errcode, preg, errbuf, errbuf_size) \
|
||||||
__regerror(errcode, preg, errbuf, errbuf_size)
|
__regerror(errcode, preg, errbuf, errbuf_size)
|
||||||
# define re_set_registers(bu, re, nu, st, en) \
|
# define re_set_registers(bu, re, nu, st, en) \
|
||||||
__re_set_registers (bu, re, nu, st, en)
|
__re_set_registers (bu, re, nu, st, en)
|
||||||
# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
|
# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
|
||||||
__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
|
__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
|
||||||
# define re_match(bufp, string, size, pos, regs) \
|
# define re_match(bufp, string, size, pos, regs) \
|
||||||
__re_match (bufp, string, size, pos, regs)
|
__re_match (bufp, string, size, pos, regs)
|
||||||
# define re_search(bufp, string, size, startpos, range, regs) \
|
# define re_search(bufp, string, size, startpos, range, regs) \
|
||||||
__re_search (bufp, string, size, startpos, range, regs)
|
__re_search (bufp, string, size, startpos, range, regs)
|
||||||
# define re_compile_pattern(pattern, length, bufp) \
|
# define re_compile_pattern(pattern, length, bufp) \
|
||||||
__re_compile_pattern (pattern, length, bufp)
|
__re_compile_pattern (pattern, length, bufp)
|
||||||
# define re_set_syntax(syntax) __re_set_syntax (syntax)
|
# define re_set_syntax(syntax) __re_set_syntax (syntax)
|
||||||
# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
|
# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
|
||||||
__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
|
__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
|
||||||
# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
|
# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
|
||||||
|
|
||||||
|
# include "../locale/localeinfo.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* POSIX says that <sys/types.h> must be included (by the caller) before
|
/* POSIX says that <sys/types.h> must be included (by the caller) before
|
||||||
|
@ -21,7 +21,7 @@
|
|||||||
static void re_string_construct_common (const char *str, int len,
|
static void re_string_construct_common (const char *str, int len,
|
||||||
re_string_t *pstr,
|
re_string_t *pstr,
|
||||||
RE_TRANSLATE_TYPE trans, int icase,
|
RE_TRANSLATE_TYPE trans, int icase,
|
||||||
int mb_cur_max, int is_utf8);
|
const re_dfa_t *dfa);
|
||||||
#ifdef RE_ENABLE_I18N
|
#ifdef RE_ENABLE_I18N
|
||||||
static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx,
|
static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx,
|
||||||
wint_t *last_wc);
|
wint_t *last_wc);
|
||||||
@ -47,17 +47,16 @@ static unsigned int inline calc_state_hash (const re_node_set *nodes,
|
|||||||
re_string_reconstruct before using the object. */
|
re_string_reconstruct before using the object. */
|
||||||
|
|
||||||
static reg_errcode_t
|
static reg_errcode_t
|
||||||
re_string_allocate (pstr, str, len, init_len, trans, icase,
|
re_string_allocate (pstr, str, len, init_len, trans, icase, dfa)
|
||||||
mb_cur_max, is_utf8)
|
|
||||||
re_string_t *pstr;
|
re_string_t *pstr;
|
||||||
const char *str;
|
const char *str;
|
||||||
int len, init_len, icase, mb_cur_max, is_utf8;
|
int len, init_len, icase;
|
||||||
RE_TRANSLATE_TYPE trans;
|
RE_TRANSLATE_TYPE trans;
|
||||||
|
const re_dfa_t *dfa;
|
||||||
{
|
{
|
||||||
reg_errcode_t ret;
|
reg_errcode_t ret;
|
||||||
int init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
|
int init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
|
||||||
re_string_construct_common (str, len, pstr, trans, icase,
|
re_string_construct_common (str, len, pstr, trans, icase, dfa);
|
||||||
mb_cur_max, is_utf8);
|
|
||||||
pstr->stop = pstr->len;
|
pstr->stop = pstr->len;
|
||||||
|
|
||||||
ret = re_string_realloc_buffers (pstr, init_buf_len);
|
ret = re_string_realloc_buffers (pstr, init_buf_len);
|
||||||
@ -68,22 +67,22 @@ re_string_allocate (pstr, str, len, init_len, trans, icase,
|
|||||||
: (unsigned char *) str);
|
: (unsigned char *) str);
|
||||||
pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
|
pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
|
||||||
pstr->valid_len = (MBS_CASE_ALLOCATED (pstr) || MBS_ALLOCATED (pstr)
|
pstr->valid_len = (MBS_CASE_ALLOCATED (pstr) || MBS_ALLOCATED (pstr)
|
||||||
|| mb_cur_max > 1) ? pstr->valid_len : len;
|
|| dfa->mb_cur_max > 1) ? pstr->valid_len : len;
|
||||||
return REG_NOERROR;
|
return REG_NOERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* This function allocate the buffers, and initialize them. */
|
/* This function allocate the buffers, and initialize them. */
|
||||||
|
|
||||||
static reg_errcode_t
|
static reg_errcode_t
|
||||||
re_string_construct (pstr, str, len, trans, icase, mb_cur_max, is_utf8)
|
re_string_construct (pstr, str, len, trans, icase, dfa)
|
||||||
re_string_t *pstr;
|
re_string_t *pstr;
|
||||||
const char *str;
|
const char *str;
|
||||||
int len, icase, mb_cur_max, is_utf8;
|
int len, icase;
|
||||||
RE_TRANSLATE_TYPE trans;
|
RE_TRANSLATE_TYPE trans;
|
||||||
|
const re_dfa_t *dfa;
|
||||||
{
|
{
|
||||||
reg_errcode_t ret;
|
reg_errcode_t ret;
|
||||||
re_string_construct_common (str, len, pstr, trans, icase,
|
re_string_construct_common (str, len, pstr, trans, icase, dfa);
|
||||||
mb_cur_max, is_utf8);
|
|
||||||
pstr->stop = pstr->len;
|
pstr->stop = pstr->len;
|
||||||
/* Set 0 so that this function can initialize whole buffers. */
|
/* Set 0 so that this function can initialize whole buffers. */
|
||||||
pstr->valid_len = 0;
|
pstr->valid_len = 0;
|
||||||
@ -101,7 +100,7 @@ re_string_construct (pstr, str, len, trans, icase, mb_cur_max, is_utf8)
|
|||||||
if (icase)
|
if (icase)
|
||||||
{
|
{
|
||||||
#ifdef RE_ENABLE_I18N
|
#ifdef RE_ENABLE_I18N
|
||||||
if (mb_cur_max > 1)
|
if (dfa->mb_cur_max > 1)
|
||||||
build_wcs_upper_buffer (pstr);
|
build_wcs_upper_buffer (pstr);
|
||||||
else
|
else
|
||||||
#endif /* RE_ENABLE_I18N */
|
#endif /* RE_ENABLE_I18N */
|
||||||
@ -110,7 +109,7 @@ re_string_construct (pstr, str, len, trans, icase, mb_cur_max, is_utf8)
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
#ifdef RE_ENABLE_I18N
|
#ifdef RE_ENABLE_I18N
|
||||||
if (mb_cur_max > 1)
|
if (dfa->mb_cur_max > 1)
|
||||||
build_wcs_buffer (pstr);
|
build_wcs_buffer (pstr);
|
||||||
else
|
else
|
||||||
#endif /* RE_ENABLE_I18N */
|
#endif /* RE_ENABLE_I18N */
|
||||||
@ -167,20 +166,22 @@ re_string_realloc_buffers (pstr, new_buf_len)
|
|||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
re_string_construct_common (str, len, pstr, trans, icase, mb_cur_max, is_utf8)
|
re_string_construct_common (str, len, pstr, trans, icase, dfa)
|
||||||
const char *str;
|
const char *str;
|
||||||
int len;
|
int len;
|
||||||
re_string_t *pstr;
|
re_string_t *pstr;
|
||||||
RE_TRANSLATE_TYPE trans;
|
RE_TRANSLATE_TYPE trans;
|
||||||
int icase, mb_cur_max, is_utf8;
|
int icase;
|
||||||
|
const re_dfa_t *dfa;
|
||||||
{
|
{
|
||||||
memset (pstr, '\0', sizeof (re_string_t));
|
memset (pstr, '\0', sizeof (re_string_t));
|
||||||
pstr->raw_mbs = (const unsigned char *) str;
|
pstr->raw_mbs = (const unsigned char *) str;
|
||||||
pstr->len = len;
|
pstr->len = len;
|
||||||
pstr->trans = trans;
|
pstr->trans = trans;
|
||||||
pstr->icase = icase ? 1 : 0;
|
pstr->icase = icase ? 1 : 0;
|
||||||
pstr->mb_cur_max = mb_cur_max;
|
pstr->mb_cur_max = dfa->mb_cur_max;
|
||||||
pstr->is_utf8 = is_utf8;
|
pstr->is_utf8 = dfa->is_utf8;
|
||||||
|
pstr->map_notascii = dfa->map_notascii;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef RE_ENABLE_I18N
|
#ifdef RE_ENABLE_I18N
|
||||||
@ -253,47 +254,110 @@ build_wcs_upper_buffer (pstr)
|
|||||||
/* Build the buffers from pstr->valid_len to either pstr->len or
|
/* Build the buffers from pstr->valid_len to either pstr->len or
|
||||||
pstr->bufs_len. */
|
pstr->bufs_len. */
|
||||||
end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
|
end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
|
||||||
for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
|
|
||||||
{
|
#ifdef _LIBC
|
||||||
wchar_t wc;
|
/* The following optimization assumes that the wchar_t encoding is
|
||||||
remain_len = end_idx - byte_idx;
|
always ISO 10646. */
|
||||||
prev_st = pstr->cur_state;
|
if (! pstr->map_notascii && pstr->trans == NULL)
|
||||||
mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
|
for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
|
||||||
+ byte_idx), remain_len, &pstr->cur_state);
|
if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
|
||||||
if (BE (mbclen == (size_t) -2, 0))
|
&& mbsinit (&pstr->cur_state))
|
||||||
{
|
|
||||||
/* The buffer doesn't have enough space, finish to build. */
|
|
||||||
pstr->cur_state = prev_st;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
else if (mbclen == 1 || mbclen == (size_t) -1 || mbclen == 0)
|
|
||||||
{
|
{
|
||||||
/* In case of a singlebyte character. */
|
/* In case of a singlebyte character. */
|
||||||
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
|
pstr->mbs[byte_idx]
|
||||||
/* Apply the translation if we need. */
|
= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
|
||||||
if (pstr->trans != NULL && mbclen == 1)
|
/* The next step uses the assumption that wchar_t is encoded
|
||||||
{
|
with ISO 10646: all ASCII values can be converted like this. */
|
||||||
ch = pstr->trans[ch];
|
pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
|
||||||
pstr->mbs_case[byte_idx] = ch;
|
++byte_idx;
|
||||||
}
|
|
||||||
pstr->wcs[byte_idx] = iswlower (wc) ? towupper (wc) : wc;
|
|
||||||
pstr->mbs[byte_idx++] = islower (ch) ? toupper (ch) : ch;
|
|
||||||
if (BE (mbclen == (size_t) -1, 0))
|
|
||||||
pstr->cur_state = prev_st;
|
|
||||||
}
|
}
|
||||||
else /* mbclen > 1 */
|
else
|
||||||
{
|
{
|
||||||
if (iswlower (wc))
|
wchar_t wc;
|
||||||
wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc), &prev_st);
|
remain_len = end_idx - byte_idx;
|
||||||
|
prev_st = pstr->cur_state;
|
||||||
|
mbclen = mbrtowc (&wc,
|
||||||
|
((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
|
||||||
|
+ byte_idx), remain_len, &pstr->cur_state);
|
||||||
|
if (BE (mbclen > 1, 1))
|
||||||
|
{
|
||||||
|
if (iswlower (wc))
|
||||||
|
wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc),
|
||||||
|
&prev_st);
|
||||||
|
else
|
||||||
|
memcpy (pstr->mbs + byte_idx,
|
||||||
|
pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
|
||||||
|
pstr->wcs[byte_idx++] = towupper (wc);
|
||||||
|
/* Write paddings. */
|
||||||
|
for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
|
||||||
|
pstr->wcs[byte_idx++] = WEOF;
|
||||||
|
}
|
||||||
|
else if (mbclen == (size_t) -1 || mbclen == 0)
|
||||||
|
{
|
||||||
|
/* In case of a singlebyte character. */
|
||||||
|
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
|
||||||
|
/* Apply the translation if we need. */
|
||||||
|
if (BE (pstr->trans != NULL, 0) && mbclen == 1)
|
||||||
|
{
|
||||||
|
ch = pstr->trans[ch];
|
||||||
|
pstr->mbs_case[byte_idx] = ch;
|
||||||
|
}
|
||||||
|
pstr->wcs[byte_idx] = towupper (wc);
|
||||||
|
pstr->mbs[byte_idx++] = toupper (ch);
|
||||||
|
if (BE (mbclen == (size_t) -1, 0))
|
||||||
|
pstr->cur_state = prev_st;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
memcpy (pstr->mbs + byte_idx,
|
{
|
||||||
pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
|
/* The buffer doesn't have enough space, finish to build. */
|
||||||
pstr->wcs[byte_idx++] = iswlower (wc) ? towupper (wc) : wc;
|
pstr->cur_state = prev_st;
|
||||||
/* Write paddings. */
|
break;
|
||||||
for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
|
}
|
||||||
pstr->wcs[byte_idx++] = WEOF;
|
|
||||||
}
|
}
|
||||||
}
|
else
|
||||||
|
#endif
|
||||||
|
for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
|
||||||
|
{
|
||||||
|
wchar_t wc;
|
||||||
|
remain_len = end_idx - byte_idx;
|
||||||
|
prev_st = pstr->cur_state;
|
||||||
|
mbclen = mbrtowc (&wc,
|
||||||
|
((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
|
||||||
|
+ byte_idx), remain_len, &pstr->cur_state);
|
||||||
|
if (mbclen == 1 || mbclen == (size_t) -1 || mbclen == 0)
|
||||||
|
{
|
||||||
|
/* In case of a singlebyte character. */
|
||||||
|
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
|
||||||
|
/* Apply the translation if we need. */
|
||||||
|
if (BE (pstr->trans != NULL, 0) && mbclen == 1)
|
||||||
|
{
|
||||||
|
ch = pstr->trans[ch];
|
||||||
|
pstr->mbs_case[byte_idx] = ch;
|
||||||
|
}
|
||||||
|
pstr->wcs[byte_idx] = towupper (wc);
|
||||||
|
pstr->mbs[byte_idx++] = toupper (ch);
|
||||||
|
if (BE (mbclen == (size_t) -1, 0))
|
||||||
|
pstr->cur_state = prev_st;
|
||||||
|
}
|
||||||
|
else if (BE (mbclen != (size_t) -2, 1))
|
||||||
|
{
|
||||||
|
if (iswlower (wc))
|
||||||
|
wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc), &prev_st);
|
||||||
|
else
|
||||||
|
memcpy (pstr->mbs + byte_idx,
|
||||||
|
pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
|
||||||
|
pstr->wcs[byte_idx++] = towupper (wc);
|
||||||
|
/* Write paddings. */
|
||||||
|
for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
|
||||||
|
pstr->wcs[byte_idx++] = WEOF;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* The buffer doesn't have enough space, finish to build. */
|
||||||
|
pstr->cur_state = prev_st;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
pstr->valid_len = byte_idx;
|
pstr->valid_len = byte_idx;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -335,6 +335,7 @@ struct re_string_t
|
|||||||
/* 1 if REG_ICASE. */
|
/* 1 if REG_ICASE. */
|
||||||
unsigned int icase : 1;
|
unsigned int icase : 1;
|
||||||
unsigned int is_utf8 : 1;
|
unsigned int is_utf8 : 1;
|
||||||
|
unsigned int map_notascii : 1;
|
||||||
int mb_cur_max;
|
int mb_cur_max;
|
||||||
};
|
};
|
||||||
typedef struct re_string_t re_string_t;
|
typedef struct re_string_t re_string_t;
|
||||||
@ -345,31 +346,32 @@ typedef struct re_string_t re_string_t;
|
|||||||
#define MBS_CASE_ALLOCATED(pstr) (pstr->trans != NULL)
|
#define MBS_CASE_ALLOCATED(pstr) (pstr->trans != NULL)
|
||||||
|
|
||||||
|
|
||||||
|
struct re_dfa_t;
|
||||||
|
typedef struct re_dfa_t re_dfa_t;
|
||||||
#ifndef RE_NO_INTERNAL_PROTOTYPES
|
#ifndef RE_NO_INTERNAL_PROTOTYPES
|
||||||
static reg_errcode_t re_string_allocate (re_string_t *pstr, const char *str,
|
static reg_errcode_t re_string_allocate (re_string_t *pstr, const char *str,
|
||||||
int len, int init_len,
|
int len, int init_len,
|
||||||
RE_TRANSLATE_TYPE trans, int icase,
|
RE_TRANSLATE_TYPE trans, int icase,
|
||||||
int mb_cur_max, int is_utf8);
|
const re_dfa_t *dfa);
|
||||||
static reg_errcode_t re_string_construct (re_string_t *pstr, const char *str,
|
static reg_errcode_t re_string_construct (re_string_t *pstr, const char *str,
|
||||||
int len, RE_TRANSLATE_TYPE trans,
|
int len, RE_TRANSLATE_TYPE trans,
|
||||||
int icase, int mb_cur_max,
|
int icase, const re_dfa_t *dfa);
|
||||||
int is_utf8);
|
|
||||||
static reg_errcode_t re_string_reconstruct (re_string_t *pstr, int idx,
|
static reg_errcode_t re_string_reconstruct (re_string_t *pstr, int idx,
|
||||||
int eflags, int newline);
|
int eflags, int newline);
|
||||||
static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
|
static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
|
||||||
int new_buf_len);
|
int new_buf_len);
|
||||||
#ifdef RE_ENABLE_I18N
|
# ifdef RE_ENABLE_I18N
|
||||||
static void build_wcs_buffer (re_string_t *pstr);
|
static void build_wcs_buffer (re_string_t *pstr);
|
||||||
static void build_wcs_upper_buffer (re_string_t *pstr);
|
static void build_wcs_upper_buffer (re_string_t *pstr);
|
||||||
#endif /* RE_ENABLE_I18N */
|
# endif /* RE_ENABLE_I18N */
|
||||||
static void build_upper_buffer (re_string_t *pstr);
|
static void build_upper_buffer (re_string_t *pstr);
|
||||||
static void re_string_translate_buffer (re_string_t *pstr);
|
static void re_string_translate_buffer (re_string_t *pstr);
|
||||||
static void re_string_destruct (re_string_t *pstr);
|
static void re_string_destruct (re_string_t *pstr);
|
||||||
#ifdef RE_ENABLE_I18N
|
# ifdef RE_ENABLE_I18N
|
||||||
static int re_string_elem_size_at (const re_string_t *pstr, int idx);
|
static int re_string_elem_size_at (const re_string_t *pstr, int idx);
|
||||||
static inline int re_string_char_size_at (const re_string_t *pstr, int idx);
|
static inline int re_string_char_size_at (const re_string_t *pstr, int idx);
|
||||||
static inline wint_t re_string_wchar_at (const re_string_t *pstr, int idx);
|
static inline wint_t re_string_wchar_at (const re_string_t *pstr, int idx);
|
||||||
#endif /* RE_ENABLE_I18N */
|
# endif /* RE_ENABLE_I18N */
|
||||||
static unsigned int re_string_context_at (const re_string_t *input, int idx,
|
static unsigned int re_string_context_at (const re_string_t *input, int idx,
|
||||||
int eflags, int newline_anchor);
|
int eflags, int newline_anchor);
|
||||||
#endif
|
#endif
|
||||||
@ -610,9 +612,9 @@ struct re_dfa_t
|
|||||||
collating element. */
|
collating element. */
|
||||||
unsigned int has_mb_node : 1;
|
unsigned int has_mb_node : 1;
|
||||||
unsigned int is_utf8 : 1;
|
unsigned int is_utf8 : 1;
|
||||||
|
unsigned int map_notascii : 1;
|
||||||
int mb_cur_max;
|
int mb_cur_max;
|
||||||
};
|
};
|
||||||
typedef struct re_dfa_t re_dfa_t;
|
|
||||||
|
|
||||||
#ifndef RE_NO_INTERNAL_PROTOTYPES
|
#ifndef RE_NO_INTERNAL_PROTOTYPES
|
||||||
static reg_errcode_t re_node_set_alloc (re_node_set *set, int size);
|
static reg_errcode_t re_node_set_alloc (re_node_set *set, int size);
|
||||||
|
@ -605,8 +605,7 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
|
|||||||
fl_longest_match = (nmatch != 0 || dfa->nbackref);
|
fl_longest_match = (nmatch != 0 || dfa->nbackref);
|
||||||
|
|
||||||
err = re_string_allocate (&input, string, length, dfa->nodes_len + 1,
|
err = re_string_allocate (&input, string, length, dfa->nodes_len + 1,
|
||||||
preg->translate, preg->syntax & RE_ICASE,
|
preg->translate, preg->syntax & RE_ICASE, dfa);
|
||||||
dfa->mb_cur_max, dfa->is_utf8);
|
|
||||||
if (BE (err != REG_NOERROR, 0))
|
if (BE (err != REG_NOERROR, 0))
|
||||||
goto free_return;
|
goto free_return;
|
||||||
input.stop = stop;
|
input.stop = stop;
|
||||||
@ -1760,7 +1759,7 @@ check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, from_node,
|
|||||||
{
|
{
|
||||||
struct re_backref_cache_entry *ent = mctx->bkref_ents + bi;
|
struct re_backref_cache_entry *ent = mctx->bkref_ents + bi;
|
||||||
int dst, cpos;
|
int dst, cpos;
|
||||||
|
|
||||||
/* If this backreference goes beyond the point we're
|
/* If this backreference goes beyond the point we're
|
||||||
examining, don't go any further. */
|
examining, don't go any further. */
|
||||||
if (ent->str_idx > str_idx)
|
if (ent->str_idx > str_idx)
|
||||||
@ -1797,12 +1796,12 @@ check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, from_node,
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case OP_OPEN_SUBEXP:
|
case OP_OPEN_SUBEXP:
|
||||||
if (str_idx == lim->subexp_from && subexp_idx == dfa->nodes[node].opr.idx)
|
if (str_idx == lim->subexp_from && subexp_idx == dfa->nodes[node].opr.idx)
|
||||||
return -1;
|
return -1;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OP_CLOSE_SUBEXP:
|
case OP_CLOSE_SUBEXP:
|
||||||
if (str_idx == lim->subexp_to && subexp_idx == dfa->nodes[node].opr.idx)
|
if (str_idx == lim->subexp_to && subexp_idx == dfa->nodes[node].opr.idx)
|
||||||
return 0;
|
return 0;
|
||||||
|
Loading…
Reference in New Issue
Block a user