mirror of
https://sourceware.org/git/glibc.git
synced 2024-12-22 19:00:07 +00:00
Update.
* posix/Makefile: Add rules to build and run tst-regex. 2001-06-20 Isamu Hasegawa <isamu@yamato.ibm.com> * posix/regex.c (FREE_WCS_BUFFERS): New macro to free buffers. (re_search_2): invoke convert_mbs_to_wcs and FREE_WCS_BUFFERS. (wcs_re_match_2_internal): Check whether the wcs buffers need setting up or not, and skip the setup routine if not needed. 2001-06-26 Isamu Hasegawa <isamu@yamato.ibm.com> * posix/regex.c (count_mbs_length): Use binary search for optimization. 2001-06-27 Ulrich Drepper <drepper@redhat.com>
This commit is contained in:
parent
7bcad28063
commit
64333c6623
16
ChangeLog
16
ChangeLog
@ -1,3 +1,19 @@
|
||||
2001-06-27 Ulrich Drepper <drepper@redhat.com>
|
||||
|
||||
* posix/Makefile: Add rules to build and run tst-regex.
|
||||
|
||||
2001-06-20 Isamu Hasegawa <isamu@yamato.ibm.com>
|
||||
|
||||
* posix/regex.c (FREE_WCS_BUFFERS): New macro to free buffers.
|
||||
(re_search_2): invoke convert_mbs_to_wcs and FREE_WCS_BUFFERS.
|
||||
(wcs_re_match_2_internal): Check whether the wcs buffers need
|
||||
setting up or not, and skip the setup routine if not needed.
|
||||
|
||||
2001-06-26 Isamu Hasegawa <isamu@yamato.ibm.com>
|
||||
|
||||
* posix/regex.c (count_mbs_length): Use binary search for
|
||||
optimization.
|
||||
|
||||
2001-06-27 Ulrich Drepper <drepper@redhat.com>
|
||||
|
||||
* posix/tst-regex.c: Fix several bugs. Add more tests.
|
||||
|
@ -70,7 +70,7 @@ tests := tstgetopt testfnm runtests runptests \
|
||||
tst-getlogin tst-mmap tst-getaddrinfo tst-truncate \
|
||||
tst-truncate64 tst-fork tst-fnmatch tst-regexloc tst-dir \
|
||||
tst-chmod bug-regex1 bug-regex2 bug-regex3 bug-regex4 \
|
||||
tst-gnuglob
|
||||
tst-gnuglob tst-regex
|
||||
ifeq (yes,$(build-shared))
|
||||
test-srcs := globtest
|
||||
tests += wordexp-test tst-exec tst-spawn
|
||||
@ -122,6 +122,7 @@ tst-chmod-ARGS = `pwd`
|
||||
tst-fnmatch-ENV = LOCPATH=$(common-objpfx)localedata
|
||||
tst-regexloc-ENV = LOCPATH=$(common-objpfx)localedata
|
||||
bug-regex1-ENV = LOCPATH=$(common-objpfx)localedata
|
||||
tst-regex-ENV = LOCPATH=$(common-objpfx)localedata
|
||||
|
||||
testcases.h: TESTS TESTS2C.sed
|
||||
sed -f TESTS2C.sed < $< > $@T
|
||||
@ -158,3 +159,9 @@ $(objpfx)bug-regex2-mem: $(objpfx)bug-regex2.out
|
||||
|
||||
$(objpfx)tst-getconf.out: tst-getconf.sh $(objpfx)getconf
|
||||
$(SHELL) -e $< $(common-objpfx) $(elf-objpfx) $(rtld-installed-name)
|
||||
|
||||
ifeq (yes,$(build-shared))
|
||||
$(objpfx)tst-regex: $(common-objpfx)rt/librt.so
|
||||
else
|
||||
$(objpfx)tst-regex: $(common-objpfx)rt/librt.a
|
||||
endif
|
||||
|
276
posix/regex.c
276
posix/regex.c
@ -416,11 +416,14 @@ static int byte_re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
|
||||
struct re_registers *regs,
|
||||
int stop));
|
||||
static int wcs_re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
|
||||
const char *string1, int size1,
|
||||
const char *string2, int size2,
|
||||
const char *cstring1, int csize1,
|
||||
const char *cstring2, int csize2,
|
||||
int pos,
|
||||
struct re_registers *regs,
|
||||
int stop));
|
||||
int stop,
|
||||
wchar_t *string1, int size1,
|
||||
wchar_t *string2, int size2,
|
||||
int *mbs_offset1, int *mbs_offset2));
|
||||
static int byte_re_search_2 PARAMS ((struct re_pattern_buffer *bufp,
|
||||
const char *string1, int size1,
|
||||
const char *string2, int size2,
|
||||
@ -1253,7 +1256,7 @@ convert_mbs_to_wcs (dest, src, len, offset_buffer, is_binary)
|
||||
size_t wc_count = 0;
|
||||
|
||||
mbstate_t mbs;
|
||||
int consumed;
|
||||
int i, consumed;
|
||||
size_t mb_remain = len;
|
||||
size_t mb_count = 0;
|
||||
|
||||
@ -1286,6 +1289,10 @@ convert_mbs_to_wcs (dest, src, len, offset_buffer, is_binary)
|
||||
offset_buffer[wc_count + 1] = mb_count += consumed;
|
||||
}
|
||||
|
||||
/* Fill the remainder of the buffer with the sentinel. */
|
||||
for (i = wc_count + 1 ; i <= len ; i++)
|
||||
offset_buffer[i] = mb_count + 1;
|
||||
|
||||
return wc_count;
|
||||
}
|
||||
|
||||
@ -5047,6 +5054,23 @@ weak_alias (__re_search_2, re_search_2)
|
||||
|
||||
#ifdef INSIDE_RECURSION
|
||||
|
||||
#ifdef MATCH_MAY_ALLOCATE
|
||||
# define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
|
||||
#else
|
||||
# define FREE_VAR(var) if (var) free (var); var = NULL
|
||||
#endif
|
||||
|
||||
#ifdef WCHAR
|
||||
# define FREE_WCS_BUFFERS() \
|
||||
do { \
|
||||
FREE_VAR (string1); \
|
||||
FREE_VAR (string2); \
|
||||
FREE_VAR (mbs_offset1); \
|
||||
FREE_VAR (mbs_offset2); \
|
||||
} while (0)
|
||||
|
||||
#endif
|
||||
|
||||
static int
|
||||
PREFIX(re_search_2) (bufp, string1, size1, string2, size2, startpos, range,
|
||||
regs, stop)
|
||||
@ -5063,6 +5087,16 @@ PREFIX(re_search_2) (bufp, string1, size1, string2, size2, startpos, range,
|
||||
register RE_TRANSLATE_TYPE translate = bufp->translate;
|
||||
int total_size = size1 + size2;
|
||||
int endpos = startpos + range;
|
||||
#ifdef WCHAR
|
||||
/* We need wchar_t* buffers correspond to cstring1, cstring2. */
|
||||
wchar_t *wcs_string1 = NULL, *wcs_string2 = NULL;
|
||||
/* We need the size of wchar_t buffers correspond to csize1, csize2. */
|
||||
int wcs_size1 = 0, wcs_size2 = 0;
|
||||
/* offset buffer for optimization. See convert_mbs_to_wcs. */
|
||||
int *mbs_offset1 = NULL, *mbs_offset2 = NULL;
|
||||
/* They hold whether each wchar_t is binary data or not. */
|
||||
char *is_binary = NULL;
|
||||
#endif /* WCHAR */
|
||||
|
||||
/* Check for out-of-range STARTPOS. */
|
||||
if (startpos < 0 || startpos > total_size)
|
||||
@ -5106,6 +5140,45 @@ PREFIX(re_search_2) (bufp, string1, size1, string2, size2, startpos, range,
|
||||
if (re_compile_fastmap (bufp) == -2)
|
||||
return -2;
|
||||
|
||||
#ifdef WCHAR
|
||||
/* Allocate wchar_t array for wcs_string1 and wcs_string2 and
|
||||
fill them with converted string. */
|
||||
if (size1 != 0)
|
||||
{
|
||||
wcs_string1 = REGEX_TALLOC (size1 + 1, CHAR_T);
|
||||
mbs_offset1 = REGEX_TALLOC (size1 + 1, int);
|
||||
is_binary = REGEX_TALLOC (size1 + 1, char);
|
||||
if (!wcs_string1 || !mbs_offset1 || !is_binary)
|
||||
{
|
||||
FREE_VAR (wcs_string1);
|
||||
FREE_VAR (mbs_offset1);
|
||||
FREE_VAR (is_binary);
|
||||
return -2;
|
||||
}
|
||||
wcs_size1 = convert_mbs_to_wcs(wcs_string1, string1, size1,
|
||||
mbs_offset1, is_binary);
|
||||
wcs_string1[wcs_size1] = L'\0'; /* for a sentinel */
|
||||
FREE_VAR (is_binary);
|
||||
}
|
||||
if (size2 != 0)
|
||||
{
|
||||
wcs_string2 = REGEX_TALLOC (size2 + 1, CHAR_T);
|
||||
mbs_offset2 = REGEX_TALLOC (size2 + 1, int);
|
||||
is_binary = REGEX_TALLOC (size2 + 1, char);
|
||||
if (!wcs_string2 || !mbs_offset2 || !is_binary)
|
||||
{
|
||||
FREE_WCS_BUFFERS ();
|
||||
FREE_VAR (is_binary);
|
||||
return -2;
|
||||
}
|
||||
wcs_size2 = convert_mbs_to_wcs(wcs_string2, string2, size2,
|
||||
mbs_offset2, is_binary);
|
||||
wcs_string2[wcs_size2] = L'\0'; /* for a sentinel */
|
||||
FREE_VAR (is_binary);
|
||||
}
|
||||
#endif /* WCHAR */
|
||||
|
||||
|
||||
/* Loop through the string, looking for a place to start matching. */
|
||||
for (;;)
|
||||
{
|
||||
@ -5153,10 +5226,24 @@ PREFIX(re_search_2) (bufp, string1, size1, string2, size2, startpos, range,
|
||||
/* If can't match the null string, and that's all we have left, fail. */
|
||||
if (range >= 0 && startpos == total_size && fastmap
|
||||
&& !bufp->can_be_null)
|
||||
return -1;
|
||||
{
|
||||
#ifdef WCHAR
|
||||
FREE_WCS_BUFFERS ();
|
||||
#endif
|
||||
return -1;
|
||||
}
|
||||
|
||||
#ifdef WCHAR
|
||||
val = wcs_re_match_2_internal (bufp, string1, size1, string2,
|
||||
size2, startpos, regs, stop,
|
||||
wcs_string1, wcs_size1,
|
||||
wcs_string2, wcs_size2,
|
||||
mbs_offset1, mbs_offset2);
|
||||
#else /* BYTE */
|
||||
val = byte_re_match_2_internal (bufp, string1, size1, string2,
|
||||
size2, startpos, regs, stop);
|
||||
#endif /* BYTE */
|
||||
|
||||
val = PREFIX(re_match_2_internal) (bufp, string1, size1, string2,
|
||||
size2, startpos, regs, stop);
|
||||
#ifndef REGEX_MALLOC
|
||||
# ifdef C_ALLOCA
|
||||
alloca (0);
|
||||
@ -5164,10 +5251,20 @@ PREFIX(re_search_2) (bufp, string1, size1, string2, size2, startpos, range,
|
||||
#endif
|
||||
|
||||
if (val >= 0)
|
||||
return startpos;
|
||||
{
|
||||
#ifdef WCHAR
|
||||
FREE_WCS_BUFFERS ();
|
||||
#endif
|
||||
return startpos;
|
||||
}
|
||||
|
||||
if (val == -2)
|
||||
return -2;
|
||||
{
|
||||
#ifdef WCHAR
|
||||
FREE_WCS_BUFFERS ();
|
||||
#endif
|
||||
return -2;
|
||||
}
|
||||
|
||||
advance:
|
||||
if (!range)
|
||||
@ -5183,6 +5280,9 @@ PREFIX(re_search_2) (bufp, string1, size1, string2, size2, startpos, range,
|
||||
startpos--;
|
||||
}
|
||||
}
|
||||
#ifdef WCHAR
|
||||
FREE_WCS_BUFFERS ();
|
||||
#endif
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -5255,7 +5355,6 @@ PREFIX(re_search_2) (bufp, string1, size1, string2, size2, startpos, range,
|
||||
|
||||
/* Free everything we malloc. */
|
||||
#ifdef MATCH_MAY_ALLOCATE
|
||||
# define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
|
||||
# ifdef WCHAR
|
||||
# define FREE_VARIABLES() \
|
||||
do { \
|
||||
@ -5269,10 +5368,13 @@ PREFIX(re_search_2) (bufp, string1, size1, string2, size2, startpos, range,
|
||||
FREE_VAR (reg_info); \
|
||||
FREE_VAR (reg_dummy); \
|
||||
FREE_VAR (reg_info_dummy); \
|
||||
FREE_VAR (string1); \
|
||||
FREE_VAR (string2); \
|
||||
FREE_VAR (mbs_offset1); \
|
||||
FREE_VAR (mbs_offset2); \
|
||||
if (!cant_free_wcs_buf) \
|
||||
{ \
|
||||
FREE_VAR (string1); \
|
||||
FREE_VAR (string2); \
|
||||
FREE_VAR (mbs_offset1); \
|
||||
FREE_VAR (mbs_offset2); \
|
||||
} \
|
||||
} while (0)
|
||||
# else /* BYTE */
|
||||
# define FREE_VARIABLES() \
|
||||
@ -5290,14 +5392,16 @@ PREFIX(re_search_2) (bufp, string1, size1, string2, size2, startpos, range,
|
||||
} while (0)
|
||||
# endif /* WCHAR */
|
||||
#else
|
||||
# define FREE_VAR(var) if (var) free (var); var = NULL
|
||||
# ifdef WCHAR
|
||||
# define FREE_VARIABLES() \
|
||||
do { \
|
||||
FREE_VAR (string1); \
|
||||
FREE_VAR (string2); \
|
||||
FREE_VAR (mbs_offset1); \
|
||||
FREE_VAR (mbs_offset2); \
|
||||
if (!cant_free_wcs_buf) \
|
||||
{ \
|
||||
FREE_VAR (string1); \
|
||||
FREE_VAR (string2); \
|
||||
FREE_VAR (mbs_offset1); \
|
||||
FREE_VAR (mbs_offset2); \
|
||||
} \
|
||||
} while (0)
|
||||
# else /* BYTE */
|
||||
# define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */
|
||||
@ -5331,7 +5435,8 @@ re_match (bufp, string, size, pos, regs)
|
||||
# ifdef MBS_SUPPORT
|
||||
if (MB_CUR_MAX != 1)
|
||||
result = wcs_re_match_2_internal (bufp, NULL, 0, string, size,
|
||||
pos, regs, size);
|
||||
pos, regs, size,
|
||||
NULL, 0, NULL, 0, NULL, NULL);
|
||||
else
|
||||
# endif
|
||||
result = byte_re_match_2_internal (bufp, NULL, 0, string, size,
|
||||
@ -5390,7 +5495,8 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
|
||||
# ifdef MBS_SUPPORT
|
||||
if (MB_CUR_MAX != 1)
|
||||
result = wcs_re_match_2_internal (bufp, string1, size1, string2, size2,
|
||||
pos, regs, stop);
|
||||
pos, regs, stop,
|
||||
NULL, 0, NULL, 0, NULL, NULL);
|
||||
else
|
||||
# endif
|
||||
result = byte_re_match_2_internal (bufp, string1, size1, string2, size2,
|
||||
@ -5424,7 +5530,7 @@ count_mbs_length(offset_buffer, length)
|
||||
int *offset_buffer;
|
||||
int length;
|
||||
{
|
||||
int wcs_size;
|
||||
int upper, lower;
|
||||
|
||||
/* Check whether the size is valid. */
|
||||
if (length < 0)
|
||||
@ -5433,45 +5539,73 @@ count_mbs_length(offset_buffer, length)
|
||||
if (offset_buffer == NULL)
|
||||
return 0;
|
||||
|
||||
for (wcs_size = 0 ; offset_buffer[wcs_size] != -1 ; wcs_size++)
|
||||
/* If there are no multibyte characters, offset_buffer[i] == i.
|
||||
Optimize for this case. */
|
||||
if (offset_buffer[length] == length)
|
||||
return length;
|
||||
|
||||
/* Set up upper with length. (because for all i, offset_buffer[i] >= i) */
|
||||
upper = length;
|
||||
lower = 0;
|
||||
|
||||
while (true)
|
||||
{
|
||||
if (offset_buffer[wcs_size] == length)
|
||||
return wcs_size;
|
||||
if (offset_buffer[wcs_size] > length)
|
||||
/* It is a fragment of a wide character. */
|
||||
return -1;
|
||||
int middle = (lower + upper) / 2;
|
||||
if (middle == lower || middle == upper)
|
||||
break;
|
||||
if (offset_buffer[middle] > length)
|
||||
upper = middle;
|
||||
else if (offset_buffer[middle] < length)
|
||||
lower = middle;
|
||||
else
|
||||
return middle;
|
||||
}
|
||||
|
||||
/* We reached the sentinel. */
|
||||
return -1;
|
||||
}
|
||||
#endif /* WCHAR */
|
||||
|
||||
/* This is a separate function so that we can force an alloca cleanup
|
||||
afterwards. */
|
||||
#ifdef WCHAR
|
||||
static int
|
||||
PREFIX(re_match_2_internal) (bufp, ARG_PREFIX(string1), ARG_PREFIX(size1),
|
||||
ARG_PREFIX(string2), ARG_PREFIX(size2), pos,
|
||||
regs, stop)
|
||||
wcs_re_match_2_internal (bufp, cstring1, csize1, cstring2, csize2, pos,
|
||||
regs, stop, string1, size1, string2, size2,
|
||||
mbs_offset1, mbs_offset2)
|
||||
struct re_pattern_buffer *bufp;
|
||||
const char *ARG_PREFIX(string1), *ARG_PREFIX(string2);
|
||||
int ARG_PREFIX(size1), ARG_PREFIX(size2);
|
||||
const char *cstring1, *cstring2;
|
||||
int csize1, csize2;
|
||||
int pos;
|
||||
struct re_registers *regs;
|
||||
int stop;
|
||||
/* string1 == string2 == NULL means string1/2, size1/2 and
|
||||
mbs_offset1/2 need setting up in this function. */
|
||||
/* We need wchar_t* buffers correspond to cstring1, cstring2. */
|
||||
wchar_t *string1, *string2;
|
||||
/* We need the size of wchar_t buffers correspond to csize1, csize2. */
|
||||
int size1, size2;
|
||||
/* offset buffer for optimization. See convert_mbs_to_wcs. */
|
||||
int *mbs_offset1, *mbs_offset2;
|
||||
#else /* BYTE */
|
||||
static int
|
||||
byte_re_match_2_internal (bufp, string1, size1,string2, size2, pos,
|
||||
regs, stop)
|
||||
struct re_pattern_buffer *bufp;
|
||||
const char *string1, *string2;
|
||||
int size1, size2;
|
||||
int pos;
|
||||
struct re_registers *regs;
|
||||
int stop;
|
||||
#endif /* BYTE */
|
||||
{
|
||||
/* General temporaries. */
|
||||
int mcnt;
|
||||
UCHAR_T *p1;
|
||||
#ifdef WCHAR
|
||||
/* We need wchar_t* buffers correspond to string1, string2. */
|
||||
CHAR_T *string1 = NULL, *string2 = NULL;
|
||||
/* We need the size of wchar_t buffers correspond to csize1, csize2. */
|
||||
int size1 = 0, size2 = 0;
|
||||
/* offset buffer for optimization. See convert_mbs_to_wc. */
|
||||
int *mbs_offset1 = NULL, *mbs_offset2 = NULL;
|
||||
/* They hold whether each wchar_t is binary data or not. */
|
||||
char *is_binary = NULL;
|
||||
/* If true, we can't free string1/2, mbs_offset1/2. */
|
||||
int cant_free_wcs_buf = 1;
|
||||
#endif /* WCHAR */
|
||||
|
||||
/* Just past the end of the corresponding string. */
|
||||
@ -5648,41 +5782,45 @@ PREFIX(re_match_2_internal) (bufp, ARG_PREFIX(string1), ARG_PREFIX(size1),
|
||||
#ifdef WCHAR
|
||||
/* Allocate wchar_t array for string1 and string2 and
|
||||
fill them with converted string. */
|
||||
if (csize1 != 0)
|
||||
if (string1 == NULL && string2 == NULL)
|
||||
{
|
||||
string1 = REGEX_TALLOC (csize1 + 1, CHAR_T);
|
||||
mbs_offset1 = REGEX_TALLOC (csize1 + 1, int);
|
||||
is_binary = REGEX_TALLOC (csize1 + 1, char);
|
||||
if (!string1 || !mbs_offset1 || !is_binary)
|
||||
/* We need to set up the buffers here. */
|
||||
|
||||
/* We must free wcs buffers in this function. */
|
||||
cant_free_wcs_buf = 0;
|
||||
|
||||
if (csize1 != 0)
|
||||
{
|
||||
FREE_VAR (string1);
|
||||
FREE_VAR (mbs_offset1);
|
||||
FREE_VAR (is_binary);
|
||||
return -2;
|
||||
string1 = REGEX_TALLOC (csize1 + 1, CHAR_T);
|
||||
mbs_offset1 = REGEX_TALLOC (csize1 + 1, int);
|
||||
is_binary = REGEX_TALLOC (csize1 + 1, char);
|
||||
if (!string1 || !mbs_offset1 || !is_binary)
|
||||
{
|
||||
FREE_VAR (string1);
|
||||
FREE_VAR (mbs_offset1);
|
||||
FREE_VAR (is_binary);
|
||||
return -2;
|
||||
}
|
||||
}
|
||||
size1 = convert_mbs_to_wcs(string1, cstring1, csize1,
|
||||
mbs_offset1, is_binary);
|
||||
string1[size1] = L'\0'; /* for a sentinel */
|
||||
FREE_VAR (is_binary);
|
||||
}
|
||||
if (csize2 != 0)
|
||||
{
|
||||
string2 = REGEX_TALLOC (csize2 + 1, CHAR_T);
|
||||
mbs_offset2 = REGEX_TALLOC (csize2 + 1, int);
|
||||
is_binary = REGEX_TALLOC (csize2 + 1, char);
|
||||
if (!string2 || !mbs_offset2 || !is_binary)
|
||||
if (csize2 != 0)
|
||||
{
|
||||
FREE_VAR (string1);
|
||||
FREE_VAR (mbs_offset1);
|
||||
FREE_VAR (string2);
|
||||
FREE_VAR (mbs_offset2);
|
||||
string2 = REGEX_TALLOC (csize2 + 1, CHAR_T);
|
||||
mbs_offset2 = REGEX_TALLOC (csize2 + 1, int);
|
||||
is_binary = REGEX_TALLOC (csize2 + 1, char);
|
||||
if (!string2 || !mbs_offset2 || !is_binary)
|
||||
{
|
||||
FREE_VAR (string1);
|
||||
FREE_VAR (mbs_offset1);
|
||||
FREE_VAR (string2);
|
||||
FREE_VAR (mbs_offset2);
|
||||
FREE_VAR (is_binary);
|
||||
return -2;
|
||||
}
|
||||
size2 = convert_mbs_to_wcs(string2, cstring2, csize2,
|
||||
mbs_offset2, is_binary);
|
||||
string2[size2] = L'\0'; /* for a sentinel */
|
||||
FREE_VAR (is_binary);
|
||||
return -2;
|
||||
}
|
||||
size2 = convert_mbs_to_wcs(string2, cstring2, csize2,
|
||||
mbs_offset2, is_binary);
|
||||
string2[size2] = L'\0'; /* for a sentinel */
|
||||
FREE_VAR (is_binary);
|
||||
}
|
||||
|
||||
/* We need to cast pattern to (wchar_t*), because we casted this compiled
|
||||
|
Loading…
Reference in New Issue
Block a user