2002-04-22  Isamu Hasegawa  <isamu@yamato.ibm.com>

	* posix/regcomp.c (re_compile_internal): Adapt it to new interface
	of buffer building functions.
	* posix/regex_internal.c (re_string_allocate): New function.
	(re_string_realloc_buffers): New function.
	(re_string_skip_chars): New function.
	(re_string_reconstruct): New function.
	(re_string_construct): Adapt it to new interface of buffer building
	functions.
	(re_string_construct_common): Likewise.
	(build_wcs_buffer): Likewise.
	(build_wcs_upper_buffer): Likewise.
	(build_upper_buffer): Likewise.
	(re_string_translate_buffer): Likewise.
	(re_string_context_at): Adapt it to variable length buffers.
	* posix/regex_internal.h (re_string_t): Add new fields to handle
	variable length buffers.
	(re_match_context_t): Likewise.
	* posix/regexec.c (re_search_internal): Adapt it to new interface
	of re_string_t and re_match_context_t.
	(acquire_init_state_context): Likewise.
	(check_matching): Likewise.
	(check_halt_state_context): Likewise.
	(proceed_next_node): Likewise.
	(set_regs): Likewise.
	(sift_states_backward): Likewise.
	(clean_state_log_if_need): Likewise.
	(sift_states_iter_mb): Likewise.
	(sift_states_iter_bkref): Likewise.
	(add_epsilon_backreference): Likewise.
	(transit_state): Likewise.
	(transit_state_sb): Likewise.
	(transit_state_mb): Likewise.
	(transit_state_bkref): Likewise.
	(transit_state_bkref_loop): Likewise.
	(check_node_accept): Likewise.
	(match_ctx_init): Likewise.
	(extend_buffers): New function.

2002-04-21  Bruno Haible  <bruno@clisp.org>

	* iconvdata/tst-table.sh: For the second check, use the truncated
	GB18030 charmap table, like for the first check.
This commit is contained in:
Ulrich Drepper 2002-04-24 21:54:53 +00:00
parent be479a6dfe
commit 612546c60d
6 changed files with 20631 additions and 14603 deletions

View File

@ -1,3 +1,48 @@
2002-04-22 Isamu Hasegawa <isamu@yamato.ibm.com>
* posix/regcomp.c (re_compile_internal): Adapt it to new interface
of buffer building functions.
* posix/regex_internal.c (re_string_allocate): New function.
(re_string_realloc_buffers): New function.
(re_string_skip_chars): New function.
(re_string_reconstruct): New function.
(re_string_construct): Adapt it to new interface of buffer building
functions.
(re_string_construct_common): Likewise.
(build_wcs_buffer): Likewise.
(build_wcs_upper_buffer): Likewise.
(build_upper_buffer): Likewise.
(re_string_translate_buffer): Likewise.
(re_string_context_at): Adapt it to variable length buffers.
* posix/regex_internal.h (re_string_t): Add new fields to handle
variable length buffers.
(re_match_context_t): Likewise.
* posix/regexec.c (re_search_internal): Adapt it to new interface
of re_string_t and re_match_context_t.
(acquire_init_state_context): Likewise.
(check_matching): Likewise.
(check_halt_state_context): Likewise.
(proceed_next_node): Likewise.
(set_regs): Likewise.
(sift_states_backward): Likewise.
(clean_state_log_if_need): Likewise.
(sift_states_iter_mb): Likewise.
(sift_states_iter_bkref): Likewise.
(add_epsilon_backreference): Likewise.
(transit_state): Likewise.
(transit_state_sb): Likewise.
(transit_state_mb): Likewise.
(transit_state_bkref): Likewise.
(transit_state_bkref_loop): Likewise.
(check_node_accept): Likewise.
(match_ctx_init): Likewise.
(extend_buffers): New function.
2002-04-21 Bruno Haible <bruno@clisp.org>
* iconvdata/tst-table.sh: For the second check, use the truncated
GB18030 charmap table, like for the first check.
2002-04-24 Ulrich Drepper <drepper@redhat.com> 2002-04-24 Ulrich Drepper <drepper@redhat.com>
* elf/dl-load.c (open_verify): Correct __lseek parameters. * elf/dl-load.c (open_verify): Correct __lseek parameters.

File diff suppressed because it is too large Load Diff

View File

@ -692,12 +692,8 @@ re_compile_internal (preg, pattern, length, syntax)
return err; return err;
} }
if (syntax & RE_ICASE) err = re_string_construct (&regexp, pattern, length, preg->translate,
err = re_string_construct_toupper (&regexp, pattern, length, syntax & RE_ICASE);
preg->translate);
else
err = re_string_construct (&regexp, pattern, length, preg->translate);
if (BE (err != REG_NOERROR, 0)) if (BE (err != REG_NOERROR, 0))
{ {
re_free (dfa); re_free (dfa);

View File

@ -58,14 +58,9 @@
#include "regex_internal.h" #include "regex_internal.h"
static void re_string_construct_common (const unsigned char *str, static void re_string_construct_common (const unsigned char *str,
int len, re_string_t *pstr); int len, re_string_t *pstr,
#ifdef RE_ENABLE_I18N RE_TRANSLATE_TYPE trans, int icase);
static reg_errcode_t build_wcs_buffer (re_string_t *pstr); static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx);
static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr);
#endif /* RE_ENABLE_I18N */
static reg_errcode_t build_upper_buffer (re_string_t *pstr);
static reg_errcode_t re_string_translate_buffer (re_string_t *pstr,
RE_TRANSLATE_TYPE trans);
static re_dfastate_t *create_newstate_common (re_dfa_t *dfa, static re_dfastate_t *create_newstate_common (re_dfa_t *dfa,
const re_node_set *nodes, const re_node_set *nodes,
unsigned int hash); unsigned int hash);
@ -83,278 +78,416 @@ static unsigned int inline calc_state_hash (const re_node_set *nodes,
/* Functions for string operation. */ /* Functions for string operation. */
/* Construct string object. */ /* This function allocate the buffers. It is necessary to call
re_string_reconstruct before using the object. */
static reg_errcode_t static reg_errcode_t
re_string_construct (pstr, str, len, trans) re_string_allocate (pstr, str, len, init_len, trans, icase)
re_string_t *pstr; re_string_t *pstr;
const unsigned char *str; const unsigned char *str;
int len; int len, init_len, icase;
RE_TRANSLATE_TYPE trans; RE_TRANSLATE_TYPE trans;
{ {
reg_errcode_t ret; reg_errcode_t ret;
re_string_construct_common (str, len, pstr); int init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
#ifdef RE_ENABLE_I18N re_string_construct_common (str, len, pstr, trans, icase);
if (MB_CUR_MAX >1 && pstr->len > 0)
{ ret = re_string_realloc_buffers (pstr, init_buf_len);
ret = build_wcs_buffer (pstr); if (BE (ret != REG_NOERROR, 0))
if (BE (ret != REG_NOERROR, 0)) return ret;
return ret;
} pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
#endif /* RE_ENABLE_I18N */ : (unsigned char *)str);
pstr->mbs_case = str; pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
if (trans != NULL) pstr->valid_len = (MBS_CASE_ALLOCATED (pstr) || MBS_ALLOCATED (pstr)
{ || MB_CUR_MAX > 1) ? pstr->valid_len : len;
ret = re_string_translate_buffer (pstr, trans);
if (BE (ret != REG_NOERROR, 0))
return ret;
}
return REG_NOERROR; return REG_NOERROR;
} }
/* Construct string object. We use this function instead of /* This function allocate the buffers, and initialize them. */
re_string_construct for case insensitive mode. */
static reg_errcode_t static reg_errcode_t
re_string_construct_toupper (pstr, str, len, trans) re_string_construct (pstr, str, len, trans, icase)
re_string_t *pstr; re_string_t *pstr;
const unsigned char *str; const unsigned char *str;
int len; int len, icase;
RE_TRANSLATE_TYPE trans; RE_TRANSLATE_TYPE trans;
{ {
reg_errcode_t ret; reg_errcode_t ret;
/* Set case sensitive buffer. */ re_string_construct_common (str, len, pstr, trans, icase);
re_string_construct_common (str, len, pstr); /* Set 0 so that this function can initialize whole buffers. */
#ifdef RE_ENABLE_I18N pstr->valid_len = 0;
if (MB_CUR_MAX >1)
if (len > 0)
{ {
if (BE (pstr->len > 0, 1)) ret = re_string_realloc_buffers (pstr, len + 1);
{ if (BE (ret != REG_NOERROR, 0))
ret = build_wcs_upper_buffer (pstr); return ret;
if (BE (ret != REG_NOERROR, 0)) }
return ret; pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
} : (unsigned char *)str);
pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
if (icase)
{
#ifdef RE_ENABLE_I18N
if (MB_CUR_MAX > 1)
build_wcs_upper_buffer (pstr);
else
build_upper_buffer (pstr);
#endif /* RE_ENABLE_I18N */
} }
else else
#endif /* RE_ENABLE_I18N */
{ {
if (BE (pstr->len > 0, 1)) #ifdef RE_ENABLE_I18N
if (MB_CUR_MAX > 1)
build_wcs_buffer (pstr);
else
#endif /* RE_ENABLE_I18N */
{ {
ret = build_upper_buffer (pstr); if (trans != NULL)
if (BE (ret != REG_NOERROR, 0)) re_string_translate_buffer (pstr);
return ret; else
pstr->valid_len = len;
} }
} }
pstr->mbs_case = str;
if (trans != NULL) /* Initialized whole buffers, then valid_len == bufs_len. */
{ pstr->valid_len = pstr->bufs_len;
ret = re_string_translate_buffer (pstr, trans);
if (BE (ret != REG_NOERROR, 0))
return ret;
}
return REG_NOERROR; return REG_NOERROR;
} }
/* Helper functions for re_string_construct_*. */ /* Helper functions for re_string_allocate, and re_string_construct. */
/* Resize the working buffers of PSTR so that each one holds NEW_BUF_LEN
   elements.  Only the buffers this object owns are touched: WCS when
   MB_CUR_MAX > 1 (and RE_ENABLE_I18N is defined), MBS when
   MBS_ALLOCATED (PSTR), and MBS_CASE when MBS_CASE_ALLOCATED (PSTR).

   Return REG_NOERROR on success and record the new size in
   PSTR->BUFS_LEN.  Return REG_ESPACE if any reallocation fails; in that
   case PSTR->BUFS_LEN is left unchanged and every buffer pointer still
   refers to a live allocation (buffers resized before the failure keep
   their new size), so re_string_destruct can still release them.  */
static reg_errcode_t
re_string_realloc_buffers (pstr, new_buf_len)
     re_string_t *pstr;
     int new_buf_len;
{
#ifdef RE_ENABLE_I18N
  if (MB_CUR_MAX > 1)
    {
      /* Go through a temporary: on failure realloc leaves the old block
	 allocated, and overwriting PSTR->WCS with NULL would leak it.  */
      wchar_t *new_wcs = re_realloc (pstr->wcs, wchar_t, new_buf_len);
      if (BE (new_wcs == NULL, 0))
	return REG_ESPACE;
      pstr->wcs = new_wcs;
    }
#endif /* RE_ENABLE_I18N */
  if (MBS_ALLOCATED (pstr))
    {
      unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
					   new_buf_len);
      if (BE (new_mbs == NULL, 0))
	return REG_ESPACE;
      pstr->mbs = new_mbs;
    }
  if (MBS_CASE_ALLOCATED (pstr))
    {
      unsigned char *new_mbs_case = re_realloc (pstr->mbs_case, unsigned char,
						new_buf_len);
      if (BE (new_mbs_case == NULL, 0))
	return REG_ESPACE;
      pstr->mbs_case = new_mbs_case;
      /* Without a separate upper-case buffer, MBS is only an alias of
	 MBS_CASE and must follow it to the (possibly moved) block.  */
      if (!MBS_ALLOCATED (pstr))
	pstr->mbs = pstr->mbs_case;
    }
  pstr->bufs_len = new_buf_len;
  return REG_NOERROR;
}
static void static void
re_string_construct_common (str, len, pstr) re_string_construct_common (str, len, pstr, trans, icase)
const unsigned char *str; const unsigned char *str;
int len; int len;
re_string_t *pstr; re_string_t *pstr;
RE_TRANSLATE_TYPE trans;
int icase;
{ {
pstr->mbs = str; memset (pstr, '\0', sizeof (re_string_t));
pstr->cur_idx = 0; pstr->raw_mbs = str;
pstr->len = len; pstr->len = len;
#ifdef RE_ENABLE_I18N pstr->trans = trans;
pstr->wcs = NULL; pstr->icase = icase ? 1 : 0;
#endif
pstr->mbs_case = NULL;
pstr->mbs_alloc = 0;
pstr->mbs_case_alloc = 0;
} }
#ifdef RE_ENABLE_I18N #ifdef RE_ENABLE_I18N
/* Build wide character buffer for `pstr'. /* Build wide character buffer PSTR->WCS.
If the byte sequence of the string are: If the byte sequence of the string are:
<mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3> <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
Then wide character buffer will be: Then wide character buffer will be:
<wc1> , WEOF , <wc2> , WEOF , <wc3> <wc1> , WEOF , <wc2> , WEOF , <wc3>
We use WEOF for padding, they indicate that the position isn't We use WEOF for padding, they indicate that the position isn't
a first byte of a multibyte character. */ a first byte of a multibyte character.
static reg_errcode_t Note that this function assumes PSTR->VALID_LEN elements are already
built and starts from PSTR->VALID_LEN. */
static void
build_wcs_buffer (pstr) build_wcs_buffer (pstr)
re_string_t *pstr; re_string_t *pstr;
{ {
mbstate_t state, prev_st; mbstate_t prev_st;
wchar_t wc; int byte_idx, end_idx, mbclen, remain_len;
int char_idx, char_len, mbclen; /* Build the buffers from pstr->valid_len to either pstr->len or
pstr->bufs_len. */
pstr->wcs = re_malloc (wchar_t, pstr->len + 1); end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
if (BE (pstr->wcs == NULL, 0)) for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
return REG_ESPACE;
memset (&state, '\0', sizeof (mbstate_t));
char_len = pstr->len;
for (char_idx = 0; char_idx < char_len ;)
{ {
int next_idx, remain_len = char_len - char_idx; wchar_t wc;
prev_st = state; remain_len = end_idx - byte_idx;
mbclen = mbrtowc (&wc, pstr->mbs + char_idx, remain_len, &state); prev_st = pstr->cur_state;
if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0)) mbclen = mbrtowc (&wc, pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx,
/* We treat these cases as a singlebyte character. */ remain_len, &pstr->cur_state);
if (BE (mbclen == (size_t) -2, 0))
{ {
/* The buffer doesn't have enough space; stop building here. */
pstr->cur_state = prev_st;
break;
}
else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
{
/* We treat these cases as a singlebyte character. */
mbclen = 1; mbclen = 1;
wc = (wchar_t) pstr->mbs[char_idx++]; wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
state = prev_st; pstr->cur_state = prev_st;
}
/* Apply the translation if needed. */
if (pstr->trans != NULL && mbclen == 1)
{
int ch = pstr->trans[pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]];
pstr->mbs_case[byte_idx] = ch;
} }
/* Write wide character and padding. */ /* Write wide character and padding. */
pstr->wcs[char_idx++] = wc; pstr->wcs[byte_idx++] = wc;
for (next_idx = char_idx + mbclen - 1; char_idx < next_idx ;) /* Write paddings. */
pstr->wcs[char_idx++] = WEOF; for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
pstr->wcs[byte_idx++] = WEOF;
} }
return REG_NOERROR; pstr->valid_len = byte_idx;
} }
static reg_errcode_t /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
but for REG_ICASE. */
static void
build_wcs_upper_buffer (pstr) build_wcs_upper_buffer (pstr)
re_string_t *pstr; re_string_t *pstr;
{ {
mbstate_t state, prev_st; mbstate_t prev_st;
wchar_t wc; int byte_idx, end_idx, mbclen, remain_len;
unsigned char *mbs_upper; /* Build the buffers from pstr->valid_len to either pstr->len or
int char_idx, char_len, mbclen; pstr->bufs_len. */
end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
pstr->wcs = re_malloc (wchar_t, pstr->len + 1); for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
mbs_upper = re_malloc (unsigned char, pstr->len + 1);
if (BE (pstr->wcs == NULL || mbs_upper == NULL, 0))
{ {
pstr->wcs = NULL; wchar_t wc;
return REG_ESPACE; remain_len = end_idx - byte_idx;
} prev_st = pstr->cur_state;
mbclen = mbrtowc (&wc, pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx,
memset (&state, '\0', sizeof (mbstate_t)); remain_len, &pstr->cur_state);
char_len = pstr->len; if (BE (mbclen == (size_t) -2, 0))
for (char_idx = 0 ; char_idx < char_len ; char_idx += mbclen)
{
int byte_idx, remain_len = char_len - char_idx;
prev_st = state;
mbclen = mbrtowc (&wc, pstr->mbs + char_idx, remain_len, &state);
if (mbclen == 1)
{ {
pstr->wcs[char_idx] = wc; /* The buffer doesn't have enough space, finish to build. */
if (islower (pstr->mbs[char_idx])) pstr->cur_state = prev_st;
mbs_upper[char_idx] = toupper (pstr->mbs[char_idx]); break;
else
mbs_upper[char_idx] = pstr->mbs[char_idx];
} }
else if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 else if (mbclen == 1 || mbclen == (size_t) -1 || mbclen == 0)
|| mbclen == 0, 0))
/* We treat these cases as a singlebyte character. */
{ {
mbclen = 1; /* In case of a singlebyte character. */
pstr->wcs[char_idx] = (wchar_t) pstr->mbs[char_idx]; int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
mbs_upper[char_idx] = pstr->mbs[char_idx]; /* Apply the translation if needed. */
state = prev_st; if (pstr->trans != NULL && mbclen == 1)
{
ch = pstr->trans[ch];
pstr->mbs_case[byte_idx] = ch;
}
pstr->wcs[byte_idx] = iswlower (wc) ? toupper (wc) : wc;
pstr->mbs[byte_idx++] = islower (ch) ? toupper (ch) : ch;
if (BE (mbclen == (size_t) -1, 0))
pstr->cur_state = prev_st;
} }
else /* mbclen > 1 */ else /* mbclen > 1 */
{ {
pstr->wcs[char_idx] = wc;
if (iswlower (wc)) if (iswlower (wc))
wcrtomb (mbs_upper + char_idx, towupper (wc), &prev_st); wcrtomb (pstr->mbs + byte_idx, towupper (wc), &prev_st);
else else
memcpy (mbs_upper + char_idx, pstr->mbs + char_idx, mbclen); memcpy (pstr->mbs + byte_idx,
for (byte_idx = 1 ; byte_idx < mbclen ; byte_idx++) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
pstr->wcs[char_idx + byte_idx] = WEOF; pstr->wcs[byte_idx++] = iswlower (wc) ? toupper (wc) : wc;
/* Write paddings. */
for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
pstr->wcs[byte_idx++] = WEOF;
} }
} }
pstr->mbs = mbs_upper; pstr->valid_len = byte_idx;
pstr->mbs_alloc = 1; }
return REG_NOERROR;
/* Step over whole characters of PSTR->RAW_MBS, starting at raw index
   PSTR->RAW_MBS_IDX + PSTR->VALID_LEN, until the raw index is no longer
   less than NEW_RAW_IDX.  PSTR->CUR_STATE is advanced along the way.
   Return the raw index reached, which is >= NEW_RAW_IDX whenever the
   loop runs to completion.  */
static int
re_string_skip_chars (pstr, new_raw_idx)
     re_string_t *pstr;
     int new_raw_idx;
{
  mbstate_t prev_st;
  int rawbuf_idx;

  /* Skip the characters which are not necessary to check.  */
  for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_len;
       rawbuf_idx < new_raw_idx;)
    {
      /* PSTR->LEN is measured from RAW_MBS_IDX, whereas RAWBUF_IDX is
	 measured from the start of RAW_MBS, so RAW_MBS_IDX has to be
	 added back to get the number of bytes really left.  Otherwise
	 the count is too small by RAW_MBS_IDX and may even turn
	 negative, i.e. a huge size_t once passed to mbrlen.  */
      int remain_len = pstr->raw_mbs_idx + pstr->len - rawbuf_idx;
      size_t mbclen;

      prev_st = pstr->cur_state;
      mbclen = mbrlen ((const char *) pstr->raw_mbs + rawbuf_idx, remain_len,
		       &pstr->cur_state);
      if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
	{
	  /* Incomplete sequence, invalid sequence, or a null character:
	     treat it as a single byte and undo the state change.  */
	  mbclen = 1;
	  pstr->cur_state = prev_st;
	}
      /* Then proceed to the next character.  */
      rawbuf_idx += (int) mbclen;
    }
  return rawbuf_idx;
}
#endif /* RE_ENABLE_I18N */ #endif /* RE_ENABLE_I18N */
static reg_errcode_t /* Build the buffer PSTR->MBS, and apply the translation if we need.
This function is used in case of REG_ICASE. */
static void
build_upper_buffer (pstr) build_upper_buffer (pstr)
re_string_t *pstr; re_string_t *pstr;
{ {
unsigned char *mbs_upper; int char_idx, end_idx;
int char_idx, char_len; end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
mbs_upper = re_malloc (unsigned char, pstr->len + 1); for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
if (BE (mbs_upper == NULL, 0))
return REG_ESPACE;
char_len = pstr->len;
for (char_idx = 0 ; char_idx < char_len ; char_idx ++)
{ {
if (islower (pstr->mbs[char_idx])) int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
mbs_upper[char_idx] = toupper (pstr->mbs[char_idx]); if (pstr->trans != NULL)
{
ch = pstr->trans[ch];
pstr->mbs_case[char_idx] = ch;
}
if (islower (ch))
pstr->mbs[char_idx] = toupper (ch);
else else
mbs_upper[char_idx] = pstr->mbs[char_idx]; pstr->mbs[char_idx] = ch;
} }
pstr->mbs = mbs_upper; pstr->valid_len = char_idx;
pstr->mbs_alloc = 1;
return REG_NOERROR;
} }
/* Apply TRANS to the buffer in PSTR. We assume that wide char buffer /* Apply TRANS to the buffer in PSTR. */
is already constructed if MB_CUR_MAX > 1. */
/* Fill PSTR->MBS_CASE with the translated form of the raw input.
   Positions below PSTR->VALID_LEN are taken as already built; the rest,
   up to the smaller of PSTR->LEN and PSTR->BUFS_LEN, are read from
   PSTR->RAW_MBS (offset by PSTR->RAW_MBS_IDX) and mapped through
   PSTR->TRANS.  PSTR->VALID_LEN is updated to the position reached.  */
static void
re_string_translate_buffer (pstr)
     re_string_t *pstr;
{
  int pos = pstr->valid_len;
  int limit = pstr->len;

  /* Never write past the end of the allocated buffers.  */
  if (pstr->bufs_len <= limit)
    limit = pstr->bufs_len;

  while (pos < limit)
    {
      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + pos];
      pstr->mbs_case[pos] = pstr->trans[ch];
      ++pos;
    }

  pstr->valid_len = pos;
}
/* This function reconstructs the buffers.
Concretely, it converts to wide characters in case of MB_CUR_MAX > 1,
converts to upper case in case of REG_ICASE, and applies the translation. */
static reg_errcode_t static reg_errcode_t
re_string_translate_buffer (pstr, trans) re_string_reconstruct (pstr, idx, eflags, newline)
re_string_t *pstr; re_string_t *pstr;
RE_TRANSLATE_TYPE trans; int idx, eflags, newline;
{ {
int buf_idx; int offset = idx - pstr->raw_mbs_idx;
unsigned char *transed_buf, *transed_case_buf; if (offset < 0)
#ifdef DEBUG
assert (trans != NULL);
#endif
if (pstr->mbs_alloc)
{ {
transed_buf = (unsigned char *) pstr->mbs; /* Reset buffer. */
transed_case_buf = re_malloc (unsigned char, pstr->len + 1); memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
if (BE (transed_case_buf == NULL, 0)) pstr->valid_len = pstr->raw_mbs_idx = 0;
return REG_ESPACE; pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
pstr->mbs_case_alloc = 1; : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
if (!MBS_CASE_ALLOCATED (pstr))
pstr->mbs_case = (unsigned char *)pstr->raw_mbs;
if (!MBS_ALLOCATED (pstr) && !MBS_CASE_ALLOCATED (pstr))
pstr->mbs = (unsigned char *)pstr->raw_mbs;
offset = idx;
} }
else
if (offset != 0)
{ {
transed_buf = re_malloc (unsigned char, pstr->len + 1); pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags,
if (BE (transed_buf == NULL, 0)) newline);
return REG_ESPACE; /* Are the characters which are already checked remain? */
transed_case_buf = NULL; if (offset < pstr->valid_len)
pstr->mbs_alloc = 1;
}
for (buf_idx = 0 ; buf_idx < pstr->len ; buf_idx++)
{
#ifdef RE_ENABLE_I18N
if (MB_CUR_MAX > 1 && !re_string_is_single_byte_char (pstr, buf_idx))
transed_buf[buf_idx] = pstr->mbs[buf_idx];
else
#endif
transed_buf[buf_idx] = trans[pstr->mbs[buf_idx]];
if (transed_case_buf)
{ {
/* Yes, move them to the front of the buffer. */
#ifdef RE_ENABLE_I18N #ifdef RE_ENABLE_I18N
if (MB_CUR_MAX > 1 && !re_string_is_single_byte_char (pstr, buf_idx)) if (MB_CUR_MAX > 1)
transed_case_buf[buf_idx] = pstr->mbs_case[buf_idx]; memmove (pstr->wcs, pstr->wcs + offset,
else (pstr->valid_len - offset) * sizeof (wchar_t));
#endif /* RE_ENABLE_I18N */
if (MBS_ALLOCATED (pstr))
memmove (pstr->mbs, pstr->mbs + offset,
pstr->valid_len - offset);
if (MBS_CASE_ALLOCATED (pstr))
memmove (pstr->mbs_case, pstr->mbs_case + offset,
pstr->valid_len - offset);
pstr->valid_len -= offset;
#if DEBUG
assert (pstr->valid_len > 0);
#endif #endif
transed_case_buf[buf_idx] = trans[pstr->mbs_case[buf_idx]]; }
else
{
/* No, skip all characters until IDX. */
pstr->valid_len = 0;
#ifdef RE_ENABLE_I18N
if (MB_CUR_MAX > 1)
{
int wcs_idx;
pstr->valid_len = re_string_skip_chars (pstr, idx) - idx;
for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
pstr->wcs[wcs_idx] = WEOF;
}
#endif /* RE_ENABLE_I18N */
}
if (!MBS_CASE_ALLOCATED (pstr))
{
pstr->mbs_case += offset;
/* In case of !MBS_ALLOCATED && !MBS_CASE_ALLOCATED. */
if (!MBS_ALLOCATED (pstr))
pstr->mbs += offset;
} }
} }
if (pstr->mbs_case_alloc == 1) pstr->raw_mbs_idx = idx;
pstr->len -= offset;
/* Then build the buffers. */
#ifdef RE_ENABLE_I18N
if (MB_CUR_MAX > 1)
{ {
pstr->mbs = transed_buf; if (pstr->icase)
pstr->mbs_case = transed_case_buf; build_wcs_upper_buffer (pstr);
else
build_wcs_buffer (pstr);
} }
else else
#endif /* RE_ENABLE_I18N */
{ {
pstr->mbs = transed_buf; if (pstr->icase)
pstr->mbs_case = transed_buf; build_upper_buffer (pstr);
else if (pstr->trans != NULL)
re_string_translate_buffer (pstr);
} }
pstr->cur_idx = 0;
return REG_NOERROR; return REG_NOERROR;
} }
@ -365,13 +498,14 @@ re_string_destruct (pstr)
#ifdef RE_ENABLE_I18N #ifdef RE_ENABLE_I18N
re_free (pstr->wcs); re_free (pstr->wcs);
#endif /* RE_ENABLE_I18N */ #endif /* RE_ENABLE_I18N */
if (pstr->mbs_alloc) if (MBS_ALLOCATED (pstr))
re_free ((void *) pstr->mbs); re_free (pstr->mbs);
if (pstr->mbs_case_alloc) if (MBS_CASE_ALLOCATED (pstr))
re_free ((void *) pstr->mbs_case); re_free (pstr->mbs_case);
} }
/* Return the context at IDX in INPUT. */ /* Return the context at IDX in INPUT. */
static unsigned int static unsigned int
re_string_context_at (input, idx, eflags, newline_anchor) re_string_context_at (input, idx, eflags, newline_anchor)
const re_string_t *input; const re_string_t *input;
@ -380,17 +514,13 @@ re_string_context_at (input, idx, eflags, newline_anchor)
int c; int c;
if (idx < 0 || idx == input->len) if (idx < 0 || idx == input->len)
{ {
unsigned int context = 0;
if (idx < 0) if (idx < 0)
context = CONTEXT_BEGBUF; /* In this case, we use the value stored in input->tip_context,
since we can't know the character in input->mbs[-1] here. */
return input->tip_context;
else /* (idx == input->len) */ else /* (idx == input->len) */
context = CONTEXT_ENDBUF; return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
: CONTEXT_NEWLINE | CONTEXT_ENDBUF);
if ((idx < 0 && !(eflags & REG_NOTBOL))
|| (idx == input->len && !(eflags & REG_NOTEOL)))
return CONTEXT_NEWLINE | context;
else
return context;
} }
c = re_string_byte_at (input, idx); c = re_string_byte_at (input, idx);
if (IS_WORD_CHAR (c)) if (IS_WORD_CHAR (c))
@ -737,6 +867,7 @@ re_node_set_insert (set, elem)
if (set->nelem - idx > 0) if (set->nelem - idx > 0)
memcpy (new_array + idx + 1, set->elems + idx, memcpy (new_array + idx + 1, set->elems + idx,
sizeof (int) * (set->nelem - idx)); sizeof (int) * (set->nelem - idx));
re_free (set->elems);
set->elems = new_array; set->elems = new_array;
} }
else else

View File

@ -201,33 +201,67 @@ typedef struct
struct re_string_t struct re_string_t
{ {
/* Indicate the raw buffer which is the original string passed as an
argument of regexec(), re_search(), etc.. */
const unsigned char *raw_mbs;
/* Index in RAW_MBS. Each character mbs[i] corresponds to
raw_mbs[raw_mbs_idx + i]. */
int raw_mbs_idx;
/* Store the multibyte string. In case of "case insensitive mode" like /* Store the multibyte string. In case of "case insensitive mode" like
REG_ICASE, upper cases of the string are stored. */ REG_ICASE, upper cases of the string are stored, otherwise MBS points
const unsigned char *mbs; the same address that RAW_MBS points. */
unsigned char *mbs;
/* Store the case sensitive multibyte string. In case of /* Store the case sensitive multibyte string. In case of
"case insensitive mode", the original string are stored, "case insensitive mode", the original string are stored,
otherwise MBS_CASE points the same address that MBS points. */ otherwise MBS_CASE points the same address that MBS points. */
const unsigned char *mbs_case; unsigned char *mbs_case;
int cur_idx;
int len;
#ifdef RE_ENABLE_I18N #ifdef RE_ENABLE_I18N
/* Store the wide character string which is corresponding to MBS. */ /* Store the wide character string which is corresponding to MBS. */
wchar_t *wcs; wchar_t *wcs;
mbstate_t cur_state;
#endif #endif
/* 1 if mbs is allocated by regex library. */ /* The length of the valid characters in the buffers. */
unsigned int mbs_alloc : 1; int valid_len;
/* 1 if mbs_case is allocated by regex library. */ /* The length of the buffers MBS, MBS_CASE, and WCS. */
unsigned int mbs_case_alloc : 1; int bufs_len;
/* The index in MBS, which is updated by re_string_fetch_byte. */
int cur_idx;
/* This is length_of_RAW_MBS - RAW_MBS_IDX. */
int len;
/* The context of mbs[0]. We store the context independently, since
the context of mbs[0] may be different from raw_mbs[0], which is
the beginning of the input string. */
unsigned int tip_context;
/* The translation passed as a part of an argument of re_compile_pattern. */
RE_TRANSLATE_TYPE trans;
/* 1 if REG_ICASE. */
unsigned int icase : 1;
}; };
typedef struct re_string_t re_string_t; typedef struct re_string_t re_string_t;
/* Nonzero if PSTR owns a dynamically allocated MBS buffer, which is the
   case exactly when REG_ICASE is in effect.  PSTR is any expression of
   type pointer to re_string_t; it is parenthesised so that arguments
   such as `&str' expand correctly.  */
#define MBS_ALLOCATED(pstr) ((pstr)->icase)
/* Nonzero if PSTR owns a dynamically allocated MBS_CASE buffer, which is
   the case exactly when a translation table is set.  Note that
   mbs == mbs_case if not REG_ICASE.  */
#define MBS_CASE_ALLOCATED(pstr) ((pstr)->trans != NULL)
static reg_errcode_t re_string_allocate (re_string_t *pstr,
const unsigned char *str, int len,
int init_len,
RE_TRANSLATE_TYPE trans, int icase);
static reg_errcode_t re_string_construct (re_string_t *pstr, static reg_errcode_t re_string_construct (re_string_t *pstr,
const unsigned char *str, int len, const unsigned char *str, int len,
RE_TRANSLATE_TYPE trans); RE_TRANSLATE_TYPE trans, int icase);
static reg_errcode_t re_string_construct_toupper (re_string_t *pstr, static reg_errcode_t re_string_reconstruct (re_string_t *pstr, int idx,
const unsigned char *str, int eflags, int newline);
int len, static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
RE_TRANSLATE_TYPE trans); int new_buf_len);
#ifdef RE_ENABLE_I18N
static void build_wcs_buffer (re_string_t *pstr);
static void build_wcs_upper_buffer (re_string_t *pstr);
#endif /* RE_ENABLE_I18N */
static void build_upper_buffer (re_string_t *pstr);
static void re_string_translate_buffer (re_string_t *pstr);
static void re_string_destruct (re_string_t *pstr); static void re_string_destruct (re_string_t *pstr);
#ifdef RE_ENABLE_I18N #ifdef RE_ENABLE_I18N
static int re_string_elem_size_at (const re_string_t *pstr, int idx); static int re_string_elem_size_at (const re_string_t *pstr, int idx);
@ -253,8 +287,7 @@ static unsigned int re_string_context_at (const re_string_t *input, int idx,
#define re_string_cur_idx(pstr) ((pstr)->cur_idx) #define re_string_cur_idx(pstr) ((pstr)->cur_idx)
#define re_string_get_buffer(pstr) ((pstr)->mbs) #define re_string_get_buffer(pstr) ((pstr)->mbs)
#define re_string_length(pstr) ((pstr)->len) #define re_string_length(pstr) ((pstr)->len)
#define re_string_byte_at(pstr,idx) \ #define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx])
((pstr)->mbs[idx])
#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx)) #define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx))
#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx)) #define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))
@ -279,27 +312,6 @@ struct bin_tree_t
}; };
typedef struct bin_tree_t bin_tree_t; typedef struct bin_tree_t bin_tree_t;
struct re_backref_cache_entry
{
int node;
int from;
int to;
int flag;
};
typedef struct
{
int eflags;
int match_first;
int match_last;
int state_log_top;
/* Back reference cache. */
int nbkref_ents;
int abkref_ents;
struct re_backref_cache_entry *bkref_ents;
int max_bkref_len;
} re_match_context_t;
#define CONTEXT_WORD 1 #define CONTEXT_WORD 1
#define CONTEXT_NEWLINE (CONTEXT_WORD << 1) #define CONTEXT_NEWLINE (CONTEXT_WORD << 1)
@ -363,6 +375,32 @@ struct re_state_table_entry
re_dfastate_t **array; re_dfastate_t **array;
}; };
/* One element of the back reference cache; re_match_context_t keeps an
   array of these in its bkref_ents member.
   NOTE(review): the field meanings are not visible in this header.
   Presumably NODE identifies the back-reference node and FROM/TO delimit
   the input range it matched - confirm against regexec.c.  */
struct re_backref_cache_entry
{
int node;
int from;
int to;
int flag;
};
/* State carried through one matching attempt: the execution flags, the
   input string object, the state log and the back reference cache.  */
typedef struct
{
/* EFLAGS of the argument of regexec. */
int eflags;
/* Where the matching ends. */
int match_last;
/* The string object corresponding to the input string. */
re_string_t *input;
/* The state log used by the matcher. */
re_dfastate_t **state_log;
/* NOTE(review): presumably the highest index used in STATE_LOG so far -
   confirm against regexec.c. */
int state_log_top;
/* Back reference cache. */
/* NOTE(review): presumably NBKREF_ENTS is the number of entries in use
   and ABKREF_ENTS the number allocated in BKREF_ENTS - confirm. */
int nbkref_ents;
int abkref_ents;
struct re_backref_cache_entry *bkref_ents;
/* NOTE(review): presumably the longest back-reference match recorded in
   the cache - confirm. */
int max_bkref_len;
} re_match_context_t;
struct re_dfa_t struct re_dfa_t
{ {
re_bitset_ptr_t word_char; re_bitset_ptr_t word_char;

File diff suppressed because it is too large Load Diff