diff --git a/ChangeLog b/ChangeLog index 9ed5fa4d72..77eec48bbd 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,14 @@ 2013-02-12 Andreas Schwab + [BZ #15078] + * posix/regexec.c (extend_buffers): Add parameter min_len. + (check_matching): Pass minimum needed length. + (clean_state_log_if_needed): Likewise. + (get_subexp): Likewise. + * posix/Makefile (tests): Add bug-regex34. + (bug-regex34-ENV): Define. + * posix/bug-regex34.c: New file. + [BZ #11561] * posix/regcomp.c (parse_bracket_exp): When looking up collating elements compare against the byte sequence of it, not its name. diff --git a/NEWS b/NEWS index f5c6b5215f..769ae61e53 100644 --- a/NEWS +++ b/NEWS @@ -10,7 +10,8 @@ Version 2.18 * The following bugs are resolved with this release: 11561, 13951, 14142, 14200, 14317, 14327, 14496, 14964, 14981, 14982, - 14985, 14994, 14996, 15003, 15006, 15020, 15023, 15036, 15054, 15062. + 14985, 14994, 14996, 15003, 15006, 15020, 15023, 15036, 15054, 15062, + 15078. Version 2.17 diff --git a/posix/Makefile b/posix/Makefile index 88d409f303..2cacd219b5 100644 --- a/posix/Makefile +++ b/posix/Makefile @@ -86,7 +86,7 @@ tests := tstgetopt testfnm runtests runptests \ tst-rfc3484-3 \ tst-getaddrinfo3 tst-fnmatch2 tst-cpucount tst-cpuset \ bug-getopt1 bug-getopt2 bug-getopt3 bug-getopt4 \ - bug-getopt5 tst-getopt_long1 bug-regex35 + bug-getopt5 tst-getopt_long1 bug-regex34 bug-regex35 xtests := bug-ga2 ifeq (yes,$(build-shared)) test-srcs := globtest @@ -199,6 +199,7 @@ bug-regex26-ENV = LOCPATH=$(common-objpfx)localedata bug-regex30-ENV = LOCPATH=$(common-objpfx)localedata bug-regex32-ENV = LOCPATH=$(common-objpfx)localedata bug-regex33-ENV = LOCPATH=$(common-objpfx)localedata +bug-regex34-ENV = LOCPATH=$(common-objpfx)localedata bug-regex35-ENV = LOCPATH=$(common-objpfx)localedata tst-rxspencer-ARGS = --utf8 rxspencer/tests tst-rxspencer-ENV = LOCPATH=$(common-objpfx)localedata diff --git a/posix/bug-regex34.c b/posix/bug-regex34.c new file mode 100644 index 0000000000..bb3b6138f8 --- /dev/null +++ b/posix/bug-regex34.c @@ -0,0 +1,46 @@ +/* Test re_search with multi-byte characters in UTF-8. + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#define _GNU_SOURCE 1 +#include +#include +#include +#include + +static int +do_test (void) +{ + struct re_pattern_buffer r; + /* ကျွန်ုပ်x */ + const char *s = "\xe1\x80\x80\xe1\x80\xbb\xe1\x80\xbd\xe1\x80\x94\xe1\x80\xba\xe1\x80\xaf\xe1\x80\x95\xe1\x80\xbax"; + + if (setlocale (LC_ALL, "en_US.UTF-8") == NULL) + { + puts ("setlocale failed"); + return 1; + } + memset (&r, 0, sizeof (r)); + + re_compile_pattern ("[^x]x", 5, &r); + /* This was triggering a buffer overflow. */ + re_search (&r, s, strlen (s), 0, strlen (s), 0); + return 0; +} + +#define TEST_FUNCTION do_test () +#include "../test-skeleton.c" diff --git a/posix/regexec.c b/posix/regexec.c index 7f2de857a7..5ca2bf67a0 100644 --- a/posix/regexec.c +++ b/posix/regexec.c @@ -197,7 +197,7 @@ static int group_nodes_into_DFAstates (const re_dfa_t *dfa, static int check_node_accept (const re_match_context_t *mctx, const re_token_t *node, int idx) internal_function; -static reg_errcode_t extend_buffers (re_match_context_t *mctx) +static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len) internal_function; /* Entry point for POSIX code. */ @@ -1160,7 +1160,7 @@ check_matching (re_match_context_t *mctx, int fl_longest_match, || (BE (next_char_idx >= mctx->input.valid_len, 0) && mctx->input.valid_len < mctx->input.len)) { - err = extend_buffers (mctx); + err = extend_buffers (mctx, next_char_idx + 1); if (BE (err != REG_NOERROR, 0)) { assert (err == REG_ESPACE); @@ -1738,7 +1738,7 @@ clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx) && mctx->input.valid_len < mctx->input.len)) { reg_errcode_t err; - err = extend_buffers (mctx); + err = extend_buffers (mctx, next_state_log_idx + 1); if (BE (err != REG_NOERROR, 0)) return err; } @@ -2792,7 +2792,7 @@ get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx) if (bkref_str_off >= mctx->input.len) break; - err = extend_buffers (mctx); + err = extend_buffers (mctx, bkref_str_off + 1); if (BE (err != REG_NOERROR, 0)) return err; @@ -4102,7 +4102,7 @@ check_node_accept (const re_match_context_t *mctx, const re_token_t *node, static reg_errcode_t internal_function __attribute_warn_unused_result__ -extend_buffers (re_match_context_t *mctx) +extend_buffers (re_match_context_t *mctx, int min_len) { reg_errcode_t ret; re_string_t *pstr = &mctx->input; @@ -4111,8 +4111,10 @@ extend_buffers (re_match_context_t *mctx) if (BE (INT_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0)) return REG_ESPACE; - /* Double the lengthes of the buffers. */ - ret = re_string_realloc_buffers (pstr, MIN (pstr->len, pstr->bufs_len * 2)); + /* Double the lengthes of the buffers, but allocate at least MIN_LEN. */ + ret = re_string_realloc_buffers (pstr, + MAX (min_len, + MIN (pstr->len, pstr->bufs_len * 2))); if (BE (ret != REG_NOERROR, 0)) return ret;