iconv: Fix matching of multi-character transliterations (bug 31859)

Only return __GCONV_INCOMPLETE_INPUT for a partial match when the end of
the input buffer is reached.  Otherwise it is a non-match, and other
patterns should be tried.
This commit is contained in:
Andreas Schwab 2024-06-10 12:19:17 +02:00
parent 7a630f7d33
commit 1b0a2062c8
5 changed files with 125 additions and 1 deletions

View File

@ -57,6 +57,10 @@ tests = \
tst-iconv-opt \ tst-iconv-opt \
# tests # tests
test-srcs := \
tst-translit-mchar \
# test-srcs
others = iconv_prog iconvconfig others = iconv_prog iconvconfig
install-others-programs = $(inst_bindir)/iconv install-others-programs = $(inst_bindir)/iconv
install-sbin = iconvconfig install-sbin = iconvconfig
@ -73,6 +77,7 @@ include $(patsubst %,$(..)libof-iterator.mk,$(cpp-srcs-left))
ifeq ($(run-built-tests),yes) ifeq ($(run-built-tests),yes)
xtests-special += $(objpfx)test-iconvconfig.out xtests-special += $(objpfx)test-iconvconfig.out
tests-special += $(objpfx)tst-iconv_prog.out tests-special += $(objpfx)tst-iconv_prog.out
tests-special += $(objpfx)tst-translit-mchar.out
endif endif
# Make a copy of the file because gconv module names are constructed # Make a copy of the file because gconv module names are constructed
@ -92,6 +97,8 @@ $(objpfx)tst-gconv-init-failure.out: \
$(objpfx)gconv-modules $(objpfx)tst-gconv-init-failure-mod.so $(objpfx)gconv-modules $(objpfx)tst-gconv-init-failure-mod.so
endif endif
generated-dirs += tst-translit
include ../Rules include ../Rules
ifeq ($(run-built-tests),yes) ifeq ($(run-built-tests),yes)
@ -126,3 +133,11 @@ $(objpfx)tst-iconv_prog.out: tst-iconv_prog.sh $(objpfx)iconv_prog
$(BASH) $< $(common-objdir) '$(test-wrapper-env)' \ $(BASH) $< $(common-objdir) '$(test-wrapper-env)' \
'$(run-program-env)' > $@; \ '$(run-program-env)' > $@; \
$(evaluate-test) $(evaluate-test)
$(objpfx)tst-translit-mchar.out: tst-translit-mchar.sh \
$(objpfx)tst-translit-mchar \
tst-translit-locale
$(SHELL) $< $(common-objpfx) '$(run-program-prefix-before-env)' \
'$(run-program-env)' '$(run-program-prefix-after-env)' \
$< > $@; \
$(evaluate-test)

View File

@ -150,7 +150,7 @@ __gconv_transliterate (struct __gconv_step *step,
/* Nothing found, continue searching. */ /* Nothing found, continue searching. */
} }
else if (cnt > 0) else if (cnt > 0 && winbuf + cnt == winbufend)
/* This means that the input buffer contents matches a prefix of /* This means that the input buffer contents matches a prefix of
an entry. Since we cannot match it unless we get more input, an entry. Since we cannot match it unless we get more input,
we will tell the caller about it. */ we will tell the caller about it. */

10
iconv/tst-translit-locale Normal file
View File

@ -0,0 +1,10 @@
# Test multi-character transliteration rule
LC_CTYPE
copy "POSIX"
translit_start
"ÄÄ" "AA"
translit_end
END LC_CTYPE

View File

@ -0,0 +1,48 @@
/* Test multi-character transliterations.
Copyright (C) 2024 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <locale.h>
#include <iconv.h>
#include <support/support.h>
#include <support/check.h>
static int
do_test (void)
{
iconv_t cd;
/* An input sequence that shares a common prefix with a transliteration
rule. */
char input[] = "ÄÅ";
char *inptr = input;
char outbuf[10];
char *outptr = outbuf;
size_t inlen = sizeof (input), outlen = sizeof (outbuf);
size_t n;
xsetlocale (LC_CTYPE, "tst-translit");
cd = iconv_open ("ASCII//TRANSLIT", "UTF-8");
TEST_VERIFY (cd != (iconv_t) -1);
/* This call used to loop infinitely. */
n = iconv (cd, &inptr, &inlen, &outptr, &outlen);
TEST_VERIFY (iconv_close (cd) == 0);
return n == 0;
}
#include <support/test-driver.c>

View File

@ -0,0 +1,51 @@
#!/bin/sh
# Testing of multi-character transliterations
# Copyright (C) 2024 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
# The GNU C Library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
# The GNU C Library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
# You should have received a copy of the GNU Lesser General Public
# License along with the GNU C Library; if not, see
# <https://www.gnu.org/licenses/>.
set -e
common_objpfx=$1
run_program_prefix_before_env=$2
run_program_env=$3
run_program_prefix_after_env=$4
# Generate data files.
# The locale only defines the LC_CTYPE category, so we expect a failure
# due to warnings.
ret=0
${run_program_prefix_before_env} \
${run_program_env} \
I18NPATH=../localedata \
${run_program_prefix_after_env} ${common_objpfx}locale/localedef \
--quiet -i tst-translit-locale -f UTF-8 ${common_objpfx}iconv/tst-translit || ret=$?
if [ $ret -gt 1 ]; then
echo "FAIL: Locale compilation for tst-translit-locale failed (error $ret)."
exit 1
fi
set -x
# Run the test.
${run_program_prefix_before_env} \
${run_program_env} \
LOCPATH=${common_objpfx}iconv \
${run_program_prefix_after_env} ${common_objpfx}iconv/tst-translit-mchar
# Local Variables:
# mode:shell-script
# End: