iconv: Fix matching of multi-character transliterations (bug 31859)

Only return __GCONV_INCOMPLETE_INPUT for a partial match when the end of
the input buffer is reached.  Otherwise it is a non-match, and other
patterns should be tried.
This commit is contained in:
Andreas Schwab 2024-06-10 12:19:17 +02:00
parent 7a630f7d33
commit 1b0a2062c8
5 changed files with 125 additions and 1 deletions

View File

@ -57,6 +57,10 @@ tests = \
tst-iconv-opt \
# tests
test-srcs := \
tst-translit-mchar \
# test-srcs
others = iconv_prog iconvconfig
install-others-programs = $(inst_bindir)/iconv
install-sbin = iconvconfig
@ -73,6 +77,7 @@ include $(patsubst %,$(..)libof-iterator.mk,$(cpp-srcs-left))
ifeq ($(run-built-tests),yes)
xtests-special += $(objpfx)test-iconvconfig.out
tests-special += $(objpfx)tst-iconv_prog.out
tests-special += $(objpfx)tst-translit-mchar.out
endif
# Make a copy of the file because gconv module names are constructed
@ -92,6 +97,8 @@ $(objpfx)tst-gconv-init-failure.out: \
$(objpfx)gconv-modules $(objpfx)tst-gconv-init-failure-mod.so
endif
generated-dirs += tst-translit
include ../Rules
ifeq ($(run-built-tests),yes)
@ -126,3 +133,11 @@ $(objpfx)tst-iconv_prog.out: tst-iconv_prog.sh $(objpfx)iconv_prog
$(BASH) $< $(common-objdir) '$(test-wrapper-env)' \
'$(run-program-env)' > $@; \
$(evaluate-test)
$(objpfx)tst-translit-mchar.out: tst-translit-mchar.sh \
$(objpfx)tst-translit-mchar \
tst-translit-locale
$(SHELL) $< $(common-objpfx) '$(run-program-prefix-before-env)' \
'$(run-program-env)' '$(run-program-prefix-after-env)' \
$< > $@; \
$(evaluate-test)

View File

@ -150,7 +150,7 @@ __gconv_transliterate (struct __gconv_step *step,
/* Nothing found, continue searching. */
}
else if (cnt > 0)
else if (cnt > 0 && winbuf + cnt == winbufend)
/* This means that the input buffer contents matches a prefix of
an entry. Since we cannot match it unless we get more input,
we will tell the caller about it. */

10
iconv/tst-translit-locale Normal file
View File

@ -0,0 +1,10 @@
# Test multi-character transliteration rule
LC_CTYPE
copy "POSIX"
translit_start
"ÄÄ" "AA"
translit_end
END LC_CTYPE

View File

@ -0,0 +1,48 @@
/* Test multi-character transliterations.
Copyright (C) 2024 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <locale.h>
#include <iconv.h>
#include <support/support.h>
#include <support/check.h>
static int
do_test (void)
{
iconv_t cd;
/* An input sequence that shares a common prefix with a transliteration
rule. */
char input[] = "ÄÅ";
char *inptr = input;
char outbuf[10];
char *outptr = outbuf;
size_t inlen = sizeof (input), outlen = sizeof (outbuf);
size_t n;
xsetlocale (LC_CTYPE, "tst-translit");
cd = iconv_open ("ASCII//TRANSLIT", "UTF-8");
TEST_VERIFY (cd != (iconv_t) -1);
/* This call used to loop infinitely. */
n = iconv (cd, &inptr, &inlen, &outptr, &outlen);
TEST_VERIFY (iconv_close (cd) == 0);
return n == 0;
}
#include <support/test-driver.c>

View File

@ -0,0 +1,51 @@
#!/bin/sh
# Testing of multi-character transliterations
# Copyright (C) 2024 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
# The GNU C Library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
# The GNU C Library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
# You should have received a copy of the GNU Lesser General Public
# License along with the GNU C Library; if not, see
# <https://www.gnu.org/licenses/>.
set -e
common_objpfx=$1
run_program_prefix_before_env=$2
run_program_env=$3
run_program_prefix_after_env=$4
# Generate data files.
# The locale only defines the LC_CTYPE category, so we expect a failure
# due to warnings.
ret=0
${run_program_prefix_before_env} \
${run_program_env} \
I18NPATH=../localedata \
${run_program_prefix_after_env} ${common_objpfx}locale/localedef \
--quiet -i tst-translit-locale -f UTF-8 ${common_objpfx}iconv/tst-translit || ret=$?
if [ $ret -gt 1 ]; then
echo "FAIL: Locale compilation for tst-translit-locale failed (error $ret)."
exit 1
fi
set -x
# Run the test.
${run_program_prefix_before_env} \
${run_program_env} \
LOCPATH=${common_objpfx}iconv \
${run_program_prefix_after_env} ${common_objpfx}iconv/tst-translit-mchar
# Local Variables:
# mode:shell-script
# End: