gconv: Do not emit spurious NUL character in ISO-2022-JP-3 (bug 28524)

The fix for bug 27256 introduced another issue:
when converting from the ISO-2022-JP-3 encoding, it is possible
to force iconv to emit an extra NUL character on internal state reset.
To do this, it is sufficient to feed iconv an escape sequence
that switches the active character set.
The simplified check 'data->__statep->__count != ASCII_set'
introduced by the aforementioned bugfix catches that case and
behaves as if a '\0' character had been queued, thus emitting it.

To eliminate this issue, these steps are taken:
* Restore the original condition
'(data->__statep->__count & ~7) != ASCII_set'.
This is necessary since bits 0-2 may contain
the number of buffered input characters.
* Check that the queued character is not NUL.
A similar step is taken for the main conversion loop.

The bundled test case follows this logic:
* Try to convert an ISO-2022-JP-3 escape sequence
that switches the active character set.
* Reset the internal state by providing NULL as the input buffer.
* Ensure that nothing has been converted.

Signed-off-by: Nikita Popov <npv1310@gmail.com>
This commit is contained in:
Nikita Popov 2021-11-02 13:21:42 +05:00 committed by Florian Weimer
parent 9fea0f1a2a
commit ff012870b2
3 changed files with 84 additions and 9 deletions

View File

@ -1,4 +1,5 @@
# Copyright (C) 1997-2021 Free Software Foundation, Inc. # Copyright (C) 1997-2021 Free Software Foundation, Inc.
# Copyright (C) The GNU Toolchain Authors.
# This file is part of the GNU C Library. # This file is part of the GNU C Library.
# The GNU C Library is free software; you can redistribute it and/or # The GNU C Library is free software; you can redistribute it and/or
@ -74,7 +75,7 @@ ifeq (yes,$(build-shared))
tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \ tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \
tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \ tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \
bug-iconv10 bug-iconv11 bug-iconv12 tst-iconv-big5-hkscs-to-2ucs4 \ bug-iconv10 bug-iconv11 bug-iconv12 tst-iconv-big5-hkscs-to-2ucs4 \
bug-iconv13 bug-iconv14 bug-iconv13 bug-iconv14 bug-iconv15
ifeq ($(have-thread-library),yes) ifeq ($(have-thread-library),yes)
tests += bug-iconv3 tests += bug-iconv3
endif endif
@ -327,6 +328,8 @@ $(objpfx)bug-iconv12.out: $(addprefix $(objpfx), $(gconv-modules)) \
$(addprefix $(objpfx),$(modules.so)) $(addprefix $(objpfx),$(modules.so))
$(objpfx)bug-iconv14.out: $(addprefix $(objpfx), $(gconv-modules)) \ $(objpfx)bug-iconv14.out: $(addprefix $(objpfx), $(gconv-modules)) \
$(addprefix $(objpfx),$(modules.so)) $(addprefix $(objpfx),$(modules.so))
$(objpfx)bug-iconv15.out: $(addprefix $(objpfx), $(gconv-modules)) \
$(addprefix $(objpfx),$(modules.so))
$(objpfx)iconv-test.out: run-iconv-test.sh \ $(objpfx)iconv-test.out: run-iconv-test.sh \
$(addprefix $(objpfx), $(gconv-modules)) \ $(addprefix $(objpfx), $(gconv-modules)) \

60
iconvdata/bug-iconv15.c Normal file
View File

@ -0,0 +1,60 @@
/* Bug 28524: Conversion from ISO-2022-JP-3 with iconv
may emit spurious NUL character on state reset.
Copyright (C) The GNU Toolchain Authors.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <stddef.h>
#include <iconv.h>
#include <support/check.h>
/* Regression check for bug 28524: convert a bare character-set switch
   escape sequence from ISO-2022-JP-3, then reset the conversion state,
   and verify that neither step writes anything to the output buffer.
   All checks go through the TEST_VERIFY macros; returns 0.  */
static int
do_test (void)
{
  /* Input: the three bytes ESC ( I.  The terminating NUL of the string
     literal is excluded from the length.  */
  char escape_seq[] = "\x1b(I";
  char *inbuf = escape_seq;
  size_t inleft = sizeof (escape_seq) - 1;

  /* Output: a single byte, which must remain unused.  */
  char out[1];
  char *outbuf = out;
  size_t outleft = sizeof (out);

  iconv_t cd = iconv_open ("UTF8", "ISO-2022-JP-3");
  TEST_VERIFY_EXIT (cd != (iconv_t) -1);

  /* Converting the escape sequence is expected to change the internal
     state: JISX0201_Kana_set becomes the selected set, so the state
     value is no longer ASCII_set.  */
  TEST_VERIFY (iconv (cd, &inbuf, &inleft, &outbuf, &outleft) != (size_t) -1);

  /* The escape sequence alone must not have produced any output.  */
  TEST_VERIFY (outbuf == out);
  TEST_VERIFY (outleft == sizeof (out));

  /* Reset the state by passing a null input buffer.  An unpatched glibc
     emits a spurious NUL character here.  */
  TEST_VERIFY (iconv (cd, NULL, NULL, &outbuf, &outleft) != (size_t) -1);

  /* The reset must not have produced any output either.  */
  TEST_VERIFY (outbuf == out);
  TEST_VERIFY (outleft == sizeof (out));

  TEST_VERIFY_EXIT (iconv_close (cd) != -1);

  return 0;
}
#include <support/test-driver.c>

View File

@ -1,5 +1,6 @@
/* Conversion module for ISO-2022-JP-3. /* Conversion module for ISO-2022-JP-3.
Copyright (C) 1998-2021 Free Software Foundation, Inc. Copyright (C) 1998-2021 Free Software Foundation, Inc.
Copyright (C) The GNU Toolchain Authors.
This file is part of the GNU C Library. This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or The GNU C Library is free software; you can redistribute it and/or
@ -79,20 +80,31 @@ enum
the output state to the initial state. This has to be done during the the output state to the initial state. This has to be done during the
flushing. */ flushing. */
#define EMIT_SHIFT_TO_INIT \ #define EMIT_SHIFT_TO_INIT \
if (data->__statep->__count != ASCII_set) \ if ((data->__statep->__count & ~7) != ASCII_set) \
{ \ { \
if (FROM_DIRECTION) \ if (FROM_DIRECTION) \
{ \ { \
if (__glibc_likely (outbuf + 4 <= outend)) \ uint32_t ch = data->__statep->__count >> 6; \
\
if (__glibc_unlikely (ch != 0)) \
{ \ { \
/* Write out the last character. */ \ if (__glibc_likely (outbuf + 4 <= outend)) \
*((uint32_t *) outbuf) = data->__statep->__count >> 6; \ { \
outbuf += sizeof (uint32_t); \ /* Write out the last character. */ \
data->__statep->__count = ASCII_set; \ put32u (outbuf, ch); \
outbuf += 4; \
data->__statep->__count &= 7; \
data->__statep->__count |= ASCII_set; \
} \
else \
/* We don't have enough room in the output buffer. */ \
status = __GCONV_FULL_OUTPUT; \
} \ } \
else \ else \
/* We don't have enough room in the output buffer. */ \ { \
status = __GCONV_FULL_OUTPUT; \ data->__statep->__count &= 7; \
data->__statep->__count |= ASCII_set; \
} \
} \ } \
else \ else \
{ \ { \