gconv: Fix assertion failure in ISO-2022-JP-3 module (bug 27256)

The conversion loop to the internal encoding does not follow
the interface contract that __GCONV_FULL_OUTPUT is only returned
after the internal wchar_t buffer has been filled completely.  This
is enforced by the first of the two asserts in iconv/skeleton.c:

	      /* We must run out of output buffer space in this
		 rerun.  */
	      assert (outbuf == outerr);
	      assert (nstatus == __GCONV_FULL_OUTPUT);

This commit solves this issue by queuing, in the state variable, a
second wide character that cannot be written immediately, as other
converters already do (e.g., BIG5-HKSCS or TSCII).

Reported-by: Tavis Ormandy <taviso@gmail.com>
(cherry picked from commit 7d88c6142c)
This commit is contained in:
Florian Weimer 2021-01-27 13:36:12 +01:00
parent 1864775abc
commit a9acd88a5d
3 changed files with 178 additions and 20 deletions

View File

@ -73,7 +73,7 @@ modules.so := $(addsuffix .so, $(modules))
ifeq (yes,$(build-shared)) ifeq (yes,$(build-shared))
tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \ tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \
tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \ tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \
bug-iconv10 bug-iconv11 bug-iconv12 bug-iconv10 bug-iconv11 bug-iconv12 bug-iconv14
ifeq ($(have-thread-library),yes) ifeq ($(have-thread-library),yes)
tests += bug-iconv3 tests += bug-iconv3
endif endif
@ -316,6 +316,8 @@ $(objpfx)bug-iconv10.out: $(objpfx)gconv-modules \
$(addprefix $(objpfx),$(modules.so)) $(addprefix $(objpfx),$(modules.so))
$(objpfx)bug-iconv12.out: $(objpfx)gconv-modules \ $(objpfx)bug-iconv12.out: $(objpfx)gconv-modules \
$(addprefix $(objpfx),$(modules.so)) $(addprefix $(objpfx),$(modules.so))
$(objpfx)bug-iconv14.out: $(objpfx)gconv-modules \
$(addprefix $(objpfx),$(modules.so))
$(objpfx)iconv-test.out: run-iconv-test.sh $(objpfx)gconv-modules \ $(objpfx)iconv-test.out: run-iconv-test.sh $(objpfx)gconv-modules \
$(addprefix $(objpfx),$(modules.so)) \ $(addprefix $(objpfx),$(modules.so)) \

127
iconvdata/bug-iconv14.c Normal file
View File

@ -0,0 +1,127 @@
/* Assertion in ISO-2022-JP-3 due to two-character sequence (bug 27256).
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <iconv.h>
#include <string.h>
#include <errno.h>
#include <support/check.h>
/* Scenario 1: the input itself ends with the escape sequence that
   returns the converter to its initial state.

   The two input bytes "+D" are expected to convert to two characters,
   U+00E6 (UTF-8: c3 a6) followed by U+0300 (UTF-8: cc 80), but the
   output buffer only has room for one of them.  The first iconv call
   must therefore stop with E2BIG after writing the first character,
   leaving the trailing three-byte escape sequence unread.  The second
   call consumes that escape sequence and must deliver the queued
   second character.  A final explicit flush must write nothing.  */
static void
with_escape_sequence (void)
{
  iconv_t c = iconv_open ("UTF-8", "ISO-2022-JP-3");
  TEST_VERIFY_EXIT (c != (iconv_t) -1);

  /* Leading escape sequence (presumably the JIS X 0213 designation --
     confirm against the converter), one two-byte character, and the
     escape sequence back to the initial state.  */
  char in[] = "\e$(O+D\e(B";
  /* Three bytes: enough for one two-byte UTF-8 sequence, too small
     for two of them.  */
  char out[3];
  char *inbuf = in;
  size_t inleft = strlen (in);
  char *outbuf = out;
  size_t outleft = sizeof out;

  /* First pass: only the first character fits; the last three input
     bytes stay unconsumed.  */
  TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), (size_t) -1);
  TEST_COMPARE (errno, E2BIG);
  TEST_COMPARE (inleft, 3);
  TEST_COMPARE (inbuf - in, strlen (in) - 3);
  TEST_COMPARE (outleft, sizeof (out) - 2);
  TEST_COMPARE (outbuf - out, 2);
  TEST_COMPARE (out[0] & 0xff, 0xc3);
  TEST_COMPARE (out[1] & 0xff, 0xa6);

  /* Second pass with a fresh output window: processing the remaining
     escape sequence must emit the pending character.  */
  outbuf = out;
  outleft = sizeof out;
  TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), 0);
  TEST_COMPARE (inleft, 0);
  TEST_COMPARE (inbuf - in, strlen (in));
  TEST_COMPARE (outleft, sizeof (out) - 2);
  TEST_COMPARE (outbuf - out, 2);
  TEST_COMPARE (out[0] & 0xff, 0xcc);
  TEST_COMPARE (out[1] & 0xff, 0x80);

  /* Explicit flush: nothing is pending any more, so the buffer must be
     left untouched (it still holds the bytes from the previous
     pass).  */
  outbuf = out;
  outleft = sizeof out;
  TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0);
  TEST_COMPARE (outleft, sizeof (out));
  TEST_COMPARE (outbuf - out, 0);
  TEST_COMPARE (out[0] & 0xff, 0xcc);
  TEST_COMPARE (out[1] & 0xff, 0x80);

  TEST_COMPARE (iconv_close (c), 0);
}
/* Scenario 2: the input ends right after the two-byte character, and
   an explicit flush (iconv with a null input pointer) is used to
   return to the initial state.

   As in the escape-sequence scenario, "+D" is expected to convert to
   U+00E6 (UTF-8: c3 a6) followed by U+0300 (UTF-8: cc 80), and the
   output buffer only has room for one of them.  The first iconv call
   must consume all input, write the first character and fail with
   E2BIG.  The first flush must deliver the queued second character;
   a second flush must write nothing.  */
static void
with_flush (void)
{
  iconv_t c = iconv_open ("UTF-8", "ISO-2022-JP-3");
  TEST_VERIFY_EXIT (c != (iconv_t) -1);

  /* Leading escape sequence (presumably the JIS X 0213 designation --
     confirm against the converter) and one two-byte character; no
     escape sequence back to the initial state.  */
  char in[] = "\e$(O+D";
  /* Three bytes: enough for one two-byte UTF-8 sequence, too small
     for two of them.  */
  char out[3];
  char *inbuf = in;
  size_t inleft = strlen (in);
  char *outbuf = out;
  size_t outleft = sizeof out;

  /* First pass: the whole input is consumed, but only the first
     character fits into the output buffer.  */
  TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), (size_t) -1);
  TEST_COMPARE (errno, E2BIG);
  TEST_COMPARE (inleft, 0);
  TEST_COMPARE (inbuf - in, strlen (in));
  TEST_COMPARE (outleft, sizeof (out) - 2);
  TEST_COMPARE (outbuf - out, 2);
  TEST_COMPARE (out[0] & 0xff, 0xc3);
  TEST_COMPARE (out[1] & 0xff, 0xa6);

  /* First flush with a fresh output window: the pending character
     must come out now.  */
  outbuf = out;
  outleft = sizeof out;
  TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0);
  TEST_COMPARE (outleft, sizeof (out) - 2);
  TEST_COMPARE (outbuf - out, 2);
  TEST_COMPARE (out[0] & 0xff, 0xcc);
  TEST_COMPARE (out[1] & 0xff, 0x80);

  /* Second flush: nothing is pending any more, so the buffer must be
     left untouched (it still holds the bytes from the first
     flush).  */
  outbuf = out;
  outleft = sizeof out;
  TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0);
  TEST_COMPARE (outleft, sizeof (out));
  TEST_COMPARE (outbuf - out, 0);
  TEST_COMPARE (out[0] & 0xff, 0xcc);
  TEST_COMPARE (out[1] & 0xff, 0x80);

  TEST_COMPARE (iconv_close (c), 0);
}
/* Test entry point: run the escape-sequence scenario, then the
   explicit-flush scenario.  Always returns 0; individual check
   failures are presumably recorded by the TEST_* macros rather than
   through this return value (see support/check.h to confirm).  */
static int
do_test (void)
{
  with_escape_sequence ();
  with_flush ();

  return 0;
}
#include <support/test-driver.c>

View File

@ -67,23 +67,34 @@ enum
CURRENT_SEL_MASK = 7 << 3 CURRENT_SEL_MASK = 7 << 3
}; };
/* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the state /* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the
also contains the last two bytes to be output, shifted by 6 bits, and a state also contains the last two bytes to be output, shifted by 6
one-bit indicator whether they must be preceded by the shift sequence, bits, and a one-bit indicator whether they must be preceded by the
in bit 22. */ shift sequence, in bit 22. During ISO-2022-JP-3 to UCS-4
conversion, COUNT may also contain a non-zero pending wide
character, shifted by six bits. This happens for certain inputs in
JISX0213_1_2004_set and JISX0213_2_set if the second wide character
in a combining sequence cannot be written because the buffer is
full. */
/* Since this is a stateful encoding we have to provide code which resets /* Since this is a stateful encoding we have to provide code which resets
the output state to the initial state. This has to be done during the the output state to the initial state. This has to be done during the
flushing. */ flushing. */
#define EMIT_SHIFT_TO_INIT \ #define EMIT_SHIFT_TO_INIT \
if ((data->__statep->__count & ~7) != ASCII_set) \ if (data->__statep->__count != ASCII_set) \
{ \ { \
if (FROM_DIRECTION) \ if (FROM_DIRECTION) \
{ \ { \
/* It's easy, we don't have to emit anything, we just reset the \ if (__glibc_likely (outbuf + 4 <= outend)) \
state for the input. */ \ { \
data->__statep->__count &= 7; \ /* Write out the last character. */ \
data->__statep->__count |= ASCII_set; \ *((uint32_t *) outbuf) = data->__statep->__count >> 6; \
outbuf += sizeof (uint32_t); \
data->__statep->__count = ASCII_set; \
} \
else \
/* We don't have enough room in the output buffer. */ \
status = __GCONV_FULL_OUTPUT; \
} \ } \
else \ else \
{ \ { \
@ -151,7 +162,21 @@ enum
#define LOOPFCT FROM_LOOP #define LOOPFCT FROM_LOOP
#define BODY \ #define BODY \
{ \ { \
uint32_t ch = *inptr; \ uint32_t ch; \
\
/* Output any pending character. */ \
ch = set >> 6; \
if (__glibc_unlikely (ch != 0)) \
{ \
put32 (outptr, ch); \
outptr += 4; \
/* Remove the pending character, but preserve state bits. */ \
set &= (1 << 6) - 1; \
continue; \
} \
\
/* Otherwise read the next input byte. */ \
ch = *inptr; \
\ \
/* Recognize escape sequences. */ \ /* Recognize escape sequences. */ \
if (__glibc_unlikely (ch == ESC)) \ if (__glibc_unlikely (ch == ESC)) \
@ -297,21 +322,25 @@ enum
uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0]; \ uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0]; \
uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1]; \ uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1]; \
\ \
/* See whether we have room for two characters. */ \
if (outptr + 8 <= outend) \
{ \
inptr += 2; \ inptr += 2; \
\
put32 (outptr, u1); \ put32 (outptr, u1); \
outptr += 4; \ outptr += 4; \
\
/* See whether we have room for two characters. */ \
if (outptr + 4 <= outend) \
{ \
put32 (outptr, u2); \ put32 (outptr, u2); \
outptr += 4; \ outptr += 4; \
continue; \ continue; \
} \ } \
else \ \
{ \ /* Otherwise store only the first character now, and \
put the second one into the queue. */ \
set |= u2 << 6; \
/* Tell the caller why we terminate the loop. */ \
result = __GCONV_FULL_OUTPUT; \ result = __GCONV_FULL_OUTPUT; \
break; \ break; \
} \
} \ } \
\ \
inptr += 2; \ inptr += 2; \