mirror of
https://sourceware.org/git/glibc.git
synced 2025-01-12 20:20:18 +00:00
598f790fb1
This patch corrects the Big5-HKSCS converter to preserve the lowest 3 bits of the mbstate_t __count data member when the converter encounters an incomplete multibyte character. This fixes BZ #25744. Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
226 lines
6.6 KiB
C
226 lines
6.6 KiB
C
/* Verify the BIG5HKSCS outputs that generate 2 wchar_t's (Bug 25734).
|
|
Copyright (C) 2020-2022 Free Software Foundation, Inc.
|
|
This file is part of the GNU C Library.
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
The GNU C Library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with the GNU C Library; if not, see
|
|
<https://www.gnu.org/licenses/>. */
|
|
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <locale.h>
|
|
#include <wchar.h>
|
|
#include <support/check.h>
|
|
#include <support/support.h>
|
|
|
|
/* A few BIG5-HKSCS characters map in two unicode code points.
|
|
They are:
|
|
/x88/x62 => <U00CA><U0304>
|
|
/x88/x64 => <U00CA><U030C>
|
|
/x88/xa3 => <U00EA><U0304>
|
|
/x88/xa5 => <U00EA><U030C>
|
|
Each of these is special cased in iconvdata/big5hkscs.c.
|
|
This test ensures that we correctly reset the shift state after
|
|
outputting any of these characters. We do this by converting
|
|
each them followed by converting an ASCII character. If we fail
|
|
to reset the shift state (bug 25734) then we'll see the last
|
|
character in the queue output again. */
|
|
|
|
/* Each test has name, input bytes, and expected wide character
|
|
output. */
|
|
struct testdata {
|
|
const char *name;
|
|
const char input[3];
|
|
wchar_t expected[3];
|
|
};
|
|
|
|
/* In BIG5-HKSCS (2008) there are 4 characters that generate multiple
|
|
wide characters. */
|
|
struct testdata tests[4] = {
|
|
/* <H-8862>X => <U+00CA><U+0304>X */
|
|
{ "<H-8862>", "\x88\x62\x58", { 0x00CA, 0x0304, 0x0058 } },
|
|
/* <H-8864>X => <U+00CA><U+030C>X */
|
|
{ "<H-8864>", "\x88\x64\x58", { 0x00CA, 0x030C, 0x0058 } },
|
|
/* <H-88A3>X => <U+00EA><U+0304>X */
|
|
{ "<H-88A3>", "\x88\xa3\x58", { 0x00EA, 0x0304, 0x0058 } },
|
|
/* <H-88A5>X => <U+00EA><U+030C>X */
|
|
{ "<H-88A5>", "\x88\xa5\x58", { 0x00EA, 0x030C, 0x0058 } }
|
|
};
|
|
|
|
/* Each test is of the form:
|
|
- Translate first code sequence (two bytes)
|
|
- Translate second (zero bytes)
|
|
- Translate the third (one byte). */
|
|
static int
|
|
check_conversion (struct testdata test)
|
|
{
|
|
int err = 0;
|
|
wchar_t wc;
|
|
mbstate_t st;
|
|
size_t ret;
|
|
const char *mbs = test.input;
|
|
int consumed = 0;
|
|
/* Input is always 3 bytes long. */
|
|
int inlen = 3;
|
|
|
|
memset (&st, 0, sizeof (st));
|
|
/* First conversion: Consumes first 2 bytes. */
|
|
ret = mbrtowc (&wc, mbs, inlen - consumed, &st);
|
|
if (ret != 2)
|
|
{
|
|
printf ("error: First conversion consumed only %zd bytes.\n", ret);
|
|
err++;
|
|
}
|
|
/* Advance the two consumed bytes. */
|
|
mbs += ret;
|
|
consumed += ret;
|
|
if (wc != test.expected[0])
|
|
{
|
|
printf ("error: Result of first conversion was wrong.\n");
|
|
err++;
|
|
}
|
|
/* Second conversion: Consumes 0 bytes. */
|
|
ret = mbrtowc (&wc, mbs, inlen - consumed, &st);
|
|
if (ret != 0)
|
|
{
|
|
printf ("error: Second conversion consumed only %zd bytes.\n", ret);
|
|
err++;
|
|
}
|
|
/* Advance the zero consumed bytes. */
|
|
mbs += ret;
|
|
consumed += ret;
|
|
if (wc != test.expected[1])
|
|
{
|
|
printf ("error: Result of second conversion was wrong.\n");
|
|
err++;
|
|
}
|
|
/* After the second conversion the state of the converter should be
|
|
in the initial state. It is in the initial state because the two
|
|
input BIG5-HKSCS bytes have been consumed and the 2 wchar_t's have
|
|
been output. */
|
|
if (mbsinit (&st) == 0)
|
|
{
|
|
printf ("error: Converter not in initial state.\n");
|
|
err++;
|
|
}
|
|
/* Third conversion: Consumes 1 byte (it's an ASCII character). */
|
|
ret = mbrtowc (&wc, mbs, inlen - consumed, &st);
|
|
if (ret != 1)
|
|
{
|
|
printf ("error: Third conversion consumed only %zd bytes.\n", ret);
|
|
err++;
|
|
}
|
|
/* Advance the one byte. */
|
|
mbs += ret;
|
|
consumed += ret;
|
|
if (wc != test.expected[2])
|
|
{
|
|
printf ("error: Result of third conversion was wrong.\n");
|
|
err++;
|
|
}
|
|
|
|
/* Now perform the same test as above consuming one byte at a time. */
|
|
mbs = test.input;
|
|
memset (&st, 0, sizeof (st));
|
|
|
|
/* Consume the first byte; expect an incomplete multibyte character. */
|
|
ret = mbrtowc (&wc, mbs, 1, &st);
|
|
if (ret != -2)
|
|
{
|
|
printf ("error: First byte conversion returned %zd.\n", ret);
|
|
err++;
|
|
}
|
|
/* Advance past the first consumed byte. */
|
|
mbs += 1;
|
|
/* Consume the second byte; expect the first wchar_t. */
|
|
ret = mbrtowc (&wc, mbs, 1, &st);
|
|
if (ret != 1)
|
|
{
|
|
printf ("error: Second byte conversion returned %zd.\n", ret);
|
|
err++;
|
|
}
|
|
/* Advance past the second consumed byte. */
|
|
mbs += 1;
|
|
if (wc != test.expected[0])
|
|
{
|
|
printf ("error: Result of first wchar_t conversion was wrong.\n");
|
|
err++;
|
|
}
|
|
/* Consume no bytes; expect the second wchar_t. */
|
|
ret = mbrtowc (&wc, mbs, 1, &st);
|
|
if (ret != 0)
|
|
{
|
|
printf ("error: First attempt of third byte conversion returned %zd.\n", ret);
|
|
err++;
|
|
}
|
|
/* Do not advance past the third byte. */
|
|
mbs += 0;
|
|
if (wc != test.expected[1])
|
|
{
|
|
printf ("error: Result of second wchar_t conversion was wrong.\n");
|
|
err++;
|
|
}
|
|
/* After the second wchar_t conversion, the converter should be in
|
|
the initial state since the two input BIG5-HKSCS bytes have been
|
|
consumed and the two wchar_t's have been output. */
|
|
if (mbsinit (&st) == 0)
|
|
{
|
|
printf ("error: Converter not in initial state.\n");
|
|
err++;
|
|
}
|
|
/* Consume the third byte; expect the third wchar_t. */
|
|
ret = mbrtowc (&wc, mbs, 1, &st);
|
|
if (ret != 1)
|
|
{
|
|
printf ("error: Third byte conversion returned %zd.\n", ret);
|
|
err++;
|
|
}
|
|
/* Advance past the third consumed byte. */
|
|
mbs += 1;
|
|
if (wc != test.expected[2])
|
|
{
|
|
printf ("error: Result of third wchar_t conversion was wrong.\n");
|
|
err++;
|
|
}
|
|
|
|
/* Return 0 if we saw no errors. */
|
|
return err;
|
|
}
|
|
|
|
static int
|
|
do_test (void)
|
|
{
|
|
int err = 0;
|
|
int ret;
|
|
/* Testing BIG5-HKSCS. */
|
|
xsetlocale (LC_ALL, "zh_HK.BIG5-HKSCS");
|
|
|
|
/* Run all the special conversions. */
|
|
for (int i = 0; i < (sizeof (tests) / sizeof (struct testdata)); i++)
|
|
{
|
|
printf ("Running test for %s\n", tests[i].name);
|
|
ret = check_conversion (tests[i]);
|
|
if (ret > 0)
|
|
printf ("Test %s failed.\n", tests[i].name);
|
|
err += ret;
|
|
}
|
|
|
|
/* Fail if any conversion had an error. */
|
|
if (err > 0)
|
|
FAIL_EXIT1 ("One or more conversions failed.");
|
|
|
|
return 0;
|
|
}
|
|
|
|
#include <support/test-driver.c>
|