glibc/iconvdata/tst-table.sh
lijianglin e1d3312015 add GB18030-2022 charmap and test the entire GB18030 charmap [BZ #30243]
Support GB18030-2022 by adding and changing some transcoding relationships
of GB18030-2022. Details are as follows:
Add 25 transcoding relationships:
  UE81E 0x82359037
  UE826 0x82359038
  UE82B 0x82359039
  UE82C 0x82359130
  UE832 0x82359131
  UE843 0x82359132
  UE854 0x82359133
  UE864 0x82359134
  UE78D 0x84318236
  UE78F 0x84318237
  UE78E 0x84318238
  UE790 0x84318239
  UE791 0x84318330
  UE792 0x84318331
  UE793 0x84318332
  UE794 0x84318333
  UE795 0x84318334
  UE796 0x84318335
  UE816 0xfe51
  UE817 0xfe52
  UE818 0xfe53
  UE831 0xfe6c
  UE83B 0xfe76
  UE855 0xfe91
Change 6 transcoding relationships:
  U20087 0x95329031
  U20089 0x95329033
  U200CC 0x95329730
  U215D7 0x9536b937
  U2298F 0x9630ba35
  U241FE 0x9635b630
Test the entire GB18030 charmap, not only the Unicode BMP part.

Co-authored-by: yangyanchao <yangyanchao6@huawei.com>
Co-authored-by: liqingqing <liqingqing3@huawei.com>
Co-authored-by: Bruno Haible <bruno@clisp.org>
Reviewed-by: Andreas Schwab <schwab@suse.de>
Reviewed-by: Mike FABIAN <mfabian@redhat.com>
2023-08-29 19:02:30 +02:00

116 lines
4.0 KiB
Bash
Executable File

#!/bin/sh
# Copyright (C) 2000-2023 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
#
# The GNU C Library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
# The GNU C Library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
# You should have received a copy of the GNU Lesser General Public
# License along with the GNU C Library; if not, see
# <https://www.gnu.org/licenses/>.
# Checks that the iconv() implementation (in both directions) for a
# stateless encoding agrees with the charmap table.
# Abort on the first command that fails.
set -e

# Positional parameters:
#   $1  common_objpfx        stored only; not referenced again in the
#                            visible part of this script
#   $2  objpfx               prefix of every file this script writes and of
#                            the tst-table-from / tst-table-to programs
#   $3  test_program_prefix  command prefix put in front of those programs
#   $4  charset              name of the encoding under test
#   $5  charmap              charmap file name; ${charset} is used instead
#                            when this is empty or unset
common_objpfx="$1"
objpfx="$2"
test_program_prefix="$3"
charset="$4"
charmap="$5"

# sort is used on the build system.
export LC_ALL=C
# Get the charmap: tst-table-charmap.sh reads the charmap file on stdin and
# its output becomes ${objpfx}tst-${charset}.charmap.table.  The charmap file
# name is ${charmap}, or ${charset} when ${charmap} is empty or unset.
# All expansions are quoted so that an empty value or a path containing
# whitespace or glob characters cannot change how the commands are parsed.
./tst-table-charmap.sh \
  < "../localedata/charmaps/${charmap:-$charset}" \
  > "${objpfx}tst-${charset}.charmap.table"

# When the charset is GB18030, truncate this table because for this encoding,
# the charmap contains ranges (<Unnnn>..<Ummmm> notation), which the
# tst-table-charmap.sh script does not grok.  Only lines that end in "0x"
# followed by exactly four characters are kept; the filtered copy then
# replaces the original table.
if test "${charset}" = GB18030; then
  grep '0x....$' < "${objpfx}tst-${charset}.charmap.table" \
    > "${objpfx}tst-${charset}.truncated.table"
  mv "${objpfx}tst-${charset}.truncated.table" \
    "${objpfx}tst-${charset}.charmap.table"
fi
# Precomputed expected differences between the charmap and iconv forward.
# Only the file name is set here; whether the file exists is tested where
# it is used.
precomposed="${charset}.precomposed"

# Precompute expected differences between the charmap and iconv backward.
# For EUC-TW the list is generated into ${objpfx}: every charmap table line
# that starts with 0x8EA1, followed by the contents of ${charset}.irreversible.
# For every other charset ${charset}.irreversible is used directly.
# Expansions are quoted so that empty values or unusual path characters
# cannot alter the test expression or the file operands.
if test "${charset}" = EUC-TW; then
  irreversible="${objpfx}tst-${charset}.irreversible"
  (
    grep '^0x8EA1' "${objpfx}tst-${charset}.charmap.table"
    cat "${charset}.irreversible"
  ) > "${irreversible}"
else
  irreversible="${charset}.irreversible"
fi
# iconv in one direction: the output of tst-table-from for ${charset} is
# stored in ${objpfx}tst-${charset}.table.
# ${test_program_prefix} is deliberately left unquoted: it is used as a
# command prefix, so it must undergo field splitting (presumably it can hold
# a launcher plus its options -- confirm against the caller) and must
# expand to nothing at all when it is empty.
${test_program_prefix} \
  "${objpfx}tst-table-from" "${charset}" \
  > "${objpfx}tst-${charset}.table"

# iconv in the other direction.  The output is written to the file first and
# sorted in place afterwards instead of being piped into sort: in a pipeline
# only the exit status of sort would be seen, so a failing tst-table-to
# would not stop the script under "set -e".
${test_program_prefix} \
  "${objpfx}tst-table-to" "${charset}" \
  > "${objpfx}tst-${charset}.inverse.table"
sort -o "${objpfx}tst-${charset}.inverse.table" \
  "${objpfx}tst-${charset}.inverse.table"
# Difference between the charmap and iconv backward.  Only the diff output
# lines that start with "<" or ">" are kept, and their leading two-character
# marker is removed.  The result is not read again by the visible part of
# this script.
# diff exits non-zero when the files differ; because it is not the last
# command of the pipeline, that status does not abort the script.
# File names are quoted so that unusual characters in ${objpfx} or
# ${charset} cannot split them into several operands.
diff "${objpfx}tst-${charset}.charmap.table" \
     "${objpfx}tst-${charset}.inverse.table" \
  | grep '^[<>]' \
  | sed -e 's,^. ,,' \
  > "${objpfx}tst-${charset}.irreversible.table"
# Check 1: charmap and iconv forward should be identical, except for
# precomposed characters.
# When ${precomposed} exists, the forward table and that file are sorted
# together and "uniq -u" keeps only the lines that occur exactly once, which
# removes e.g. the lines present in both files.  For GB18030 the result is
# additionally reduced to the lines ending in "0x" plus four characters.
{
  if test -f "${precomposed}"; then
    sort "${objpfx}tst-${charset}.table" "${precomposed}" | uniq -u
  else
    cat "${objpfx}tst-${charset}.table"
  fi
} | {
  if test "${charset}" = GB18030; then
    grep '0x....$'
  else
    cat
  fi
} > "${objpfx}tst-${charset}.tmp1.table"

# Fail with a message on stderr so that the log shows which check failed.
if ! cmp -s "${objpfx}tst-${charset}.charmap.table" \
            "${objpfx}tst-${charset}.tmp1.table"; then
  printf '%s\n' "$0: ${charset}: check 1 failed (charmap vs. iconv forward)" >&2
  exit 1
fi
# Check 2: the difference between the charmap and iconv backward.
# Expected side (tmp2c): the charmap table; when ${irreversible} exists, the
# two files are sorted together and "uniq -u" keeps only the lines that occur
# exactly once.  Actual side (tmp2i): the inverse table.  For GB18030 both
# sides are reduced to the lines ending in "0x" plus four characters.
{
  if test -f "${irreversible}"; then
    sort "${objpfx}tst-${charset}.charmap.table" "${irreversible}" | uniq -u
  else
    cat "${objpfx}tst-${charset}.charmap.table"
  fi
} | {
  if test "${charset}" = GB18030; then
    grep '0x....$'
  else
    cat
  fi
} > "${objpfx}tst-${charset}.tmp2c.table"

{
  if test "${charset}" = GB18030; then
    grep '0x....$'
  else
    cat
  fi
} < "${objpfx}tst-${charset}.inverse.table" \
  > "${objpfx}tst-${charset}.tmp2i.table"

# Fail with a message on stderr so that the log shows which check failed.
if ! cmp -s "${objpfx}tst-${charset}.tmp2c.table" \
            "${objpfx}tst-${charset}.tmp2i.table"; then
  printf '%s\n' "$0: ${charset}: check 2 failed (charmap vs. iconv backward)" >&2
  exit 1
fi
# Check 3: the difference between iconv forward and iconv backward.  This is
# necessary only for GB18030, because ${objpfx}tst-${charset}.charmap.table
# is truncated for this encoding (see above).
# When ${irreversible} exists, the forward table and that file are sorted
# together and "uniq -u" keeps only the lines that occur exactly once; the
# result must equal the inverse table.
if test "${charset}" = GB18030; then
  {
    if test -f "${irreversible}"; then
      sort "${objpfx}tst-${charset}.table" "${irreversible}" | uniq -u
    else
      cat "${objpfx}tst-${charset}.table"
    fi
  } > "${objpfx}tst-${charset}.tmp3.table"

  # Fail with a message on stderr so that the log shows which check failed.
  if ! cmp -s "${objpfx}tst-${charset}.tmp3.table" \
              "${objpfx}tst-${charset}.inverse.table"; then
    printf '%s\n' "$0: ${charset}: check 3 failed (iconv forward vs. backward)" >&2
    exit 1
  fi
fi

exit 0