1999-12-27  Ulrich Drepper  <drepper@cygnus.com>

	* iconvdata/gb2312.c: Update mapping of GB 0x212c from latest
	Unicode tables.
This commit is contained in:
Ulrich Drepper 1999-12-27 20:26:55 +00:00
parent 01496d70ed
commit 8892c471bf
4 changed files with 65 additions and 11 deletions

View File

@ -1,3 +1,8 @@
1999-12-27 Ulrich Drepper <drepper@cygnus.com>
* iconvdata/gb2312.c: Update mapping of GB 0x212c from latest
Unicode tables.
1999-12-26 Ulrich Drepper <drepper@cygnus.com>
* sysdeps/unix/sysv/linux/fxstat64.c: Rename have_no_stat64 to

27
FAQ
View File

@ -171,6 +171,8 @@ please let me know.
4.6. I've built make 3.77 against glibc 2.1 and now make gets
segmentation faults.
4.7. Why do so many programs using math functions fail on my AlphaStation?
4.8. The conversion table for character set XX does not match with
what I expect.
~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
@ -1715,6 +1717,31 @@ implemented in some old PALcodes of AlphaStations. This may cause
catch these signals. Updating the firmware to a 1999 release has
fixed the problem on an AlphaStation 200 4/166.
4.8. The conversion table for character set XX does not match with
what I expect.
{UD} I don't doubt for a minute that some of the conversion tables contain
errors. We tried the best we could and relied on automatic generation of the
data to prevent human-introduced errors, but this still is no guarantee. If
you think you found a problem, please send a bug report describing it and
give an authoritative reference. The latter is important since otherwise
the current behaviour is as good as the proposed one.
Before doing this, look through the list of known problems first:
- the GBK (simplified Chinese) encoding is based on Unicode tables. This
is good. These tables, however, differ slightly from the tables used
by the M$ people. The differences are these [+ Unicode, - M$]:
+0xA1AA 0x2015
+0xA844 0x2014
-0xA1AA 0x2014
-0xA844 0x2015
In addition the Unicode tables contain mappings for the GBK characters
0xA8BC, 0xA8BF, 0xA989 to 0xA995, and 0xFE50 to 0xFEA0.
~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~

25
FAQ.in
View File

@ -1481,6 +1481,31 @@ implemented in some old PALcodes of AlphaStations. This may cause
catch these signals. Updating the firmware to a 1999 release has
fixed the problem on an AlphaStation 200 4/166.
?? The conversion table for character set XX does not match with
what I expect.
{UD} I don't doubt for a minute that some of the conversion tables contain
errors. We tried the best we could and relied on automatic generation of the
data to prevent human-introduced errors, but this still is no guarantee. If
you think you found a problem, please send a bug report describing it and
give an authoritative reference. The latter is important since otherwise
the current behaviour is as good as the proposed one.
Before doing this, look through the list of known problems first:
- the GBK (simplified Chinese) encoding is based on Unicode tables. This
is good. These tables, however, differ slightly from the tables used
by the M$ people. The differences are these [+ Unicode, - M$]:
+0xA1AA 0x2015
+0xA844 0x2014
-0xA1AA 0x2014
-0xA844 0x2015
In addition the Unicode tables contain mappings for the GBK characters
0xA8BC, 0xA8BF, 0xA989 to 0xA995, and 0xFE50 to 0xFEA0.
Answers were given by:
{UD} Ulrich Drepper, <drepper@cygnus.com>

View File

@ -1,5 +1,5 @@
/* GB 2312 conversion tables.
Copyright (C) 1998 Free Software Foundation, Inc.
Copyright (C) 1998, 1999 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
@ -44,7 +44,7 @@ const uint16_t __gb2312_to_ucs[] =
{
[0x0000] = 0x3000, [0x0001] = 0x3001, [0x0002] = 0x3002, [0x0003] = 0x30fb,
[0x0004] = 0x02c9, [0x0005] = 0x02c7, [0x0006] = 0x00a8, [0x0007] = 0x3003,
[0x0008] = 0x3005, [0x0009] = 0x2015, [0x000a] = 0xff5e, [0x000b] = 0x2225,
[0x0008] = 0x3005, [0x0009] = 0x2015, [0x000a] = 0xff5e, [0x000b] = 0x2016,
[0x000c] = 0x2026, [0x000d] = 0x2018, [0x000e] = 0x2019, [0x000f] = 0x201c,
[0x0010] = 0x201d, [0x0011] = 0x3014, [0x0012] = 0x3015, [0x0013] = 0x3008,
[0x0014] = 0x3009, [0x0015] = 0x300a, [0x0016] = 0x300b, [0x0017] = 0x300c,
@ -2029,10 +2029,10 @@ const char __gb2312_from_ucs4_tab3[][2] =
*/
const char __gb2312_from_ucs4_tab4[][2] =
{
[0x0000] = "\x21\x2a", [0x0003] = "\x21\x2e", [0x0004] = "\x21\x2f",
[0x0007] = "\x21\x30", [0x0008] = "\x21\x31", [0x0011] = "\x21\x2d",
[0x001b] = "\x21\x6b", [0x001d] = "\x21\x64", [0x001e] = "\x21\x65",
[0x0026] = "\x21\x79",
[0x0000] = "\x21\x2a", [0x0001] = "\x21\x2c", [0x0003] = "\x21\x2e",
[0x0004] = "\x21\x2f", [0x0007] = "\x21\x30", [0x0008] = "\x21\x31",
[0x0011] = "\x21\x2d", [0x001b] = "\x21\x6b", [0x001d] = "\x21\x64",
[0x001e] = "\x21\x65", [0x0026] = "\x21\x79"
};
/* The table can be created using
@ -2055,9 +2055,6 @@ const char __gb2312_from_ucs4_tab4[][2] =
}
printf ("\n");
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
But we have a problem here since U+2225 maps to either 0x212C or
0x214E. We simply choose the first solution here.
*/
const char __gb2312_from_ucs4_tab5[][2] =
{
@ -2069,14 +2066,14 @@ const char __gb2312_from_ucs4_tab5[][2] =
[0x008e] = "\x21\x7c", [0x008f] = "\x21\x7a", [0x0090] = "\x21\x7d",
[0x0105] = "\x21\x4a", [0x010c] = "\x21\x47", [0x010e] = "\x21\x46",
[0x0117] = "\x21\x4c", [0x011a] = "\x21\x58", [0x011b] = "\x21\x5e",
[0x011d] = "\x21\x4f", [0x0122] = "\x21\x2c", [0x0124] = "\x21\x44",
[0x011d] = "\x21\x4f", [0x0122] = "\x21\x4e", [0x0124] = "\x21\x44",
[0x0125] = "\x21\x45", [0x0126] = "\x21\x49", [0x0127] = "\x21\x48",
[0x0128] = "\x21\x52", [0x012b] = "\x21\x53", [0x0131] = "\x21\x60",
[0x0132] = "\x21\x5f", [0x0133] = "\x21\x43", [0x0134] = "\x21\x4b",
[0x013a] = "\x21\x57", [0x0145] = "\x21\x56", [0x0149] = "\x21\x55",
[0x015d] = "\x21\x59", [0x015e] = "\x21\x54", [0x0161] = "\x21\x5c",
[0x0162] = "\x21\x5d", [0x016b] = "\x21\x5a", [0x016c] = "\x21\x5b",
[0x0196] = "\x21\x51", [0x01a2] = "\x21\x4d",
[0x0196] = "\x21\x51", [0x01a2] = "\x21\x4d"
};
/* The table can be created using