mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-09 23:00:07 +00:00
Update.
2000-09-23 Bruno Haible <haible@clisp.cons.org> * iconvdata/gbk.c (USE_PRIVATE_AREA): Define to 0. (__gbk_to_ucs): Conditionalize private area mappings. (__gbk_from_ucs4_tab9): Likewise. (BODY for TO_LOOP): Likewise. * iconvdata/testdata/GBK: Don't use characters not yet in Unicode. * iconvdata/testdata/GBK..UTF-8: Likewise. * iconvdata/tst-tables.sh: Enable GBK test. 2000-09-23 Bruno Haible <haible@clisp.cons.org> * iconvdata/gbk.c (__gbk_to_ucs): Swap U+2014 and U+2015. (__gbk_from_ucs4_tab4): Swap entries for U+2014 and U+2015. (BODY for FROM_LOOP): Reject input > 0xFEA0, avoids out-of-bounds array access. * iconvdata/gbgbk.c (BODY for FROM_LOOP): Map 0xA844 to 0xA1AA. * iconvdata/testdata/GBK..UTF8: Swap U+2014 and U+2015. 2000-09-23 Bruno Haible <haible@clisp.cons.org> * iconvdata/johab.c (final_to_ucs): Fix typos. (jamo_from_ucs_table): Likewise. (BODY for FROM_LOOP): Map 0x5c to U+20A9. Reject ranges 0xD9E6..0xD9FE and 0xDEF2..0xDEFE. (BODY for TO_LOOP): Map U+20A9 to 0x5c. Don't produce values in the range 0xD9E6..0xD9FE. * iconvdata/tst-tables.sh: Enable JOHAB testing.
This commit is contained in:
parent
a2aa7df3d6
commit
0b95971d92
29
ChangeLog
29
ChangeLog
@ -1,3 +1,32 @@
|
||||
2000-09-23 Bruno Haible <haible@clisp.cons.org>
|
||||
|
||||
* iconvdata/gbk.c (USE_PRIVATE_AREA): Define to 0.
|
||||
(__gbk_to_ucs): Conditionalize private area mappings.
|
||||
(__gbk_from_ucs4_tab9): Likewise.
|
||||
(BODY for TO_LOOP): Likewise.
|
||||
* iconvdata/testdata/GBK: Don't use characters not yet in Unicode.
|
||||
* iconvdata/testdata/GBK..UTF-8: Likewise.
|
||||
* iconvdata/tst-tables.sh: Enable GBK test.
|
||||
|
||||
2000-09-23 Bruno Haible <haible@clisp.cons.org>
|
||||
|
||||
* iconvdata/gbk.c (__gbk_to_ucs): Swap U+2014 and U+2015.
|
||||
(__gbk_from_ucs4_tab4): Swap entries for U+2014 and U+2015.
|
||||
(BODY for FROM_LOOP): Reject input > 0xFEA0, avoids out-of-bounds
|
||||
array access.
|
||||
* iconvdata/gbgbk.c (BODY for FROM_LOOP): Map 0xA844 to 0xA1AA.
|
||||
* iconvdata/testdata/GBK..UTF8: Swap U+2014 and U+2015.
|
||||
|
||||
2000-09-23 Bruno Haible <haible@clisp.cons.org>
|
||||
|
||||
* iconvdata/johab.c (final_to_ucs): Fix typos.
|
||||
(jamo_from_ucs_table): Likewise.
|
||||
(BODY for FROM_LOOP): Map 0x5c to U+20A9. Reject ranges
|
||||
0xD9E6..0xD9FE and 0xDEF2..0xDEFE.
|
||||
(BODY for TO_LOOP): Map U+20A9 to 0x5c. Don't produce values in
|
||||
the range 0xD9E6..0xD9FE.
|
||||
* iconvdata/tst-tables.sh: Enable JOHAB testing.
|
||||
|
||||
2000-09-25 Ulrich Drepper <drepper@redhat.com>
|
||||
|
||||
* iconv/gconv_conf.c (__gconv_get_path): Fix problem with relative
|
||||
|
@ -26,6 +26,12 @@
|
||||
#include <wchar.h>
|
||||
#include <assert.h>
|
||||
|
||||
/* Unicode 3.0.1 does not contain all the characters in GBK. Define
|
||||
USE_PRIVATE_AREA to 1 in order to use mappings from/to the Unicode
|
||||
Private Use area. Until we see other systems using the same mappings,
|
||||
it is disabled. */
|
||||
#define USE_PRIVATE_AREA 0
|
||||
|
||||
/* The conversion table to UCS4 has almost no holes. It can be generated with:
|
||||
|
||||
perl tab.pl < gbk.txt
|
||||
@ -1739,7 +1745,13 @@ static const uint16_t __gbk_to_ucs[] =
|
||||
[0x1db0] = 0x00f2, [0x1db1] = 0x016b, [0x1db2] = 0x00fa, [0x1db3] = 0x01d4,
|
||||
[0x1db4] = 0x00f9, [0x1db5] = 0x01d6, [0x1db6] = 0x01d8, [0x1db7] = 0x01da,
|
||||
[0x1db8] = 0x01dc, [0x1db9] = 0x00fc, [0x1dba] = 0x00ea, [0x1dbb] = 0x0251,
|
||||
[0x1dbc] = 0xe7c7, [0x1dbd] = 0x0144, [0x1dbe] = 0x0148, [0x1dbf] = 0xe7c8,
|
||||
#if USE_PRIVATE_AREA
|
||||
[0x1dbc] = 0xe7c7,
|
||||
#endif
|
||||
[0x1dbd] = 0x0144, [0x1dbe] = 0x0148,
|
||||
#if USE_PRIVATE_AREA
|
||||
[0x1dbf] = 0xe7c8,
|
||||
#endif
|
||||
[0x1dc0] = 0x0261, [0x1dc5] = 0x3105, [0x1dc6] = 0x3106, [0x1dc7] = 0x3107,
|
||||
[0x1dc8] = 0x3108, [0x1dc9] = 0x3109, [0x1dca] = 0x310a, [0x1dcb] = 0x310b,
|
||||
[0x1dcc] = 0x310c, [0x1dcd] = 0x310d, [0x1dce] = 0x310e, [0x1dcf] = 0x310f,
|
||||
@ -1766,10 +1778,14 @@ static const uint16_t __gbk_to_ucs[] =
|
||||
[0x1e3b] = 0xfe5e, [0x1e3c] = 0xfe5f, [0x1e3d] = 0xfe60, [0x1e3e] = 0xfe61,
|
||||
[0x1e40] = 0xfe62, [0x1e41] = 0xfe63, [0x1e42] = 0xfe64, [0x1e43] = 0xfe65,
|
||||
[0x1e44] = 0xfe66, [0x1e45] = 0xfe68, [0x1e46] = 0xfe69, [0x1e47] = 0xfe6a,
|
||||
[0x1e48] = 0xfe6b, [0x1e49] = 0xe7e7, [0x1e4a] = 0xe7e8, [0x1e4b] = 0xe7e9,
|
||||
[0x1e48] = 0xfe6b,
|
||||
#if USE_PRIVATE_AREA
|
||||
[0x1e49] = 0xe7e7, [0x1e4a] = 0xe7e8, [0x1e4b] = 0xe7e9,
|
||||
[0x1e4c] = 0xe7ea, [0x1e4d] = 0xe7eb, [0x1e4e] = 0xe7ec, [0x1e4f] = 0xe7ed,
|
||||
[0x1e50] = 0xe7ee, [0x1e51] = 0xe7ef, [0x1e52] = 0xe7f0, [0x1e53] = 0xe7f1,
|
||||
[0x1e54] = 0xe7f2, [0x1e55] = 0xe7f3, [0x1e56] = 0x3007, [0x1e64] = 0x2500,
|
||||
[0x1e54] = 0xe7f2, [0x1e55] = 0xe7f3,
|
||||
#endif
|
||||
[0x1e56] = 0x3007, [0x1e64] = 0x2500,
|
||||
[0x1e65] = 0x2501, [0x1e66] = 0x2502, [0x1e67] = 0x2503, [0x1e68] = 0x2504,
|
||||
[0x1e69] = 0x2505, [0x1e6a] = 0x2506, [0x1e6b] = 0x2507, [0x1e6c] = 0x2508,
|
||||
[0x1e6d] = 0x2509, [0x1e6e] = 0x250a, [0x1e6f] = 0x250b, [0x1e70] = 0x250c,
|
||||
@ -5499,7 +5515,9 @@ static const uint16_t __gbk_to_ucs[] =
|
||||
[0x5dc2] = 0xfa0e, [0x5dc3] = 0xfa0f, [0x5dc4] = 0xfa11, [0x5dc5] = 0xfa13,
|
||||
[0x5dc6] = 0xfa14, [0x5dc7] = 0xfa18, [0x5dc8] = 0xfa1f, [0x5dc9] = 0xfa20,
|
||||
[0x5dca] = 0xfa21, [0x5dcb] = 0xfa23, [0x5dcc] = 0xfa24, [0x5dcd] = 0xfa27,
|
||||
[0x5dce] = 0xfa28, [0x5dcf] = 0xfa29, [0x5dd0] = 0xe815, [0x5dd1] = 0xe816,
|
||||
[0x5dce] = 0xfa28, [0x5dcf] = 0xfa29,
|
||||
#if USE_PRIVATE_AREA
|
||||
[0x5dd0] = 0xe815, [0x5dd1] = 0xe816,
|
||||
[0x5dd2] = 0xe817, [0x5dd3] = 0xe818, [0x5dd4] = 0xe819, [0x5dd5] = 0xe81a,
|
||||
[0x5dd6] = 0xe81b, [0x5dd7] = 0xe81c, [0x5dd8] = 0xe81d, [0x5dd9] = 0xe81e,
|
||||
[0x5dda] = 0xe81f, [0x5ddb] = 0xe820, [0x5ddc] = 0xe821, [0x5ddd] = 0xe822,
|
||||
@ -5520,6 +5538,9 @@ static const uint16_t __gbk_to_ucs[] =
|
||||
[0x5e17] = 0xe85b, [0x5e18] = 0xe85c, [0x5e19] = 0xe85d, [0x5e1a] = 0xe85e,
|
||||
[0x5e1b] = 0xe85f, [0x5e1c] = 0xe860, [0x5e1d] = 0xe861, [0x5e1e] = 0xe862,
|
||||
[0x5e1f] = 0xe863, [0x5e20] = 0xe864,
|
||||
#else
|
||||
[0x5e20] = 0x0000,
|
||||
#endif
|
||||
};
|
||||
|
||||
/* The table can be created using
|
||||
@ -12936,6 +12957,7 @@ static const char __gbk_from_ucs4_tab8[][2] =
|
||||
*/
|
||||
static const char __gbk_from_ucs4_tab9[][2] =
|
||||
{
|
||||
#if USE_PRIVATE_AREA
|
||||
[0x0000] = "\xa8\xbc", [0x0001] = "\xa8\xbf", [0x0020] = "\xa9\x89",
|
||||
[0x0021] = "\xa9\x8a", [0x0022] = "\xa9\x8b", [0x0023] = "\xa9\x8c",
|
||||
[0x0024] = "\xa9\x8d", [0x0025] = "\xa9\x8e", [0x0026] = "\xa9\x8f",
|
||||
@ -12968,6 +12990,7 @@ static const char __gbk_from_ucs4_tab9[][2] =
|
||||
[0x0096] = "\xfe\x99", [0x0097] = "\xfe\x9a", [0x0098] = "\xfe\x9b",
|
||||
[0x0099] = "\xfe\x9c", [0x009a] = "\xfe\x9d", [0x009b] = "\xfe\x9e",
|
||||
[0x009c] = "\xfe\x9f", [0x009d] = "\xfe\xa0",
|
||||
#endif
|
||||
};
|
||||
|
||||
/* The table can be created using
|
||||
@ -13418,7 +13441,7 @@ static const char __gbk_from_ucs4_tab12[][2] =
|
||||
cp = __gbk_from_ucs4_tab8[ch - 0x4e00]; \
|
||||
break; \
|
||||
case 0xe7c7 ... 0xe864: \
|
||||
cp = __gbk_from_ucs4_tab9[ch - 0xe7c7]; \
|
||||
cp = USE_PRIVATE_AREA ? __gbk_from_ucs4_tab9[ch - 0xe7c7] : "\0\0"; \
|
||||
break; \
|
||||
case 0xf92c: \
|
||||
cp = "\xfd\x9c"; \
|
||||
|
18
iconvdata/testdata/GBK
vendored
18
iconvdata/testdata/GBK
vendored
@ -438,7 +438,7 @@
|
||||
<20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20>
|
||||
<20> <20> <20> <20> <20> <20>
|
||||
芋 芍 見 角 言 谷 豆 豕 貝 赤 走 足 身 車 辛
|
||||
ò ū ú ǔ ù ǖ ǘ ǚ ǜ ü ê ɑ <20> ń ň ǹ
|
||||
¨° ¨± ¨˛ ¨ł ¨´ ¨µ ¨¶ ¨· ¨¸ ¨ą ¨ş ¨» A8BC ¨˝ ¨ľ A8BF
|
||||
阱 乳 事 些 亞 享 京 佯 依 侍 佳 使
|
||||
佬 供 例 來 侃 佰 併 侈 佩 佻 侖 佾 侏 侑 佺 兔
|
||||
兒 兕 兩 具 其 典 冽 函 刻 券
|
||||
@ -446,8 +446,8 @@
|
||||
周 咋 命 咎 固 垃 坷 坪 坡 坦 坼
|
||||
奈 奄 奔 妾 妻 委 妹 妮 姑 姆 姐 姍 始 姓 姊 妯
|
||||
妳 姒 姅 孟 孤 季 宗 定 官 宜 宙 宛 尚 屈 居
|
||||
﹢ ﹣ ﹤ ﹥ ﹦ ﹨ ﹩ ﹪ ﹫ 〾 ⿰ ⿱ ⿲ ⿳ ⿴ ⿵
|
||||
⿶ ⿷ ⿸ ⿹ ⿺ ⿻ 〇
|
||||
©€ ©<> ©‚ ©<> ©„ ©… ©† ©‡ ©<> A989 A98A A98B A98C A98D A98E A98F
|
||||
A990 A991 A992 A993 A994 A995 ©–
|
||||
岸 岩 岫 岱 岳 帘 帚 帖 帕 帛 帑 幸
|
||||
庚 店 府 底 庖 延 弦 弧 弩 往 征 彿 彼 忝 忠 忽
|
||||
念 忿 怏 怔 怯 怵 怖 怪 怕 怡 性 怩 怫 怛 或 戕
|
||||
@ -1402,9 +1402,9 @@
|
||||
<20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20> <20>
|
||||
<20>
|
||||
鑂 爕 夑 鑃 爤 鍁 𥘅 爮 牀 𤥴 梽 牕 牗 㹕 𣁄 栍
|
||||
⺁ <20> <20> <20> ⺄ 㑳 㑇 ⺈ ⺋ <20> 㖞 㘚 㘎 ⺌ ⺗ 㥮
|
||||
㤘 <20> 㧏 㧟 㩳 㧐 <20> <20> 㭎 㱮 㳠 ⺧ <20> <20> ⺪ 䁖
|
||||
䅟 ⺮ 䌷 ⺳ ⺶ ⺷ <20> 䎱 䎬 ⺻ 䏝 䓖 䙡 䙌 <20>
|
||||
䜣 䜩 䝼 䞍 ⻊ 䥇 䥺 䥽 䦂 䦃 䦅 䦆 䦟 䦛 䦷 䦶
|
||||
<EFBFBD> <20> 䲣 䲟 䲠 䲡 䱷 䲢 䴓 䴔 䴕 䴖 䴗 䴘 䴙 䶮
|
||||
<EFBFBD>
|
||||
FE50 FE51 FE52 FE53 FE54 FE55 FE56 FE57 FE58 FE59 FE5A FE5B FE5C FE5D FE5E FE5F
|
||||
FE60 FE61 FE62 FE63 FE64 FE65 FE66 FE67 FE68 FE69 FE6A FE6B FE6C FE6D FE6E FE6F
|
||||
FE70 FE71 FE72 FE73 FE74 FE75 FE76 FE77 FE78 FE79 FE7A FE7B FE7C FE7D FE7E
|
||||
FE80 FE81 FE82 FE83 FE84 FE85 FE86 FE87 FE88 FE89 FE8A FE8B FE8C FE8D FE8E FE8F
|
||||
FE90 FE91 FE92 FE93 FE94 FE95 FE96 FE97 FE98 FE99 FE9A FE9B FE9C FE9D FE9E FE9F
|
||||
FEA0
|
||||
|
18
iconvdata/testdata/GBK..UTF8
vendored
18
iconvdata/testdata/GBK..UTF8
vendored
@ -438,7 +438,7 @@
|
||||
█ ▉ ▊ ▋ ▌ ▍ ▎ ▏ ▓ ▔ ▕ ▼ ▽ ◢ ◣ ◤
|
||||
◥ ☉ ⊕ 〒 〝 〞
|
||||
ā á ǎ à ē é ě è ī í ǐ ì ō ó ǒ
|
||||
ò ū ú ǔ ù ǖ ǘ ǚ ǜ ü ê ɑ ń ň
|
||||
ò ū ú ǔ ù ǖ ǘ ǚ ǜ ü ê ɑ A8BC ń ň A8BF
|
||||
ɡ ㄅ ㄆ ㄇ ㄈ ㄉ ㄊ ㄋ ㄌ ㄍ ㄎ ㄏ
|
||||
ㄐ ㄑ ㄒ ㄓ ㄔ ㄕ ㄖ ㄗ ㄘ ㄙ ㄚ ㄛ ㄜ ㄝ ㄞ ㄟ
|
||||
ㄠ ㄡ ㄢ ㄣ ㄤ ㄥ ㄦ ㄧ ㄨ ㄩ
|
||||
@ -446,8 +446,8 @@
|
||||
㏄ ㏎ ㏑ ㏒ ㏕ ︰ ¬ ¦ ℡ ㈱ ‐
|
||||
ー ゛ ゜ ヽ ヾ 〆 ゝ ゞ ﹉ ﹊ ﹋ ﹌ ﹍ ﹎ ﹏ ﹐
|
||||
﹑ ﹒ ﹔ ﹕ ﹖ ﹗ ﹙ ﹚ ﹛ ﹜ ﹝ ﹞ ﹟ ﹠ ﹡
|
||||
﹢ ﹣ ﹤ ﹥ ﹦ ﹨ ﹩ ﹪ ﹫
|
||||
〇
|
||||
﹢ ﹣ ﹤ ﹥ ﹦ ﹨ ﹩ ﹪ ﹫ A989 A98A A98B A98C A98D A98E A98F
|
||||
A990 A991 A992 A993 A994 A995 〇
|
||||
─ ━ │ ┃ ┄ ┅ ┆ ┇ ┈ ┉ ┊ ┋
|
||||
┌ ┍ ┎ ┏ ┐ ┑ ┒ ┓ └ ┕ ┖ ┗ ┘ ┙ ┚ ┛
|
||||
├ ┝ ┞ ┟ ┠ ┡ ┢ ┣ ┤ ┥ ┦ ┧ ┨ ┩ ┪ ┫
|
||||
@ -1402,9 +1402,9 @@
|
||||
龕 龖 龗 龘 龜 龝 龞 龡 龢 龣 龤 龥 郎 凉 秊 裏
|
||||
隣
|
||||
兀 嗀 﨎 﨏 﨑 﨓 﨔 礼 﨟 蘒 﨡 﨣 﨤 﨧 﨨 﨩
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
FE50 FE51 FE52 FE53 FE54 FE55 FE56 FE57 FE58 FE59 FE5A FE5B FE5C FE5D FE5E FE5F
|
||||
FE60 FE61 FE62 FE63 FE64 FE65 FE66 FE67 FE68 FE69 FE6A FE6B FE6C FE6D FE6E FE6F
|
||||
FE70 FE71 FE72 FE73 FE74 FE75 FE76 FE77 FE78 FE79 FE7A FE7B FE7C FE7D FE7E
|
||||
FE80 FE81 FE82 FE83 FE84 FE85 FE86 FE87 FE88 FE89 FE8A FE8B FE8C FE8D FE8E FE8F
|
||||
FE90 FE91 FE92 FE93 FE94 FE95 FE96 FE97 FE98 FE99 FE9A FE9B FE9C FE9D FE9E FE9F
|
||||
FEA0
|
||||
|
@ -191,7 +191,7 @@ cat <<EOF |
|
||||
BIG5HKSCS
|
||||
EUC-JP
|
||||
EUC-CN GB2312
|
||||
#GBK Converter uses private area characters
|
||||
GBK
|
||||
EUC-TW
|
||||
GB18030
|
||||
#
|
||||
|
@ -1,3 +1,25 @@
|
||||
2000-09-24 Bruno Haible <haible@clisp.cons.org>
|
||||
|
||||
* gen-unicode-ctype.c: New file.
|
||||
* dump-ctype.c: New file.
|
||||
* Makefile (distribute): Add them.
|
||||
* locales/i18n: Update LC_CTYPE part to Unicode 3.0, using
|
||||
gen-unicode-ctype.c.
|
||||
(blank): Add U+1680.
|
||||
(cntrl): Add U+2028, U+2029.
|
||||
(space): Add U+1680, U+2028, U+2029.
|
||||
(digit): Add Myanmar, Ethiopic, Khmer, Mongolian, fullwidth digits.
|
||||
(alnum, alpha, print, graph, punct): Lots of additions.
|
||||
(lower, upper, tolower, toupper, combining, combining_level3): Update.
|
||||
(totitle): New map.
|
||||
* tst-ctype-de_DE.ISO-8859-1.in: Mark U00B5 as lower; the Unicode 3.0
|
||||
towupper function maps it to U039C. Mark U00A0 as graph, print, punct.
|
||||
* tests-mbwc/dat_iswctype.c: Mark U00A0 as graph, print, punct.
|
||||
* tests-mbwc/dat_iswgraph.c: Mark U00A0 as graph.
|
||||
* tests-mbwc/dat_iswprint.c: Mark U00A0 as print.
|
||||
* tests-mbwc/dat_iswpunct.c: Mark U00A0 as punct.
|
||||
* tests-mbwc/dat_wcswidth.c: U00A0 is now print.
|
||||
|
||||
2000-09-23 Bruno Haible <haible@clisp.cons.org>
|
||||
|
||||
* charmaps/GBK: Add commented mappings for GBK characters not yet in
|
||||
|
@ -72,7 +72,8 @@ distribute := CHECKSUMS README SUPPORTED ChangeLog \
|
||||
$(wildcard tests-mbwc/*.[ch]) \
|
||||
$(addprefix tst-fmon-locales/tstfmon_,$(fmon-tests)) \
|
||||
gen-locale.sh show-ucs-data.c tst-langinfo.sh \
|
||||
tst-wctype.sh tst-wctype.input
|
||||
tst-wctype.sh tst-wctype.input gen-unicode-ctype.c \
|
||||
dump-ctype.c
|
||||
|
||||
# Get $(inst_i18ndir) defined.
|
||||
include ../Makeconfig
|
||||
|
163
localedata/dump-ctype.c
Normal file
163
localedata/dump-ctype.c
Normal file
@ -0,0 +1,163 @@
|
||||
/* Dump the character classes and character maps of a locale to a bunch
|
||||
of individual files which can be processed with diff, sed etc.
|
||||
Copyright (C) 2000 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Bruno Haible <haible@clisp.cons.org>, 2000.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Library General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Library General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public
|
||||
License along with the GNU C Library; see the file COPYING.LIB. If not,
|
||||
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
/* Usage example:
|
||||
$ dump-ctype de_DE.UTF-8
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <wctype.h>
|
||||
#include <locale.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
|
||||
/* Name under which diagnostics are reported.  */
static const char *program_name = "dump-ctype";

/* Name of the locale being dumped; assigned from the command line.  */
static const char *locale;

/* Character classes to dump; each one is written to a file of the
   same name.  */
static const char *class_names[] =
  {
    "alnum",
    "alpha",
    "blank",
    "cntrl",
    "digit",
    "graph",
    "lower",
    "print",
    "punct",
    "space",
    "upper",
    "xdigit"
  };

/* Character maps to dump; each one is written to a file of the same
   name.  */
static const char *map_names[] =
  {
    "tolower",
    "toupper",
    "totitle"
  };
|
||||
|
||||
static void dump_class (const char *class_name)
|
||||
{
|
||||
wctype_t class;
|
||||
FILE *f;
|
||||
unsigned int ch;
|
||||
|
||||
class = wctype (class_name);
|
||||
if (class == (wctype_t) 0)
|
||||
{
|
||||
fprintf (stderr, "%s %s: noexistent class %s\n", program_name,
|
||||
locale, class_name);
|
||||
return;
|
||||
}
|
||||
|
||||
f = fopen (class_name, "w");
|
||||
if (f == NULL)
|
||||
{
|
||||
fprintf (stderr, "%s %s: cannot open file %s/%s\n", program_name,
|
||||
locale, locale, class_name);
|
||||
exit (1);
|
||||
}
|
||||
|
||||
for (ch = 0; ch < 0x10000; ch++)
|
||||
if (iswctype (ch, class))
|
||||
fprintf (f, "0x%04X\n", ch);
|
||||
|
||||
if (ferror (f) || fclose (f))
|
||||
{
|
||||
fprintf (stderr, "%s %s: I/O error on file %s/%s\n", program_name,
|
||||
locale, locale, class_name);
|
||||
exit (1);
|
||||
}
|
||||
}
|
||||
|
||||
static void dump_map (const char *map_name)
|
||||
{
|
||||
wctrans_t map;
|
||||
FILE *f;
|
||||
unsigned int ch;
|
||||
|
||||
map = wctrans (map_name);
|
||||
if (map == (wctrans_t) 0)
|
||||
{
|
||||
fprintf (stderr, "%s %s: noexistent map %s\n", program_name,
|
||||
locale, map_name);
|
||||
return;
|
||||
}
|
||||
|
||||
f = fopen (map_name, "w");
|
||||
if (f == NULL)
|
||||
{
|
||||
fprintf (stderr, "%s %s: cannot open file %s/%s\n", program_name,
|
||||
locale, locale, map_name);
|
||||
exit (1);
|
||||
}
|
||||
|
||||
for (ch = 0; ch < 0x10000; ch++)
|
||||
if (towctrans (ch, map) != ch)
|
||||
fprintf (f, "0x%04X\t0x%04X\n", ch, towctrans (ch, map));
|
||||
|
||||
if (ferror (f) || fclose (f))
|
||||
{
|
||||
fprintf (stderr, "%s %s: I/O error on file %s/%s\n", program_name,
|
||||
locale, locale, map_name);
|
||||
exit (1);
|
||||
}
|
||||
}
|
||||
|
||||
int main (int argc, char *argv[])
|
||||
{
|
||||
size_t i;
|
||||
|
||||
if (argc != 2)
|
||||
{
|
||||
fprintf (stderr, "Usage: dump-ctype locale\n");
|
||||
exit (1);
|
||||
}
|
||||
locale = argv[1];
|
||||
|
||||
if (setlocale (LC_ALL, locale) == NULL)
|
||||
{
|
||||
fprintf (stderr, "%s: setlocale cannot switch to locale %s\n",
|
||||
program_name, locale);
|
||||
exit (1);
|
||||
}
|
||||
|
||||
if (mkdir (locale, 0777) < 0)
|
||||
{
|
||||
char buf[100];
|
||||
int save_errno = errno;
|
||||
|
||||
sprintf (buf, "%s: cannot create directory %s", program_name, locale);
|
||||
errno = save_errno;
|
||||
perror (buf);
|
||||
exit (1);
|
||||
}
|
||||
|
||||
if (chdir (locale) < 0)
|
||||
{
|
||||
char buf[100];
|
||||
int save_errno = errno;
|
||||
|
||||
sprintf (buf, "%s: cannot chdir to %s", program_name, locale);
|
||||
errno = save_errno;
|
||||
perror (buf);
|
||||
exit (1);
|
||||
}
|
||||
|
||||
for (i = 0; i < sizeof (class_names) / sizeof (class_names[0]); i++)
|
||||
dump_class (class_names[i]);
|
||||
|
||||
for (i = 0; i < sizeof (map_names) / sizeof (map_names[0]); i++)
|
||||
dump_map (map_names[i]);
|
||||
|
||||
return 0;
|
||||
}
|
792
localedata/gen-unicode-ctype.c
Normal file
792
localedata/gen-unicode-ctype.c
Normal file
@ -0,0 +1,792 @@
|
||||
/* Generate a Unicode conforming LC_CTYPE category from a UnicodeData file.
|
||||
Copyright (C) 2000 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Bruno Haible <haible@clisp.cons.org>, 2000.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Library General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Library General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public
|
||||
License along with the GNU C Library; see the file COPYING.LIB. If not,
|
||||
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
/* Usage example:
|
||||
$ gen-unicode-ctype /usr/local/share/Unidata/UnicodeData.txt \
|
||||
/usr/local/share/Unidata/PropList.txt \
|
||||
3.0
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
/* The attributes of one character, as given by one line of the
   UnicodeData.txt file.  */
struct unicode_attribute
{
  /* Character name.  */
  const char *name;
  /* General category.  */
  const char *category;
  /* Canonical combining classes.  */
  const char *combining;
  /* Bidirectional category.  */
  const char *bidi;
  /* Character decomposition mapping.  */
  const char *decomposition;
  /* Decimal digit value.  */
  const char *decdigit;
  /* Digit value.  */
  const char *digit;
  /* Numeric value.  */
  const char *numeric;
  /* Mirrored flag.  */
  int mirrored;
  /* Old Unicode 1.0 name.  */
  const char *oldname;
  /* Comment.  */
  const char *comment;
  /* Uppercase mapping.  */
  unsigned int upper;
  /* Lowercase mapping.  */
  unsigned int lower;
  /* Titlecase mapping.  */
  unsigned int title;
};

/* A missing string field is stored as "", a missing character field
   as NONE.  */
#define NONE (~(unsigned int)0)

/* The whole UnicodeData.txt file, indexed by code point.  */
struct unicode_attribute unicode_attributes [0x10000];
|
||||
|
||||
/* Stores in unicode_attributes[i] the values from the given fields. */
|
||||
static void
|
||||
fill_attribute (unsigned int i,
|
||||
const char *field1, const char *field2,
|
||||
const char *field3, const char *field4,
|
||||
const char *field5, const char *field6,
|
||||
const char *field7, const char *field8,
|
||||
const char *field9, const char *field10,
|
||||
const char *field11, const char *field12,
|
||||
const char *field13, const char *field14)
|
||||
{
|
||||
struct unicode_attribute * uni;
|
||||
|
||||
if (i >= 0x10000)
|
||||
{
|
||||
fprintf (stderr, "index too large\n");
|
||||
exit (1);
|
||||
}
|
||||
uni = &unicode_attributes[i];
|
||||
/* Copy the strings. */
|
||||
uni->name = strdup (field1);
|
||||
uni->category = (field2[0] == '\0' ? "" : strdup (field2));
|
||||
uni->combining = (field3[0] == '\0' ? "" : strdup (field3));
|
||||
uni->bidi = (field4[0] == '\0' ? "" : strdup (field4));
|
||||
uni->decomposition = (field5[0] == '\0' ? "" : strdup (field5));
|
||||
uni->decdigit = (field6[0] == '\0' ? "" : strdup (field6));
|
||||
uni->digit = (field7[0] == '\0' ? "" : strdup (field7));
|
||||
uni->numeric = (field8[0] == '\0' ? "" : strdup (field8));
|
||||
uni->mirrored = (field9[0] == 'Y');
|
||||
uni->oldname = (field10[0] == '\0' ? "" : strdup (field10));
|
||||
uni->comment = (field11[0] == '\0' ? "" : strdup (field11));
|
||||
uni->upper = (field12[0] =='\0' ? NONE : strtoul (field12, NULL, 16));
|
||||
uni->lower = (field13[0] =='\0' ? NONE : strtoul (field13, NULL, 16));
|
||||
uni->title = (field14[0] =='\0' ? NONE : strtoul (field14, NULL, 16));
|
||||
}
|
||||
|
||||
/* Size of the buffers that hold one field of the UnicodeData.txt file.  */
#define FIELDLEN 120

/* Reads the next field from STREAM into BUFFER, which must have room
   for FIELDLEN bytes.  The field extends up to, but does not include,
   the next DELIM character; carriage returns are dropped.
   Returns 1 and NUL-terminates BUFFER when DELIM was found.  Returns 0
   when end of file was reached first; BUFFER is then left unterminated.
   Exits with status 1 if the field is too long for the buffer.  */
static int
getfield (FILE *stream, char *buffer, int delim)
{
  char *out = buffer;
  int stored = 0;

  for (;;)
    {
      int c = getc (stream);

      if (c == EOF)
        return 0;
      if (c == delim)
        break;

      /* The original unicode.org UnicodeData.txt file happens to have
         CR/LF line terminators.  Silently convert to LF.  */
      if (c == '\r')
        continue;

      /* Append c to the buffer.  */
      stored++;
      if (stored >= FIELDLEN - 1)
        {
          fprintf (stderr, "field too long\n");
          exit (1);
        }
      *out++ = c;
    }

  *out = '\0';
  return 1;
}
|
||||
|
||||
/* Stores in unicode_attributes[] the entire contents of the UnicodeData.txt
|
||||
file. */
|
||||
static void
|
||||
fill_attributes (const char *unicodedata_filename)
|
||||
{
|
||||
unsigned int i, j;
|
||||
FILE *stream;
|
||||
char field0[FIELDLEN];
|
||||
char field1[FIELDLEN];
|
||||
char field2[FIELDLEN];
|
||||
char field3[FIELDLEN];
|
||||
char field4[FIELDLEN];
|
||||
char field5[FIELDLEN];
|
||||
char field6[FIELDLEN];
|
||||
char field7[FIELDLEN];
|
||||
char field8[FIELDLEN];
|
||||
char field9[FIELDLEN];
|
||||
char field10[FIELDLEN];
|
||||
char field11[FIELDLEN];
|
||||
char field12[FIELDLEN];
|
||||
char field13[FIELDLEN];
|
||||
char field14[FIELDLEN];
|
||||
int lineno = 0;
|
||||
|
||||
for (i = 0; i < 0x10000; i++)
|
||||
unicode_attributes[i].name = NULL;
|
||||
|
||||
stream = fopen (unicodedata_filename, "r");
|
||||
if (stream == NULL)
|
||||
{
|
||||
fprintf (stderr, "error during fopen of '%s'\n", unicodedata_filename);
|
||||
exit (1);
|
||||
}
|
||||
|
||||
for (;;)
|
||||
{
|
||||
int n;
|
||||
|
||||
lineno++;
|
||||
n = getfield(stream, field0, ';');
|
||||
n += getfield(stream, field1, ';');
|
||||
n += getfield(stream, field2, ';');
|
||||
n += getfield(stream, field3, ';');
|
||||
n += getfield(stream, field4, ';');
|
||||
n += getfield(stream, field5, ';');
|
||||
n += getfield(stream, field6, ';');
|
||||
n += getfield(stream, field7, ';');
|
||||
n += getfield(stream, field8, ';');
|
||||
n += getfield(stream, field9, ';');
|
||||
n += getfield(stream, field10, ';');
|
||||
n += getfield(stream, field11, ';');
|
||||
n += getfield(stream, field12, ';');
|
||||
n += getfield(stream, field13, ';');
|
||||
n += getfield(stream, field14, '\n');
|
||||
if (n == 0)
|
||||
break;
|
||||
if (n != 15)
|
||||
{
|
||||
fprintf (stderr, "short line in'%s':%d\n",
|
||||
unicodedata_filename, lineno);
|
||||
exit (1);
|
||||
}
|
||||
i = strtoul (field0, NULL, 16);
|
||||
if (field1[0] == '<'
|
||||
&& strlen (field1) >= 9
|
||||
&& !strcmp (field1 + strlen(field1) - 8, ", First>"))
|
||||
{
|
||||
/* Deal with a range. */
|
||||
lineno++;
|
||||
n = getfield(stream, field0, ';');
|
||||
n += getfield(stream, field1, ';');
|
||||
n += getfield(stream, field2, ';');
|
||||
n += getfield(stream, field3, ';');
|
||||
n += getfield(stream, field4, ';');
|
||||
n += getfield(stream, field5, ';');
|
||||
n += getfield(stream, field6, ';');
|
||||
n += getfield(stream, field7, ';');
|
||||
n += getfield(stream, field8, ';');
|
||||
n += getfield(stream, field9, ';');
|
||||
n += getfield(stream, field10, ';');
|
||||
n += getfield(stream, field11, ';');
|
||||
n += getfield(stream, field12, ';');
|
||||
n += getfield(stream, field13, ';');
|
||||
n += getfield(stream, field14, '\n');
|
||||
if (n != 15)
|
||||
{
|
||||
fprintf (stderr, "missing end range in '%s':%d\n",
|
||||
unicodedata_filename, lineno);
|
||||
exit (1);
|
||||
}
|
||||
if (!(field1[0] == '<'
|
||||
&& strlen (field1) >= 8
|
||||
&& !strcmp (field1 + strlen (field1) - 7, ", Last>")))
|
||||
{
|
||||
fprintf (stderr, "missing end range in '%s':%d\n",
|
||||
unicodedata_filename, lineno);
|
||||
exit (1);
|
||||
}
|
||||
field1[strlen (field1) - 7] = '\0';
|
||||
j = strtoul (field0, NULL, 16);
|
||||
for (; i <= j; i++)
|
||||
fill_attribute (i, field1+1, field2, field3, field4, field5,
|
||||
field6, field7, field8, field9, field10,
|
||||
field11, field12, field13, field14);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Single character line */
|
||||
fill_attribute (i, field1, field2, field3, field4, field5,
|
||||
field6, field7, field8, field9, field10,
|
||||
field11, field12, field13, field14);
|
||||
}
|
||||
}
|
||||
if (ferror (stream) || fclose (stream))
|
||||
{
|
||||
fprintf (stderr, "error reading from '%s'\n", unicodedata_filename);
|
||||
exit (1);
|
||||
}
|
||||
}
|
||||
|
||||
/* The combining property from the PropList.txt file. */
|
||||
char unicode_combining[0x10000];
|
||||
|
||||
/* Stores in unicode_combining[] the Combining property from the
|
||||
PropList.txt file. */
|
||||
static void
|
||||
fill_combining (const char *proplist_filename)
|
||||
{
|
||||
unsigned int i;
|
||||
FILE *stream;
|
||||
char buf[100+1];
|
||||
|
||||
for (i = 0; i < 0x10000; i++)
|
||||
unicode_combining[i] = 0;
|
||||
|
||||
stream = fopen (proplist_filename, "r");
|
||||
if (stream == NULL)
|
||||
{
|
||||
fprintf (stderr, "error during fopen of '%s'\n", proplist_filename);
|
||||
exit (1);
|
||||
}
|
||||
|
||||
/* Search for the "Property dump for: 0x20000004 (Combining)" line. */
|
||||
do
|
||||
{
|
||||
if (fscanf (stream, "%100[^\n]\n", buf) < 1)
|
||||
{
|
||||
fprintf (stderr, "no combining property found in '%s'\n",
|
||||
proplist_filename);
|
||||
exit (1);
|
||||
}
|
||||
}
|
||||
while (strstr (buf, "(Combining)") == NULL);
|
||||
|
||||
for (;;)
|
||||
{
|
||||
unsigned int i1, i2;
|
||||
|
||||
if (fscanf (stream, "%100[^\n]\n", buf) < 1)
|
||||
{
|
||||
fprintf (stderr, "premature end of combining property in '%s'\n",
|
||||
proplist_filename);
|
||||
exit (1);
|
||||
}
|
||||
if (buf[0] == '*')
|
||||
break;
|
||||
if (strlen (buf) >= 10 && buf[4] == '.' && buf[5] == '.')
|
||||
{
|
||||
if (sscanf (buf, "%4X..%4X", &i1, &i2) < 2)
|
||||
{
|
||||
fprintf (stderr, "parse error in combining property in '%s'\n",
|
||||
proplist_filename);
|
||||
exit (1);
|
||||
}
|
||||
}
|
||||
else if (strlen (buf) >= 4)
|
||||
{
|
||||
if (sscanf (buf, "%4X", &i1) < 1)
|
||||
{
|
||||
fprintf (stderr, "parse error in combining property in '%s'\n",
|
||||
proplist_filename);
|
||||
exit (1);
|
||||
}
|
||||
i2 = i1;
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf (stderr, "parse error in combining property in '%s'\n",
|
||||
proplist_filename);
|
||||
exit (1);
|
||||
}
|
||||
for (i = i1; i <= i2; i++)
|
||||
unicode_combining[i] = 1;
|
||||
}
|
||||
if (ferror (stream) || fclose (stream))
|
||||
{
|
||||
fprintf (stderr, "error reading from '%s'\n", proplist_filename);
|
||||
exit (1);
|
||||
}
|
||||
}
|
||||
|
||||
/* Character mappings. */
|
||||
|
||||
static unsigned int
|
||||
to_upper (unsigned int ch)
|
||||
{
|
||||
if (unicode_attributes[ch].name != NULL
|
||||
&& unicode_attributes[ch].upper != NONE)
|
||||
return unicode_attributes[ch].upper;
|
||||
else
|
||||
return ch;
|
||||
}
|
||||
|
||||
static unsigned int
|
||||
to_lower (unsigned int ch)
|
||||
{
|
||||
if (unicode_attributes[ch].name != NULL
|
||||
&& unicode_attributes[ch].lower != NONE)
|
||||
return unicode_attributes[ch].lower;
|
||||
else
|
||||
return ch;
|
||||
}
|
||||
|
||||
static unsigned int
|
||||
to_title (unsigned int ch)
|
||||
{
|
||||
if (unicode_attributes[ch].name != NULL
|
||||
&& unicode_attributes[ch].title != NONE)
|
||||
return unicode_attributes[ch].title;
|
||||
else
|
||||
return ch;
|
||||
}
|
||||
|
||||
/* Character class properties. */
|
||||
|
||||
/* A character counts as uppercase when lowercasing changes it.  */
static bool
is_upper (unsigned int ch)
{
  unsigned int lowered = to_lower (ch);

  return lowered != ch;
}
|
||||
|
||||
/* A character counts as lowercase when uppercasing changes it.  */
static bool
is_lower (unsigned int ch)
{
  /* <U00DF> is lowercase, but without simple to_upper mapping.  */
  if (ch == 0x00DF)
    return true;

  return to_upper (ch) != ch;
}
|
||||
|
||||
static bool
|
||||
is_alpha (unsigned int ch)
|
||||
{
|
||||
return (unicode_attributes[ch].name != NULL
|
||||
&& (unicode_attributes[ch].category[0] == 'L'
|
||||
/* Avoid warning for <U0345>. */
|
||||
|| (ch == 0x0345)
|
||||
/* Avoid warnings for <U2160>..<U217F>. */
|
||||
|| (unicode_attributes[ch].category[0] == 'N'
|
||||
&& unicode_attributes[ch].category[1] == 'l')
|
||||
/* Avoid warnings for <U24B6>..<U24E9>. */
|
||||
|| (unicode_attributes[ch].category[0] == 'S'
|
||||
&& unicode_attributes[ch].category[1] == 'o'
|
||||
&& strstr (unicode_attributes[ch].name, " LETTER ")
|
||||
!= NULL)));
|
||||
}
|
||||
|
||||
static bool
|
||||
is_digit (unsigned int ch)
|
||||
{
|
||||
return (unicode_attributes[ch].name != NULL
|
||||
&& unicode_attributes[ch].category[0] == 'N'
|
||||
&& unicode_attributes[ch].category[1] == 'd');
|
||||
/* Note: U+0BE7..U+0BEF and U+1369..U+1371 are digit systems without
|
||||
a zero. Must add <0> in front of them by hand. */
|
||||
}
|
||||
|
||||
/* Tells whether CH lies in the range U+0030..U+0039.  */
static bool
is_outdigit (unsigned int ch)
{
  return !(ch < 0x0030 || ch > 0x0039);
}
|
||||
|
||||
static bool
|
||||
is_blank (unsigned int ch)
|
||||
{
|
||||
return (ch == 0x0009 /* '\t' */
|
||||
/* Category Zs without mention of "<noBreak>" */
|
||||
|| (unicode_attributes[ch].name != NULL
|
||||
&& unicode_attributes[ch].category[0] == 'Z'
|
||||
&& unicode_attributes[ch].category[1] == 's'
|
||||
&& !strstr (unicode_attributes[ch].decomposition, "<noBreak>")));
|
||||
}
|
||||
|
||||
static bool
|
||||
is_space (unsigned int ch)
|
||||
{
|
||||
/* Don't make U+00A0 a space. Non-breaking space means that all programs
|
||||
should treat it like a punctuation character, not like a space. */
|
||||
return (ch == 0x0020 /* ' ' */
|
||||
|| ch == 0x000C /* '\f' */
|
||||
|| ch == 0x000A /* '\n' */
|
||||
|| ch == 0x000D /* '\r' */
|
||||
|| ch == 0x0009 /* '\t' */
|
||||
|| ch == 0x000B /* '\v' */
|
||||
/* Categories Zl, Zp, and Zs without mention of "<noBreak>" */
|
||||
|| (unicode_attributes[ch].name != NULL
|
||||
&& unicode_attributes[ch].category[0] == 'Z'
|
||||
&& (unicode_attributes[ch].category[1] == 'l'
|
||||
|| unicode_attributes[ch].category[1] == 'p'
|
||||
|| (unicode_attributes[ch].category[1] == 's'
|
||||
&& !strstr (unicode_attributes[ch].decomposition,
|
||||
"<noBreak>")))));
|
||||
}
|
||||
|
||||
static bool
|
||||
is_cntrl (unsigned int ch)
|
||||
{
|
||||
return (unicode_attributes[ch].name != NULL
|
||||
&& (!strcmp (unicode_attributes[ch].name, "<control>")
|
||||
/* Categories Zl and Zp */
|
||||
|| (unicode_attributes[ch].category[0] == 'Z'
|
||||
&& (unicode_attributes[ch].category[1] == 'l'
|
||||
|| unicode_attributes[ch].category[1] == 'p'))));
|
||||
}
|
||||
|
||||
/* Return true for every character accepted by is_digit, plus the
   letters U+0041..U+0046 ('A'..'F') and U+0061..U+0066 ('a'..'f').

   NOTE(review): because is_digit accepts every category-Nd character,
   this class also contains non-ASCII digits.  ISO C restricts
   iswxdigit to the 22 hexadecimal-digit characters -- confirm whether
   the wider set is intended.  */
static bool
is_xdigit (unsigned int ch)
{
  if (is_digit (ch))
    return true;

  if (ch >= 0x0041 && ch <= 0x0046)
    return true;

  return ch >= 0x0061 && ch <= 0x0066;
}
|
||||
|
||||
static bool
|
||||
is_graph (unsigned int ch)
|
||||
{
|
||||
return (unicode_attributes[ch].name != NULL
|
||||
&& strcmp (unicode_attributes[ch].name, "<control>")
|
||||
&& !is_space (ch));
|
||||
}
|
||||
|
||||
static bool
|
||||
is_print (unsigned int ch)
|
||||
{
|
||||
return (unicode_attributes[ch].name != NULL
|
||||
&& strcmp (unicode_attributes[ch].name, "<control>")
|
||||
/* Categories Zl and Zp */
|
||||
&& !(unicode_attributes[ch].name != NULL
|
||||
&& unicode_attributes[ch].category[0] == 'Z'
|
||||
&& (unicode_attributes[ch].category[1] == 'l'
|
||||
|| unicode_attributes[ch].category[1] == 'p')));
|
||||
}
|
||||
|
||||
/* Return true if CH is graphic (is_graph) but neither alphabetic
   (is_alpha) nor a digit (is_digit).  The disabled branch shows the
   alternative definition based on the 'P' categories.  */
static bool
is_punct (unsigned int ch)
{
#if 0
  return (unicode_attributes[ch].name != NULL
          && unicode_attributes[ch].category[0] == 'P');
#else
  /* The traditional POSIX definition of punctuation is every graphic,
     non-alphanumeric character.  */
  if (!is_graph (ch))
    return false;

  return !(is_alpha (ch) || is_digit (ch));
#endif
}
|
||||
|
||||
static bool
|
||||
is_combining (unsigned int ch)
|
||||
{
|
||||
return (unicode_attributes[ch].name != NULL
|
||||
&& unicode_combining[ch] != 0);
|
||||
}
|
||||
|
||||
static bool
|
||||
is_combining_level3 (unsigned int ch)
|
||||
{
|
||||
return is_combining (ch)
|
||||
&& !(unicode_attributes[ch].combining[0] != '\0'
|
||||
&& unicode_attributes[ch].combining[0] != '0'
|
||||
&& strtoul (unicode_attributes[ch].combining, NULL, 10) >= 200);
|
||||
}
|
||||
|
||||
/* Output a character class (= property) table.

   Writes CLASSNAME followed by the members of the class to STREAM.
   FUNC is evaluated for every code point below 0x10000; consecutive
   members are written as a range "<Ulow>..<Uhigh>", isolated members as
   "<Ucode>".  Entries are separated by ';' and a line is continued with
   '/' before it would exceed 75 columns.  */

static void
output_charclass (FILE *stream, const char *classname,
                  bool (*func) (unsigned int))
{
  const int max_column = 75;
  char table[0x10000];
  bool need_semicolon = false;
  /* Start beyond max_column so that the very first entry is placed on a
     continuation line.  */
  int column = 1000;
  unsigned int i;

  /* Evaluate the predicate once per code point.  */
  for (i = 0; i < 0x10000; i++)
    table[i] = (int) func (i);

  fprintf (stream, "%s ", classname);

  i = 0;
  while (i < 0x10000)
    {
      unsigned int low, high;
      char buf[17];
      size_t len;

      if (!table[i])
        {
          i++;
          continue;
        }

      /* Extend the run [low, high] over all directly following members.  */
      low = i;
      for (i++; i < 0x10000 && table[i]; i++)
        continue;
      high = i - 1;

      if (low != high)
        sprintf (buf, "<U%04X>..<U%04X>", low, high);
      else
        sprintf (buf, "<U%04X>", low);
      len = strlen (buf);

      if (need_semicolon)
        {
          fputc (';', stream);
          column++;
        }

      if (column + len > max_column)
        {
          fputs ("/\n ", stream);
          column = 3;
        }

      fputs (buf, stream);
      column += len;
      need_semicolon = true;
    }

  fputc ('\n', stream);
}
|
||||
|
||||
/* Output a character mapping table.

   Writes MAPNAME followed by one "(<Ufrom>,<Uto>)" pair for every code
   point below 0x10000 that FUNC maps to a value different from itself.
   Pairs are separated by ';' and a line is continued with '/' before it
   would exceed 75 columns.  */

static void
output_charmap (FILE *stream, const char *mapname,
                unsigned int (*func) (unsigned int))
{
  char table[0x10000];
  unsigned int i;
  bool need_semicolon;
  const int max_column = 75;
  int column;

  /* Remember which code points are changed by the mapping.  */
  for (i = 0; i < 0x10000; i++)
    table[i] = (func (i) != i);

  fprintf (stream, "%s ", mapname);
  need_semicolon = false;
  /* Start beyond max_column so that the very first pair is placed on a
     continuation line.  */
  column = 1000;
  for (i = 0; i < 0x10000; i++)
    if (table[i])
      {
        /* "%04X" is only a minimum width: a mapping target above 0xFFFF
           prints more than four digits.  The buffer is sized for any
           unsigned int value and the write is bounded, so such a target
           can no longer overrun it.  */
        char buf[32];
        int len;

        len = snprintf (buf, sizeof (buf), "(<U%04X>,<U%04X>)", i, func (i));

        if (need_semicolon)
          {
            fprintf (stream, ";");
            column++;
          }

        if (column + len > max_column)
          {
            fprintf (stream, "/\n ");
            column = 3;
          }

        fprintf (stream, "%s", buf);
        column += len;
        need_semicolon = true;
      }
  fprintf (stream, "\n");
}
|
||||
|
||||
/* Output the width table.

   Currently a stub: nothing is written to STREAM.  */

static void
output_widthmap (FILE *stream)
{
  (void) stream;
}
|
||||
|
||||
/* Output the tables to the given file. */
|
||||
|
||||
static void
|
||||
output_tables (const char *filename, const char *version)
|
||||
{
|
||||
FILE *stream;
|
||||
unsigned int ch;
|
||||
|
||||
stream = fopen (filename, "w");
|
||||
if (stream == NULL)
|
||||
{
|
||||
fprintf (stderr, "cannot open '%s' for writing\n", filename);
|
||||
exit (1);
|
||||
}
|
||||
|
||||
fprintf (stream, "escape_char /\n");
|
||||
fprintf (stream, "comment_char %%\n");
|
||||
fprintf (stream, "\n");
|
||||
fprintf (stream, "%% Generated automatically by gen-unicode for Unicode %s.\n",
|
||||
version);
|
||||
fprintf (stream, "\n");
|
||||
|
||||
fprintf (stream, "LC_IDENTIFICATION\n");
|
||||
fprintf (stream, "title \"Unicode %s FDCC-set\"\n", version);
|
||||
fprintf (stream, "source \"UnicodeData.txt, PropList.txt\"\n");
|
||||
fprintf (stream, "address \"\"\n");
|
||||
fprintf (stream, "contact \"\"\n");
|
||||
fprintf (stream, "email \"bug-glibc@gnu.org\"\n");
|
||||
fprintf (stream, "tel \"\"\n");
|
||||
fprintf (stream, "fax \"\"\n");
|
||||
fprintf (stream, "language \"\"\n");
|
||||
fprintf (stream, "territory \"Earth\"\n");
|
||||
fprintf (stream, "revision \"%s\"\n", version);
|
||||
{
|
||||
time_t now;
|
||||
char date[11];
|
||||
now = time (NULL);
|
||||
strftime (date, sizeof (date), "%Y-%m-%d", gmtime (&now));
|
||||
fprintf (stream, "date \"%s\"\n", date);
|
||||
}
|
||||
fprintf (stream, "category \"unicode:2000\";LC_CTYPE\n");
|
||||
fprintf (stream, "END LC_IDENTIFICATION\n");
|
||||
fprintf (stream, "\n");
|
||||
|
||||
/* Verifications. */
|
||||
for (ch = 0; ch < 0x10000; ch++)
|
||||
{
|
||||
/* toupper restriction: "Only characters specified for the keywords
|
||||
lower and upper shall be specified. */
|
||||
if (to_upper (ch) != ch && !(is_lower (ch) || is_upper (ch)))
|
||||
fprintf (stderr,
|
||||
"<U%04X> is not upper|lower but toupper(0x%04X) = 0x%04X\n",
|
||||
ch, ch, to_upper (ch));
|
||||
|
||||
/* tolower restriction: "Only characters specified for the keywords
|
||||
lower and upper shall be specified. */
|
||||
if (to_lower (ch) != ch && !(is_lower (ch) || is_upper (ch)))
|
||||
fprintf (stderr,
|
||||
"<U%04X> is not upper|lower but tolower(0x%04X) = 0x%04X\n",
|
||||
ch, ch, to_lower (ch));
|
||||
|
||||
/* alpha restriction: "Characters classified as either upper or lower
|
||||
shall automatically belong to this class. */
|
||||
if ((is_lower (ch) || is_upper (ch)) && !is_alpha (ch))
|
||||
fprintf (stderr, "<U%04X> is upper|lower but not alpha\n", ch);
|
||||
|
||||
/* alpha restriction: "No character specified for the keywords cntrl,
|
||||
digit, punct or space shall be specified." */
|
||||
if (is_alpha (ch) && is_cntrl (ch))
|
||||
fprintf (stderr, "<U%04X> is alpha and cntrl\n", ch);
|
||||
if (is_alpha (ch) && is_digit (ch))
|
||||
fprintf (stderr, "<U%04X> is alpha and digit\n", ch);
|
||||
if (is_alpha (ch) && is_punct (ch))
|
||||
fprintf (stderr, "<U%04X> is alpha and punct\n", ch);
|
||||
if (is_alpha (ch) && is_space (ch))
|
||||
fprintf (stderr, "<U%04X> is alpha and space\n", ch);
|
||||
|
||||
/* space restriction: "No character specified for the keywords upper,
|
||||
lower, alpha, digit, graph or xdigit shall be specified."
|
||||
upper, lower, alpha already checked above. */
|
||||
if (is_space (ch) && is_digit (ch))
|
||||
fprintf (stderr, "<U%04X> is space and digit\n", ch);
|
||||
if (is_space (ch) && is_graph (ch))
|
||||
fprintf (stderr, "<U%04X> is space and graph\n", ch);
|
||||
if (is_space (ch) && is_xdigit (ch))
|
||||
fprintf (stderr, "<U%04X> is space and xdigit\n", ch);
|
||||
|
||||
/* cntrl restriction: "No character specified for the keywords upper,
|
||||
lower, alpha, digit, punct, graph, print or xdigit shall be
|
||||
specified." upper, lower, alpha already checked above. */
|
||||
if (is_cntrl (ch) && is_digit (ch))
|
||||
fprintf (stderr, "<U%04X> is cntrl and digit\n", ch);
|
||||
if (is_cntrl (ch) && is_punct (ch))
|
||||
fprintf (stderr, "<U%04X> is cntrl and punct\n", ch);
|
||||
if (is_cntrl (ch) && is_graph (ch))
|
||||
fprintf (stderr, "<U%04X> is cntrl and graph\n", ch);
|
||||
if (is_cntrl (ch) && is_print (ch))
|
||||
fprintf (stderr, "<U%04X> is cntrl and print\n", ch);
|
||||
if (is_cntrl (ch) && is_xdigit (ch))
|
||||
fprintf (stderr, "<U%04X> is cntrl and xdigit\n", ch);
|
||||
|
||||
/* punct restriction: "No character specified for the keywords upper,
|
||||
lower, alpha, digit, cntrl, xdigit or as the <space> character shall
|
||||
be specified." upper, lower, alpha, cntrl already checked above. */
|
||||
if (is_punct (ch) && is_digit (ch))
|
||||
fprintf (stderr, "<U%04X> is punct and digit\n", ch);
|
||||
if (is_punct (ch) && is_xdigit (ch))
|
||||
fprintf (stderr, "<U%04X> is punct and xdigit\n", ch);
|
||||
if (is_punct (ch) && (ch == 0x0020))
|
||||
fprintf (stderr, "<U%04X> is punct\n", ch);
|
||||
|
||||
/* graph restriction: "No character specified for the keyword cntrl
|
||||
shall be specified." Already checked above. */
|
||||
|
||||
/* print restriction: "No character specified for the keyword cntrl
|
||||
shall be specified." Already checked above. */
|
||||
|
||||
/* graph - print relation: differ only in the <space> character.
|
||||
How is this possible if there are more than one space character?!
|
||||
I think susv2/xbd/locale.html should speak of "space characters",
|
||||
not "space character". */
|
||||
if (is_print (ch) && !(is_graph (ch) || /* ch == 0x0020 */ is_space (ch)))
|
||||
fprintf (stderr, "<U%04X> is print but not graph|<space>\n", ch);
|
||||
if (!is_print (ch) && (is_graph (ch) || ch == 0x0020))
|
||||
fprintf (stderr, "<U%04X> is graph|<space> but not print\n", ch);
|
||||
}
|
||||
|
||||
fprintf (stream, "LC_CTYPE\n");
|
||||
output_charclass (stream, "upper", is_upper);
|
||||
output_charclass (stream, "lower", is_lower);
|
||||
output_charclass (stream, "alpha", is_alpha);
|
||||
output_charclass (stream, "digit", is_digit);
|
||||
output_charclass (stream, "outdigit", is_outdigit);
|
||||
output_charclass (stream, "blank", is_blank);
|
||||
output_charclass (stream, "space", is_space);
|
||||
output_charclass (stream, "cntrl", is_cntrl);
|
||||
output_charclass (stream, "punct", is_punct);
|
||||
output_charclass (stream, "xdigit", is_xdigit);
|
||||
output_charclass (stream, "graph", is_graph);
|
||||
output_charclass (stream, "print", is_print);
|
||||
output_charclass (stream, "class \"combining\";", is_combining);
|
||||
output_charclass (stream, "class \"combining_level3\";", is_combining_level3);
|
||||
output_charmap (stream, "toupper", to_upper);
|
||||
output_charmap (stream, "tolower", to_lower);
|
||||
output_charmap (stream, "map \"totitle\";", to_title);
|
||||
output_widthmap (stream);
|
||||
fprintf (stream, "END LC_CTYPE\n");
|
||||
|
||||
if (ferror (stream) || fclose (stream))
|
||||
{
|
||||
fprintf (stderr, "error writing to '%s'\n", filename);
|
||||
exit (1);
|
||||
}
|
||||
}
|
||||
|
||||
/* Entry point.  Expects exactly three arguments, named in the usage
   message as UnicodeData.txt, PropList.txt and version.  The first is
   passed to fill_attributes, the second to fill_combining, and the
   third to output_tables together with the fixed output file name
   "unicode".  */
int
main (int argc, char * argv[])
{
  if (argc == 4)
    {
      fill_attributes (argv[1]);
      fill_combining (argv[2]);

      output_tables ("unicode", argv[3]);

      return 0;
    }

  fprintf (stderr, "Usage: %s UnicodeData.txt PropList.txt version\n",
           argv[0]);
  exit (1);
}
|
File diff suppressed because it is too large
Load Diff
@ -81,7 +81,11 @@ TST_ISWCTYPE tst_iswctype_loc [] = {
|
||||
{ { 0x00B9, "digit" }, { 0,1,0 } }, /* SUP 1 */
|
||||
{ { 0x00BE, "digit" }, { 0,1,0 } }, /* 3/4 */
|
||||
{ { 0x009F, "graph" }, { 0,1,0 } }, /* CTRL */
|
||||
#ifdef SHOJI_IS_RIGHT
|
||||
{ { 0x00A0, "graph" }, { 0,1,0 } }, /* NB SPACE */
|
||||
#else
|
||||
{ { 0x00A0, "graph" }, { 0,0,0 } }, /* NB SPACE */
|
||||
#endif
|
||||
{ { 0x00A1, "graph" }, { 0,0,0 } }, /* UD ! */
|
||||
{ { 0x00B1, "graph" }, { 0,0,0 } }, /* +- sign */
|
||||
{ { 0x00B3, "graph" }, { 0,0,0 } }, /* SUP 3 */
|
||||
@ -97,7 +101,11 @@ TST_ISWCTYPE tst_iswctype_loc [] = {
|
||||
{ { 0x00F8, "graph" }, { 0,0,0 } }, /* o stroke */
|
||||
{ { 0x00FF, "graph" }, { 0,0,0 } }, /* y dia */
|
||||
{ { 0x009F, "print" }, { 0,1,0 } }, /* CTRL */
|
||||
#ifdef SHOJI_IS_RIGHT
|
||||
{ { 0x00A0, "print" }, { 0,1,0 } }, /* NB SPACE */
|
||||
#else
|
||||
{ { 0x00A0, "print" }, { 0,0,0 } }, /* NB SPACE */
|
||||
#endif
|
||||
{ { 0x00A1, "print" }, { 0,0,0 } }, /* UD ! */
|
||||
{ { 0x00B1, "print" }, { 0,0,0 } }, /* +- sign */
|
||||
{ { 0x00B4, "print" }, { 0,0,0 } }, /* ACUTE */
|
||||
@ -112,7 +120,11 @@ TST_ISWCTYPE tst_iswctype_loc [] = {
|
||||
{ { 0x00F8, "print" }, { 0,0,0 } }, /* o stroke */
|
||||
{ { 0x00FF, "print" }, { 0,0,0 } }, /* y dia */
|
||||
{ { 0x009F, "punct" }, { 0,1,0 } }, /* CTRL */
|
||||
#ifdef SHOJI_IS_RIGHT
|
||||
{ { 0x00A0, "punct" }, { 0,1,0 } }, /* NB SPACE */
|
||||
#else
|
||||
{ { 0x00A0, "punct" }, { 0,0,0 } }, /* NB SPACE */
|
||||
#endif
|
||||
{ { 0x00A1, "punct" }, { 0,0,0 } }, /* UD ! */
|
||||
{ { 0x00B0, "punct" }, { 0,0,0 } }, /* Degree */
|
||||
{ { 0x00B1, "punct" }, { 0,0,0 } }, /* +- sign */
|
||||
|
@ -16,7 +16,11 @@ TST_ISW_LOC (GRAPH, graph) = {
|
||||
{
|
||||
{ { 0x0080 }, { 0,1,0 } }, /* CTRL */
|
||||
{ { 0x009F }, { 0,1,0 } }, /* CTRL */
|
||||
#ifdef SHOJI_IS_RIGHT
|
||||
{ { 0x00A0 }, { 0,1,0 } }, /* NB SPACE */
|
||||
#else
|
||||
{ { 0x00A0 }, { 0,0,0 } }, /* NB SPACE */
|
||||
#endif
|
||||
{ { 0x00A1 }, { 0,0,0 } }, /* UD ! */
|
||||
{ { 0x00B0 }, { 0,0,0 } }, /* Degree */
|
||||
{ { 0x00B1 }, { 0,0,0 } }, /* +- sign */
|
||||
|
@ -16,7 +16,11 @@ TST_ISW_LOC (PRINT, print) = {
|
||||
{
|
||||
{ { 0x0080 }, { 0,1,0 } }, /* CTRL */
|
||||
{ { 0x009F }, { 0,1,0 } }, /* CTRL */
|
||||
#ifdef SHOJI_IS_RIGHT
|
||||
{ { 0x00A0 }, { 0,1,0 } }, /* NB SPACE */
|
||||
#else
|
||||
{ { 0x00A0 }, { 0,0,0 } }, /* NB SPACE */
|
||||
#endif
|
||||
{ { 0x00A1 }, { 0,0,0 } }, /* UD ! */
|
||||
{ { 0x00B0 }, { 0,0,0 } }, /* Degree */
|
||||
{ { 0x00B1 }, { 0,0,0 } }, /* +- sign */
|
||||
|
@ -16,7 +16,11 @@ TST_ISW_LOC (PUNCT, punct) = {
|
||||
{
|
||||
{ { 0x0080 }, { 0,1,0 } }, /* CTRL */
|
||||
{ { 0x009F }, { 0,1,0 } }, /* CTRL */
|
||||
#ifdef SHOJI_IS_RIGHT
|
||||
{ { 0x00A0 }, { 0,1,0 } }, /* NB SPACE */
|
||||
#else
|
||||
{ { 0x00A0 }, { 0,0,0 } }, /* NB SPACE */
|
||||
#endif
|
||||
{ { 0x00A1 }, { 0,0,0 } }, /* UD ! */
|
||||
{ { 0x00B0 }, { 0,0,0 } }, /* Degree */
|
||||
{ { 0x00B1 }, { 0,0,0 } }, /* +- sign */
|
||||
|
@ -56,7 +56,11 @@ TST_WCSWIDTH tst_wcswidth_loc [] = {
|
||||
/*expect*/ { 0,1,-1 },
|
||||
},
|
||||
{ /*input.*/ { { 0x00C1,0x00A0,0x0000 }, 2 }, /* 16 */
|
||||
#ifdef SHOJI_IS_RIGHT
|
||||
/*expect*/ { 0,1,-1 },
|
||||
#else
|
||||
/*expect*/ { 0,1,2 },
|
||||
#endif
|
||||
},
|
||||
{ /*input.*/ { { 0x00C1,0x00A1,0x0000 }, 2 }, /* 17 */
|
||||
/*expect*/ { 0,1,2 },
|
||||
|
@ -1,5 +1,5 @@
|
||||
lower 嵗╯丰戍貝物洎悖停眾斯須號獄播噶擱藏霰匸<E99CB0>帊昅恘
|
||||
000000000000000000000000000000000000000000000000
|
||||
000000000000000000000100000000000000000000000000
|
||||
lower 倳眑婭笫崷窙嗲睧颬睼麧緗鴇膹擨闀貘覷鏷禴矙𡜍𦶠<F0A19C8D>
|
||||
000000000000000111111111111111111111111011111111
|
||||
upper 嵗╯丰戍貝物洎悖停眾斯須號獄播噶擱藏霰匸<E99CB0>帊昅恘
|
||||
@ -23,11 +23,11 @@ space
|
||||
space 倳眑婭笫崷窙嗲睧颬睼麧緗鴇膹擨闀貘覷鏷禴矙𡜍𦶠<F0A19C8D>
|
||||
000000000000000000000000000000000000000000000000
|
||||
print 嵗╯丰戍貝物洎悖停眾斯須號獄播噶擱藏霰匸<E99CB0>帊昅恘
|
||||
011111111111111111111111111111111111111111111111
|
||||
111111111111111111111111111111111111111111111111
|
||||
print 倳眑婭笫崷窙嗲睧颬睼麧緗鴇膹擨闀貘覷鏷禴矙𡜍𦶠<F0A19C8D>
|
||||
111111111111111111111111111111111111111111111111
|
||||
graph 嵗╯丰戍貝物洎悖停眾斯須號獄播噶擱藏霰匸<E99CB0>帊昅恘
|
||||
011111111111111111111111111111111111111111111111
|
||||
111111111111111111111111111111111111111111111111
|
||||
graph 倳眑婭笫崷窙嗲睧颬睼麧緗鴇膹擨闀貘覷鏷禴矙𡜍𦶠<F0A19C8D>
|
||||
111111111111111111111111111111111111111111111111
|
||||
blank 嵗╯丰戍貝物洎悖停眾斯須號獄播噶擱藏霰匸<E99CB0>帊昅恘
|
||||
@ -39,7 +39,7 @@ cntrl
|
||||
cntrl 倳眑婭笫崷窙嗲睧颬睼麧緗鴇膹擨闀貘覷鏷禴矙𡜍𦶠<F0A19C8D>
|
||||
000000000000000000000000000000000000000000000000
|
||||
punct 嵗╯丰戍貝物洎悖停眾斯須號獄播噶擱藏霰匸<E99CB0>帊昅恘
|
||||
011111111101111111111011110111110000000000000000
|
||||
111111111101111111111011110111110000000000000000
|
||||
punct 倳眑婭笫崷窙嗲睧颬睼麧緗鴇膹擨闀貘覷鏷禴矙𡜍𦶠<F0A19C8D>
|
||||
000000010000000000000000000000000000000100000000
|
||||
alnum 嵗╯丰戍貝物洎悖停眾斯須號獄播噶擱藏霰匸<E99CB0>帊昅恘
|
||||
|
Loading…
Reference in New Issue
Block a user