ICU-5995 remove unused states from gb18030 and ibm-964 (EUC-TW) conversion tables
X-SVN-Rev: 25801
This commit is contained in:
parent
bda3b4e16c
commit
5a876de674
@ -1,4 +1,4 @@
|
||||
# Copyright (C) 2000-2005, International Business Machines Corporation and others.
|
||||
# Copyright (C) 2000-2009, International Business Machines Corporation and others.
|
||||
# All Rights Reserved.
|
||||
|
||||
# ICU codepage data for GB 18030
|
||||
@ -16,23 +16,26 @@
|
||||
# Similarly, some of the BMP mappings are marked as unassigned for the same reason.
|
||||
|
||||
# Mostly assigned sequences, with branches in the lead bytes
|
||||
<icu:state> 0-7f, 81:7, 82:8, 83:9, 84:a, 85-fe:4
|
||||
<icu:state> 30-39:2, 40-7e, 80-fe
|
||||
<icu:state> 81-fe:3
|
||||
# The second <icu:state> line is commented out (and does not count)
|
||||
# because the state table is hand-optimized and does not use what would be
|
||||
# the natural path for the encoding scheme.
|
||||
<icu:state> 0-7f, 81:6, 82:7, 83:8, 84:9, 85-fe:3
|
||||
# <icu:state> 30-39:2, 40-7e, 80-fe
|
||||
<icu:state> 81-fe:2
|
||||
<icu:state> 30-39
|
||||
|
||||
# All-unassigned 4-byte sequences
|
||||
<icu:state> 30-39:5, 40-7e, 80-fe
|
||||
<icu:state> 81-fe:6
|
||||
<icu:state> 30-39:4, 40-7e, 80-fe
|
||||
<icu:state> 81-fe:5
|
||||
<icu:state> 30-39.u
|
||||
|
||||
# Some unassigned 4-byte sequences, one state for each of the lead bytes 81-84
|
||||
# Each of these states branches on the second of four bytes; for the third and fourth bytes,
|
||||
# unassigned sequences continue with state 4, assigned ones with state 1
# (these were states 5 and 2 before the unused second state was commented out)
|
||||
<icu:state> 30:2, 31-35:5, 36-39:2, 40-7e, 80-fe
|
||||
<icu:state> 30-35:2, 36-39:5, 40-7e, 80-fe
|
||||
<icu:state> 30-35:5, 36:2, 37-39:5, 40-7e, 80-fe
|
||||
<icu:state> 30-31:2, 32-39:5, 40-7e, 80-fe
|
||||
<icu:state> 30:1, 31-35:4, 36-39:1, 40-7e, 80-fe
|
||||
<icu:state> 30-35:1, 36-39:4, 40-7e, 80-fe
|
||||
<icu:state> 30-35:4, 36:1, 37-39:4, 40-7e, 80-fe
|
||||
<icu:state> 30-31:1, 32-39:4, 40-7e, 80-fe
|
||||
|
||||
# GB 18030 BMP mappings that are not handled algorithmically are
|
||||
# generated using gbmake4 and gbtoucm tools. Please see charset/source/gb18030/gb18030.html
|
||||
|
@ -1,6 +1,6 @@
|
||||
# ***************************************************************************
|
||||
# *
|
||||
# * Copyright (C) 1995-2007, International Business Machines
|
||||
# * Copyright (C) 1995-2009, International Business Machines
|
||||
# * Corporation and others. All Rights Reserved.
|
||||
# *
|
||||
# ***************************************************************************
|
||||
@ -18,15 +18,30 @@
|
||||
<icu:charsetFamily> "ASCII"
|
||||
<icu:alias> "ibm-964_VPUA"
|
||||
|
||||
<icu:state> 0-8d, 8e:2, 90-9f, a1-fe:1, aa-c1:5, c3:5, fe:5
|
||||
# The fourth <icu:state> line is commented out (and does not count)
|
||||
# because the state table is hand-optimized and does not use what would be
|
||||
# the natural path for the encoding scheme.
|
||||
# The third <icu:state> used to start with "a1-b0:3" but overrode every one
|
||||
# of these byte values with a different state transition.
|
||||
|
||||
# 0: Initial state, single bytes and lead bytes
|
||||
<icu:state> 0-8d, 8e:2, 90-9f, a1-fe:1, aa-c1:4, c3:4, fe:4
|
||||
# 1: Trail byte state with mappings
|
||||
<icu:state> a1-fe
|
||||
<icu:state> a1-b0:3, a1:4, a2:8, a3-ab:4, ac:7, ad:6, ae-b0:4
|
||||
<icu:state> a1-fe:1
|
||||
<icu:state> a1-fe:5
|
||||
# 2: Second of four bytes, follows lead byte 8e
|
||||
<icu:state> a1:3, a2:7, a3-ab:3, ac:6, ad:5, ae-b0:3
|
||||
# (unreachable/optimized away)
|
||||
# <icu:state> a1-fe:1
|
||||
# 3: Third of four bytes, 8e xx .. .. for most xx in a1-b0; all-unassigned
|
||||
<icu:state> a1-fe:4
|
||||
# 4: All-unassigned trail byte state
|
||||
<icu:state> a1-fe.u
|
||||
<icu:state> a1-a4:1, a5-fe:5
|
||||
<icu:state> a1-e2:1, e3-fe:5
|
||||
<icu:state> a1-f2:1, f3-fe:5
|
||||
# 5: 8e ad .. .. with some mappings
|
||||
<icu:state> a1-a4:1, a5-fe:4
|
||||
# 6: 8e ac .. .. with some mappings
|
||||
<icu:state> a1-e2:1, e3-fe:4
|
||||
# 7: 8e a2 .. .. with some mappings
|
||||
<icu:state> a1-f2:1, f3-fe:4
|
||||
|
||||
CHARMAP
|
||||
<U0000> \x00 |0
|
||||
|
Loading…
Reference in New Issue
Block a user