[tibetan] Route Tibetan through USE
Fixes https://github.com/harfbuzz/harfbuzz/pull/933 and https://github.com/harfbuzz/harfbuzz/issues/1012. Tibetan failures go from 0 to 2. TIBETAN: 208467 out of 208469 tests passed; 2 failed (0.000959375%).
This commit is contained in:
parent
77792187be
commit
32a438166f
@ -8,7 +8,7 @@ if len (sys.argv) != 5:
|
||||
print ("usage: ./gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt Blocks.txt", file=sys.stderr)
|
||||
sys.exit (1)
|
||||
|
||||
BLACKLISTED_BLOCKS = ["Thai", "Lao", "Tibetan"]
|
||||
BLACKLISTED_BLOCKS = ["Thai", "Lao"]
|
||||
|
||||
files = [io.open (x, encoding='utf-8') for x in sys.argv[1:]]
|
||||
|
||||
@ -307,11 +307,28 @@ def map_to_use(data):
|
||||
|
||||
# Resolve Indic_Syllabic_Category
|
||||
|
||||
# TODO: These don't have UISC assigned in Unicode 8.0, but
|
||||
# have UIPC
|
||||
# TODO: These don't have UISC assigned in Unicode 8.0, but have UIPC
|
||||
if U == 0x17DD: UISC = Vowel_Dependent
|
||||
if 0x1CE2 <= U <= 0x1CE8: UISC = Cantillation_Mark
|
||||
|
||||
# Tibetan:
|
||||
# TODO: These don't have UISC assigned in Unicode 11.0, but have UIPC
|
||||
if 0x0F18 <= U <= 0x0F19 or 0x0F3E <= U <= 0x0F3F: UISC = Vowel_Dependent
|
||||
if 0x0F86 <= U <= 0x0F87: UISC = Tone_Mark
|
||||
# Overrides to allow NFC order matching syllable
|
||||
# https://github.com/harfbuzz/harfbuzz/issues/1012
|
||||
if UBlock == 'Tibetan' and is_VOWEL (U, UISC, UGC):
|
||||
if UIPC == Top:
|
||||
UIPC = Bottom
|
||||
|
||||
# TODO: https://github.com/harfbuzz/harfbuzz/pull/982
|
||||
# also https://github.com/harfbuzz/harfbuzz/issues/1012
|
||||
if UBlock == 'Chakma' and is_VOWEL (U, UISC, UGC):
|
||||
if UIPC == Top:
|
||||
UIPC = Bottom
|
||||
elif UIPC == Bottom:
|
||||
UIPC = Top
|
||||
|
||||
# TODO: https://github.com/harfbuzz/harfbuzz/pull/627
|
||||
if 0x1BF2 <= U <= 0x1BF3: UISC = Nukta; UIPC = Bottom
|
||||
|
||||
@ -359,13 +376,6 @@ def map_to_use(data):
|
||||
# https://github.com/roozbehp/unicode-data/issues/8
|
||||
if U == 0x0A51: UIPC = Bottom
|
||||
|
||||
# TODO: https://github.com/harfbuzz/harfbuzz/pull/982
|
||||
if UBlock == 'Chakma' and is_VOWEL (U, UISC, UGC):
|
||||
if UIPC == Top:
|
||||
UIPC = Bottom
|
||||
elif UIPC == Bottom:
|
||||
UIPC = Top
|
||||
|
||||
assert (UIPC in [Not_Applicable, Visual_Order_Left] or
|
||||
USE in use_positions), "%s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UGC)
|
||||
|
||||
|
@ -194,7 +194,24 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
|
||||
/* 0DE0 */ O, O, O, O, O, O, B, B, B, B, B, B, B, B, B, B,
|
||||
/* 0DF0 */ O, O, VPst, VPst, O, O, O, O,
|
||||
|
||||
#define use_offset_0x1000u 1360
|
||||
#define use_offset_0x0f18u 1360
|
||||
|
||||
|
||||
/* Tibetan */
|
||||
VBlw, VBlw, O, O, O, O, O, O,
|
||||
/* 0F20 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
|
||||
/* 0F30 */ B, B, B, B, O, FM, O, FM, O, CMAbv, O, O, O, O, VPst, VPre,
|
||||
/* 0F40 */ B, B, B, B, B, B, B, B, O, B, B, B, B, B, B, B,
|
||||
/* 0F50 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
|
||||
/* 0F60 */ B, B, B, B, B, B, B, B, B, B, B, B, B, O, O, O,
|
||||
/* 0F70 */ O, VBlw, VBlw, VAbv, VBlw, VBlw, VAbv, VAbv, VAbv, VAbv, VBlw, VBlw, VBlw, VBlw, VMAbv, VMPst,
|
||||
/* 0F80 */ VBlw, VAbv, VMAbv, VMAbv, VBlw, IND, VMAbv, VMAbv, B, B, B, B, B, SUB, SUB, SUB,
|
||||
/* 0F90 */ SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, O, SUB, SUB, SUB, SUB, SUB, SUB, SUB,
|
||||
/* 0FA0 */ SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB,
|
||||
/* 0FB0 */ SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, O, O, O,
|
||||
/* 0FC0 */ O, O, O, O, O, O, FM, O,
|
||||
|
||||
#define use_offset_0x1000u 1536
|
||||
|
||||
|
||||
/* Myanmar */
|
||||
@ -210,7 +227,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
|
||||
/* 1080 */ B, B, MBlw, VPst, VPre, VAbv, VAbv, VMPst, VMPst, VMPst, VMPst, VMPst, VMPst, VMBlw, B, VMPst,
|
||||
/* 1090 */ B, B, B, B, B, B, B, B, B, B, VMPst, VMPst, VPst, VAbv, O, O,
|
||||
|
||||
#define use_offset_0x1700u 1520
|
||||
#define use_offset_0x1700u 1696
|
||||
|
||||
|
||||
/* Tagalog */
|
||||
@ -243,7 +260,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
|
||||
/* 17D0 */ FM, VAbv, H, FM, O, O, O, O, O, O, O, O, B, VAbv, O, O,
|
||||
/* 17E0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
|
||||
|
||||
#define use_offset_0x1900u 1760
|
||||
#define use_offset_0x1900u 1936
|
||||
|
||||
|
||||
/* Limbu */
|
||||
@ -287,7 +304,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
|
||||
/* 1A80 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
|
||||
/* 1A90 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
|
||||
|
||||
#define use_offset_0x1b00u 2176
|
||||
#define use_offset_0x1b00u 2352
|
||||
|
||||
|
||||
/* Balinese */
|
||||
@ -323,7 +340,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
|
||||
/* 1C30 */ FAbv, FAbv, FAbv, FAbv, VMPre, VMPre, FM, CMBlw, O, O, O, O, O, O, O, O,
|
||||
/* 1C40 */ B, B, B, B, B, B, B, B, B, B, O, O, O, B, B, B,
|
||||
|
||||
#define use_offset_0x1cd0u 2512
|
||||
#define use_offset_0x1cd0u 2688
|
||||
|
||||
|
||||
/* Vedic Extensions */
|
||||
@ -332,20 +349,20 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
|
||||
/* 1CE0 */ VMAbv, VMPst, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, O, O, O, O, VMBlw, O, O,
|
||||
/* 1CF0 */ O, O, VMPst, VMPst, VMAbv, CS, CS, VMPst, VMAbv, VMAbv, O, O, O, O, O, O,
|
||||
|
||||
#define use_offset_0x1df8u 2560
|
||||
#define use_offset_0x1df8u 2736
|
||||
|
||||
|
||||
/* Combining Diacritical Marks Supplement */
|
||||
O, O, O, FM, O, O, O, O,
|
||||
|
||||
#define use_offset_0x2008u 2568
|
||||
#define use_offset_0x2008u 2744
|
||||
|
||||
|
||||
/* General Punctuation */
|
||||
O, O, O, O, ZWNJ, ZWJ, O, O,
|
||||
/* 2010 */ GB, GB, GB, GB, GB, O, O, O,
|
||||
|
||||
#define use_offset_0x2060u 2584
|
||||
#define use_offset_0x2060u 2760
|
||||
|
||||
/* 2060 */ WJ, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
|
||||
|
||||
@ -354,20 +371,20 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
|
||||
/* 2070 */ O, O, O, O, FM, O, O, O, O, O, O, O, O, O, O, O,
|
||||
/* 2080 */ O, O, FM, FM, FM, O, O, O,
|
||||
|
||||
#define use_offset_0x20f0u 2624
|
||||
#define use_offset_0x20f0u 2800
|
||||
|
||||
|
||||
/* Combining Diacritical Marks for Symbols */
|
||||
|
||||
/* 20F0 */ VMAbv, O, O, O, O, O, O, O,
|
||||
|
||||
#define use_offset_0x25c8u 2632
|
||||
#define use_offset_0x25c8u 2808
|
||||
|
||||
|
||||
/* Geometric Shapes */
|
||||
O, O, O, O, GB, O, O, O,
|
||||
|
||||
#define use_offset_0xa800u 2640
|
||||
#define use_offset_0xa800u 2816
|
||||
|
||||
|
||||
/* Syloti Nagri */
|
||||
@ -454,7 +471,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
|
||||
/* AAE0 */ B, B, B, B, B, B, B, B, B, B, B, VPre, VBlw, VAbv, VPre, VPst,
|
||||
/* AAF0 */ O, O, O, O, O, VMPst, H, O,
|
||||
|
||||
#define use_offset_0xabc0u 3400
|
||||
#define use_offset_0xabc0u 3576
|
||||
|
||||
|
||||
/* Meetei Mayek */
|
||||
@ -464,14 +481,14 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
|
||||
/* ABE0 */ B, B, B, VPst, VPst, VAbv, VPst, VPst, VBlw, VPst, VPst, O, VMPst, VBlw, O, O,
|
||||
/* ABF0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
|
||||
|
||||
#define use_offset_0xfe00u 3464
|
||||
#define use_offset_0xfe00u 3640
|
||||
|
||||
|
||||
/* Variation Selectors */
|
||||
|
||||
/* FE00 */ VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS,
|
||||
|
||||
#define use_offset_0x10a00u 3480
|
||||
#define use_offset_0x10a00u 3656
|
||||
|
||||
|
||||
/* Kharoshthi */
|
||||
@ -482,7 +499,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
|
||||
/* 10A30 */ B, B, B, B, B, B, O, O, CMAbv, CMBlw, CMBlw, O, O, O, O, H,
|
||||
/* 10A40 */ B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, O,
|
||||
|
||||
#define use_offset_0x11000u 3560
|
||||
#define use_offset_0x11000u 3736
|
||||
|
||||
|
||||
/* Brahmi */
|
||||
@ -503,7 +520,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
|
||||
/* 110A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
|
||||
/* 110B0 */ VPst, VPre, VPst, VBlw, VBlw, VAbv, VAbv, VPst, VPst, H, CMBlw, O, O, O, O, O,
|
||||
|
||||
#define use_offset_0x11100u 3752
|
||||
#define use_offset_0x11100u 3928
|
||||
|
||||
|
||||
/* Chakma */
|
||||
@ -511,7 +528,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
|
||||
/* 11100 */ VMAbv, VMAbv, VMAbv, B, B, B, B, B, B, B, B, B, B, B, B, B,
|
||||
/* 11110 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
|
||||
/* 11120 */ B, B, B, B, B, B, B, VBlw, VBlw, VBlw, VAbv, VAbv, VPre, VBlw, VAbv, VAbv,
|
||||
/* 11130 */ VBlw, VAbv, VAbv, H, CMAbv, O, B, B, B, B, B, B, B, B, B, B,
|
||||
/* 11130 */ VBlw, VAbv, VAbv, H, CMBlw, O, B, B, B, B, B, B, B, B, B, B,
|
||||
/* 11140 */ O, O, O, O, B, VPst, VPst, O, O, O, O, O, O, O, O, O,
|
||||
|
||||
/* Mahajani */
|
||||
@ -541,7 +558,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
|
||||
/* 11220 */ B, B, B, B, B, B, B, B, B, B, B, B, VPst, VPst, VPst, VBlw,
|
||||
/* 11230 */ VAbv, VAbv, VAbv, VAbv, VMAbv, H, CMAbv, CMAbv, O, O, O, O, O, O, VMAbv, O,
|
||||
|
||||
#define use_offset_0x11280u 4072
|
||||
#define use_offset_0x11280u 4248
|
||||
|
||||
|
||||
/* Multani */
|
||||
@ -569,7 +586,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
|
||||
/* 11360 */ B, B, VPst, VPst, O, O, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, O, O, O,
|
||||
/* 11370 */ VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, O, O, O,
|
||||
|
||||
#define use_offset_0x11400u 4320
|
||||
#define use_offset_0x11400u 4496
|
||||
|
||||
|
||||
/* Newa */
|
||||
@ -592,7 +609,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
|
||||
/* 114C0 */ VMAbv, VMPst, H, CMBlw, B, O, O, O, O, O, O, O, O, O, O, O,
|
||||
/* 114D0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
|
||||
|
||||
#define use_offset_0x11580u 4544
|
||||
#define use_offset_0x11580u 4720
|
||||
|
||||
|
||||
/* Siddham */
|
||||
@ -635,7 +652,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
|
||||
/* 11720 */ VPst, VPst, VAbv, VAbv, VBlw, VBlw, VPre, VAbv, VBlw, VAbv, VAbv, VAbv, O, O, O, O,
|
||||
/* 11730 */ B, B, B, B, B, B, B, B, B, B, B, B, O, O, O, O,
|
||||
|
||||
#define use_offset_0x11800u 4992
|
||||
#define use_offset_0x11800u 5168
|
||||
|
||||
|
||||
/* Dogra */
|
||||
@ -645,7 +662,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
|
||||
/* 11820 */ B, B, B, B, B, B, B, B, B, B, B, B, VPst, VPre, VPst, VBlw,
|
||||
/* 11830 */ VBlw, VBlw, VBlw, VAbv, VAbv, VAbv, VAbv, VMAbv, VMPst, H, CMBlw, O, O, O, O, O,
|
||||
|
||||
#define use_offset_0x11a00u 5056
|
||||
#define use_offset_0x11a00u 5232
|
||||
|
||||
|
||||
/* Zanabazar Square */
|
||||
@ -664,7 +681,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
|
||||
/* 11A80 */ B, B, B, B, O, O, R, R, R, R, FBlw, FBlw, FBlw, FBlw, FBlw, FBlw,
|
||||
/* 11A90 */ FBlw, FBlw, FBlw, FBlw, FBlw, FBlw, VMAbv, VMPst, CMAbv, H, O, O, O, B, O, O,
|
||||
|
||||
#define use_offset_0x11c00u 5216
|
||||
#define use_offset_0x11c00u 5392
|
||||
|
||||
|
||||
/* Bhaiksuki */
|
||||
@ -685,7 +702,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
|
||||
/* 11CA0 */ SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, O, SUB, SUB, SUB, SUB, SUB, SUB, SUB,
|
||||
/* 11CB0 */ VBlw, VPre, VBlw, VAbv, VPst, VMAbv, VMAbv, O,
|
||||
|
||||
#define use_offset_0x11d00u 5400
|
||||
#define use_offset_0x11d00u 5576
|
||||
|
||||
|
||||
/* Masaram Gondi */
|
||||
@ -705,7 +722,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
|
||||
/* 11D90 */ VAbv, VAbv, O, VPst, VPst, VMAbv, VMPst, H, O, O, O, O, O, O, O, O,
|
||||
/* 11DA0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
|
||||
|
||||
#define use_offset_0x11ee0u 5576
|
||||
#define use_offset_0x11ee0u 5752
|
||||
|
||||
|
||||
/* Makasar */
|
||||
@ -713,7 +730,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
|
||||
/* 11EE0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
|
||||
/* 11EF0 */ B, B, GB, VAbv, VBlw, VPre, VPst, O,
|
||||
|
||||
}; /* Table items: 5600; occupancy: 73% */
|
||||
}; /* Table items: 5776; occupancy: 74% */
|
||||
|
||||
USE_TABLE_ELEMENT_TYPE
|
||||
hb_use_get_category (hb_codepoint_t u)
|
||||
@ -725,6 +742,7 @@ hb_use_get_category (hb_codepoint_t u)
|
||||
if (hb_in_range<hb_codepoint_t> (u, 0x00A0u, 0x00D7u)) return use_table[u - 0x00A0u + use_offset_0x00a0u];
|
||||
if (hb_in_range<hb_codepoint_t> (u, 0x0348u, 0x034Fu)) return use_table[u - 0x0348u + use_offset_0x0348u];
|
||||
if (hb_in_range<hb_codepoint_t> (u, 0x0900u, 0x0DF7u)) return use_table[u - 0x0900u + use_offset_0x0900u];
|
||||
if (hb_in_range<hb_codepoint_t> (u, 0x0F18u, 0x0FC7u)) return use_table[u - 0x0F18u + use_offset_0x0f18u];
|
||||
break;
|
||||
|
||||
case 0x1u:
|
||||
|
@ -232,12 +232,6 @@ hb_ot_shape_complex_categorize (const hb_ot_shape_planner_t *planner)
|
||||
return &_hb_ot_complex_shaper_hangul;
|
||||
|
||||
|
||||
/* Unicode-2.0 additions */
|
||||
case HB_SCRIPT_TIBETAN:
|
||||
|
||||
return &_hb_ot_complex_shaper_tibetan;
|
||||
|
||||
|
||||
/* Unicode-1.1 additions */
|
||||
case HB_SCRIPT_HEBREW:
|
||||
|
||||
@ -289,7 +283,7 @@ hb_ot_shape_complex_categorize (const hb_ot_shape_planner_t *planner)
|
||||
|
||||
|
||||
/* Unicode-2.0 additions */
|
||||
//case HB_SCRIPT_TIBETAN:
|
||||
case HB_SCRIPT_TIBETAN:
|
||||
|
||||
/* Unicode-3.0 additions */
|
||||
//case HB_SCRIPT_MONGOLIAN:
|
||||
|
Loading…
Reference in New Issue
Block a user