From e6351d9b2c44c9abde90a6599ae7ebf0f6416c65 Mon Sep 17 00:00:00 2001 From: David Corbett Date: Mon, 11 Nov 2019 17:39:55 -0500 Subject: [PATCH] Add ms-use/IndicShapingInvalidCluster.txt --- src/Makefile.am | 2 +- src/gen-vowel-constraints.py | 4 +- src/hb-ot-shape-complex-vowel-constraints.cc | 2 +- src/ms-use/COPYING | 21 ++++ src/ms-use/IndicShapingInvalidCluster.txt | 105 +++++++++++++++++++ 5 files changed, 130 insertions(+), 4 deletions(-) create mode 100644 src/ms-use/COPYING create mode 100644 src/ms-use/IndicShapingInvalidCluster.txt diff --git a/src/Makefile.am b/src/Makefile.am index 29563c6cb..32b9f719c 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -290,7 +290,7 @@ ucd-table: gen-ucd-table.py ucd.nounihan.grouped.zip hb-common.h use-table: gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt Blocks.txt $(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-shape-complex-use-table.cc \ || ($(RM) $(srcdir)/hb-ot-shape-complex-use-table.cc; false) -vowel-constraints: gen-vowel-constraints.py IndicShapingInvalidCluster.txt Scripts.txt +vowel-constraints: gen-vowel-constraints.py ms-use/IndicShapingInvalidCluster.txt Scripts.txt $(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-shape-complex-vowel-constraints.cc \ || ($(RM) $(srcdir)/hb-ot-shape-complex-vowel-constraints.cc; false) diff --git a/src/gen-vowel-constraints.py b/src/gen-vowel-constraints.py index 190c0412b..e0ae2a65d 100755 --- a/src/gen-vowel-constraints.py +++ b/src/gen-vowel-constraints.py @@ -25,7 +25,7 @@ import io import sys if len (sys.argv) != 3: - print ('usage: ./gen-vowel-constraints.py IndicShapingInvalidCluster.txt Scripts.txt', file=sys.stderr) + print ('usage: ./gen-vowel-constraints.py ms-use/IndicShapingInvalidCluster.txt Scripts.txt', file=sys.stderr) sys.exit (1) with io.open (sys.argv[2], encoding='utf-8') as f: @@ -156,7 +156,7 @@ print ('/* == Start of generated functions == */') print ('/*') print (' * The following functions are generated by running:') print (' *') -print (' * %s IndicShapingInvalidCluster.txt Scripts.txt' % sys.argv[0]) +print (' * %s ms-use/IndicShapingInvalidCluster.txt Scripts.txt' % sys.argv[0]) print (' *') print (' * on files with these headers:') print (' *') diff --git a/src/hb-ot-shape-complex-vowel-constraints.cc b/src/hb-ot-shape-complex-vowel-constraints.cc index b7e6f4f06..fc09eccda 100644 --- a/src/hb-ot-shape-complex-vowel-constraints.cc +++ b/src/hb-ot-shape-complex-vowel-constraints.cc @@ -2,7 +2,7 @@ /* * The following functions are generated by running: * - * ./gen-vowel-constraints.py IndicShapingInvalidCluster.txt Scripts.txt + * ./gen-vowel-constraints.py ms-use/IndicShapingInvalidCluster.txt Scripts.txt * * on files with these headers: * diff --git a/src/ms-use/COPYING b/src/ms-use/COPYING new file mode 100644 index 000000000..9e841e7a2 --- /dev/null +++ b/src/ms-use/COPYING @@ -0,0 +1,21 @@ + MIT License + + Copyright (c) Microsoft Corporation. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE diff --git a/src/ms-use/IndicShapingInvalidCluster.txt b/src/ms-use/IndicShapingInvalidCluster.txt new file mode 100644 index 000000000..8a177fde8 --- /dev/null +++ b/src/ms-use/IndicShapingInvalidCluster.txt @@ -0,0 +1,105 @@ +# IndicShapingInvalidCluster.txt +# Date: 2015-03-12, 21:17:00 GMT [AG] +# Date: 2019-11-08, 23:22:00 GMT [AG] +# +# This file defines the following property: +# +# Indic_Shaping_Invalid_Cluster +# +# Scope: This file enumerates sequences of characters that should be treated as invalid clusters + + 0905 0946 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN SHORT E + 0905 093E ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AA + 0930 094D 0907 ; # DEVANAGARI LETTER RA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER I + 0909 0941 ; # DEVANAGARI LETTER U, DEVANAGARI VOWEL SIGN U + 090F 0945 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN CANDRA E + 090F 0946 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN SHORT E + 090F 0947 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN E + 0905 0949 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN CANDRA O + 0906 0945 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN CANDRA E + 0905 094A ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN SHORT O + 0906 0946 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN SHORT E + 0905 094B ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN O + 0906 0947 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN E + 0905 094C ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AU + 0906 0948 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN AI + 0905 0945 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN CANDRA E + 0905 093A ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN OE + 0905 093B ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN OOE + 0906 093A ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN OE + 0905 094F ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AW + 0905 0956 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN UE + 0905 0957 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN UUE + 0985 09BE ; # BENGALI LETTER A, BENGALI VOWEL SIGN AA + 098B 09C3 ; # BENGALI LETTER VOCALIC R, BENGALI VOWEL SIGN VOCALIC R + 098C 09E2 ; # BENGALI LETTER VOCALIC L, BENGALI VOWEL SIGN VOCALIC L + 0A05 0A3E ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AA + 0A72 0A3F ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN I + 0A72 0A40 ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN II + 0A73 0A41 ; # GURMUKHI URA, GURMUKHI VOWEL SIGN U + 0A73 0A42 ; # GURMUKHI URA, GURMUKHI VOWEL SIGN UU + 0A72 0A47 ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN EE + 0A05 0A48 ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AI + 0A73 0A4B ; # GURMUKHI URA, GURMUKHI VOWEL SIGN OO + 0A05 0A4C ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AU + 0A85 0ABE ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA + 0A85 0AC5 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN CANDRA E + 0A85 0AC7 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN E + 0A85 0AC8 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AI + 0A85 0AC9 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN CANDRA O + 0A85 0ACB ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN O + 0A85 0ABE 0AC5 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA, GUJARATI VOWEL SIGN CANDRA E + 0A85 0ACC ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AU + 0A85 0ABE 0AC8 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA, GUJARATI VOWEL SIGN AI + 0AC5 0ABE ; # GUJARATI VOWEL SIGN CANDRA E, GUJARATI VOWEL SIGN AA + 0B05 0B3E ; # ORIYA LETTER A, ORIYA VOWEL SIGN AA + 0B0F 0B57 ; # ORIYA LETTER E, ORIYA AU LENGTH MARK + 0B13 0B57 ; # ORIYA LETTER O, ORIYA AU LENGTH MARK + 0B85 0BC2 ; # TAMIL LETTER A, TAMIL VOWEL SIGN UU + 0C12 0C55 ; # TELUGU LETTER O, TELUGU LENGTH MARK + 0C12 0C4C ; # TELUGU LETTER O, TELUGU VOWEL SIGN AU + 0C3F 0C55 ; # TELUGU VOWEL SIGN I, TELUGU LENGTH MARK + 0C46 0C55 ; # TELUGU VOWEL SIGN E, TELUGU LENGTH MARK + 0C4A 0C55 ; # TELUGU VOWEL SIGN O, TELUGU LENGTH MARK + 0C89 0CBE ; # KANNADA LETTER U, KANNADA VOWEL SIGN AA + 0C92 0CCC ; # KANNADA LETTER O, KANNADA VOWEL SIGN AU + 0C8B 0CBE ; # KANNADA LETTER VOCALIC R, KANNADA VOWEL SIGN AA + 0D07 0D57 ; # MALAYALAM LETTER I, MALAYALAM AU LENGTH MARK + 0D09 0D57 ; # MALAYALAM LETTER U, MALAYALAM AU LENGTH MARK + 0D0E 0D46 ; # MALAYALAM LETTER E, MALAYALAM VOWEL SIGN E + 0D12 0D3E ; # MALAYALAM LETTER O, MALAYALAM VOWEL SIGN AA + 0D12 0D57 ; # MALAYALAM LETTER O, MALAYALAM AU LENGTH MARK + 0D85 0DCF ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN AELA-PILLA + 0D85 0DD0 ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN KETTI AEDA-PILLA + 0D85 0DD1 ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN DIGA AEDA-PILLA + 0D8B 0DDF ; # SINHALA LETTER UYANNA, SINHALA VOWEL SIGN GAYANUKITTA + 0D8D 0DD8 ; # SINHALA LETTER IRUYANNA, SINHALA VOWEL SIGN GAETTA-PILLA + 0D8F 0DDF ; # SINHALA LETTER ILUYANNA, SINHALA VOWEL SIGN GAYANUKITTA + 0D91 0DCA ; # SINHALA LETTER EYANNA, SINHALA SIGN AL-LAKUNA + 0D91 0DD9 ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA + 0D91 0DDA ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN DIGA KOMBUVA + 0D91 0DDC ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA AELA-PILLA + 0D91 0DDD ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA DIGA AELA-PILLA + 0D91 0DDD ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA DIGA AELA-PILLA + 0D94 0DDF ; # SINHALA LETTER OYANNA, SINHALA VOWEL SIGN GAYANUKITTA + 11005 11038 ; # BRAHMI LETTER A, BRAHMI VOWEL SIGN AA + 1100B 1103E ; # BRAHMI LETTER VOCALIC R, BRAHMI VOWEL SIGN VOCALIC R + 1100F 11042 ; # BRAHMI LETTER E, BRAHMI VOWEL SIGN E + 11680 116AD ; # TAKRI LETTER A, TAKRI VOWEL SIGN AA + 11686 116B2 ; # TAKRI LETTER E, TAKRI VOWEL SIGN E + 11680 116B4 ; # TAKRI LETTER A, TAKRI VOWEL SIGN O + 11680 116B5 ; # TAKRI LETTER A, TAKRI VOWEL SIGN AU + 112B0 112E0 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AA + 112B0 112E5 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN E + 112B0 112E6 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AI + 112B0 112E7 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN O + 112B0 112E8 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AU + 11481 114B0 ; # TIRHUTA LETTER A, TIRHUTA VOWEL SIGN AA + 114AA 114B5 ; # TIRHUTA LETTER LA, TIRHUTA VOWEL SIGN VOCALIC R + 114AA 114B6 ; # TIRHUTA LETTER LA, TIRHUTA VOWEL SIGN VOCALIC RR + 1148B 114BA ; # TIRHUTA LETTER E, TIRHUTA VOWEL SIGN SHORT E + 1148D 114BA ; # TIRHUTA LETTER O, TIRHUTA VOWEL SIGN SHORT E + 11600 11639 ; # MODI LETTER A, MODI VOWEL SIGN E + 11600 1163A ; # MODI LETTER A, MODI VOWEL SIGN AI + 11601 11639 ; # MODI LETTER AA, MODI VOWEL SIGN E + 11601 1163A ; # MODI LETTER AA, MODI VOWEL SIGN AI