Set width of JUNGSEONG/JONGSEONG characters from UD7B0 to UD7FB to 0 [BZ #26120]

Reviewed-by: Carlos O'Donell <carlos@redhat.com>
2024-11-24 14:00:30 +00:00 · 2020-06-16 08:29:40 +02:00 · 2020-06-16 08:29:40 +02:00 · 6e540caa21
commit 6e540caa21
parent 1d21fb1061
10 changed files with 18 additions and 9 deletions
--- a/localedata/charmaps/UTF-8
+++ b/localedata/charmaps/UTF-8
@@ -48920,6 +48920,8 @@ WIDTH
 <UABE8>	0
 <UABED>	0
 <UAC00>...<UD7A3>	2
+<UD7B0>...<UD7C6>	0
+<UD7CB>...<UD7FB>	0
 <UF900>...<UFA6D>	2
 <UFA70>...<UFAD9>	2
 <UFB1E>	0
--- a/localedata/locales/i18n_ctype
+++ b/localedata/locales/i18n_ctype
@@ -26,7 +26,7 @@ fax       ""
 language  ""
 territory "Earth"
 revision  "13.0.0"
-date      "2020-04-14"
+date      "2020-06-25"
 category  "i18n:2012";LC_CTYPE
 END LC_IDENTIFICATION

--- a/localedata/locales/tr_TR
+++ b/localedata/locales/tr_TR
@@ -43,7 +43,7 @@ fax        ""
 language   "Turkish"
 territory  "Turkey"
 revision   "1.0"
-date       "2020-04-14"
+date       "2020-06-25"

 category "i18n:2012";LC_IDENTIFICATION
 category "i18n:2012";LC_CTYPE
--- a/localedata/locales/translit_circle
+++ b/localedata/locales/translit_circle
@@ -9,7 +9,7 @@ comment_char %
 % otherwise be governed by that license.

 % Transliterations of encircled characters.
-% Generated automatically from UnicodeData.txt by gen_translit_circle.py on 2020-04-14 for Unicode 13.0.0.
+% Generated automatically from UnicodeData.txt by gen_translit_circle.py on 2020-06-25 for Unicode 13.0.0.

 LC_CTYPE

--- a/localedata/locales/translit_cjk_compat
+++ b/localedata/locales/translit_cjk_compat
@@ -9,7 +9,7 @@ comment_char %
 % otherwise be governed by that license.

 % Transliterations of CJK compatibility characters.
-% Generated automatically from UnicodeData.txt by gen_translit_cjk_compat.py on 2020-04-14 for Unicode 13.0.0.
+% Generated automatically from UnicodeData.txt by gen_translit_cjk_compat.py on 2020-06-25 for Unicode 13.0.0.

 LC_CTYPE

--- a/localedata/locales/translit_combining
+++ b/localedata/locales/translit_combining
@@ -10,7 +10,7 @@ comment_char %

 % Transliterations that remove all combining characters (accents,
 % pronounciation marks, etc.).
-% Generated automatically from UnicodeData.txt by gen_translit_combining.py on 2020-04-14 for Unicode 13.0.0.
+% Generated automatically from UnicodeData.txt by gen_translit_combining.py on 2020-06-25 for Unicode 13.0.0.

 LC_CTYPE

--- a/localedata/locales/translit_compat
+++ b/localedata/locales/translit_compat
@@ -9,7 +9,7 @@ comment_char %
 % otherwise be governed by that license.

 % Transliterations of compatibility characters and ligatures.
-% Generated automatically from UnicodeData.txt by gen_translit_compat.py on 2020-04-14 for Unicode 13.0.0.
+% Generated automatically from UnicodeData.txt by gen_translit_compat.py on 2020-06-25 for Unicode 13.0.0.

 LC_CTYPE

--- a/localedata/locales/translit_font
+++ b/localedata/locales/translit_font
@@ -9,7 +9,7 @@ comment_char %
 % otherwise be governed by that license.

 % Transliterations of font equivalents.
-% Generated automatically from UnicodeData.txt by gen_translit_font.py on 2020-04-14 for Unicode 13.0.0.
+% Generated automatically from UnicodeData.txt by gen_translit_font.py on 2020-06-25 for Unicode 13.0.0.

 LC_CTYPE

--- a/localedata/locales/translit_fraction
+++ b/localedata/locales/translit_fraction
@@ -9,7 +9,7 @@ comment_char %
 % otherwise be governed by that license.

 % Transliterations of fractions.
-% Generated automatically from UnicodeData.txt by gen_translit_fraction.py on 2020-04-14 for Unicode 13.0.0.
+% Generated automatically from UnicodeData.txt by gen_translit_fraction.py on 2020-06-25 for Unicode 13.0.0.
 % The replacements have been surrounded with spaces, because fractions are
 % often preceded by a decimal number and followed by a unit or a math symbol.

--- a/localedata/unicode-gen/utf8_gen.py
+++ b/localedata/unicode-gen/utf8_gen.py
@@ -258,7 +258,13 @@ def process_width(outfile, ulines, elines, plines):
        if key in width_dict:
            del width_dict[key] # default width is 1
    for key in list(range(0x1160, 0x1200)):
-        width_dict[key] = 0
+        # Hangul jungseong and jongseong:
+        if key in unicode_utils.UNICODE_ATTRIBUTES:
+            width_dict[key] = 0
+    for key in list(range(0xD7B0, 0xD800)):
+        # Hangul jungseong and jongseong:
+        if key in unicode_utils.UNICODE_ATTRIBUTES:
+            width_dict[key] = 0
    for key in list(range(0x3248, 0x3250)):
        # These are “A” which means we can decide whether to treat them
        # as “W” or “N” based on context:
@@ -327,6 +333,7 @@ if __name__ == "__main__":
        help='The Unicode version of the input files used.')
    ARGS = PARSER.parse_args()

+    unicode_utils.fill_attributes(ARGS.unicode_data_file)
    with open(ARGS.unicode_data_file, mode='r') as UNIDATA_FILE:
        UNICODE_DATA_LINES = UNIDATA_FILE.readlines()
    with open(ARGS.east_asian_with_file, mode='r') as EAST_ASIAN_WIDTH_FILE: