From d333a2e0fb3a8045d2667847b8c99ee82a6bbdd2 Mon Sep 17 00:00:00 2001 From: Mike FABIAN Date: Mon, 8 Jan 2024 10:05:13 +0100 Subject: [PATCH] localedata: unicode-gen: Remove redundant \s* from regexp, fix comments --- localedata/charmaps/UTF-8 | 2 +- localedata/unicode-gen/utf8_gen.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/localedata/charmaps/UTF-8 b/localedata/charmaps/UTF-8 index 94f20d5e87..b545cc9b25 100644 --- a/localedata/charmaps/UTF-8 +++ b/localedata/charmaps/UTF-8 @@ -49858,7 +49858,7 @@ END CHARMAP % Character width according to Unicode 15.1.0. % - Default width is 1. % - Double-width characters have width 2; generated from -% "grep '^[^;]*;[WF]' EastAsianWidth.txt" +% "grep '^[^;]*;\s*[WF]' EastAsianWidth.txt" % - Non-spacing characters have width 0; generated from PropList.txt or % "grep '^[^;]*;[^;]*;[^;]*;[^;]*;NSM;' UnicodeData.txt" % - Format control characters have width 0; generated from diff --git a/localedata/unicode-gen/utf8_gen.py b/localedata/unicode-gen/utf8_gen.py index 5e77333bb4..f744e87ffc 100755 --- a/localedata/unicode-gen/utf8_gen.py +++ b/localedata/unicode-gen/utf8_gen.py @@ -204,7 +204,7 @@ def write_header_width(outfile, unicode_version): + '{:s}.\n'.format(unicode_version)) outfile.write('% - Default width is 1.\n') outfile.write('% - Double-width characters have width 2; generated from\n') - outfile.write('% "grep \'^[^;]*;[WF]\' EastAsianWidth.txt"\n') + outfile.write('% "grep \'^[^;]*;\\s*[WF]\' EastAsianWidth.txt"\n') outfile.write('% - Non-spacing characters have width 0; ' + 'generated from PropList.txt or\n') outfile.write('% "grep \'^[^;]*;[^;]*;[^;]*;[^;]*;NSM;\' ' @@ -339,8 +339,8 @@ if __name__ == "__main__": with open(ARGS.east_asian_with_file, mode='r') as EAST_ASIAN_WIDTH_FILE: EAST_ASIAN_WIDTH_LINES = [] for LINE in EAST_ASIAN_WIDTH_FILE: - # If characters from EastAasianWidth.txt which are from - # from reserved ranges (i.e. not yet assigned code points) + # If characters from EastAsianWidth.txt which are from + # reserved ranges (i.e. not yet assigned code points) # are added to the WIDTH section of the UTF-8 file, then # “make check” produces “Unknown Character” errors for # these code points because such unassigned code points @@ -350,7 +350,7 @@ if __name__ == "__main__": # the EastAsianWidth.txt file. if re.match(r'.*\.\..*', LINE): continue - if re.match(r'^[^;]*;\s*[WF]\s*', LINE): + if re.match(r'^[^;]*;\s*[WF]', LINE): EAST_ASIAN_WIDTH_LINES.append(LINE.strip()) with open(ARGS.prop_list_file, mode='r') as PROP_LIST_FILE: PROP_LIST_LINES = []