Automate LC_CTYPE generation for tr_TR, update to Unicode 8.0.0 (bug 18491).

This patch extends the automated Unicode LC_CTYPE generation so that it
can also generate the modified LC_CTYPE used for Turkish (where case
conversions of 'i' and 'I' differ from ASCII conventions), making it
easier to keep that locale in sync with future Unicode updates.  The
patch includes the locale update generated by the scripts.

Tested for x86_64.

	[BZ #18491]
	* unicode-gen/unicode_utils.py (to_upper_turkish): New function.
	(to_lower_turkish): Likewise.
	* unicode-gen/gen_unicode_ctype.py (output_tables): Support
	producing output with Turkish case conversions.
	(--turkish): New command-line option.
	* unicode-gen/Makefile (GENERATED): Add tr_TR.
	(tr_TR): New rule.
	* locales/tr_TR: Regenerate LC_CTYPE.
This commit is contained in:
Joseph Myers 2015-12-11 12:45:19 +00:00
parent 77356912e8
commit 85bafe6f3d
5 changed files with 1838 additions and 1032 deletions

View File

@ -1,3 +1,15 @@
2015-12-11 Joseph Myers <joseph@codesourcery.com>
[BZ #18491]
* unicode-gen/unicode_utils.py (to_upper_turkish): New function.
(to_lower_turkish): Likewise.
* unicode-gen/gen_unicode_ctype.py (output_tables): Support
producing output with Turkish case conversions.
(--turkish): New command-line option.
* unicode-gen/Makefile (GENERATED): Add tr_TR.
(tr_TR): New rule.
* locales/tr_TR: Regenerate LC_CTYPE.
2015-12-09 Mike FABIAN <mfabian@redhat.com>
[BZ 18568]

File diff suppressed because it is too large Load Diff

View File

@ -41,7 +41,7 @@ PYTHON3 = python3
WGET = wget
DOWNLOADS = UnicodeData.txt DerivedCoreProperties.txt EastAsianWidth.txt
GENERATED = i18n UTF-8 translit_combining translit_compat translit_circle translit_cjk_compat translit_font translit_fraction
GENERATED = i18n tr_TR UTF-8 translit_combining translit_compat translit_circle translit_cjk_compat translit_font translit_fraction
REPORTS = i18n-report UTF-8-report
all: $(GENERATED)
@ -50,6 +50,7 @@ check: check-i18n check-UTF-8
install:
cp -p i18n ../locales/i18n
cp -p tr_TR ../locales/tr_TR
cp -p UTF-8 ../charmaps/UTF-8
cp -p translit_combining ../locales/translit_combining
cp -p translit_compat ../locales/translit_compat
@ -82,6 +83,13 @@ check-i18n: i18n-report
i18n-report; \
then echo manual verification required; false; else true; fi
# Generate the tr_TR locale source with Turkish case conversions
# (--turkish).  The existing ../locales/tr_TR is both a prerequisite and
# the -i input so that its non-ctype information is preserved.
tr_TR: gen_unicode_ctype.py UnicodeData.txt DerivedCoreProperties.txt \
       ../locales/tr_TR
	$(PYTHON3) gen_unicode_ctype.py \
	  --unicode_version $(UNICODE_VERSION) --turkish \
	  -u UnicodeData.txt -d DerivedCoreProperties.txt \
	  -i ../locales/tr_TR -o $@
UTF-8: UnicodeData.txt EastAsianWidth.txt
UTF-8: utf8_gen.py
$(PYTHON3) utf8_gen.py UnicodeData.txt EastAsianWidth.txt

View File

@ -196,7 +196,7 @@ def output_tail(i18n_file, tail=''):
else:
i18n_file.write('END LC_CTYPE\n')
def output_tables(i18n_file, unicode_version):
def output_tables(i18n_file, unicode_version, turkish):
'''Write the new LC_CTYPE character classes to the output file'''
i18n_file.write('% The following is the 14652 i18n fdcc-set '
+ 'LC_CTYPE category.\n')
@ -240,8 +240,14 @@ def output_tables(i18n_file, unicode_version):
+ '(sections 7.25.2.1.12 and 6.4.4.1).\n')
output_charclass(i18n_file, 'xdigit', unicode_utils.is_xdigit)
output_charclass(i18n_file, 'blank', unicode_utils.is_blank)
output_charmap(i18n_file, 'toupper', unicode_utils.to_upper)
output_charmap(i18n_file, 'tolower', unicode_utils.to_lower)
if turkish:
i18n_file.write('% The case conversions reflect '
+ 'Turkish conventions.\n')
output_charmap(i18n_file, 'toupper', unicode_utils.to_upper_turkish)
output_charmap(i18n_file, 'tolower', unicode_utils.to_lower_turkish)
else:
output_charmap(i18n_file, 'toupper', unicode_utils.to_upper)
output_charmap(i18n_file, 'tolower', unicode_utils.to_lower)
output_charmap(i18n_file, 'map "totitle";', unicode_utils.to_title)
i18n_file.write('% The "combining" class reflects ISO/IEC 10646-1 '
+ 'annex B.1\n')
@ -298,6 +304,10 @@ if __name__ == "__main__":
required=True,
type=str,
help='The Unicode version of the input files used.')
PARSER.add_argument(
'--turkish',
action='store_true',
help='Use Turkish case conversions.')
ARGS = PARSER.parse_args()
unicode_utils.fill_attributes(
@ -310,5 +320,5 @@ if __name__ == "__main__":
(HEAD, TAIL) = read_input_file(ARGS.input_file)
with open(ARGS.output_file, mode='w') as I18N_FILE:
output_head(I18N_FILE, ARGS.unicode_version, head=HEAD)
output_tables(I18N_FILE, ARGS.unicode_version)
output_tables(I18N_FILE, ARGS.unicode_version, ARGS.turkish)
output_tail(I18N_FILE, tail=TAIL)

View File

@ -220,6 +220,20 @@ def to_lower(code_point):
else:
return code_point
def to_upper_turkish(code_point):
    '''Returns the code point of the uppercase version of the given
    code point, following Turkish case conventions.

    The only difference from to_upper() is U+0069 (LATIN SMALL LETTER I),
    which maps to U+0130 (LATIN CAPITAL LETTER I WITH DOT ABOVE); every
    other code point is delegated to to_upper().
    '''
    return 0x0130 if code_point == 0x0069 else to_upper(code_point)
def to_lower_turkish(code_point):
    '''Returns the code point of the lowercase version of the given
    code point, following Turkish case conventions.

    The only difference from to_lower() is U+0049 (LATIN CAPITAL LETTER I),
    which maps to U+0131 (LATIN SMALL LETTER DOTLESS I); every other
    code point is delegated to to_lower().
    '''
    if code_point != 0x0049:
        return to_lower(code_point)
    return 0x0131
def to_title(code_point):
'''Returns the code point of the titlecase version
of the given code point'''