mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-08 14:20:07 +00:00
a7b5eb821d
Unicode 16.0.0 Support: Character encoding, character type info, and transliteration tables are all updated to Unicode 16.0.0, using the generator scripts contributed by Mike FABIAN (Red Hat). Changes in CHARMAP and WIDTH: Total added characters in newly generated CHARMAP: 5185 Total removed characters in newly generated WIDTH: 1 Total added characters in newly generated WIDTH: 170 The removed character from WIDTH is U+1171E AHOM CONSONANT SIGN MEDIAL RA. It changed like this: UnicodeData.txt 15.1.0: 1171E;AHOM CONSONANT SIGN MEDIAL RA;Mn;0;NSM;;;;;N;;;;; UnicodeData.txt 16.0.0: 1171E;AHOM CONSONANT SIGN MEDIAL RA;Mc;0;L;;;;;N;;;;; EastAsianWidth.txt 15.1.0: 1171D..1171F ; N # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA EastAsianWidth.txt 16.0.0: 1171E ; N # Mc AHOM CONSONANT SIGN MEDIAL RA I.e it changed from Mn (Mark Nonspacing) to Mc (Mark Spacing combining). So it should now have width 1 instead of 0, therefore it is OK that it was removed from WIDTH, characters not in WIDTH get width 1 by default. Nothing suspicious when browsing the list of the 170 added characters. 
Changes in ctype: alpha: Added 4452 characters in new ctype which were not in old ctype combining: Added 51 characters in new ctype which were not in old ctype combining_level3: Added 43 characters in new ctype which were not in old ctype graph: Added 5185 characters in new ctype which were not in old ctype lower: Added 25 characters in new ctype which were not in old ctype print: Added 5185 characters in new ctype which were not in old ctype punct: Missing 33 characters of old ctype in new ctype punct: Added 766 characters in new ctype which were not in old ctype tolower: Added 27 characters in new ctype which were not in old ctype totitle: Added 27 characters in new ctype which were not in old ctype toupper: Added 27 characters in new ctype which were not in old ctype upper: Added 27 characters in new ctype which were not in old ctype Nothing suspicious in the additions. About the 33 characters removed from `punct`: U+0363 - U+036F are identical in UnicodeData.txt. Difference in DerivedCoreProperties.txt: DerivedCoreProperties.txt 15.1.0: not there. DerivedCoreProperties.txt 16.0.0: 0363..036F ; Alphabetic # Mn [13] COMBINING LATIN SMALL LETTER A..COMBINING LATIN SMALL LETTER X So that’s the reason why they are added to `alpha` and removed from `punct`. Same for U+1DD3 - U+1DE6, they are identical in UnicodeData.txt but there is a difference in DerivedCoreProperties.txt: DerivedCoreProperties.txt 15.1.0: 1DE7..1DF4 ; Alphabetic # Mn [14] COMBINING LATIN SMALL LETTER ALPHA..COMBINING LATIN SMALL LETTER U WITH DIAERESIS DerivedCoreProperties.txt 16.0.0: 1DD3..1DF4 ; Alphabetic # Mn [34] COMBINING LATIN SMALL LETTER FLATTENED OPEN A ABOVE..COMBINING LATIN SMALL LETTER U WITH DIAERESIS So they became `Alphabetic` and were thus added to `alpha` and removed from `punct`. Resolves: BZ #32168 Reviewed-by: Carlos O'Donell <carlos@redhat.com>
153 lines
5.5 KiB
Makefile
153 lines
5.5 KiB
Makefile
# Copyright (C) 2015-2024 Free Software Foundation, Inc.
|
|
# Copyright The GNU Toolchain Authors.
|
|
# This file is part of the GNU C Library.
|
|
|
|
# The GNU C Library is free software; you can redistribute it and/or
|
|
# modify it under the terms of the GNU Lesser General Public
|
|
# License as published by the Free Software Foundation; either
|
|
# version 2.1 of the License, or (at your option) any later version.
|
|
|
|
# The GNU C Library is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
# Lesser General Public License for more details.
|
|
|
|
# You should have received a copy of the GNU Lesser General Public
|
|
# License along with the GNU C Library; if not, see
|
|
# <https://www.gnu.org/licenses/>.
|
|
|
|
# Makefile for generating and updating Unicode-extracted files.
|
|
|
|
|
|
# This Makefile is NOT used as part of the GNU libc build. It needs
|
|
# to be run manually, within the source tree, at Unicode upgrades
|
|
# (change UNICODE_VERSION below), to update ../locales/i18n_ctype ctype
|
|
# information (part of the file is preserved, so don't wipe it all
|
|
# out), and ../charmaps/UTF-8.
|
|
|
|
# Use make all to generate the files used in the glibc build out of
|
|
# the original Unicode files; make check to verify that they are what
|
|
# we expect; make install to copy them to the location expected by the
|
|
# glibc build; and make clean to remove all generated files.
|
|
|
|
# We keep a local copy of the downloaded Unicode files, to avoid
|
|
# running afoul of the LGPL corresponding sources requirements, even
|
|
# though it's not clear that they are preferred over the generated
|
|
# files for making modifications.
|
|
|
|
|
|
# Unicode release the tables are generated from; it is passed to every
# generator script and selects the download directory.
UNICODE_VERSION = 16.0.0

# External tools used by the recipes.
PYTHON3 = python3
WGET = wget

# Unicode data files fetched by the "downloads" target.
DOWNLOADS = UnicodeData.txt DerivedCoreProperties.txt \
            EastAsianWidth.txt HangulSyllableType.txt

# Local copy of the Unicode license text.
LICENSE = unicode-license.txt

# Files built by "make all" and copied into the tree by "make install".
GENERATED = i18n_ctype tr_TR UTF-8 \
            translit_combining translit_compat translit_circle \
            translit_cjk_compat translit_font translit_fraction

# Compatibility reports inspected by "make check".
REPORTS = i18n_ctype-report UTF-8-report
|
|
|
|
# Default goal: build every generated file listed in $(GENERATED).
all: $(GENERATED)
|
|
|
|
# Run both compatibility checks (ctype table and UTF-8 charmap).
check: check-i18n_ctype check-UTF-8
|
|
|
|
# Copy the generated files over their checked-in counterparts.  The charmap
# goes to ../charmaps, everything else to ../locales.  "cp -p" preserves the
# timestamps of the generated files.
install:
	cp -p i18n_ctype ../locales/i18n_ctype
	cp -p tr_TR ../locales/tr_TR
	cp -p UTF-8 ../charmaps/UTF-8
	cp -p translit_combining ../locales/translit_combining
	cp -p translit_compat ../locales/translit_compat
	cp -p translit_circle ../locales/translit_circle
	cp -p translit_cjk_compat ../locales/translit_cjk_compat
	cp -p translit_font ../locales/translit_font
	cp -p translit_fraction ../locales/translit_fraction
|
|
|
|
# mostlyclean removes the reports and the generated files; clean does the
# same and also removes the __pycache__ directory.  The leading "-" keeps
# make going if rm reports an error.
mostlyclean:
	-rm -f $(REPORTS) $(GENERATED)

clean: mostlyclean
	-rm -rf __pycache__
|
|
|
|
# Targets that name actions rather than files.  The two check-* helpers are
# listed as well, so that a stray file with one of those names cannot make
# "make check" silently skip the verification.
.PHONY: all check check-i18n_ctype check-UTF-8 clean mostlyclean install
|
|
|
|
# Generate i18n_ctype from the Unicode data files.  The checked-in
# ../locales/i18n_ctype is an input too (-i), to preserve its non-ctype
# information in the generated file.
i18n_ctype: UnicodeData.txt DerivedCoreProperties.txt \
            ../locales/i18n_ctype gen_unicode_ctype.py
	$(PYTHON3) gen_unicode_ctype.py \
	  -u UnicodeData.txt \
	  -d DerivedCoreProperties.txt \
	  -i ../locales/i18n_ctype \
	  -o $@ \
	  --unicode_version $(UNICODE_VERSION)
|
|
|
|
# Compare the freshly generated i18n_ctype (-n) with the checked-in
# ../locales/i18n_ctype (-o) and store the script's output as the report.
#
# The output is written to a temporary file and renamed only on success.
# Redirecting straight into $@ would leave a truncated report behind when
# the script fails; that file would look up to date on the next run and
# check-i18n_ctype would then find nothing to complain about.
i18n_ctype-report: i18n_ctype ../locales/i18n_ctype \
                   ctype_compatibility.py ctype_compatibility_test_cases.py
	$(PYTHON3) ./ctype_compatibility.py -o ../locales/i18n_ctype \
	  -n i18n_ctype -a -m > $@.tmp \
	  || { rm -f $@.tmp; exit 1; }
	mv -f $@.tmp $@
|
|
|
|
# Fail if the report has a "Missing"/"Added" entry or a "Number of errors"
# line whose value does not start with 0.  grep prints the offending lines
# before the request for manual verification.
check-i18n_ctype: i18n_ctype-report
	@if grep '\(Missing\|Added\) [^0]\|^Number of errors[^=]* = [^0]' \
	    i18n_ctype-report; then \
	  echo manual verification required; \
	  exit 1; \
	fi
|
|
|
|
# Generate tr_TR with the same script as i18n_ctype, run with --turkish.
# The checked-in ../locales/tr_TR is an input too (-i), to preserve its
# non-ctype information in the generated file.
tr_TR: UnicodeData.txt DerivedCoreProperties.txt \
       ../locales/tr_TR gen_unicode_ctype.py
	$(PYTHON3) gen_unicode_ctype.py \
	  -u UnicodeData.txt \
	  -d DerivedCoreProperties.txt \
	  -i ../locales/tr_TR \
	  -o $@ \
	  --unicode_version $(UNICODE_VERSION) --turkish
|
|
|
|
# Generate the UTF-8 charmap from the four Unicode data files.
# NOTE(review): no output option is passed, so utf8_gen.py presumably
# writes ./UTF-8 on its own -- confirm against the script.
UTF-8: UnicodeData.txt DerivedCoreProperties.txt EastAsianWidth.txt \
       HangulSyllableType.txt utf8_gen.py
	$(PYTHON3) utf8_gen.py \
	  -u UnicodeData.txt \
	  -d DerivedCoreProperties.txt \
	  -e EastAsianWidth.txt \
	  -k HangulSyllableType.txt \
	  --unicode_version $(UNICODE_VERSION)
|
|
|
|
# Compare the freshly generated UTF-8 charmap (-n) with the checked-in
# ../charmaps/UTF-8 (-o) and store the script's output as the report.
#
# The output is written to a temporary file and renamed only on success.
# Redirecting straight into $@ would leave a truncated report behind when
# the script fails; that file would look up to date on the next run and
# check-UTF-8 would then find nothing to complain about.
UTF-8-report: UTF-8 ../charmaps/UTF-8 utf8_compatibility.py
	$(PYTHON3) ./utf8_compatibility.py -u UnicodeData.txt \
	  -e EastAsianWidth.txt -o ../charmaps/UTF-8 \
	  -n UTF-8 -a -m -c > $@.tmp \
	  || { rm -f $@.tmp; exit 1; }
	mv -f $@.tmp $@
|
|
|
|
# Fail if any "Total...:" line in the report has a value that does not
# start with 0.  grep prints the offending lines before the request for
# manual verification.
check-UTF-8: UTF-8-report
	@if grep '^Total.*: [^0]' UTF-8-report; then \
	  echo manual verification required; \
	  exit 1; \
	fi
|
|
|
|
# Transliteration tables.  Every translit_<name> file is produced from
# UnicodeData.txt by the matching gen_translit_<name>.py script, invoked
# with identical options, so one static pattern rule covers all of them.
# The target list is taken from $(GENERATED) to keep the two in sync: a
# new translit_* entry there only needs its generator script to exist.
translit_tables := $(filter translit_%,$(GENERATED))

$(translit_tables): translit_%: UnicodeData.txt gen_translit_%.py
	$(PYTHON3) ./gen_$@.py -u UnicodeData.txt \
	  -o $@ --unicode_version $(UNICODE_VERSION)
|
|
|
|
# downloads fetches any missing Unicode data file and the license text;
# clean-downloads removes those local copies again.
downloads: $(DOWNLOADS) $(LICENSE)

clean-downloads:
	-rm -f $(DOWNLOADS) $(LICENSE)

.PHONY: downloads clean-downloads
|
|
|
|
# Fetch one Unicode data file, named by $@, from the ucd directory of the
# selected Unicode release.  HTTPS is used so that the data the generated
# tables are derived from cannot be altered in transit.
$(DOWNLOADS):
	$(WGET) https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/$@
|
|
|
|
# Fetch the Unicode license text into $(LICENSE).  With --output-document
# wget creates the output file before the transfer, so a failed download
# would leave an empty file that looks up to date; remove it in that case.
$(LICENSE):
	$(WGET) https://www.unicode.org/license.txt --output-document=$@ \
	  || { rm -f $@; exit 1; }
|