Break clashing-names test function out of CldrAccess.__checkEnum()

Moving it makes it easier to document what it's up to and why, while
leaving __checkEnum() easier to read; and I'm going to need it
elsewhere anyway. This makes no difference to generated data.

Task-number: QTBUG-94460
Change-Id: I684375bc926d5d54928fbf5b5e08978528aef487
Reviewed-by: Ievgenii Meshcheriakov <ievgenii.meshcheriakov@qt.io>
This commit is contained in:
Edward Welbourne 2023-08-01 11:48:37 +02:00
parent 4f686b7b78
commit e212b3633c
2 changed files with 40 additions and 17 deletions

View File

@ -16,6 +16,7 @@ from weakref import WeakValueDictionary as CacheDict
from pathlib import Path from pathlib import Path
from ldml import Error, Node, XmlScanner, Supplement, LocaleScanner from ldml import Error, Node, XmlScanner, Supplement, LocaleScanner
from localetools import names_clash
from qlocalexml import Locale from qlocalexml import Locale
class CldrReader (object): class CldrReader (object):
@ -353,10 +354,7 @@ class CldrAccess (object):
language, script, territory, variant) language, script, territory, variant)
@staticmethod @staticmethod
def __checkEnum(given, proper, scraps, def __checkEnum(given, proper, scraps):
remap = { 'å': 'a', 'ã': 'a', 'ç': 'c', 'é': 'e', 'í': 'i', 'ü': 'u'},
prefix = { 'St.': 'Saint', 'U.S.': 'United States' },
skip = '\u02bc'):
# Each is a { code: full name } mapping # Each is a { code: full name } mapping
for code, name in given.items(): for code, name in given.items():
try: right = proper[code] try: right = proper[code]
@ -366,19 +364,9 @@ class CldrAccess (object):
if code not in scraps: if code not in scraps:
yield name, f'[Found no CLDR name for code {code}]' yield name, f'[Found no CLDR name for code {code}]'
continue continue
if name == right: continue cleaned = names_clash(right, name)
ok = right.replace('&', 'And') if cleaned:
for k, v in prefix.items(): yield name, cleaned
if ok.startswith(k + ' '):
ok = v + ok[len(k):]
while '(' in ok:
try: f, t = ok.index('('), ok.index(')')
except ValueError: break
ok = ok[:f].rstrip() + ' ' + ok[t:].lstrip()
if ''.join(ch for ch in name.lower() if not ch.isspace()) in ''.join(
remap.get(ch, ch) for ch in ok.lower() if ch.isalpha() and ch not in skip):
continue
yield name, ok
def checkEnumData(self, grumble): def checkEnumData(self, grumble):
scraps = set() scraps = set()

View File

@ -48,6 +48,41 @@ def wrap_list(lst, perline=20):
yield head yield head
return ",\n".join(", ".join(x) for x in split(lst, perline)) return ",\n".join(", ".join(x) for x in split(lst, perline))
def names_clash(cldr, enum):
    """Report whether CLDR's name differs too much from enumdata.py's.

    First argument, cldr, is the name CLDR gives for some language,
    script or territory; second, enum, is the name enumdata.py gives
    for it. If these are enough alike, returns None; otherwise, a
    non-empty string that results from adapting cldr to be more like
    how enumdata.py would express it. The result is thus truthy
    exactly when the reader might not recognize cldr as the name of
    enum."""
    if cldr == enum:
        return None

    # Some common substitutions:
    cldr = cldr.replace('&', 'And')
    prefix = { 'St.': 'Saint', 'U.S.': 'United States' }
    for k, v in prefix.items():
        if cldr.startswith(k + ' '):
            cldr = v + cldr[len(k):]

    # Chop out any parenthesised part, e.g. (Burma):
    while '(' in cldr:
        try:
            f, t = cldr.index('('), cldr.rindex(')')
        except ValueError:
            break
        if t < f:
            # The only ')' precedes the first '(', so there is no
            # parenthesised part to remove. Splicing here would copy
            # the '(' back in and make the text grow on every pass,
            # so the loop would never end.
            break
        # Everything from the first '(' to the last ')' is dropped and
        # replaced by a single space; this always shortens the text.
        cldr = cldr[:f].rstrip() + ' ' + cldr[t + 1:].lstrip()

    # Various accented letters:
    remap = { 'å': 'a', 'ã': 'a', 'ç': 'c', 'é': 'e', 'í': 'i', 'ô': 'o', 'ü': 'u'}
    skip = '\u02bc' # Punctuation for which .isalpha() is true.
    # Accept a match if enum (lower-cased, white-space removed) appears
    # anywhere within cldr reduced to its lower-cased, unaccented letters:
    if ''.join(enum.lower().split()) in ''.join(
            remap.get(ch, ch) for ch in cldr.lower() if ch.isalpha() and ch not in skip):
        return None
    return cldr
@contextmanager @contextmanager
def AtomicRenameTemporaryFile(originalLocation: Path, *, prefix: str, dir: Path): def AtomicRenameTemporaryFile(originalLocation: Path, *, prefix: str, dir: Path):