Break clashing-names test function out of CldrAccess.__checkEnum()
Moving it makes it easier to document what it's up to and why, while leaving __checkEnum() easier to read; and I'm going to need it elsewhere anyway. This makes no difference to generated data. Task-number: QTBUG-94460 Change-Id: I684375bc926d5d54928fbf5b5e08978528aef487 Reviewed-by: Ievgenii Meshcheriakov <ievgenii.meshcheriakov@qt.io>
This commit is contained in:
parent
4f686b7b78
commit
e212b3633c
@ -16,6 +16,7 @@ from weakref import WeakValueDictionary as CacheDict
|
||||
from pathlib import Path
|
||||
|
||||
from ldml import Error, Node, XmlScanner, Supplement, LocaleScanner
|
||||
from localetools import names_clash
|
||||
from qlocalexml import Locale
|
||||
|
||||
class CldrReader (object):
|
||||
@ -353,10 +354,7 @@ class CldrAccess (object):
|
||||
language, script, territory, variant)
|
||||
|
||||
@staticmethod
|
||||
def __checkEnum(given, proper, scraps,
|
||||
remap = { 'å': 'a', 'ã': 'a', 'ç': 'c', 'é': 'e', 'í': 'i', 'ü': 'u'},
|
||||
prefix = { 'St.': 'Saint', 'U.S.': 'United States' },
|
||||
skip = '\u02bc'):
|
||||
def __checkEnum(given, proper, scraps):
|
||||
# Each is a { code: full name } mapping
|
||||
for code, name in given.items():
|
||||
try: right = proper[code]
|
||||
@ -366,19 +364,9 @@ class CldrAccess (object):
|
||||
if code not in scraps:
|
||||
yield name, f'[Found no CLDR name for code {code}]'
|
||||
continue
|
||||
if name == right: continue
|
||||
ok = right.replace('&', 'And')
|
||||
for k, v in prefix.items():
|
||||
if ok.startswith(k + ' '):
|
||||
ok = v + ok[len(k):]
|
||||
while '(' in ok:
|
||||
try: f, t = ok.index('('), ok.index(')')
|
||||
except ValueError: break
|
||||
ok = ok[:f].rstrip() + ' ' + ok[t:].lstrip()
|
||||
if ''.join(ch for ch in name.lower() if not ch.isspace()) in ''.join(
|
||||
remap.get(ch, ch) for ch in ok.lower() if ch.isalpha() and ch not in skip):
|
||||
continue
|
||||
yield name, ok
|
||||
cleaned = names_clash(right, name)
|
||||
if cleaned:
|
||||
yield name, cleaned
|
||||
|
||||
def checkEnumData(self, grumble):
|
||||
scraps = set()
|
||||
|
@ -48,6 +48,41 @@ def wrap_list(lst, perline=20):
|
||||
yield head
|
||||
return ",\n".join(", ".join(x) for x in split(lst, perline))
|
||||
|
||||
def names_clash(cldr, enum):
    """Check whether the reader might not recognize cldr as the name of enum

    First argument, cldr, is the name CLDR gives for some language,
    script or territory; second, enum, is the name enumdata.py gives
    for it. If these are enough alike, returns None; otherwise, a
    non-empty string that results from adapting cldr to be more like
    how enumdata.py would express it.

    The names count as alike when they are equal or when enum, with
    its case and spacing ignored, appears as a substring of the
    letters of the adapted cldr (lower-cased, with some accented
    letters replaced by their unaccented forms)."""
    if cldr == enum:
        return None

    # Some common substitutions:
    cldr = cldr.replace('&', 'And')
    prefix = { 'St.': 'Saint', 'U.S.': 'United States' }
    for k, v in prefix.items():
        if cldr.startswith(k + ' '):
            cldr = v + cldr[len(k):]

    # Chop out any parenthesised part, e.g. (Burma):
    while '(' in cldr:
        try:
            f, t = cldr.index('('), cldr.rindex(')')
        except ValueError:
            break
        if t < f:
            # The last ')' precedes the first '(', so there is no
            # parenthesised part to chop; rebuilding the string from
            # these indices would leave the '(' in place and loop forever.
            break
        cldr = cldr[:f].rstrip() + ' ' + cldr[t + 1:].lstrip()

    # Various accented letters:
    remap = { 'å': 'a', 'ã': 'a', 'ç': 'c', 'é': 'e', 'í': 'i', 'ô': 'o', 'ü': 'u'}
    skip = '\u02bc' # Punctuation for which .isalpha() is true.
    # Accept enum (ignoring case and spacing) as any substring of the
    # letters of cldr:
    if ''.join(enum.lower().split()) in ''.join(
        remap.get(ch, ch) for ch in cldr.lower() if ch.isalpha() and ch not in skip):
        return None
    return cldr
|
||||
|
||||
|
||||
@contextmanager
|
||||
def AtomicRenameTemporaryFile(originalLocation: Path, *, prefix: str, dir: Path):
|
||||
|
Loading…
Reference in New Issue
Block a user