Break clashing-names test function out of CldrAccess.__checkEnum()
Moving it makes it easier to document what it's up to and why, while leaving __checkEnum() easier to read; and I'm going to need it elsewhere anyway. This makes no difference to generated data.

Task-number: QTBUG-94460
Change-Id: I684375bc926d5d54928fbf5b5e08978528aef487
Reviewed-by: Ievgenii Meshcheriakov <ievgenii.meshcheriakov@qt.io>
This commit is contained in:
parent
4f686b7b78
commit
e212b3633c
@ -16,6 +16,7 @@ from weakref import WeakValueDictionary as CacheDict
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from ldml import Error, Node, XmlScanner, Supplement, LocaleScanner
|
from ldml import Error, Node, XmlScanner, Supplement, LocaleScanner
|
||||||
|
from localetools import names_clash
|
||||||
from qlocalexml import Locale
|
from qlocalexml import Locale
|
||||||
|
|
||||||
class CldrReader (object):
|
class CldrReader (object):
|
||||||
@ -353,10 +354,7 @@ class CldrAccess (object):
|
|||||||
language, script, territory, variant)
|
language, script, territory, variant)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def __checkEnum(given, proper, scraps,
|
def __checkEnum(given, proper, scraps):
|
||||||
remap = { 'å': 'a', 'ã': 'a', 'ç': 'c', 'é': 'e', 'í': 'i', 'ü': 'u'},
|
|
||||||
prefix = { 'St.': 'Saint', 'U.S.': 'United States' },
|
|
||||||
skip = '\u02bc'):
|
|
||||||
# Each is a { code: full name } mapping
|
# Each is a { code: full name } mapping
|
||||||
for code, name in given.items():
|
for code, name in given.items():
|
||||||
try: right = proper[code]
|
try: right = proper[code]
|
||||||
@ -366,19 +364,9 @@ class CldrAccess (object):
|
|||||||
if code not in scraps:
|
if code not in scraps:
|
||||||
yield name, f'[Found no CLDR name for code {code}]'
|
yield name, f'[Found no CLDR name for code {code}]'
|
||||||
continue
|
continue
|
||||||
if name == right: continue
|
cleaned = names_clash(right, name)
|
||||||
ok = right.replace('&', 'And')
|
if cleaned:
|
||||||
for k, v in prefix.items():
|
yield name, cleaned
|
||||||
if ok.startswith(k + ' '):
|
|
||||||
ok = v + ok[len(k):]
|
|
||||||
while '(' in ok:
|
|
||||||
try: f, t = ok.index('('), ok.index(')')
|
|
||||||
except ValueError: break
|
|
||||||
ok = ok[:f].rstrip() + ' ' + ok[t:].lstrip()
|
|
||||||
if ''.join(ch for ch in name.lower() if not ch.isspace()) in ''.join(
|
|
||||||
remap.get(ch, ch) for ch in ok.lower() if ch.isalpha() and ch not in skip):
|
|
||||||
continue
|
|
||||||
yield name, ok
|
|
||||||
|
|
||||||
def checkEnumData(self, grumble):
|
def checkEnumData(self, grumble):
|
||||||
scraps = set()
|
scraps = set()
|
||||||
|
@ -48,6 +48,41 @@ def wrap_list(lst, perline=20):
|
|||||||
yield head
|
yield head
|
||||||
return ",\n".join(", ".join(x) for x in split(lst, perline))
|
return ",\n".join(", ".join(x) for x in split(lst, perline))
|
||||||
|
|
||||||
|
def names_clash(cldr, enum):
    """Report whether a CLDR name differs materially from an enum name.

    First argument, cldr, is the name CLDR gives for some language,
    script or territory; second, enum, is the name enumdata.py gives
    for it. If these are enough alike, returns None; otherwise, the
    string that results from adapting cldr to be more like how
    enumdata.py would express it (non-empty provided cldr itself is
    non-empty, so callers may test the result for truth).

    The names count as alike when they are equal or when enum,
    lower-cased and with its white-space removed, appears anywhere
    within the adapted cldr once that has been lower-cased, stripped
    of non-letters and had some accented letters replaced by their
    unaccented forms.
    """
    if cldr == enum:
        return None

    # Some common substitutions:
    cldr = cldr.replace('&', 'And')
    prefix = {'St.': 'Saint', 'U.S.': 'United States'}
    for k, v in prefix.items():
        if cldr.startswith(k + ' '):
            cldr = v + cldr[len(k):]

    # Chop out any parenthesised part, e.g. (Burma). Everything from
    # the first '(' to the last ')' goes, so one pass is enough: what
    # remains has no ')' after any surviving '('. Only do it when the
    # ')' really does follow the '('; looping on a misordered pair
    # (e.g. 'x) a ( b') would re-insert the '(' and never terminate.
    f, t = cldr.find('('), cldr.rfind(')')
    if 0 <= f < t:
        cldr = cldr[:f].rstrip() + ' ' + cldr[t + 1:].lstrip()

    # Various accented letters:
    remap = {'å': 'a', 'ã': 'a', 'ç': 'c', 'é': 'e', 'í': 'i', 'ô': 'o', 'ü': 'u'}
    skip = '\u02bc'  # Punctuation for which .isalpha() is true.
    # Let cldr match (ignoring non-letters and case) any substring as enum:
    squashed_enum = ''.join(enum.lower().split())
    squashed_cldr = ''.join(
        remap.get(ch, ch) for ch in cldr.lower() if ch.isalpha() and ch not in skip)
    if squashed_enum in squashed_cldr:
        return None
    return cldr
|
||||||
|
|
||||||
|
|
||||||
@contextmanager
|
@contextmanager
|
||||||
def AtomicRenameTemporaryFile(originalLocation: Path, *, prefix: str, dir: Path):
|
def AtomicRenameTemporaryFile(originalLocation: Path, *, prefix: str, dir: Path):
|
||||||
|
Loading…
Reference in New Issue
Block a user