Break clashing-names test function out of CldrAccess.__checkEnum()

Moving it makes it easier to document what it's up to and why, while leaving __checkEnum() easier to read; and I'm going to need it elsewhere anyway. This makes no difference to generated data.

Task-number: QTBUG-94460
Change-Id: I684375bc926d5d54928fbf5b5e08978528aef487
Reviewed-by: Ievgenii Meshcheriakov <ievgenii.meshcheriakov@qt.io>
2023-08-01 11:48:37 +02:00 · 2023-08-01 11:48:37 +02:00 · e212b3633c
commit e212b3633c
parent 4f686b7b78
2 changed files with 40 additions and 17 deletions
--- a/util/locale_database/cldr.py
+++ b/util/locale_database/cldr.py
@ -16,6 +16,7 @@ from weakref import WeakValueDictionary as CacheDict
 from pathlib import Path
 from ldml import Error, Node, XmlScanner, Supplement, LocaleScanner
 from localetools import names_clash
 from qlocalexml import Locale
 class CldrReader (object):
@ -353,10 +354,7 @@ class CldrAccess (object):
                    language, script, territory, variant)
    @staticmethod
-    def __checkEnum(given, proper, scraps,
+    def __checkEnum(given, proper, scraps):
                    remap = { 'å': 'a', 'ã': 'a', 'ç': 'c', 'é': 'e', 'í': 'i', 'ü': 'u'},
                    prefix = { 'St.': 'Saint', 'U.S.': 'United States' },
                    skip = '\u02bc'):
        # Each is a { code: full name } mapping
        for code, name in given.items():
            try: right = proper[code]
@ -366,19 +364,9 @@ class CldrAccess (object):
                if code not in scraps:
                    yield name, f'[Found no CLDR name for code {code}]'
                continue
-            if name == right: continue
+            cleaned = names_clash(right, name)
-            ok = right.replace('&', 'And')
+            if cleaned:
-            for k, v in prefix.items():
+                yield name, cleaned
                if ok.startswith(k + ' '):
                    ok = v + ok[len(k):]
            while '(' in ok:
                try: f, t = ok.index('('), ok.index(')')
                except ValueError: break
                ok = ok[:f].rstrip() + ' ' + ok[t:].lstrip()
            if ''.join(ch for ch in name.lower() if not ch.isspace()) in ''.join(
                remap.get(ch, ch) for ch in ok.lower() if ch.isalpha() and ch not in skip):
                continue
            yield name, ok
    def checkEnumData(self, grumble):
        scraps = set()
--- a/util/locale_database/localetools.py
+++ b/util/locale_database/localetools.py
@ -48,6 +48,41 @@ def wrap_list(lst, perline=20):
            yield head
    return ",\n".join(", ".join(x) for x in split(lst, perline))
 def names_clash(cldr, enum):
     """Report whether the reader might not recognize cldr as the name of enum

     First argument, cldr, is the name CLDR gives for some language,
     script or territory; second, enum, is the name enumdata.py gives
     for it.

     Returns None if the two are enough alike. Otherwise, returns the
     string that results from adapting cldr to be more like how
     enumdata.py would express it; that string is non-empty (hence
     true) whenever cldr is non-empty.

     The names count as alike when they are equal or when enum, with
     its spaces removed and lower-cased, appears as a substring of the
     adapted cldr reduced to its lower-cased letters (with a few
     accented letters mapped to their unaccented forms)."""
     if cldr == enum:
         return None

     # Some common substitutions:
     cldr = cldr.replace('&', 'And')
     prefix = { 'St.': 'Saint', 'U.S.': 'United States' }
     for k, v in prefix.items():
         if cldr.startswith(k + ' '):
             cldr = v + cldr[len(k):]

     # Chop out any parenthesised part, e.g. (Burma). Everything from
     # the first '(' to the last ')' is dropped in one go.
     while '(' in cldr:
         start, end = cldr.index('('), cldr.rfind(')')
         # Stop if there is no ')' after the first '('. Without the
         # ordering check, a stray ')' before the '(' would make the
         # slices below overlap, so the text would grow on each pass
         # and the loop would never finish.
         if end < start:
             break
         cldr = cldr[:start].rstrip() + ' ' + cldr[end + 1:].lstrip()

     # Various accented letters:
     remap = { 'å': 'a', 'ã': 'a', 'ç': 'c', 'é': 'e', 'í': 'i', 'ô': 'o', 'ü': 'u'}
     skip = '\u02bc' # Punctuation for which .isalpha() is true.
     # Let cldr match (ignoring non-letters and case) any substring as enum:
     letters = ''.join(remap.get(ch, ch) for ch in cldr.lower()
                       if ch.isalpha() and ch not in skip)
     if ''.join(enum.lower().split()) in letters:
         return None
     return cldr
@contextmanager
 def AtomicRenameTemporaryFile(originalLocation: Path, *, prefix: str, dir: Path):