Implement binary search in QLocale's likely sub-tag lookup
Follow through on a comment from 2012: sort the likely subtag array (in the CLDR update script) and use bsearch to find entries in it. This simplifies QLocaleXmlReader.likelyMap() slightly, moving the detection of the last entry to LocaleDataWriter.likelySubtags(), but requires collecting all likely sub-tag mapping pairs (rather than just passing them through from read to write via generators) in order to sort them.

Change-Id: Ieb6875ccde1ddbd475ae68c0766a666ec32b7005
Reviewed-by: Mårten Nordheim <marten.nordheim@qt.io>
This commit is contained in:
parent
246ba8ca61
commit
a9e4bf7eef
@ -201,20 +201,42 @@ QLatin1String QLocalePrivate::countryToCode(QLocale::Country country)
|
||||
return QLatin1String(reinterpret_cast<const char*>(c), c[2] == 0 ? 2 : 3);
|
||||
}
|
||||
|
||||
static int cmpLikelySubtag(const void *lhs, const void *rhs)
|
||||
{
|
||||
// Must match the comparison LocaleDataWriter.likelySubtags() uses when
|
||||
// sorting, see qtbase/util/locale_database.qlocalexml2cpp.py
|
||||
const auto compare = [](int lhs, int rhs) {
|
||||
// 0 sorts after all other values; lhs and rhs are passed ushort values.
|
||||
const int huge = 0x10000;
|
||||
return (lhs ? lhs : huge) - (rhs ? rhs : huge);
|
||||
};
|
||||
const auto &left = *reinterpret_cast<const QLocaleId *>(lhs);
|
||||
const auto &right = *reinterpret_cast<const QLocaleId *>(rhs);
|
||||
if (int cmp = compare(left.language_id, right.language_id))
|
||||
return cmp;
|
||||
if (int cmp = compare(left.country_id, right.country_id))
|
||||
return cmp;
|
||||
return compare(left.script_id, right.script_id);
|
||||
}
|
||||
|
||||
// http://www.unicode.org/reports/tr35/#Likely_Subtags
|
||||
// Replaces localeId with its likely-subtags expansion, if the table has one.
//
// Looks localeId up among the keys of likely_subtags. On a hit, localeId is
// overwritten with the mapped value and true is returned; otherwise localeId
// is left as it was and false is returned.
static bool addLikelySubtags(QLocaleId &localeId)
{
    // Array is overtly of QLocaleId but to be interpreted as of pairs, mapping
    // each even entry to the following odd entry. So search only the even
    // entries for a match and return the matching odd entry, if found.
    static_assert(std::size(likely_subtags) % 2 == 0);

    // Treat each (key, value) pair as one bsearch() element of twice the
    // size; cmpLikelySubtag() only ever reads the leading QLocaleId of an
    // element, i.e. the key. This relies on the keys being stored in the
    // order cmpLikelySubtag() defines.
    const auto *p = static_cast<const QLocaleId *>(
            bsearch(&localeId,
                    likely_subtags, std::size(likely_subtags) / 2, 2 * sizeof(QLocaleId),
                    cmpLikelySubtag));
    if (!p)
        return false;

    // A hit must lie inside the table and on a key (even) slot.
    Q_ASSERT(p >= likely_subtags && p < likely_subtags + std::size(likely_subtags));
    Q_ASSERT((p - likely_subtags) % 2 == 0);

    localeId = p[1];
    return true;
}
|
||||
|
||||
QLocaleId QLocaleId::withLikelySubtagsAdded() const
|
||||
{
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -183,12 +183,11 @@ class QLocaleXmlReader (object):
|
||||
def ids(t):
|
||||
return tuple(x[0] for x in t)
|
||||
|
||||
for i, pair in enumerate(self.__likely, 1):
|
||||
for pair in self.__likely:
|
||||
have = self.__fromNames(pair[0])
|
||||
give = self.__fromNames(pair[1])
|
||||
yield ('_'.join(tag(have)), ids(have),
|
||||
'_'.join(tag(give)), ids(give),
|
||||
i == len(self.__likely))
|
||||
'_'.join(tag(give)), ids(give))
|
||||
|
||||
def defaultMap(self):
|
||||
"""Map language and script to their default country by ID.
|
||||
|
@ -163,11 +163,26 @@ class LocaleSourceEditor (SourceFileEditor):
|
||||
|
||||
class LocaleDataWriter (LocaleSourceEditor):
|
||||
def likelySubtags(self, likely):
    """Write the likely_subtags[] table of QLocaleId pairs.

    likely is an iterable of (had, have, got, give) entries: had and
    got are the textual tags used in each row's trailing comment, have
    and give are the matching numeric id triples written as the row's
    key and value. Rows are written in sorted order (see below) and
    every row but the last is followed by a comma.
    """
    # First sort likely, so that we can use binary search in C++
    # code. Although the entries are (lang, script, region), sort
    # as (lang, region, script) and sort 0 after all non-zero
    # values. This ensures that, when several mappings partially
    # match a requested locale, the one we should prefer to use
    # appears first.
    huge = 0x10000 # > any ushort; all tag values are ushort
    def keyLikely(entry):
        have = entry[1] # Numeric id triple
        return have[0] or huge, have[2] or huge, have[1] or huge # language, region, script

    # Materialise the (possibly lazy) input, as sorting needs all of it.
    likely = sorted(likely, key=keyLikely)
    final = len(likely) # 1-based index of the row that takes no comma

    self.writer.write('static const QLocaleId likely_subtags[] = {\n')
    for i, (had, have, got, give) in enumerate(likely, 1):
        self.writer.write(' {{ {:3d}, {:3d}, {:3d} }}'.format(*have))
        self.writer.write(', {{ {:3d}, {:3d}, {:3d} }}'.format(*give))
        self.writer.write(' ' if i == final else ',')
        self.writer.write(' // {} -> {}\n'.format(had, got))
    self.writer.write('};\n\n')
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user