Use likelySubtags to instantiate a locale id from its short form

...as described in http://www.unicode.org/reports/tr35/#Likely_Subtags.
This is much more effective than the current "guessing" algorithm
and makes it possible to instantiate a locale from the script or territory code only.

Change-Id: I674f8476e65b01c56960b6e83a1a346df0715274
Reviewed-by: Lars Knoll <lars.knoll@digia.com>
This commit is contained in:
Konstantin Ritt 2012-11-21 06:08:24 +02:00 committed by The Qt Project
parent e7c79face6
commit fe8962d3a5
7 changed files with 1521 additions and 1155 deletions

View File

@ -208,17 +208,94 @@ QString QLocalePrivate::countryCode() const
return code;
}
QString QLocalePrivate::bcp47Name() const
// http://www.unicode.org/reports/tr35/#Likely_Subtags
// Looks localeId up in the likely_subtags table and, on an exact match,
// overwrites it with the mapped id.
// The table is walked two entries at a time: p[0] is the key that localeId is
// compared against and p[1] is the value that replaces it.
// Returns true if localeId was replaced, false if no key matched (localeId is
// then left untouched).
static bool addLikelySubtags(QLocaleId &localeId)
{
// NOTE(review): the next line refers to m_data, which this static function
// does not have; it looks like a removed line of the old bcp47Name() that the
// diff rendering interleaved here -- confirm against the real file.
if (m_data->m_language_id == QLocale::AnyLanguage)
// ### optimize with bsearch
const int likely_subtags_count = sizeof(likely_subtags) / sizeof(likely_subtags[0]);
const QLocaleId *p = likely_subtags;
const QLocaleId *const e = p + likely_subtags_count;
// linear scan over (key, value) pairs, hence the stride of 2
for ( ; p < e; p += 2) {
if (localeId == p[0]) {
localeId = p[1];
return true;
}
}
return false;
}
// Returns this id expanded with likely subtags, following the lookup scheme of
// http://www.unicode.org/reports/tr35/#Likely_Subtags.
// Progressively less specific keys are probed via addLikelySubtags(); whenever
// a reduced key matches, the subtags that were blanked for the probe are
// restored from this id, so explicitly given values are kept.
// If nothing matches, the id is returned unchanged.
QLocaleId QLocaleId::withLikelySubtagsAdded() const
{
    // language_script_region: probe the id as is (skipped when all fields are 0)
    if (language_id != 0 || script_id != 0 || country_id != 0) {
        QLocaleId exact = *this;
        if (addLikelySubtags(exact))
            return exact;
    }

    // language_script: probe without the country, then put our country back
    if (country_id != 0) {
        QLocaleId probe = *this;
        probe.country_id = 0;
        if (addLikelySubtags(probe)) {
            probe.country_id = country_id;
            return probe;
        }
    }

    // language_region: probe without the script, then put our script back
    if (script_id != 0) {
        QLocaleId probe = *this;
        probe.script_id = 0;
        if (addLikelySubtags(probe)) {
            probe.script_id = script_id;
            return probe;
        }
    }

    // language: only worth probing when both script and country were given,
    // otherwise one of the probes above already covered this key
    if (script_id != 0 && country_id != 0) {
        QLocaleId probe = *this;
        probe.script_id = 0;
        probe.country_id = 0;
        if (addLikelySubtags(probe)) {
            probe.script_id = script_id;
            probe.country_id = country_id;
            return probe;
        }
    }

    return *this;
}
// Returns the shortest id that still expands (via withLikelySubtagsAdded())
// to the same fully expanded id as this one.
// Candidates are tried in the order language, language_region,
// language_script; the first one that expands to the same result wins.
// If none does, the fully expanded id itself is returned.
QLocaleId QLocaleId::withLikelySubtagsRemoved() const
{
    const QLocaleId maximized = withLikelySubtagsAdded();

    struct Candidate
    {
        QLocaleId id;
        bool applicable; // false when the candidate would just repeat an earlier one
    };
    const Candidate candidates[] = {
        { QLocaleId::fromIds(language_id, 0, 0), true },                         // language
        { QLocaleId::fromIds(language_id, 0, country_id), country_id != 0 },     // language_region
        { QLocaleId::fromIds(language_id, script_id, 0), script_id != 0 }        // language_script
    };
    const int candidateCount = sizeof(candidates) / sizeof(candidates[0]);

    for (int i = 0; i < candidateCount; ++i) {
        if (!candidates[i].applicable)
            continue;
        if (candidates[i].id.withLikelySubtagsAdded() == maximized)
            return candidates[i].id;
    }

    return maximized;
}
QString QLocaleId::bcp47Name() const
{
if (language_id == QLocale::AnyLanguage)
return QString();
if (m_data->m_language_id == QLocale::C)
if (language_id == QLocale::C)
return QStringLiteral("C");
const unsigned char *lang = language_code_list + 3*(uint(m_data->m_language_id));
const unsigned char *lang = language_code_list + 3*uint(language_id);
const unsigned char *script =
(m_data->m_script_id != QLocale::AnyScript ? script_code_list + 4*(uint(m_data->m_script_id)) : 0);
(script_id != QLocale::AnyScript ? script_code_list + 4*uint(script_id) : 0);
const unsigned char *country =
(m_data->m_country_id != QLocale::AnyCountry ? country_code_list + 3*(uint(m_data->m_country_id)) : 0);
(country_id != QLocale::AnyCountry ? country_code_list + 3*uint(country_id) : 0);
char len = (lang[2] != 0 ? 3 : 2) + (script ? 4+1 : 0) + (country ? (country[2] != 0 ? 3 : 2)+1 : 0);
QString name(len, Qt::Uninitialized);
QChar *uc = name.data();
@ -243,42 +320,59 @@ QString QLocalePrivate::bcp47Name() const
return name;
}
// Returns the BCP47 name of this locale built from its minimized id
// (likely subtags removed).
// AnyLanguage yields an empty string and the C locale yields "C".
QString QLocalePrivate::bcp47Name() const
{
    switch (m_data->m_language_id) {
    case QLocale::AnyLanguage:
        return QString();
    case QLocale::C:
        return QStringLiteral("C");
    default:
        break;
    }

    return QLocaleId::fromIds(m_data->m_language_id, m_data->m_script_id, m_data->m_country_id)
            .withLikelySubtagsRemoved()
            .bcp47Name();
}
const QLocaleData *QLocaleData::findLocaleData(QLocale::Language language, QLocale::Script script, QLocale::Country country)
{
const unsigned language_id = language;
const unsigned script_id = script;
const unsigned country_id = country;
QLocaleId localeId = QLocaleId::fromIds(language, script, country);
localeId = localeId.withLikelySubtagsAdded();
uint idx = locale_index[language_id];
uint idx = locale_index[localeId.language_id];
const QLocaleData *data = locale_data + idx;
if (idx == 0) // default language has no associated country
return data;
if (script == QLocale::AnyScript && country == QLocale::AnyCountry)
return data;
Q_ASSERT(data->m_language_id == localeId.language_id);
Q_ASSERT(data->m_language_id == language_id);
if (country == QLocale::AnyCountry) {
while (data->m_language_id == language_id && data->m_script_id != script_id)
++data;
if (data->m_language_id == language_id && data->m_script_id == script_id)
return data;
} else if (script == QLocale::AnyScript) {
while (data->m_language_id == language_id) {
if (data->m_script_id == script_id && data->m_country_id == country_id)
return data;
++data;
}
} else {
if (localeId.script_id != QLocale::AnyScript && localeId.country_id != QLocale::AnyCountry) {
// both script and country are explicitly specified
while (data->m_language_id == language_id) {
if (data->m_script_id == script_id && data->m_country_id == country_id)
do {
if (data->m_script_id == localeId.script_id && data->m_country_id == localeId.country_id)
return data;
++data;
} while (data->m_language_id == localeId.language_id);
// no match; try again with default script
localeId.script_id = QLocale::AnyScript;
data = locale_data + idx;
}
if (localeId.script_id == QLocale::AnyScript && localeId.country_id == QLocale::AnyCountry)
return data;
if (localeId.script_id == QLocale::AnyScript) {
do {
if (data->m_country_id == localeId.country_id)
return data;
++data;
} while (data->m_language_id == localeId.language_id);
} else if (localeId.country_id == QLocale::AnyCountry) {
do {
if (data->m_script_id == localeId.script_id)
return data;
++data;
} while (data->m_language_id == localeId.language_id);
}
return locale_data + idx;
@ -3227,7 +3321,7 @@ QString QLocale::toCurrencyString(double value, const QString &symbol) const
\since 4.8
Returns an ordered list of locale names for translation purposes in
preference order.
preference order (like "en", "en-US", "en-Latn-US").
The return value represents locale names that the user expects to see the
UI translation in.
@ -3251,7 +3345,20 @@ QStringList QLocale::uiLanguages() const
}
}
#endif
return QStringList(bcp47Name());
QLocaleId id = QLocaleId::fromIds(d->m_data->m_language_id, d->m_data->m_script_id, d->m_data->m_country_id);
const QLocaleId max = id.withLikelySubtagsAdded();
const QLocaleId min = max.withLikelySubtagsRemoved();
QStringList uiLanguages;
uiLanguages.append(min.bcp47Name());
if (id.script_id) {
id.script_id = 0;
if (id != min && id.withLikelySubtagsAdded() == max)
uiLanguages.append(id.bcp47Name());
}
if (max != min && max != id)
uiLanguages.append(max.bcp47Name());
return uiLanguages;
}
/*!

File diff suppressed because it is too large Load Diff

View File

@ -138,6 +138,28 @@ namespace QIcu {
#endif
// Language/script/country id triple identifying a locale.
// The struct declares no constructors; fromIds() is the named way to build one.
struct QLocaleId
{
    // bypass constructors
    static inline QLocaleId fromIds(ushort language, ushort script, ushort country)
    {
        QLocaleId result;
        result.language_id = language;
        result.script_id = script;
        result.country_id = country;
        return result;
    }

    // Two ids are equal only if all three fields are equal.
    inline bool operator==(QLocaleId other) const
    {
        if (language_id != other.language_id)
            return false;
        if (script_id != other.script_id)
            return false;
        return country_id == other.country_id;
    }
    inline bool operator!=(QLocaleId other) const
    { return !(*this == other); }

    QLocaleId withLikelySubtagsAdded() const;
    QLocaleId withLikelySubtagsRemoved() const;

    QString bcp47Name() const;

    // Declaration order matters: brace initializers fill the fields in this order.
    ushort language_id;
    ushort script_id;
    ushort country_id;
};
struct QLocaleData
{
public:

View File

@ -1,7 +1,7 @@
CONFIG += console testcase
CONFIG += parallel_test
CONFIG -= app_bundle
QT = core testlib
QT = core testlib core-private
embedded: QT += gui
SOURCES = ../tst_qlocale.cpp

View File

@ -120,6 +120,7 @@ private slots:
void toDateTime();
void negativeNumbers();
void numberOptions();
void testNames_data();
void testNames();
void dayName_data();
void dayName();
@ -364,16 +365,19 @@ void tst_QLocale::ctor()
+ "/" + QLocale::countryToString(l.country())).toLatin1().constData()); \
}
TEST_CTOR("zh_CN", Chinese, AnyScript, China)
TEST_CTOR("zh_CN", Chinese, SimplifiedHanScript, China)
TEST_CTOR("zh_Hans_CN", Chinese, SimplifiedHanScript, China)
TEST_CTOR("zh_Hans", Chinese, SimplifiedHanScript, China)
TEST_CTOR("zh_Hant", Chinese, TraditionalHanScript, HongKong)
TEST_CTOR("zh_Hant", Chinese, TraditionalHanScript, Taiwan)
TEST_CTOR("zh_Hans_MO", Chinese, SimplifiedHanScript, Macau)
TEST_CTOR("zh_Hant_MO", Chinese, TraditionalHanScript, Macau)
TEST_CTOR("az_Latn_AZ", Azerbaijani, LatinScript, Azerbaijan)
TEST_CTOR("ha_Arab_NG", Hausa, ArabicScript, Nigeria)
TEST_CTOR("ha_Latn_NG", Hausa, LatinScript, Nigeria)
TEST_CTOR("ru", Russian, CyrillicScript, RussianFederation)
TEST_CTOR("ru_Cyrl", Russian, CyrillicScript, RussianFederation)
#undef TEST_CTOR
}
@ -1410,370 +1414,62 @@ void tst_QLocale::negativeNumbers()
QCOMPARE(i, -1000000);
}
struct LocaleListItem
#include <private/qlocale_p.h>
#include <private/qlocale_data_p.h>
static const int locale_data_count = sizeof(locale_data)/sizeof(locale_data[0]);
void tst_QLocale::testNames_data()
{
int language;
int country;
};
QTest::addColumn<int>("language");
QTest::addColumn<int>("country");
// first two rows of locale_data[] in qlocale_data_p.h
static const LocaleListItem g_locale_list[] = {
{ 1, 0}, // C/AnyCountry
{ 3, 69}, // Afan/Ethiopia
{ 3, 111}, // Afan/Kenya
{ 4, 59}, // Afar/Djibouti
{ 4, 67}, // Afar/Eritrea
{ 4, 69}, // Afar/Ethiopia
{ 5, 195}, // Afrikaans/SouthAfrica
{ 5, 148}, // Afrikaans/Namibia
{ 6, 2}, // Albanian/Albania
{ 7, 69}, // Amharic/Ethiopia
{ 8, 186}, // Arabic/SaudiArabia
{ 8, 3}, // Arabic/Algeria
{ 8, 17}, // Arabic/Bahrain
{ 8, 64}, // Arabic/Egypt
{ 8, 103}, // Arabic/Iraq
{ 8, 109}, // Arabic/Jordan
{ 8, 115}, // Arabic/Kuwait
{ 8, 119}, // Arabic/Lebanon
{ 8, 122}, // Arabic/LibyanArabJamahiriya
{ 8, 145}, // Arabic/Morocco
{ 8, 162}, // Arabic/Oman
{ 8, 175}, // Arabic/Qatar
{ 8, 201}, // Arabic/Sudan
{ 8, 207}, // Arabic/SyrianArabRepublic
{ 8, 216}, // Arabic/Tunisia
{ 8, 223}, // Arabic/UnitedArabEmirates
{ 8, 237}, // Arabic/Yemen
{ 9, 11}, // Armenian/Armenia
{ 10, 100}, // Assamese/India
{ 12, 15}, // Azerbaijani/Azerbaijan
{ 12, 102}, // Azerbaijani/Iran
{ 14, 197}, // Basque/Spain
{ 15, 18}, // Bengali/Bangladesh
{ 15, 100}, // Bengali/India
{ 16, 25}, // Bhutani/Bhutan
{ 19, 74}, // Breton/France
{ 20, 33}, // Bulgarian/Bulgaria
{ 21, 147}, // Burmese/Myanmar
{ 22, 20}, // Byelorussian/Belarus
{ 23, 36}, // Cambodian/Cambodia
{ 24, 197}, // Catalan/Spain
{ 25, 44}, // Chinese/China
{ 25, 97}, // Chinese/HongKong
{ 25, 126}, // Chinese/Macau
{ 25, 190}, // Chinese/Singapore
{ 25, 208}, // Chinese/Taiwan
{ 27, 54}, // Croatian/Croatia
{ 28, 57}, // Czech/CzechRepublic
{ 29, 58}, // Danish/Denmark
{ 30, 151}, // Dutch/Netherlands
{ 30, 21}, // Dutch/Belgium
{ 31, 225}, // English/UnitedStates
{ 31, 4}, // English/AmericanSamoa
{ 31, 13}, // English/Australia
{ 31, 21}, // English/Belgium
{ 31, 22}, // English/Belize
{ 31, 28}, // English/Botswana
{ 31, 38}, // English/Canada
{ 31, 89}, // English/Guam
{ 31, 97}, // English/HongKong
{ 31, 100}, // English/India
{ 31, 104}, // English/Ireland
{ 31, 107}, // English/Jamaica
{ 31, 133}, // English/Malta
{ 31, 134}, // English/MarshallIslands
{ 31, 137}, // English/Mauritius
{ 31, 148}, // English/Namibia
{ 31, 154}, // English/NewZealand
{ 31, 160}, // English/NorthernMarianaIslands
{ 31, 163}, // English/Pakistan
{ 31, 170}, // English/Philippines
{ 31, 190}, // English/Singapore
{ 31, 195}, // English/SouthAfrica
{ 31, 215}, // English/TrinidadAndTobago
{ 31, 224}, // English/UnitedKingdom
{ 31, 226}, // English/UnitedStatesMinorOutlyingIslands
{ 31, 234}, // English/USVirginIslands
{ 31, 240}, // English/Zimbabwe
{ 33, 68}, // Estonian/Estonia
{ 34, 71}, // Faroese/FaroeIslands
{ 36, 73}, // Finnish/Finland
{ 37, 74}, // French/France
{ 37, 21}, // French/Belgium
{ 37, 37}, // French/Cameroon
{ 37, 38}, // French/Canada
{ 37, 41}, // French/CentralAfricanRepublic
{ 37, 53}, // French/IvoryCoast
{ 37, 88}, // French/Guadeloupe
{ 37, 91}, // French/Guinea
{ 37, 125}, // French/Luxembourg
{ 37, 128}, // French/Madagascar
{ 37, 132}, // French/Mali
{ 37, 135}, // French/Martinique
{ 37, 142}, // French/Monaco
{ 37, 156}, // French/Niger
{ 37, 176}, // French/Reunion
{ 37, 187}, // French/Senegal
{ 37, 206}, // French/Switzerland
{ 37, 244}, // French/Saint Barthelemy
{ 37, 245}, // French/Saint Martin
{ 40, 197}, // Galician/Spain
{ 41, 81}, // Georgian/Georgia
{ 42, 82}, // German/Germany
{ 42, 14}, // German/Austria
{ 42, 21}, // German/Belgium
{ 42, 123}, // German/Liechtenstein
{ 42, 125}, // German/Luxembourg
{ 42, 206}, // German/Switzerland
{ 43, 85}, // Greek/Greece
{ 43, 56}, // Greek/Cyprus
{ 44, 86}, // Greenlandic/Greenland
{ 46, 100}, // Gujarati/India
{ 47, 83}, // Hausa/Ghana
{ 47, 156}, // Hausa/Niger
{ 47, 157}, // Hausa/Nigeria
{ 47, 201}, // Hausa/Sudan
{ 48, 105}, // Hebrew/Israel
{ 49, 100}, // Hindi/India
{ 50, 98}, // Hungarian/Hungary
{ 51, 99}, // Icelandic/Iceland
{ 52, 101}, // Indonesian/Indonesia
{ 57, 104}, // Irish/Ireland
{ 58, 106}, // Italian/Italy
{ 58, 206}, // Italian/Switzerland
{ 59, 108}, // Japanese/Japan
{ 61, 100}, // Kannada/India
{ 63, 110}, // Kazakh/Kazakhstan
{ 64, 179}, // Kinyarwanda/Rwanda
{ 65, 116}, // Kirghiz/Kyrgyzstan
{ 66, 114}, // Korean/RepublicOfKorea
{ 67, 102}, // Kurdish/Iran
{ 67, 103}, // Kurdish/Iraq
{ 67, 207}, // Kurdish/SyrianArabRepublic
{ 67, 217}, // Kurdish/Turkey
{ 69, 117}, // Laothian/Lao
{ 71, 118}, // Latvian/Latvia
{ 72, 49}, // Lingala/DemocraticRepublicOfCongo
{ 72, 50}, // Lingala/PeoplesRepublicOfCongo
{ 73, 124}, // Lithuanian/Lithuania
{ 74, 127}, // Macedonian/Macedonia
{ 75, 128}, // Malagasy/Madagascar
{ 76, 130}, // Malay/Malaysia
{ 76, 32}, // Malay/BruneiDarussalam
{ 77, 100}, // Malayalam/India
{ 78, 133}, // Maltese/Malta
{ 79, 154}, // Maori/NewZealand
{ 80, 100}, // Marathi/India
{ 82, 44}, // Mongolian/China
{ 82, 143}, // Mongolian/Mongolia
{ 84, 100}, // Nepali/India
{ 84, 150}, // Nepali/Nepal
{ 85, 161}, // Norwegian/Norway
{ 86, 74}, // Occitan/France
{ 87, 100}, // Oriya/India
{ 88, 1}, // Pashto/Afghanistan
{ 89, 102}, // Persian/Iran
{ 89, 1}, // Persian/Afghanistan
{ 90, 172}, // Polish/Poland
{ 91, 173}, // Portuguese/Portugal
{ 91, 30}, // Portuguese/Brazil
{ 91, 92}, // Portuguese/GuineaBissau
{ 91, 146}, // Portuguese/Mozambique
{ 92, 100}, // Punjabi/India
{ 92, 163}, // Punjabi/Pakistan
{ 94, 206}, // RhaetoRomance/Switzerland
{ 95, 141}, // Romanian/Moldova
{ 95, 177}, // Romanian/Romania
{ 96, 178}, // Russian/RussianFederation
{ 96, 141}, // Russian/Moldova
{ 96, 222}, // Russian/Ukraine
{ 98, 41}, // Sangho/CentralAfricanRepublic
{ 99, 100}, // Sanskrit/India
{ 100, 27}, // Serbian/BosniaAndHerzegowina
{ 100, 242}, // Serbian/Montenegro
{ 100, 243}, // Serbian/Serbia
{ 102, 120}, // Sesotho/Lesotho
{ 102, 195}, // Sesotho/SouthAfrica
{ 103, 195}, // Setswana/SouthAfrica
{ 104, 240}, // Shona/Zimbabwe
{ 106, 198}, // Singhalese/SriLanka
{ 107, 195}, // Siswati/SouthAfrica
{ 107, 204}, // Siswati/Swaziland
{ 108, 191}, // Slovak/Slovakia
{ 109, 192}, // Slovenian/Slovenia
{ 110, 194}, // Somali/Somalia
{ 110, 59}, // Somali/Djibouti
{ 110, 69}, // Somali/Ethiopia
{ 110, 111}, // Somali/Kenya
{ 111, 197}, // Spanish/Spain
{ 111, 10}, // Spanish/Argentina
{ 111, 26}, // Spanish/Bolivia
{ 111, 43}, // Spanish/Chile
{ 111, 47}, // Spanish/Colombia
{ 111, 52}, // Spanish/CostaRica
{ 111, 61}, // Spanish/DominicanRepublic
{ 111, 63}, // Spanish/Ecuador
{ 111, 65}, // Spanish/ElSalvador
{ 111, 66}, // Spanish/EquatorialGuinea
{ 111, 90}, // Spanish/Guatemala
{ 111, 96}, // Spanish/Honduras
{ 111, 139}, // Spanish/Mexico
{ 111, 155}, // Spanish/Nicaragua
{ 111, 166}, // Spanish/Panama
{ 111, 168}, // Spanish/Paraguay
{ 111, 169}, // Spanish/Peru
{ 111, 174}, // Spanish/PuertoRico
{ 111, 225}, // Spanish/UnitedStates
{ 111, 227}, // Spanish/Uruguay
{ 111, 231}, // Spanish/Venezuela
{ 113, 111}, // Swahili/Kenya
{ 113, 210}, // Swahili/Tanzania
{ 114, 205}, // Swedish/Sweden
{ 114, 73}, // Swedish/Finland
{ 116, 209}, // Tajik/Tajikistan
{ 117, 100}, // Tamil/India
{ 117, 198}, // Tamil/SriLanka
{ 118, 178}, // Tatar/RussianFederation
{ 119, 100}, // Telugu/India
{ 120, 211}, // Thai/Thailand
{ 121, 44}, // Tibetan/China
{ 121, 100}, // Tibetan/India
{ 122, 67}, // Tigrinya/Eritrea
{ 122, 69}, // Tigrinya/Ethiopia
{ 123, 214}, // Tonga/Tonga
{ 124, 195}, // Tsonga/SouthAfrica
{ 125, 217}, // Turkish/Turkey
{ 128, 44}, // Uigur/China
{ 129, 222}, // Ukrainian/Ukraine
{ 130, 100}, // Urdu/India
{ 130, 163}, // Urdu/Pakistan
{ 131, 228}, // Uzbek/Uzbekistan
{ 131, 1}, // Uzbek/Afghanistan
{ 132, 232}, // Vietnamese/VietNam
{ 134, 224}, // Welsh/UnitedKingdom
{ 135, 187}, // Wolof/Senegal
{ 136, 195}, // Xhosa/SouthAfrica
{ 138, 157}, // Yoruba/Nigeria
{ 140, 195}, // Zulu/SouthAfrica
{ 141, 161}, // Nynorsk/Norway
{ 142, 27}, // Bosnian/BosniaAndHerzegowina
{ 143, 131}, // Divehi/Maldives
{ 144, 224}, // Manx/UnitedKingdom
{ 145, 224}, // Cornish/UnitedKingdom
{ 146, 83}, // Akan/Ghana
{ 147, 100}, // Konkani/India
{ 148, 83}, // Ga/Ghana
{ 149, 157}, // Igbo/Nigeria
{ 150, 111}, // Kamba/Kenya
{ 151, 207}, // Syriac/SyrianArabRepublic
{ 152, 67}, // Blin/Eritrea
{ 153, 67}, // Geez/Eritrea
{ 153, 69}, // Geez/Ethiopia
{ 154, 53}, // Koro/IvoryCoast
{ 155, 69}, // Sidamo/Ethiopia
{ 156, 157}, // Atsam/Nigeria
{ 157, 67}, // Tigre/Eritrea
{ 158, 157}, // Jju/Nigeria
{ 159, 106}, // Friulian/Italy
{ 160, 195}, // Venda/SouthAfrica
{ 161, 83}, // Ewe/Ghana
{ 161, 212}, // Ewe/Togo
{ 162, 69}, // Walamo/Ethiopia
{ 163, 225}, // Hawaiian/UnitedStates
{ 164, 157}, // Tyap/Nigeria
{ 165, 129}, // Chewa/Malawi
{ 166, 170}, // Filipino/Philippines
{ 167, 206}, // Swiss German/Switzerland
{ 168, 44}, // Sichuan Yi/China
{ 169, 91}, // Kpelle/Guinea
{ 169, 121}, // Kpelle/Liberia
{ 170, 82}, // Low German/Germany
{ 171, 195}, // South Ndebele/SouthAfrica
{ 172, 195}, // Northern Sotho/SouthAfrica
{ 173, 73}, // Northern Sami/Finland
{ 173, 161}, // Northern Sami/Norway
{ 174, 208}, // Taroko/Taiwan
{ 175, 111}, // Gusii/Kenya
{ 176, 111}, // Taita/Kenya
{ 177, 187}, // Fulah/Senegal
{ 178, 111}, // Kikuyu/Kenya
{ 179, 111}, // Samburu/Kenya
{ 180, 146}, // Sena/Mozambique
{ 181, 240}, // North Ndebele/Zimbabwe
{ 182, 210}, // Rombo/Tanzania
{ 183, 145}, // Tachelhit/Morocco
{ 184, 3}, // Kabyle/Algeria
{ 185, 221}, // Nyankole/Uganda
{ 186, 210}, // Bena/Tanzania
{ 187, 210}, // Vunjo/Tanzania
{ 188, 132}, // Bambara/Mali
{ 189, 111}, // Embu/Kenya
{ 190, 225}, // Cherokee/UnitedStates
{ 191, 137}, // Morisyen/Mauritius
{ 192, 210}, // Makonde/Tanzania
{ 193, 210}, // Langi/Tanzania
{ 194, 221}, // Ganda/Uganda
{ 195, 239}, // Bemba/Zambia
{ 196, 39}, // Kabuverdianu/CapeVerde
{ 197, 111}, // Meru/Kenya
{ 198, 111}, // Kalenjin/Kenya
{ 199, 148}, // Nama/Namibia
{ 200, 210}, // Machame/Tanzania
{ 201, 82}, // Colognian/Germany
{ 202, 111}, // Masai/Kenya
{ 202, 210}, // Masai/Tanzania
{ 203, 221}, // Soga/Uganda
{ 204, 111}, // Luyia/Kenya
{ 205, 210}, // Asu/Tanzania
{ 206, 111}, // Teso/Kenya
{ 206, 221}, // Teso/Uganda
{ 207, 67}, // Saho/Eritrea
{ 208, 132}, // Koyra Chiini/Mali
{ 209, 210}, // Rwa/Tanzania
{ 210, 111}, // Luo/Kenya
{ 211, 221}, // Chiga/Uganda
{ 212, 145}, // Central Morocco Tamazight/Morocco
{ 213, 132}, // Koyraboro Senni/Mali
{ 214, 210} // Shambala/Tanzania
};
static const int g_locale_list_count = sizeof(g_locale_list)/sizeof(g_locale_list[0]);
for (int i = 0; i < locale_data_count; ++i) {
const QLocaleData &item = locale_data[i];
const QString testName = QString::fromLatin1("data_%1 (%2/%3)").arg(i)
.arg(QLocale::languageToString((QLocale::Language)item.m_language_id))
.arg(QLocale::countryToString((QLocale::Country)item.m_country_id));
QTest::newRow(testName.toLatin1().constData()) << (int)item.m_language_id << (int)item.m_country_id;
}
}
void tst_QLocale::testNames()
{
for (int i = 0; i < g_locale_list_count; ++i) {
const LocaleListItem &item = g_locale_list[i];
QLocale l1((QLocale::Language)item.language, (QLocale::Country)item.country);
QCOMPARE((int)l1.language(), item.language);
QCOMPARE((int)l1.country(), item.country);
QFETCH(int, language);
QFETCH(int, country);
QLocale l1((QLocale::Language)language, (QLocale::Country)country);
if (language == QLocale::AnyLanguage && country == QLocale::AnyCountry)
language = QLocale::C;
QCOMPARE((int)l1.language(), language);
QCOMPARE((int)l1.country(), country);
QString name = l1.name();
QLocale l2(name);
QCOMPARE((int)l2.language(), item.language);
QCOMPARE((int)l2.country(), item.country);
QCOMPARE((int)l2.language(), language);
QCOMPARE((int)l2.country(), country);
QCOMPARE(l2.name(), name);
QLocale l3(name + QLatin1String("@foo"));
QCOMPARE((int)l3.language(), item.language);
QCOMPARE((int)l3.country(), item.country);
QCOMPARE((int)l3.language(), language);
QCOMPARE((int)l3.country(), country);
QCOMPARE(l3.name(), name);
QLocale l4(name + QLatin1String(".foo"));
QCOMPARE((int)l4.language(), item.language);
QCOMPARE((int)l4.country(), item.country);
QCOMPARE((int)l4.language(), language);
QCOMPARE((int)l4.country(), country);
QCOMPARE(l4.name(), name);
if (item.language != QLocale::C) {
if (language != QLocale::C) {
int idx = name.indexOf(QLatin1Char('_'));
QVERIFY(idx != -1);
QString lang = name.left(idx);
QCOMPARE((int)QLocale(lang).language(), item.language);
QCOMPARE((int)QLocale(lang + QLatin1String("@foo")).language(), item.language);
QCOMPARE((int)QLocale(lang + QLatin1String(".foo")).language(), item.language);
}
QCOMPARE((int)QLocale(lang).language(), language);
QCOMPARE((int)QLocale(lang + QLatin1String("@foo")).language(), language);
QCOMPARE((int)QLocale(lang + QLatin1String(".foo")).language(), language);
}
}
@ -2028,12 +1724,37 @@ void tst_QLocale::uiLanguages()
QCOMPARE(c.uiLanguages().at(0), QLatin1String("C"));
const QLocale en_US("en_US");
QCOMPARE(en_US.uiLanguages().size(), 1);
QCOMPARE(en_US.uiLanguages().at(0), QLatin1String("en-US"));
QCOMPARE(en_US.uiLanguages().size(), 3);
QCOMPARE(en_US.uiLanguages().at(0), QLatin1String("en"));
QCOMPARE(en_US.uiLanguages().at(1), QLatin1String("en-US"));
QCOMPARE(en_US.uiLanguages().at(2), QLatin1String("en-Latn-US"));
const QLocale en_Latn_US("en_Latn_US");
QCOMPARE(en_Latn_US.uiLanguages().size(), 3);
QCOMPARE(en_Latn_US.uiLanguages().at(0), QLatin1String("en"));
QCOMPARE(en_Latn_US.uiLanguages().at(1), QLatin1String("en-US"));
QCOMPARE(en_Latn_US.uiLanguages().at(2), QLatin1String("en-Latn-US"));
const QLocale en_GB("en_GB");
QCOMPARE(en_GB.uiLanguages().size(), 2);
QCOMPARE(en_GB.uiLanguages().at(0), QLatin1String("en-GB"));
QCOMPARE(en_GB.uiLanguages().at(1), QLatin1String("en-Latn-GB"));
const QLocale en_Dsrt_US("en_Dsrt_US");
QCOMPARE(en_Dsrt_US.uiLanguages().size(), 2);
QCOMPARE(en_Dsrt_US.uiLanguages().at(0), QLatin1String("en-Dsrt"));
QCOMPARE(en_Dsrt_US.uiLanguages().at(1), QLatin1String("en-Dsrt-US"));
const QLocale ru_RU("ru_RU");
QCOMPARE(ru_RU.uiLanguages().size(), 1);
QCOMPARE(ru_RU.uiLanguages().at(0), QLatin1String("ru-RU"));
QCOMPARE(ru_RU.uiLanguages().size(), 3);
QCOMPARE(ru_RU.uiLanguages().at(0), QLatin1String("ru"));
QCOMPARE(ru_RU.uiLanguages().at(1), QLatin1String("ru-RU"));
QCOMPARE(ru_RU.uiLanguages().at(2), QLatin1String("ru-Cyrl-RU"));
const QLocale zh_Hant("zh_Hant");
QCOMPARE(zh_Hant.uiLanguages().size(), 2);
QCOMPARE(zh_Hant.uiLanguages().at(0), QLatin1String("zh-TW"));
QCOMPARE(zh_Hant.uiLanguages().at(1), QLatin1String("zh-Hant-TW"));
}
void tst_QLocale::weekendDays()

View File

@ -48,6 +48,7 @@ from xpathlite import DraftResolution
from dateconverter import convert_date
import re
findAlias = xpathlite.findAlias
findEntry = xpathlite.findEntry
findEntryInFile = xpathlite._findEntryInFile
findTagsInFile = xpathlite.findTagsInFile
@ -116,6 +117,12 @@ def generateLocaleInfo(path):
if not path.endswith(".xml"):
return {}
# skip legacy/compatibility ones
alias = findAlias(path)
if alias:
raise xpathlite.Error("alias to \"%s\"" % alias)
language_code = findEntryInFile(path, "identity/language", attribute="type")[0]
if language_code == 'root':
# just skip it
@ -128,18 +135,16 @@ def generateLocaleInfo(path):
# ### actually there is only one locale with variant: en_US_POSIX
# does anybody care about it at all?
if variant_code:
return {}
raise xpathlite.Error("we do not support variants (\"%s\")" % variant_code)
language_id = enumdata.languageCodeToId(language_code)
if language_id <= 0:
sys.stderr.write("unknown language code \"" + language_code + "\"\n")
return {}
raise xpathlite.Error("unknown language code \"%s\"" % language_code)
language = enumdata.language_list[language_id][0]
script_id = enumdata.scriptCodeToId(script_code)
if script_id == -1:
sys.stderr.write("unknown script code \"" + script_code + "\"\n")
return {}
raise xpathlite.Error("unknown script code \"%s\"" % script_code)
script = enumdata.script_list[script_id][0]
# we should handle fully qualified names with the territory
@ -147,8 +152,7 @@ def generateLocaleInfo(path):
return {}
country_id = enumdata.countryCodeToId(country_code)
if country_id <= 0:
sys.stderr.write("unknown country code \"" + country_code + "\"\n")
return {}
raise xpathlite.Error("unknown country code \"%s\"" % country_code)
country = enumdata.country_list[country_id][0]
# So we say we accept only those values that have "contributed" or
@ -557,10 +561,14 @@ cldr_files = os.listdir(cldr_dir)
locale_database = {}
for file in cldr_files:
try:
l = generateLocaleInfo(cldr_dir + "/" + file)
if not l:
sys.stderr.write("skipping file \"" + file + "\"\n")
continue
except xpathlite.Error as e:
sys.stderr.write("skipping file \"%s\" (%s)\n" % (file, str(e)))
continue
locale_database[(l['language_id'], l['script_id'], l['country_id'], l['variant_code'])] = l
@ -611,16 +619,15 @@ def _parseLocale(l):
script = "AnyScript"
country = "AnyCountry"
if l == "und": # we are treating unknown locale like C
return (None, None, None)
if l == "und":
raise xpathlite.Error("we are treating unknown locale like C")
items = l.split("_")
language_code = items[0]
if language_code != "und":
language_id = enumdata.languageCodeToId(language_code)
if language_id == -1:
sys.stderr.write("unknown language code \"" + language_code + "\"\n")
return (None, None, None)
raise xpathlite.Error("unknown language code \"%s\"" % language_code)
language = enumdata.language_list[language_id][0]
if len(items) > 1:
@ -631,16 +638,14 @@ def _parseLocale(l):
if len(script_code) == 4:
script_id = enumdata.scriptCodeToId(script_code)
if script_id == -1:
sys.stderr.write("unknown script code \"" + script_code + "\"\n")
return (None, None, None)
raise xpathlite.Error("unknown script code \"%s\"" % script_code)
script = enumdata.script_list[script_id][0]
else:
country_code = script_code
if country_code:
country_id = enumdata.countryCodeToId(country_code)
if country_id == -1:
sys.stderr.write("unknown country code \"" + country_code + "\"\n")
return (None, None, None)
raise xpathlite.Error("unknown country code \"%s\"" % country_code)
country = enumdata.country_list[country_id][0]
return (language, script, country)
@ -651,13 +656,15 @@ for ns in findTagsInFile(cldr_dir + "/../supplemental/likelySubtags.xml", "likel
for data in ns[1:][0]: # ns looks like this: [u'likelySubtag', [(u'from', u'aa'), (u'to', u'aa_Latn_ET')]]
tmp[data[0]] = data[1]
try:
(from_language, from_script, from_country) = _parseLocale(tmp[u"from"])
if not from_language:
sys.stderr.write("skipping likelySubtag " + tmp[u"from"] + " -> " + tmp[u"to"] + "\n")
except xpathlite.Error as e:
sys.stderr.write("skipping likelySubtag \"%s\" -> \"%s\" (%s)\n" % (tmp[u"from"], tmp[u"to"], str(e)))
continue
try:
(to_language, to_script, to_country) = _parseLocale(tmp[u"to"])
if not to_language:
sys.stderr.write("skipping likelySubtag " + tmp[u"from"] + " -> " + tmp[u"to"] + "\n")
except xpathlite.Error as e:
sys.stderr.write("skipping likelySubtag \"%s\" -> \"%s\" (%s)\n" % (tmp[u"from"], tmp[u"to"], str(e)))
continue
# substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags
if to_country == "AnyCountry" and from_country != to_country:

View File

@ -291,7 +291,7 @@ class Locale:
self.currencyFormat = eltText(firstChildElt(elt, "currencyFormat"))
self.currencyNegativeFormat = eltText(firstChildElt(elt, "currencyNegativeFormat"))
def loadLocaleMap(doc, language_map, script_map, country_map):
def loadLocaleMap(doc, language_map, script_map, country_map, likely_subtags_map):
result = {}
locale_list_elt = firstChildElt(doc.documentElement, "localeList")
@ -307,6 +307,28 @@ def loadLocaleMap(doc, language_map, script_map, country_map):
country_id = countryNameToId(locale.country, country_map)
if country_id == -1:
sys.stderr.write("Cannot find a country id for '%s'\n" % locale.country)
if language_id != 1: # C
if country_id == 0:
sys.stderr.write("loadLocaleMap: No country id for '%s'\n" % locale.language)
if script_id == 0:
# find default script for a given language and country (see http://www.unicode.org/reports/tr35/#Likely_Subtags)
for key in likely_subtags_map.keys():
tmp = likely_subtags_map[key]
if tmp["from"][0] == locale.language and tmp["from"][1] == "AnyScript" and tmp["from"][2] == locale.country:
locale.script = tmp["to"][1]
script_id = scriptNameToId(locale.script, script_map)
break
if script_id == 0 and country_id != 0:
# try with no country
for key in likely_subtags_map.keys():
tmp = likely_subtags_map[key]
if tmp["from"][0] == locale.language and tmp["from"][1] == "AnyScript" and tmp["from"][2] == "AnyCountry":
locale.script = tmp["to"][1]
script_id = scriptNameToId(locale.script, script_map)
break
result[(language_id, script_id, country_id)] = locale
locale_elt = nextSiblingElt(locale_elt, "locale")
@ -321,13 +343,21 @@ def compareLocaleKeys(key1, key2):
l1 = compareLocaleKeys.locale_map[key1]
l2 = compareLocaleKeys.locale_map[key2]
if l1.language in compareLocaleKeys.default_map:
default = compareLocaleKeys.default_map[l1.language]
if l1.country == default and key1[1] == 0:
if (l1.language, l1.script) in compareLocaleKeys.default_map.keys():
default = compareLocaleKeys.default_map[(l1.language, l1.script)]
if l1.country == default:
return -1
if l2.country == default and key2[1] == 0:
if l2.country == default:
return 1
if key1[1] != key2[1]:
if (l2.language, l2.script) in compareLocaleKeys.default_map.keys():
default = compareLocaleKeys.default_map[(l2.language, l2.script)]
if l2.country == default:
return 1
if l1.country == default:
return -1
if key1[1] != key2[1]:
return key1[1] - key2[1]
else:
@ -476,9 +506,9 @@ def main():
default_map = {}
for key in likely_subtags_map.keys():
tmp = likely_subtags_map[key]
if tmp["from"][2] == "AnyCountry" and tmp["to"][2] != "AnyCountry" and tmp["from"][1] == "AnyScript":
default_map[tmp["to"][0]] = tmp["to"][2]
locale_map = loadLocaleMap(doc, language_map, script_map, country_map)
if tmp["from"][1] == "AnyScript" and tmp["from"][2] == "AnyCountry" and tmp["to"][2] != "AnyCountry":
default_map[(tmp["to"][0], tmp["to"][1])] = tmp["to"][2]
locale_map = loadLocaleMap(doc, language_map, script_map, country_map, likely_subtags_map)
dupes = findDupes(language_map, country_map)
cldr_version = eltText(firstChildElt(doc.documentElement, "version"))
@ -495,6 +525,57 @@ def main():
*/\n\n\n\
" % (str(datetime.date.today()), cldr_version) )
# Likely subtags map
data_temp_file.write("static const QLocaleId likely_subtags[] = {\n")
index = 0
for key in likely_subtags_map.keys():
tmp = likely_subtags_map[key]
from_language = languageNameToId(tmp["from"][0], language_map)
from_script = scriptNameToId(tmp["from"][1], script_map)
from_country = countryNameToId(tmp["from"][2], country_map)
to_language = languageNameToId(tmp["to"][0], language_map)
to_script = scriptNameToId(tmp["to"][1], script_map)
to_country = countryNameToId(tmp["to"][2], country_map)
cmnt_from = ""
if from_language != 0:
cmnt_from = cmnt_from + language_map[from_language][1]
else:
cmnt_from = cmnt_from + "und"
if from_script != 0:
if cmnt_from:
cmnt_from = cmnt_from + "_"
cmnt_from = cmnt_from + script_map[from_script][1]
if from_country != 0:
if cmnt_from:
cmnt_from = cmnt_from + "_"
cmnt_from = cmnt_from + country_map[from_country][1]
# Build the human-readable "to" label (language[_script][_country]) that is
# written into the trailing "// from -> to" comment of the generated table row.
cmnt_to = ""
if to_language != 0:
    cmnt_to = cmnt_to + language_map[to_language][1]
else:
    # An undetermined target language is spelled "und" in the "to" label.
    # (This used to append to cmnt_from, corrupting the "from" label and
    # leaving the "to" label without its language part.)
    cmnt_to = cmnt_to + "und"
if to_script != 0:
    if cmnt_to:
        cmnt_to = cmnt_to + "_"
    cmnt_to = cmnt_to + script_map[to_script][1]
if to_country != 0:
    if cmnt_to:
        cmnt_to = cmnt_to + "_"
    cmnt_to = cmnt_to + country_map[to_country][1]
data_temp_file.write(" ")
data_temp_file.write("{ %3d, %2d, %3d }, { %3d, %2d, %3d }" % (from_language, from_script, from_country, to_language, to_script, to_country))
index += 1
if index != len(likely_subtags_map):
data_temp_file.write(",")
else:
data_temp_file.write(" ")
data_temp_file.write(" // %s -> %s\n" % (cmnt_from, cmnt_to))
data_temp_file.write("};\n")
data_temp_file.write("\n")
# Locale index
data_temp_file.write("static const quint16 locale_index[] = {\n")
index = 0