From 1551c6546824f86aec50f984b5062faab9cc985a Mon Sep 17 00:00:00 2001 From: Yoshito Umaoka Date: Fri, 3 May 2013 21:36:03 +0000 Subject: [PATCH] ICU-10098 Fixed LCID to posix-style locale ID mapping problem. The fix required changing the internal LCID mapping function interface; it also resolves a threading issue in uloc_getLocaleForLCID() and some other internal implementation problems in Win32DateFormat/Win32NumberFormat. X-SVN-Rev: 33586 --- icu4c/source/common/locmap.c | 97 ++++++++++++++++++-------- icu4c/source/common/locmap.h | 4 +- icu4c/source/common/putil.cpp | 31 ++++++-- icu4c/source/common/uloc.cpp | 47 +++++++++---- icu4c/source/i18n/windtfmt.cpp | 9 +-- icu4c/source/i18n/windtfmt.h | 4 +- icu4c/source/i18n/winnmfmt.cpp | 17 +++-- icu4c/source/i18n/winnmfmt.h | 3 +- icu4c/source/test/cintltst/cldrtest.c | 4 +- icu4c/source/test/intltest/winutil.cpp | 18 +++-- 10 files changed, 164 insertions(+), 70 deletions(-) diff --git a/icu4c/source/common/locmap.c b/icu4c/source/common/locmap.c index bf4498bd2f..25ef2f6d4e 100644 --- a/icu4c/source/common/locmap.c +++ b/icu4c/source/common/locmap.c @@ -26,7 +26,6 @@ */ #include "locmap.h" -#include "unicode/uloc.h" #include "cstring.h" #include "cmemory.h" @@ -111,7 +110,7 @@ static const ILcidPosixElement locmap_ ## id [] = * Create the map for the posixID. This macro supposes that the language string * name is the same as the global variable name, and that the first element * in the ILcidPosixElement is just the language. - * @param _posixID the full POSIX ID for this entry. + * @param _posixID the full POSIX ID for this entry. */ #define ILCID_POSIX_MAP(_posixID) \ {sizeof(locmap_ ## _posixID)/sizeof(ILcidPosixElement), locmap_ ## _posixID} @@ -129,6 +128,11 @@ static const ILcidPosixElement locmap_ ## id [] = // Microsoft is moving away from LCID in favor of locale name as of Vista. This table needs to be // maintained for support of older Windows version. 
// Update: Windows 7 (091130) +// +// Note: Microsoft assign a different LCID if a locale has a sorting variant. POSIX IDs below may contain +// @collation=XXX, but no other keywords are allowed (at least for now). When uprv_convertToLCID() is +// called from uloc_getLCID(), keywords other than collation are already removed. If we really need +// to support other keywords in this mapping data, we must update the implementation. //////////////////////////////////////////// */ @@ -957,13 +961,6 @@ getPosixID(const ILcidPosixMap *this_0, uint32_t hostID) ///////////////////////////////////// */ #ifdef USE_WINDOWS_LOCALE_API -/* - * Change the tag separator from '-' to '_' - */ -#define FIX_LOCALE_ID_TAG_SEPARATOR(buffer, len, i) \ - for(i = 0; i < len; i++) \ - if (buffer[i] == '-') buffer[i] = '_'; - /* * Various language tags needs to be changed: * quz -> qu @@ -980,39 +977,83 @@ getPosixID(const ILcidPosixMap *this_0, uint32_t hostID) } \ } -static char gPosixFromLCID[ULOC_FULLNAME_CAPACITY]; #endif -U_CAPI const char * -uprv_convertToPosix(uint32_t hostid, UErrorCode* status) +U_CAPI int32_t +uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UErrorCode* status) { uint16_t langID; uint32_t localeIndex; + UBool bLookup = TRUE; + const char *pPosixID = NULL; + #ifdef USE_WINDOWS_LOCALE_API - int32_t ret = 0; + int32_t tmpLen = 0; + char locName[157]; /* ULOC_FULLNAME_CAPACITY */ - uprv_memset(gPosixFromLCID, 0, sizeof(gPosixFromLCID)); - - ret = GetLocaleInfoA(hostid, LOCALE_SNAME, (LPSTR)gPosixFromLCID, sizeof(gPosixFromLCID)); - if (ret > 1) { - FIX_LOCALE_ID_TAG_SEPARATOR(gPosixFromLCID, (uint32_t)ret, localeIndex) - FIX_LANGUAGE_ID_TAG(gPosixFromLCID, ret) - - return gPosixFromLCID; + tmpLen = GetLocaleInfoA(hostid, LOCALE_SNAME, (LPSTR)locName, sizeof(locName)/sizeof(locName[0])); + if (tmpLen > 1) { + /* Windows locale name may contain sorting variant, such as "es-ES_tradnl". 
+ In such case, we need special mapping data found in the hardcoded table + in this source file. */ + char *p = uprv_strchr(locName, '_'); + if (p) { + /* Keep the base locale, without variant */ + *p = 0; + tmpLen = uprv_strlen(locName); + } else { + /* No hardcoded table lookup necessary */ + bLookup = FALSE; + } + /* Change the tag separator from '-' to '_' */ + p = locName; + while (*p) { + if (*p == '-') { + *p = '_'; + } + p++; + } + FIX_LANGUAGE_ID_TAG(locName, tmpLen); + pPosixID = locName; } #endif - langID = LANGUAGE_LCID(hostid); + if (bLookup) { + const char *pCandidate = NULL; + langID = LANGUAGE_LCID(hostid); - for (localeIndex = 0; localeIndex < gLocaleCount; localeIndex++) - { - if (langID == gPosixIDmap[localeIndex].regionMaps->hostID) - { - return getPosixID(&gPosixIDmap[localeIndex], hostid); + for (localeIndex = 0; localeIndex < gLocaleCount; localeIndex++) { + if (langID == gPosixIDmap[localeIndex].regionMaps->hostID) { + pCandidate = getPosixID(&gPosixIDmap[localeIndex], hostid); + } } + + /* On Windows, when locale name has a variant, we still look up the hardcoded table. + If a match in the hardcoded table is longer than the Windows locale name without + variant, we use the one as the result */ + if (pCandidate && (pPosixID == NULL || uprv_strlen(pCandidate) > uprv_strlen(pPosixID))) { + pPosixID = pCandidate; + } + } + + if (pPosixID) { + int32_t resLen = uprv_strlen(pPosixID); + int32_t copyLen = resLen <= posixIDCapacity ? 
resLen : posixIDCapacity; + uprv_memcpy(posixID, pPosixID, copyLen); + if (resLen < posixIDCapacity) { + posixID[resLen] = 0; + if (*status == U_STRING_NOT_TERMINATED_WARNING) { + *status = U_ZERO_ERROR; + } + } else if (resLen == posixIDCapacity) { + *status = U_STRING_NOT_TERMINATED_WARNING; + } else { + *status = U_BUFFER_OVERFLOW_ERROR; + } + return resLen; } /* no match found */ *status = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; + return -1; } /* diff --git a/icu4c/source/common/locmap.h b/icu4c/source/common/locmap.h index 7db06070ec..214bbcec6a 100644 --- a/icu4c/source/common/locmap.h +++ b/icu4c/source/common/locmap.h @@ -1,7 +1,7 @@ /* ****************************************************************************** * -* Copyright (C) 1996-2004, International Business Machines +* Copyright (C) 1996-2013, International Business Machines * Corporation and others. All Rights Reserved. * ****************************************************************************** @@ -28,7 +28,7 @@ #define LANGUAGE_LCID(hostID) (uint16_t)(0x03FF & hostID) -U_CAPI const char *uprv_convertToPosix(uint32_t hostid, UErrorCode* status); +U_CAPI int32_t uprv_convertToPosix(uint32_t hostid, char* posixID, int32_t posixIDCapacity, UErrorCode* status); /* Don't call this function directly. Use uloc_getLCID instead. 
*/ U_CAPI uint32_t uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status); diff --git a/icu4c/source/common/putil.cpp b/icu4c/source/common/putil.cpp index a3d8df5a00..12e5a47bce 100644 --- a/icu4c/source/common/putil.cpp +++ b/icu4c/source/common/putil.cpp @@ -1120,7 +1120,7 @@ uprv_tzname(int n) /* Get and set the ICU data directory --------------------------------------- */ static char *gDataDirectory = NULL; -#if U_POSIX_LOCALE +#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */ #endif @@ -1130,7 +1130,7 @@ static UBool U_CALLCONV putil_cleanup(void) uprv_free(gDataDirectory); } gDataDirectory = NULL; -#if U_POSIX_LOCALE +#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API if (gCorrectedPOSIXLocale) { uprv_free(gCorrectedPOSIXLocale); gCorrectedPOSIXLocale = NULL; @@ -1600,14 +1600,31 @@ The leftmost codepage (.xxx) wins. return posixID; #elif U_PLATFORM_USES_ONLY_WIN32_API +#define POSIX_LOCALE_CAPACITY 64 UErrorCode status = U_ZERO_ERROR; - LCID id = GetThreadLocale(); - const char* locID = uprv_convertToPosix(id, &status); + char *correctedPOSIXLocale = 0; - if (U_FAILURE(status)) { - locID = "en_US"; + if (gCorrectedPOSIXLocale != NULL) { + return gCorrectedPOSIXLocale; } - return locID; + + LCID id = GetThreadLocale(); + correctedPOSIXLocale = static_cast<char *>(uprv_malloc(POSIX_LOCALE_CAPACITY + 1)); + if (correctedPOSIXLocale) { + int32_t posixLen = uprv_convertToPosix(id, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status); + if (U_SUCCESS(status)) { + *(correctedPOSIXLocale + posixLen) = 0; + gCorrectedPOSIXLocale = correctedPOSIXLocale; + ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); + } else { + uprv_free(correctedPOSIXLocale); + } + } + + if (gCorrectedPOSIXLocale == NULL) { + return "en_US"; + } + return gCorrectedPOSIXLocale; #elif U_PLATFORM == U_PF_CLASSIC_MACOS int32_t script = MAC_LC_INIT_NUMBER; diff --git a/icu4c/source/common/uloc.cpp 
b/icu4c/source/common/uloc.cpp index 627b4d9da4..66905b022f 100644 --- a/icu4c/source/common/uloc.cpp +++ b/icu4c/source/common/uloc.cpp @@ -2095,6 +2095,39 @@ uloc_getLCID(const char* localeID) return 0; } + if (uprv_strchr(localeID, '@')) { + // uprv_convertToLCID does not support keywords other than collation. + // Remove all keywords except collation. + int32_t len; + char collVal[ULOC_KEYWORDS_CAPACITY]; + char tmpLocaleID[ULOC_FULLNAME_CAPACITY]; + + len = uloc_getKeywordValue(localeID, "collation", collVal, + sizeof(collVal)/sizeof(collVal[0]) - 1, &status); + + if (U_SUCCESS(status) && len > 0) { + collVal[len] = 0; + + len = uloc_getBaseName(localeID, tmpLocaleID, + sizeof(tmpLocaleID)/sizeof(tmpLocaleID[0]) - 1, &status); + + if (U_SUCCESS(status)) { + tmpLocaleID[len] = 0; + + len = uloc_setKeywordValue("collation", collVal, tmpLocaleID, + sizeof(tmpLocaleID)/sizeof(tmpLocaleID[0]) - len - 1, &status); + + if (U_SUCCESS(status)) { + tmpLocaleID[len] = 0; + return uprv_convertToLCID(langID, tmpLocaleID, &status); + } + } + } + + // fall through - all keywords are simply ignored + status = U_ZERO_ERROR; + } + return uprv_convertToLCID(langID, localeID, &status); } @@ -2102,19 +2135,7 @@ U_CAPI int32_t U_EXPORT2 uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity, UErrorCode *status) { - int32_t length; - const char *posix = uprv_convertToPosix(hostid, status); - if (U_FAILURE(*status) || posix == NULL) { - return 0; - } - length = (int32_t)uprv_strlen(posix); - if (length+1 > localeCapacity) { - *status = U_BUFFER_OVERFLOW_ERROR; - } - else { - uprv_strcpy(locale, posix); - } - return length; + return uprv_convertToPosix(hostid, locale, localeCapacity, status); } /* ### Default locale **************************************************/ diff --git a/icu4c/source/i18n/windtfmt.cpp b/icu4c/source/i18n/windtfmt.cpp index 0fda3419bc..1378491a60 100644 --- a/icu4c/source/i18n/windtfmt.cpp +++ b/icu4c/source/i18n/windtfmt.cpp @@ -1,6 +1,6 
@@ /* ******************************************************************************** -* Copyright (C) 2005-2011, International Business Machines +* Copyright (C) 2005-2013, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************** * @@ -94,7 +94,7 @@ UnicodeString* Win32DateFormat::getTimeDateFormat(const Calendar *cal, const Loc // TODO: Range-check timeStyle, dateStyle Win32DateFormat::Win32DateFormat(DateFormat::EStyle timeStyle, DateFormat::EStyle dateStyle, const Locale &locale, UErrorCode &status) - : DateFormat(), fDateTimeMsg(NULL), fTimeStyle(timeStyle), fDateStyle(dateStyle), fLocale(&locale), fZoneID() + : DateFormat(), fDateTimeMsg(NULL), fTimeStyle(timeStyle), fDateStyle(dateStyle), fLocale(locale), fZoneID() { if (U_SUCCESS(status)) { fLCID = locale.getLCID(); @@ -127,6 +127,7 @@ Win32DateFormat &Win32DateFormat::operator=(const Win32DateFormat &other) this->fDateTimeMsg = other.fDateTimeMsg; this->fTimeStyle = other.fTimeStyle; this->fDateStyle = other.fDateStyle; + this->fLocale = other.fLocale; this->fLCID = other.fLCID; // this->fCalendar = other.fCalendar->clone(); this->fZoneID = other.fZoneID; @@ -178,7 +179,7 @@ UnicodeString &Win32DateFormat::format(Calendar &cal, UnicodeString &appendTo, F timeDateArray[1].adoptString(date); if (strcmp(fCalendar->getType(), cal.getType()) != 0) { - pattern = getTimeDateFormat(&cal, fLocale, status); + pattern = getTimeDateFormat(&cal, &fLocale, status); } MessageFormat::format(*pattern, timeDateArray, 2, appendTo, status); @@ -203,7 +204,7 @@ void Win32DateFormat::adoptCalendar(Calendar *newCalendar) if (fDateStyle != DateFormat::kNone && fTimeStyle != DateFormat::kNone) { delete fDateTimeMsg; - fDateTimeMsg = getTimeDateFormat(newCalendar, fLocale, status); + fDateTimeMsg = getTimeDateFormat(newCalendar, &fLocale, status); } } diff --git a/icu4c/source/i18n/windtfmt.h b/icu4c/source/i18n/windtfmt.h index 
39ac8afb62..0c182f54e6 100644 --- a/icu4c/source/i18n/windtfmt.h +++ b/icu4c/source/i18n/windtfmt.h @@ -1,6 +1,6 @@ /* ******************************************************************************** -* Copyright (C) 2005-2011, International Business Machines +* Copyright (C) 2005-2013, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************** * @@ -121,7 +121,7 @@ private: UnicodeString *fDateTimeMsg; DateFormat::EStyle fTimeStyle; DateFormat::EStyle fDateStyle; - const Locale *fLocale; + Locale fLocale; int32_t fLCID; UnicodeString fZoneID; TIME_ZONE_INFORMATION *fTZI; diff --git a/icu4c/source/i18n/winnmfmt.cpp b/icu4c/source/i18n/winnmfmt.cpp index 74136d0b56..de5b154667 100644 --- a/icu4c/source/i18n/winnmfmt.cpp +++ b/icu4c/source/i18n/winnmfmt.cpp @@ -1,6 +1,6 @@ /* ******************************************************************************** -* Copyright (C) 2005-2011, International Business Machines +* Copyright (C) 2005-2013, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************** * @@ -135,13 +135,21 @@ static void freeCurrencyFormat(CURRENCYFMTW *fmt) } } -// TODO: keep locale too? 
Win32NumberFormat::Win32NumberFormat(const Locale &locale, UBool currency, UErrorCode &status) : NumberFormat(), fCurrency(currency), fFractionDigitsSet(FALSE), fFormatInfo(NULL) { if (!U_FAILURE(status)) { fLCID = locale.getLCID(); + // Resolve actual locale to be used later + UErrorCode tmpsts = U_ZERO_ERROR; + char tmpLocID[ULOC_FULLNAME_CAPACITY]; + int32_t len = uloc_getLocaleForLCID(fLCID, tmpLocID, sizeof(tmpLocID)/sizeof(tmpLocID[0]) - 1, &tmpsts); + if (U_SUCCESS(tmpsts)) { + tmpLocID[len] = 0; + fLocale = Locale((const char*)tmpLocID); + } + fFormatInfo = (FormatInfo*)uprv_malloc(sizeof(FormatInfo)); if (fCurrency) { @@ -179,6 +187,7 @@ Win32NumberFormat &Win32NumberFormat::operator=(const Win32NumberFormat &other) NumberFormat::operator=(other); this->fCurrency = other.fCurrency; + this->fLocale = other.fLocale; this->fLCID = other.fLCID; this->fFractionDigitsSet = other.fFractionDigitsSet; @@ -213,12 +222,10 @@ UnicodeString& Win32NumberFormat::format(int64_t number, UnicodeString& appendTo return format(getMinimumFractionDigits(), appendTo, L"%I64d", number); } -// TODO: cache Locale and NumberFormat? Could keep locale passed to constructor... void Win32NumberFormat::parse(const UnicodeString& text, Formattable& result, ParsePosition& parsePosition) const { UErrorCode status = U_ZERO_ERROR; - Locale loc(uprv_convertToPosix(fLCID, &status)); - NumberFormat *nf = fCurrency? NumberFormat::createCurrencyInstance(loc, status) : NumberFormat::createInstance(loc, status); + NumberFormat *nf = fCurrency? 
NumberFormat::createCurrencyInstance(fLocale, status) : NumberFormat::createInstance(fLocale, status); nf->parse(text, result, parsePosition); delete nf; diff --git a/icu4c/source/i18n/winnmfmt.h b/icu4c/source/i18n/winnmfmt.h index e75adf9e7e..ac61d5f2f6 100644 --- a/icu4c/source/i18n/winnmfmt.h +++ b/icu4c/source/i18n/winnmfmt.h @@ -1,6 +1,6 @@ /* ******************************************************************************** -* Copyright (C) 2005-2011, International Business Machines +* Copyright (C) 2005-2013, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************** * @@ -146,6 +146,7 @@ private: UnicodeString &format(int32_t numDigits, UnicodeString &appendTo, wchar_t *format, ...) const; UBool fCurrency; + Locale fLocale; int32_t fLCID; FormatInfo *fFormatInfo; UBool fFractionDigitsSet; diff --git a/icu4c/source/test/cintltst/cldrtest.c b/icu4c/source/test/cintltst/cldrtest.c index 027a2b374b..9a48bd3f53 100644 --- a/icu4c/source/test/cintltst/cldrtest.c +++ b/icu4c/source/test/cintltst/cldrtest.c @@ -453,6 +453,7 @@ testLCID(UResourceBundle *currentBundle, UErrorCode status = U_ZERO_ERROR; uint32_t expectedLCID; char lcidStringC[64] = {0}; + int32_t len; expectedLCID = uloc_getLCID(localeName); if (expectedLCID == 0) { @@ -462,11 +463,12 @@ testLCID(UResourceBundle *currentBundle, } status = U_ZERO_ERROR; - uprv_strcpy(lcidStringC, uprv_convertToPosix(expectedLCID, &status)); + len = uprv_convertToPosix(expectedLCID, lcidStringC, sizeof(lcidStringC)/sizeof(lcidStringC[0]) - 1, &status); if (U_FAILURE(status)) { log_err("ERROR: %.4x does not have a POSIX mapping due to %s\n", expectedLCID, u_errorName(status)); } + lcidStringC[len] = 0; if(strcmp(localeName, lcidStringC) != 0) { char langName[1024]; diff --git a/icu4c/source/test/intltest/winutil.cpp b/icu4c/source/test/intltest/winutil.cpp index 0b7bd6d7a9..ef51fdd53a 100644 --- 
a/icu4c/source/test/intltest/winutil.cpp +++ b/icu4c/source/test/intltest/winutil.cpp @@ -1,6 +1,6 @@ /* ******************************************************************************** -* Copyright (C) 2005-2011, International Business Machines +* Copyright (C) 2005-2013, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************** * @@ -35,7 +35,8 @@ static int32_t lcidMax = 0; BOOL CALLBACK EnumLocalesProc(LPSTR lpLocaleString) { - const char* localeID = NULL; + char localeID[ULOC_FULLNAME_CAPACITY]; + int32_t localeIDLen; UErrorCode status = U_ZERO_ERROR; if (lcidCount >= lcidMax) { @@ -52,11 +53,14 @@ BOOL CALLBACK EnumLocalesProc(LPSTR lpLocaleString) sscanf(lpLocaleString, "%8x", &lcidRecords[lcidCount].lcid); - localeID = uprv_convertToPosix(lcidRecords[lcidCount].lcid, &status); - - lcidRecords[lcidCount].localeID = new char[strlen(localeID)]; - - strcpy(lcidRecords[lcidCount].localeID, localeID); + localeIDLen = uprv_convertToPosix(lcidRecords[lcidCount].lcid, localeID, sizeof(localeID)/sizeof(localeID[0]), &status); + if (U_SUCCESS(status)) { + lcidRecords[lcidCount].localeID = new char[localeIDLen + 1]; + memcpy(lcidRecords[lcidCount].localeID, localeID, localeIDLen); + lcidRecords[lcidCount].localeID[localeIDLen] = 0; + } else { + lcidRecords[lcidCount].localeID = NULL; + } lcidCount += 1;