ICU-20575 fix broken default locale mapping for C.UTF-8

The regression was introduced in 1afef30549 (PR #418, ICU-20187). - We dropped the mapping from "C" in uloc_canonicalize, but putil then did not handle cases where a codepage was set (such as C.UTF-8). - Add an additional check in uprv_getDefaultLocaleID() for locales that end up as "C" or "POSIX" after the codepage suffix is removed. - Also fix a regression where aa@bb would become aa__BB__BB (the __BB variant was incorrectly doubled).
2019-04-25 10:40:28 -07:00 · 2019-04-25 10:40:28 -07:00 · 075cefb2e2
commit 075cefb2e2
parent 0aa19c0d22
1 changed files with 17 additions and 6 deletions
--- a/icu4c/source/common/putil.cpp
+++ b/icu4c/source/common/putil.cpp
@@ -1560,6 +1560,10 @@ static const char *uprv_getPOSIXIDForCategory(int category)
    {
        /* Nothing worked.  Give it a nice POSIX default value. */
        posixID = "en_US_POSIX";
+        // Note: this test will not catch 'C.UTF-8',
+        // that will be handled in uprv_getDefaultLocaleID().
+        // Leave this mapping here for the uprv_getPOSIXIDForDefaultCodepage()
+        // caller which expects to see "en_US_POSIX" in many branches.
    }
    return posixID;
 }
@@ -1631,8 +1635,8 @@ The leftmost codepage (.xxx) wins.
    }

    // Copy the ID into owned memory.
-    // Over-allocate in case we replace "@" with "__".
-    char *correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID) + 1 + 1));
+    // Over-allocate in case we replace "C" with "en_US_POSIX" (+10), + null termination
+    char *correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID) + 10 + 1));
    if (correctedPOSIXLocale == nullptr) {
        return nullptr;
    }
@@ -1641,11 +1645,18 @@ The leftmost codepage (.xxx) wins.
    char *limit;
    if ((limit = uprv_strchr(correctedPOSIXLocale, '.')) != nullptr) {
        *limit = 0;
-        if ((limit = uprv_strchr(correctedPOSIXLocale, '@')) != nullptr) {
-            *limit = 0;
-        }
+    }
+    if ((limit = uprv_strchr(correctedPOSIXLocale, '@')) != nullptr) {
+        *limit = 0;
    }

+    if ((uprv_strcmp("C", correctedPOSIXLocale) == 0) // no @ variant
+        || (uprv_strcmp("POSIX", correctedPOSIXLocale) == 0)) {
+      // Raw input was C.* or POSIX.*, Give it a nice POSIX default value.
+      // (The "C"/"POSIX" case is handled in uprv_getPOSIXIDForCategory())
+      uprv_strcpy(correctedPOSIXLocale, "en_US_POSIX");
+    }
+ 
    /* Note that we scan the *uncorrected* ID. */
    const char *p;
    if ((p = uprv_strrchr(posixID, '@')) != nullptr) {
@@ -1668,7 +1679,7 @@ The leftmost codepage (.xxx) wins.
        if ((q = uprv_strchr(p, '.')) != nullptr) {
            /* How big will the resulting string be? */
            int32_t len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
-            uprv_strncat(correctedPOSIXLocale, p, q-p);
+            uprv_strncat(correctedPOSIXLocale, p, q-p); // do not include charset
            correctedPOSIXLocale[len] = 0;
        }
        else {