ICU-20590 Ensure time format consistency for SHORT vs j patterns if no locale data

This commit is contained in:
Peter Edberg 2020-09-22 10:10:30 -07:00 committed by Peter Edberg
parent 47230019c6
commit 3d8ae5eb6d
8 changed files with 272 additions and 55 deletions

View File

@ -311,6 +311,16 @@ DateTimePatternGenerator::createInstance(const Locale& locale, UErrorCode& statu
return U_SUCCESS(status) ? result.orphan() : nullptr;
}
DateTimePatternGenerator* U_EXPORT2
DateTimePatternGenerator::createInstanceNoStdPat(const Locale& locale, UErrorCode& status) {
if (U_FAILURE(status)) {
return nullptr;
}
LocalPointer<DateTimePatternGenerator> result(
new DateTimePatternGenerator(locale, status, true), status);
return U_SUCCESS(status) ? result.orphan() : nullptr;
}
DateTimePatternGenerator* U_EXPORT2
DateTimePatternGenerator::createEmptyInstance(UErrorCode& status) {
if (U_FAILURE(status)) {
@ -336,7 +346,7 @@ DateTimePatternGenerator::DateTimePatternGenerator(UErrorCode &status) :
}
}
DateTimePatternGenerator::DateTimePatternGenerator(const Locale& locale, UErrorCode &status) :
DateTimePatternGenerator::DateTimePatternGenerator(const Locale& locale, UErrorCode &status, UBool skipStdPatterns) :
skipMatcher(nullptr),
fAvailableFormatKeyHash(nullptr),
fDefaultHourFormatChar(0),
@ -350,7 +360,7 @@ DateTimePatternGenerator::DateTimePatternGenerator(const Locale& locale, UErrorC
internalErrorCode = status = U_MEMORY_ALLOCATION_ERROR;
}
else {
initData(locale, status);
initData(locale, status, skipStdPatterns);
}
}
@ -489,13 +499,15 @@ enum AllowedHourFormat{
} // namespace
void
DateTimePatternGenerator::initData(const Locale& locale, UErrorCode &status) {
DateTimePatternGenerator::initData(const Locale& locale, UErrorCode &status, UBool skipStdPatterns) {
//const char *baseLangName = locale.getBaseName(); // unused
skipMatcher = nullptr;
fAvailableFormatKeyHash=nullptr;
addCanonicalItems(status);
addICUPatterns(locale, status);
if (!skipStdPatterns) { // skip to prevent circular dependency when called from SimpleDateFormat::construct
addICUPatterns(locale, status);
}
addCLDRData(locale, status);
setDateTimeFromCalendar(locale, status);
setDecimalSymbols(locale, status);

View File

@ -54,6 +54,7 @@
#include "unicode/udisplaycontext.h"
#include "unicode/brkiter.h"
#include "unicode/rbnf.h"
#include "unicode/dtptngen.h"
#include "uresimp.h"
#include "olsontz.h"
#include "patternprops.h"
@ -650,6 +651,12 @@ SimpleDateFormat::operator==(const Format& other) const
}
//----------------------------------------------------------------------
static const UChar* timeSkeletons[4] = {
u"jmmsszzzz", // kFull
u"jmmssz", // kLong
u"jmmss", // kMedium
u"jmm", // kShort
};
void SimpleDateFormat::construct(EStyle timeStyle,
EStyle dateStyle,
@ -714,35 +721,75 @@ void SimpleDateFormat::construct(EStyle timeStyle,
fDateOverride.setToBogus();
fTimeOverride.setToBogus();
UnicodeString timePattern;
if (timeStyle >= kFull && timeStyle <= kShort) {
const char* baseLocID = locale.getBaseName();
if (baseLocID[0]!=0 && uprv_strcmp(baseLocID,"und")!=0) {
UErrorCode useStatus = U_ZERO_ERROR;
Locale baseLoc(baseLocID);
Locale validLoc(getLocale(ULOC_VALID_LOCALE, useStatus));
if (U_SUCCESS(useStatus) && validLoc!=baseLoc) {
bool useDTPG = false;
const char* baseReg = baseLoc.getCountry(); // empty string if no region
if ((baseReg[0]!=0 && uprv_strncmp(baseReg,validLoc.getCountry(),ULOC_COUNTRY_CAPACITY)!=0)
|| uprv_strncmp(baseLoc.getLanguage(),validLoc.getLanguage(),ULOC_LANG_CAPACITY)!=0) {
// use DTPG if
// * baseLoc has a region and validLoc does not have the same one (or has none), OR
// * validLoc has a different language code than baseLoc
useDTPG = true;
}
if (useDTPG) {
// The standard time formats may have the wrong time cycle, because:
// the valid locale differs in important ways (region, language) from
// the base locale.
// We could *also* check whether they do actually have a mismatch with
// the time cycle preferences for the region, but that is a lot more
// work for little or no additional benefit, since just going ahead
// and always synthesizing the time format as per the following should
// create a locale-appropriate pattern with cycle that matches the
// region preferences anyway.
LocalPointer<DateTimePatternGenerator> dtpg(DateTimePatternGenerator::createInstanceNoStdPat(locale, useStatus));
if (U_SUCCESS(useStatus)) {
UnicodeString timeSkeleton(TRUE, timeSkeletons[timeStyle], -1);
timePattern = dtpg->getBestPattern(timeSkeleton, useStatus);
}
}
}
}
}
// if the pattern should include both date and time information, use the date/time
// pattern string as a guide to tell use how to glue together the appropriate date
// and time pattern strings.
if ((timeStyle != kNone) && (dateStyle != kNone))
{
currentBundle.adoptInstead(
ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)timeStyle, NULL, &status));
if (U_FAILURE(status)) {
status = U_INVALID_FORMAT_ERROR;
return;
}
switch (ures_getType(currentBundle.getAlias())) {
case URES_STRING: {
resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status);
break;
}
case URES_ARRAY: {
resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status);
ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status);
fTimeOverride.setTo(TRUE, ovrStr, ovrStrLen);
break;
}
default: {
UnicodeString tempus1(timePattern);
if (tempus1.length() == 0) {
currentBundle.adoptInstead(
ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)timeStyle, NULL, &status));
if (U_FAILURE(status)) {
status = U_INVALID_FORMAT_ERROR;
return;
}
}
switch (ures_getType(currentBundle.getAlias())) {
case URES_STRING: {
resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status);
break;
}
case URES_ARRAY: {
resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status);
ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status);
fTimeOverride.setTo(TRUE, ovrStr, ovrStrLen);
break;
}
default: {
status = U_INVALID_FORMAT_ERROR;
return;
}
}
UnicodeString tempus1(TRUE, resStr, resStrLen);
tempus1.setTo(TRUE, resStr, resStrLen);
}
currentBundle.adoptInstead(
ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)dateStyle, NULL, &status));
@ -784,29 +831,32 @@ void SimpleDateFormat::construct(EStyle timeStyle,
// pattern string from the resources
// setTo() - see DateFormatSymbols::assignArray comments
else if (timeStyle != kNone) {
currentBundle.adoptInstead(
ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)timeStyle, NULL, &status));
if (U_FAILURE(status)) {
status = U_INVALID_FORMAT_ERROR;
return;
}
switch (ures_getType(currentBundle.getAlias())) {
case URES_STRING: {
resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status);
break;
}
case URES_ARRAY: {
resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status);
ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status);
fDateOverride.setTo(TRUE, ovrStr, ovrStrLen);
break;
}
default: {
fPattern.setTo(timePattern);
if (fPattern.length() == 0) {
currentBundle.adoptInstead(
ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)timeStyle, NULL, &status));
if (U_FAILURE(status)) {
status = U_INVALID_FORMAT_ERROR;
return;
}
switch (ures_getType(currentBundle.getAlias())) {
case URES_STRING: {
resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status);
break;
}
case URES_ARRAY: {
resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status);
ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status);
fDateOverride.setTo(TRUE, ovrStr, ovrStrLen);
break;
}
default: {
status = U_INVALID_FORMAT_ERROR;
return;
}
}
fPattern.setTo(TRUE, resStr, resStrLen);
}
fPattern.setTo(TRUE, resStr, resStrLen);
}
else if (dateStyle != kNone) {
currentBundle.adoptInstead(

View File

@ -76,6 +76,13 @@ public:
#ifndef U_HIDE_INTERNAL_API
/**
* For ICU use only. Skips loading the standard date/time patterns (which is done via DateFormat).
*
* @internal
*/
static DateTimePatternGenerator* U_EXPORT2 createInstanceNoStdPat(const Locale& uLocale, UErrorCode& status);
/**
* For ICU use only
*
@ -526,7 +533,7 @@ private:
/**
* Constructor.
*/
DateTimePatternGenerator(const Locale& locale, UErrorCode & status);
DateTimePatternGenerator(const Locale& locale, UErrorCode & status, UBool skipStdPatterns = false);
/**
* Copy constructor.
@ -573,7 +580,7 @@ private:
// with #13183, no longer need flags for b, B
};
void initData(const Locale &locale, UErrorCode &status);
void initData(const Locale &locale, UErrorCode &status, UBool skipStdPatterns = false);
void addCanonicalItems(UErrorCode &status);
void addICUPatterns(const Locale& locale, UErrorCode& status);
void hackTimes(const UnicodeString& hackPattern, UErrorCode& status);

View File

@ -44,6 +44,7 @@ void IntlTestDateTimePatternGeneratorAPI::runIndexedTest( int32_t index, UBool e
TESTCASE(8, test20640_HourCyclArsEnNH);
TESTCASE(9, testFallbackWithDefaultRootLocale);
TESTCASE(10, testGetDefaultHourCycle_OnEmptyInstance);
TESTCASE(11, test_jConsistencyOddLocales);
default: name = ""; break;
}
}
@ -1396,7 +1397,9 @@ void IntlTestDateTimePatternGeneratorAPI::testJjMapping() {
shortPattern.extract(0, shortPattern.length(), spBuf, 32);
jPattern.extract(0, jPattern.length(), jpBuf, 32);
const char* dfmtCalType = (dfmt->getCalendar())->getType();
errln("ERROR: locale %s, expected j resolved char %s to occur in short time pattern '%s' for %s (best pattern: '%s')", localeID, jcBuf, spBuf, dfmtCalType, jpBuf);
const char* validLoc = dfmt->getLocaleID(ULOC_VALID_LOCALE, status);
errln("ERROR: locale %s (valid %s), expected j resolved char %s to occur in short time pattern '%s' for %s (best pattern: '%s')",
localeID, validLoc, jcBuf, spBuf, dfmtCalType, jpBuf);
}
break;
}
@ -1420,8 +1423,7 @@ void IntlTestDateTimePatternGeneratorAPI::test20640_HourCyclArsEnNH() {
// formerly New Hebrides, now Vanuatu => VU => h.
{"en_NH", u"h a", u"h:mm a", UDAT_HOUR_CYCLE_12},
// ch_ZH is a typo (should be zh_CN), but we should fail gracefully.
// {"cn_ZH", u"HH", u"H:mm"}, // TODO(ICU-20653): Desired behavior
{"cn_ZH", u"HH", u"h:mm a", UDAT_HOUR_CYCLE_23 }, // Actual behavior
{"cn_ZH", u"HH", u"HH:mm", UDAT_HOUR_CYCLE_23 }, // Desired & now actual behavior (does this fix ICU-20653?)
// a non-BCP47 locale without a country code should not fail
{"ja_TRADITIONAL", u"H時", u"H:mm", UDAT_HOUR_CYCLE_23},
};
@ -1507,4 +1509,52 @@ void IntlTestDateTimePatternGeneratorAPI::testGetDefaultHourCycle_OnEmptyInstanc
}
}
void IntlTestDateTimePatternGeneratorAPI::test_jConsistencyOddLocales() { // ICU-20590
static const char* localeIDs[] = {
"en", "ro", // known languages 12h / 24h
"en-RO", "ro-US", // known languages with known regions, hour conflict language vs region
"en-XZ", "ro-XZ", // known languages 12h / 24h, unknown region
"xz-RO", "xz-US", // unknown language with known regions
"xz", // unknown language
"xz-ZX", // unknown language with unknown country
"ars", "wuu" // aliased locales
};
static const UChar* skeleton = u"jm";
for (const char* localeID: localeIDs) {
UErrorCode status = U_ZERO_ERROR;
Locale locale(localeID);
LocalPointer<DateFormat> dtfShort(DateFormat::createTimeInstance(DateFormat::kShort, locale), status);
if (U_FAILURE(status)) {
errln("DateFormat::createTimeInstance failed for locale %s: %s", localeID, u_errorName(status));
continue;
}
LocalPointer<DateFormat> dtfSkel(DateFormat::createInstanceForSkeleton(skeleton, locale, status));
if (U_FAILURE(status)) {
errln("DateFormat::createInstanceForSkeleton failed for locale %s: %s", localeID, u_errorName(status));
continue;
}
LocalPointer<DateTimePatternGenerator> dtpg(DateTimePatternGenerator::createInstance(locale, status));
if (U_FAILURE(status)) {
errln("DateTimePatternGenerator::createInstance failed for locale %s: %s", localeID, u_errorName(status));
continue;
}
UnicodeString dtfShortPattern, dtfSkelPattern;
dynamic_cast<SimpleDateFormat*>(dtfShort.getAlias())->toPattern(dtfShortPattern);
dynamic_cast<SimpleDateFormat*>(dtfSkel.getAlias())->toPattern(dtfSkelPattern);
UnicodeString dtpgPattern = (dtpg.getAlias())->getBestPattern(skeleton, status);
if (U_FAILURE(status)) {
errln("DateTimePatternGenerator::getBestPattern failed for locale %s: %s", localeID, u_errorName(status));
continue;
}
if (dtfShortPattern != dtfSkelPattern || dtfSkelPattern != dtpgPattern) {
const char* dtfShortValidLoc = dtfShort->getLocaleID(ULOC_VALID_LOCALE, status);
const char* dtfShortActualLoc = dtfShort->getLocaleID(ULOC_ACTUAL_LOCALE, status);
errln(UnicodeString("For locale ") + localeID +
" expected same pattern from DateTimePatGen: " + dtpgPattern +
", DateFmt-forSkel: " + dtfSkelPattern + ", DateFmt-short: " + dtfShortPattern +
"; latter has validLoc " + dtfShortValidLoc + ", actualLoc " + dtfShortActualLoc);
}
}
}
#endif /* #if !UCONFIG_NO_FORMATTING */

View File

@ -36,6 +36,7 @@ private:
void test20640_HourCyclArsEnNH();
void testFallbackWithDefaultRootLocale();
void testGetDefaultHourCycle_OnEmptyInstance();
void test_jConsistencyOddLocales();
};
#endif /* #if !UCONFIG_NO_FORMATTING */

View File

@ -125,7 +125,7 @@ public class DateTimePatternGenerator implements Freezable<DateTimePatternGenera
}
result = new DateTimePatternGenerator();
result.initData(uLocale);
result.initData(uLocale, false);
// freeze and cache
result.freeze();
@ -133,16 +133,39 @@ public class DateTimePatternGenerator implements Freezable<DateTimePatternGenera
return result;
}
private void initData(ULocale uLocale) {
/**
* Construct a non-frozen instance of DateTimePatternGenerator for a
* given locale that skips using the standard date and time patterns.
* Because this is different than the normal instance for the locale,
* it does not set or use the cache.
* @param uLocale The locale to pass.
* @internal
* @deprecated This API is ICU internal only.
*/
@Deprecated
public static DateTimePatternGenerator getInstanceNoStdPat(ULocale uLocale) {
DateTimePatternGenerator result = new DateTimePatternGenerator();
result.initData(uLocale, true);
return result;
}
private void initData(ULocale uLocale, boolean skipStdPatterns) {
// This instance of PatternInfo is required for calling some functions. It is used for
// passing additional information to the caller. We won't use this extra information, but
// we still need to make a temporary instance.
PatternInfo returnInfo = new PatternInfo();
addCanonicalItems();
addICUPatterns(returnInfo, uLocale);
if (!skipStdPatterns) { // skip to prevent circular dependency when used by Calendar
addICUPatterns(returnInfo, uLocale);
}
addCLDRData(returnInfo, uLocale);
setDateTimeFromCalendar(uLocale);
if (!skipStdPatterns) { // also skip to prevent circular dependency from Calendar
setDateTimeFromCalendar(uLocale);
} else {
// instead, since from Calendar we do not care about dateTimePattern, use a fallback
setDateTimeFormat("{1} {0}");
}
setDecimalSymbols(uLocale);
getAllowedHourFormats(uLocale);
fillInMissing();

View File

@ -27,6 +27,7 @@ import com.ibm.icu.impl.SimpleFormatterImpl;
import com.ibm.icu.impl.SoftCache;
import com.ibm.icu.text.DateFormat;
import com.ibm.icu.text.DateFormatSymbols;
import com.ibm.icu.text.DateTimePatternGenerator;
import com.ibm.icu.text.SimpleDateFormat;
import com.ibm.icu.util.ULocale.Category;
@ -3514,6 +3515,13 @@ public abstract class Calendar implements Serializable, Cloneable, Comparable<Ca
"{1} {0}",
"{1} {0}"
};
// final fallback patterns
private static final String[] TIME_SKELETONS = {
"jmmsszzzz", // Full
"jmmssz", // Long
"jmmss", // Medium
"jmm" // Short
};
static private DateFormat formatHelper(Calendar cal, ULocale loc, int dateStyle,
int timeStyle) {
@ -3615,7 +3623,39 @@ public abstract class Calendar implements Serializable, Cloneable, Comparable<Ca
int patternsSize = dtPatternsRb.getSize();
String[] dateTimePatterns = new String[patternsSize];
String[] dateTimePatternsOverrides = new String[patternsSize];
for (int i = 0; i < patternsSize; i++) {
int i = 0; // index for dateTimePatterns, dateTimePatternsOverrides
String baseLocID = locale.getBaseName();
if (baseLocID.length() > 0 && !baseLocID.equals("und")) {
ULocale baseLoc = new ULocale(baseLocID);
// The following is different from ICU4C, where we can get the valid locale
// for the SimpleDateFormat object. Here we do not have a SimpleDateFormat and
// valid locale for the Calendar is a bit meaningless.
ULocale validLoc = ULocale.addLikelySubtags(dtPatternsRb.getULocale());
if (validLoc != baseLoc) {
String baseReg = baseLoc.getCountry();
if ((baseReg.length() > 0 && !baseReg.equals(validLoc.getCountry()))
|| !baseLoc.getLanguage().equals(validLoc.getLanguage())) {
// use DTPG if the standard time formats may have the wrong time cycle,
// because the valid locale differs in important ways (region, language)
// from the base locale.
// We could *also* check whether they do actually have a mismatch with
// the time cycle preferences for the region, but that is a lot more
// work for little or no additional benefit, since just going ahead
// and always synthesizing the time format as per the following should
// create a locale-appropriate pattern with cycle that matches the
// region preferences anyway.
// In this case we get the first 4 entries of dateTimePatterns using
// DateTimePatternGenerator, not resource data.
DateTimePatternGenerator dtpg = DateTimePatternGenerator.getInstanceNoStdPat(locale);
for (; i < 4; i++) {
dateTimePatterns[i] = dtpg.getBestPattern(TIME_SKELETONS[i]);
}
}
}
}
for (; i < patternsSize; i++) { // get all or remaining dateTimePatterns entries
ICUResourceBundle concatenationPatternRb = (ICUResourceBundle) dtPatternsRb.get(i);
switch (concatenationPatternRb.getType()) {
case UResourceBundle.STRING:

View File

@ -1737,8 +1737,7 @@ public class DateTimeGeneratorTest extends TestFmwk {
// en_NH is interesting because NH is a depregated region code.
{"en_NH", "h a", "h:mm a", "HOUR_CYCLE_12"},
// ch_ZH is a typo (should be zh_CN), but we should fail gracefully.
// {"cn_ZH", "HH", "H:mm"}, // TODO(ICU-20653): Desired behavior
{"cn_ZH", "HH", "h:mm a", "HOUR_CYCLE_23"}, // Actual behavior
{"cn_ZH", "HH", "HH:mm", "HOUR_CYCLE_23"}, // Desired & now actual behavior (does this fix ICU-20653?)
// a non-BCP47 locale without a country code should not fail
{"ja_TRADITIONAL", "H時", "H:mm", "HOUR_CYCLE_23"},
};
@ -1759,4 +1758,39 @@ public class DateTimeGeneratorTest extends TestFmwk {
cas[3], dtpg.getDefaultHourCycle().toString());
}
}
@Test
public void test_jConsistencyOddLocales() { // ICU-20590
String[] localeIDs = {
"en", "ro", // known languages 12h / 24h
"en-RO", "ro-US", // known languages with known regions, hour conflict language vs region
"en-XZ", "ro-XZ", // known languages 12h / 24h, unknown region
"xz-RO", "xz-US", // unknown language with known regions
"xz", // unknown language
"xz-ZX", // unknown language with unknown country
"ars", "wuu" // aliased locales
};
final String skeleton = "jm";
for (String localeID : localeIDs) {
ULocale locale = new ULocale(localeID);
DateFormat dtfShort = DateFormat.getTimeInstance(DateFormat.SHORT, locale);
String dtfShortPattern = ((SimpleDateFormat)dtfShort).toPattern();
DateFormat dtfSkel = DateFormat.getInstanceForSkeleton(skeleton, locale);
String dtfSkelPattern = ((SimpleDateFormat)dtfSkel).toPattern();
DateTimePatternGenerator dtpg = DateTimePatternGenerator.getInstance(locale);
String dtpgPattern = dtpg.getBestPattern(skeleton);
if (!dtfShortPattern.equals(dtfSkelPattern) || !dtfSkelPattern.equals(dtpgPattern)) {
String dtfShortValidLoc = dtfShort.getLocale(ULocale.VALID_LOCALE).getName();
String dtfShortActualLoc = dtfShort.getLocale(ULocale.ACTUAL_LOCALE).getName();
errln("For locale " + localeID +
" expected same pattern from DateTimePatGen: " + dtpgPattern +
", DateFmt-forSkel: " + dtfSkelPattern + ", DateFmt-short: " + dtfShortPattern +
"; latter has validLoc " + dtfShortValidLoc + ", actualLoc " + dtfShortActualLoc);
}
}
}
}