ICU-20738 Best-match pattern for 'sS' uses <appendItem> data

This commit is contained in:
Mihai Nita 2020-02-07 15:49:52 -08:00
parent 9eca171a39
commit dd50e38f45
4 changed files with 70 additions and 8 deletions

View File

@ -1517,6 +1517,7 @@ DateTimePatternGenerator::getBestRaw(DateTimeMatcher& source,
UErrorCode &status,
const PtnSkeleton** specifiedSkeletonPtr) {
int32_t bestDistance = 0x7fffffff;
int32_t bestMissingFieldMask = -1;
DistanceInfo tempInfo;
const UnicodeString *bestPattern=nullptr;
const PtnSkeleton* specifiedSkeleton=nullptr;
@ -1530,8 +1531,15 @@ DateTimePatternGenerator::getBestRaw(DateTimeMatcher& source,
continue;
}
int32_t distance=source.getDistance(trial, includeMask, tempInfo);
if (distance<bestDistance) {
// Because we iterate over a map the order is undefined. Can change between implementations,
// versions, and will very likely be different between Java and C/C++.
// So if we have patterns with the same distance we also look at the missingFieldMask,
// and we favour the smallest one. Because the field is a bitmask this technically means we
// favour differences in the "least significant fields". For example we prefer the one with differences
// in seconds field vs one with difference in the hours field.
if (distance<bestDistance || (distance==bestDistance && bestMissingFieldMask<tempInfo.missingFieldMask)) {
bestDistance=distance;
bestMissingFieldMask=tempInfo.missingFieldMask;
bestPattern=patternMap->getPatternFromSkeleton(*trial.getSkeletonPtr(), &specifiedSkeleton);
missingFields->setTo(tempInfo);
if (distance==0) {
@ -2222,8 +2230,16 @@ DateTimeMatcher::set(const UnicodeString& pattern, FormatParser* fp, PtnSkeleton
skeletonResult.type[field] = subField;
}
// #20739, we have a skeleton with milliseconde, but no seconds
if (!skeletonResult.original.isFieldEmpty(UDATPG_FRACTIONAL_SECOND_FIELD)
// #20739, we have a skeleton with minutes and milliseconds, but no seconds
//
// Theoretically we would need to check and fix all fields with "gaps":
// for example year-day (no month), month-hour (no day), and so on, All the possible field combinations.
// Plus some smartness: year + hour => should we add month, or add day-of-year?
// What about month + day-of-week, or month + am/pm indicator.
// I think beyond a certain point we should not try to fix bad developer input and try guessing what they mean.
// Garbage in, garbage out.
if (!skeletonResult.original.isFieldEmpty(UDATPG_MINUTE_FIELD)
&& !skeletonResult.original.isFieldEmpty(UDATPG_FRACTIONAL_SECOND_FIELD)
&& skeletonResult.original.isFieldEmpty(UDATPG_SECOND_FIELD)) {
// Force the use of seconds
for (i = 0; dtTypes[i].patternChar != 0; i++) {

View File

@ -4915,6 +4915,7 @@ void DateFormatTest::TestPatternFromSkeleton() {
{Locale::getGerman(), "jjmm", "HH:mm"},
{Locale::getGerman(), "JJmm", "HH:mm"},
// Ticket #20739
// minutes+milliseconds, seconds missing, should be repaired
{Locale::getEnglish(), "SSSSm", "mm:ss.SSSS"},
{Locale::getEnglish(), "mSSSS", "mm:ss.SSSS"},
{Locale::getEnglish(), "SSSm", "mm:ss.SSS"},
@ -4923,12 +4924,26 @@ void DateFormatTest::TestPatternFromSkeleton() {
{Locale::getEnglish(), "mSS", "mm:ss.SS"},
{Locale::getEnglish(), "Sm", "mm:ss.S"},
{Locale::getEnglish(), "mS", "mm:ss.S"},
// only milliseconds, untouched, no repairs
{Locale::getEnglish(), "S", "S"},
{Locale::getEnglish(), "SS", "SS"},
{Locale::getEnglish(), "SSS", "SSS"},
{Locale::getEnglish(), "SSSS", "SSSS"},
// hour:minute+seconds+milliseconds, correct, no repairs, proper pattern
{Locale::getEnglish(), "jmsSSS", "h:mm:ss.SSS a"},
{Locale::getEnglish(), "jmSSS", "h:mm:ss.SSS a"}
{Locale::getEnglish(), "jmSSS", "h:mm:ss.SSS a"},
// Ticket #20738
// seconds+milliseconds, correct, no repairs, proper pattern
{Locale::getEnglish(), "sS", "s.S"},
{Locale::getEnglish(), "sSS", "s.SS"},
{Locale::getEnglish(), "sSSS", "s.SSS"},
{Locale::getEnglish(), "sSSSS", "s.SSSS"},
{Locale::getEnglish(), "sS", "s.S"},
// minutes+seconds+milliseconds, correct, no repairs, proper pattern
{Locale::getEnglish(), "msS", "mm:ss.S"},
{Locale::getEnglish(), "msSS", "mm:ss.SS"},
{Locale::getEnglish(), "msSSS", "mm:ss.SSS"},
{Locale::getEnglish(), "msSSSS", "mm:ss.SSSS"}
};
for (size_t i = 0; i < UPRV_LENGTHOF(TESTDATA); i++) {

View File

@ -2068,6 +2068,7 @@ public class DateTimePatternGenerator implements Freezable<DateTimePatternGenera
// if (SHOW_DISTANCE) System.out.println("Searching for: " + source.pattern
// + ", mask: " + showMask(includeMask));
int bestDistance = Integer.MAX_VALUE;
int bestMissingFieldMask = Integer.MIN_VALUE;
PatternWithMatcher bestPatternWithMatcher = new PatternWithMatcher("", null);
DistanceInfo tempInfo = new DistanceInfo();
for (DateTimeMatcher trial : skeleton2pattern.keySet()) {
@ -2077,8 +2078,16 @@ public class DateTimePatternGenerator implements Freezable<DateTimePatternGenera
int distance = source.getDistance(trial, includeMask, tempInfo);
// if (SHOW_DISTANCE) System.out.println("\tDistance: " + trial.pattern + ":\t"
// + distance + ",\tmissing fields: " + tempInfo);
if (distance < bestDistance) {
// Because we iterate over a map the order is undefined. Can change between implementations,
// versions, and will very likely be different between Java and C/C++.
// So if we have patterns with the same distance we also look at the missingFieldMask,
// and we favour the smallest one. Because the field is a bitmask this technically means we
// favour differences in the "least significant fields". For example we prefer the one with differences
// in seconds field vs one with difference in the hours field.
if (distance < bestDistance || (distance == bestDistance && bestMissingFieldMask < tempInfo.missingFieldMask)) {
bestDistance = distance;
bestMissingFieldMask = tempInfo.missingFieldMask;
PatternWithSkeletonFlag patternWithSkelFlag = skeleton2pattern.get(trial);
bestPatternWithMatcher.pattern = patternWithSkelFlag.pattern;
// If the best raw match had a specified skeleton then return it too.
@ -2702,8 +2711,15 @@ public class DateTimePatternGenerator implements Freezable<DateTimePatternGenera
type[field] = subField;
}
// #20739, we have a skeleton with milliseconde, but no seconds
if (!original.isFieldEmpty(FRACTIONAL_SECOND) && original.isFieldEmpty(SECOND)) {
// #20739, we have a skeleton with minutes and milliseconds, but no seconds
//
// Theoretically we would need to check and fix all fields with "gaps":
// for example year-day (no month), month-hour (no day), and so on, All the possible field combinations.
// Plus some smartness: year + hour => should we add month, or add day-of-year?
// What about month + day-of-week, or month + am/pm indicator.
// I think beyond a certain point we should not try to fix bad developer input and try guessing what they mean.
// Garbage in, garbage out.
if (!original.isFieldEmpty(MINUTE) && !original.isFieldEmpty(FRACTIONAL_SECOND) && original.isFieldEmpty(SECOND)) {
// Force the use of seconds
for (int i = 0; i < types.length; ++i) {
int[] row = types[i];

View File

@ -5436,6 +5436,8 @@ public class DateFormatTest extends TestFmwk {
@Test
public void test20739_MillisecondsWithoutSeconds() {
String[][] cases = new String[][]{
// Ticket #20739
// minutes+milliseconds, seconds missing, should be repaired
{"SSSSm", "mm:ss.SSSS"},
{"mSSSS", "mm:ss.SSSS"},
{"SSSm", "mm:ss.SSS"},
@ -5444,12 +5446,25 @@ public class DateFormatTest extends TestFmwk {
{"mSS", "mm:ss.SS"},
{"Sm", "mm:ss.S"},
{"mS", "mm:ss.S"},
// only milliseconds, untouched, no repairs
{"S", "S"},
{"SS", "SS"},
{"SSS", "SSS"},
{"SSSS", "SSSS"},
// hour:minute+seconds+milliseconds, correct, no repairs, proper pattern
{"jmsSSS", "h:mm:ss.SSS a"},
{"jmSSS", "h:mm:ss.SSS a"}
{"jmSSS", "h:mm:ss.SSS a"},
// Ticket #20738
// seconds+milliseconds, correct, no repairs, proper pattern
{"sS", "s.S"},
{"sSS", "s.SS"},
{"sSSS", "s.SSS"},
{"sSSSS", "s.SSSS"},
// minutes+seconds+milliseconds, correct, no repairs, proper pattern
{"msS", "mm:ss.S"},
{"msSS", "mm:ss.SS"},
{"msSSS", "mm:ss.SSS"},
{"msSSSS", "mm:ss.SSSS"}
};
ULocale locale = ULocale.ENGLISH;