ICU-8820 Parse string fields in pattern as strings before trying digits (if lenient)

X-SVN-Rev: 30766
This commit is contained in:
Peter Edberg 2011-09-30 20:07:28 +00:00
parent 0e5fb3e2cb
commit 307efef922
2 changed files with 200 additions and 107 deletions

View File

@ -2571,19 +2571,19 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
// a number value. We handle further, more generic cases below. We need // a number value. We handle further, more generic cases below. We need
// to handle some of them here because some fields require extra processing on // to handle some of them here because some fields require extra processing on
// the parsed value. // the parsed value.
if (patternCharIndex == UDAT_HOUR_OF_DAY1_FIELD || if (patternCharIndex == UDAT_HOUR_OF_DAY1_FIELD || // k
patternCharIndex == UDAT_HOUR_OF_DAY0_FIELD || patternCharIndex == UDAT_HOUR_OF_DAY0_FIELD || // H
patternCharIndex == UDAT_HOUR1_FIELD || patternCharIndex == UDAT_HOUR1_FIELD || // h
patternCharIndex == UDAT_HOUR0_FIELD || patternCharIndex == UDAT_HOUR0_FIELD || // K
patternCharIndex == UDAT_DOW_LOCAL_FIELD || (patternCharIndex == UDAT_DOW_LOCAL_FIELD && count <= 2) || // e
patternCharIndex == UDAT_STANDALONE_DAY_FIELD || (patternCharIndex == UDAT_STANDALONE_DAY_FIELD && count <= 2) || // c
patternCharIndex == UDAT_MONTH_FIELD || (patternCharIndex == UDAT_MONTH_FIELD && count <= 2) || // M
patternCharIndex == UDAT_STANDALONE_MONTH_FIELD || (patternCharIndex == UDAT_STANDALONE_MONTH_FIELD && count <= 2) || // L
patternCharIndex == UDAT_QUARTER_FIELD || (patternCharIndex == UDAT_QUARTER_FIELD && count <= 2) || // Q
patternCharIndex == UDAT_STANDALONE_QUARTER_FIELD || (patternCharIndex == UDAT_STANDALONE_QUARTER_FIELD && count <= 2) || // q
patternCharIndex == UDAT_YEAR_FIELD || patternCharIndex == UDAT_YEAR_FIELD || // y
patternCharIndex == UDAT_YEAR_WOY_FIELD || patternCharIndex == UDAT_YEAR_WOY_FIELD || // Y
patternCharIndex == UDAT_FRACTIONAL_SECOND_FIELD) patternCharIndex == UDAT_FRACTIONAL_SECOND_FIELD) // S
{ {
int32_t parseStart = pos.getIndex(); int32_t parseStart = pos.getIndex();
// It would be good to unify this with the obeyCount logic below, // It would be good to unify this with the obeyCount logic below,
@ -2657,26 +2657,6 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
break; break;
case UDAT_DOW_LOCAL_FIELD:
case UDAT_STANDALONE_DAY_FIELD:
case UDAT_MONTH_FIELD:
case UDAT_STANDALONE_MONTH_FIELD:
case UDAT_QUARTER_FIELD:
case UDAT_STANDALONE_QUARTER_FIELD:
// in strict mode, these can only
// be a number if count <= 2
if (!lenient && gotNumber && count > 2) {
// We have a string pattern in strict mode
// but the input parsed as a number. Ignore
// the fact that the input parsed as a number
// and try to match it as a string. (Some
// locales have numbers for the month names.)
gotNumber = FALSE;
pos.setIndex(start);
}
break;
default: default:
// we check the rest of the fields below. // we check the rest of the fields below.
break; break;
@ -2786,13 +2766,17 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
// Try count == 4 first: // Try count == 4 first:
int32_t newStart = 0; int32_t newStart = 0;
if ((newStart = matchString(text, start, UCAL_MONTH, if ((newStart = matchString(text, start, UCAL_MONTH, // try MMMM
fSymbols->fMonths, fSymbols->fMonthsCount, cal)) > 0) fSymbols->fMonths, fSymbols->fMonthsCount, cal)) > 0)
return newStart; return newStart;
else // count == 4 failed, now try count == 3 else if ((newStart = matchString(text, start, UCAL_MONTH, // try MMM
return matchString(text, start, UCAL_MONTH, fSymbols->fShortMonths, fSymbols->fShortMonthsCount, cal)) > 0)
fSymbols->fShortMonths, fSymbols->fShortMonthsCount, cal); return newStart;
else if (!lenient) // currently we do not try to parse MMMMM: #8860
return newStart;
// else we allow parsing as a number, below
} }
break;
case UDAT_STANDALONE_MONTH_FIELD: case UDAT_STANDALONE_MONTH_FIELD:
if (gotNumber) // i.e., L or LL. if (gotNumber) // i.e., L or LL.
@ -2811,10 +2795,14 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
if ((newStart = matchString(text, start, UCAL_MONTH, if ((newStart = matchString(text, start, UCAL_MONTH,
fSymbols->fStandaloneMonths, fSymbols->fStandaloneMonthsCount, cal)) > 0) fSymbols->fStandaloneMonths, fSymbols->fStandaloneMonthsCount, cal)) > 0)
return newStart; return newStart;
else // count == 4 failed, now try count == 3 else if ((newStart = matchString(text, start, UCAL_MONTH,
return matchString(text, start, UCAL_MONTH, fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneShortMonthsCount, cal)) > 0)
fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneShortMonthsCount, cal); return newStart;
else if (!lenient) // currently we do not try to parse LLLLL: #8860
return newStart;
// else we allow parsing as a number, below
} }
break;
case UDAT_HOUR_OF_DAY1_FIELD: case UDAT_HOUR_OF_DAY1_FIELD:
// [We computed 'value' above.] // [We computed 'value' above.]
@ -2868,10 +2856,14 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
fSymbols->fShortWeekdays, fSymbols->fShortWeekdaysCount, cal)) > 0) fSymbols->fShortWeekdays, fSymbols->fShortWeekdaysCount, cal)) > 0)
return newStart; return newStart;
// EEE failed, now try EEEEE // EEE failed, now try EEEEE
else else if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
return matchString(text, start, UCAL_DAY_OF_WEEK, fSymbols->fNarrowWeekdays, fSymbols->fNarrowWeekdaysCount, cal)) > 0)
fSymbols->fNarrowWeekdays, fSymbols->fNarrowWeekdaysCount, cal); return newStart;
else if (!lenient || patternCharIndex == UDAT_DAY_OF_WEEK_FIELD)
return newStart;
// else we allow parsing as a number, below
} }
break;
case UDAT_STANDALONE_DAY_FIELD: case UDAT_STANDALONE_DAY_FIELD:
{ {
@ -2887,10 +2879,14 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK, if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
fSymbols->fStandaloneWeekdays, fSymbols->fStandaloneWeekdaysCount, cal)) > 0) fSymbols->fStandaloneWeekdays, fSymbols->fStandaloneWeekdaysCount, cal)) > 0)
return newStart; return newStart;
else // cccc failed, now try ccc else if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
return matchString(text, start, UCAL_DAY_OF_WEEK, fSymbols->fStandaloneShortWeekdays, fSymbols->fStandaloneShortWeekdaysCount, cal)) > 0)
fSymbols->fStandaloneShortWeekdays, fSymbols->fStandaloneShortWeekdaysCount, cal); return newStart;
else if (!lenient)
return newStart;
// else we allow parsing as a number, below
} }
break;
case UDAT_AM_PM_FIELD: case UDAT_AM_PM_FIELD:
return matchString(text, start, UCAL_AM_PM, fSymbols->fAmPms, fSymbols->fAmPmsCount, cal); return matchString(text, start, UCAL_AM_PM, fSymbols->fAmPms, fSymbols->fAmPmsCount, cal);
@ -2923,10 +2919,14 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
if ((newStart = matchQuarterString(text, start, UCAL_MONTH, if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
fSymbols->fQuarters, fSymbols->fQuartersCount, cal)) > 0) fSymbols->fQuarters, fSymbols->fQuartersCount, cal)) > 0)
return newStart; return newStart;
else // count == 4 failed, now try count == 3 else if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
return matchQuarterString(text, start, UCAL_MONTH, fSymbols->fShortQuarters, fSymbols->fShortQuartersCount, cal)) > 0)
fSymbols->fShortQuarters, fSymbols->fShortQuartersCount, cal); return newStart;
else if (!lenient)
return newStart;
// else we allow parsing as a number, below
} }
break;
case UDAT_STANDALONE_QUARTER_FIELD: case UDAT_STANDALONE_QUARTER_FIELD:
if (gotNumber) // i.e., q or qq. if (gotNumber) // i.e., q or qq.
@ -2945,10 +2945,14 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
if ((newStart = matchQuarterString(text, start, UCAL_MONTH, if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
fSymbols->fStandaloneQuarters, fSymbols->fStandaloneQuartersCount, cal)) > 0) fSymbols->fStandaloneQuarters, fSymbols->fStandaloneQuartersCount, cal)) > 0)
return newStart; return newStart;
else // count == 4 failed, now try count == 3 else if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
return matchQuarterString(text, start, UCAL_MONTH, fSymbols->fStandaloneShortQuarters, fSymbols->fStandaloneShortQuartersCount, cal)) > 0)
fSymbols->fStandaloneShortQuarters, fSymbols->fStandaloneShortQuartersCount, cal); return newStart;
else if (!lenient)
return newStart;
// else we allow parsing as a number, below
} }
break;
case UDAT_TIMEZONE_FIELD: case UDAT_TIMEZONE_FIELD:
case UDAT_TIMEZONE_RFC_FIELD: case UDAT_TIMEZONE_RFC_FIELD:
@ -3128,34 +3132,79 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
default: default:
// Handle "generic" fields // Handle "generic" fields
int32_t parseStart = pos.getIndex(); // this is now handled below, outside the switch block
const UnicodeString* src; break;
if (obeyCount) { }
if ((start+count) > text.length()) { // Handle "generic" fields:
// switch default case now handled here (outside switch block) to allow
// parsing of some string fields as digits for lenient case
int32_t parseStart = pos.getIndex();
const UnicodeString* src;
if (obeyCount) {
if ((start+count) > text.length()) {
return -start;
}
text.extractBetween(0, start + count, temp);
src = &temp;
} else {
src = &text;
}
parseInt(*src, number, pos, allowNegative,currentNumberFormat);
if (pos.getIndex() != parseStart) {
int32_t value = number.getLong();
// Don't need suffix processing here (as in number processing at the beginning of the function);
// the new fields being handled as numeric values (month, weekdays, quarters) should not have suffixes.
if (!lenient) {
// Check the range of the value
int32_t bias = gFieldRangeBias[patternCharIndex];
if (bias >= 0 && (value > cal.getMaximum(field) + bias || value < cal.getMinimum(field) + bias)) {
return -start; return -start;
} }
text.extractBetween(0, start + count, temp);
src = &temp;
} else {
src = &text;
} }
parseInt(*src, number, pos, allowNegative,currentNumberFormat);
if (pos.getIndex() != parseStart) {
int32_t value = number.getLong();
if (!lenient) { // For the following, need to repeat some of the "if (gotNumber)" code above:
// Check the range of the value // UDAT_[STANDALONE_]MONTH_FIELD, UDAT_DOW_LOCAL_FIELD, UDAT_STANDALONE_DAY_FIELD,
int32_t bias = gFieldRangeBias[patternCharIndex]; // UDAT_[STANDALONE_]QUARTER_FIELD
if (bias >= 0 && (value > cal.getMaximum(field) + bias || value < cal.getMinimum(field) + bias)) { switch (patternCharIndex) {
return -start; case UDAT_MONTH_FIELD:
// See notes under UDAT_MONTH_FIELD case above
if (!strcmp(cal.getType(),"hebrew")) {
HebrewCalendar *hc = (HebrewCalendar*)&cal;
if (cal.isSet(UCAL_YEAR)) {
UErrorCode status = U_ZERO_ERROR;
if (!hc->isLeapYear(hc->get(UCAL_YEAR,status)) && value >= 6) {
cal.set(UCAL_MONTH, value);
} else {
cal.set(UCAL_MONTH, value - 1);
}
} else {
saveHebrewMonth = value;
} }
} else {
cal.set(UCAL_MONTH, value - 1);
} }
break;
case UDAT_STANDALONE_MONTH_FIELD:
cal.set(UCAL_MONTH, value - 1);
break;
case UDAT_DOW_LOCAL_FIELD:
case UDAT_STANDALONE_DAY_FIELD:
cal.set(UCAL_DOW_LOCAL, value);
break;
case UDAT_QUARTER_FIELD:
case UDAT_STANDALONE_QUARTER_FIELD:
cal.set(UCAL_MONTH, (value - 1) * 3);
break;
default:
cal.set(field, value); cal.set(field, value);
return pos.getIndex(); break;
} }
return -start; return pos.getIndex();
} }
return -start;
} }
/** /**

View File

@ -828,10 +828,15 @@ DateFormatTest::TestBadInput135()
if (U_SUCCESS(status)) if (U_SUCCESS(status))
{ {
UnicodeString format; UnicodeString format;
UnicodeString pattern;
SimpleDateFormat* sdtfmt = dynamic_cast<SimpleDateFormat*>(df);
if (sdtfmt != NULL) {
sdtfmt->toPattern(pattern);
}
full->format(when, format); full->format(when, format);
logln(prefix + "OK: " + format); logln(prefix + "OK: " + format);
if (0!=format.compareBetween(0, expected.length(), expected, 0, expected.length())) if (0!=format.compareBetween(0, expected.length(), expected, 0, expected.length()))
errln((UnicodeString)"FAIL: Expected " + expected + " got " + format); errln((UnicodeString)"FAIL: Parse \"" + text + "\", pattern \"" + pattern + "\", expected " + expected + " got " + format);
} }
//} //}
//catch(ParseException e) { //catch(ParseException e) {
@ -958,7 +963,7 @@ DateFormatTest::TestBadInput135a()
if (expected == 0) if (expected == 0)
errln((UnicodeString)"FAIL: Expected parse failure, got " + result); errln((UnicodeString)"FAIL: Expected parse failure, got " + result);
else if (!(result == expected)) else if (!(result == expected))
errln(UnicodeString("FAIL: Expected ") + expected + UnicodeString(", got ") + result); errln(UnicodeString("FAIL: Parse \"") + s + UnicodeString("\", expected ") + expected + UnicodeString(", got ") + result);
} }
} }
else if (expected != 0) { else if (expected != 0) {
@ -1269,13 +1274,13 @@ void DateFormatTest::TestSpaceParsing() {
"MMMM d yy", " Apr 05 06", "2006 04 05 00:00:00", "MMMM d yy", " Apr 05 06", "2006 04 05 00:00:00",
NULL, "Apr 05 06", "2006 04 05 00:00:00", NULL, "Apr 05 06", "2006 04 05 00:00:00",
NULL, "Apr05 06", "2006 04 05 00:00:00", NULL, "Apr05 06", "2006 04 05 00:00:00",
"hh:mm:ss a", "12:34:56 PM", "1970 01 01 12:34:56", "hh:mm:ss a", "12:34:56 PM", "1970 01 01 12:34:56",
NULL, "12:34:56PM", "1970 01 01 12:34:56", NULL, "12:34:56PM", "1970 01 01 12:34:56",
NULL, "12.34.56PM", "1970 01 01 12:34:56", NULL, "12.34.56PM", "1970 01 01 12:34:56",
NULL, "12-34-56 PM", "1970 01 01 12:34:56", NULL, "12-34-56 PM", "1970 01 01 12:34:56",
NULL, "12 : 34 : 56 PM", "1970 01 01 12:34:56", NULL, "12 : 34 : 56 PM", "1970 01 01 12:34:56",
"MM d yy 'at' hh:mm:ss a", "04/05/06 12:34:56 PM", "2006 04 05 12:34:56", "MM d yy 'at' hh:mm:ss a", "04/05/06 12:34:56 PM", "2006 04 05 12:34:56",
@ -3482,35 +3487,73 @@ void DateFormatTest::Test6880() {
delete fmt; delete fmt;
} }
// One table entry for TestNumberAsStringParsing: a locale, a leniency setting,
// a date pattern, and the text to parse with that pattern, plus whether the
// parse is expected to fail. Field order matters: the test table initializes
// entries positionally.
typedef struct {
// Locale name handed to Locale::createFromName; a NULL value terminates the table.
const char * localeStr;
// Value passed to SimpleDateFormat::setLenient before parsing.
UBool lenient;
// TRUE when parsing dateString is expected to report an error.
UBool expectFail;
// Pattern used to construct the SimpleDateFormat.
UnicodeString datePattern;
// Text to parse; for non-lenient entries the parsed date is formatted again
// and compared against this string.
UnicodeString dateString;
} NumAsStringItem;
void DateFormatTest::TestNumberAsStringParsing() void DateFormatTest::TestNumberAsStringParsing()
{ {
UErrorCode status = U_ZERO_ERROR; const NumAsStringItem items[] = {
UnicodeString dateString("2009 7 2 08:14:16"); // loc lenient fail? datePattern dateString
UnicodeString datePattern("y MMMM d HH:mm:ss"); { "", FALSE, FALSE, UnicodeString("y MMMM d HH:mm:ss"), UnicodeString("2009 7 14 08:43:57") },
SimpleDateFormat *formatter = new SimpleDateFormat(datePattern, Locale(""), status); { "", TRUE, FALSE, UnicodeString("y MMMM d HH:mm:ss"), UnicodeString("2009 7 14 08:43:57") },
UDate date1 = 0; { "en", FALSE, FALSE, UnicodeString("MMM d, y"), UnicodeString("Jul 14, 2009") },
{ "en", TRUE, FALSE, UnicodeString("MMM d, y"), UnicodeString("Jul 14, 2009") },
if (formatter == NULL || U_FAILURE(status)) { { "en", FALSE, TRUE, UnicodeString("MMM d, y"), UnicodeString("7 14, 2009") },
dataerrln("Unable to create SimpleDateFormat - %s", u_errorName(status)); { "en", TRUE, FALSE, UnicodeString("MMM d, y"), UnicodeString("7 14, 2009") },
return; { "ja", FALSE, FALSE, UnicodeString("yyyy/MM/dd"), UnicodeString("2009/07/14") },
} { "ja", TRUE, FALSE, UnicodeString("yyyy/MM/dd"), UnicodeString("2009/07/14") },
//{ "ja", FALSE, FALSE, UnicodeString("yyyy/MMMMM/d"), UnicodeString("2009/7/14") }, // #8860 covers test failure
formatter->setLenient(FALSE); { "ja", TRUE, FALSE, UnicodeString("yyyy/MMMMM/d"), UnicodeString("2009/7/14") },
date1 = formatter->parse(dateString, status); { "ja", FALSE, FALSE, CharsToUnicodeString("y\\u5E74M\\u6708d\\u65E5"), CharsToUnicodeString("2009\\u5E747\\u670814\\u65E5") },
{ "ja", TRUE, FALSE, CharsToUnicodeString("y\\u5E74M\\u6708d\\u65E5"), CharsToUnicodeString("2009\\u5E747\\u670814\\u65E5") },
if (U_FAILURE(status)) { { "ja", FALSE, FALSE, CharsToUnicodeString("y\\u5E74MMMd\\u65E5"), CharsToUnicodeString("2009\\u5E747\\u670814\\u65E5") },
errln("FAIL: Could not parse \"2009 7 2 08:14:16\" with pattern \"y MMMM d HH:mm:ss\""); { "ja", TRUE, FALSE, CharsToUnicodeString("y\\u5E74MMMd\\u65E5"), CharsToUnicodeString("2009\\u5E747\\u670814\\u65E5") }, // #8820 fixes test failure
} else { { "ko", FALSE, FALSE, UnicodeString("yyyy. M. d."), UnicodeString("2009. 7. 14.") },
UnicodeString formatted; { "ko", TRUE, FALSE, UnicodeString("yyyy. M. d."), UnicodeString("2009. 7. 14.") },
{ "ko", FALSE, FALSE, UnicodeString("yyyy. MMMMM d."), CharsToUnicodeString("2009. 7\\uC6D4 14.") },
formatter->format(date1, formatted); { "ko", TRUE, FALSE, UnicodeString("yyyy. MMMMM d."), CharsToUnicodeString("2009. 7\\uC6D4 14.") }, // #8820 fixes test failure
{ "ko", FALSE, FALSE, CharsToUnicodeString("y\\uB144 M\\uC6D4 d\\uC77C"), CharsToUnicodeString("2009\\uB144 7\\uC6D4 14\\uC77C") },
if (formatted != dateString) { { "ko", TRUE, FALSE, CharsToUnicodeString("y\\uB144 M\\uC6D4 d\\uC77C"), CharsToUnicodeString("2009\\uB144 7\\uC6D4 14\\uC77C") },
errln("FAIL: parsed string did not match input."); { "ko", FALSE, FALSE, CharsToUnicodeString("y\\uB144 MMM d\\uC77C"), CharsToUnicodeString("2009\\uB144 7\\uC6D4 14\\uC77C") },
{ "ko", TRUE, FALSE, CharsToUnicodeString("y\\uB144 MMM d\\uC77C"), CharsToUnicodeString("2009\\uB144 7\\uC6D4 14\\uC77C") }, // #8820 fixes test failure
{ NULL, FALSE, FALSE, UnicodeString(""), UnicodeString("") }
};
const NumAsStringItem * itemPtr;
for (itemPtr = items; itemPtr->localeStr != NULL; itemPtr++ ) {
Locale locale = Locale::createFromName(itemPtr->localeStr);
UErrorCode status = U_ZERO_ERROR;
SimpleDateFormat *formatter = new SimpleDateFormat(itemPtr->datePattern, locale, status);
if (formatter == NULL || U_FAILURE(status)) {
dataerrln("Unable to create SimpleDateFormat - %s", u_errorName(status));
return;
} }
}
delete formatter; formatter->setLenient(itemPtr->lenient);
UDate date1 = formatter->parse(itemPtr->dateString, status);
if (U_FAILURE(status)) {
if (!itemPtr->expectFail) {
errln("FAIL, err when expected success: Locale \"" + UnicodeString(itemPtr->localeStr) + "\", lenient " + itemPtr->lenient +
": using pattern \"" + itemPtr->datePattern + "\", could not parse \"" + itemPtr->dateString + "\"; err: " + u_errorName(status) );
}
} else if (itemPtr->expectFail) {
errln("FAIL, expected err but got none: Locale \"" + UnicodeString(itemPtr->localeStr) + "\", lenient " + itemPtr->lenient +
": using pattern \"" + itemPtr->datePattern + "\", did parse \"" + itemPtr->dateString + "\"." );
} else if (!itemPtr->lenient) {
UnicodeString formatted;
formatter->format(date1, formatted);
if (formatted != itemPtr->dateString) {
errln("FAIL, mismatch formatting parsed date: Locale \"" + UnicodeString(itemPtr->localeStr) + "\", lenient " + itemPtr->lenient +
": using pattern \"" + itemPtr->datePattern + "\", did parse \"" + itemPtr->dateString + "\", formatted result \"" + formatted + "\".");
}
}
delete formatter;
}
} }
void DateFormatTest::TestISOEra() { void DateFormatTest::TestISOEra() {
@ -3648,7 +3691,8 @@ void DateFormatTest::TestParsePosition() {
input += TestData[i][3]; input += TestData[i][3];
ParsePosition pos(startPos); ParsePosition pos(startPos);
UDate d = sdf->parse(input, pos); //UDate d = sdf->parse(input, pos);
(void)sdf->parse(input, pos);
if (pos.getIndex() != resPos) { if (pos.getIndex() != resPos) {
errln(UnicodeString("FAIL: Parsing [") + input + "] with pattern [" + TestData[i][0] + "] returns position - " errln(UnicodeString("FAIL: Parsing [") + input + "] with pattern [" + TestData[i][0] + "] returns position - "