ICU-8820 Parse string fields in pattern as strings before trying digits (if lenient)

X-SVN-Rev: 30766
This commit is contained in:
Peter Edberg 2011-09-30 20:07:28 +00:00
parent 0e5fb3e2cb
commit 307efef922
2 changed files with 200 additions and 107 deletions

View File

@ -2571,19 +2571,19 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
// a number value. We handle further, more generic cases below. We need
// to handle some of them here because some fields require extra processing on
// the parsed value.
if (patternCharIndex == UDAT_HOUR_OF_DAY1_FIELD ||
patternCharIndex == UDAT_HOUR_OF_DAY0_FIELD ||
patternCharIndex == UDAT_HOUR1_FIELD ||
patternCharIndex == UDAT_HOUR0_FIELD ||
patternCharIndex == UDAT_DOW_LOCAL_FIELD ||
patternCharIndex == UDAT_STANDALONE_DAY_FIELD ||
patternCharIndex == UDAT_MONTH_FIELD ||
patternCharIndex == UDAT_STANDALONE_MONTH_FIELD ||
patternCharIndex == UDAT_QUARTER_FIELD ||
patternCharIndex == UDAT_STANDALONE_QUARTER_FIELD ||
patternCharIndex == UDAT_YEAR_FIELD ||
patternCharIndex == UDAT_YEAR_WOY_FIELD ||
patternCharIndex == UDAT_FRACTIONAL_SECOND_FIELD)
if (patternCharIndex == UDAT_HOUR_OF_DAY1_FIELD || // k
patternCharIndex == UDAT_HOUR_OF_DAY0_FIELD || // H
patternCharIndex == UDAT_HOUR1_FIELD || // h
patternCharIndex == UDAT_HOUR0_FIELD || // K
(patternCharIndex == UDAT_DOW_LOCAL_FIELD && count <= 2) || // e
(patternCharIndex == UDAT_STANDALONE_DAY_FIELD && count <= 2) || // c
(patternCharIndex == UDAT_MONTH_FIELD && count <= 2) || // M
(patternCharIndex == UDAT_STANDALONE_MONTH_FIELD && count <= 2) || // L
(patternCharIndex == UDAT_QUARTER_FIELD && count <= 2) || // Q
(patternCharIndex == UDAT_STANDALONE_QUARTER_FIELD && count <= 2) || // q
patternCharIndex == UDAT_YEAR_FIELD || // y
patternCharIndex == UDAT_YEAR_WOY_FIELD || // Y
patternCharIndex == UDAT_FRACTIONAL_SECOND_FIELD) // S
{
int32_t parseStart = pos.getIndex();
// It would be good to unify this with the obeyCount logic below,
@ -2657,26 +2657,6 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
break;
case UDAT_DOW_LOCAL_FIELD:
case UDAT_STANDALONE_DAY_FIELD:
case UDAT_MONTH_FIELD:
case UDAT_STANDALONE_MONTH_FIELD:
case UDAT_QUARTER_FIELD:
case UDAT_STANDALONE_QUARTER_FIELD:
// in strict mode, these can only
// be a number if count <= 2
if (!lenient && gotNumber && count > 2) {
// We have a string pattern in strict mode
// but the input parsed as a number. Ignore
// the fact that the input parsed as a number
// and try to match it as a string. (Some
// locales have numbers for the month names.)
gotNumber = FALSE;
pos.setIndex(start);
}
break;
default:
// we check the rest of the fields below.
break;
@ -2786,13 +2766,17 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
// Try count == 4 first:
int32_t newStart = 0;
if ((newStart = matchString(text, start, UCAL_MONTH,
if ((newStart = matchString(text, start, UCAL_MONTH, // try MMMM
fSymbols->fMonths, fSymbols->fMonthsCount, cal)) > 0)
return newStart;
else // count == 4 failed, now try count == 3
return matchString(text, start, UCAL_MONTH,
fSymbols->fShortMonths, fSymbols->fShortMonthsCount, cal);
else if ((newStart = matchString(text, start, UCAL_MONTH, // try MMM
fSymbols->fShortMonths, fSymbols->fShortMonthsCount, cal)) > 0)
return newStart;
else if (!lenient) // currently we do not try to parse MMMMM: #8860
return newStart;
// else we allow parsing as a number, below
}
break;
case UDAT_STANDALONE_MONTH_FIELD:
if (gotNumber) // i.e., L or LL.
@ -2811,10 +2795,14 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
if ((newStart = matchString(text, start, UCAL_MONTH,
fSymbols->fStandaloneMonths, fSymbols->fStandaloneMonthsCount, cal)) > 0)
return newStart;
else // count == 4 failed, now try count == 3
return matchString(text, start, UCAL_MONTH,
fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneShortMonthsCount, cal);
else if ((newStart = matchString(text, start, UCAL_MONTH,
fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneShortMonthsCount, cal)) > 0)
return newStart;
else if (!lenient) // currently we do not try to parse LLLLL: #8860
return newStart;
// else we allow parsing as a number, below
}
break;
case UDAT_HOUR_OF_DAY1_FIELD:
// [We computed 'value' above.]
@ -2868,10 +2856,14 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
fSymbols->fShortWeekdays, fSymbols->fShortWeekdaysCount, cal)) > 0)
return newStart;
// EEE failed, now try EEEEE
else
return matchString(text, start, UCAL_DAY_OF_WEEK,
fSymbols->fNarrowWeekdays, fSymbols->fNarrowWeekdaysCount, cal);
else if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
fSymbols->fNarrowWeekdays, fSymbols->fNarrowWeekdaysCount, cal)) > 0)
return newStart;
else if (!lenient || patternCharIndex == UDAT_DAY_OF_WEEK_FIELD)
return newStart;
// else we allow parsing as a number, below
}
break;
case UDAT_STANDALONE_DAY_FIELD:
{
@ -2887,10 +2879,14 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
fSymbols->fStandaloneWeekdays, fSymbols->fStandaloneWeekdaysCount, cal)) > 0)
return newStart;
else // cccc failed, now try ccc
return matchString(text, start, UCAL_DAY_OF_WEEK,
fSymbols->fStandaloneShortWeekdays, fSymbols->fStandaloneShortWeekdaysCount, cal);
else if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
fSymbols->fStandaloneShortWeekdays, fSymbols->fStandaloneShortWeekdaysCount, cal)) > 0)
return newStart;
else if (!lenient)
return newStart;
// else we allow parsing as a number, below
}
break;
case UDAT_AM_PM_FIELD:
return matchString(text, start, UCAL_AM_PM, fSymbols->fAmPms, fSymbols->fAmPmsCount, cal);
@ -2923,10 +2919,14 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
fSymbols->fQuarters, fSymbols->fQuartersCount, cal)) > 0)
return newStart;
else // count == 4 failed, now try count == 3
return matchQuarterString(text, start, UCAL_MONTH,
fSymbols->fShortQuarters, fSymbols->fShortQuartersCount, cal);
else if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
fSymbols->fShortQuarters, fSymbols->fShortQuartersCount, cal)) > 0)
return newStart;
else if (!lenient)
return newStart;
// else we allow parsing as a number, below
}
break;
case UDAT_STANDALONE_QUARTER_FIELD:
if (gotNumber) // i.e., q or qq.
@ -2945,10 +2945,14 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
fSymbols->fStandaloneQuarters, fSymbols->fStandaloneQuartersCount, cal)) > 0)
return newStart;
else // count == 4 failed, now try count == 3
return matchQuarterString(text, start, UCAL_MONTH,
fSymbols->fStandaloneShortQuarters, fSymbols->fStandaloneShortQuartersCount, cal);
else if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
fSymbols->fStandaloneShortQuarters, fSymbols->fStandaloneShortQuartersCount, cal)) > 0)
return newStart;
else if (!lenient)
return newStart;
// else we allow parsing as a number, below
}
break;
case UDAT_TIMEZONE_FIELD:
case UDAT_TIMEZONE_RFC_FIELD:
@ -3128,34 +3132,79 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
default:
// Handle "generic" fields
int32_t parseStart = pos.getIndex();
const UnicodeString* src;
if (obeyCount) {
if ((start+count) > text.length()) {
// this is now handled below, outside the switch block
break;
}
// Handle "generic" fields:
// switch default case now handled here (outside switch block) to allow
// parsing of some string fields as digits for lenient case
int32_t parseStart = pos.getIndex();
const UnicodeString* src;
if (obeyCount) {
if ((start+count) > text.length()) {
return -start;
}
text.extractBetween(0, start + count, temp);
src = &temp;
} else {
src = &text;
}
parseInt(*src, number, pos, allowNegative,currentNumberFormat);
if (pos.getIndex() != parseStart) {
int32_t value = number.getLong();
// Don't need suffix processing here (as in number processing at the beginning of the function);
// the new fields being handled as numeric values (month, weekdays, quarters) should not have suffixes.
if (!lenient) {
// Check the range of the value
int32_t bias = gFieldRangeBias[patternCharIndex];
if (bias >= 0 && (value > cal.getMaximum(field) + bias || value < cal.getMinimum(field) + bias)) {
return -start;
}
text.extractBetween(0, start + count, temp);
src = &temp;
} else {
src = &text;
}
parseInt(*src, number, pos, allowNegative,currentNumberFormat);
if (pos.getIndex() != parseStart) {
int32_t value = number.getLong();
if (!lenient) {
// Check the range of the value
int32_t bias = gFieldRangeBias[patternCharIndex];
if (bias >= 0 && (value > cal.getMaximum(field) + bias || value < cal.getMinimum(field) + bias)) {
return -start;
// For the following, need to repeat some of the "if (gotNumber)" code above:
// UDAT_[STANDALONE_]MONTH_FIELD, UDAT_DOW_LOCAL_FIELD, UDAT_STANDALONE_DAY_FIELD,
// UDAT_[STANDALONE_]QUARTER_FIELD
switch (patternCharIndex) {
case UDAT_MONTH_FIELD:
// See notes under UDAT_MONTH_FIELD case above
if (!strcmp(cal.getType(),"hebrew")) {
HebrewCalendar *hc = (HebrewCalendar*)&cal;
if (cal.isSet(UCAL_YEAR)) {
UErrorCode status = U_ZERO_ERROR;
if (!hc->isLeapYear(hc->get(UCAL_YEAR,status)) && value >= 6) {
cal.set(UCAL_MONTH, value);
} else {
cal.set(UCAL_MONTH, value - 1);
}
} else {
saveHebrewMonth = value;
}
} else {
cal.set(UCAL_MONTH, value - 1);
}
break;
case UDAT_STANDALONE_MONTH_FIELD:
cal.set(UCAL_MONTH, value - 1);
break;
case UDAT_DOW_LOCAL_FIELD:
case UDAT_STANDALONE_DAY_FIELD:
cal.set(UCAL_DOW_LOCAL, value);
break;
case UDAT_QUARTER_FIELD:
case UDAT_STANDALONE_QUARTER_FIELD:
cal.set(UCAL_MONTH, (value - 1) * 3);
break;
default:
cal.set(field, value);
return pos.getIndex();
break;
}
return -start;
return pos.getIndex();
}
return -start;
}
/**

View File

@ -828,10 +828,15 @@ DateFormatTest::TestBadInput135()
if (U_SUCCESS(status))
{
UnicodeString format;
UnicodeString pattern;
SimpleDateFormat* sdtfmt = dynamic_cast<SimpleDateFormat*>(df);
if (sdtfmt != NULL) {
sdtfmt->toPattern(pattern);
}
full->format(when, format);
logln(prefix + "OK: " + format);
if (0!=format.compareBetween(0, expected.length(), expected, 0, expected.length()))
errln((UnicodeString)"FAIL: Expected " + expected + " got " + format);
errln((UnicodeString)"FAIL: Parse \"" + text + "\", pattern \"" + pattern + "\", expected " + expected + " got " + format);
}
//}
//catch(ParseException e) {
@ -958,7 +963,7 @@ DateFormatTest::TestBadInput135a()
if (expected == 0)
errln((UnicodeString)"FAIL: Expected parse failure, got " + result);
else if (!(result == expected))
errln(UnicodeString("FAIL: Expected ") + expected + UnicodeString(", got ") + result);
errln(UnicodeString("FAIL: Parse \"") + s + UnicodeString("\", expected ") + expected + UnicodeString(", got ") + result);
}
}
else if (expected != 0) {
@ -1256,7 +1261,7 @@ void DateFormatTest::TestSpaceParsing() {
// pattern, input, expected parse or NULL if expect parse failure
"MMMM d yy", " 04 05 06", "2006 04 05 00:00:00",
NULL, "04 05 06", "2006 04 05 00:00:00",
"MM d yy", " 04 05 06", "2006 04 05 00:00:00",
NULL, "04 05 06", "2006 04 05 00:00:00",
NULL, "04/05/06", "2006 04 05 00:00:00",
@ -1266,16 +1271,16 @@ void DateFormatTest::TestSpaceParsing() {
NULL, "Apr / 05/ 06", "2006 04 05 00:00:00",
NULL, "Apr-05-06", "2006 04 05 00:00:00",
NULL, "Apr 05, 2006", "2006 04 05 00:00:00",
"MMMM d yy", " Apr 05 06", "2006 04 05 00:00:00",
NULL, "Apr 05 06", "2006 04 05 00:00:00",
NULL, "Apr05 06", "2006 04 05 00:00:00",
"hh:mm:ss a", "12:34:56 PM", "1970 01 01 12:34:56",
NULL, "12:34:56PM", "1970 01 01 12:34:56",
NULL, "Apr05 06", "2006 04 05 00:00:00",
"hh:mm:ss a", "12:34:56 PM", "1970 01 01 12:34:56",
NULL, "12:34:56PM", "1970 01 01 12:34:56",
NULL, "12.34.56PM", "1970 01 01 12:34:56",
NULL, "12-34-56 PM", "1970 01 01 12:34:56",
NULL, "12 : 34 : 56 PM", "1970 01 01 12:34:56",
NULL, "12 : 34 : 56 PM", "1970 01 01 12:34:56",
"MM d yy 'at' hh:mm:ss a", "04/05/06 12:34:56 PM", "2006 04 05 12:34:56",
@ -3482,35 +3487,73 @@ void DateFormatTest::Test6880() {
delete fmt;
}
typedef struct {
const char * localeStr;
UBool lenient;
UBool expectFail;
UnicodeString datePattern;
UnicodeString dateString;
} NumAsStringItem;
// Checks how SimpleDateFormat parses input whose month is written as digits or
// as a name, under strict and lenient settings.
// NOTE(review): this span shows the earlier single-case body (fixed pattern
// "y MMMM d HH:mm:ss") and the later table-driven body side by side; the added
// comments below describe the table-driven lines only.
void DateFormatTest::TestNumberAsStringParsing()
{
UErrorCode status = U_ZERO_ERROR;
UnicodeString dateString("2009 7 2 08:14:16");
UnicodeString datePattern("y MMMM d HH:mm:ss");
SimpleDateFormat *formatter = new SimpleDateFormat(datePattern, Locale(""), status);
UDate date1 = 0;
if (formatter == NULL || U_FAILURE(status)) {
dataerrln("Unable to create SimpleDateFormat - %s", u_errorName(status));
return;
}
formatter->setLenient(FALSE);
date1 = formatter->parse(dateString, status);
if (U_FAILURE(status)) {
errln("FAIL: Could not parse \"2009 7 2 08:14:16\" with pattern \"y MMMM d HH:mm:ss\"");
} else {
UnicodeString formatted;
formatter->format(date1, formatted);
if (formatted != dateString) {
errln("FAIL: parsed string did not match input.");
// Case table: one row per (locale, leniency, pattern, input) combination.
// The final row, whose localeStr is NULL, only terminates the loop below.
const NumAsStringItem items[] = {
// loc lenient fail? datePattern dateString
{ "", FALSE, FALSE, UnicodeString("y MMMM d HH:mm:ss"), UnicodeString("2009 7 14 08:43:57") },
{ "", TRUE, FALSE, UnicodeString("y MMMM d HH:mm:ss"), UnicodeString("2009 7 14 08:43:57") },
{ "en", FALSE, FALSE, UnicodeString("MMM d, y"), UnicodeString("Jul 14, 2009") },
{ "en", TRUE, FALSE, UnicodeString("MMM d, y"), UnicodeString("Jul 14, 2009") },
{ "en", FALSE, TRUE, UnicodeString("MMM d, y"), UnicodeString("7 14, 2009") },
{ "en", TRUE, FALSE, UnicodeString("MMM d, y"), UnicodeString("7 14, 2009") },
{ "ja", FALSE, FALSE, UnicodeString("yyyy/MM/dd"), UnicodeString("2009/07/14") },
{ "ja", TRUE, FALSE, UnicodeString("yyyy/MM/dd"), UnicodeString("2009/07/14") },
//{ "ja", FALSE, FALSE, UnicodeString("yyyy/MMMMM/d"), UnicodeString("2009/7/14") }, // #8860 covers test failure
{ "ja", TRUE, FALSE, UnicodeString("yyyy/MMMMM/d"), UnicodeString("2009/7/14") },
{ "ja", FALSE, FALSE, CharsToUnicodeString("y\\u5E74M\\u6708d\\u65E5"), CharsToUnicodeString("2009\\u5E747\\u670814\\u65E5") },
{ "ja", TRUE, FALSE, CharsToUnicodeString("y\\u5E74M\\u6708d\\u65E5"), CharsToUnicodeString("2009\\u5E747\\u670814\\u65E5") },
{ "ja", FALSE, FALSE, CharsToUnicodeString("y\\u5E74MMMd\\u65E5"), CharsToUnicodeString("2009\\u5E747\\u670814\\u65E5") },
{ "ja", TRUE, FALSE, CharsToUnicodeString("y\\u5E74MMMd\\u65E5"), CharsToUnicodeString("2009\\u5E747\\u670814\\u65E5") }, // #8820 fixes test failure
{ "ko", FALSE, FALSE, UnicodeString("yyyy. M. d."), UnicodeString("2009. 7. 14.") },
{ "ko", TRUE, FALSE, UnicodeString("yyyy. M. d."), UnicodeString("2009. 7. 14.") },
{ "ko", FALSE, FALSE, UnicodeString("yyyy. MMMMM d."), CharsToUnicodeString("2009. 7\\uC6D4 14.") },
{ "ko", TRUE, FALSE, UnicodeString("yyyy. MMMMM d."), CharsToUnicodeString("2009. 7\\uC6D4 14.") }, // #8820 fixes test failure
{ "ko", FALSE, FALSE, CharsToUnicodeString("y\\uB144 M\\uC6D4 d\\uC77C"), CharsToUnicodeString("2009\\uB144 7\\uC6D4 14\\uC77C") },
{ "ko", TRUE, FALSE, CharsToUnicodeString("y\\uB144 M\\uC6D4 d\\uC77C"), CharsToUnicodeString("2009\\uB144 7\\uC6D4 14\\uC77C") },
{ "ko", FALSE, FALSE, CharsToUnicodeString("y\\uB144 MMM d\\uC77C"), CharsToUnicodeString("2009\\uB144 7\\uC6D4 14\\uC77C") },
{ "ko", TRUE, FALSE, CharsToUnicodeString("y\\uB144 MMM d\\uC77C"), CharsToUnicodeString("2009\\uB144 7\\uC6D4 14\\uC77C") }, // #8820 fixes test failure
{ NULL, FALSE, FALSE, UnicodeString(""), UnicodeString("") }
};
const NumAsStringItem * itemPtr;
for (itemPtr = items; itemPtr->localeStr != NULL; itemPtr++ ) {
Locale locale = Locale::createFromName(itemPtr->localeStr);
// A fresh status per row, so an error from one row does not carry into the next.
UErrorCode status = U_ZERO_ERROR;
SimpleDateFormat *formatter = new SimpleDateFormat(itemPtr->datePattern, locale, status);
if (formatter == NULL || U_FAILURE(status)) {
dataerrln("Unable to create SimpleDateFormat - %s", u_errorName(status));
// NOTE(review): when formatter is non-NULL but status is a failure, this
// return skips the `delete formatter` at the end of the loop body — confirm
// whether the object should be released here.
return;
}
// Parse using the leniency requested by this row.
formatter->setLenient(itemPtr->lenient);
UDate date1 = formatter->parse(itemPtr->dateString, status);
if (U_FAILURE(status)) {
// A parse error is reported only when the row did not expect one.
if (!itemPtr->expectFail) {
errln("FAIL, err when expected success: Locale \"" + UnicodeString(itemPtr->localeStr) + "\", lenient " + itemPtr->lenient +
": using pattern \"" + itemPtr->datePattern + "\", could not parse \"" + itemPtr->dateString + "\"; err: " + u_errorName(status) );
}
} else if (itemPtr->expectFail) {
errln("FAIL, expected err but got none: Locale \"" + UnicodeString(itemPtr->localeStr) + "\", lenient " + itemPtr->lenient +
": using pattern \"" + itemPtr->datePattern + "\", did parse \"" + itemPtr->dateString + "\"." );
} else if (!itemPtr->lenient) {
// Strict rows only: formatting the parsed date must reproduce the input text.
UnicodeString formatted;
formatter->format(date1, formatted);
if (formatted != itemPtr->dateString) {
errln("FAIL, mismatch formatting parsed date: Locale \"" + UnicodeString(itemPtr->localeStr) + "\", lenient " + itemPtr->lenient +
": using pattern \"" + itemPtr->datePattern + "\", did parse \"" + itemPtr->dateString + "\", formatted result \"" + formatted + "\".");
}
}
delete formatter;
}
delete formatter;
}
void DateFormatTest::TestISOEra() {
@ -3556,7 +3599,7 @@ void DateFormatTest::TestISOEra() {
}
}
delete fmt1;
delete fmt1;
}
void DateFormatTest::TestFormalChineseDate() {
@ -3648,7 +3691,8 @@ void DateFormatTest::TestParsePosition() {
input += TestData[i][3];
ParsePosition pos(startPos);
UDate d = sdf->parse(input, pos);
//UDate d = sdf->parse(input, pos);
(void)sdf->parse(input, pos);
if (pos.getIndex() != resPos) {
errln(UnicodeString("FAIL: Parsing [") + input + "] with pattern [" + TestData[i][0] + "] returns position - "