ICU-10261 give DateFormat more granular leniency control

X-SVN-Rev: 34326
This commit is contained in:
Scott Russell 2013-09-16 12:20:50 +00:00
parent 36e54c57f8
commit bfbffd7805
3 changed files with 132 additions and 9 deletions

View File

@ -11,6 +11,7 @@ import java.text.Format;
import java.text.ParseException;
import java.text.ParsePosition;
import java.util.Date;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
@ -444,6 +445,28 @@ public abstract class DateFormat extends UFormat {
public final static int FIELD_COUNT = 34; // must == DateFormatSymbols.patternChars.length()
/**
 * Boolean attributes that tune individual aspects of DateFormat parse leniency.
 * <ul>
 * <li>{@link #PARSE_ALLOW_WHITESPACE} - indicates whitespace tolerance. Trailing dot
 *     tolerance is included as well.</li>
 * <li>{@link #PARSE_ALLOW_NUMERIC} - indicates tolerance of numeric data when String data
 *     may be assumed. eg: YEAR_NAME_FIELD</li>
 * </ul>
 *
 * @internal ICU 5.2 technology preview
 */
public enum BooleanAttribute {
    /** Whitespace tolerance. Trailing dot tolerance is covered by this attribute too. */
    PARSE_ALLOW_WHITESPACE,

    /** Tolerance of numeric data when String data may be assumed. eg: YEAR_NAME_FIELD */
    PARSE_ALLOW_NUMERIC
}
/**
 * Boolean attributes for this instance. Inclusion in this set indicates a true condition;
 * the set initially contains every BooleanAttribute.
 */
private EnumSet<BooleanAttribute> booleanAttributes = EnumSet.allOf(BooleanAttribute.class);
// Proclaim serial compatibility with 1.1 FCS
private static final long serialVersionUID = 7218322306649953788L;
@ -1434,8 +1457,12 @@ public abstract class DateFormat extends UFormat {
* lenient parsing, the parser may use heuristics to interpret inputs that
* do not precisely match this object's format. With strict parsing,
* inputs must match this object's format.
* <br/><br/>
* <b>Note:</b> This method is specific to the encapsulated Calendar object. DateFormat
* leniency aspects are controlled by setBooleanAttribute.
* @param lenient when true, parsing is lenient
* @see com.ibm.icu.util.Calendar#setLenient
* @see #setBooleanAttribute(BooleanAttribute, boolean)
* @stable ICU 2.0
*/
public void setLenient(boolean lenient)
@ -1444,7 +1471,7 @@ public abstract class DateFormat extends UFormat {
}
/**
* Returns whether date/time parsing is lenient.
* Returns whether date/time parsing in the encapsulated Calendar object is lenient.
* @stable ICU 2.0
*/
public boolean isLenient()
@ -1452,6 +1479,41 @@ public abstract class DateFormat extends UFormat {
return calendar.isLenient();
}
/**
 * Sets a boolean attribute for this instance. Aspects of DateFormat leniency are controlled by
 * boolean attributes.
 *
 * @param key the attribute to change
 * @param value true to turn the attribute on, false to turn it off
 * @return this instance, so that calls can be chained
 * @see BooleanAttribute
 * @internal ICU 5.2 technology preview
 */
public DateFormat setBooleanAttribute(BooleanAttribute key, boolean value)
{
    // Membership in the set is the attribute's value. add() and remove() leave the set
    // untouched when it is already in the requested state, so no contains() check is needed.
    if (value) {
        booleanAttributes.add(key);
    } else {
        booleanAttributes.remove(key);
    }
    return this;
}
/**
 * Returns the current value of the specified BooleanAttribute for this instance.
 *
 * @param key the attribute to query
 * @return true if the attribute is set for this instance; false if it is missing
 * @see BooleanAttribute
 * @internal ICU 5.2 technology preview
 */
public boolean getBooleanAttribute(BooleanAttribute key)
{
    return booleanAttributes.contains(key);
}
/**
* Overrides hashCode.
* @stable ICU 2.0

View File

@ -2211,7 +2211,7 @@ public class SimpleDateFormat extends DateFormat {
// Special hack for trailing "." after non-numeric field.
if (pos < text.length()) {
char extra = text.charAt(pos);
if (extra == '.' && isLenient() && items.length != 0) {
if (extra == '.' && getBooleanAttribute(DateFormat.BooleanAttribute.PARSE_ALLOW_WHITESPACE) && items.length != 0) {
// only do if the last field is not numeric
Object lastItem = items[items.length - 1];
if (lastItem instanceof PatternItem && !((PatternItem)lastItem).isNumeric) {
@ -2426,7 +2426,7 @@ public class SimpleDateFormat extends DateFormat {
++pos;
}
} else if (pch != ich) {
if (ich == '.' && pos == originalPos && 0 < itemIndex && isLenient()) {
if (ich == '.' && pos == originalPos && 0 < itemIndex && getBooleanAttribute(DateFormat.BooleanAttribute.PARSE_ALLOW_WHITESPACE)) {
Object before = items[itemIndex-1];
if (before instanceof PatternItem) {
boolean isNumeric = ((PatternItem) before).isNumeric;
@ -2435,6 +2435,9 @@ public class SimpleDateFormat extends DateFormat {
continue;
}
}
} else if ((pch == ' ' || pch == '.') && getBooleanAttribute(DateFormat.BooleanAttribute.PARSE_ALLOW_WHITESPACE)) {
++idx;
continue;
}
break;
}
@ -2442,7 +2445,7 @@ public class SimpleDateFormat extends DateFormat {
++pos;
}
complete[0] = idx == plen;
if (complete[0] == false && isLenient() && 0 < itemIndex && itemIndex < items.length - 1) {
if (complete[0] == false && getBooleanAttribute(DateFormat.BooleanAttribute.PARSE_ALLOW_WHITESPACE) && 0 < itemIndex && itemIndex < items.length - 1) {
// If fully lenient, accept " "* for any text between a date and a time field
// We don't go more lenient, because we don't want to accept "12/31" for "12:31".
// People may be trying to parse for a date, then for a time.
@ -2687,7 +2690,6 @@ public class SimpleDateFormat extends DateFormat {
int value = 0;
int i;
ParsePosition pos = new ParsePosition(0);
boolean lenient = isLenient();
//int patternCharIndex = DateFormatSymbols.patternChars.indexOf(ch);c
int patternCharIndex = -1;
@ -2843,7 +2845,7 @@ public class SimpleDateFormat extends DateFormat {
return newStart;
}
}
if ( number != null && (lenient || formatData.shortYearNames == null || value > formatData.shortYearNames.length) ) {
if ( number != null && (getBooleanAttribute(DateFormat.BooleanAttribute.PARSE_ALLOW_NUMERIC) || formatData.shortYearNames == null || value > formatData.shortYearNames.length) ) {
cal.set(Calendar.YEAR, value);
return pos.getIndex();
}

View File

@ -4274,7 +4274,66 @@ public class DateFormatTest extends com.ibm.icu.dev.test.TestFmwk {
return ok;
}
/**
 * Parses each sample string with the leniency switches (calendar leniency,
 * PARSE_ALLOW_WHITESPACE and PARSE_ALLOW_NUMERIC) all set to the item's leniency value.
 * Items with a null expected result must fail to parse; all other items must parse and
 * re-format (case-insensitively) to the expected result.
 */
public void TestDateFormatLeniency() {
    // For details see http://bugs.icu-project.org/trac/ticket/10261

    class TestDateFormatLeniencyItem {
        public final boolean leniency;
        public final String parseString;
        public final String pattern;
        public final String expectedResult;   // null indicates expected error

        // Simple constructor
        public TestDateFormatLeniencyItem(boolean len, String parString, String patt, String expResult) {
            leniency = len;
            pattern = patt;
            parseString = parString;
            expectedResult = expResult;
        }
    }

    final TestDateFormatLeniencyItem[] items = {
        //                             leniency  parse String     pattern              expected result
        new TestDateFormatLeniencyItem(true,     "2008-Jan 02",   "yyyy-LLL. dd",      "2008-Jan. 02"),
        new TestDateFormatLeniencyItem(false,    "2008-Jan 03",   "yyyy-LLL. dd",      null),
        new TestDateFormatLeniencyItem(true,     "2008-Jan--04",  "yyyy-MMM' -- 'dd",  "2008-Jan -- 04"),
        new TestDateFormatLeniencyItem(false,    "2008-Jan--05",  "yyyy-MMM' -- 'dd",  null),
        // NOTE(review): "mm" is the minute pattern letter; "MM" (month) may have been
        // intended here -- confirm before changing the test data.
        new TestDateFormatLeniencyItem(true,     "2008-12-31",    "yyyy-mm-dd",        "2008-12-31")
    };

    StringBuffer result = new StringBuffer();
    Calendar cal = GregorianCalendar.getInstance(TimeZone.getTimeZone("GMT"), Locale.US);
    SimpleDateFormat sdfmt = new SimpleDateFormat();
    ParsePosition p = new ParsePosition(0);

    for (TestDateFormatLeniencyItem item : items) {
        cal.clear();
        sdfmt.setCalendar(cal);
        sdfmt.applyPattern(item.pattern);
        sdfmt.setLenient(item.leniency);
        sdfmt.setBooleanAttribute(DateFormat.BooleanAttribute.PARSE_ALLOW_WHITESPACE, item.leniency);
        sdfmt.setBooleanAttribute(DateFormat.BooleanAttribute.PARSE_ALLOW_NUMERIC, item.leniency);

        // Reset the shared buffer and parse position before every item.
        result.setLength(0);
        p.setIndex(0);
        p.setErrorIndex(-1);

        Date d = sdfmt.parse(item.parseString, p);
        boolean parseFailed = p.getErrorIndex() != -1;

        if (item.expectedResult == null) {
            if (!parseFailed) {
                errln("error: unexpected parse success..." + item.parseString
                        + " w/ lenient=" + item.leniency + " should have failed");
            }
            // Nothing to format for an expected-failure item; without this the code would
            // go on to compare the formatted output against null and log a second,
            // misleading error.
            continue;
        }

        if (parseFailed) {
            errln("error: parse error for string " + item.parseString
                    + " -- idx[" + p.getIndex() + "] errIdx[" + p.getErrorIndex() + "]");
            continue;
        }

        // Round-trip: format the parsed date with the same pattern and compare.
        cal.setTime(d);
        result = sdfmt.format(cal, result, new FieldPosition(0));
        if (!result.toString().equalsIgnoreCase(item.expectedResult)) {
            errln("error: unexpected format result. expected - " + item.expectedResult
                    + " but result was - " + result);
        } else {
            logln("formatted results match! - " + result.toString());
        }
    }
}
}