ICU-10261 give DateFormat more granular leniency control

X-SVN-Rev: 34327
This commit is contained in:
Scott Russell 2013-09-16 12:45:41 +00:00
parent bfbffd7805
commit 41d2876025
4 changed files with 137 additions and 15 deletions

View File

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 1997-2011, International Business Machines Corporation and *
* Copyright (C) 1997-2013, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
@ -74,6 +74,7 @@ DateFormat& DateFormat::operator=(const DateFormat& other)
} else {
fNumberFormat = NULL;
}
fBoolFlags = other.fBoolFlags;
}
return *this;
}
@ -511,6 +512,29 @@ DateFormat::isLenient() const
return FALSE;
}
//----------------------------------------------------------------------
/**
 * Sets one boolean attribute on this formatter.
 *
 * The attribute is checked against the range the flag set is declared for
 * (0 up to, but not including, UDAT_BOOLEAN_ATTRIBUTE_COUNT) before it is
 * handed to fBoolFlags, and the value is checked with
 * fBoolFlags.isValidValue(). If either check fails, status is set to
 * U_ILLEGAL_ARGUMENT_ERROR and the stored flags are left untouched.
 *
 * @param attr     the attribute to set
 * @param newValue the new value for the attribute
 * @param status   receives U_ILLEGAL_ARGUMENT_ERROR on a bad attribute or value
 * @return *this, so that calls can be chained
 */
DateFormat&
DateFormat::setBooleanAttribute(UDateFormatBooleanAttribute attr,
                                UBool newValue,
                                UErrorCode &status) {
    // Compare as a signed integer so that a negative (cast) enum value is
    // caught regardless of the enum's underlying type.
    const int32_t attrIndex = static_cast<int32_t>(attr);
    if (attrIndex < 0 || attrIndex >= UDAT_BOOLEAN_ATTRIBUTE_COUNT ||
        !fBoolFlags.isValidValue(newValue)) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
    } else {
        fBoolFlags.set(attr, newValue);
    }
    return *this;
}
//----------------------------------------------------------------------
/**
 * Reads one boolean attribute from this formatter.
 *
 * The attribute is checked against the range the flag set is declared for
 * (0 up to, but not including, UDAT_BOOLEAN_ATTRIBUTE_COUNT). For an
 * attribute outside that range, status is set to U_ILLEGAL_ARGUMENT_ERROR
 * and FALSE is returned without consulting fBoolFlags.
 *
 * @param attr   the attribute to query
 * @param status receives U_ILLEGAL_ARGUMENT_ERROR on a bad attribute
 * @return the stored value of attr, or FALSE if attr is out of range
 */
UBool
DateFormat::getBooleanAttribute(UDateFormatBooleanAttribute attr, UErrorCode &status) const {
    // Compare as a signed integer so that a negative (cast) enum value is
    // caught regardless of the enum's underlying type.
    const int32_t attrIndex = static_cast<int32_t>(attr);
    if (attrIndex < 0 || attrIndex >= UDAT_BOOLEAN_ATTRIBUTE_COUNT) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return FALSE;
    }
    return fBoolFlags.get(attr);
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */

View File

@ -248,6 +248,7 @@ SimpleDateFormat::SimpleDateFormat(UErrorCode& status)
fOverrideList(NULL),
fCapitalizationContext(UDISPCTX_CAPITALIZATION_NONE)
{
setBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, true, status).setBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, true, status);
construct(kShort, (EStyle) (kShort + kDateOffset), fLocale, status);
initializeDefaultCentury();
}
@ -266,6 +267,7 @@ SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
{
fDateOverride.setToBogus();
fTimeOverride.setToBogus();
setBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, true, status).setBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, true, status);
initializeSymbols(fLocale, initializeCalendar(NULL,fLocale,status), status);
initialize(fLocale, status);
initializeDefaultCentury();
@ -286,6 +288,7 @@ SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
{
fDateOverride.setTo(override);
fTimeOverride.setToBogus();
setBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, true, status).setBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, true, status);
initializeSymbols(fLocale, initializeCalendar(NULL,fLocale,status), status);
initialize(fLocale, status);
initializeDefaultCentury();
@ -309,6 +312,7 @@ SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
fDateOverride.setToBogus();
fTimeOverride.setToBogus();
setBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, true, status).setBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, true, status);
initializeSymbols(fLocale, initializeCalendar(NULL,fLocale,status), status);
initialize(fLocale, status);
@ -331,6 +335,7 @@ SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
fDateOverride.setTo(override);
fTimeOverride.setToBogus();
setBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, true, status).setBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, true, status);
initializeSymbols(fLocale, initializeCalendar(NULL,fLocale,status), status);
initialize(fLocale, status);
@ -356,6 +361,7 @@ SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
fDateOverride.setToBogus();
fTimeOverride.setToBogus();
setBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, true, status).setBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, true, status);
initializeCalendar(NULL,fLocale,status);
initialize(fLocale, status);
@ -378,6 +384,7 @@ SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
fDateOverride.setToBogus();
fTimeOverride.setToBogus();
setBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, true, status).setBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, true, status);
initializeCalendar(NULL, fLocale, status);
initialize(fLocale, status);
@ -398,6 +405,7 @@ SimpleDateFormat::SimpleDateFormat(EStyle timeStyle,
fOverrideList(NULL),
fCapitalizationContext(UDISPCTX_CAPITALIZATION_NONE)
{
setBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, true, status).setBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, true, status);
construct(timeStyle, dateStyle, fLocale, status);
if(U_SUCCESS(status)) {
initializeDefaultCentury();
@ -438,6 +446,7 @@ SimpleDateFormat::SimpleDateFormat(const Locale& locale,
fDateOverride.setToBogus();
fTimeOverride.setToBogus();
setBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, true, status).setBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, true, status);
initialize(fLocale, status);
if(U_SUCCESS(status)) {
@ -456,6 +465,8 @@ SimpleDateFormat::SimpleDateFormat(const SimpleDateFormat& other)
fOverrideList(NULL),
fCapitalizationContext(UDISPCTX_CAPITALIZATION_NONE)
{
UErrorCode status = U_ZERO_ERROR;
setBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, true, status).setBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, true, status);
*this = other;
}
@ -1689,8 +1700,6 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition&
int32_t saveHebrewMonth = -1;
int32_t count = 0;
UBool lenient = isLenient();
// hack, reset tztype, cast away const
((SimpleDateFormat*)this)->tztype = UTZFMT_TIME_TYPE_UNKNOWN;
@ -1838,7 +1847,7 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition&
abutPat = -1; // End of any abutting fields
if (! matchLiterals(fPattern, i, text, pos, lenient)) {
if (! matchLiterals(fPattern, i, text, pos, getBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, status))) {
status = U_PARSE_ERROR;
goto ExitParse;
}
@ -1846,7 +1855,7 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition&
}
// Special hack for trailing "." after non-numeric field.
if (text.charAt(pos) == 0x2e && lenient) {
if (text.charAt(pos) == 0x2e && getBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, status)) {
// only do if the last field is not numeric
if (isAfterNonNumericField(fPattern, fPattern.length())) {
pos++; // skip the extra "."
@ -2407,11 +2416,11 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
int32_t value = 0;
int32_t i;
int32_t ps = 0;
UErrorCode status = U_ZERO_ERROR;
ParsePosition pos(0);
UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(ch);
NumberFormat *currentNumberFormat;
UnicodeString temp;
UBool lenient = isLenient();
UBool gotNumber = FALSE;
#if defined (U_DEBUG_CAL)
@ -2517,7 +2526,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
txtLoc = checkIntSuffix(text, txtLoc, patLoc+1, FALSE);
}
if (!lenient) {
if (!getBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, status)) {
// Check the range of the value
int32_t bias = gFieldRangeBias[patternCharIndex];
if (bias >= 0 && (value > cal.getMaximum(field) + bias || value < cal.getMinimum(field) + bias)) {
@ -2653,7 +2662,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
return newStart;
}
}
if (gotNumber && (lenient || value > fSymbols->fShortYearNamesCount)) {
if (gotNumber && (getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC,status) || value > fSymbols->fShortYearNamesCount)) {
cal.set(UCAL_YEAR, value);
return pos.getIndex();
}
@ -2714,7 +2723,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
}
newStart = matchString(text, start, UCAL_MONTH, fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneShortMonthsCount, shortMonthPat, cal); // try LLL
}
if (newStart > 0 || !lenient) // currently we do not try to parse MMMMM/LLLLL: #8860
if (newStart > 0 || !getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status)) // currently we do not try to parse MMMMM/LLLLL: #8860
return newStart;
// else we allowing parsing as number, below
}
@ -2779,7 +2788,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
else if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
fSymbols->fNarrowWeekdays, fSymbols->fNarrowWeekdaysCount, NULL, cal)) > 0)
return newStart;
else if (!lenient || patternCharIndex == UDAT_DAY_OF_WEEK_FIELD)
else if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status) || patternCharIndex == UDAT_DAY_OF_WEEK_FIELD)
return newStart;
// else we allowing parsing as number, below
}
@ -2805,7 +2814,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
else if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
fSymbols->fStandaloneShorterWeekdays, fSymbols->fStandaloneShorterWeekdaysCount, NULL, cal)) > 0)
return newStart;
else if (!lenient)
else if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status))
return newStart;
// else we allowing parsing as number, below
}
@ -2845,7 +2854,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
else if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
fSymbols->fShortQuarters, fSymbols->fShortQuartersCount, cal)) > 0)
return newStart;
else if (!lenient)
else if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status))
return newStart;
// else we allowing parsing as number, below
}
@ -2871,7 +2880,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
else if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
fSymbols->fStandaloneShortQuarters, fSymbols->fStandaloneShortQuartersCount, cal)) > 0)
return newStart;
else if (!lenient)
else if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status))
return newStart;
// else we allowing parsing as number, below
}
@ -3038,7 +3047,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
// Don't need suffix processing here (as in number processing at the beginning of the function);
// the new fields being handled as numeric values (month, weekdays, quarters) should not have suffixes.
if (!lenient) {
if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status)) {
// Check the range of the value
int32_t bias = gFieldRangeBias[patternCharIndex];
if (bias >= 0 && (value > cal.getMaximum(field) + bias || value < cal.getMinimum(field) + bias)) {

View File

@ -28,6 +28,7 @@
#include "unicode/numfmt.h"
#include "unicode/format.h"
#include "unicode/locid.h"
#include "unicode/enumset.h"
/**
* \file
@ -39,6 +40,13 @@ U_NAMESPACE_BEGIN
class TimeZone;
class DateTimePatternGenerator;
// explicit template instantiation. see digitlst.h
#if defined (_MSC_VER)
template class U_I18N_API EnumSet<UDateFormatBooleanAttribute,
0,
UDAT_BOOLEAN_ATTRIBUTE_COUNT>;
#endif
/**
* DateFormat is an abstract class for a family of classes that convert dates and
* times from their internal representations to textual form and back again in a
@ -572,7 +580,7 @@ public:
static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
/**
* Returns true if the formatter is set for lenient parsing.
* Returns true if the encapsulated Calendar object is set for lenient parsing.
* @stable ICU 2.0
*/
virtual UBool isLenient(void) const;
@ -583,6 +591,9 @@ public:
* precisely match this object's format. With strict parsing, inputs must
* match this object's format.
*
* Note: This method is specific to the encapsulated Calendar object. DateFormat
* leniency aspects are controlled by setBooleanAttribute.
*
* @param lenient True specifies date/time interpretation to be lenient.
* @see Calendar::setLenient
* @stable ICU 2.0
@ -662,6 +673,32 @@ public:
*/
virtual void setTimeZone(const TimeZone& zone);
/**
* Set a boolean attribute on this DateFormat.
* May set status to U_UNSUPPORTED_ERROR if this instance does not support
* the specified attribute.
* @param attr the attribute to set
* @param newvalue new value
* @param status the error type
* @return *this - for chaining (example: format.setBooleanAttribute(...).setBooleanAttribute(...) )
* @internal ICU 5.2 technology preview
*/
virtual DateFormat& U_EXPORT2 setBooleanAttribute(UDateFormatBooleanAttribute attr,
UBool newvalue,
UErrorCode &status);
/**
* Get a boolean attribute from this DateFormat.
* May set status to U_UNSUPPORTED_ERROR if this instance does not support
* the specified attribute.
* @param attr the attribute to query
* @param status the error type
* @return the attribute value. Undefined if there is an error.
* @internal ICU 5.2 technology preview
*/
virtual UBool U_EXPORT2 getBooleanAttribute(UDateFormatBooleanAttribute attr, UErrorCode &status) const;
protected:
/**
* Default constructor. Creates a DateFormat with no Calendar or NumberFormat
@ -699,6 +736,7 @@ protected:
*/
NumberFormat* fNumberFormat;
private:
/**
* Gets the date/time formatter with the given formatting styles for the
@ -710,6 +748,13 @@ private:
*/
static DateFormat* U_EXPORT2 create(EStyle timeStyle, EStyle dateStyle, const Locale& inLocale);
/**
* enum set of active boolean attributes for this instance
*/
EnumSet<UDateFormatBooleanAttribute, 0, UDAT_BOOLEAN_ATTRIBUTE_COUNT> fBoolFlags;
public:
#ifndef U_HIDE_OBSOLETE_API
/**

View File

@ -826,6 +826,50 @@ udat_open(UDateFormatStyle timeStyle,
U_STABLE void U_EXPORT2
udat_close(UDateFormat* format);
/**
 * Boolean attributes that can be switched on or off for a date formatter.
 * @internal ICU 5.2 technology preview
 */
typedef enum UDateFormatBooleanAttribute {
    /** Whether whitespace is allowed; this also covers tolerance of a trailing dot. */
    UDAT_PARSE_ALLOW_WHITESPACE = 0,
    /**
     * Tolerance of numeric data where String data may be assumed, e.g. for
     * UDAT_YEAR_NAME_FIELD, UDAT_STANDALONE_MONTH_FIELD, UDAT_DAY_OF_WEEK_FIELD.
     */
    UDAT_PARSE_ALLOW_NUMERIC = 1,
    /** Count of the boolean date format attributes above; keep it last. */
    UDAT_BOOLEAN_ATTRIBUTE_COUNT
} UDateFormatBooleanAttribute;
/**
* Get a boolean attribute associated with a UDateFormat.
* For example, a true value for UDAT_PARSE_ALLOW_WHITESPACE indicates that whitespace leniency is allowed.
* If the formatter does not understand the attribute, -1 is returned.
* NOTE(review): the declared return type is UBool; confirm how the -1 case is meant to be
* distinguished by callers (the implementation is not visible from this declaration).
* @param fmt The formatter to query.
* @param attr The attribute to query; e.g. UDAT_PARSE_ALLOW_WHITESPACE.
* @param status A pointer to a UErrorCode to receive any errors
* @return The value of attr.
* @internal ICU 5.2 technology preview
*/
U_INTERNAL UBool U_EXPORT2
udat_getBooleanAttribute(const UDateFormat* fmt, UDateFormatBooleanAttribute attr, UErrorCode* status);
/**
* Set a boolean attribute associated with a UDateFormat.
* An example of a boolean attribute is parse leniency control. If the formatter does not understand
* the attribute, the call is ignored.
* @param fmt The formatter to set.
* @param attr The attribute to set; one of UDAT_PARSE_ALLOW_WHITESPACE or UDAT_PARSE_ALLOW_NUMERIC
* @param newValue The new value of attr.
* @param status A pointer to a UErrorCode to receive any errors
* @internal ICU 5.2 technology preview
*/
U_INTERNAL void U_EXPORT2
udat_setBooleanAttribute(UDateFormat *fmt, UDateFormatBooleanAttribute attr, UBool newValue, UErrorCode* status);
#if U_SHOW_CPLUSPLUS_API
U_NAMESPACE_BEGIN