ICU-21155 Add support for all hour and day-period skeleton characters to DateIntervalFormat

This commit is contained in:
Rich Gillam 2020-09-02 15:11:49 -07:00
parent caadb345e5
commit edaebfa64e
9 changed files with 499 additions and 70 deletions

View File

@ -64,11 +64,13 @@
#define LOW_Z ((UChar)0x007A)
#define CAP_A ((UChar)0x0041)
#define CAP_B ((UChar)0x0042)
#define CAP_C ((UChar)0x0043)
#define CAP_D ((UChar)0x0044)
#define CAP_E ((UChar)0x0045)
#define CAP_F ((UChar)0x0046)
#define CAP_G ((UChar)0x0047)
#define CAP_J ((UChar)0x004A)
#define CAP_H ((UChar)0x0048)
#define CAP_K ((UChar)0x004B)
#define CAP_L ((UChar)0x004C)

View File

@ -751,7 +751,7 @@ DateIntervalFormat::initializePattern(UErrorCode& status) {
/* the difference between time skeleton and normalizedTimeSkeleton are:
* 1. (Formerly, normalized time skeleton folded 'H' to 'h'; no longer true)
* 2. 'a' is omitted in normalized time skeleton.
* 2. (Formerly, 'a' was omitted in normalized time skeleton; this is now handled elsewhere)
* 3. there is only one appearance for 'h' or 'H', 'm','v', 'z' in normalized
* time skeleton
*
@ -760,7 +760,8 @@ DateIntervalFormat::initializePattern(UErrorCode& status) {
* 2. 'E' and 'EE' are normalized into 'EEE'
* 3. 'MM' is normalized into 'M'
*/
getDateTimeSkeleton(fSkeleton, dateSkeleton, normalizedDateSkeleton,
UnicodeString convertedSkeleton = normalizeHourMetacharacters(fSkeleton);
getDateTimeSkeleton(convertedSkeleton, dateSkeleton, normalizedDateSkeleton,
timeSkeleton, normalizedTimeSkeleton);
#ifdef DTITVFMT_DEBUG
@ -899,6 +900,91 @@ DateIntervalFormat::initializePattern(UErrorCode& status) {
/**
 * Replaces the first contiguous run of hour metacharacters ('j', 'J', 'C') in a
 * skeleton with concrete hour and day-period pattern characters.
 *
 * The concrete characters are taken from the pattern that
 * DateFormat::getBestPattern() returns for fLocale when given the first
 * metacharacter of the run. If that lookup fails, 'H' (hour) and 'a'
 * (day period) are used.
 *
 * @param skeleton The skeleton to convert
 * @return A copy of the skeleton; unchanged if it contains no hour metacharacter.
 */
UnicodeString
DateIntervalFormat::normalizeHourMetacharacters(const UnicodeString& skeleton) const {
    UnicodeString result = skeleton;

    // Locate the first run of metacharacters. Scanning stops at the first
    // ordinary character after the run, so any later run is left untouched.
    UChar hourMetachar = u'\0';
    int32_t metacharStart = 0;
    int32_t metacharCount = 0;
    for (int32_t pos = 0; pos < result.length(); ++pos) {
        const UChar ch = result[pos];
        const UBool isMetachar = (ch == LOW_J || ch == CAP_J || ch == CAP_C);
        if (!isMetachar) {
            if (hourMetachar != u'\0') {
                break;
            }
            continue;
        }
        if (hourMetachar == u'\0') {
            hourMetachar = ch;
            metacharStart = pos;
        }
        ++metacharCount;
    }

    if (hourMetachar == u'\0') {
        // nothing to convert
        return result;
    }

    UChar hourChar = CAP_H;
    UChar dayPeriodChar = LOW_A;
    UErrorCode err = U_ZERO_ERROR;
    UnicodeString convertedPattern = DateFormat::getBestPattern(fLocale, UnicodeString(hourMetachar), err);
    if (U_SUCCESS(err)) {
        // Drop quoted literal text first, so that a literal letter (such as the
        // 'h' in the German literal 'Uhr') is not mistaken for a pattern character.
        // An unmatched quote is removed on its own.
        for (int32_t openQuote = convertedPattern.indexOf(u'\'');
             openQuote != -1;
             openQuote = convertedPattern.indexOf(u'\'')) {
            int32_t closeQuote = convertedPattern.indexOf(u'\'', openQuote + 1);
            if (closeQuote == -1) {
                closeQuote = openQuote;
            }
            convertedPattern.replace(openQuote, (closeQuote - openQuote) + 1, UnicodeString());
        }

        // Candidates are listed in priority order; the first one present wins.
        const UChar hourCandidates[] = { LOW_H, CAP_K, LOW_K };
        for (const UChar candidate : hourCandidates) {
            if (convertedPattern.indexOf(candidate) != -1) {
                hourChar = candidate;
                break;
            }
        }
        const UChar dayPeriodCandidates[] = { LOW_B, CAP_B };
        for (const UChar candidate : dayPeriodCandidates) {
            if (convertedPattern.indexOf(candidate) != -1) {
                dayPeriodChar = candidate;
                break;
            }
        }
    }

    if (hourChar == CAP_H || hourChar == LOW_K) {
        // 'H' and 'k' get no day-period field: the whole run collapses to one hour character
        result.replace(metacharStart, metacharCount, hourChar);
        return result;
    }

    // The length of the metacharacter run selects the width of the day-period field:
    // 3-4 metacharacters -> 4, 5-6 metacharacters -> 5, anything else -> 1.
    int32_t dayPeriodWidth = 1;
    if (metacharCount == 3 || metacharCount == 4) {
        dayPeriodWidth = 4;
    } else if (metacharCount == 5 || metacharCount == 6) {
        dayPeriodWidth = 5;
    }

    UnicodeString hourAndDayPeriod(hourChar);
    for (int32_t n = 0; n < dayPeriodWidth; ++n) {
        hourAndDayPeriod.append(dayPeriodChar);
    }
    result.replace(metacharStart, metacharCount, hourAndDayPeriod);
    return result;
}
void U_EXPORT2
DateIntervalFormat::getDateTimeSkeleton(const UnicodeString& skeleton,
UnicodeString& dateSkeleton,
@ -911,11 +997,10 @@ DateIntervalFormat::getDateTimeSkeleton(const UnicodeString& skeleton,
int32_t dCount = 0;
int32_t MCount = 0;
int32_t yCount = 0;
int32_t hCount = 0;
int32_t HCount = 0;
int32_t mCount = 0;
int32_t vCount = 0;
int32_t zCount = 0;
UChar hourChar = u'\0';
int32_t i;
for (i = 0; i < skeleton.length(); ++i) {
@ -956,17 +1041,14 @@ DateIntervalFormat::getDateTimeSkeleton(const UnicodeString& skeleton,
normalizedDateSkeleton.append(ch);
dateSkeleton.append(ch);
break;
case LOW_A:
// 'a' is implicitly handled
timeSkeleton.append(ch);
break;
case LOW_H:
timeSkeleton.append(ch);
++hCount;
break;
case CAP_H:
case LOW_K:
case CAP_K:
timeSkeleton.append(ch);
++HCount;
if (hourChar == u'\0') {
hourChar = ch;
}
break;
case LOW_M:
timeSkeleton.append(ch);
@ -980,14 +1062,15 @@ DateIntervalFormat::getDateTimeSkeleton(const UnicodeString& skeleton,
++vCount;
timeSkeleton.append(ch);
break;
case LOW_A:
case CAP_V:
case CAP_Z:
case LOW_K:
case CAP_K:
case LOW_J:
case LOW_S:
case CAP_S:
case CAP_A:
case LOW_B:
case CAP_B:
timeSkeleton.append(ch);
normalizedTimeSkeleton.append(ch);
break;
@ -1023,11 +1106,8 @@ DateIntervalFormat::getDateTimeSkeleton(const UnicodeString& skeleton,
}
/* generate normalized form for time */
if ( HCount != 0 ) {
normalizedTimeSkeleton.append(CAP_H);
}
else if ( hCount != 0 ) {
normalizedTimeSkeleton.append(LOW_H);
if ( hourChar != u'\0' ) {
normalizedTimeSkeleton.append(hourChar);
}
if ( mCount != 0 ) {
normalizedTimeSkeleton.append(LOW_M);
@ -1335,10 +1415,11 @@ DateIntervalFormat::setIntervalPattern(UCalendarDateFields field,
}
}
if ( !pattern.isEmpty() ) {
if ( differenceInfo != 0 ) {
UBool suppressDayPeriodField = fSkeleton.indexOf(CAP_J) != -1;
if ( differenceInfo != 0 || suppressDayPeriodField) {
UnicodeString adjustIntervalPattern;
adjustFieldWidth(*skeleton, *bestSkeleton, pattern, differenceInfo,
adjustIntervalPattern);
suppressDayPeriodField, adjustIntervalPattern);
setIntervalPattern(field, adjustIntervalPattern);
} else {
setIntervalPattern(field, pattern);
@ -1530,6 +1611,7 @@ DateIntervalFormat::adjustFieldWidth(const UnicodeString& inputSkeleton,
const UnicodeString& bestMatchSkeleton,
const UnicodeString& bestIntervalPattern,
int8_t differenceInfo,
UBool suppressDayPeriodField,
UnicodeString& adjustedPtn) {
adjustedPtn = bestIntervalPattern;
int32_t inputSkeletonFieldWidth[] =
@ -1556,19 +1638,44 @@ DateIntervalFormat::adjustFieldWidth(const UnicodeString& inputSkeleton,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
const int8_t PATTERN_CHAR_BASE = 0x41;
DateIntervalInfo::parseSkeleton(inputSkeleton, inputSkeletonFieldWidth);
DateIntervalInfo::parseSkeleton(bestMatchSkeleton, bestMatchSkeletonFieldWidth);
if ( differenceInfo == 2 ) {
adjustedPtn.findAndReplace(UnicodeString((UChar)0x76 /* v */),
UnicodeString((UChar)0x7a /* z */));
if (suppressDayPeriodField) {
adjustedPtn.findAndReplace(UnicodeString(LOW_A), UnicodeString());
adjustedPtn.findAndReplace(UnicodeString(" "), UnicodeString(" "));
adjustedPtn.trim();
}
if ( differenceInfo == 2 ) {
if (inputSkeleton.indexOf(LOW_Z) != -1) {
adjustedPtn.findAndReplace(UnicodeString(LOW_V),
UnicodeString(LOW_Z));
}
if (inputSkeleton.indexOf(CAP_K) != -1) {
adjustedPtn.findAndReplace(UnicodeString(LOW_H),
UnicodeString(CAP_K));
}
if (inputSkeleton.indexOf(LOW_K) != -1) {
adjustedPtn.findAndReplace(UnicodeString(CAP_H),
UnicodeString(LOW_K));
}
if (inputSkeleton.indexOf(LOW_B) != -1) {
adjustedPtn.findAndReplace(UnicodeString(LOW_A),
UnicodeString(LOW_B));
}
}
if (adjustedPtn.indexOf(LOW_A) != -1 && bestMatchSkeletonFieldWidth[LOW_A - PATTERN_CHAR_BASE] == 0) {
bestMatchSkeletonFieldWidth[LOW_A - PATTERN_CHAR_BASE] = 1;
}
if (adjustedPtn.indexOf(LOW_B) != -1 && bestMatchSkeletonFieldWidth[LOW_B - PATTERN_CHAR_BASE] == 0) {
bestMatchSkeletonFieldWidth[LOW_B - PATTERN_CHAR_BASE] = 1;
}
UBool inQuote = false;
UChar prevCh = 0;
int32_t count = 0;
const int8_t PATTERN_CHAR_BASE = 0x41;
// loop through the pattern string character by character
int32_t adjustedPtnLength = adjustedPtn.length();
int32_t i;

View File

@ -339,6 +339,9 @@ struct DateIntervalInfo::DateIntervalSink : public ResourceSink {
return UCAL_DATE;
} else if (c0 == 'a') {
return UCAL_AM_PM;
} else if (c0 == 'B') {
// TODO: Using AM/PM as a proxy for flexible day period isn't really correct, but it's close
return UCAL_AM_PM;
} else if (c0 == 'h' || c0 == 'H') {
return UCAL_HOUR;
} else if (c0 == 'm') {
@ -594,20 +597,23 @@ DateIntervalInfo::getBestSkeleton(const UnicodeString& skeleton,
const int32_t DIFFERENT_FIELD = 0x1000;
const int32_t STRING_NUMERIC_DIFFERENCE = 0x100;
const int32_t BASE = 0x41;
const UChar CHAR_V = 0x0076;
const UChar CHAR_Z = 0x007A;
// hack for 'v' and 'z'.
// resource bundle only have time skeletons ending with 'v',
// but not for time skeletons ending with 'z'.
UBool replaceZWithV = false;
// hack for certain alternate characters
// resource bundles only have time skeletons containing 'v', 'h', and 'H'
// but not time skeletons containing 'z', 'K', or 'k'
// the skeleton may also include 'a' or 'b', which never occur in the resource bundles, so strip them out too
UBool replacedAlternateChars = false;
const UnicodeString* inputSkeleton = &skeleton;
UnicodeString copySkeleton;
if ( skeleton.indexOf(CHAR_Z) != -1 ) {
if ( skeleton.indexOf(LOW_Z) != -1 || skeleton.indexOf(LOW_K) != -1 || skeleton.indexOf(CAP_K) != -1 || skeleton.indexOf(LOW_A) != -1 || skeleton.indexOf(LOW_B) != -1 ) {
copySkeleton = skeleton;
copySkeleton.findAndReplace(UnicodeString(CHAR_Z), UnicodeString(CHAR_V));
copySkeleton.findAndReplace(UnicodeString(LOW_Z), UnicodeString(LOW_V));
copySkeleton.findAndReplace(UnicodeString(LOW_K), UnicodeString(CAP_H));
copySkeleton.findAndReplace(UnicodeString(CAP_K), UnicodeString(LOW_H));
copySkeleton.findAndReplace(UnicodeString(LOW_A), UnicodeString());
copySkeleton.findAndReplace(UnicodeString(LOW_B), UnicodeString());
inputSkeleton = &copySkeleton;
replaceZWithV = true;
replacedAlternateChars = true;
}
parseSkeleton(*inputSkeleton, inputSkeletonFieldWidth);
@ -616,7 +622,7 @@ DateIntervalInfo::getBestSkeleton(const UnicodeString& skeleton,
// 0 means exact the same skeletons;
// 1 means having the same field, but with different length,
// 2 means only z/v differs
// 2 means only z/v, h/K, or H/k differs
// -1 means having different field.
bestMatchDistanceInfo = 0;
int8_t fieldLength = UPRV_LENGTHOF(skeletonFieldWidth);
@ -672,7 +678,7 @@ DateIntervalInfo::getBestSkeleton(const UnicodeString& skeleton,
break;
}
}
if ( replaceZWithV && bestMatchDistanceInfo != -1 ) {
if ( replacedAlternateChars && bestMatchDistanceInfo != -1 ) {
bestMatchDistanceInfo = 2;
}
return bestSkeleton;

View File

@ -867,6 +867,19 @@ private:
/**
* Converts special hour metacharacters (such as 'j') in the skeleton into locale-appropriate
* pattern characters.
*
*
* @param skeleton The skeleton to convert
* @return A copy of the skeleton, with "j" and any other special hour metacharacters converted to the regular ones.
*
*/
UnicodeString normalizeHourMetacharacters(const UnicodeString& skeleton) const;
/**
* get separated date and time skeleton from a combined skeleton.
*
@ -984,6 +997,7 @@ private:
* @param differenceInfo the difference between 2 skeletons
* 1 means only field width differs
* 2 means v/z exchange
* @param suppressDayPeriodField if true, remove the day period field from the pattern, if there is one
* @param adjustedIntervalPattern adjusted interval pattern
*/
static void U_EXPORT2 adjustFieldWidth(
@ -991,6 +1005,7 @@ private:
const UnicodeString& bestMatchSkeleton,
const UnicodeString& bestMatchIntervalPattern,
int8_t differenceInfo,
UBool suppressDayPeriodField,
UnicodeString& adjustedIntervalPattern);
/**

View File

@ -60,6 +60,7 @@ void DateIntervalFormatTest::runIndexedTest( int32_t index, UBool exec, const ch
TESTCASE(11, testCreateInstanceForAllLocales);
TESTCASE(12, testTicket20707);
TESTCASE(13, testFormatMillisecond);
TESTCASE(14, testHourMetacharacters);
default: name = ""; break;
}
}
@ -1076,6 +1077,93 @@ void DateIntervalFormatTest::testFormat() {
}
/**
* Test handling of hour and day period metacharacters
*/
void DateIntervalFormatTest::testHourMetacharacters() {
// Builds a flat table of test cases and hands it, together with its element count, to expect().
// Table layout:
// first item is date pattern
// followed by a group of locale/from_data/to_data/skeleton/interval_data
// (interval_data is the expected result; presumably expect() formats the from/to interval with the
// skeleton in the given locale and compares -- confirm against expect())
// Note that from_data/to_data are specified using era names from root, for the calendar specified by locale.
const char* DATA[] = {
"GGGGG y MM dd HH:mm:ss", // pattern for from_data/to_data
// This test is for tickets ICU-21154, ICU-21155, and ICU-21156 and is intended to verify
// that all of the special skeleton characters for hours and day periods work as expected
// with date intervals:
// - If a, b, or B is included in the skeleton, it correctly sets the length of the day-period field
// - If k or K is included, it behaves the same as H or h, except for the difference in the actual
// number used for the hour.
// - If j is included, it behaves the same as either h or H as appropriate, and multiple j's have the
// intended effect on the length of the day period field (if there is one)
// - If J is included, it correctly suppresses the day period field if j would include it
// - If C is included, it behaves the same as j and brings up the correct day period field
// - In all cases, if the day period of both ends of the range is the same, you only see it once
// baseline (h and H)
"en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "hh", "12 \\u2013 1 AM",
"de", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "HH", "00\\u201301 Uhr",
// k and K (ICU-21154 and ICU-21156)
"en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "KK", "0 \\u2013 1 AM",
"de", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "kk", "24\\u201301 Uhr",
// different lengths of the 'a' field
"en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "ha", "10 AM \\u2013 1 PM",
"en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "ha", "12 \\u2013 1 AM",
"en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 12:00:00", "haaaaa", "10 a \\u2013 12 p",
"en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "haaaaa", "12 \\u2013 1 a",
// j (ICU-21155)
"en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "jj", "10 AM \\u2013 1 PM",
"en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "jj", "12 \\u2013 1 AM",
"en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "jjjjj", "10 a \\u2013 1 p",
"en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "jjjjj", "12 \\u2013 1 a",
"de", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "jj", "10\\u201313 Uhr",
"de", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "jj", "00\\u201301 Uhr",
"de", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "jjjjj", "10\\u201313 Uhr",
"de", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "jjjjj", "00\\u201301 Uhr",
// b and B
"en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 12:00:00", "hb", "10 AM \\u2013 12 noon",
"en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 12:00:00", "hbbbbb", "10 a \\u2013 12 n",
"en", "CE 2010 09 27 13:00:00", "CE 2010 09 27 14:00:00", "hb", "1 \\u2013 2 PM",
"en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "hB", "10 in the morning \\u2013 1 in the afternoon",
"en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "hB", "12 \\u2013 1 at night",
// J
"en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "J", "10 \\u2013 1",
"en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "J", "12 \\u2013 1",
"de", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "J", "10\\u201313 Uhr",
"de", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "J", "00\\u201301 Uhr",
// C
// (for English and German, C should do the same thing as j)
"en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "CC", "10 AM \\u2013 1 PM",
"en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "CC", "12 \\u2013 1 AM",
"en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "CCCCC", "10 a \\u2013 1 p",
"en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "CCCCC", "12 \\u2013 1 a",
"de", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "CC", "10\\u201313 Uhr",
"de", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "CC", "00\\u201301 Uhr",
"de", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "CCCCC", "10\\u201313 Uhr",
"de", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "CCCCC", "00\\u201301 Uhr",
// (for zh_HK and hi_IN, j maps to ha, but C maps to hB)
"zh_HK", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "jj", "\\u4E0A\\u534810\\u6642\\u81F3\\u4E0B\\u53481\\u6642",
"zh_HK", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "jj", "\\u4E0A\\u534812\\u6642\\u81F31\\u6642",
"zh_HK", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "hB", "\\u4E0A\\u534810\\u6642 \\u2013 \\u4E0B\\u53481\\u6642",
"zh_HK", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "hB", "\\u51CC\\u666812\\u20131\\u6642",
"zh_HK", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "CC", "\\u4E0A\\u534810\\u6642 \\u2013 \\u4E0B\\u53481\\u6642",
"zh_HK", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "CC", "\\u51CC\\u666812\\u20131\\u6642",
"hi_IN", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "jj", "10 am \\u2013 1 pm",
"hi_IN", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "jj", "12\\u20131 am",
"hi_IN", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "hB", "\\u0938\\u0941\\u092C\\u0939 10 \\u2013 \\u0926\\u094B\\u092A\\u0939\\u0930 1",
"hi_IN", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "hB", "\\u0930\\u093E\\u0924 12\\u20131",
"hi_IN", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "CC", "\\u0938\\u0941\\u092C\\u0939 10 \\u2013 \\u0926\\u094B\\u092A\\u0939\\u0930 1",
"hi_IN", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "CC", "\\u0930\\u093E\\u0924 12\\u20131",
};
// UPRV_LENGTHOF(DATA) is the number of strings in the table (the pattern plus five per test case)
expect(DATA, UPRV_LENGTHOF(DATA));
}
void DateIntervalFormatTest::expect(const char** data, int32_t data_length) {
int32_t i = 0;
UErrorCode ec = U_ZERO_ERROR;

View File

@ -34,6 +34,12 @@ public:
*/
void testFormat();
/**
* Test handling of hour and day period metacharacters
*/
void testHourMetacharacters();
void testFormatMillisecond();
/**

View File

@ -1331,7 +1331,7 @@ public class DateIntervalFormat extends UFormat {
// or by getInstance(String skeleton, .... )
fSkeleton = dtpng.getSkeleton(fullPattern);
}
String skeleton = fSkeleton;
String skeleton = normalizeHourMetacharacters(fSkeleton, locale);
HashMap<String, PatternInfo> intervalPatterns = new HashMap<>();
@ -1547,6 +1547,89 @@ public class DateIntervalFormat extends UFormat {
}
*/
/**
 * Replaces the first contiguous run of hour metacharacters ('j', 'J', 'C') in a
 * skeleton with concrete hour and day-period pattern characters.
 *
 * The concrete characters are taken from the pattern that the locale's
 * DateTimePatternGenerator returns for the first metacharacter of the run.
 *
 * @param skeleton The skeleton to convert
 * @param locale The locale whose best pattern determines the replacement characters
 * @return A copy of the skeleton; unchanged if it contains no hour metacharacter.
 */
private String normalizeHourMetacharacters(String skeleton, ULocale locale) {
    StringBuilder result = new StringBuilder(skeleton);

    // Locate the first run of metacharacters. Scanning stops at the first
    // ordinary character after the run, so any later run is left untouched.
    int runStart = -1;
    int runLength = 0;
    for (int pos = 0; pos < result.length(); pos++) {
        char ch = result.charAt(pos);
        boolean isMetachar = (ch == 'j' || ch == 'J' || ch == 'C');
        if (isMetachar) {
            if (runStart < 0) {
                runStart = pos;
            }
            ++runLength;
        } else if (runStart >= 0) {
            break;
        }
    }

    if (runStart < 0) {
        // nothing to convert
        return result.toString();
    }

    char hourMetachar = result.charAt(runStart);
    String bestPattern = DateTimePatternGenerator.getInstance(locale)
            .getBestPattern(String.valueOf(hourMetachar));

    // Drop quoted literal text first, so that a literal letter (such as the
    // 'h' in the German literal 'Uhr') is not mistaken for a pattern character.
    // An unmatched quote is removed on its own.
    StringBuilder stripped = new StringBuilder(bestPattern);
    for (int openQuote = stripped.indexOf("'"); openQuote != -1; openQuote = stripped.indexOf("'")) {
        int closeQuote = stripped.indexOf("'", openQuote + 1);
        if (closeQuote == -1) {
            closeQuote = openQuote;
        }
        stripped.delete(openQuote, closeQuote + 1);
    }
    String convertedPattern = stripped.toString();

    // Candidates are listed in priority order; the first one present wins.
    char hourChar = 'H';
    for (char candidate : new char[] { 'h', 'K', 'k' }) {
        if (convertedPattern.indexOf(candidate) != -1) {
            hourChar = candidate;
            break;
        }
    }
    char dayPeriodChar = 'a';
    for (char candidate : new char[] { 'b', 'B' }) {
        if (convertedPattern.indexOf(candidate) != -1) {
            dayPeriodChar = candidate;
            break;
        }
    }

    int runEnd = runStart + runLength;
    if (hourChar == 'H' || hourChar == 'k') {
        // 'H' and 'k' get no day-period field: the whole run collapses to one hour character
        result.replace(runStart, runEnd, String.valueOf(hourChar));
        return result.toString();
    }

    // The length of the metacharacter run selects the width of the day-period field:
    // 3-4 metacharacters -> 4, 5-6 metacharacters -> 5, anything else -> 1.
    int dayPeriodWidth = 1;
    if (runLength == 3 || runLength == 4) {
        dayPeriodWidth = 4;
    } else if (runLength == 5 || runLength == 6) {
        dayPeriodWidth = 5;
    }

    StringBuilder hourAndDayPeriod = new StringBuilder();
    hourAndDayPeriod.append(hourChar);
    for (int n = 0; n < dayPeriodWidth; n++) {
        hourAndDayPeriod.append(dayPeriodChar);
    }
    result.replace(runStart, runEnd, hourAndDayPeriod.toString());
    return result.toString();
}
/*
* get separated date and time skeleton from a combined skeleton.
*
@ -1583,11 +1666,10 @@ public class DateIntervalFormat extends UFormat {
int dCount = 0;
int MCount = 0;
int yCount = 0;
int hCount = 0;
int HCount = 0;
int mCount = 0;
int vCount = 0;
int zCount = 0;
char hourChar = '\0';
for (i = 0; i < skeleton.length(); ++i) {
char ch = skeleton.charAt(i);
@ -1627,17 +1709,14 @@ public class DateIntervalFormat extends UFormat {
normalizedDateSkeleton.append(ch);
dateSkeleton.append(ch);
break;
case 'a':
// 'a' is implicitly handled
timeSkeleton.append(ch);
break;
case 'h':
timeSkeleton.append(ch);
++hCount;
break;
case 'H':
case 'k':
case 'K':
timeSkeleton.append(ch);
++HCount;
if (hourChar == '\0') {
hourChar = ch;
}
break;
case 'm':
timeSkeleton.append(ch);
@ -1651,14 +1730,15 @@ public class DateIntervalFormat extends UFormat {
++vCount;
timeSkeleton.append(ch);
break;
case 'a':
case 'V':
case 'Z':
case 'k':
case 'K':
case 'j':
case 's':
case 'S':
case 'A':
case 'b':
case 'B':
timeSkeleton.append(ch);
normalizedTimeSkeleton.append(ch);
break;
@ -1694,11 +1774,8 @@ public class DateIntervalFormat extends UFormat {
}
/* generate normalized form for time */
if ( HCount != 0 ) {
normalizedTimeSkeleton.append('H');
}
else if ( hCount != 0 ) {
normalizedTimeSkeleton.append('h');
if ( hourChar != '\0' ) {
normalizedTimeSkeleton.append(hourChar);
}
if ( mCount != 0 ) {
normalizedTimeSkeleton.append('m');
@ -1894,10 +1971,11 @@ public class DateIntervalFormat extends UFormat {
}
if ( pattern != null ) {
if ( differenceInfo != 0 ) {
boolean suppressDayPeriodField = fSkeleton.indexOf('J') != -1;
String part1 = adjustFieldWidth(skeleton, bestSkeleton,
pattern.getFirstPart(), differenceInfo);
pattern.getFirstPart(), differenceInfo, suppressDayPeriodField);
String part2 = adjustFieldWidth(skeleton, bestSkeleton,
pattern.getSecondPart(), differenceInfo);
pattern.getSecondPart(), differenceInfo, suppressDayPeriodField);
pattern = new PatternInfo(part1, part2,
pattern.firstDateInPtnIsLaterDate());
} else {
@ -1936,12 +2014,14 @@ public class DateIntervalFormat extends UFormat {
* @param differenceInfo the difference between 2 skeletons
* 1 means only field width differs
* 2 means v/z exchange
* @param suppressDayPeriodField if true, remove the day period field from the result
* @return the adjusted interval pattern
*/
private static String adjustFieldWidth(String inputSkeleton,
String bestMatchSkeleton,
String bestMatchIntervalPattern,
int differenceInfo ) {
int differenceInfo,
boolean suppressDayPeriodField ) {
if ( bestMatchIntervalPattern == null ) {
return null; // the 2nd part could be null
@ -1963,20 +2043,46 @@ public class DateIntervalFormat extends UFormat {
*/
int PATTERN_CHAR_BASE = 0x41;
DateIntervalInfo.parseSkeleton(inputSkeleton, inputSkeletonFieldWidth);
DateIntervalInfo.parseSkeleton(bestMatchSkeleton, bestMatchSkeletonFieldWidth);
if (suppressDayPeriodField) {
if (bestMatchIntervalPattern.indexOf(" a") != -1) {
bestMatchIntervalPattern = bestMatchIntervalPattern.replace(" a", "");
} else if (bestMatchIntervalPattern.indexOf("a ") != -1) {
bestMatchIntervalPattern = bestMatchIntervalPattern.replace("a ", "");
}
bestMatchIntervalPattern = bestMatchIntervalPattern.replace("a", "");
}
if ( differenceInfo == 2 ) {
bestMatchIntervalPattern = bestMatchIntervalPattern.replace('v', 'z');
if (inputSkeleton.indexOf('z') != -1) {
bestMatchIntervalPattern = bestMatchIntervalPattern.replace('v', 'z');
}
if (inputSkeleton.indexOf('K') != -1) {
bestMatchIntervalPattern = bestMatchIntervalPattern.replace('h', 'K');
}
if (inputSkeleton.indexOf('k') != -1) {
bestMatchIntervalPattern = bestMatchIntervalPattern.replace('H', 'k');
}
if (inputSkeleton.indexOf('b') != -1) {
bestMatchIntervalPattern = bestMatchIntervalPattern.replace('a', 'b');
}
}
if (bestMatchIntervalPattern.indexOf('a') != -1 && bestMatchSkeletonFieldWidth['a' - PATTERN_CHAR_BASE] == 0) {
bestMatchSkeletonFieldWidth['a' - PATTERN_CHAR_BASE] = 1;
}
if (bestMatchIntervalPattern.indexOf('b') != -1 && bestMatchSkeletonFieldWidth['b' - PATTERN_CHAR_BASE] == 0) {
bestMatchSkeletonFieldWidth['b' - PATTERN_CHAR_BASE] = 1;
}
StringBuilder adjustedPtn = new StringBuilder(bestMatchIntervalPattern);
boolean inQuote = false;
char prevCh = 0;
int count = 0;
int PATTERN_CHAR_BASE = 0x41;
// loop through the pattern string character by character
int adjustedPtnLength = adjustedPtn.length();
for (int i = 0; i < adjustedPtnLength; ++i) {

View File

@ -532,7 +532,7 @@ public class DateIntervalInfo implements Cloneable, Freezable<DateIntervalInfo>,
// Check that the pattern letter is accepted
char letter = patternLetter.charAt(0);
if (ACCEPTED_PATTERN_LETTERS.indexOf(letter) < 0) {
if (ACCEPTED_PATTERN_LETTERS.indexOf(letter) < 0 && letter != 'B') {
return null;
}
@ -541,6 +541,12 @@ public class DateIntervalInfo implements Cloneable, Freezable<DateIntervalInfo>,
patternLetter = CALENDAR_FIELD_TO_PATTERN_LETTER[Calendar.HOUR];
}
// Replace 'a' for 'B'
// TODO: Using AM/PM as a proxy for flexible day period isn't really correct, but it's close
if (letter == 'B') {
patternLetter = CALENDAR_FIELD_TO_PATTERN_LETTER[Calendar.AM_PM];
}
return patternLetter;
}
@ -1081,20 +1087,25 @@ public class DateIntervalInfo implements Cloneable, Freezable<DateIntervalInfo>,
final int STRING_NUMERIC_DIFFERENCE = 0x100;
final int BASE = 0x41;
// TODO: this is a hack for 'v' and 'z'
// resource bundle only have time skeletons ending with 'v',
// but not for time skeletons ending with 'z'.
boolean replaceZWithV = false;
if ( inputSkeleton.indexOf('z') != -1 ) {
// hack for certain alternate characters
// resource bundles only have time skeletons containing 'v', 'h', and 'H'
// but not time skeletons containing 'z', 'K', or 'k'
// the skeleton may also include 'a' or 'b', which never occur in the resource bundles, so strip them out too
boolean replacedAlternateChars = false;
if ( inputSkeleton.indexOf('z') != -1 || inputSkeleton.indexOf('k') != -1 || inputSkeleton.indexOf('K') != -1 || inputSkeleton.indexOf('a') != -1 || inputSkeleton.indexOf('b') != -1 ) {
inputSkeleton = inputSkeleton.replace('z', 'v');
replaceZWithV = true;
inputSkeleton = inputSkeleton.replace('k', 'H');
inputSkeleton = inputSkeleton.replace('K', 'h');
inputSkeleton = inputSkeleton.replace("a", "");
inputSkeleton = inputSkeleton.replace("b", "");
replacedAlternateChars = true;
}
parseSkeleton(inputSkeleton, inputSkeletonFieldWidth);
int bestDistance = Integer.MAX_VALUE;
// 0 means exact the same skeletons;
// 1 means having the same field, but with different length,
// 2 means only z/v differs
// 2 means only z/v, h/K, or H/k differs
// -1 means having different field.
int bestFieldDifference = 0;
for (String skeleton : fIntervalPatterns.keySet()) {
@ -1135,7 +1146,7 @@ public class DateIntervalInfo implements Cloneable, Freezable<DateIntervalInfo>,
break;
}
}
if ( replaceZWithV && bestFieldDifference != -1 ) {
if ( replacedAlternateChars && bestFieldDifference != -1 ) {
bestFieldDifference = 2;
}
return new DateIntervalFormat.BestMatchInfo(bestSkeleton, bestFieldDifference);

View File

@ -721,6 +721,94 @@ public class DateIntervalFormatTest extends TestFmwk {
}
/**
* Test handling of hour and day period metacharacters
*/
@Test
public void TestHourMetacharacters() {
// Builds a flat table of test cases and hands it, together with its length, to expect().
// Table layout:
// first item is date pattern
// followed by a group of locale/from_data/to_data/skeleton/interval_data
// (interval_data is the expected result; presumably expect() formats the from/to interval with the
// skeleton in the given locale and compares -- confirm against expect())
// Note that from_data/to_data are specified using era names from root, for the calendar specified by locale.
String[] DATA = {
"GGGGG y MM dd HH:mm:ss", // pattern for from_data/to_data
// This test is for tickets ICU-21154, ICU-21155, and ICU-21156 and is intended to verify
// that all of the special skeleton characters for hours and day periods work as expected
// with date intervals:
// - If a, b, or B is included in the skeleton, it correctly sets the length of the day-period field
// - If k or K is included, it behaves the same as H or h, except for the difference in the actual
// number used for the hour.
// - If j is included, it behaves the same as either h or H as appropriate, and multiple j's have the
// intended effect on the length of the day period field (if there is one)
// - If J is included, it correctly suppresses the day period field if j would include it
// - If C is included, it behaves the same as j and brings up the correct day period field
// - In all cases, if the day period of both ends of the range is the same, you only see it once
// baseline (h and H)
"en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "hh", "12 \\u2013 1 AM",
"de", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "HH", "00\\u201301 Uhr",
// k and K (ICU-21154 and ICU-21156)
"en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "KK", "0 \\u2013 1 AM",
"de", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "kk", "24\\u201301 Uhr",
// different lengths of the 'a' field
"en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "ha", "10 AM \\u2013 1 PM",
"en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "ha", "12 \\u2013 1 AM",
"en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 12:00:00", "haaaaa", "10 a \\u2013 12 p",
"en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "haaaaa", "12 \\u2013 1 a",
// j (ICU-21155)
"en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "jj", "10 AM \\u2013 1 PM",
"en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "jj", "12 \\u2013 1 AM",
"en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "jjjjj", "10 a \\u2013 1 p",
"en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "jjjjj", "12 \\u2013 1 a",
"de", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "jj", "10\\u201313 Uhr",
"de", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "jj", "00\\u201301 Uhr",
"de", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "jjjjj", "10\\u201313 Uhr",
"de", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "jjjjj", "00\\u201301 Uhr",
// b and B
"en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 12:00:00", "hb", "10 AM \\u2013 12 noon",
"en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 12:00:00", "hbbbbb", "10 a \\u2013 12 n",
"en", "CE 2010 09 27 13:00:00", "CE 2010 09 27 14:00:00", "hb", "1 \\u2013 2 PM",
"en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "hB", "10 in the morning \\u2013 1 in the afternoon",
"en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "hB", "12 \\u2013 1 at night",
// J
"en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "J", "10 \\u2013 1",
"en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "J", "12 \\u2013 1",
"de", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "J", "10\\u201313 Uhr",
"de", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "J", "00\\u201301 Uhr",
// C
// (for English and German, C should do the same thing as j)
"en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "CC", "10 AM \\u2013 1 PM",
"en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "CC", "12 \\u2013 1 AM",
"en", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "CCCCC", "10 a \\u2013 1 p",
"en", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "CCCCC", "12 \\u2013 1 a",
"de", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "CC", "10\\u201313 Uhr",
"de", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "CC", "00\\u201301 Uhr",
"de", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "CCCCC", "10\\u201313 Uhr",
"de", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "CCCCC", "00\\u201301 Uhr",
// (for zh_HK and hi_IN, j maps to ha, but C maps to hB)
"zh_HK", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "jj", "\\u4E0A\\u534810\\u6642\\u81F3\\u4E0B\\u53481\\u6642",
"zh_HK", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "jj", "\\u4E0A\\u534812\\u6642\\u81F31\\u6642",
"zh_HK", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "hB", "\\u4E0A\\u534810\\u6642 \\u2013 \\u4E0B\\u53481\\u6642",
"zh_HK", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "hB", "\\u51CC\\u666812\\u20131\\u6642",
"zh_HK", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "CC", "\\u4E0A\\u534810\\u6642 \\u2013 \\u4E0B\\u53481\\u6642",
"zh_HK", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "CC", "\\u51CC\\u666812\\u20131\\u6642",
"hi_IN", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "jj", "10 am \\u2013 1 pm",
"hi_IN", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "jj", "12\\u20131 am",
"hi_IN", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "hB", "\\u0938\\u0941\\u092C\\u0939 10 \\u2013 \\u0926\\u094B\\u092A\\u0939\\u0930 1",
"hi_IN", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "hB", "\\u0930\\u093E\\u0924 12\\u20131",
"hi_IN", "CE 2010 09 27 10:00:00", "CE 2010 09 27 13:00:00", "CC", "\\u0938\\u0941\\u092C\\u0939 10 \\u2013 \\u0926\\u094B\\u092A\\u0939\\u0930 1",
"hi_IN", "CE 2010 09 27 00:00:00", "CE 2010 09 27 01:00:00", "CC", "\\u0930\\u093E\\u0924 12\\u20131",
};
// DATA.length is the number of strings in the table (the pattern plus five per test case)
expect(DATA, DATA.length);
}
private void expect(String[] data, int data_length) {
int i = 0;
String pattern = data[i++];