ICU-20826 Updating C++ ListFormatter impl to be more like Java

This commit is contained in:
Shane F. Carr 2020-08-14 05:48:42 -05:00
parent a827ab72c9
commit caadb345e5
7 changed files with 253 additions and 219 deletions

View File

@ -263,6 +263,8 @@ UnicodeString SimpleFormatter::getTextWithNoArguments(
sb.append(compiledPattern + i, n);
i += n;
} else if (n < offsetsLength) {
// TODO(ICU-20406): This does not distinguish between "{0}{1}" and "{1}{0}".
// Consider removing this function and replacing it with an iterator interface.
offsets[n] = sb.length();
}
}

View File

@ -69,6 +69,9 @@ U_NAMESPACE_BEGIN
/**
* Implementation of FormattedValue using FieldPositionHandler to accept fields.
*
* TODO(ICU-20897): This class is unused. If it is not needed when fixing ICU-20897,
* it should be deleted.
*/
class FormattedValueFieldPositionIteratorImpl : public UMemory, public FormattedValue {
public:
@ -114,6 +117,18 @@ private:
};
// Export an explicit template instantiation of the MaybeStackArray that
// is used as a data member of FormattedValueStringBuilderImpl (spanIndices).
//
// When building DLLs for Windows this is required even though
// no direct access to the MaybeStackArray leaks out of the i18n library.
//
// See digitlst.h, pluralaffix.h, datefmt.h, and others for similar examples.
//
#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN
template class U_I18N_API MaybeStackArray<int32_t, 8>;
#endif
/**
* Implementation of FormattedValue based on FormattedStringBuilder.
*
@ -147,12 +162,17 @@ public:
return fString;
}
void appendSpanIndex(int32_t index);
void prependSpanIndex(int32_t index);
private:
FormattedStringBuilder fString;
FormattedStringBuilder::Field fNumericField;
MaybeStackArray<int32_t, 8> spanIndices;
bool nextPositionImpl(ConstrainedFieldPosition& cfpos, FormattedStringBuilder::Field numericField, UErrorCode& status) const;
static bool isIntOrGroup(FormattedStringBuilder::Field field);
static bool isTrimmable(FormattedStringBuilder::Field field);
int32_t trimBack(int32_t limit) const;
int32_t trimFront(int32_t start) const;
};

View File

@ -15,6 +15,7 @@
#include "formatted_string_builder.h"
#include "number_utils.h"
#include "static_unicode_sets.h"
#include "unicode/listformatter.h"
U_NAMESPACE_BEGIN
@ -102,14 +103,25 @@ static constexpr Field kEndField = Field(0xf, 0xf);
bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition& cfpos, Field numericField, UErrorCode& /*status*/) const {
int32_t fieldStart = -1;
Field currField = kUndefinedField;
UFieldCategory spanCategory = UFIELD_CATEGORY_UNDEFINED;
int32_t spanValue;
for (int32_t i = fString.fZero + cfpos.getLimit(); i <= fString.fZero + fString.fLength; i++) {
Field _field = (i < fString.fZero + fString.fLength) ? fString.getFieldPtr()[i] : kEndField;
// Case 1: currently scanning a field.
if (currField != kUndefinedField) {
if (currField != _field) {
int32_t end = i - fString.fZero;
// Handle span fields; don't trim them
if (spanCategory != UFIELD_CATEGORY_UNDEFINED) {
cfpos.setState(
spanCategory,
spanValue,
fieldStart,
end);
return true;
}
// Grouping separators can be whitespace; don't throw them out!
if (currField != Field(UFIELD_CATEGORY_NUMBER, UNUM_GROUPING_SEPARATOR_FIELD)) {
if (isTrimmable(currField)) {
end = trimBack(i - fString.fZero);
}
if (end <= fieldStart) {
@ -120,7 +132,7 @@ bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition&
continue;
}
int32_t start = fieldStart;
if (currField != Field(UFIELD_CATEGORY_NUMBER, UNUM_GROUPING_SEPARATOR_FIELD)) {
if (isTrimmable(currField)) {
start = trimFront(start);
}
cfpos.setState(currField.getCategory(), currField.getField(), start, end);
@ -154,7 +166,8 @@ bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition&
|| cfpos.getField() != numericField.getField())
&& fString.getFieldPtr()[i - 1].isNumeric()
&& !_field.isNumeric()) {
int j = i - 1;
// Re-wind to the beginning of the field and then emit it
int32_t j = i - 1;
for (; j >= fString.fZero && fString.getFieldPtr()[j].isNumeric(); j--) {}
cfpos.setState(
numericField.getCategory(),
@ -163,6 +176,23 @@ bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition&
i - fString.fZero);
return true;
}
// Special case: emit normalField if we are pointing at the end of spanField.
if (i > fString.fZero) {
auto elementField = fString.getFieldPtr()[i-1];
if (elementField == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)
&& cfpos.matchesField(elementField.getCategory(), elementField.getField())
&& (cfpos.getLimit() < i - fString.fZero || cfpos.getCategory() != elementField.getCategory())) {
// Re-wind to the beginning of the field and then emit it
int32_t j = i - 1;
for (; j >= fString.fZero && fString.getFieldPtr()[j] == fString.getFieldPtr()[i-1]; j--) {}
cfpos.setState(
elementField.getCategory(),
elementField.getField(),
j - fString.fZero + 1,
i - fString.fZero);
return true;
}
}
// Special case: skip over INTEGER; will be coalesced later.
if (_field == Field(UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD)) {
_field = kUndefinedField;
@ -172,6 +202,23 @@ bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition&
continue;
}
// Case 3: check for field starting at this position
// Case 3a: Need to add a SpanField
if (_field == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)
// don't return the same field twice in a row:
&& (i == fString.fZero
|| fString.getFieldPtr()[i-1].getCategory() != UFIELD_CATEGORY_LIST
|| fString.getFieldPtr()[i-1].getField() != ULISTFMT_ELEMENT_FIELD)) {
int64_t si = cfpos.getInt64IterationContext();
spanValue = spanIndices[si];
cfpos.setInt64IterationContext(si + 1);
if (cfpos.matchesField(UFIELD_CATEGORY_LIST_SPAN, spanValue)) {
spanCategory = UFIELD_CATEGORY_LIST_SPAN;
fieldStart = i - fString.fZero;
currField = _field;
continue;
}
}
// Case 3b: No SpanField or SpanField did not match
if (cfpos.matchesField(_field.getCategory(), _field.getField())) {
fieldStart = i - fString.fZero;
currField = _field;
@ -182,11 +229,33 @@ bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition&
return false;
}
/**
 * Records the span index for a list element that was appended to the end of
 * the string: slot `position` of spanIndices is set to `position`.
 *
 * @param position Index of the slot to write; also the value stored there.
 *                 NOTE(review): callers appear to pass consecutive positions
 *                 starting at 0 -- confirm.
 */
void FormattedValueStringBuilderImpl::appendSpanIndex(int32_t position) {
    if (spanIndices.getCapacity() <= position) {
        // resize() copies only `length` existing entries (its default is 0),
        // so pass the old capacity to carry the recorded indices over.
        if (spanIndices.resize(position * 2, spanIndices.getCapacity()) == nullptr) {
            // Allocation failed and the array is unchanged. This function has
            // no status parameter, so skip the write rather than store out of
            // bounds.
            return;
        }
    }
    spanIndices[position] = position;
}
/**
 * Records the span index for a list element that was prepended to the front
 * of the string: the first `position` entries of spanIndices are shifted up
 * by one slot and `position` is stored in slot 0.
 *
 * @param position Value stored in slot 0; also the number of existing
 *                 entries that are shifted.
 *                 NOTE(review): assumes exactly `position` entries are
 *                 already in use -- confirm against callers.
 */
void FormattedValueStringBuilderImpl::prependSpanIndex(int32_t position) {
    if (spanIndices.getCapacity() <= position) {
        // resize() copies only `length` existing entries (its default is 0),
        // so pass the old capacity to carry the recorded indices over.
        if (spanIndices.resize(position * 2, spanIndices.getCapacity()) == nullptr) {
            // Allocation failed and the array is unchanged. This function has
            // no status parameter, so skip the shift rather than store out of
            // bounds.
            return;
        }
    }
    // Shift from the back; a front-to-back copy would overwrite each entry
    // before it is moved and smear slot 0 across the whole range.
    for (int32_t i = position - 1; i >= 0; i--) {
        spanIndices[i+1] = spanIndices[i];
    }
    spanIndices[0] = position;
}
/**
 * Returns true if `field` is the number-category integer field or the
 * number-category grouping-separator field.
 */
bool FormattedValueStringBuilderImpl::isIntOrGroup(Field field) {
    const Field integerField(UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD);
    if (field == integerField) {
        return true;
    }
    const Field groupingField(UFIELD_CATEGORY_NUMBER, UNUM_GROUPING_SEPARATOR_FIELD);
    return field == groupingField;
}
/**
 * Returns whether a field's range may be trimmed at its edges.
 *
 * Two kinds of field are never trimmable: the number grouping separator and
 * any field in the list category.
 */
bool FormattedValueStringBuilderImpl::isTrimmable(Field field) {
    if (field != Field(UFIELD_CATEGORY_NUMBER, UNUM_GROUPING_SEPARATOR_FIELD)) {
        // Not a grouping separator: trimmable unless it is a list field.
        return field.getCategory() != UFIELD_CATEGORY_LIST;
    }
    return false;
}
int32_t FormattedValueStringBuilderImpl::trimBack(int32_t limit) const {
return unisets::get(unisets::DEFAULT_IGNORABLES)->spanBack(
fString.getCharPtr() + fString.fZero,

View File

@ -52,10 +52,12 @@ public:
/** Returns a heap-allocated PatternHandler constructed from this handler's twoPattern and endPattern. */
virtual PatternHandler* clone() const {
    return new PatternHandler(twoPattern, endPattern);
}
/**
 * Returns the stored two-item pattern.
 * Argument: final string in the list. This implementation ignores it.
 */
virtual const SimpleFormatter& getTwoPattern(const UnicodeString& /*text*/) const {
    return twoPattern;
}
/**
 * Returns the stored end pattern.
 * Argument: final string in the list. This implementation ignores it.
 */
virtual const SimpleFormatter& getEndPattern(const UnicodeString& /*text*/) const {
    return endPattern;
}
@ -237,9 +239,9 @@ ListFormatInternal(const ListFormatInternal &other) :
#if !UCONFIG_NO_FORMATTING
class FormattedListData : public FormattedValueFieldPositionIteratorImpl {
class FormattedListData : public FormattedValueStringBuilderImpl {
public:
FormattedListData(UErrorCode& status) : FormattedValueFieldPositionIteratorImpl(5, status) {}
FormattedListData(UErrorCode&) : FormattedValueStringBuilderImpl(kUndefinedField) {}
virtual ~FormattedListData();
};
@ -557,50 +559,89 @@ ListFormatter::~ListFormatter() {
delete owned;
}
/**
* Joins first and second using the pattern pat.
* On entry offset is an offset into first or -1 if offset unspecified.
* On exit offset is offset of second in result if recordOffset was set
* Otherwise if it was >=0 it is set to point into result where it used
* to point into first. On exit, result is the join of first and second
* according to pat. Any previous value of result gets replaced.
*/
static void joinStringsAndReplace(
const SimpleFormatter& pat,
const UnicodeString& first,
const UnicodeString& second,
UnicodeString &result,
UBool recordOffset,
int32_t &offset,
int32_t *offsetFirst,
int32_t *offsetSecond,
UErrorCode& errorCode) {
if (U_FAILURE(errorCode)) {
return;
namespace {
class FormattedListBuilder {
public:
LocalPointer<FormattedListData> data;
/** For lists of length 1+ */
FormattedListBuilder(const UnicodeString& start, UErrorCode& status)
: data(new FormattedListData(status), status) {
if (U_SUCCESS(status)) {
data->getStringRef().append(
start,
{UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD},
status);
data->appendSpanIndex(0);
}
}
const UnicodeString *params[2] = {&first, &second};
int32_t offsets[2];
pat.formatAndReplace(
params,
UPRV_LENGTHOF(params),
result,
offsets,
UPRV_LENGTHOF(offsets),
errorCode);
if (U_FAILURE(errorCode)) {
return;
/** For lists of length 0 */
FormattedListBuilder(UErrorCode& status)
: data(new FormattedListData(status), status) {
}
if (offsets[0] == -1 || offsets[1] == -1) {
errorCode = U_INVALID_FORMAT_ERROR;
return;
void append(const SimpleFormatter& pattern, const UnicodeString& next, int32_t position, UErrorCode& status) {
if (U_FAILURE(status)) {
return;
}
if (pattern.getArgumentLimit() != 2) {
status = U_INTERNAL_PROGRAM_ERROR;
return;
}
// In the pattern, {0} are the pre-existing elements and {1} is the new element.
int32_t offsets[] = {0, 0};
UnicodeString temp = pattern.getTextWithNoArguments(offsets, 2);
if (offsets[0] <= offsets[1]) {
// prefix{0}infix{1}suffix
// Prepend prefix, then append infix, element, and suffix
data->getStringRef().insert(
0,
temp.tempSubStringBetween(0, offsets[0]),
{UFIELD_CATEGORY_LIST, ULISTFMT_LITERAL_FIELD},
status);
data->getStringRef().append(
temp.tempSubStringBetween(offsets[0], offsets[1]),
{UFIELD_CATEGORY_LIST, ULISTFMT_LITERAL_FIELD},
status);
data->getStringRef().append(
next,
{UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD},
status);
data->appendSpanIndex(position);
data->getStringRef().append(
temp.tempSubString(offsets[1]),
{UFIELD_CATEGORY_LIST, ULISTFMT_LITERAL_FIELD},
status);
} else {
// prefix{1}infix{0}suffix
// Prepend infix, element, and prefix, then append suffix.
// (We prepend in reverse order because prepending at index 0 is fast.)
data->getStringRef().insert(
0,
temp.tempSubStringBetween(offsets[1], offsets[0]),
{UFIELD_CATEGORY_LIST, ULISTFMT_LITERAL_FIELD},
status);
data->getStringRef().insert(
0,
next,
{UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD},
status);
data->prependSpanIndex(position);
data->getStringRef().insert(
0,
temp.tempSubStringBetween(0, offsets[1]),
{UFIELD_CATEGORY_LIST, ULISTFMT_LITERAL_FIELD},
status);
data->getStringRef().append(
temp.tempSubString(offsets[0]),
{UFIELD_CATEGORY_LIST, ULISTFMT_LITERAL_FIELD},
status);
}
}
if (recordOffset) {
offset = offsets[1];
} else if (offset >= 0) {
offset += offsets[0];
}
if (offsetFirst != nullptr) *offsetFirst = offsets[0];
if (offsetSecond != nullptr) *offsetSecond = offsets[1];
};
}
UnicodeString& ListFormatter::format(
@ -619,7 +660,19 @@ UnicodeString& ListFormatter::format(
int32_t index,
int32_t &offset,
UErrorCode& errorCode) const {
return format_(items, nItems, appendTo, index, offset, nullptr, errorCode);
#if !UCONFIG_NO_FORMATTING
int32_t initialOffset = appendTo.length();
auto result = formatStringsToValue(items, nItems, errorCode);
UnicodeStringAppendable appendable(appendTo);
result.appendTo(appendable, errorCode);
if (index >= 0) {
ConstrainedFieldPosition cfpos;
cfpos.constrainField(UFIELD_CATEGORY_LIST_SPAN, index);
result.nextPosition(cfpos, errorCode);
offset = initialOffset + cfpos.getStart();
}
#endif
return appendTo;
}
#if !UCONFIG_NO_FORMATTING
@ -627,182 +680,68 @@ FormattedList ListFormatter::formatStringsToValue(
const UnicodeString items[],
int32_t nItems,
UErrorCode& errorCode) const {
LocalPointer<FormattedListData> result(new FormattedListData(errorCode), errorCode);
if (U_FAILURE(errorCode)) {
return FormattedList(errorCode);
}
UnicodeString string;
int32_t offset;
auto handler = result->getHandler(errorCode);
handler.setCategory(UFIELD_CATEGORY_LIST);
format_(items, nItems, string, -1, offset, &handler, errorCode);
handler.getError(errorCode);
result->appendString(string, errorCode);
if (U_FAILURE(errorCode)) {
return FormattedList(errorCode);
if (nItems == 0) {
FormattedListBuilder result(errorCode);
if (U_FAILURE(errorCode)) {
return FormattedList(errorCode);
} else {
return FormattedList(result.data.orphan());
}
} else if (nItems == 1) {
FormattedListBuilder result(items[0], errorCode);
result.data->getStringRef().writeTerminator(errorCode);
if (U_FAILURE(errorCode)) {
return FormattedList(errorCode);
} else {
return FormattedList(result.data.orphan());
}
} else if (nItems == 2) {
FormattedListBuilder result(items[0], errorCode);
if (U_FAILURE(errorCode)) {
return FormattedList(errorCode);
}
result.append(
data->patternHandler->getTwoPattern(items[1]),
items[1],
1,
errorCode);
result.data->getStringRef().writeTerminator(errorCode);
if (U_FAILURE(errorCode)) {
return FormattedList(errorCode);
} else {
return FormattedList(result.data.orphan());
}
}
// Add span fields and sort
ConstrainedFieldPosition cfpos;
cfpos.constrainField(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD);
int32_t i = 0;
handler.setCategory(UFIELD_CATEGORY_LIST_SPAN);
while (result->nextPosition(cfpos, errorCode)) {
handler.addAttribute(i++, cfpos.getStart(), cfpos.getLimit());
}
handler.getError(errorCode);
FormattedListBuilder result(items[0], errorCode);
if (U_FAILURE(errorCode)) {
return FormattedList(errorCode);
}
result->sort();
return FormattedList(result.orphan());
result.append(
data->startPattern,
items[1],
1,
errorCode);
for (int32_t i = 2; i < nItems - 1; i++) {
result.append(
data->middlePattern,
items[i],
i,
errorCode);
}
result.append(
data->patternHandler->getEndPattern(items[nItems-1]),
items[nItems-1],
nItems-1,
errorCode);
result.data->getStringRef().writeTerminator(errorCode);
if (U_FAILURE(errorCode)) {
return FormattedList(errorCode);
} else {
return FormattedList(result.data.orphan());
}
}
#endif
/**
 * Joins the items with the list patterns and appends the result to appendTo.
 *
 * @param items     Strings to join.
 * @param nItems    Number of entries in items; if <= 0 nothing is appended.
 * @param appendTo  Receives the joined text; only appended to on success.
 * @param index     Index of the item whose position should be reported
 *                  through offset.
 * @param offset    Reset to -1 on entry. If the item at index is located,
 *                  receives its offset within appendTo.
 * @param handler   If non-null, receives a ULISTFMT_ELEMENT_FIELD attribute
 *                  for each item and a ULISTFMT_LITERAL_FIELD attribute for
 *                  each non-empty gap around the items.
 * @param errorCode Set to U_INVALID_STATE_ERROR if data is null; otherwise
 *                  carries any failure from the join steps or the sort.
 * @return appendTo.
 *
 * When UCONFIG_NO_FORMATTING is set the body is compiled out and appendTo is
 * returned unchanged.
 */
UnicodeString& ListFormatter::format_(
const UnicodeString items[],
int32_t nItems,
UnicodeString& appendTo,
int32_t index,
int32_t &offset,
FieldPositionHandler* handler,
UErrorCode& errorCode) const {
#if !UCONFIG_NO_FORMATTING
offset = -1;
if (U_FAILURE(errorCode)) {
return appendTo;
}
if (data == nullptr) {
errorCode = U_INVALID_STATE_ERROR;
return appendTo;
}
if (nItems <= 0) {
return appendTo;
}
// A single item is appended as-is; no pattern is applied.
if (nItems == 1) {
if (index == 0) {
offset = appendTo.length();
}
if (handler != nullptr) {
handler->addAttribute(ULISTFMT_ELEMENT_FIELD,
appendTo.length(),
appendTo.length() + items[0].length());
}
appendTo.append(items[0]);
return appendTo;
}
// The list is built up in `result`, which is passed to each join below as
// both the first argument and the output.
UnicodeString result(items[0]);
if (index == 0) {
offset = 0;
}
int32_t offsetFirst = 0;
int32_t offsetSecond = 0;
// Running sum of offsetFirst over all joins performed so far.
int32_t prefixLength = 0;
// For n items, there are 2 * (n + 1) boundaries including 0 and the upper
// edge.
MaybeStackArray<int32_t, 10> offsets((handler != nullptr) ? 2 * (nItems + 1) : 0, errorCode);
if (nItems == 2) {
joinStringsAndReplace(
data->patternHandler->getTwoPattern(items[1]),
result,
items[1],
result,
index == 1,
offset,
&offsetFirst,
&offsetSecond,
errorCode);
} else {
joinStringsAndReplace(
data->startPattern,
result,
items[1],
result,
index == 1,
offset,
&offsetFirst,
&offsetSecond,
errorCode);
}
if (handler != nullptr) {
offsets[0] = 0;
prefixLength += offsetFirst;
offsets[1] = offsetSecond - prefixLength;
}
if (nItems > 2) {
// Items 2 .. nItems-2 are joined with the middle pattern; the last item
// uses the end pattern.
for (int32_t i = 2; i < nItems - 1; ++i) {
joinStringsAndReplace(
data->middlePattern,
result,
items[i],
result,
index == i,
offset,
&offsetFirst,
&offsetSecond,
errorCode);
if (handler != nullptr) {
prefixLength += offsetFirst;
offsets[i] = offsetSecond - prefixLength;
}
}
joinStringsAndReplace(
data->patternHandler->getEndPattern(items[nItems - 1]),
result,
items[nItems - 1],
result,
index == nItems - 1,
offset,
&offsetFirst,
&offsetSecond,
errorCode);
if (handler != nullptr) {
prefixLength += offsetFirst;
offsets[nItems - 1] = offsetSecond - prefixLength;
}
}
if (handler != nullptr) {
// If there is already some data in appendTo, we need to adjust the index
// by shifting by that length while inserting into the handler.
int32_t shift = appendTo.length() + prefixLength;
// Output the ULISTFMT_ELEMENT_FIELD in the order of the input elements
for (int32_t i = 0; i < nItems; ++i) {
offsets[i + nItems] = offsets[i] + items[i].length() + shift;
offsets[i] += shift;
handler->addAttribute(
ULISTFMT_ELEMENT_FIELD, // id
offsets[i], // index
offsets[i + nItems]); // limit
}
// The locale pattern may reorder the items (such as in ur-IN locale),
// so we cannot assume the array is in ascending order.
// To handle the edge case, just insert the two ends into the array
// and sort. Then we output ULISTFMT_LITERAL_FIELD if the indices
// between the even and odd position are not the same in the sorted array.
offsets[2 * nItems] = shift - prefixLength;
offsets[2 * nItems + 1] = result.length() + shift - prefixLength;
uprv_sortArray(offsets.getAlias(), 2 * (nItems + 1), sizeof(int32_t),
uprv_int32Comparator, nullptr,
false, &errorCode);
for (int32_t i = 0; i <= nItems; ++i) {
if (offsets[i * 2] != offsets[i * 2 + 1]) {
handler->addAttribute(
ULISTFMT_LITERAL_FIELD, // id
offsets[i * 2], // index
offsets[i * 2 + 1]); // limit
}
}
}
// Only commit the joined text (and rebase offset onto appendTo) on success.
if (U_SUCCESS(errorCode)) {
if (offset >= 0) {
offset += appendTo.length();
}
appendTo += result;
}
#endif
return appendTo;
}
U_NAMESPACE_END

View File

@ -861,7 +861,7 @@ UnicodeString &MeasureFormat::formatMeasuresSlowTrack(
return appendTo;
}
// Fix up FieldPosition indexes if our field is found.
if (offset != -1) {
if (fieldPositionFoundIndex != -1 && offset != -1) {
pos.setBeginIndex(fpos.getBeginIndex() + offset);
pos.setEndIndex(fpos.getEndIndex() + offset);
}

View File

@ -946,7 +946,7 @@ group: dayperiodrules
group: listformatter
listformatter.o ulistformatter.o
deps
uchar resourcebundle simpleformatter format uclean_i18n formatted_value_iterimpl
uchar resourcebundle simpleformatter format uclean_i18n formatted_value_sbimpl
group: double_conversion
double-conversion-bignum.o double-conversion-double-to-string.o
@ -1056,6 +1056,7 @@ group: formatting
sharedbreakiterator # for reldatefmt.o
uclean_i18n
region
formatted_value_iterimpl # NOTE (2020-08-27): Not currently used.
group: sharedbreakiterator
sharedbreakiterator.o

View File

@ -239,7 +239,10 @@ void IntlTestWithFieldPosition::checkMixedFormattedValue(
assertFalse(baseMessage + u"A after loop: " + CFPosToUnicodeString(cfpos), afterLoopResult);
// Check nextPosition constrained over each category one at a time
for (int32_t category=0; category<UFIELD_CATEGORY_COUNT; category++) {
for (int32_t category=0; category<UFIELD_CATEGORY_COUNT+1; category++) {
if (category == UFIELD_CATEGORY_COUNT+1) {
category = UFIELD_CATEGORY_LIST_SPAN;
}
cfpos.reset();
cfpos.constrainCategory(static_cast<UFieldCategory>(category));
for (int32_t i = 0; i < length; i++) {