ICU-13177 Initial commit of ICU4C NumberFormatter

X-SVN-Rev: 40465
This commit is contained in:
Shane Carr 2017-09-27 00:25:20 +00:00
parent c09ca5d6b9
commit a01fc429cb
44 changed files with 12372 additions and 5 deletions

View File

@ -102,7 +102,13 @@ digitinterval.o digitformatter.o digitaffix.o valueformatter.o \
digitaffixesandpadding.o pluralaffix.o precision.o \
affixpatternparser.o smallintformatter.o decimfmtimpl.o \
visibledigits.o dayperiodrules.o \
nounit.o
nounit.o \
number_affixutils.o number_compact.o number_decimalquantity.o \
number_decimfmtprops.o number_fluent.o number_formatimpl.o number_grouping.o \
number_integerwidth.o number_longnames.o number_modifiers.o number_notation.o \
number_padding.o number_patternmodifier.o number_patternstring.o \
number_rounding.o number_scientific.o number_stringbuilder.o
## Header files to install
HEADERS = $(srcdir)/unicode/*.h

View File

@ -0,0 +1,390 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "number_affixutils.h"
#include "unicode/utf16.h"
using namespace icu::number::impl;
/**
 * Estimates the length of the unescaped form of an affix pattern by walking the quote
 * state machine. Every code point that is not a quoting apostrophe counts as one; an
 * escaped apostrophe ('') counts as one.
 *
 * @param patternString The affix pattern to measure.
 * @param status Set to U_ILLEGAL_ARGUMENT_ERROR if the pattern ends inside an unterminated quote.
 * @return The estimated length.
 */
int32_t AffixUtils::estimateLength(const CharSequence &patternString, UErrorCode &status) {
    AffixPatternState state = STATE_BASE;
    int32_t offset = 0;
    int32_t length = 0;
    for (; offset < patternString.length();) {
        UChar32 cp = patternString.codePointAt(offset);

        switch (state) {
            case STATE_BASE:
                if (cp == '\'') {
                    // First quote
                    state = STATE_FIRST_QUOTE;
                } else {
                    // Unquoted symbol
                    length++;
                }
                break;
            case STATE_FIRST_QUOTE:
                if (cp == '\'') {
                    // Repeated quote
                    length++;
                    state = STATE_BASE;
                } else {
                    // Quoted code point
                    length++;
                    state = STATE_INSIDE_QUOTE;
                }
                break;
            case STATE_INSIDE_QUOTE:
                if (cp == '\'') {
                    // End of quoted sequence
                    state = STATE_AFTER_QUOTE;
                } else {
                    // Quoted code point
                    length++;
                }
                break;
            case STATE_AFTER_QUOTE:
                if (cp == '\'') {
                    // Double quote inside of quoted sequence
                    length++;
                    state = STATE_INSIDE_QUOTE;
                } else {
                    // Unquoted symbol: the previous apostrophe closed the quoted run, so
                    // return to the base state. Otherwise a later opening apostrophe would
                    // be miscounted as an escaped quote.
                    length++;
                    state = STATE_BASE;
                }
                break;
            default:
                U_ASSERT(false);
        }

        offset += U16_LENGTH(cp);
    }

    // Ending right after an opening quote or inside a quoted run means the quote was
    // never terminated.
    if (state == STATE_FIRST_QUOTE || state == STATE_INSIDE_QUOTE) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
    }

    return length;
}
/**
 * Quotes the characters of a literal string that have special meaning in affix pattern
 * syntax ("-", "+", "%", "‰", "¤") and doubles every apostrophe, so that the result
 * unescapes back to the input. Consecutive special characters share one quoted run.
 *
 * @param input The string to be escaped.
 * @return The escaped affix pattern.
 */
UnicodeString AffixUtils::escape(const CharSequence &input) {
    AffixPatternState state = STATE_BASE;
    int32_t offset = 0;
    UnicodeString output;
    for (; offset < input.length();) {
        UChar32 cp = input.codePointAt(offset);

        switch (cp) {
            case '\'':
                // An apostrophe is always written doubled, inside or outside a quoted run.
                output.append(u"''", -1);
                break;

            case '-':
            case '+':
            case '%':
            case u'‰':
            case u'¤':
                if (state == STATE_BASE) {
                    // Open a quoted run before the first special character.
                    output.append('\'');
                    output.append(cp);
                    state = STATE_INSIDE_QUOTE;
                } else {
                    output.append(cp);
                }
                break;

            default:
                if (state == STATE_INSIDE_QUOTE) {
                    // Close the quoted run before emitting an ordinary character.
                    output.append('\'');
                    output.append(cp);
                    state = STATE_BASE;
                } else {
                    output.append(cp);
                }
                break;
        }
        offset += U16_LENGTH(cp);
    }

    // Close a quoted run that is still open at the end of the input.
    if (state == STATE_INSIDE_QUOTE) {
        output.append('\'');
    }

    return output;
}
/**
 * Maps a symbolic affix token type to the number-format field used to annotate the
 * symbol in the output. Only the negative (symbolic) token types are valid arguments.
 */
Field AffixUtils::getFieldForType(AffixPatternType type) {
    switch (type) {
        // Both sign symbols share the sign field.
        case TYPE_MINUS_SIGN:
        case TYPE_PLUS_SIGN:
            return Field::UNUM_SIGN_FIELD;

        case TYPE_PERCENT:
            return Field::UNUM_PERCENT_FIELD;

        case TYPE_PERMILLE:
            return Field::UNUM_PERMILL_FIELD;

        // Every currency width, including the overflow case, is a currency field.
        case TYPE_CURRENCY_SINGLE:
        case TYPE_CURRENCY_DOUBLE:
        case TYPE_CURRENCY_TRIPLE:
        case TYPE_CURRENCY_QUAD:
        case TYPE_CURRENCY_QUINT:
        case TYPE_CURRENCY_OVERFLOW:
            return Field::UNUM_CURRENCY_FIELD;

        default:
            U_ASSERT(false);
            return Field::UNUM_FIELD_COUNT; // suppress "control reaches end of non-void function"
    }
}
int32_t
AffixUtils::unescape(const CharSequence &affixPattern, NumberStringBuilder &output, int32_t position,
const SymbolProvider &provider, UErrorCode &status) {
int32_t length = 0;
AffixTag tag = {0};
while (hasNext(tag, affixPattern)) {
tag = nextToken(tag, affixPattern, status);
if (tag.type == TYPE_CURRENCY_OVERFLOW) {
// Don't go to the provider for this special case
length += output.insertCodePoint(position + length, 0xFFFD, UNUM_CURRENCY_FIELD, status);
} else if (tag.type < 0) {
length += output.insert(
position + length, provider.getSymbol(tag.type), getFieldForType(tag.type), status);
} else {
length += output.insertCodePoint(position + length, tag.codePoint, UNUM_FIELD_COUNT, status);
}
}
return length;
}
int32_t AffixUtils::unescapedCodePointCount(const CharSequence &affixPattern,
const SymbolProvider &provider, UErrorCode &status) {
int32_t length = 0;
AffixTag tag = {0};
while (hasNext(tag, affixPattern)) {
tag = nextToken(tag, affixPattern, status);
if (tag.type == TYPE_CURRENCY_OVERFLOW) {
length += 1;
} else if (tag.type < 0) {
length += provider.getSymbol(tag.type).length();
} else {
length += U16_LENGTH(tag.codePoint);
}
}
return length;
}
bool
AffixUtils::containsType(const CharSequence &affixPattern, AffixPatternType type, UErrorCode &status) {
if (affixPattern.length() == 0) {
return false;
}
AffixTag tag = {0};
while (hasNext(tag, affixPattern)) {
tag = nextToken(tag, affixPattern, status);
if (tag.type == type) {
return true;
}
}
return false;
}
bool AffixUtils::hasCurrencySymbols(const CharSequence &affixPattern, UErrorCode &status) {
if (affixPattern.length() == 0) {
return false;
}
AffixTag tag = {0};
while (hasNext(tag, affixPattern)) {
tag = nextToken(tag, affixPattern, status);
if (tag.type < 0 && getFieldForType(tag.type) == UNUM_CURRENCY_FIELD) {
return true;
}
}
return false;
}
/**
 * Returns a copy of the affix pattern in which the code unit immediately before the end
 * of every token of the given type is overwritten with `replacementChar`.
 *
 * @param affixPattern The source affix pattern (not modified).
 * @param type The token type to replace.
 * @param replacementChar The code unit written in place of each matching token.
 * @param status Receives tokenizer errors; replacement stops at the first error.
 * @return The pattern with the replacements made so far.
 */
UnicodeString AffixUtils::replaceType(const CharSequence &affixPattern, AffixPatternType type,
                                      char16_t replacementChar, UErrorCode &status) {
    UnicodeString output = affixPattern.toUnicodeString();
    if (affixPattern.length() == 0) {
        return output;
    }
    AffixTag tag = {0};
    while (hasNext(tag, affixPattern)) {
        tag = nextToken(tag, affixPattern, status);
        // On error nextToken returns a {-1} tag whose type/offset must not be used.
        if (U_FAILURE(status)) { return output; }
        if (tag.type == type) {
            // tag.offset points just past the token, so the token's last unit is at offset - 1.
            output.replace(tag.offset - 1, 1, replacementChar);
        }
    }
    return output;
}
/**
 * Advances the affix pattern tokenizer by one token.
 *
 * @param tag The tag returned by the previous call, or a tag with offset 0 to start.
 * @param patternString The affix pattern being tokenized.
 * @param status Set to U_ILLEGAL_ARGUMENT_ERROR if the pattern ends inside an unterminated quote.
 * @return A tag describing the token and the position/state to resume from, or a tag
 *         with offset -1 if there are no more tokens or an error occurred.
 */
AffixTag AffixUtils::nextToken(AffixTag tag, const CharSequence &patternString, UErrorCode &status) {
    if (tag.offset < 0) {
        // An end-of-pattern tag: there is nothing left to tokenize.
        return {-1};
    }
    int32_t offset = tag.offset;
    // A start tag created as {0} carries only an offset. Every tag produced by this
    // function has offset >= 1, so offset 0 always means "start in the base state".
    int32_t state = (tag.offset == 0) ? STATE_BASE : tag.state;
    for (; offset < patternString.length();) {
        UChar32 cp = patternString.codePointAt(offset);
        int32_t count = U16_LENGTH(cp);

        switch (state) {
            case STATE_BASE:
                switch (cp) {
                    case '\'':
                        state = STATE_FIRST_QUOTE;
                        offset += count;
                        // continue to the next code point
                        break;
                    case '-':
                        return makeTag(offset + count, TYPE_MINUS_SIGN, STATE_BASE, 0);
                    case '+':
                        return makeTag(offset + count, TYPE_PLUS_SIGN, STATE_BASE, 0);
                    case u'%':
                        return makeTag(offset + count, TYPE_PERCENT, STATE_BASE, 0);
                    case u'‰':
                        return makeTag(offset + count, TYPE_PERMILLE, STATE_BASE, 0);
                    case u'¤':
                        state = STATE_FIRST_CURR;
                        offset += count;
                        // continue to the next code point
                        break;
                    default:
                        return makeTag(offset + count, TYPE_CODEPOINT, STATE_BASE, cp);
                }
                break;
            case STATE_FIRST_QUOTE:
                if (cp == '\'') {
                    // Two apostrophes in a row outside a quote: a literal apostrophe.
                    return makeTag(offset + count, TYPE_CODEPOINT, STATE_BASE, cp);
                } else {
                    return makeTag(offset + count, TYPE_CODEPOINT, STATE_INSIDE_QUOTE, cp);
                }
            case STATE_INSIDE_QUOTE:
                if (cp == '\'') {
                    state = STATE_AFTER_QUOTE;
                    offset += count;
                    // continue to the next code point
                    break;
                } else {
                    return makeTag(offset + count, TYPE_CODEPOINT, STATE_INSIDE_QUOTE, cp);
                }
            case STATE_AFTER_QUOTE:
                if (cp == '\'') {
                    // Two apostrophes in a row inside a quote: a literal apostrophe.
                    return makeTag(offset + count, TYPE_CODEPOINT, STATE_INSIDE_QUOTE, cp);
                } else {
                    state = STATE_BASE;
                    // re-evaluate this code point
                    break;
                }
            case STATE_FIRST_CURR:
                if (cp == u'¤') {
                    state = STATE_SECOND_CURR;
                    offset += count;
                    // continue to the next code point
                    break;
                } else {
                    return makeTag(offset, TYPE_CURRENCY_SINGLE, STATE_BASE, 0);
                }
            case STATE_SECOND_CURR:
                if (cp == u'¤') {
                    state = STATE_THIRD_CURR;
                    offset += count;
                    // continue to the next code point
                    break;
                } else {
                    return makeTag(offset, TYPE_CURRENCY_DOUBLE, STATE_BASE, 0);
                }
            case STATE_THIRD_CURR:
                if (cp == u'¤') {
                    state = STATE_FOURTH_CURR;
                    offset += count;
                    // continue to the next code point
                    break;
                } else {
                    return makeTag(offset, TYPE_CURRENCY_TRIPLE, STATE_BASE, 0);
                }
            case STATE_FOURTH_CURR:
                if (cp == u'¤') {
                    state = STATE_FIFTH_CURR;
                    offset += count;
                    // continue to the next code point
                    break;
                } else {
                    return makeTag(offset, TYPE_CURRENCY_QUAD, STATE_BASE, 0);
                }
            case STATE_FIFTH_CURR:
                if (cp == u'¤') {
                    state = STATE_OVERFLOW_CURR;
                    offset += count;
                    // continue to the next code point
                    break;
                } else {
                    return makeTag(offset, TYPE_CURRENCY_QUINT, STATE_BASE, 0);
                }
            case STATE_OVERFLOW_CURR:
                if (cp == u'¤') {
                    offset += count;
                    // continue to the next code point and loop back to this state
                    break;
                } else {
                    return makeTag(offset, TYPE_CURRENCY_OVERFLOW, STATE_BASE, 0);
                }
            default:
                U_ASSERT(false);
        }
    }
    // End of string
    switch (state) {
        case STATE_BASE:
            // No more tokens in string.
            return {-1};
        case STATE_FIRST_QUOTE:
        case STATE_INSIDE_QUOTE:
            // For consistent behavior with the JDK and ICU 58, set an error here.
            status = U_ILLEGAL_ARGUMENT_ERROR;
            return {-1};
        case STATE_AFTER_QUOTE:
            // No more tokens in string.
            return {-1};
        case STATE_FIRST_CURR:
            return makeTag(offset, TYPE_CURRENCY_SINGLE, STATE_BASE, 0);
        case STATE_SECOND_CURR:
            return makeTag(offset, TYPE_CURRENCY_DOUBLE, STATE_BASE, 0);
        case STATE_THIRD_CURR:
            return makeTag(offset, TYPE_CURRENCY_TRIPLE, STATE_BASE, 0);
        case STATE_FOURTH_CURR:
            return makeTag(offset, TYPE_CURRENCY_QUAD, STATE_BASE, 0);
        case STATE_FIFTH_CURR:
            return makeTag(offset, TYPE_CURRENCY_QUINT, STATE_BASE, 0);
        case STATE_OVERFLOW_CURR:
            return makeTag(offset, TYPE_CURRENCY_OVERFLOW, STATE_BASE, 0);
        default:
            U_ASSERT(false);
            return {-1}; // suppress "control reaches end of non-void function"
    }
}
/**
 * Tells whether another call to nextToken() would yield a token.
 *
 * @param tag The tag from the previous nextToken() call, or the {0} start tag.
 * @param string The affix pattern being tokenized.
 * @return true if more tokens remain; false otherwise.
 */
bool AffixUtils::hasNext(const AffixTag &tag, const CharSequence &string) {
    // Tags written as {-1} or {0} are only guaranteed to carry an offset, so decide
    // those two cases from the offset alone.
    if (tag.offset < 0) {
        return false;
    }
    const int32_t patternLength = string.length();
    if (tag.offset == 0) {
        return patternLength > 0;
    }

    // From here on the remaining fields may be read.
    // A pattern whose only remaining character is the closing apostrophe of the current
    // quoted run has nothing more to produce.
    const bool onlyClosingQuoteLeft = tag.state == STATE_INSIDE_QUOTE
                                      && tag.offset == patternLength - 1
                                      && string.charAt(tag.offset) == '\'';
    if (onlyClosingQuoteLeft) {
        return false;
    }
    if (tag.state == STATE_BASE) {
        return tag.offset < patternLength;
    }
    // In any other state the tokenizer still has pending work.
    return true;
}

View File

@ -0,0 +1,214 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#ifndef NUMBERFORMAT_AFFIXPATTERNUTILS_H
#define NUMBERFORMAT_AFFIXPATTERNUTILS_H
#include <cstdint>
#include "number_types.h"
#include "unicode/stringpiece.h"
#include "unicode/unistr.h"
#include "number_stringbuilder.h"
U_NAMESPACE_BEGIN namespace number {
namespace impl {
/**
 * States of the affix pattern tokenizer (see AffixUtils::nextToken). The state is stored
 * in each AffixTag so that tokenizing can resume where the previous token ended.
 */
enum AffixPatternState {
// Outside of any quoted run or currency run.
STATE_BASE = 0,
// An apostrophe was just read while in the base state.
STATE_FIRST_QUOTE = 1,
// Inside a quoted run; code points are literal.
STATE_INSIDE_QUOTE = 2,
// An apostrophe was read inside a quoted run; it either closes the run or is the
// first half of an escaped apostrophe.
STATE_AFTER_QUOTE = 3,
// One to five consecutive unquoted currency signs have been read.
STATE_FIRST_CURR = 4,
STATE_SECOND_CURR = 5,
STATE_THIRD_CURR = 6,
STATE_FOURTH_CURR = 7,
STATE_FIFTH_CURR = 8,
// Six or more consecutive unquoted currency signs have been read.
STATE_OVERFLOW_CURR = 9
};
// enum AffixPatternType defined in internals.h
struct AffixTag {
int32_t offset;
UChar32 codePoint;
AffixPatternState state;
AffixPatternType type;
AffixTag(int32_t offset) : offset(offset) {}
AffixTag(int32_t offset, UChar32 codePoint, AffixPatternState state, AffixPatternType type)
: offset(offset), codePoint(codePoint), state(state), type(type)
{}
};
/**
 * Interface through which the affix utilities obtain the string for a symbolic
 * token type (sign, percent, permille, currency).
 */
class SymbolProvider {
  public:
    // Implementations are used polymorphically, so destruction through a base pointer
    // must be well defined.
    virtual ~SymbolProvider() = default;

    // TODO: Could this be more efficient if it returned by reference?
    virtual UnicodeString getSymbol(AffixPatternType type) const = 0;
};
/**
* Performs manipulations on affix patterns: the prefix and suffix strings associated with a decimal
* format pattern. For example:
*
* <table>
* <tr><th>Affix Pattern</th><th>Example Unescaped (Formatted) String</th></tr>
* <tr><td>abc</td><td>abc</td></tr>
* <tr><td>ab-</td><td>ab</td></tr>
* <tr><td>ab'-'</td><td>ab-</td></tr>
* <tr><td>ab''</td><td>ab'</td></tr>
* </table>
*
* To manually iterate over tokens in a literal string, use the following pattern, which is designed
* to be efficient.
*
* <pre>
* AffixTag tag = {0};
* while (AffixUtils::hasNext(tag, patternString)) {
* tag = AffixUtils::nextToken(tag, patternString, status);
* if (U_FAILURE(status)) { break; }
* switch (tag.type) {
* case TYPE_MINUS_SIGN:
* // Current token is a minus sign.
* break;
* case TYPE_PLUS_SIGN:
* // Current token is a plus sign.
* break;
* case TYPE_PERCENT:
* // Current token is a percent sign.
* break;
* // ... other types ...
* default:
* // Current token is an arbitrary code point.
* // The code point is available in tag.codePoint.
* break;
* }
* }
* </pre>
*/
class AffixUtils {
public:
/**
* Estimates the number of code points present in an unescaped version of the affix pattern string
* (one that would be returned by {@link #unescape}), assuming that all interpolated symbols
* consume one code point and that currencies consume as many code points as their symbol width.
* Used for computing padding width.
*
* @param patternString The original string whose width will be estimated.
* @param status Set to U_ILLEGAL_ARGUMENT_ERROR if the pattern ends inside an unterminated quote.
* @return The length of the unescaped string.
*/
static int32_t estimateLength(const CharSequence &patternString, UErrorCode &status);
/**
* Takes a string and escapes (quotes) characters that have special meaning in the affix pattern
* syntax. This function does not reverse-lookup symbols.
*
* <p>Example input: "-$x"; example output: "'-'$x"
*
* @param input The string to be escaped.
* @return The resulting UnicodeString.
*/
static UnicodeString escape(const CharSequence &input);
/**
* Returns the number-format field associated with a symbolic token type (sign, percent,
* permille, or currency). Must not be called with a non-symbolic type.
*/
static Field getFieldForType(AffixPatternType type);
/**
* Executes the unescape state machine. Replaces the unquoted characters "-", "+", "%", "‰", and
* "¤" with the corresponding symbols provided by the {@link SymbolProvider}, and inserts the
* result into the NumberStringBuilder at the requested location.
*
* <p>Example input: "'-'¤x"; example output: "-$x"
*
* @param affixPattern The original string to be unescaped.
* @param output The NumberStringBuilder to mutate with the result.
* @param position The index into the NumberStringBuilder to insert the string.
* @param provider An object to generate locale symbols.
* @param status Receives any error from tokenizing the pattern or inserting into the builder.
* @return The total length inserted into the NumberStringBuilder.
*/
static int32_t
unescape(const CharSequence &affixPattern, NumberStringBuilder &output, int32_t position,
const SymbolProvider &provider, UErrorCode &status);
/**
* Same as {@link #unescape}, but only calculates the code point count. More efficient than {@link #unescape}
* if you only need the length but not the string itself.
*
* @param affixPattern The original string to be unescaped.
* @param provider An object to generate locale symbols.
* @param status Receives any error from tokenizing the pattern.
* @return The same return value as if you called {@link #unescape}.
*/
static int32_t unescapedCodePointCount(const CharSequence &affixPattern,
const SymbolProvider &provider, UErrorCode &status);
/**
* Checks whether the given affix pattern contains at least one token of the given type, which is
* one of the "TYPE_" constants of AffixPatternType.
*
* @param affixPattern The affix pattern to check.
* @param type The token type.
* @param status Receives any error from tokenizing the pattern.
* @return true if the affix pattern contains the given token type; false otherwise.
*/
static bool
containsType(const CharSequence &affixPattern, AffixPatternType type, UErrorCode &status);
/**
* Checks whether the specified affix pattern has any unquoted currency symbols ("¤").
*
* @param affixPattern The string to check for currency symbols.
* @param status Receives any error from tokenizing the pattern.
* @return true if the literal has at least one unquoted currency symbol; false otherwise.
*/
static bool hasCurrencySymbols(const CharSequence &affixPattern, UErrorCode &status);
/**
* Replaces all occurrences of tokens with the given type with the given replacement char.
*
* @param affixPattern The source affix pattern (does not get modified).
* @param type The token type.
* @param replacementChar The char to substitute in place of chars of the given token type.
* @param status Receives any error from tokenizing the pattern.
* @return A string containing the new affix pattern.
*/
static UnicodeString
replaceType(const CharSequence &affixPattern, AffixPatternType type, char16_t replacementChar,
UErrorCode &status);
/**
* Returns the next token from the affix pattern.
*
* @param tag The tag returned by the previous call, used for keeping track of state from token
* to token. The initial value should be a tag initialized as {0}.
* @param patternString The affix pattern.
* @param status Set to U_ILLEGAL_ARGUMENT_ERROR if the pattern ends inside an unterminated quote.
* @return The tag to pass to the next call of this method to retrieve the following token,
* or a tag with offset -1 if there were no more tokens in the affix pattern.
* @see #hasNext
*/
static AffixTag nextToken(AffixTag tag, const CharSequence &patternString, UErrorCode &status);
/**
* Returns whether the affix pattern string has any more tokens to be retrieved from a call to
* {@link #nextToken}.
*
* @param tag The tag of the previous token, as returned by {@link #nextToken}.
* @param string The affix pattern.
* @return true if there are more tokens to consume; false otherwise.
*/
static bool hasNext(const AffixTag &tag, const CharSequence &string);
private:
/**
* Encodes the given values into a tag struct.
* The order of the arguments is consistent with Java, but the order of the stored
* fields is not necessarily the same.
*/
static inline AffixTag
makeTag(int32_t offset, AffixPatternType type, AffixPatternState state, UChar32 cp) {
return {offset, cp, state, type};
}
};
} // namespace impl
} // namespace number
U_NAMESPACE_END
#endif //NUMBERFORMAT_AFFIXPATTERNUTILS_H

View File

@ -0,0 +1,315 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "resource.h"
#include "number_compact.h"
#include "unicode/ustring.h"
#include "unicode/ures.h"
#include <cstring>
#include <charstr.h>
#include <uresimp.h>
using namespace icu::number::impl;
namespace {
// A dummy object used when a "0" compact decimal entry is encountered. This is necessary
// in order to prevent falling back to root. Object equality ("==") is intended: code in this
// file compares pattern pointers against this pointer and never inspects the text.
const UChar *USE_FALLBACK = u"<USE FALLBACK>";
/**
 * Builds the resource path for a compact pattern table, for example
 * "NumberElements/latn/patternsShort/decimalFormat", into `sb` (which is cleared first).
 */
void getResourceBundleKey(const char *nsName, CompactStyle compactStyle, CompactType compactType,
                          CharString &sb, UErrorCode &status) {
    // Pick the two trailing path segments up front.
    const char *styleSegment = "/patternsLong";
    if (compactStyle == CompactStyle::UNUM_SHORT) {
        styleSegment = "/patternsShort";
    }
    const char *typeSegment = "/currencyFormat";
    if (compactType == CompactType::TYPE_DECIMAL) {
        typeSegment = "/decimalFormat";
    }

    sb.clear();
    sb.append("NumberElements/", status);
    sb.append(nsName, status);
    sb.append(styleSegment, status);
    sb.append(typeSegment, status);
}
/** Maps a (magnitude, plural form) pair to its slot in the flattened patterns array. */
int32_t getIndex(int32_t magnitude, StandardPlural::Form plural) {
    // Each magnitude owns a row of StandardPlural::COUNT consecutive slots.
    const int32_t rowStart = magnitude * StandardPlural::COUNT;
    return rowStart + plural;
}
/**
 * Returns the length of the first contiguous run of '0' characters in the pattern,
 * or 0 if the pattern contains none.
 */
int32_t countZeros(const UChar *patternString, int32_t patternLength) {
    // NOTE: This strategy for computing the number of zeros is a hack for efficiency.
    // It could break if there are any 0s that aren't part of the main pattern.
    int32_t pos = 0;
    // Skip everything before the first zero.
    while (pos < patternLength && patternString[pos] != u'0') {
        pos++;
    }
    // Measure the run; zeros should always be contiguous, so stop at the first non-zero.
    const int32_t runStart = pos;
    while (pos < patternLength && patternString[pos] == u'0') {
        pos++;
    }
    return pos - runStart;
}
} // namespace
// Creates an empty CompactData with no patterns loaded; populate() fills it in.
// NOTE: patterns and multipliers both get zero-initialized.
CompactData::CompactData() : patterns(), multipliers(), largestMagnitude(0), isEmpty(TRUE) {
}
void CompactData::populate(const Locale &locale, const char *nsName, CompactStyle compactStyle,
CompactType compactType, UErrorCode &status) {
CompactDataSink sink(*this);
LocalUResourceBundlePointer rb(ures_open(nullptr, locale.getName(), &status));
bool nsIsLatn = strcmp(nsName, "latn") == 0;
bool compactIsShort = compactStyle == CompactStyle::UNUM_SHORT;
// Fall back to latn numbering system and/or short compact style.
CharString resourceKey;
getResourceBundleKey(nsName, compactStyle, compactType, resourceKey, status);
UErrorCode localStatus = U_ZERO_ERROR;
ures_getAllItemsWithFallback(rb.getAlias(), resourceKey.data(), sink, localStatus);
if (isEmpty && !nsIsLatn) {
getResourceBundleKey("latn", compactStyle, compactType, resourceKey, status);
localStatus = U_ZERO_ERROR;
ures_getAllItemsWithFallback(rb.getAlias(), resourceKey.data(), sink, localStatus);
}
if (isEmpty && !compactIsShort) {
getResourceBundleKey(nsName, CompactStyle::UNUM_SHORT, compactType, resourceKey, status);
localStatus = U_ZERO_ERROR;
ures_getAllItemsWithFallback(rb.getAlias(), resourceKey.data(), sink, localStatus);
}
if (isEmpty && !nsIsLatn && !compactIsShort) {
getResourceBundleKey("latn", CompactStyle::UNUM_SHORT, compactType, resourceKey, status);
localStatus = U_ZERO_ERROR;
ures_getAllItemsWithFallback(rb.getAlias(), resourceKey.data(), sink, localStatus);
}
// The last fallback should be guaranteed to return data.
if (isEmpty) {
status = U_INTERNAL_PROGRAM_ERROR;
}
}
/**
 * Returns the stored multiplier for the given magnitude. Negative magnitudes yield 0;
 * magnitudes above the largest one loaded use the entry for the largest.
 */
int32_t CompactData::getMultiplier(int32_t magnitude) const {
    if (magnitude < 0) {
        return 0;
    }
    const int32_t clamped = (magnitude > largestMagnitude) ? largestMagnitude : magnitude;
    return multipliers[clamped];
}
/**
 * Looks up the compact pattern for a magnitude and plural form.
 *
 * @param magnitude The power of ten; values above the largest loaded magnitude are clamped.
 * @param plural The plural form; if it has no entry, the "other" form is used instead.
 * @return The pattern string, or nullptr if the magnitude is negative, no pattern is
 *         stored, or the stored entry is the USE_FALLBACK marker.
 */
const UChar *CompactData::getPattern(int32_t magnitude, StandardPlural::Form plural) const {
    if (magnitude < 0) {
        return nullptr;
    }
    const int32_t clamped = (magnitude > largestMagnitude) ? largestMagnitude : magnitude;

    const UChar *result = patterns[getIndex(clamped, plural)];
    if (result == nullptr && plural != StandardPlural::OTHER) {
        // Fall back to "other" plural variant
        result = patterns[getIndex(clamped, StandardPlural::OTHER)];
    }
    // Pointer comparison is intended: USE_FALLBACK is a marker, not text.
    return (result == USE_FALLBACK) ? nullptr : result;
}
/**
 * Appends each distinct stored pattern string to `output`, comparing by content.
 * Empty slots and USE_FALLBACK markers are skipped.
 *
 * @param output Must be empty on entry; receives pointers to the stored patterns.
 * @param status Receives any error from adding to the vector.
 */
void CompactData::getUniquePatterns(UVector &output, UErrorCode &status) const {
    U_ASSERT(output.isEmpty());
    // NOTE: In C++, this is done more manually with a UVector.
    // In Java, we can take advantage of JDK HashSet.
    for (const UChar *candidate : patterns) {
        if (candidate == nullptr || candidate == USE_FALLBACK) {
            continue;
        }

        // Look for an equal string already in the vector. Search from the end since
        // identical patterns are likely to be adjacent.
        bool alreadyPresent = false;
        for (int32_t i = output.size() - 1; i >= 0 && !alreadyPresent; i--) {
            alreadyPresent = u_strcmp(candidate, static_cast<const UChar *>(output[i])) == 0;
        }

        if (!alreadyPresent) {
            // The string was not found; add it to the UVector.
            // ANDY: This requires a const_cast. Why?
            output.addElement(const_cast<UChar *>(candidate), status);
        }
    }
}
/**
 * Resource sink callback: reads a table keyed by powers of ten ("1000", "10000", ...),
 * each holding a table of plural variants, and stores the patterns and multipliers
 * into the owning CompactData. Entries already present are not overwritten.
 *
 * @param key Reused as the iteration key for the nested tables.
 * @param value The resource value for the table being visited.
 * @param status Receives any error from reading the resource data.
 */
void CompactData::CompactDataSink::put(const char *key, ResourceValue &value, UBool /*noFallback*/,
                                       UErrorCode &status) {
    // traverse into the table of powers of ten
    ResourceTable powersOfTenTable = value.getTable(status);
    if (U_FAILURE(status)) { return; }
    for (int i3 = 0; powersOfTenTable.getKeyAndValue(i3, key, value); ++i3) {

        // Assumes that the keys are always of the form "10000" where the magnitude is the
        // length of the key minus one. We expect magnitudes to be less than MAX_DIGITS.
        // Validate before indexing: an empty or over-long key would otherwise read and
        // write outside the multipliers/patterns arrays in builds without assertions.
        const size_t keyLength = strlen(key);
        if (keyLength == 0 || keyLength > static_cast<size_t>(COMPACT_MAX_DIGITS)) {
            continue;
        }
        auto magnitude = static_cast<int8_t> (keyLength - 1);
        U_ASSERT(magnitude < COMPACT_MAX_DIGITS);
        int8_t multiplier = data.multipliers[magnitude];

        // Iterate over the plural variants ("one", "other", etc)
        ResourceTable pluralVariantsTable = value.getTable(status);
        if (U_FAILURE(status)) { return; }
        for (int i4 = 0; pluralVariantsTable.getKeyAndValue(i4, key, value); ++i4) {

            // Skip this magnitude/plural if we already have it from a child locale.
            // Note: This also skips USE_FALLBACK entries.
            StandardPlural::Form plural = StandardPlural::fromString(key, status);
            if (U_FAILURE(status)) { return; }
            if (data.patterns[getIndex(magnitude, plural)] != nullptr) {
                continue;
            }

            // The value "0" means that we need to use the default pattern and not fall back
            // to parent locales. Example locale where this is relevant: 'it'.
            int32_t patternLength;
            const UChar *patternString = value.getString(patternLength, status);
            if (U_FAILURE(status)) { return; }
            if (u_strcmp(patternString, u"0") == 0) {
                patternString = USE_FALLBACK;
                patternLength = 0;
            }

            // Save the pattern string. We will parse it lazily.
            data.patterns[getIndex(magnitude, plural)] = patternString;

            // If necessary, compute the multiplier: the difference between the magnitude
            // and the number of zeros in the pattern.
            if (multiplier == 0) {
                int32_t numZeros = countZeros(patternString, patternLength);
                if (numZeros > 0) { // numZeros==0 in certain cases, like Somali "Kun"
                    multiplier = static_cast<int8_t> (numZeros - magnitude - 1);
                }
            }
        }

        // Save the multiplier.
        if (data.multipliers[magnitude] == 0) {
            data.multipliers[magnitude] = multiplier;
            if (magnitude > data.largestMagnitude) {
                data.largestMagnitude = magnitude;
            }
            data.isEmpty = false;
        } else {
            U_ASSERT(data.multipliers[magnitude] == multiplier);
        }
    }
}
///////////////////////////////////////////////////////////
/// END OF CompactData.java; BEGIN CompactNotation.java ///
///////////////////////////////////////////////////////////
/**
 * Loads the compact data for the locale and, when a build reference is supplied,
 * precomputes one immutable modifier per distinct pattern (the "safe" code path).
 *
 * @param buildReference If non-null, used to precompute modifiers; if null, patterns
 *                       are applied at format time instead (the "unsafe" code path).
 * @param parent The generator invoked first by processQuantity().
 * @param status Receives any error from loading data or building modifiers.
 */
CompactHandler::CompactHandler(CompactStyle compactStyle, const Locale &locale, const char *nsName,
                               CompactType compactType, const PluralRules *rules,
                               MutablePatternModifier *buildReference, const MicroPropsGenerator *parent,
                               UErrorCode &status)
        // `safe` is set here so that it has a defined value even when loading fails below.
        : rules(rules), parent(parent), safe(buildReference != nullptr ? TRUE : FALSE) {
    data.populate(locale, nsName, compactStyle, compactType, status);
    if (U_FAILURE(status)) { return; }
    if (buildReference != nullptr) {
        // Safe code path
        precomputeAllModifiers(*buildReference, status);
    }
    // Otherwise: unsafe code path, nothing to precompute.
}
// Deletes the modifiers stored in the first precomputedModsLength entries of precomputedMods.
CompactHandler::~CompactHandler() {
for (int32_t i = 0; i < precomputedModsLength; i++) {
delete precomputedMods[i].mod;
}
}
/**
 * Builds one immutable modifier for every distinct compact pattern and stores it,
 * together with the pattern pointer and its integer digit count, in precomputedMods.
 *
 * @param buildReference The mutable modifier used as a template for each immutable one.
 * @param status Receives any error from collecting patterns, allocating, parsing, or
 *               creating a modifier. On error, precomputedModsLength covers only the
 *               entries that were initialized.
 */
void CompactHandler::precomputeAllModifiers(MutablePatternModifier &buildReference, UErrorCode &status) {
    // Initial capacity of 12 for 0K, 00K, 000K, ...M, ...B, and ...T
    UVector allPatterns(12, status);
    if (U_FAILURE(status)) { return; }
    data.getUniquePatterns(allPatterns, status);
    if (U_FAILURE(status)) { return; }

    // C++ only: ensure that precomputedMods has room.
    // MaybeStackArray::resize reports failure by returning nullptr; it has no UErrorCode
    // parameter, so the error code must be set here.
    const int32_t patternCount = allPatterns.size();
    if (precomputedMods.getCapacity() < patternCount) {
        if (precomputedMods.resize(patternCount) == nullptr) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
    }

    for (int32_t i = 0; i < patternCount; i++) {
        auto patternString = static_cast<const UChar *>(allPatterns[i]);
        CompactModInfo &info = precomputedMods[i];

        // Put the entry into a destructible state before anything can fail, and only
        // then count it, so the destructor never deletes an uninitialized pointer.
        info.mod = nullptr;
        info.patternString = patternString;
        info.numDigits = 0;
        precomputedModsLength = i + 1;

        ParsedPatternInfo patternInfo;
        PatternParser::parseToPatternInfo(UnicodeString(patternString), patternInfo, status);
        if (U_FAILURE(status)) { return; }
        buildReference.setPatternInfo(&patternInfo);
        info.mod = buildReference.createImmutable(status);
        if (U_FAILURE(status)) { return; }
        info.numDigits = patternInfo.positive.integerTotal;
    }
}
/**
* Runs the parent generator, rounds the quantity, selects the compact pattern for the
* resulting magnitude and plural form, and applies the matching modifier to `micros`.
* Rounding in `micros` is replaced by a pass-through rounder afterwards.
*
* @param quantity The number being formatted; rounded in place.
* @param micros The micro-properties to fill in.
* @param status Receives any error from the parent, rounding, or pattern parsing.
*/
void CompactHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micros,
UErrorCode &status) const {
parent->processQuantity(quantity, micros, status);
if (U_FAILURE(status)) { return; }
// Treat zero as if it had magnitude 0
int magnitude;
if (quantity.isZero()) {
magnitude = 0;
micros.rounding.apply(quantity, status);
} else {
// TODO: Revisit chooseMultiplierAndApply
int multiplier = micros.rounding.chooseMultiplierAndApply(quantity, data, status);
magnitude = quantity.isZero() ? 0 : quantity.getMagnitude();
magnitude -= multiplier;
}
StandardPlural::Form plural = quantity.getStandardPlural(rules);
const UChar *patternString = data.getPattern(magnitude, plural);
// NOTE(review): numDigits is assigned below but not read anywhere in this function yet
// (see the FIXME near the end).
int numDigits = -1;
if (patternString == nullptr) {
// Use the default (non-compact) modifier.
// No need to take any action.
} else if (safe) {
// Safe code path.
// Java uses a hash set here for O(1) lookup. C++ uses a linear search.
// TODO: Benchmark this and maybe change to a binary search or hash table.
int32_t i = 0;
for (; i < precomputedModsLength; i++) {
const CompactModInfo &info = precomputedMods[i];
if (u_strcmp(patternString, info.patternString) == 0) {
info.mod->applyToMicros(micros, quantity);
numDigits = info.numDigits;
break;
}
}
// It should be guaranteed that we found the entry.
U_ASSERT(i < precomputedModsLength);
} else {
// Unsafe code path.
// Overwrite the PatternInfo in the existing modMiddle.
// C++ Note: Use unsafePatternInfo for proper lifecycle.
// This mutates a member of a const object via const_cast.
ParsedPatternInfo &patternInfo = const_cast<CompactHandler *>(this)->unsafePatternInfo;
PatternParser::parseToPatternInfo(UnicodeString(patternString), patternInfo, status);
// NOTE(review): status is not checked after parsing, and the dynamic_cast result is
// dereferenced without a null check; this presumably relies on modMiddle always being
// a MutablePatternModifier on this path. Confirm against the caller.
dynamic_cast<MutablePatternModifier *>(const_cast<Modifier *>(micros.modMiddle))->setPatternInfo(
&patternInfo);
numDigits = patternInfo.positive.integerTotal;
}
// FIXME: Deal with numDigits == 0 (Awaiting a test case)
// We already performed rounding. Do not perform it again.
micros.rounding = Rounder::constructPassThrough();
}

View File

@ -0,0 +1,86 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#ifndef NUMBERFORMAT_NUMFMTTER_COMPACT_H
#define NUMBERFORMAT_NUMFMTTER_COMPACT_H
#include <standardplural.h>
#include "number_types.h"
#include "unicode/unum.h"
#include "uvector.h"
#include "resource.h"
#include "number_patternmodifier.h"
U_NAMESPACE_BEGIN namespace number {
namespace impl {
static const int32_t COMPACT_MAX_DIGITS = 15;
/**
* Holds the compact-notation patterns and multipliers loaded from locale data for one
* numbering system, style and type.
*/
class CompactData : public MultiplierProducer {
public:
CompactData();
// Loads the data, falling back to "latn" and/or the short style if nothing is found.
void populate(const Locale &locale, const char *nsName, CompactStyle compactStyle,
CompactType compactType, UErrorCode &status);
// Returns the multiplier stored for the magnitude (clamped to the largest loaded one).
int32_t getMultiplier(int32_t magnitude) const override;
// Returns the pattern for the magnitude and plural form, or nullptr if there is none.
const UChar *getPattern(int32_t magnitude, StandardPlural::Form plural) const;
// Appends each distinct stored pattern to output, which must be empty on entry.
void getUniquePatterns(UVector &output, UErrorCode &status) const;
private:
// Flattened table with StandardPlural::COUNT slots per magnitude.
const UChar *patterns[(COMPACT_MAX_DIGITS + 1) * StandardPlural::COUNT];
// One multiplier per magnitude; 0 means "not set".
int8_t multipliers[COMPACT_MAX_DIGITS + 1];
// Highest magnitude for which a multiplier has been stored.
int8_t largestMagnitude;
// TRUE until the sink stores the first multiplier.
UBool isEmpty;
// Resource sink that writes loaded patterns and multipliers into a CompactData.
class CompactDataSink : public ResourceSink {
public:
explicit CompactDataSink(CompactData &data) : data(data) {}
void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override;
private:
CompactData &data;
};
};
/** One precomputed modifier of CompactHandler's safe code path. */
struct CompactModInfo {
// The immutable modifier built for the pattern; deleted by ~CompactHandler.
const ImmutablePatternModifier *mod;
// The pattern string the modifier was built from; used as the lookup key.
const UChar* patternString;
// The integer digit count of the parsed pattern (positive.integerTotal).
int32_t numDigits;
};
/**
* MicroPropsGenerator that applies compact notation: it rounds the quantity, picks the
* compact pattern for its magnitude and plural form, and applies the matching modifier.
*/
class CompactHandler : public MicroPropsGenerator, public UMemory {
public:
// If buildReference is non-null, modifiers are precomputed (safe path); otherwise the
// pattern is parsed during processQuantity (unsafe path).
CompactHandler(CompactStyle compactStyle, const Locale &locale, const char *nsName,
CompactType compactType, const PluralRules *rules,
MutablePatternModifier *buildReference, const MicroPropsGenerator *parent,
UErrorCode &status);
~CompactHandler() override;
void
processQuantity(DecimalQuantity &quantity, MicroProps &micros, UErrorCode &status) const override;
private:
const PluralRules *rules;
const MicroPropsGenerator *parent;
// Initial capacity of 12 for 0K, 00K, 000K, ...M, ...B, and ...T
MaybeStackArray<CompactModInfo, 12> precomputedMods;
// Number of entries of precomputedMods in use.
int32_t precomputedModsLength = 0;
CompactData data;
// Scratch pattern info written by processQuantity on the unsafe path.
ParsedPatternInfo unsafePatternInfo;
// TRUE when modifiers were precomputed from a build reference.
UBool safe;
/** Used by the safe code path */
void precomputeAllModifiers(MutablePatternModifier &buildReference, UErrorCode &status);
};
} // namespace impl
} // namespace number
U_NAMESPACE_END
#endif //NUMBERFORMAT_NUMFMTTER_COMPACT_H

View File

@ -0,0 +1,995 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include <uassert.h>
#include <cmath>
#include <cmemory.h>
#include <decNumber.h>
#include <limits>
#include "number_decimalquantity.h"
#include "decContext.h"
#include "decNumber.h"
#include "number_roundingutils.h"
#include "unicode/plurrule.h"
using namespace icu::number::impl;
namespace {
// Bit values for special-value flags. They are constants, so they are declared
// const to prevent accidental modification.
const int8_t NEGATIVE_FLAG = 1;
const int8_t INFINITY_FLAG = 2;
const int8_t NAN_FLAG = 4;
/**
* Helper function to convert a decNumber-compatible string into a decNumber.
* Uses a context with half-even rounding, no traps, and up to 34 digits.
*
* @param n The number in decNumber string syntax.
* @param dn Receives the parsed number.
*/
void stringToDecNumber(StringPiece n, decNumber &dn) {
decContext set;
uprv_decContextDefault(&set, DEC_INIT_BASE);
uprv_decContextSetRounding(&set, DEC_ROUND_HALF_EVEN);
set.traps = 0; // no traps, thank you
set.digits = 34; // work with up to 34 digits
// NOTE(review): only n.data() is passed, without n's length; this presumably requires
// the StringPiece to be NUL-terminated. Confirm against callers.
uprv_decNumberFromString(&dn, n.data(), &set);
U_ASSERT(DECDPUN == 1);
}
/**
 * Helper function for safe subtraction (no overflow).
 * Returns a - b, clamped to INT32_MAX or INT32_MIN when the exact difference does not
 * fit in an int32_t.
 */
inline int32_t safeSubtract(int32_t a, int32_t b) {
    // Subtract in 64 bits: checking for overflow after a 32-bit subtraction is too late,
    // because the signed overflow itself is undefined behavior.
    const int64_t diff = static_cast<int64_t>(a) - static_cast<int64_t>(b);
    if (diff > INT32_MAX) { return INT32_MAX; }
    if (diff < INT32_MIN) { return INT32_MIN; }
    return static_cast<int32_t>(diff);
}
// Powers of ten from 1e0 through 1e21, indexed by exponent. Used when scaling doubles by a
// power of ten; exponents of 22 and above are handled by the callers with repeated 1e22 steps.
// The table is read-only, so it is declared const.
static const double DOUBLE_MULTIPLIERS[] = {
        1e0,
        1e1,
        1e2,
        1e3,
        1e4,
        1e5,
        1e6,
        1e7,
        1e8,
        1e9,
        1e10,
        1e11,
        1e12,
        1e13,
        1e14,
        1e15,
        1e16,
        1e17,
        1e18,
        1e19,
        1e20,
        1e21};
} // namespace
/** Constructs a quantity whose value is zero, with no negative/infinity/NaN flag set. */
DecimalQuantity::DecimalQuantity() {
    setBcdToZero();
    // setBcdToZero() does not touch `flags`, and the field is declared without a default
    // initializer, so clear it here before isNegative()/isInfinite()/isNaN() can read it.
    flags = 0;
}
/** Frees the heap-allocated digit array when this instance is in byte mode. */
DecimalQuantity::~DecimalQuantity() {
    if (!usingBytes) {
        // Long mode owns no heap memory.
        return;
    }
    delete[] fBCD.bcdBytes.ptr;
    fBCD.bcdBytes.ptr = nullptr;
    usingBytes = false;
}
/**
 * Copy constructor. Delegates to the copy-assignment operator.
 *
 * NOTE(review): the assignment path inspects `usingBytes` before overwriting it; presumably the
 * class declaration gives that member a default initializer -- confirm.
 */
DecimalQuantity::DecimalQuantity(const DecimalQuantity &other) {
    operator=(other);
}
/**
 * Copy assignment: makes this instance a copy of `other`. Self-assignment is a no-op.
 *
 * @param other The instance to copy from.
 * @return A reference to this instance.
 */
DecimalQuantity &DecimalQuantity::operator=(const DecimalQuantity &other) {
    if (this != &other) {
        // copyBcdFrom() resets this instance's digit storage (including scale, precision and
        // the double fast-path fields), so the scalar fields are assigned afterwards.
        copyBcdFrom(other);

        // Digit bookkeeping and flags
        precision = other.precision;
        scale = other.scale;
        flags = other.flags;

        // Display bounds
        lOptPos = other.lOptPos;
        lReqPos = other.lReqPos;
        rReqPos = other.rReqPos;
        rOptPos = other.rOptPos;

        // Double fast-path state
        isApproximate = other.isApproximate;
        origDouble = other.origDouble;
        origDelta = other.origDelta;
    }
    return *this;
}
/** Resets the value to zero, clears all flags, and restores the default display bounds. */
void DecimalQuantity::clear() {
    // Resets scale, precision, isApproximate, origDouble, origDelta, and the BCD data.
    setBcdToZero();
    flags = 0;
    // Default bounds: no required digits, and unlimited optional digits on both sides.
    lReqPos = 0;
    rReqPos = 0;
    lOptPos = INT32_MAX;
    rOptPos = INT32_MIN;
}
/**
 * Records the minimum and maximum number of integer digits to display. No rounding is done.
 *
 * @param minInt The minimum number of integer digits; must be non-negative.
 * @param maxInt The maximum number of integer digits; must be at least minInt.
 */
void DecimalQuantity::setIntegerLength(int32_t minInt, int32_t maxInt) {
    // Validation should happen outside of DecimalQuantity, e.g., in the Rounder class; these
    // assertions only catch programming errors.
    U_ASSERT(minInt >= 0);
    U_ASSERT(maxInt >= minInt);

    // Integer bounds are stored as given (no sign change is needed on the left side).
    lReqPos = minInt;
    lOptPos = maxInt;
}
/**
 * Records the minimum and maximum number of fraction digits to display. No rounding is done.
 *
 * @param minFrac The minimum number of fraction digits; must be non-negative.
 * @param maxFrac The maximum number of fraction digits; must be at least minFrac.
 */
void DecimalQuantity::setFractionLength(int32_t minFrac, int32_t maxFrac) {
    // Validation should happen outside of DecimalQuantity, e.g., in the Rounder class; these
    // assertions only catch programming errors.
    U_ASSERT(minFrac >= 0);
    U_ASSERT(maxFrac >= minFrac);

    // Fraction bounds are stored as negative magnitudes. Negating is safe because both
    // arguments are non-negative.
    rOptPos = -maxFrac;
    rReqPos = -minFrac;
}
/**
 * Combines the four display bounds into a single 64-bit value. For internal testing only.
 *
 * Each bound is widened to uint64_t before it is shifted. Shifting through `long` is undefined
 * where `long` is 32 bits wide, and shifting lReqPos as a 32-bit int can overflow.
 *
 * @return lOptPos XOR (lReqPos << 16) XOR (rReqPos << 32) XOR (rOptPos << 48), computed in
 *         unsigned 64-bit arithmetic.
 */
uint64_t DecimalQuantity::getPositionFingerprint() const {
    uint64_t fingerprint = 0;
    fingerprint ^= static_cast<uint64_t>(lOptPos);
    fingerprint ^= (static_cast<uint64_t>(lReqPos) << 16);
    fingerprint ^= (static_cast<uint64_t>(rReqPos) << 32);
    fingerprint ^= (static_cast<uint64_t>(rOptPos) << 48);
    return fingerprint;
}
/**
 * Rounds the value to a multiple of roundingIncrement (for example, 0.05).
 *
 * @param roundingIncrement The increment to which to round.
 * @param roundingMode      The rounding mode to apply.
 * @param status            Passed through to roundToMagnitude().
 */
void DecimalQuantity::roundToIncrement(double roundingIncrement, RoundingMode roundingMode,
                                       UErrorCode& status) {
    // TODO: This is inefficient. Improve?
    // TODO: Should we convert to decNumber instead?

    // Express the value as a count of increments...
    setToDouble(toDouble() / roundingIncrement);
    // ...round that count to a whole number...
    roundToMagnitude(0, roundingMode, status);
    // ...and scale back to the original units.
    setToDouble(toDouble() * roundingIncrement);
}
/**
 * Multiplies the value by an integer. Infinite, zero, and NaN values are left untouched.
 *
 * @param multiplicand The value by which to multiply.
 */
void DecimalQuantity::multiplyBy(int32_t multiplicand) {
    if (!isInfinite() && !isZero() && !isNaN()) {
        // TODO: Should we convert to decNumber instead?
        setToDouble(toDouble() * multiplicand);
    }
}
/**
 * @return The power of ten of the most significant stored digit. The value must not be zero
 *         (precision != 0).
 */
int32_t DecimalQuantity::getMagnitude() const {
    U_ASSERT(precision != 0);
    // One past the magnitude of the most significant digit.
    const int32_t upperBound = scale + precision;
    return upperBound - 1;
}
/**
 * Scales the value by 10^delta. Has no effect when the value is zero (precision == 0).
 *
 * @param delta The number of powers of ten to shift by.
 */
void DecimalQuantity::adjustMagnitude(int32_t delta) {
    if (precision == 0) {
        return;
    }
    // Keep the saved double's delta in step with the BCD scale.
    origDelta += delta;
    scale += delta;
}
/**
 * Selects the plural form of this number under the given rules.
 *
 * @param rules The plural rules to apply; may be nullptr.
 * @return The form produced by StandardPlural::orOtherFromString() for the selected keyword, or
 *         OTHER when rules is nullptr.
 */
StandardPlural::Form DecimalQuantity::getStandardPlural(const PluralRules *rules) const {
    // Fail gracefully if the user didn't provide a PluralRules
    if (rules == nullptr) {
        return StandardPlural::Form::OTHER;
    }
    return StandardPlural::orOtherFromString(rules->select(*this));
}
/**
 * Returns the requested plural operand of this number as a double.
 *
 * @param operand Which operand to compute.
 * @return toLong() for I, toFractionLong(true) for F, toFractionLong(false) for T,
 *         fractionCount() for V, fractionCountWithoutTrailingZeros() for W, and the absolute
 *         value of toDouble() for any other operand.
 */
double DecimalQuantity::getPluralOperand(PluralOperand operand) const {
    // If this assertion fails, you need to call roundToInfinity() or some other rounding method
    // first; a double-backed value must be rounded before its digits may be read.
    U_ASSERT(!isApproximate);

    if (operand == PLURAL_OPERAND_I) {
        return static_cast<double>(toLong());
    }
    if (operand == PLURAL_OPERAND_F) {
        return static_cast<double>(toFractionLong(true));
    }
    if (operand == PLURAL_OPERAND_T) {
        return static_cast<double>(toFractionLong(false));
    }
    if (operand == PLURAL_OPERAND_V) {
        return static_cast<double>(fractionCount());
    }
    if (operand == PLURAL_OPERAND_W) {
        return static_cast<double>(fractionCountWithoutTrailingZeros());
    }
    return std::abs(toDouble());
}
/**
 * @return The magnitude of the highest digit to display: one past the top stored digit,
 *         raised to lReqPos or lowered to lOptPos as needed, minus one.
 */
int32_t DecimalQuantity::getUpperDisplayMagnitude() const {
    // If this assertion fails, you need to call roundToInfinity() or some other rounding method.
    U_ASSERT(!isApproximate);

    const int32_t magnitude = scale + precision;
    int32_t bounded = magnitude;
    if (lReqPos > magnitude) {
        // Pad up to the required integer digits.
        bounded = lReqPos;
    } else if (lOptPos < magnitude) {
        // Cut off at the maximum integer digits.
        bounded = lOptPos;
    }
    return bounded - 1;
}
/**
 * @return The magnitude of the lowest digit to display: the scale, lowered to rReqPos or raised
 *         to rOptPos as needed.
 */
int32_t DecimalQuantity::getLowerDisplayMagnitude() const {
    // If this assertion fails, you need to call roundToInfinity() or some other rounding method.
    U_ASSERT(!isApproximate);

    const int32_t magnitude = scale;
    if (rReqPos < magnitude) {
        // Pad down to the required fraction digits.
        return rReqPos;
    }
    if (rOptPos > magnitude) {
        // Cut off at the maximum fraction digits.
        return rOptPos;
    }
    return magnitude;
}
/**
 * @param magnitude The power of ten of the digit to read.
 * @return The digit at that magnitude, as reported by getDigitPos().
 */
int8_t DecimalQuantity::getDigit(int32_t magnitude) const {
    // If this assertion fails, you need to call roundToInfinity() or some other rounding method.
    U_ASSERT(!isApproximate);
    // Translate the magnitude into an index relative to the least significant stored digit.
    const int32_t position = magnitude - scale;
    return getDigitPos(position);
}
/** @return The negation of getLowerDisplayMagnitude(), i.e. the number of displayed fraction digits. */
int32_t DecimalQuantity::fractionCount() const {
    const int32_t lowestMagnitude = getLowerDisplayMagnitude();
    return -lowestMagnitude;
}
/** @return max(-scale, 0): the number of stored fraction digits. */
int32_t DecimalQuantity::fractionCountWithoutTrailingZeros() const {
    if (scale < 0) {
        return -scale;
    }
    return 0;
}
bool DecimalQuantity::isNegative() const {
return (flags & NEGATIVE_FLAG) != 0;
}
bool DecimalQuantity::isInfinite() const {
return (flags & INFINITY_FLAG) != 0;
}
bool DecimalQuantity::isNaN() const {
return (flags & NAN_FLAG) != 0;
}
/** @return Whether no digits are stored (precision == 0). */
bool DecimalQuantity::isZero() const {
    const bool hasNoDigits = (precision == 0);
    return hasNoDigits;
}
/**
 * Sets this quantity to a 32-bit integer value, replacing any previous value and flags.
 *
 * @param n The new value.
 * @return A reference to this instance.
 */
DecimalQuantity &DecimalQuantity::setToInt(int32_t n) {
    setBcdToZero();
    flags = 0;
    if (n < 0) {
        flags |= NEGATIVE_FLAG;
        // Negating INT32_MIN overflows int32_t (undefined behavior). Leave it unchanged:
        // _setToInt() recognizes INT32_MIN and reads its magnitude through a 64-bit value.
        if (n != INT32_MIN) {
            n = -n;
        }
    }
    if (n != 0) {
        _setToInt(n);
        compact();
    }
    return *this;
}
/**
 * Loads the magnitude of n into the BCD.
 *
 * @param n A nonzero magnitude, or INT32_MIN (whose magnitude does not fit in int32_t).
 */
void DecimalQuantity::_setToInt(int32_t n) {
    if (n != INT32_MIN) {
        readIntToBcd(n);
        return;
    }
    // Widen before negating so that the magnitude of INT32_MIN is representable.
    const int64_t magnitude = -static_cast<int64_t>(n);
    readLongToBcd(magnitude);
}
/**
 * Sets this quantity to a 64-bit integer value, replacing any previous value and flags.
 *
 * @param n The new value.
 * @return A reference to this instance.
 */
DecimalQuantity &DecimalQuantity::setToLong(int64_t n) {
    setBcdToZero();
    flags = 0;
    if (n < 0) {
        flags |= NEGATIVE_FLAG;
        // Negating INT64_MIN overflows int64_t (undefined behavior). Leave it unchanged:
        // _setToLong() has a dedicated branch for INT64_MIN.
        if (n != INT64_MIN) {
            n = -n;
        }
    }
    if (n != 0) {
        _setToLong(n);
        compact();
    }
    return *this;
}
void DecimalQuantity::_setToLong(int64_t n) {
if (n == INT64_MIN) {
static const char *int64minStr = "9.223372036854775808E+18";
decNumber dn;
stringToDecNumber(int64minStr, dn);
readDecNumberToBcd(&dn);
} else if (n <= INT32_MAX) {
readIntToBcd(static_cast<int32_t>(n));
} else {
readLongToBcd(n);
}
}
/**
 * Sets this quantity to a double value, replacing any previous value and flags.
 *
 * NaN and infinity are recorded as flags only. Finite nonzero values go through the fast
 * double-to-BCD path.
 *
 * @param n The new value.
 * @return A reference to this instance.
 */
DecimalQuantity &DecimalQuantity::setToDouble(double n) {
    setBcdToZero();
    flags = 0;

    // signbit() distinguishes -0.0 from +0.0, which a comparison with zero cannot do.
    if (std::signbit(n)) {
        flags |= NEGATIVE_FLAG;
        n = -n;
    }

    if (std::isnan(n)) {
        flags |= NAN_FLAG;
        return *this;
    }
    if (!std::isfinite(n)) {
        flags |= INFINITY_FLAG;
        return *this;
    }
    if (n != 0) {
        _setToDoubleFast(n);
        compact();
    }
    return *this;
}
/**
 * Fast conversion of a double into BCD digits.
 *
 * The original double is saved in origDouble (with origDelta reset to 0) and the value is
 * marked approximate. The digits are then produced either by reading the value as an integer
 * or by scaling it by a power of ten and rounding to an integer.
 *
 * @param n The value to convert. The caller (setToDouble) passes a finite, nonzero value whose
 *          sign has already been stripped.
 */
void DecimalQuantity::_setToDoubleFast(double n) {
isApproximate = true;
origDouble = n;
origDelta = 0;
// Make sure the double is an IEEE 754 double. If not, fall back to the slow path right now.
// TODO: Make a fast path for other types of doubles.
if (!std::numeric_limits<double>::is_iec559) {
convertToAccurateDouble();
// Turn off the approximate double flag, since the value is now exact.
isApproximate = false;
origDouble = 0.0;
return;
}
// To get the bits from the double, use memcpy, which takes care of endianness.
uint64_t ieeeBits;
uprv_memcpy(&ieeeBits, &n, sizeof(n));
// Extract the 11-bit biased exponent field (bits 52..62) and remove the bias of 0x3ff.
int32_t exponent = static_cast<int32_t>((ieeeBits & 0x7ff0000000000000L) >> 52) - 0x3ff;
// Not all integers can be represented exactly for exponent > 52
if (exponent <= 52 && static_cast<int64_t>(n) == n) {
// The value is a whole number: read its digits directly.
_setToLong(static_cast<int64_t>(n));
return;
}
// 3.3219... is log2(10)
// fracLength is the power of ten by which n is scaled before it is rounded to an integer;
// it is negative for large values, in which case n is divided instead.
auto fracLength = static_cast<int32_t> ((52 - exponent) / 3.32192809489);
if (fracLength >= 0) {
int32_t i = fracLength;
// 1e22 is the largest exact double.
// Apply 1e22 repeatedly so that the remaining index stays within the multiplier table.
for (; i >= 22; i -= 22) n *= 1e22;
n *= DOUBLE_MULTIPLIERS[i];
} else {
int32_t i = fracLength;
// 1e22 is the largest exact double.
for (; i <= -22; i += 22) n /= 1e22;
n /= DOUBLE_MULTIPLIERS[-i];
}
auto result = static_cast<int64_t>(round(n));
// If the scaled value rounds to zero, the BCD is left as it was on entry.
if (result != 0) {
_setToLong(result);
// Undo the power-of-ten scaling applied above.
scale -= fracLength;
}
}
/**
 * Slow path: rebuilds the BCD from the saved original double by formatting it with 14
 * fractional digits in scientific notation and parsing the result as a decNumber. The saved
 * magnitude delta is re-applied afterwards, and explicitExactDouble is set.
 */
void DecimalQuantity::convertToAccurateDouble() {
    double n = origDouble;
    U_ASSERT(n != 0);
    int32_t delta = origDelta;
    setBcdToZero();

    // Call the slow oracle function (Double.toString in Java, snprintf in C++).
    // The <float.h> constant DBL_DIG defines a platform-specific number of digits in a double.
    // However, this tends to be too low (see #11318). Instead, we always use 14 digits.
    //
    // "%+1.14e" produces up to 22 characters: sign, leading digit, '.', 14 fraction digits,
    // 'e', exponent sign, and three exponent digits. With the terminating '\0' that is 23
    // bytes, so the buffer is 1 + 14 + 8 bytes and snprintf() bounds the write.
    char dstr[1 + 14 + 8];
    snprintf(dstr, sizeof(dstr), "%+1.14e", n);

    // uprv_decNumberFromString() will parse the string expecting '.' as a
    // decimal separator, however snprintf() can use ',' in certain locales.
    // Overwrite a ',' with '.' here before proceeding.
    char *decimalSeparator = strchr(dstr, ',');
    if (decimalSeparator != nullptr) {
        *decimalSeparator = '.';
    }

    decNumber dn;
    stringToDecNumber(dstr, dn);
    _setToDecNumber(&dn);

    scale += delta;
    explicitExactDouble = true;
}
/**
 * Sets this quantity from a decNumber-compatible string, replacing any previous value and flags.
 *
 * @param n The numeric string to parse.
 * @return A reference to this instance.
 */
DecimalQuantity &DecimalQuantity::setToDecNumber(StringPiece n) {
    setBcdToZero();
    flags = 0;

    decNumber parsed;
    stringToDecNumber(n, parsed);

    // The code path for decNumber is modeled after BigDecimal in Java.
    if (decNumberIsNegative(&parsed)) {
        flags |= NEGATIVE_FLAG;
    }
    if (decNumberIsZero(&parsed)) {
        // Zero: the BCD was already cleared above.
        return *this;
    }
    _setToDecNumber(&parsed);
    return *this;
}
/**
 * Loads the digits of a decNumber into the BCD and then compacts the result.
 *
 * @param n The number to read.
 */
void DecimalQuantity::_setToDecNumber(decNumber *n) {
    // Java fastpaths for ints here. In C++, just always read directly from the decNumber.
    decNumber *const source = n;
    readDecNumberToBcd(source);
    compact();
}
/**
 * @return The digits at magnitude 0 and above, accumulated into an int64_t. The sign flag is
 *         not applied and no overflow check is made.
 */
int64_t DecimalQuantity::toLong() const {
    int64_t value = 0L;
    int32_t magnitude = scale + precision - 1;
    while (magnitude >= 0) {
        // Positions below the stored digits read as zero.
        value = value * 10 + getDigitPos(magnitude - scale);
        --magnitude;
    }
    return value;
}
/**
 * Accumulates the fraction digits (magnitude -1 and below) into an int64_t.
 *
 * @param includeTrailingZeros Whether to keep going through the required fraction positions
 *                             (down to rReqPos) after the stored digits run out.
 * @return The fraction digits read as an integer. Digits below rOptPos are never included.
 */
int64_t DecimalQuantity::toFractionLong(bool includeTrailingZeros) const {
    int64_t value = 0L;
    for (int32_t magnitude = -1; magnitude >= rOptPos; magnitude--) {
        const bool hasStoredDigit = magnitude >= scale;
        const bool isRequiredZero = includeTrailingZeros && magnitude >= rReqPos;
        if (!hasStoredDigit && !isRequiredZero) {
            break;
        }
        value = value * 10 + getDigitPos(magnitude - scale);
    }
    return value;
}
double DecimalQuantity::toDouble() const {
if (isApproximate) {
return toDoubleFromOriginal();
}
if (isNaN()) {
return NAN;
} else if (isInfinite()) {
return isNegative() ? -INFINITY : INFINITY;
}
int64_t tempLong = 0L;
int32_t lostDigits = precision - (precision < 17 ? precision : 17);
for (int shift = precision - 1; shift >= lostDigits; shift--) {
tempLong = tempLong * 10 + getDigitPos(shift);
}
double result = tempLong;
int32_t _scale = scale + lostDigits;
if (_scale >= 0) {
// 1e22 is the largest exact double.
int32_t i = _scale;
for (; i >= 22; i -= 22) result *= 1e22;
result *= DOUBLE_MULTIPLIERS[i];
} else {
// 1e22 is the largest exact double.
int32_t i = _scale;
for (; i <= -22; i += 22) result /= 1e22;
result /= DOUBLE_MULTIPLIERS[-i];
}
if (isNegative()) { result = -result; }
return result;
}
double DecimalQuantity::toDoubleFromOriginal() const {
double result = origDouble;
int32_t delta = origDelta;
if (delta >= 0) {
// 1e22 is the largest exact double.
for (; delta >= 22; delta -= 22) result *= 1e22;
result *= DOUBLE_MULTIPLIERS[delta];
} else {
// 1e22 is the largest exact double.
for (; delta <= -22; delta += 22) result /= 1e22;
result /= DOUBLE_MULTIPLIERS[-delta];
}
if (isNegative()) { result *= -1; }
return result;
}
/**
 * Rounds this quantity to the given power of ten, discarding digits below that magnitude.
 *
 * For values that came from the double fast path (isApproximate), this method also decides
 * whether the approximate digits can be trusted for this rounding. If they cannot, it converts
 * to the accurate representation and starts over.
 *
 * @param magnitude    The power of ten to which to round; e.g. -2 rounds to 2 decimal places.
 * @param roundingMode The rounding mode to apply.
 * @param status       Receives any error reported by roundingutils::getRoundingDirection(); on
 *                     failure the method returns before truncating.
 */
void DecimalQuantity::roundToMagnitude(int32_t magnitude, RoundingMode roundingMode, UErrorCode& status) {
// The position in the BCD at which rounding will be performed; digits to the right of position
// will be rounded away.
// TODO: Andy: There was a test failure because of integer overflow here. Should I do
// "safe subtraction" everywhere in the code? What's the nicest way to do it?
int position = safeSubtract(magnitude, scale);
if (position <= 0 && !isApproximate) {
// All digits are to the left of the rounding magnitude.
} else if (precision == 0) {
// No rounding for zero.
} else {
// Perform rounding logic.
// "leading" = most significant digit to the right of rounding
// "trailing" = least significant digit to the left of rounding
int8_t leadingDigit = getDigitPos(safeSubtract(position, 1));
int8_t trailingDigit = getDigitPos(position);
// Compute which section of the number we are in.
// EDGE means we are at the bottom or top edge, like 1.000 or 1.999 (used by doubles)
// LOWER means we are between the bottom edge and the midpoint, like 1.391
// MIDPOINT means we are exactly in the middle, like 1.500
// UPPER means we are between the midpoint and the top edge, like 1.916
roundingutils::Section section = roundingutils::SECTION_MIDPOINT;
if (!isApproximate) {
// Exact digits: the section is determined by the leading digit and, when that digit is 5,
// by whether any lower digit is nonzero.
if (leadingDigit < 5) {
section = roundingutils::SECTION_LOWER;
} else if (leadingDigit > 5) {
section = roundingutils::SECTION_UPPER;
} else {
for (int p = safeSubtract(position, 2); p >= 0; p--) {
if (getDigitPos(p) != 0) {
section = roundingutils::SECTION_UPPER;
break;
}
}
}
} else {
int32_t p = safeSubtract(position, 2);
// Only positions >= minP (the 14 most significant digits) are inspected below.
// NOTE(review): presumably these are the digits the fast double conversion is trusted
// for -- confirm.
int32_t minP = uprv_max(0, precision - 14);
if (leadingDigit == 0) {
section = roundingutils::SECTION_LOWER_EDGE;
for (; p >= minP; p--) {
if (getDigitPos(p) != 0) {
section = roundingutils::SECTION_LOWER;
break;
}
}
} else if (leadingDigit == 4) {
for (; p >= minP; p--) {
if (getDigitPos(p) != 9) {
section = roundingutils::SECTION_LOWER;
break;
}
}
} else if (leadingDigit == 5) {
for (; p >= minP; p--) {
if (getDigitPos(p) != 0) {
section = roundingutils::SECTION_UPPER;
break;
}
}
} else if (leadingDigit == 9) {
section = roundingutils::SECTION_UPPER_EDGE;
for (; p >= minP; p--) {
if (getDigitPos(p) != 9) {
section = roundingutils::SECTION_UPPER;
break;
}
}
} else if (leadingDigit < 5) {
section = roundingutils::SECTION_LOWER;
} else {
section = roundingutils::SECTION_UPPER;
}
bool roundsAtMidpoint = roundingutils::roundsAtMidpoint(roundingMode);
if (safeSubtract(position, 1) < precision - 14 ||
(roundsAtMidpoint && section == roundingutils::SECTION_MIDPOINT) ||
(!roundsAtMidpoint && section < 0 /* i.e. at upper or lower edge */)) {
// Oops! This means that we have to get the exact representation of the double, because
// the zone of uncertainty is along the rounding boundary.
convertToAccurateDouble();
roundToMagnitude(magnitude, roundingMode, status); // start over
return;
}
// Turn off the approximate double flag, since the value is now confirmed to be exact.
isApproximate = false;
origDouble = 0.0;
origDelta = 0;
if (position <= 0) {
// All digits are to the left of the rounding magnitude.
return;
}
// Good to continue rounding.
// NOTE(review): -1 and -2 presumably correspond to SECTION_LOWER_EDGE and
// SECTION_UPPER_EDGE -- confirm against number_roundingutils.h.
if (section == -1) { section = roundingutils::SECTION_LOWER; }
if (section == -2) { section = roundingutils::SECTION_UPPER; }
}
bool roundDown = roundingutils::getRoundingDirection((trailingDigit % 2) == 0,
isNegative(),
section,
roundingMode,
status);
if (U_FAILURE(status)) {
return;
}
// Perform truncation
if (position >= precision) {
// Every stored digit lies below the rounding position: drop them all and place the empty
// value at the target magnitude, so that a rounded-up digit lands at that magnitude.
setBcdToZero();
scale = magnitude;
} else {
shiftRight(position);
}
// Bubble the result to the higher digits
if (!roundDown) {
if (trailingDigit == 9) {
int bubblePos = 0;
// Note: in the long implementation, the most digits BCD can have at this point is 15,
// so bubblePos <= 15 and getDigitPos(bubblePos) is safe.
for (; getDigitPos(bubblePos) == 9; bubblePos++) {}
shiftRight(bubblePos); // shift off the trailing 9s
}
int8_t digit0 = getDigitPos(0);
U_ASSERT(digit0 != 9);
setDigitPos(0, static_cast<int8_t>(digit0 + 1));
precision += 1; // in case an extra digit got added
}
// compact() recomputes precision and strips trailing zeros.
compact();
}
}
/**
 * Forces a double-backed value to adopt its accurate representation. Has no effect when the
 * value is not approximate.
 */
void DecimalQuantity::roundToInfinity() {
    if (!isApproximate) {
        return;
    }
    convertToAccurateDouble();
}
/**
 * Appends a digit, optionally preceded by zeros, to the end of the value.
 *
 * @param value           The digit to append.
 * @param leadingZeros    The number of zeros to insert before the digit; must be non-negative.
 * @param appendAsInteger If true, the existing digits move up in magnitude to make room; if
 *                        false, the digit is appended like a fraction digit.
 */
void DecimalQuantity::appendDigit(int8_t value, int32_t leadingZeros, bool appendAsInteger) {
    U_ASSERT(leadingZeros >= 0);

    // Zero requires special handling to maintain the invariant that the least-significant digit
    // in the BCD is nonzero: nothing is stored, and only the scale may move.
    if (value == 0) {
        const bool hasDigits = (precision != 0);
        if (appendAsInteger && hasDigits) {
            scale += leadingZeros + 1;
        }
        return;
    }

    // A positive scale stands for implicit trailing zeros, which must become stored zeros
    // between the existing digits and the new one.
    int32_t zerosToInsert = leadingZeros;
    if (scale > 0) {
        zerosToInsert += scale;
        if (appendAsInteger) {
            scale = 0;
        }
    }

    // Make room, then write the new least-significant digit.
    const int32_t shiftAmount = zerosToInsert + 1;
    shiftLeft(shiftAmount);
    setDigitPos(0, value);

    // shiftLeft() lowered the scale; in integer mode, raise it again.
    if (appendAsInteger) {
        scale += shiftAmount;
    }
}
/**
 * @return The displayed digits without exponential notation: an optional leading '-', then one
 *         character per magnitude from the upper to the lower display magnitude, with a '.'
 *         appended right after the magnitude-0 digit.
 */
UnicodeString DecimalQuantity::toPlainString() const {
    UnicodeString sb;
    if (isNegative()) {
        sb.append('-');
    }
    int32_t m = getUpperDisplayMagnitude();
    const int32_t lowest = getLowerDisplayMagnitude();
    while (m >= lowest) {
        const int32_t digitChar = getDigit(m) + '0';
        sb.append(digitChar);
        if (m == 0) {
            sb.append('.');
        }
        m--;
    }
    return sb;
}
////////////////////////////////////////////////////
/// End of DecimalQuantity_AbstractBCD.java ///
/// Start of DecimalQuantity_DualStorageBCD.java ///
////////////////////////////////////////////////////
/**
 * Reads the digit at the given position, counted from the least significant stored digit.
 *
 * @param position The zero-based digit index.
 * @return The stored digit, or 0 when the position is outside the stored digits (negative, at
 *         or beyond `precision` in byte mode, or at or beyond 16 in long mode).
 */
int8_t DecimalQuantity::getDigitPos(int32_t position) const {
    if (usingBytes) {
        // Valid indices are 0 .. precision - 1. The byte array may be exactly `precision`
        // elements long, so index `precision` must not be read.
        if (position < 0 || position >= precision) { return 0; }
        return fBCD.bcdBytes.ptr[position];
    } else {
        // A 64-bit long holds 16 four-bit digits.
        if (position < 0 || position >= 16) { return 0; }
        return (int8_t) ((fBCD.bcdLong >> (position * 4)) & 0xf);
    }
}
/**
 * Writes a digit at the given position, counted from the least significant stored digit.
 * Switches from long mode to byte mode when the position does not fit in 16 digits.
 *
 * @param position The zero-based digit index; must be non-negative.
 * @param value    The digit to store.
 */
void DecimalQuantity::setDigitPos(int32_t position, int8_t value) {
    U_ASSERT(position >= 0);
    if (usingBytes) {
        ensureCapacity(position + 1);
        fBCD.bcdBytes.ptr[position] = value;
    } else if (position >= 16) {
        switchStorage();
        ensureCapacity(position + 1);
        fBCD.bcdBytes.ptr[position] = value;
    } else {
        const int shift = position * 4;
        // Build the mask and the new nibble in 64-bit unsigned arithmetic. The shift can be as
        // large as 60, which is undefined for `long` on platforms where long is 32 bits.
        const uint64_t nibbleMask = static_cast<uint64_t>(0xf) << shift;
        const uint64_t newNibble = static_cast<uint64_t>(value) << shift;
        fBCD.bcdLong = (fBCD.bcdLong & ~nibbleMask) | newNibble;
    }
}
/**
 * Moves every stored digit numDigits positions toward the most significant end, filling the
 * vacated low positions with zeros. The scale is lowered and the precision raised by numDigits.
 *
 * @param numDigits The number of positions to shift by.
 */
void DecimalQuantity::shiftLeft(int32_t numDigits) {
    // The long representation holds at most 16 digits.
    if (!usingBytes && precision + numDigits > 16) {
        switchStorage();
    }
    if (!usingBytes) {
        fBCD.bcdLong <<= (numDigits * 4);
    } else {
        ensureCapacity(precision + numDigits);
        int8_t *digits = fBCD.bcdBytes.ptr;
        // Move from the top down so that no digit is overwritten before it has been copied.
        for (int32_t src = precision - 1; src >= 0; src--) {
            digits[src + numDigits] = digits[src];
        }
        for (int32_t dst = numDigits - 1; dst >= 0; dst--) {
            digits[dst] = 0;
        }
    }
    precision += numDigits;
    scale -= numDigits;
}
/**
 * Drops the numDigits least significant stored digits and moves the rest down. The scale is
 * raised and the precision lowered by numDigits.
 *
 * @param numDigits The number of positions to shift by.
 */
void DecimalQuantity::shiftRight(int32_t numDigits) {
    if (!usingBytes) {
        fBCD.bcdLong >>= (numDigits * 4);
    } else {
        int8_t *digits = fBCD.bcdBytes.ptr;
        const int32_t remaining = precision - numDigits;
        int32_t idx = 0;
        // Move the surviving digits down...
        while (idx < remaining) {
            digits[idx] = digits[idx + numDigits];
            idx++;
        }
        // ...and zero the positions they vacated.
        while (idx < precision) {
            digits[idx] = 0;
            idx++;
        }
    }
    precision -= numDigits;
    scale += numDigits;
}
/**
 * Resets the digit storage to zero in long mode, freeing the byte array if one is in use, and
 * clears scale, precision, and the double fast-path fields. Does not modify `flags` or the
 * display bounds.
 */
void DecimalQuantity::setBcdToZero() {
    // Release the byte array first, since the long value written below shares fBCD with it.
    if (usingBytes) {
        delete[] fBCD.bcdBytes.ptr;
        fBCD.bcdBytes.ptr = nullptr;
        usingBytes = false;
    }
    fBCD.bcdLong = 0L;

    // Digit bookkeeping
    precision = 0;
    scale = 0;

    // Double fast-path state
    origDelta = 0;
    origDouble = 0;
    isApproximate = false;
}
/**
 * Loads a nonzero 32-bit integer into the long representation, one decimal digit per four-bit
 * nibble with the least significant digit in the lowest nibble. Sets scale to 0 and precision
 * to the number of digits read.
 *
 * @param n The value to read; must be nonzero.
 */
void DecimalQuantity::readIntToBcd(int32_t n) {
    U_ASSERT(n != 0);
    // ints always fit inside the long implementation.
    uint64_t packed = 0L;
    int32_t digitCount = 0;
    for (; n != 0; n /= 10) {
        packed |= (static_cast<uint64_t>(n) % 10) << (4 * digitCount);
        digitCount++;
    }
    U_ASSERT(!usingBytes);
    fBCD.bcdLong = packed;
    scale = 0;
    precision = digitCount;
}
/**
 * Loads a nonzero 64-bit integer into the BCD. Values of 10^16 and above use the byte
 * representation; smaller values use the long representation. Sets scale to 0 and precision to
 * the number of digits read.
 *
 * @param n The value to read; must be nonzero.
 */
void DecimalQuantity::readLongToBcd(int64_t n) {
    U_ASSERT(n != 0);
    if (n >= 10000000000000000L) {
        // 17 or more digits: does not fit in the 16 nibbles of the long representation.
        ensureCapacity();
        int i = 0;
        for (; n != 0L; n /= 10L, i++) {
            fBCD.bcdBytes.ptr[i] = static_cast<int8_t>(n % 10);
        }
        U_ASSERT(usingBytes);
        scale = 0;
        precision = i;
    } else {
        uint64_t result = 0L;
        int i = 16;
        for (; n != 0L; n /= 10L, i--) {
            // Widen the digit to unsigned before shifting, as readIntToBcd() does: shifting
            // the signed value 8 or 9 left by 60 bits does not fit in an int64_t.
            result = (result >> 4) + (static_cast<uint64_t>(n % 10) << 60);
        }
        U_ASSERT(i >= 0);
        U_ASSERT(!usingBytes);
        // The digits were inserted at the top nibble; move them down to start at nibble 0.
        fBCD.bcdLong = result >> (i * 4);
        scale = 0;
        precision = 16 - i;
    }
}
/**
 * Copies the digits of a decNumber into the BCD, using the byte representation for more than
 * 16 digits and the long representation otherwise. Takes scale from the decNumber's exponent
 * and precision from its digit count.
 *
 * @param dn The number to read; each element of dn->lsu is read as one decimal digit.
 */
void DecimalQuantity::readDecNumberToBcd(decNumber *dn) {
    const int32_t digitCount = dn->digits;
    if (digitCount <= 16) {
        uint64_t packed = 0L;
        for (int32_t idx = 0; idx < digitCount; idx++) {
            packed |= static_cast<uint64_t>(dn->lsu[idx]) << (4 * idx);
        }
        fBCD.bcdLong = packed;
    } else {
        ensureCapacity(digitCount);
        for (int32_t idx = 0; idx < digitCount; idx++) {
            fBCD.bcdBytes.ptr[idx] = dn->lsu[idx];
        }
    }
    precision = digitCount;
    scale = dn->exponent;
}
/**
 * Normalizes the BCD: strips trailing zero digits (raising the scale to compensate),
 * recomputes the precision from the most significant nonzero digit, and resets to zero when no
 * nonzero digit remains. In byte mode, switches back to long mode when 16 or fewer digits
 * remain.
 */
void DecimalQuantity::compact() {
    if (!usingBytes) {
        if (fBCD.bcdLong == 0L) {
            // Number is zero
            setBcdToZero();
            return;
        }

        // Compact the number (remove trailing zeros)
        // TODO: Use a more efficient algorithm here and below. There is a logarithmic one.
        int32_t trailingZeros = 0;
        while (trailingZeros < precision && getDigitPos(trailingZeros) == 0) {
            trailingZeros++;
        }
        fBCD.bcdLong >>= trailingZeros * 4;
        scale += trailingZeros;

        // Compute precision
        int32_t top = precision - 1;
        while (top >= 0 && getDigitPos(top) == 0) {
            top--;
        }
        precision = top + 1;
        return;
    }

    // Byte mode
    int32_t trailingZeros = 0;
    while (trailingZeros < precision && fBCD.bcdBytes.ptr[trailingZeros] == 0) {
        trailingZeros++;
    }
    if (trailingZeros == precision) {
        // Number is zero
        setBcdToZero();
        return;
    }
    // Remove trailing zeros
    shiftRight(trailingZeros);

    // Compute precision
    int32_t top = precision - 1;
    while (top >= 0 && fBCD.bcdBytes.ptr[top] == 0) {
        top--;
    }
    precision = top + 1;

    // Switch storage mechanism if possible
    if (precision <= 16) {
        switchStorage();
    }
}
/** Ensures byte-mode storage with room for at least 40 digits. */
void DecimalQuantity::ensureCapacity() {
    const int32_t defaultCapacity = 40;
    ensureCapacity(defaultCapacity);
}
/**
 * Ensures that this instance is in byte mode with room for at least `capacity` digits.
 *
 * When not yet in byte mode, a zero-filled array of exactly `capacity` elements is allocated.
 * When already in byte mode with too small an array, the array is replaced by one of twice the
 * requested capacity: the existing digits are copied over and the remainder is zero-filled.
 *
 * @param capacity The minimum number of digits required; 0 is a no-op.
 */
void DecimalQuantity::ensureCapacity(int32_t capacity) {
    if (capacity == 0) { return; }
    if (!usingBytes) {
        // TODO: There is nothing being done to check for memory allocation failures.
        fBCD.bcdBytes.ptr = new int8_t[capacity];
        fBCD.bcdBytes.len = capacity;
        // Initialize the byte array to zeros (this is done automatically in Java)
        uprv_memset(fBCD.bcdBytes.ptr, 0, capacity * sizeof(int8_t));
    } else if (fBCD.bcdBytes.len < capacity) {
        const int32_t oldCapacity = fBCD.bcdBytes.len;
        const int32_t newCapacity = capacity * 2;
        auto bcd1 = new int8_t[newCapacity];
        uprv_memcpy(bcd1, fBCD.bcdBytes.ptr, oldCapacity * sizeof(int8_t));
        // Zero the rest of the NEW array (this is done automatically in Java). The fill must
        // target bcd1, not the old array, and must cover the whole tail up to newCapacity.
        uprv_memset(bcd1 + oldCapacity, 0, (newCapacity - oldCapacity) * sizeof(int8_t));
        delete[] fBCD.bcdBytes.ptr;
        fBCD.bcdBytes.ptr = bcd1;
        fBCD.bcdBytes.len = newCapacity;
    }
    usingBytes = true;
}
/**
 * Toggles the digit storage between the byte array and the packed 64-bit long, carrying the
 * first `precision` digits across.
 */
void DecimalQuantity::switchStorage() {
    if (!usingBytes) {
        // Change from long to bytes
        // Copy the long into a local variable since it will get munged when we allocate the bytes
        uint64_t remaining = fBCD.bcdLong;
        ensureCapacity();
        for (int32_t idx = 0; idx < precision; idx++) {
            fBCD.bcdBytes.ptr[idx] = static_cast<int8_t>(remaining & 0xf);
            remaining >>= 4;
        }
        U_ASSERT(usingBytes);
        return;
    }

    // Change from bytes to long: pack the digits, most significant first.
    uint64_t packed = 0L;
    for (int32_t idx = precision - 1; idx >= 0; idx--) {
        packed <<= 4;
        packed |= fBCD.bcdBytes.ptr[idx];
    }
    delete[] fBCD.bcdBytes.ptr;
    fBCD.bcdBytes.ptr = nullptr;
    fBCD.bcdLong = packed;
    usingBytes = false;
}
/**
 * Replaces this instance's digit storage with a copy of `other`'s. Only the digit data is
 * copied; scale, precision, and the remaining fields are left reset by setBcdToZero().
 *
 * @param other The instance whose digits are copied.
 */
void DecimalQuantity::copyBcdFrom(const DecimalQuantity &other) {
    setBcdToZero();
    if (!other.usingBytes) {
        fBCD.bcdLong = other.fBCD.bcdLong;
        return;
    }
    const int32_t digitCount = other.precision;
    ensureCapacity(digitCount);
    uprv_memcpy(fBCD.bcdBytes.ptr, other.fBCD.bcdBytes.ptr, digitCount * sizeof(int8_t));
}
/**
 * Checks the internal invariants of the digit storage. For internal unit testing only.
 *
 * @return A message describing the first violated invariant, or nullptr when none is violated.
 */
const char16_t* DecimalQuantity::checkHealth() const {
    if (usingBytes) {
        if (precision == 0) { return u"Zero precision but we are in byte mode"; }
        int32_t capacity = fBCD.bcdBytes.len;
        if (precision > capacity) { return u"Precision exceeds length of byte array"; }
        if (getDigitPos(precision - 1) == 0) { return u"Most significant digit is zero in byte mode"; }
        // This branch runs in byte mode, so the message names byte mode.
        if (getDigitPos(0) == 0) { return u"Least significant digit is zero in byte mode"; }
        for (int i = 0; i < precision; i++) {
            if (getDigitPos(i) >= 10) { return u"Digit exceeding 10 in byte array"; }
            if (getDigitPos(i) < 0) { return u"Digit below 0 in byte array"; }
        }
        for (int i = precision; i < capacity; i++) {
            if (getDigitPos(i) != 0) { return u"Nonzero digits outside of range in byte array"; }
        }
    } else {
        if (precision == 0 && fBCD.bcdLong != 0) {
            return u"Value in bcdLong even though precision is zero";
        }
        if (precision > 16) { return u"Precision exceeds length of long"; }
        if (precision != 0 && getDigitPos(precision - 1) == 0) {
            return u"Most significant digit is zero in long mode";
        }
        if (precision != 0 && getDigitPos(0) == 0) {
            return u"Least significant digit is zero in long mode";
        }
        for (int i = 0; i < precision; i++) {
            if (getDigitPos(i) >= 10) { return u"Digit exceeding 10 in long"; }
            if (getDigitPos(i) < 0) { return u"Digit below 0 in long (?!)"; }
        }
        for (int i = precision; i < 16; i++) {
            if (getDigitPos(i) != 0) { return u"Nonzero digits outside of range in long"; }
        }
    }
    // No error
    return nullptr;
}
/**
 * @return A debugging description of the form
 *         "<DecimalQuantity lOpt:lReq:rReq:rOpt storage digitsEscale>", where lOpt is capped at
 *         999 and rOpt at -999. The text is limited to 99 characters.
 */
UnicodeString DecimalQuantity::toString() const {
    // Render the stored digits, most significant first, as a NUL-terminated string.
    auto digits = new char[precision + 1];
    for (int32_t i = 0; i < precision; i++) {
        digits[i] = getDigitPos(precision - i - 1) + '0';
    }
    digits[precision] = 0;
    char buffer8[100];
    snprintf(
            buffer8,
            100,
            "<DecimalQuantity %d:%d:%d:%d %s %s%s%d>",
            (lOptPos > 999 ? 999 : lOptPos),
            lReqPos,
            rReqPos,
            (rOptPos < -999 ? -999 : rOptPos),
            (usingBytes ? "bytes" : "long"),
            (precision == 0 ? "0" : digits),
            "E",
            scale);
    delete[] digits;

    // Convert from char to char16_t to avoid codepage conversion. Stop after the terminator:
    // the bytes of buffer8 beyond it were never written by snprintf().
    char16_t buffer16[100];
    for (int32_t i = 0; i < 100; i++) {
        buffer16[i] = static_cast<char16_t>(buffer8[i]);
        if (buffer8[i] == 0) { break; }
    }
    return UnicodeString(buffer16);
}
/**
 * @return The stored digits, most significant first, followed by "E" and the scale. The sign
 *         is not included.
 */
UnicodeString DecimalQuantity::toNumberString() const {
    // Room reserved after the digits for the "E<scale>" suffix and its terminator.
    const int32_t suffixCapacity = 11;
    char *buffer = new char[precision + suffixCapacity];
    int32_t out = 0;
    for (int32_t position = precision - 1; position >= 0; position--) {
        buffer[out] = getDigitPos(position) + '0';
        out++;
    }
    snprintf(buffer + precision, suffixCapacity, "E%d", scale);
    UnicodeString ret(buffer);
    delete[] buffer;
    return ret;
}

View File

@ -0,0 +1,432 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#ifndef NUMBERFORMAT_DECIMALQUANTITY_H
#define NUMBERFORMAT_DECIMALQUANTITY_H
#include <cstdint>
#include <unicode/umachine.h>
#include <decNumber.h>
#include <standardplural.h>
#include <plurrule_impl.h>
#include "number_types.h"
U_NAMESPACE_BEGIN namespace number {
namespace impl {
/**
* A class for representing a number to be processed by the decimal formatting pipeline. Includes
* methods for rounding, plural rules, and decimal digit extraction.
*
* <p>By design, this is NOT IMMUTABLE and NOT THREAD SAFE. It is intended to be an intermediate
* object holding state during a pass through the decimal formatting pipeline.
*
* <p>Represents numbers and digit display properties using Binary Coded Decimal (BCD).
*
* <p>Java has multiple implementations for testing, but C++ has only one implementation.
*/
class DecimalQuantity : public IFixedDecimal, public UMemory {
public:
/** Copy constructor. */
DecimalQuantity(const DecimalQuantity &other);
DecimalQuantity();
~DecimalQuantity();
/**
* Sets this instance to be equal to another instance.
*
* @param other The instance to copy from.
*/
DecimalQuantity &operator=(const DecimalQuantity &other);
/**
* Sets the minimum and maximum integer digits that this {@link DecimalQuantity} should generate.
* This method does not perform rounding.
*
* @param minInt The minimum number of integer digits.
* @param maxInt The maximum number of integer digits.
*/
void setIntegerLength(int32_t minInt, int32_t maxInt);
/**
* Sets the minimum and maximum fraction digits that this {@link DecimalQuantity} should generate.
* This method does not perform rounding.
*
* @param minFrac The minimum number of fraction digits.
* @param maxFrac The maximum number of fraction digits.
*/
void setFractionLength(int32_t minFrac, int32_t maxFrac);
/**
* Rounds the number to a specified interval, such as 0.05.
*
* <p>If rounding to a power of ten, use the more efficient {@link #roundToMagnitude} instead.
*
* @param roundingIncrement The increment to which to round.
* @param roundingMode The {@link RoundingMode} to use if rounding is necessary.
*/
void roundToIncrement(double roundingIncrement, RoundingMode roundingMode, UErrorCode& status);
/**
* Rounds the number to a specified magnitude (power of ten).
*
* @param magnitude The power of ten to which to round. For example, a value of -2 will
* round to 2 decimal places.
* @param roundingMode The {@link RoundingMode} to use if rounding is necessary.
*/
void roundToMagnitude(int32_t magnitude, RoundingMode roundingMode, UErrorCode& status);
/**
* Rounds the number to an infinite number of decimal points. This has no effect except for
* forcing the double in {@link DecimalQuantity_AbstractBCD} to adopt its exact representation.
*/
void roundToInfinity();
/**
* Multiply the internal value.
*
* @param multiplicand The value by which to multiply.
*/
void multiplyBy(int32_t multiplicand);
/**
* Scales the number by a power of ten. For example, if the value is currently "1234.56", calling
* this method with delta=-3 will change the value to "1.23456".
*
* @param delta The number of magnitudes of ten to change by.
*/
void adjustMagnitude(int32_t delta);
/**
* @return The power of ten corresponding to the most significant nonzero digit.
* The number must not be zero.
*/
int32_t getMagnitude() const;
/** @return Whether the value represented by this {@link DecimalQuantity} is zero. */
bool isZero() const;
/** @return Whether the value represented by this {@link DecimalQuantity} is less than zero. */
bool isNegative() const;
/** @return Whether the value represented by this {@link DecimalQuantity} is infinite. */
bool isInfinite() const override;
/** @return Whether the value represented by this {@link DecimalQuantity} is not a number. */
bool isNaN() const override;
int64_t toLong() const;
int64_t toFractionLong(bool includeTrailingZeros) const;
/** @return The value contained in this {@link DecimalQuantity} approximated as a double. */
double toDouble() const;
DecimalQuantity &setToInt(int32_t n);
DecimalQuantity &setToLong(int64_t n);
DecimalQuantity &setToDouble(double n);
/** decNumber is similar to BigDecimal in Java. */
DecimalQuantity &setToDecNumber(StringPiece n);
/**
* Appends a digit, optionally with one or more leading zeros, to the end of the value represented
* by this DecimalQuantity.
*
* <p>The primary use of this method is to construct numbers during a parsing loop. It allows
* parsing to take advantage of the digit list infrastructure primarily designed for formatting.
*
* @param value The digit to append.
* @param leadingZeros The number of zeros to append before the digit. For example, if the value
* in this instance starts as 12.3, and you append a 4 with 1 leading zero, the value becomes
* 12.304.
* @param appendAsInteger If true, increase the magnitude of existing digits to make room for the
* new digit. If false, append to the end like a fraction digit. If true, there must not be
* any fraction digits already in the number.
* @internal
* @deprecated This API is ICU internal only.
*/
void appendDigit(int8_t value, int32_t leadingZeros, bool appendAsInteger);
/**
* Computes the plural form for this number based on the specified set of rules.
*
* @param rules A {@link PluralRules} object representing the set of rules.
* @return The {@link StandardPlural} according to the PluralRules. If the plural form is not in
* the set of standard plurals, {@link StandardPlural#OTHER} is returned instead.
*/
StandardPlural::Form getStandardPlural(const PluralRules *rules) const;
double getPluralOperand(PluralOperand operand) const override;
/**
* Gets the digit at the specified magnitude. For example, if the represented number is 12.3,
* getDigit(-1) returns 3, since 3 is the digit corresponding to 10^-1.
*
* @param magnitude The magnitude of the digit.
* @return The digit at the specified magnitude.
*/
int8_t getDigit(int32_t magnitude) const;
/**
* Gets the largest power of ten that needs to be displayed. The value returned by this function
* will be bounded between minInt and maxInt.
*
* @return The highest-magnitude digit to be displayed.
*/
int32_t getUpperDisplayMagnitude() const;
/**
* Gets the smallest power of ten that needs to be displayed. The value returned by this function
* will be bounded between -minFrac and -maxFrac.
*
* @return The lowest-magnitude digit to be displayed.
*/
int32_t getLowerDisplayMagnitude() const;
int32_t fractionCount() const;
int32_t fractionCountWithoutTrailingZeros() const;
void clear();
/** This method is for internal testing only. */
uint64_t getPositionFingerprint() const;
// /**
// * If the given {@link FieldPosition} is a {@link UFieldPosition}, populates it with the fraction
// * length and fraction long value. If the argument is not a {@link UFieldPosition}, nothing
// * happens.
// *
// * @param fp The {@link UFieldPosition} to populate.
// */
// void populateUFieldPosition(FieldPosition fp);
/**
* Checks whether the bytes stored in this instance are all valid. For internal unit testing only.
*
* @return An error message if this instance is invalid, or null if this instance is healthy.
*/
const char16_t* checkHealth() const;
UnicodeString toString() const;
/* Returns the string in exponential notation. */
UnicodeString toNumberString() const;
/* Returns the string without exponential notation. Slightly slower than toNumberString(). */
UnicodeString toPlainString() const;
/**
* Reports the current value of the usingBytes flag. Visible for testing.
*
* NOTE(review): this accessor only reads a member but is not declared const, so it cannot be
* called on a const DecimalQuantity; consider const-qualifying it.
*
* @return The value of usingBytes.
*/
inline bool isUsingBytes() { return usingBytes; }
/**
* Reports the current value of the explicitExactDouble flag. Visible for testing.
*
* NOTE(review): this accessor only reads a member but is not declared const, and the semicolon
* after the closing brace is redundant; consider cleaning up both.
*
* @return The value of explicitExactDouble.
*/
inline bool isExplicitExactDouble() { return explicitExactDouble; };
private:
/**
* The power of ten corresponding to the least significant digit in the BCD. For example, if this
* object represents the number "3.14", the BCD will be "0x314" and the scale will be -2.
*
* <p>Note that in {@link java.math.BigDecimal}, the scale is defined differently: the number of
* digits after the decimal place, which is the negative of our definition of scale.
*/
int32_t scale;
/**
* The number of digits in the BCD. For example, "1007" has BCD "0x1007" and precision 4. The
* maximum precision is 16 since a long can hold only 16 digits.
*
* <p>This value must be re-calculated whenever the value in bcd changes by using {@link
* #computePrecisionAndCompact()}.
*/
int32_t precision;
/**
* A bitmask of properties relating to the number represented by this object.
*
* @see #NEGATIVE_FLAG
* @see #INFINITY_FLAG
* @see #NAN_FLAG
*/
int8_t flags;
// The following three fields relate to the double-to-ascii fast path algorithm.
// When a double is given to DecimalQuantity, it is converted to BCD using a fast algorithm. The
// fast algorithm guarantees correctness to only the first ~12 digits of the double. The process
// of rounding the number ensures that the converted digits are correct, falling back to a slow-
// path algorithm if required. Therefore, if a DecimalQuantity is constructed from a double, it
// is *required* that roundToMagnitude(), roundToIncrement(), or roundToInfinity() is called. If
// you don't round, assertions will fail in certain other methods if you try calling them.
/**
* Whether the value in the BCD comes from the double fast path without having been rounded to
* ensure correctness.
*/
UBool isApproximate;
/**
* The original number provided by the user and which is represented in BCD. Used when we need to
* re-compute the BCD for an exact double representation.
*/
double origDouble;
/**
* The change in magnitude relative to the original double. Used when we need to re-compute the
* BCD for an exact double representation.
*/
int32_t origDelta;
// Four positions: left optional '(', left required '[', right required ']', right optional ')'.
// These four positions determine which digits are displayed in the output string. They do NOT
// affect rounding. These positions are internal-only and can be specified only by the public
// endpoints like setFractionLength, setIntegerLength, and setSignificantDigits, among others.
//
// * Digits between lReqPos and rReqPos are in the "required zone" and are always displayed.
// * Digits between lOptPos and rOptPos but outside the required zone are in the "optional zone"
// and are displayed unless they are trailing off the left or right edge of the number and
// have a numerical value of zero. In order to be "trailing", the digits need to be beyond
// the decimal point in their respective directions.
// * Digits outside of the "optional zone" are never displayed.
//
// See the table below for illustrative examples.
//
// +---------+---------+---------+---------+------------+------------------------+--------------+
// | lOptPos | lReqPos | rReqPos | rOptPos | number | positions | en-US string |
// +---------+---------+---------+---------+------------+------------------------+--------------+
// | 5 | 2 | -1 | -5 | 1234.567 | ( 12[34.5]67 ) | 1,234.567 |
// | 3 | 2 | -1 | -5 | 1234.567 | 1(2[34.5]67 ) | 234.567 |
// | 3 | 2 | -1 | -2 | 1234.567 | 1(2[34.5]6)7 | 234.56 |
// | 6 | 4 | 2 | -5 | 123456789. | 123(45[67]89. ) | 456,789. |
// | 6 | 4 | 2 | 1 | 123456789. | 123(45[67]8)9. | 456,780. |
// | -1 | -1 | -3 | -4 | 0.123456 | 0.1([23]4)56 | .0234 |
// | 6 | 4 | -2 | -2 | 12.3 | ( [ 12.3 ]) | 0012.30 |
// +---------+---------+---------+---------+------------+------------------------+--------------+
//
int32_t lOptPos = INT32_MAX;
int32_t lReqPos = 0;
int32_t rReqPos = 0;
int32_t rOptPos = INT32_MIN;
/**
* The BCD of the 16 digits of the number represented by this object. Every 4 bits of the long map
* to one digit. For example, the number "12345" in BCD is "0x12345".
*
* <p>Whenever bcd changes internally, {@link #compact()} must be called, except in special cases
* like setting the digit to zero.
*/
union {
struct {
int8_t *ptr;
int32_t len;
} bcdBytes;
uint64_t bcdLong;
} fBCD;
bool usingBytes = false;
/**
* Whether this {@link DecimalQuantity} has been explicitly converted to an exact double. true if
* backed by a double that was explicitly converted via convertToAccurateDouble; false otherwise.
* Used for testing.
*/
bool explicitExactDouble = false;
/**
* Returns a single digit from the BCD list. No internal state is changed by calling this method.
*
* @param position The position of the digit to read, counted in BCD units from the least
* significant digit. If outside the range supported by the implementation, zero is returned.
* @return The digit at the specified location.
*/
int8_t getDigitPos(int32_t position) const;
/**
* Sets the digit in the BCD list. This method only sets the digit; it is the caller's
* responsibility to call {@link #compact} after setting the digit.
*
* @param position The position of the digit to set, counted in BCD units from the least
* significant digit. NOTE(review): the inherited wording says an AssertionError is thrown when
* the position is outside the range supported by the implementation; that is a Java concept,
* so confirm what the C++ implementation does in that case.
* @param value The digit to set at the specified location.
*/
void setDigitPos(int32_t position, int8_t value);
/**
* Adds zeros to the end of the BCD list. This will result in an invalid BCD representation; it is
* the caller's responsibility to do further manipulation and then call {@link #compact}.
*
* @param numDigits The number of zeros to add.
*/
void shiftLeft(int32_t numDigits);
void shiftRight(int32_t numDigits);
/**
* Sets the internal representation to zero. Clears any values stored in scale, precision,
* hasDouble, origDouble, origDelta, and BCD data.
*
* NOTE(review): no member named hasDouble appears among the fields declared next to scale,
* precision, origDouble and origDelta; the name presumably refers to isApproximate — confirm
* against the implementation.
*/
void setBcdToZero();
/**
* Sets the internal BCD state to represent the value in the given int. The caller guarantees
* that the int is positive. The internal state is guaranteed to be empty when this method is
* called.
*
* NOTE(review): the inherited sentence read "either positive", so a second alternative
* (possibly zero) seems to have been dropped — confirm the exact precondition.
*
* @param n The value to consume.
*/
void readIntToBcd(int32_t n);
/**
* Sets the internal BCD state to represent the value in the given long. The caller guarantees
* that the long is positive. The internal state is guaranteed to be empty when this method is
* called.
*
* NOTE(review): the inherited sentence read "either positive", so a second alternative
* (possibly zero) seems to have been dropped — confirm the exact precondition.
*
* @param n The value to consume.
*/
void readLongToBcd(int64_t n);
void readDecNumberToBcd(decNumber *dn);
void copyBcdFrom(const DecimalQuantity &other);
/**
* Removes trailing zeros from the BCD (adjusting the scale as required) and then computes the
* precision. The precision is the number of digits in the number up through the greatest nonzero
* digit.
*
* <p>This method must always be called when bcd changes in order for assumptions to be correct in
* methods like {@link #fractionCount()}.
*/
void compact();
void _setToInt(int32_t n);
void _setToLong(int64_t n);
void _setToDoubleFast(double n);
void _setToDecNumber(decNumber *n);
void convertToAccurateDouble();
double toDoubleFromOriginal() const;
/** Ensure that a byte array of at least 40 digits is allocated. */
void ensureCapacity();
void ensureCapacity(int32_t capacity);
/** Switches the internal storage mechanism between the 64-bit long and the byte array. */
void switchStorage();
};
} // namespace impl
} // namespace number
U_NAMESPACE_END
#endif //NUMBERFORMAT_DECIMALQUANTITY_H

View File

@ -0,0 +1,94 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "number_decimfmtprops.h"
using namespace icu::number::impl;
// Constructs a property bag with every field in its reset state by delegating to clear().
DecimalFormatProperties::DecimalFormatProperties() {
clear();
}
// Resets every property to its "not set" state. The resets are independent of one another and are
// grouped here by the kind of reset value rather than alphabetically.
void DecimalFormatProperties::clear() {
    // Nullable values: back to null.
    compactStyle.nullify();
    currency.nullify();
    currencyUsage.nullify();
    padPosition.nullify();
    roundingMode.nullify();

    // Owned pointer: drop whatever is currently held.
    currencyPluralInfo.adoptInstead(nullptr);

    // Strings: mark as bogus.
    negativePrefix.setToBogus();
    negativePrefixPattern.setToBogus();
    negativeSuffix.setToBogus();
    negativeSuffixPattern.setToBogus();
    padString.setToBogus();
    positivePrefix.setToBogus();
    positivePrefixPattern.setToBogus();
    positiveSuffix.setToBogus();
    positiveSuffixPattern.setToBogus();

    // Boolean switches: off.
    decimalPatternMatchRequired = false;
    decimalSeparatorAlwaysShown = false;
    exponentSignAlwaysShown = false;
    parseCaseSensitive = false;
    parseIntegerOnly = false;
    parseLenient = false;
    parseNoExponent = false;
    parseToBigDecimal = false;
    signAlwaysShown = false;

    // Widths, grouping sizes and digit counts: -1.
    formatWidth = -1;
    groupingSize = -1;
    secondaryGroupingSize = -1;
    maximumFractionDigits = -1;
    maximumIntegerDigits = -1;
    maximumSignificantDigits = -1;
    minimumExponentDigits = -1;
    minimumFractionDigits = -1;
    minimumGroupingDigits = -1;
    minimumIntegerDigits = -1;
    minimumSignificantDigits = -1;

    // Multipliers and rounding increment: zero.
    magnitudeMultiplier = 0;
    multiplier = 0;
    roundingIncrement = 0.0;
}
// Field-by-field equality. Evaluation stops at the first field that differs.
// Note that currencyPluralInfo is compared by pointer (getAlias()), not by pointee value.
bool DecimalFormatProperties::operator==(const DecimalFormatProperties &other) const {
    return compactStyle == other.compactStyle
           && currency == other.currency
           && currencyPluralInfo.getAlias() == other.currencyPluralInfo.getAlias()
           && currencyUsage == other.currencyUsage
           && decimalPatternMatchRequired == other.decimalPatternMatchRequired
           && decimalSeparatorAlwaysShown == other.decimalSeparatorAlwaysShown
           && exponentSignAlwaysShown == other.exponentSignAlwaysShown
           && formatWidth == other.formatWidth
           && groupingSize == other.groupingSize
           && magnitudeMultiplier == other.magnitudeMultiplier
           && maximumFractionDigits == other.maximumFractionDigits
           && maximumIntegerDigits == other.maximumIntegerDigits
           && maximumSignificantDigits == other.maximumSignificantDigits
           && minimumExponentDigits == other.minimumExponentDigits
           && minimumFractionDigits == other.minimumFractionDigits
           && minimumGroupingDigits == other.minimumGroupingDigits
           && minimumIntegerDigits == other.minimumIntegerDigits
           && minimumSignificantDigits == other.minimumSignificantDigits
           && multiplier == other.multiplier
           && negativePrefix == other.negativePrefix
           && negativePrefixPattern == other.negativePrefixPattern
           && negativeSuffix == other.negativeSuffix
           && negativeSuffixPattern == other.negativeSuffixPattern
           && padPosition == other.padPosition
           && padString == other.padString
           && parseCaseSensitive == other.parseCaseSensitive
           && parseIntegerOnly == other.parseIntegerOnly
           && parseLenient == other.parseLenient
           && parseNoExponent == other.parseNoExponent
           && parseToBigDecimal == other.parseToBigDecimal
           && positivePrefix == other.positivePrefix
           && positivePrefixPattern == other.positivePrefixPattern
           && positiveSuffix == other.positiveSuffix
           && positiveSuffixPattern == other.positiveSuffixPattern
           && roundingIncrement == other.roundingIncrement
           && roundingMode == other.roundingMode
           && secondaryGroupingSize == other.secondaryGroupingSize
           && signAlwaysShown == other.signAlwaysShown;
}

View File

@ -0,0 +1,77 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#ifndef NUMBERFORMAT_PROPERTIES_H
#define NUMBERFORMAT_PROPERTIES_H
#include "unicode/unistr.h"
#include <cstdint>
#include <unicode/plurrule.h>
#include <unicode/currpinf.h>
#include "unicode/unum.h"
#include "number_types.h"
U_NAMESPACE_BEGIN
namespace number {
namespace impl {
// Bag of decimal-format settings; the fields are listed in alphabetical order.
// The constructor calls clear(), which puts every field into its reset state: NullableValue
// members are nullified, currencyPluralInfo is emptied, UnicodeString members are set to bogus,
// boolean flags become false, the width, grouping-size and digit-count fields become -1, and
// magnitudeMultiplier, multiplier and roundingIncrement become 0.
struct DecimalFormatProperties {
public:
NullableValue<UNumberCompactStyle> compactStyle;
NullableValue<CurrencyUnit> currency;
// NOTE: operator== compares this member by pointer (getAlias()), not by pointee value.
CopyableLocalPointer <CurrencyPluralInfo> currencyPluralInfo;
NullableValue<UCurrencyUsage> currencyUsage;
bool decimalPatternMatchRequired;
bool decimalSeparatorAlwaysShown;
bool exponentSignAlwaysShown;
// The int32_t fields below are reset to -1 by clear(), except magnitudeMultiplier and
// multiplier, which are reset to 0.
int32_t formatWidth;
int32_t groupingSize;
int32_t magnitudeMultiplier;
int32_t maximumFractionDigits;
int32_t maximumIntegerDigits;
int32_t maximumSignificantDigits;
int32_t minimumExponentDigits;
int32_t minimumFractionDigits;
int32_t minimumGroupingDigits;
int32_t minimumIntegerDigits;
int32_t minimumSignificantDigits;
int32_t multiplier;
// All UnicodeString fields are set to bogus by clear().
UnicodeString negativePrefix;
UnicodeString negativePrefixPattern;
UnicodeString negativeSuffix;
UnicodeString negativeSuffixPattern;
NullableValue<PadPosition> padPosition;
UnicodeString padString;
bool parseCaseSensitive;
bool parseIntegerOnly;
bool parseLenient;
bool parseNoExponent;
bool parseToBigDecimal;
//PluralRules pluralRules;
UnicodeString positivePrefix;
UnicodeString positivePrefixPattern;
UnicodeString positiveSuffix;
UnicodeString positiveSuffixPattern;
double roundingIncrement;
NullableValue<RoundingMode> roundingMode;
int32_t secondaryGroupingSize;
bool signAlwaysShown;
// Initializes all fields by calling clear().
DecimalFormatProperties();
// The copy-constructor declaration is commented out; copy assignment is explicitly defaulted.
//DecimalFormatProperties(const DecimalFormatProperties &other) = default;
DecimalFormatProperties &operator=(const DecimalFormatProperties &other) = default;
// Field-by-field comparison (see the note on currencyPluralInfo).
bool operator==(const DecimalFormatProperties &other) const;
// Returns every field to its reset state.
void clear();
};
} // namespace impl
} // namespace number
U_NAMESPACE_END
#endif //NUMBERFORMAT_PROPERTIES_H

View File

@ -0,0 +1,318 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include <uassert.h>
#include "unicode/numberformatter.h"
#include "number_decimalquantity.h"
#include "number_results.h"
#include "number_formatimpl.h"
using namespace icu::number;
using namespace icu::number::impl;
// Fluent setter: returns a copy of these settings with the notation replaced.
// The receiver is left untouched.
template<typename Derived>
Derived NumberFormatterSettings<Derived>::notation(const Notation &notation) const {
    Derived updated(*this);
    // Assigning by value slices the argument; that is acceptable here.
    updated.fMacros.notation = notation;
    return updated;
}
// Fluent setter: returns a copy of these settings with the unit replaced.
// The receiver is left untouched.
template<typename Derived>
Derived NumberFormatterSettings<Derived>::unit(const icu::MeasureUnit &unit) const {
    Derived updated(*this);
    // The by-value assignment slices subclasses of MeasureUnit. A CurrencyUnit can be restored
    // from the MeasureUnit later; TimeUnit may be affected, but it is not as relevant to number
    // formatting.
    updated.fMacros.unit = unit;
    return updated;
}
// Fluent setter that takes ownership of a heap-allocated unit: the unit is copied by value into
// the returned settings and the original object is then deleted. A null argument yields an
// unmodified copy of the settings.
template<typename Derived>
Derived NumberFormatterSettings<Derived>::adoptUnit(const icu::MeasureUnit *unit) const {
    Derived updated(*this);
    if (unit == nullptr) {
        return updated;
    }
    // The by-value assignment slices subclasses of MeasureUnit. A CurrencyUnit can be restored
    // from the MeasureUnit later; TimeUnit may be affected, but it is not as relevant to number
    // formatting.
    updated.fMacros.unit = *unit;
    // We own the argument, so release it now that its value has been copied.
    delete unit;
    return updated;
}
// Fluent setter: returns a copy of these settings with the rounder replaced.
// The receiver is left untouched.
template<typename Derived>
Derived NumberFormatterSettings<Derived>::rounding(const Rounder &rounder) const {
    Derived updated(*this);
    // Assigning by value slices the argument; that is acceptable here.
    updated.fMacros.rounder = rounder;
    return updated;
}
// Fluent setter: returns a copy of these settings with the grouping strategy replaced.
// The receiver is left untouched.
template<typename Derived>
Derived NumberFormatterSettings<Derived>::grouping(const Grouper &grouper) const {
    Derived updated(*this);
    updated.fMacros.grouper = grouper;
    return updated;
}
// Fluent setter: returns a copy of these settings with the integer-width style replaced.
// The receiver is left untouched.
template<typename Derived>
Derived NumberFormatterSettings<Derived>::integerWidth(const IntegerWidth &style) const {
    Derived updated(*this);
    updated.fMacros.integerWidth = style;
    return updated;
}
// Fluent setter: returns a copy of these settings whose symbols wrapper holds the given
// DecimalFormatSymbols (via SymbolsWrapper::setTo). The receiver is left untouched.
template<typename Derived>
Derived NumberFormatterSettings<Derived>::symbols(const DecimalFormatSymbols &symbols) const {
    Derived updated(*this);
    updated.fMacros.symbols.setTo(symbols);
    return updated;
}
// Fluent setter: returns a copy of these settings whose symbols wrapper holds the given
// NumberingSystem pointer (via SymbolsWrapper::setTo). The receiver is left untouched.
template<typename Derived>
Derived NumberFormatterSettings<Derived>::adoptSymbols(const NumberingSystem *ns) const {
    Derived updated(*this);
    updated.fMacros.symbols.setTo(ns);
    return updated;
}
// Fluent setter: returns a copy of these settings with the unit width replaced.
// The receiver is left untouched.
template<typename Derived>
Derived NumberFormatterSettings<Derived>::unitWidth(const UNumberUnitWidth &width) const {
    Derived updated(*this);
    updated.fMacros.unitWidth = width;
    return updated;
}
// Fluent setter: returns a copy of these settings with the sign-display style replaced.
// The receiver is left untouched.
template<typename Derived>
Derived NumberFormatterSettings<Derived>::sign(const UNumberSignDisplay &style) const {
    Derived updated(*this);
    updated.fMacros.sign = style;
    return updated;
}
// Fluent setter: returns a copy of these settings with the decimal-separator display style
// replaced. The receiver is left untouched.
template<typename Derived>
Derived NumberFormatterSettings<Derived>::decimal(const UNumberDecimalSeparatorDisplay &style) const {
    Derived updated(*this);
    updated.fMacros.decimal = style;
    return updated;
}
// Fluent setter: returns a copy of these settings with the padder replaced.
// The receiver is left untouched.
template<typename Derived>
Derived NumberFormatterSettings<Derived>::padding(const Padder &padder) const {
    Derived updated(*this);
    updated.fMacros.padder = padder;
    return updated;
}
// Fluent setter: returns a copy of these settings with the threshold replaced.
// LocalizedNumberFormatter::formatImpl() compares its call counter against this value to decide
// when to build the compiled formatter. The receiver is left untouched.
template<typename Derived>
Derived NumberFormatterSettings<Derived>::threshold(uint32_t threshold) const {
    Derived updated(*this);
    updated.fMacros.threshold = threshold;
    return updated;
}
// Explicitly instantiate NumberFormatterSettings for each class that implements it, so that the
// member function templates defined in this source file are compiled for those types.
// See https://stackoverflow.com/a/495056/1407170
template
class icu::number::NumberFormatterSettings<icu::number::UnlocalizedNumberFormatter>;
template
class icu::number::NumberFormatterSettings<icu::number::LocalizedNumberFormatter>;
// Entry point of the fluent API: hands back a default-constructed UnlocalizedNumberFormatter.
UnlocalizedNumberFormatter NumberFormatter::with() {
    UnlocalizedNumberFormatter blank;
    return blank;
}
// Shortcut for with().locale(locale): starts from a default-constructed formatter and binds it
// to the given locale.
LocalizedNumberFormatter NumberFormatter::withLocale(const Locale &locale) {
    UnlocalizedNumberFormatter unlocalized(with());
    return unlocalized.locale(locale);
}
// Converting constructor from the settings base class. It forwards to the base class's copy
// constructor so that an UnlocalizedNumberFormatter can be built from a
// NumberFormatterSettings<UnlocalizedNumberFormatter> (as the fluent setters do with
// "Derived copy(*this)").
UnlocalizedNumberFormatter::UnlocalizedNumberFormatter(
const NumberFormatterSettings <UnlocalizedNumberFormatter> &other)
: NumberFormatterSettings<UnlocalizedNumberFormatter>(other) {
}
// Converting constructor from the settings base class. It forwards to the base class's copy
// constructor so that a LocalizedNumberFormatter can be built from a
// NumberFormatterSettings<LocalizedNumberFormatter>.
// Nothing beyond the base-class state is copied here. NOTE(review): fCallCount and fCompiled,
// which formatImpl() relies on, are therefore left to their own initialization — confirm that
// the class declaration gives them suitable initial values.
LocalizedNumberFormatter::LocalizedNumberFormatter(
const NumberFormatterSettings <LocalizedNumberFormatter> &other)
: NumberFormatterSettings<LocalizedNumberFormatter>(other) {
// No additional copies required
}
// Builds a localized formatter from a set of macros plus a locale: the macros are copied in
// first, then the locale stored in the copy is overwritten with the one supplied.
LocalizedNumberFormatter::LocalizedNumberFormatter(const MacroProps &macros, const Locale &locale) {
fMacros = macros;
// Must follow the assignment above, which would otherwise overwrite the locale.
fMacros.locale = locale;
}
// Binds these settings to a locale: returns a LocalizedNumberFormatter constructed from this
// formatter's macros and the given locale. The receiver is not modified.
LocalizedNumberFormatter UnlocalizedNumberFormatter::locale(const Locale &locale) const {
return LocalizedNumberFormatter(fMacros, locale);
}
// Copy constructor: delegates to doCopyFrom(), which sets fType and, for the two pointer kinds,
// allocates a fresh copy of the pointee so that each wrapper owns its own object.
SymbolsWrapper::SymbolsWrapper(const SymbolsWrapper &other) {
doCopyFrom(other);
}
// Copy assignment: releases whatever this wrapper currently owns and then deep-copies the
// contents of the other wrapper. Self-assignment is a no-op.
SymbolsWrapper &SymbolsWrapper::operator=(const SymbolsWrapper &other) {
    if (this != &other) {
        doCleanup();
        doCopyFrom(other);
    }
    return *this;
}
// Destructor: releases the owned object, if any, via doCleanup().
SymbolsWrapper::~SymbolsWrapper() {
doCleanup();
}
void SymbolsWrapper::setTo(const DecimalFormatSymbols &dfs) {
doCleanup();
fType = SYMPTR_DFS;
fPtr.dfs = new DecimalFormatSymbols(dfs);
}
// Replaces the wrapper's contents with the given NumberingSystem pointer. The pointer is stored
// as-is (no copy is made) and is later deleted by doCleanup(), so the wrapper takes ownership.
void SymbolsWrapper::setTo(const NumberingSystem *ns) {
    // Release the previous contents while fType still describes them.
    doCleanup();
    fPtr.ns = ns;
    fType = SYMPTR_NS;
}
void SymbolsWrapper::doCopyFrom(const SymbolsWrapper &other) {
fType = other.fType;
switch (fType) {
case SYMPTR_NONE:
// No action necessary
break;
case SYMPTR_DFS:
// Memory allocation failures are exposed in copyErrorTo()
if (other.fPtr.dfs != nullptr) {
fPtr.dfs = new DecimalFormatSymbols(*other.fPtr.dfs);
} else {
fPtr.dfs = nullptr;
}
break;
case SYMPTR_NS:
// Memory allocation failures are exposed in copyErrorTo()
if (other.fPtr.ns != nullptr) {
fPtr.ns = new NumberingSystem(*other.fPtr.ns);
} else {
fPtr.ns = nullptr;
}
break;
}
}
// Deletes the object this wrapper owns, chosen by the current type tag. The tag and the pointer
// themselves are left unchanged; callers overwrite them afterwards where needed.
void SymbolsWrapper::doCleanup() {
    if (fType == SYMPTR_DFS) {
        delete fPtr.dfs;
    } else if (fType == SYMPTR_NS) {
        delete fPtr.ns;
    }
    // SYMPTR_NONE: nothing is owned.
}
// Returns true when the wrapper's type tag says it holds a DecimalFormatSymbols pointer.
bool SymbolsWrapper::isDecimalFormatSymbols() const {
return fType == SYMPTR_DFS;
}
// Returns true when the wrapper's type tag says it holds a NumberingSystem pointer.
bool SymbolsWrapper::isNumberingSystem() const {
return fType == SYMPTR_NS;
}
// Returns the stored DecimalFormatSymbols pointer. The assertion checks that the wrapper
// currently holds that kind of pointer; callers are expected to test isDecimalFormatSymbols()
// first. The wrapper keeps ownership of the object (doCleanup() deletes it).
const DecimalFormatSymbols* SymbolsWrapper::getDecimalFormatSymbols() const {
U_ASSERT(fType == SYMPTR_DFS);
return fPtr.dfs;
}
// Returns the stored NumberingSystem pointer. The assertion checks that the wrapper currently
// holds that kind of pointer; callers are expected to test isNumberingSystem() first. The
// wrapper keeps ownership of the object (doCleanup() deletes it).
const NumberingSystem* SymbolsWrapper::getNumberingSystem() const {
U_ASSERT(fType == SYMPTR_NS);
return fPtr.ns;
}
// Destructor: deletes the compiled formatter that formatImpl() may have stored in fCompiled.
// Deleting a null pointer is a no-op, so this is also fine when nothing was compiled.
LocalizedNumberFormatter::~LocalizedNumberFormatter() {
delete fCompiled.load();
}
// Formats a 64-bit integer. Returns a default-constructed FormattedNumber if status already
// signals failure on entry or if the results object cannot be allocated (in which case status is
// set to U_MEMORY_ALLOCATION_ERROR).
FormattedNumber LocalizedNumberFormatter::formatInt(int64_t value, UErrorCode &status) const {
    if (U_FAILURE(status)) {
        return FormattedNumber();
    }
    NumberFormatterResults *output = new NumberFormatterResults();
    if (output != nullptr) {
        output->quantity.setToLong(value);
        // formatImpl wraps the results object in the FormattedNumber it returns.
        return formatImpl(output, status);
    }
    status = U_MEMORY_ALLOCATION_ERROR;
    return FormattedNumber();
}
// Formats a double. Returns a default-constructed FormattedNumber if status already signals
// failure on entry or if the results object cannot be allocated (in which case status is set to
// U_MEMORY_ALLOCATION_ERROR).
FormattedNumber LocalizedNumberFormatter::formatDouble(double value, UErrorCode &status) const {
    if (U_FAILURE(status)) {
        return FormattedNumber();
    }
    NumberFormatterResults *output = new NumberFormatterResults();
    if (output != nullptr) {
        output->quantity.setToDouble(value);
        // formatImpl wraps the results object in the FormattedNumber it returns.
        return formatImpl(output, status);
    }
    status = U_MEMORY_ALLOCATION_ERROR;
    return FormattedNumber();
}
// Formats a decimal number given as text. Returns a default-constructed FormattedNumber if
// status already signals failure on entry or if the results object cannot be allocated (in which
// case status is set to U_MEMORY_ALLOCATION_ERROR).
FormattedNumber LocalizedNumberFormatter::formatDecimal(StringPiece value, UErrorCode &status) const {
    if (U_FAILURE(status)) {
        return FormattedNumber();
    }
    NumberFormatterResults *output = new NumberFormatterResults();
    if (output != nullptr) {
        output->quantity.setToDecNumber(value);
        // formatImpl wraps the results object in the FormattedNumber it returns.
        return formatImpl(output, status);
    }
    status = U_MEMORY_ALLOCATION_ERROR;
    return FormattedNumber();
}
// Shared back end of the format* methods. Formats results->quantity into results->string and
// returns a FormattedNumber wrapping the results object.
//
// Three paths are possible:
//   * While the call counter has not reached fMacros.threshold (or when the threshold is 0), the
//     number is formatted with NumberFormatterImpl::applyStatic, which builds a throw-away
//     implementation from the macros.
//   * On the call whose count equals the threshold, a reusable implementation is built with
//     NumberFormatterImpl::fromMacros, stored in fCompiled, and used.
//   * Afterwards, the stored implementation is used.
//
// A compiled implementation is only stored and used if it was built successfully. If fromMacros
// returns null, status is set to U_MEMORY_ALLOCATION_ERROR; if it reports a failure through
// status, the half-built object is deleted. In both cases fCompiled stays null, so later calls
// keep using applyStatic instead of dereferencing a null or unusable implementation.
//
// @param results Results object holding the quantity to format; always handed to the returned
//                FormattedNumber, including on failure.
// @param status  Error code; set on failure.
// @return A FormattedNumber wrapping results.
FormattedNumber
LocalizedNumberFormatter::formatImpl(impl::NumberFormatterResults *results, UErrorCode &status) const {
    uint32_t currentCount = fCallCount.load();
    if (currentCount <= fMacros.threshold && fMacros.threshold > 0) {
        // Only count calls until the threshold has been passed.
        currentCount = const_cast<LocalizedNumberFormatter *>(this)->fCallCount.fetch_add(1) + 1;
    }

    const NumberFormatterImpl *compiled = nullptr;
    if (currentCount == fMacros.threshold && fMacros.threshold > 0) {
        // This is the call that switches from the static path to the compiled path.
        compiled = NumberFormatterImpl::fromMacros(fMacros, status);
        if (compiled == nullptr) {
            if (!U_FAILURE(status)) {
                status = U_MEMORY_ALLOCATION_ERROR;
            }
        } else if (U_FAILURE(status)) {
            // Construction failed; do not publish an unusable implementation.
            delete compiled;
        } else {
            U_ASSERT(fCompiled.load() == nullptr);
            const_cast<LocalizedNumberFormatter *>(this)->fCompiled.store(compiled);
            compiled->apply(results->quantity, results->string, status);
        }
    } else if ((compiled = fCompiled.load()) != nullptr) {
        compiled->apply(results->quantity, results->string, status);
    } else {
        NumberFormatterImpl::applyStatic(fMacros, results->quantity, results->string, status);
    }

    return FormattedNumber(results);
}
// Returns the formatted number as a UnicodeString.
//
// A FormattedNumber that carries no results yields an empty string instead of dereferencing a
// null pointer. The format methods return a default-constructed FormattedNumber on failure,
// which is presumed to be such an instance.
UnicodeString FormattedNumber::toString() const {
    if (fResults == nullptr) {
        return UnicodeString();
    }
    return fResults->string.toUnicodeString();
}
// Appends the formatted number to the given Appendable and returns that Appendable.
//
// A FormattedNumber that carries no results appends nothing instead of dereferencing a null
// pointer. The format methods return a default-constructed FormattedNumber on failure, which is
// presumed to be such an instance.
Appendable &FormattedNumber::appendTo(Appendable &appendable) {
    if (fResults != nullptr) {
        appendable.appendString(fResults->string.chars(), fResults->string.length());
    }
    return appendable;
}
// Fills in the given FieldPosition from the formatted string.
//
// A FormattedNumber that carries no results cannot answer the query. In that case nothing is
// dereferenced and, unless status already holds a failure, status is set to
// U_INVALID_STATE_ERROR. The format methods return a default-constructed FormattedNumber on
// failure, which is presumed to be such an instance.
void FormattedNumber::populateFieldPosition(FieldPosition &fieldPosition, UErrorCode &status) {
    if (fResults == nullptr) {
        if (!U_FAILURE(status)) {
            status = U_INVALID_STATE_ERROR;
        }
        return;
    }
    fResults->string.populateFieldPosition(fieldPosition, 0, status);
}
// Fills in the given FieldPositionIterator from the formatted string.
//
// A FormattedNumber that carries no results cannot answer the query. In that case nothing is
// dereferenced and, unless status already holds a failure, status is set to
// U_INVALID_STATE_ERROR. The format methods return a default-constructed FormattedNumber on
// failure, which is presumed to be such an instance.
void
FormattedNumber::populateFieldPositionIterator(FieldPositionIterator &iterator, UErrorCode &status) {
    if (fResults == nullptr) {
        if (!U_FAILURE(status)) {
            status = U_INVALID_STATE_ERROR;
        }
        return;
    }
    fResults->string.populateFieldPositionIterator(iterator, status);
}
// Destructor: the FormattedNumber owns its results object and deletes it here. Deleting a null
// pointer is a no-op.
FormattedNumber::~FormattedNumber() {
delete fResults;
}

View File

@ -0,0 +1,455 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include <cstring.h>
#include <unicode/ures.h>
#include <uresimp.h>
#include <charstr.h>
#include "number_formatimpl.h"
#include "unicode/numfmt.h"
#include "number_patternstring.h"
#include "number_utils.h"
#include "unicode/numberformatter.h"
#include "unicode/dcfmtsym.h"
#include "number_scientific.h"
#include "number_compact.h"
using namespace icu::number::impl;
namespace {
// NOTE: In Java, the method to get a pattern from the resource bundle exists in NumberFormat.
// In C++, we have to implement that logic here.
// TODO: Make Java and C++ consistent?
// Selects which pattern getPatternForStyle() loads from the locale data: decimalFormat,
// currencyFormat, accountingFormat or percentFormat respectively.
enum CldrPatternStyle {
CLDR_PATTERN_STYLE_DECIMAL,
CLDR_PATTERN_STYLE_CURRENCY,
CLDR_PATTERN_STYLE_ACCOUNTING,
CLDR_PATTERN_STYLE_PERCENT
// TODO: Consider scientific format.
};
// Looks up "NumberElements/<nsName>/patterns/<patternKey>" in the given resource bundle.
//
// Two error codes are used: publicStatus reports failures while building the lookup key (in that
// case an empty string is returned and no lookup is attempted), while localStatus receives the
// outcome of the resource lookup itself so that the caller can decide whether to fall back.
const char16_t *
doGetPattern(UResourceBundle *res, const char *nsName, const char *patternKey, UErrorCode &publicStatus,
             UErrorCode &localStatus) {
    // Assemble the path into the resource bundle piece by piece.
    const char *const pathPieces[] = {"NumberElements/", nsName, "/patterns/", patternKey};
    CharString path;
    for (const char *piece : pathPieces) {
        path.append(piece, publicStatus);
    }
    if (!U_FAILURE(publicStatus)) {
        return ures_getStringByKeyWithFallback(res, path.data(), nullptr, &localStatus);
    }
    return u"";
}
// Loads the CLDR number pattern of the requested style for a locale and numbering system.
//
// The pattern is first looked up under the given numbering system; if that lookup fails and the
// numbering system is not "latn", the lookup is repeated under "latn".
//
// @param locale Locale whose resource bundle is opened.
// @param nsName Name of the numbering system to try first.
// @param style  Which pattern to load; unrecognized values are treated like PERCENT.
// @param status Set if the bundle cannot be opened or the lookup key cannot be built.
// @return The pattern, or an empty string on failure. Never returns nullptr: if neither lookup
//         finds a pattern, an empty string is returned. status is left untouched in that case,
//         as before.
const char16_t *getPatternForStyle(const Locale &locale, const char *nsName, CldrPatternStyle style,
                                   UErrorCode &status) {
    const char *patternKey;
    switch (style) {
        case CLDR_PATTERN_STYLE_DECIMAL:
            patternKey = "decimalFormat";
            break;
        case CLDR_PATTERN_STYLE_CURRENCY:
            patternKey = "currencyFormat";
            break;
        case CLDR_PATTERN_STYLE_ACCOUNTING:
            patternKey = "accountingFormat";
            break;
        case CLDR_PATTERN_STYLE_PERCENT:
        default:
            patternKey = "percentFormat";
            break;
    }

    LocalUResourceBundlePointer res(ures_open(nullptr, locale.getName(), &status));
    // Do not go on with a bundle that could not be opened.
    if (U_FAILURE(status)) { return u""; }

    // Attempt to get the pattern with the native numbering system.
    UErrorCode localStatus = U_ZERO_ERROR;
    const char16_t *pattern = doGetPattern(res.getAlias(), nsName, patternKey, status, localStatus);
    if (U_FAILURE(status)) { return u""; }

    // Fall back to latn if native numbering system does not have the right pattern
    if (U_FAILURE(localStatus) && uprv_strcmp("latn", nsName) != 0) {
        localStatus = U_ZERO_ERROR;
        pattern = doGetPattern(res.getAlias(), "latn", patternKey, status, localStatus);
        if (U_FAILURE(status)) { return u""; }
    }

    // A failed lookup leaves no string to return; keep the "empty string on failure" contract
    // rather than handing a null pointer to the caller.
    return pattern != nullptr ? pattern : u"";
}
// True when the unit's type string is "currency".
inline bool unitIsCurrency(const MeasureUnit &unit) {
    const char *type = unit.getType();
    return uprv_strcmp("currency", type) == 0;
}
// True when the unit's type string is "none".
inline bool unitIsNoUnit(const MeasureUnit &unit) {
    const char *type = unit.getType();
    return uprv_strcmp("none", type) == 0;
}
// True when the unit's subtype string is "percent". Only the subtype is examined here.
inline bool unitIsPercent(const MeasureUnit &unit) {
    const char *subtype = unit.getSubtype();
    return uprv_strcmp("percent", subtype) == 0;
}
// True when the unit's subtype string is "permille". Only the subtype is examined here.
inline bool unitIsPermille(const MeasureUnit &unit) {
    const char *subtype = unit.getSubtype();
    return uprv_strcmp("permille", subtype) == 0;
}
} // namespace
// Builds a heap-allocated implementation from the macros with the "safe" flag set to true.
// The caller receives the raw pointer and is responsible for deleting it.
NumberFormatterImpl *NumberFormatterImpl::fromMacros(const MacroProps &macros, UErrorCode &status) {
    const bool safe = true;
    NumberFormatterImpl *compiled = new NumberFormatterImpl(macros, safe, status);
    return compiled;
}
void NumberFormatterImpl::applyStatic(const MacroProps &macros, DecimalQuantity &inValue,
NumberStringBuilder &outString, UErrorCode &status) {
NumberFormatterImpl impl(macros, false, status);
impl.applyUnsafe(inValue, outString, status);
}
// NOTE: C++ SPECIFIC DIFFERENCE FROM JAVA:
// apply() is the "safe" variant: it works on a fresh, local MicroProps, into which the
// MicroPropsGenerator copies fMicros. applyUnsafe() re-uses fMicros directly and thereby saves
// that copy. See MicroProps::processQuantity() for details.
void NumberFormatterImpl::apply(DecimalQuantity &inValue, NumberStringBuilder &outString,
                                UErrorCode &status) const {
    if (!U_FAILURE(status)) {
        MicroProps localMicros;
        fMicroPropsGenerator->processQuantity(inValue, localMicros, status);
        if (!U_FAILURE(status)) {
            microsToString(localMicros, inValue, outString, status);
        }
    }
}
// "Unsafe" formatting variant: lets the generator chain write straight into this object's own
// fMicros instead of a local copy, then renders the quantity. Does nothing if status already
// signals failure, and skips rendering if processing the quantity fails.
void NumberFormatterImpl::applyUnsafe(DecimalQuantity &inValue, NumberStringBuilder &outString,
                                      UErrorCode &status) {
    if (!U_FAILURE(status)) {
        fMicroPropsGenerator->processQuantity(inValue, fMicros, status);
        if (!U_FAILURE(status)) {
            microsToString(fMicros, inValue, outString, status);
        }
    }
}
// Builds the generator chain for the given macros and remembers its head.
// The assignment is made in the constructor body rather than in a member-initializer list:
// macrosToMicroGenerator() reads and writes other members of this object (for example fMicros
// and fPatternInfo), so those members must already be constructed when it runs.
// Errors are reported through status; macrosToMicroGenerator() returns nullptr when the macros
// carry an error.
NumberFormatterImpl::NumberFormatterImpl(const MacroProps &macros, bool safe, UErrorCode &status) {
fMicroPropsGenerator = macrosToMicroGenerator(macros, safe, status);
}
//////////
const MicroPropsGenerator *
NumberFormatterImpl::macrosToMicroGenerator(const MacroProps &macros, bool safe, UErrorCode &status) {
const MicroPropsGenerator *chain = &fMicros;
// Check that macros is error-free before continuing.
if (macros.copyErrorTo(status)) {
return nullptr;
}
// TODO: Accept currency symbols from DecimalFormatSymbols?
// Pre-compute a few values for efficiency.
bool isCurrency = unitIsCurrency(macros.unit);
bool isNoUnit = unitIsNoUnit(macros.unit);
bool isPercent = isNoUnit && unitIsPercent(macros.unit);
bool isPermille = isNoUnit && unitIsPermille(macros.unit);
bool isCldrUnit = !isCurrency && !isNoUnit;
bool isAccounting =
macros.sign == UNUM_SIGN_ACCOUNTING || macros.sign == UNUM_SIGN_ACCOUNTING_ALWAYS;
CurrencyUnit currency(kDefaultCurrency, status);
if (isCurrency) {
currency = CurrencyUnit(macros.unit, status); // Restore CurrencyUnit from MeasureUnit
}
UNumberUnitWidth unitWidth = UNUM_UNIT_WIDTH_SHORT;
if (macros.unitWidth != UNUM_UNIT_WIDTH_COUNT) {
unitWidth = macros.unitWidth;
}
// Select the numbering system.
LocalPointer<const NumberingSystem> nsLocal;
const NumberingSystem *ns;
if (macros.symbols.isNumberingSystem()) {
ns = macros.symbols.getNumberingSystem();
} else {
// TODO: Is there a way to avoid creating the NumberingSystem object?
ns = NumberingSystem::createInstance(macros.locale, status);
// Give ownership to the function scope.
nsLocal.adoptInstead(ns);
}
const char *nsName = ns->getName();
// Load and parse the pattern string. It is used for grouping sizes and affixes only.
CldrPatternStyle patternStyle;
if (isPercent || isPermille) {
patternStyle = CLDR_PATTERN_STYLE_PERCENT;
} else if (!isCurrency || unitWidth == UNUM_UNIT_WIDTH_FULL_NAME) {
patternStyle = CLDR_PATTERN_STYLE_DECIMAL;
} else if (isAccounting) {
// NOTE: Although ACCOUNTING and ACCOUNTING_ALWAYS are only supported in currencies right now,
// the API contract allows us to add support to other units in the future.
patternStyle = CLDR_PATTERN_STYLE_ACCOUNTING;
} else {
patternStyle = CLDR_PATTERN_STYLE_CURRENCY;
}
const char16_t *pattern = getPatternForStyle(macros.locale, nsName, patternStyle, status);
auto patternInfo = new ParsedPatternInfo();
fPatternInfo.adoptInstead(patternInfo);
PatternParser::parseToPatternInfo(UnicodeString(pattern), *patternInfo, status);
/////////////////////////////////////////////////////////////////////////////////////
/// START POPULATING THE DEFAULT MICROPROPS AND BUILDING THE MICROPROPS GENERATOR ///
/////////////////////////////////////////////////////////////////////////////////////
// Symbols
if (macros.symbols.isDecimalFormatSymbols()) {
fMicros.symbols = macros.symbols.getDecimalFormatSymbols();
} else {
fMicros.symbols = new DecimalFormatSymbols(macros.locale, *ns, status);
// Give ownership to the NumberFormatterImpl.
fSymbols.adoptInstead(fMicros.symbols);
}
// Rounding strategy
if (!macros.rounder.isBogus()) {
fMicros.rounding = macros.rounder;
} else if (macros.notation.fType == Notation::NTN_COMPACT) {
fMicros.rounding = Rounder::integer().withMinDigits(2);
} else if (isCurrency) {
fMicros.rounding = Rounder::currency(UCURR_USAGE_STANDARD);
} else {
fMicros.rounding = Rounder::maxFraction(6);
}
fMicros.rounding.setLocaleData(currency, status);
// Grouping strategy
if (!macros.grouper.isBogus()) {
fMicros.grouping = macros.grouper;
} else if (macros.notation.fType == Notation::NTN_COMPACT) {
// Compact notation uses minGrouping by default since ICU 59
fMicros.grouping = Grouper::minTwoDigits();
} else {
fMicros.grouping = Grouper::defaults();
}
fMicros.grouping.setLocaleData(*fPatternInfo);
// Padding strategy
if (!macros.padder.isBogus()) {
fMicros.padding = macros.padder;
} else {
fMicros.padding = Padder::none();
}
// Integer width
if (!macros.integerWidth.isBogus()) {
fMicros.integerWidth = macros.integerWidth;
} else {
fMicros.integerWidth = IntegerWidth::zeroFillTo(1);
}
// Sign display
if (macros.sign != UNUM_SIGN_COUNT) {
fMicros.sign = macros.sign;
} else {
fMicros.sign = UNUM_SIGN_AUTO;
}
// Decimal mark display
if (macros.decimal != UNUM_DECIMAL_SEPARATOR_COUNT) {
fMicros.decimal = macros.decimal;
} else {
fMicros.decimal = UNUM_DECIMAL_SEPARATOR_AUTO;
}
// Use monetary separator symbols
fMicros.useCurrency = isCurrency;
// Inner modifier (scientific notation)
if (macros.notation.fType == Notation::NTN_SCIENTIFIC) {
fScientificHandler.adoptInstead(new ScientificHandler(&macros.notation, fMicros.symbols, chain));
chain = fScientificHandler.getAlias();
} else {
// No inner modifier required
fMicros.modInner = &fMicros.helpers.emptyStrongModifier;
}
// Middle modifier (patterns, positive/negative, currency symbols, percent)
auto patternModifier = new MutablePatternModifier(false);
fPatternModifier.adoptInstead(patternModifier);
patternModifier->setPatternInfo(fPatternInfo.getAlias());
patternModifier->setPatternAttributes(fMicros.sign, isPermille);
if (patternModifier->needsPlurals()) {
patternModifier->setSymbols(
fMicros.symbols,
currency,
unitWidth,
resolvePluralRules(macros.rules, macros.locale, status));
} else {
patternModifier->setSymbols(fMicros.symbols, currency, unitWidth, nullptr);
}
if (safe) {
fImmutablePatternModifier.adoptInstead(patternModifier->createImmutableAndChain(chain, status));
chain = fImmutablePatternModifier.getAlias();
} else {
patternModifier->addToChain(chain);
chain = patternModifier;
}
// Outer modifier (CLDR units and currency long names)
if (isCldrUnit) {
fLongNameHandler.adoptInstead(
new LongNameHandler(
LongNameHandler::forMeasureUnit(
macros.locale,
macros.unit,
unitWidth,
resolvePluralRules(macros.rules, macros.locale, status),
chain,
status)));
chain = fLongNameHandler.getAlias();
} else if (isCurrency && unitWidth == UNUM_UNIT_WIDTH_FULL_NAME) {
fLongNameHandler.adoptInstead(
new LongNameHandler(
LongNameHandler::forCurrencyLongNames(
macros.locale,
currency,
resolvePluralRules(macros.rules, macros.locale, status),
chain,
status)));
chain = fLongNameHandler.getAlias();
} else {
// No outer modifier required
fMicros.modOuter = &fMicros.helpers.emptyWeakModifier;
}
// Compact notation
// NOTE: Compact notation can (but might not) override the middle modifier and rounding.
// It therefore needs to go at the end of the chain.
if (macros.notation.fType == Notation::NTN_COMPACT) {
CompactType compactType = (isCurrency && unitWidth != UNUM_UNIT_WIDTH_FULL_NAME)
? CompactType::TYPE_CURRENCY : CompactType::TYPE_DECIMAL;
fCompactHandler.adoptInstead(
new CompactHandler(
macros.notation.fUnion.compactStyle,
macros.locale,
nsName,
compactType,
resolvePluralRules(macros.rules, macros.locale, status),
safe ? patternModifier : nullptr,
chain,
status));
chain = fCompactHandler.getAlias();
}
return chain;
}
/**
 * Picks the PluralRules to use: the caller-supplied instance when there is one, otherwise an
 * instance for `locale` that is built on first request and kept in fRules.
 */
const PluralRules *
NumberFormatterImpl::resolvePluralRules(const PluralRules *rulesPtr, const Locale &locale,
                                        UErrorCode &status) {
    if (rulesPtr == nullptr) {
        // Nothing supplied: create our own copy once, then keep handing out the cached alias.
        if (fRules.isNull()) {
            fRules.adoptInstead(PluralRules::forLocale(locale, status));
        }
        return fRules.getAlias();
    }
    return rulesPtr;
}
/**
 * Renders `quantity` into `string` according to `micros`: rounds, applies the integer width,
 * writes the digits, then wraps the result with the inner, middle and outer modifiers.
 * Returns the number length plus the lengths reported by each modifier.
 */
int32_t NumberFormatterImpl::microsToString(const MicroProps &micros, DecimalQuantity &quantity,
                                            NumberStringBuilder &string, UErrorCode &status) {
    // The quantity must be in its final shape before any digit is written.
    micros.rounding.apply(quantity, status);
    micros.integerWidth.apply(quantity, status);

    int32_t total = writeNumber(micros, quantity, string, status);

    // NOTE: When range formatting is added, these modifiers can bubble up.
    // For now they are all applied here. The inner ("strong") modifier always goes first.
    total += micros.modInner->apply(string, 0, total, status);

    if (!micros.padding.isValid()) {
        // No padding: middle then outer, each wrapping everything written so far.
        total += micros.modMiddle->apply(string, 0, total, status);
        total += micros.modOuter->apply(string, 0, total, status);
        return total;
    }

    // With padding, the padder takes care of applying the middle and outer modifiers.
    total += micros.padding.padAndApply(*micros.modMiddle, *micros.modOuter, string, 0, total, status);
    return total;
}
/**
 * Writes the bare number (no affixes) into `string`: the infinity symbol, the NaN symbol, or
 * integer digits followed by an optional decimal separator and the fraction digits.
 * Returns the sum of the lengths reported by the string builder.
 */
int32_t NumberFormatterImpl::writeNumber(const MicroProps &micros, DecimalQuantity &quantity,
                                         NumberStringBuilder &string, UErrorCode &status) {
    if (quantity.isInfinite()) {
        return string.insert(
                0,
                micros.symbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kInfinitySymbol),
                UNUM_INTEGER_FIELD,
                status);
    }

    if (quantity.isNaN()) {
        return string.insert(
                0,
                micros.symbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kNaNSymbol),
                UNUM_INTEGER_FIELD,
                status);
    }

    // Integer part first.
    int32_t written = writeIntegerDigits(micros, quantity, string, status);

    // Decimal separator: needed when there are fraction digits or when it is forced on.
    const bool wantsSeparator =
            quantity.getLowerDisplayMagnitude() < 0 || micros.decimal == UNUM_DECIMAL_SEPARATOR_ALWAYS;
    if (wantsSeparator) {
        // Currency formatting uses the monetary separator symbol.
        const DecimalFormatSymbols::ENumberFormatSymbol separator = micros.useCurrency
                ? DecimalFormatSymbols::ENumberFormatSymbol::kMonetarySeparatorSymbol
                : DecimalFormatSymbols::ENumberFormatSymbol::kDecimalSeparatorSymbol;
        written += string.insert(
                written, micros.symbols->getSymbol(separator), UNUM_DECIMAL_SEPARATOR_FIELD, status);
    }

    // Fraction part last.
    written += writeFractionDigits(micros, quantity, string, status);
    return written;
}
/**
 * Writes the integer digits of `quantity`, with grouping separators, at the front of `string`.
 * Digits are visited from position 0 upwards and each one is inserted at index 0, so the digit
 * at the highest position ends up leftmost. Returns the total length inserted.
 */
int32_t NumberFormatterImpl::writeIntegerDigits(const MicroProps &micros, DecimalQuantity &quantity,
                                                NumberStringBuilder &string, UErrorCode &status) {
    // Currency formatting uses the monetary grouping separator.
    const DecimalFormatSymbols::ENumberFormatSymbol groupingSymbol = micros.useCurrency
            ? DecimalFormatSymbols::ENumberFormatSymbol::kMonetaryGroupingSeparatorSymbol
            : DecimalFormatSymbols::ENumberFormatSymbol::kGroupingSeparatorSymbol;

    int written = 0;
    const int digitCount = quantity.getUpperDisplayMagnitude() + 1;
    for (int pos = 0; pos < digitCount; ++pos) {
        // The separator goes in before the digit at `pos`, so it lands to that digit's right.
        if (micros.grouping.groupAtPosition(pos, quantity)) {
            written += string.insert(
                    0, micros.symbols->getSymbol(groupingSymbol), UNUM_GROUPING_SEPARATOR_FIELD, status);
        }

        const int8_t digit = quantity.getDigit(pos);
        written += string.insert(
                0, getDigitFromSymbols(digit, *micros.symbols), UNUM_INTEGER_FIELD, status);
    }
    return written;
}
/**
 * Appends the fraction digits of `quantity` to `string`, starting with the digit at position -1
 * and moving towards the lower display magnitude. Returns the total length appended.
 */
int32_t NumberFormatterImpl::writeFractionDigits(const MicroProps &micros, DecimalQuantity &quantity,
                                                 NumberStringBuilder &string, UErrorCode &status) {
    int written = 0;
    const int digitCount = -quantity.getLowerDisplayMagnitude();
    for (int step = 0; step < digitCount; ++step) {
        // Fraction positions are negative: -1 is the first digit after the separator.
        const int8_t digit = quantity.getDigit(-step - 1);
        written += string.append(
                getDigitFromSymbols(digit, *micros.symbols), UNUM_FRACTION_FIELD, status);
    }
    return written;
}

View File

@ -0,0 +1,120 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#ifndef NUMBERFORMAT_NUMBERFORMATTERIMPL_H
#define NUMBERFORMAT_NUMBERFORMATTERIMPL_H
#include "number_types.h"
#include "number_stringbuilder.h"
#include "number_patternstring.h"
#include "number_utils.h"
#include "number_patternmodifier.h"
#include "number_longnames.h"
#include "number_compact.h"
U_NAMESPACE_BEGIN namespace number {
namespace impl {
/**
 * This is the "brain" of the number formatting pipeline. It ties all the pieces together, taking in a MacroProps and a
 * DecimalQuantity and outputting a properly formatted number string.
 *
 * An instance owns the helper objects (symbols, plural rules, handlers, modifiers) that it creates
 * while turning the MacroProps into a chain of MicroPropsGenerators.
 */
class NumberFormatterImpl {
public:
/**
 * Builds a "safe" MicroPropsGenerator, which is thread-safe and can be used repeatedly.
 * The caller owns the returned NumberFormatterImpl.
 */
static NumberFormatterImpl *fromMacros(const MacroProps &macros, UErrorCode &status);
/**
 * Builds and evaluates an "unsafe" MicroPropsGenerator, which is cheaper but can be used only once.
 */
static void
applyStatic(const MacroProps &macros, DecimalQuantity &inValue, NumberStringBuilder &outString,
UErrorCode &status);
/**
 * Evaluates the "safe" MicroPropsGenerator created by "fromMacros".
 */
void apply(DecimalQuantity &inValue, NumberStringBuilder &outString, UErrorCode &status) const;
private:
// Head of the MicroPropsGenerator linked list:
const MicroPropsGenerator *fMicroPropsGenerator = nullptr;
// Tail of the list:
MicroProps fMicros;
// Other fields possibly used by the number formatting pipeline:
// TODO: Convert some of these LocalPointers to value objects to reduce the number of news?
// Each of the pointers below is filled in by macrosToMicroGenerator only when the corresponding
// object has to be created; otherwise it stays null.
// Symbols created for the locale when the MacroProps did not supply any.
LocalPointer<const DecimalFormatSymbols> fSymbols;
// Plural rules created on demand by resolvePluralRules().
LocalPointer<const PluralRules> fRules;
LocalPointer<const ParsedPatternInfo> fPatternInfo;
// Created only for scientific notation.
LocalPointer<const ScientificHandler> fScientificHandler;
LocalPointer<const MutablePatternModifier> fPatternModifier;
// Created only on the "safe" code path.
LocalPointer<const ImmutablePatternModifier> fImmutablePatternModifier;
// Created for CLDR units and for currencies formatted with full names.
LocalPointer<const LongNameHandler> fLongNameHandler;
// Created only for compact notation.
LocalPointer<const CompactHandler> fCompactHandler;
NumberFormatterImpl(const MacroProps &macros, bool safe, UErrorCode &status);
// NOTE(review): presumably the one-shot evaluation used by applyStatic(); the definition is not
// next to this declaration, so confirm before relying on it.
void applyUnsafe(DecimalQuantity &inValue, NumberStringBuilder &outString, UErrorCode &status);
/**
 * If rulesPtr is non-null, return it. Otherwise, return a PluralRules owned by this object for the
 * specified locale, creating it if necessary.
 */
const PluralRules *
resolvePluralRules(const PluralRules *rulesPtr, const Locale &locale, UErrorCode &status);
/**
 * Synthesizes the MacroProps into a MicroPropsGenerator. All information, including the locale, is encoded into the
 * MicroPropsGenerator, except for the quantity itself, which is left abstract and must be provided to the returned
 * MicroPropsGenerator instance.
 *
 * @see MicroPropsGenerator
 * @param macros
 *            The {@link MacroProps} to consume. This method does not mutate the MacroProps instance.
 * @param safe
 *            If true, the returned MicroPropsGenerator will be thread-safe. If false, the returned value will
 *            <em>not</em> be thread-safe, intended for a single "one-shot" use only. Building the thread-safe
 *            object is more expensive.
 */
const MicroPropsGenerator *
macrosToMicroGenerator(const MacroProps &macros, bool safe, UErrorCode &status);
/**
 * Synthesizes the output string from a MicroProps and DecimalQuantity.
 *
 * @param micros
 *            The MicroProps after the quantity has been consumed. Will not be mutated.
 * @param quantity
 *            The DecimalQuantity to be rendered. May be mutated.
 * @param string
 *            The output string. Will be mutated.
 * @return The length of the written number plus the lengths reported by the applied modifiers.
 */
static int32_t
microsToString(const MicroProps &micros, DecimalQuantity &quantity, NumberStringBuilder &string,
UErrorCode &status);
/**
 * Writes the number without affixes: the infinity or NaN symbol, or the integer digits, an
 * optional decimal separator and the fraction digits. Returns the length written.
 */
static int32_t
writeNumber(const MicroProps &micros, DecimalQuantity &quantity, NumberStringBuilder &string,
UErrorCode &status);
/**
 * Inserts the integer digits and grouping separators at the front of the string.
 * Returns the length inserted.
 */
static int32_t
writeIntegerDigits(const MicroProps &micros, DecimalQuantity &quantity, NumberStringBuilder &string,
UErrorCode &status);
/**
 * Appends the fraction digits to the end of the string. Returns the length appended.
 */
static int32_t
writeFractionDigits(const MicroProps &micros, DecimalQuantity &quantity, NumberStringBuilder &string,
UErrorCode &status);
};
} // namespace impl
} // namespace number
U_NAMESPACE_END
#endif //NUMBERFORMAT_NUMBERFORMATTERIMPL_H

View File

@ -0,0 +1,47 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/numberformatter.h"
#include "number_patternstring.h"
using namespace icu::number;
// Returns a Grouper built from the placeholder sizes -2, -2. setLocaleData() only fills in sizes
// from the pattern while the first size is still -2, so this strategy defers to locale data.
// NOTE(review): the third initializer is presumably the min-two-digits flag (fMin2) — confirm
// against the Grouper declaration.
Grouper Grouper::defaults() {
return {-2, -2, false};
}
// Same placeholder sizes as defaults() (-2, -2, resolved later by setLocaleData()), but with the
// third initializer set to true.
// NOTE(review): presumably that initializer is fMin2, which makes groupAtPosition() require two
// digits before the first separator instead of one — confirm against the Grouper declaration.
Grouper Grouper::minTwoDigits() {
return {-2, -2, true};
}
// Returns a Grouper with both sizes set to -1. groupAtPosition() treats a first size of -1 as
// "no grouping", and setLocaleData() leaves such a Grouper untouched.
Grouper Grouper::none() {
return {-1, -1, false};
}
/**
 * Resolves the placeholder grouping sizes (-2) from the positive sub-pattern of `patternInfo`.
 * A Grouper whose first size is anything other than -2 is left unchanged.
 *
 * The pattern stores its grouping sizes packed in 16-bit slots of `groupingSizes`; the lowest
 * slot is the size nearest the decimal point.
 */
void Grouper::setLocaleData(const impl::ParsedPatternInfo &patternInfo) {
    if (fGrouping1 == -2) {
        auto primary = static_cast<int8_t> (patternInfo.positive.groupingSizes & 0xffff);
        auto secondary = static_cast<int8_t> ((patternInfo.positive.groupingSizes >> 16) & 0xffff);
        const auto tertiary = static_cast<int8_t> ((patternInfo.positive.groupingSizes >> 32) & 0xffff);

        // No second slot means the pattern has no grouping separator at all.
        if (secondary == -1) {
            primary = -1;
        }
        // No third slot means every group has the same size.
        if (tertiary == -1) {
            secondary = primary;
        }

        fGrouping1 = primary;
        fGrouping2 = secondary;
    }
}
/**
 * Tells whether a grouping separator belongs immediately to the right of the digit at
 * `position` (0 = ones digit) when formatting `value`. setLocaleData() must already have
 * replaced any -2 placeholder.
 */
bool Grouper::groupAtPosition(int32_t position, const impl::DecimalQuantity &value) const {
    U_ASSERT(fGrouping1 > -2);
    // Either -1 or 0 means "no grouping"
    if (fGrouping1 == -1 || fGrouping1 == 0) {
        return false;
    }

    // Distance past the first group; separators recur every fGrouping2 digits from there.
    const int32_t offset = position - fGrouping1;
    if (offset < 0 || (offset % fGrouping2) != 0) {
        return false;
    }

    // Only group when enough digits sit to the left of the first separator.
    const int32_t requiredDigits = fMin2 ? 2 : 1;
    return value.getUpperDisplayMagnitude() - fGrouping1 + 1 >= requiredDigits;
}

View File

@ -0,0 +1,41 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/numberformatter.h"
#include "number_types.h"
#include "number_decimalquantity.h"
using namespace icu::number;
using namespace icu::number::impl;
// Stores the minimum and maximum integer digit counts. A maximum of -1 is treated by apply()
// as "no maximum".
IntegerWidth::IntegerWidth(int8_t minInt, int8_t maxInt) {
fUnion.minMaxInt.fMinInt = minInt;
fUnion.minMaxInt.fMaxInt = maxInt;
}
/**
 * Creates an IntegerWidth with the given minimum number of integer digits and no maximum.
 * A value outside [0, kMaxIntFracSig] yields an IntegerWidth carrying
 * U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR instead.
 */
IntegerWidth IntegerWidth::zeroFillTo(int32_t minInt) {
    if (minInt < 0 || minInt > kMaxIntFracSig) {
        return {U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR};
    }
    // -1 marks "no maximum".
    return {static_cast<int8_t>(minInt), -1};
}
/**
 * Returns a copy of this IntegerWidth with the maximum number of integer digits set to `maxInt`.
 * An IntegerWidth already in error is returned unchanged; a value outside
 * [0, kMaxIntFracSig] yields one carrying U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR.
 */
IntegerWidth IntegerWidth::truncateAt(int32_t maxInt) {
    // No-op on error
    if (fHasError) {
        return *this;
    }
    if (maxInt < 0 || maxInt > kMaxIntFracSig) {
        return {U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR};
    }
    return {fUnion.minMaxInt.fMinInt, static_cast<int8_t>(maxInt)};
}
/**
 * Applies the configured integer digit bounds to `quantity`. When this IntegerWidth is in error,
 * sets `status` to U_ILLEGAL_ARGUMENT_ERROR and leaves the quantity alone.
 */
void IntegerWidth::apply(impl::DecimalQuantity &quantity, UErrorCode &status) const {
    if (fHasError) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    if (fUnion.minMaxInt.fMaxInt != -1) {
        quantity.setIntegerLength(fUnion.minMaxInt.fMinInt, fUnion.minMaxInt.fMaxInt);
        return;
    }
    // -1 means "no maximum".
    quantity.setIntegerLength(fUnion.minMaxInt.fMinInt, INT32_MAX);
}

View File

@ -0,0 +1,157 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include <unicode/ures.h>
#include <ureslocs.h>
#include <charstr.h>
#include <uresimp.h>
#include "number_longnames.h"
#include <algorithm>
#include <cstring.h>
using namespace icu::number::impl;
namespace {
//////////////////////////
/// BEGIN DATA LOADING ///
//////////////////////////
class PluralTableSink : public ResourceSink {
public:
explicit PluralTableSink(UnicodeString *outArray) : outArray(outArray) {
// Initialize the array to bogus strings.
for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) {
outArray[i].setToBogus();
}
}
void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
ResourceTable pluralsTable = value.getTable(status);
if (U_FAILURE(status)) { return; }
for (int i = 0; pluralsTable.getKeyAndValue(i, key, value); ++i) {
// In MeasureUnit data, ignore dnam and per units for now.
if (uprv_strcmp(key, "dnam") == 0 || uprv_strcmp(key, "per") == 0) {
continue;
}
StandardPlural::Form plural = StandardPlural::fromString(key, status);
if (U_FAILURE(status)) { return; }
if (!outArray[plural].isBogus()) {
continue;
}
outArray[plural] = value.getUnicodeString(status);
if (U_FAILURE(status)) { return; }
}
}
private:
UnicodeString *outArray;
};
/**
 * Loads the per-plural-form patterns for `unit` at the given width into `outArray`.
 * The resource path is "units[Narrow|Short]/<type>/<subtype>" in the unit bundle for `locale`.
 *
 * NOTE: outArray MUST have room for all StandardPlural values. No bounds checking is performed.
 */
void getMeasureData(const Locale &locale, const MeasureUnit &unit, const UNumberUnitWidth &width,
                    UnicodeString *outArray, UErrorCode &status) {
    PluralTableSink sink(outArray);
    LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
    if (U_FAILURE(status)) { return; }

    CharString key;
    key.append("units", status);
    // Narrow and short widths live in their own tables; every other width uses plain "units".
    switch (width) {
        case UNUM_UNIT_WIDTH_NARROW:
            key.append("Narrow", status);
            break;
        case UNUM_UNIT_WIDTH_SHORT:
            key.append("Short", status);
            break;
        default:
            break;
    }
    key.append("/", status).append(unit.getType(), status);
    key.append("/", status).append(unit.getSubtype(), status);

    ures_getAllItemsWithFallback(unitsBundle.getAlias(), key.data(), sink, status);
}
void getCurrencyLongNameData(const Locale &locale, const CurrencyUnit &currency, UnicodeString *outArray,
UErrorCode &status) {
// In ICU4J, this method gets a CurrencyData from CurrencyData.provider.
// TODO(ICU4J): Implement this without going through CurrencyData, like in ICU4C?
PluralTableSink sink(outArray);
LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_CURR, locale.getName(), &status));
if (U_FAILURE(status)) { return; }
ures_getAllItemsWithFallback(unitsBundle.getAlias(), "CurrencyUnitPatterns", sink, status);
if (U_FAILURE(status)) { return; }
for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) {
UnicodeString &pattern = outArray[i];
if (pattern.isBogus()) {
continue;
}
UBool isChoiceFormat = FALSE;
int32_t longNameLen = 0;
const char16_t *longName = ucurr_getPluralName(
currency.getISOCurrency(),
locale.getName(),
&isChoiceFormat,
StandardPlural::getKeyword(static_cast<StandardPlural::Form>(i)),
&longNameLen,
&status);
// Example pattern from data: "{0} {1}"
// Example output after find-and-replace: "{0} US dollars"
pattern.findAndReplace(UnicodeString(u"{1}"), UnicodeString(longName, longNameLen));
}
}
////////////////////////
/// END DATA LOADING ///
////////////////////////
} // namespace
/**
 * Builds a LongNameHandler whose modifiers come from the locale's patterns for `unit` at the
 * given width. If loading the data fails, the handler is returned without modifiers set up and
 * `status` carries the error.
 */
LongNameHandler
LongNameHandler::forMeasureUnit(const Locale &loc, const MeasureUnit &unit, const UNumberUnitWidth &width,
                                const PluralRules *rules, const MicroPropsGenerator *parent,
                                UErrorCode &status) {
    LongNameHandler result(rules, parent);
    UnicodeString simpleFormats[StandardPlural::Form::COUNT];
    getMeasureData(loc, unit, width, simpleFormats, status);
    if (U_SUCCESS(status)) {
        // TODO: What field to use for units?
        simpleFormatsToModifiers(simpleFormats, UNUM_FIELD_COUNT, result.fModifiers, status);
    }
    return result;
}
/**
 * Builds a LongNameHandler whose modifiers spell out the plural long names of `currency` for
 * `loc`. If loading the data fails, the handler is returned without modifiers set up and
 * `status` carries the error.
 */
LongNameHandler LongNameHandler::forCurrencyLongNames(const Locale &loc, const CurrencyUnit &currency,
                                                      const PluralRules *rules,
                                                      const MicroPropsGenerator *parent,
                                                      UErrorCode &status) {
    LongNameHandler result(rules, parent);
    UnicodeString simpleFormats[StandardPlural::Form::COUNT];
    getCurrencyLongNameData(loc, currency, simpleFormats, status);
    if (U_SUCCESS(status)) {
        simpleFormatsToModifiers(simpleFormats, UNUM_CURRENCY_FIELD, result.fModifiers, status);
    }
    return result;
}
void LongNameHandler::simpleFormatsToModifiers(const UnicodeString *simpleFormats, Field field,
SimpleModifier *output, UErrorCode &status) {
for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) {
UnicodeString simpleFormat = simpleFormats[i];
if (simpleFormat.isBogus()) {
simpleFormat = simpleFormats[StandardPlural::Form::OTHER];
}
if (simpleFormat.isBogus()) {
// There should always be data in the "other" plural variant.
status = U_INTERNAL_PROGRAM_ERROR;
return;
}
SimpleFormatter compiledFormatter(simpleFormat, 1, 1, status);
output[i] = SimpleModifier(compiledFormatter, field, false);
}
}
/**
 * Runs the parent generator, then selects the outer modifier matching the plural form of the
 * quantity as it will look after rounding.
 */
void LongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micros,
                                      UErrorCode &status) const {
    parent->processQuantity(quantity, micros, status);

    // Round a scratch copy so the plural form reflects the displayed value without
    // touching the caller's quantity.
    // TODO: Avoid the copy here?
    DecimalQuantity rounded(quantity);
    micros.rounding.apply(rounded, status);

    const auto pluralForm = rounded.getStandardPlural(rules);
    micros.modOuter = &fModifiers[pluralForm];
}

View File

@ -0,0 +1,43 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#ifndef NUMBERFORMAT_LONGNAMEHANDLER_H
#define NUMBERFORMAT_LONGNAMEHANDLER_H
#include "unicode/uversion.h"
#include "number_utils.h"
#include "number_modifiers.h"
U_NAMESPACE_BEGIN namespace number {
namespace impl {
/**
 * MicroPropsGenerator that sets the outer modifier of the MicroProps to a long-name pattern
 * (measure unit or currency long name) chosen by the plural form of the rounded quantity.
 */
class LongNameHandler : public MicroPropsGenerator, public UObject {
public:
/**
 * Creates a handler whose modifiers are built from the locale's currency unit patterns combined
 * with the plural long names of the currency.
 */
static LongNameHandler
forCurrencyLongNames(const Locale &loc, const CurrencyUnit &currency, const PluralRules *rules,
const MicroPropsGenerator *parent, UErrorCode &status);
/**
 * Creates a handler whose modifiers are built from the locale's patterns for the measure unit
 * at the requested width.
 */
static LongNameHandler
forMeasureUnit(const Locale &loc, const MeasureUnit &unit, const UNumberUnitWidth &width,
const PluralRules *rules, const MicroPropsGenerator *parent, UErrorCode &status);
/** Delegates to the parent generator, then sets micros.modOuter for the quantity's plural form. */
void
processQuantity(DecimalQuantity &quantity, MicroProps &micros, UErrorCode &status) const override;
private:
// One modifier per plural form, indexed by StandardPlural::Form.
SimpleModifier fModifiers[StandardPlural::Form::COUNT];
// Not owned; only the pointers are stored.
const PluralRules *rules;
const MicroPropsGenerator *parent;
LongNameHandler(const PluralRules *rules, const MicroPropsGenerator *parent)
: rules(rules), parent(parent) {}
/**
 * Compiles each pattern in simpleFormats into the matching slot of output, falling back to the
 * "other" pattern for plural forms that have none.
 */
static void simpleFormatsToModifiers(const UnicodeString *simpleFormats, Field field,
SimpleModifier *output, UErrorCode &status);
};
} // namespace impl
} // namespace number
U_NAMESPACE_END
#endif //NUMBERFORMAT_LONGNAMEHANDLER_H

View File

@ -0,0 +1,294 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include <umutex.h>
#include <ucln_cmn.h>
#include <ucln_in.h>
#include "number_modifiers.h"
using namespace icu::number::impl;
namespace {
// TODO: This is copied from simpleformatter.cpp
// Offset subtracted from a length code unit of a compiled SimpleFormatter pattern to obtain the
// length of the literal text that follows it (see the SimpleModifier constructor).
const int32_t ARG_NUM_LIMIT = 0x100;
// These are the default currency spacing UnicodeSets in CLDR.
// Pre-compute them for performance.
// The Java unit test testCurrencySpacingPatternStability() will start failing if these change in CLDR.
// Guards the one-time creation of the two sets below.
icu::UInitOnce gDefaultCurrencySpacingInitOnce = U_INITONCE_INITIALIZER;
// Set for the pattern "[:digit:]"; created by initDefaultCurrencySpacing().
UnicodeSet *UNISET_DIGIT = nullptr;
// Set for the pattern "[:^S:]"; created by initDefaultCurrencySpacing().
UnicodeSet *UNISET_NOTS = nullptr;
// Cleanup callback registered by initDefaultCurrencySpacing(): frees the two cached sets and
// resets the pointers to null. Always returns TRUE.
UBool U_CALLCONV cleanupDefaultCurrencySpacing() {
delete UNISET_DIGIT;
UNISET_DIGIT = nullptr;
delete UNISET_NOTS;
UNISET_NOTS = nullptr;
return TRUE;
}
// One-time initializer for the cached default currency spacing sets. Registers the cleanup
// callback first so that whatever gets allocated is released again, then builds and freezes
// both sets. Sets U_MEMORY_ALLOCATION_ERROR if either allocation fails.
void U_CALLCONV initDefaultCurrencySpacing(UErrorCode &status) {
    ucln_i18n_registerCleanup(UCLN_I18N_CURRENCY_SPACING, cleanupDefaultCurrencySpacing);

    UNISET_DIGIT = new UnicodeSet(UnicodeString(u"[:digit:]"), status);
    UNISET_NOTS = new UnicodeSet(UnicodeString(u"[:^S:]"), status);

    const bool bothAllocated = (UNISET_DIGIT != nullptr) && (UNISET_NOTS != nullptr);
    if (bothAllocated) {
        UNISET_DIGIT->freeze();
        UNISET_NOTS->freeze();
    } else {
        status = U_MEMORY_ALLOCATION_ERROR;
    }
}
} // namespace
/**
 * Wraps output[leftIndex, rightIndex) with the stored prefix and suffix, both tagged with
 * fField. Returns the combined length inserted.
 */
int32_t ConstantAffixModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex,
                                     UErrorCode &status) const {
    // The suffix goes in first: inserting the prefix first would shift rightIndex.
    const int suffixLength = output.insert(rightIndex, fSuffix, fField, status);
    const int prefixLength = output.insert(leftIndex, fPrefix, fField, status);
    return suffixLength + prefixLength;
}
/** Returns the length of the prefix in UTF-16 code units. The status argument is unused. */
int32_t ConstantAffixModifier::getPrefixLength(UErrorCode & /*status*/) const {
    return fPrefix.length();
}
/** Returns the number of code points in prefix and suffix together. The status argument is unused. */
int32_t ConstantAffixModifier::getCodePointCount(UErrorCode & /*status*/) const {
    const int32_t prefixCount = fPrefix.countChar32();
    const int32_t suffixCount = fSuffix.countChar32();
    return prefixCount + suffixCount;
}
// Returns the "strong" flag this modifier was constructed with.
bool ConstantAffixModifier::isStrong() const {
return fStrong;
}
/**
 * Builds a modifier from a compiled single-argument SimpleFormatter pattern.
 *
 * The compiled pattern is decoded as follows (as consumed by this code): if the code unit at
 * index 1 is non-zero it is ARG_NUM_LIMIT plus the prefix length, the prefix text starts at
 * index 2, and the argument marker follows the prefix; otherwise index 1 is the argument marker
 * itself and there is no prefix. If anything follows the argument marker, the next code unit is
 * ARG_NUM_LIMIT plus the suffix length and the suffix text comes right after it.
 *
 * @param simpleFormatter Formatter whose compiled pattern has exactly one argument.
 * @param field           Field to tag the inserted prefix and suffix with.
 * @param strong          Value reported by isStrong().
 */
SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong)
        : fCompiledPattern(simpleFormatter.getCompiledPattern()), fField(field), fStrong(strong) {
    // The assertion must name the member: there is no `compiledPattern` in scope here, so the
    // old spelling broke every build that compiles U_ASSERT.
    U_ASSERT(1 ==
             SimpleFormatter::getArgumentLimit(fCompiledPattern.getBuffer(), fCompiledPattern.length()));
    if (fCompiledPattern.charAt(1) != 0) {
        // Pattern starts with literal text: that is the prefix.
        fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT;
        fSuffixOffset = 3 + fPrefixLength;
    } else {
        // Pattern starts with the argument: no prefix.
        fPrefixLength = 0;
        fSuffixOffset = 2;
    }
    if (3 + fPrefixLength < fCompiledPattern.length()) {
        fSuffixLength = fCompiledPattern.charAt(fSuffixOffset) - ARG_NUM_LIMIT;
    } else {
        fSuffixLength = 0;
    }
}
/**
 * Creates an empty modifier (no prefix, no suffix). Needed so that LongNameHandler can hold an
 * array of SimpleModifier. Every scalar member is given a defined value: apply() reads fField
 * even when there is nothing to insert, so leaving it uninitialized would be a read of an
 * indeterminate value.
 */
SimpleModifier::SimpleModifier()
        : fField(UNUM_FIELD_COUNT), fStrong(false), fPrefixLength(0), fSuffixOffset(0), fSuffixLength(0) {
}
// Wraps output[leftIndex, rightIndex) with this modifier's prefix and suffix, tagged with the
// field given at construction. Returns the length inserted.
int32_t SimpleModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex,
UErrorCode &status) const {
return formatAsPrefixSuffix(output, leftIndex, rightIndex, fField, status);
}
/** Returns the length of the prefix in UTF-16 code units. The status argument is unused. */
int32_t SimpleModifier::getPrefixLength(UErrorCode & /*status*/) const {
    return fPrefixLength;
}
/**
 * Returns the number of code points in prefix and suffix together, counted inside the compiled
 * pattern. The status argument is unused.
 */
int32_t SimpleModifier::getCodePointCount(UErrorCode & /*status*/) const {
    // Prefix text starts at index 2; suffix text starts one past its length code unit.
    const int32_t prefixCount =
            (fPrefixLength > 0) ? fCompiledPattern.countChar32(2, fPrefixLength) : 0;
    const int32_t suffixCount =
            (fSuffixLength > 0) ? fCompiledPattern.countChar32(1 + fSuffixOffset, fSuffixLength) : 0;
    return prefixCount + suffixCount;
}
// Returns the "strong" flag this modifier was constructed with (false for a default-constructed one).
bool SimpleModifier::isStrong() const {
return fStrong;
}
/**
 * Inserts the pattern's prefix before startIndex and its suffix after endIndex, treating
 * result[startIndex, endIndex) as the pattern's single argument. Returns the number of UTF-16
 * code units added.
 */
int32_t
SimpleModifier::formatAsPrefixSuffix(NumberStringBuilder &result, int32_t startIndex, int32_t endIndex,
                                     Field field, UErrorCode &status) const {
    const bool hasPrefix = fPrefixLength > 0;
    const bool hasSuffix = fSuffixLength > 0;

    if (hasPrefix) {
        // Prefix text occupies [2, 2 + fPrefixLength) of the compiled pattern.
        result.insert(startIndex, fCompiledPattern, 2, 2 + fPrefixLength, field, status);
    }
    if (hasSuffix) {
        // The end of the value has moved right by the prefix length.
        const int32_t suffixTextStart = 1 + fSuffixOffset;
        result.insert(
                endIndex + fPrefixLength,
                fCompiledPattern,
                suffixTextStart,
                suffixTextStart + fSuffixLength,
                field,
                status);
    }
    return fPrefixLength + fSuffixLength;
}
/**
 * Wraps output[leftIndex, rightIndex) with the stored prefix and suffix builders.
 * Returns the combined length inserted.
 */
int32_t ConstantMultiFieldModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex,
                                          UErrorCode &status) const {
    // The suffix goes in first: inserting the prefix first would shift rightIndex.
    const int32_t suffixLength = output.insert(rightIndex, fSuffix, status);
    const int32_t prefixLength = output.insert(leftIndex, fPrefix, status);
    return suffixLength + prefixLength;
}
/** Returns the length of the prefix builder. The status argument is unused. */
int32_t ConstantMultiFieldModifier::getPrefixLength(UErrorCode & /*status*/) const {
    return fPrefix.length();
}
/** Returns the number of code points in prefix and suffix together. The status argument is unused. */
int32_t ConstantMultiFieldModifier::getCodePointCount(UErrorCode & /*status*/) const {
    const int32_t prefixCount = fPrefix.codePointCount();
    const int32_t suffixCount = fSuffix.codePointCount();
    return prefixCount + suffixCount;
}
// Returns the "strong" flag this modifier was constructed with.
bool ConstantMultiFieldModifier::isStrong() const {
return fStrong;
}
/**
 * Builds a prefix/suffix modifier that can also insert currency spacing between a currency
 * affix and the number.
 *
 * For each side, spacing is armed only when the affix code point that touches the number (the
 * last one of the prefix, the first one of the suffix) belongs to a currency field and matches
 * the locale's "currency match" set. In that case the "surrounding match" set and the insert
 * string for that side are stored; otherwise both are marked bogus so apply() skips that side.
 *
 * @param prefix  Prefix contents, copied into the modifier.
 * @param suffix  Suffix contents, copied into the modifier.
 * @param strong  Value reported by isStrong().
 * @param symbols Source of the currency spacing patterns.
 * @param status  Receives errors from loading the spacing sets.
 */
CurrencySpacingEnabledModifier::CurrencySpacingEnabledModifier(const NumberStringBuilder &prefix,
                                                               const NumberStringBuilder &suffix,
                                                               bool strong,
                                                               const DecimalFormatSymbols &symbols,
                                                               UErrorCode &status)
        : ConstantMultiFieldModifier(prefix, suffix, strong) {
    // Check for currency spacing. Do not build the UnicodeSets unless there is
    // a currency code point at a boundary.
    if (prefix.length() > 0 && prefix.fieldAt(prefix.length() - 1) == UNUM_CURRENCY_FIELD) {
        int prefixCp = prefix.getLastCodePoint();
        UnicodeSet prefixUnicodeSet = getUnicodeSet(symbols, IN_CURRENCY, PREFIX, status);
        if (prefixUnicodeSet.contains(prefixCp)) {
            fAfterPrefixUnicodeSet = getUnicodeSet(symbols, IN_NUMBER, PREFIX, status);
            fAfterPrefixUnicodeSet.freeze();
            fAfterPrefixInsert = getInsertString(symbols, PREFIX, status);
        } else {
            fAfterPrefixUnicodeSet.setToBogus();
            fAfterPrefixInsert.setToBogus();
        }
    } else {
        fAfterPrefixUnicodeSet.setToBogus();
        fAfterPrefixInsert.setToBogus();
    }
    if (suffix.length() > 0 && suffix.fieldAt(0) == UNUM_CURRENCY_FIELD) {
        // The suffix touches the number with its FIRST code point (the same position whose field
        // was just tested), matching what applyCurrencySpacingAffix() inspects on the unsafe
        // path. Testing the last code point gave wrong results for multi-character suffixes.
        int suffixCp = suffix.codePointAt(0);
        UnicodeSet suffixUnicodeSet = getUnicodeSet(symbols, IN_CURRENCY, SUFFIX, status);
        if (suffixUnicodeSet.contains(suffixCp)) {
            fBeforeSuffixUnicodeSet = getUnicodeSet(symbols, IN_NUMBER, SUFFIX, status);
            fBeforeSuffixUnicodeSet.freeze();
            fBeforeSuffixInsert = getInsertString(symbols, SUFFIX, status);
        } else {
            fBeforeSuffixUnicodeSet.setToBogus();
            fBeforeSuffixInsert.setToBogus();
        }
    } else {
        fBeforeSuffixUnicodeSet.setToBogus();
        fBeforeSuffixInsert.setToBogus();
    }
}
/**
 * Inserts currency spacing where required, then wraps output[leftIndex, rightIndex) with the
 * prefix and suffix. Returns the total length inserted (spacing plus affixes).
 *
 * Spacing is added after the prefix when the first code point of the number is in the stored
 * "after prefix" set, and before the suffix when the last code point of the number is in the
 * stored "before suffix" set. Nothing is added for an empty number.
 */
int32_t CurrencySpacingEnabledModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex,
                                              UErrorCode &status) const {
    // Currency spacing logic
    int length = 0;
    if (rightIndex - leftIndex > 0 && !fAfterPrefixUnicodeSet.isBogus() &&
        fAfterPrefixUnicodeSet.contains(output.codePointAt(leftIndex))) {
        // TODO: Should we use the CURRENCY field here?
        length += output.insert(leftIndex, fAfterPrefixInsert, UNUM_FIELD_COUNT, status);
    }
    // The number now ends at rightIndex + length: any spacing inserted above shifted it right.
    // Looking at rightIndex alone would test a code point inside the number, or even the
    // spacing that was just inserted when the number is short.
    if (rightIndex - leftIndex > 0 && !fBeforeSuffixUnicodeSet.isBogus() &&
        fBeforeSuffixUnicodeSet.contains(output.codePointBefore(rightIndex + length))) {
        // TODO: Should we use the CURRENCY field here?
        length += output.insert(rightIndex + length, fBeforeSuffixInsert, UNUM_FIELD_COUNT, status);
    }
    // Call super for the remaining logic
    length += ConstantMultiFieldModifier::apply(output, leftIndex, rightIndex + length, status);
    return length;
}
/**
 * Unsafe code path: adds currency spacing to a string that already contains prefix, number and
 * suffix. The prefix occupies [prefixStart, prefixStart + prefixLen) and the suffix starts at
 * suffixStart. Returns the length inserted.
 */
int32_t
CurrencySpacingEnabledModifier::applyCurrencySpacing(NumberStringBuilder &output, int32_t prefixStart,
                                                     int32_t prefixLen, int32_t suffixStart,
                                                     int32_t suffixLen,
                                                     const DecimalFormatSymbols &symbols,
                                                     UErrorCode &status) {
    // Spacing separates an affix from the number, so an empty number needs none.
    const bool numberIsEmpty = !(suffixStart - prefixStart - prefixLen > 0);
    if (numberIsEmpty) {
        return 0;
    }

    int inserted = 0;
    if (prefixLen > 0) {
        inserted += applyCurrencySpacingAffix(output, prefixStart + prefixLen, PREFIX, symbols, status);
    }
    if (suffixLen > 0) {
        // Spacing added after the prefix has pushed the suffix to the right.
        inserted += applyCurrencySpacingAffix(output, suffixStart + inserted, SUFFIX, symbols, status);
    }
    return inserted;
}
/**
 * Inserts the spacing string at `index`, the boundary between one affix and the number, when
 * the affix code point at the boundary is a currency field in the "currency match" set and the
 * number code point at the boundary is in the "surrounding match" set.
 * Returns the length inserted, or 0 when no spacing applies.
 */
int32_t
CurrencySpacingEnabledModifier::applyCurrencySpacingAffix(NumberStringBuilder &output, int32_t index,
                                                          EAffix affix,
                                                          const DecimalFormatSymbols &symbols,
                                                          UErrorCode &status) {
    const bool isPrefix = (affix == PREFIX);

    // NOTE: For prefix, output.fieldAt(index-1) gets the last field type in the prefix.
    // This works even if the last code point in the prefix is 2 code units because the
    // field value gets populated to both indices in the field array.
    const Field affixField = output.fieldAt(isPrefix ? index - 1 : index);
    if (affixField != UNUM_CURRENCY_FIELD) {
        return 0;
    }

    // Affix side of the boundary.
    const int affixCp = isPrefix ? output.codePointBefore(index) : output.codePointAt(index);
    if (!getUnicodeSet(symbols, IN_CURRENCY, affix, status).contains(affixCp)) {
        return 0;
    }

    // Number side of the boundary.
    const int numberCp = isPrefix ? output.codePointAt(index) : output.codePointBefore(index);
    if (!getUnicodeSet(symbols, IN_NUMBER, affix, status).contains(numberCp)) {
        return 0;
    }

    // NOTE: This next line *inserts* the spacing string, triggering an arraycopy.
    // It would be more efficient if this could be done before affixes were attached,
    // so that it could be prepended/appended instead of inserted.
    // However, the build code path is more efficient, and this is the most natural
    // place to put currency spacing in the non-build code path.
    // TODO: Should we use the CURRENCY field here?
    return output.insert(index, getInsertString(symbols, affix, status), UNUM_FIELD_COUNT, status);
}
/**
 * Returns the UnicodeSet for one currency spacing pattern of `symbols`: the "currency match"
 * pattern for IN_CURRENCY, the "surrounding match" pattern otherwise, on the given affix side.
 * The two default CLDR patterns are served from pre-built sets; anything else is parsed.
 * Returns an empty set if the cached sets cannot be initialized.
 */
UnicodeSet
CurrencySpacingEnabledModifier::getUnicodeSet(const DecimalFormatSymbols &symbols, EPosition position,
                                              EAffix affix, UErrorCode &status) {
    // Ensure the static defaults are initialized:
    umtx_initOnce(gDefaultCurrencySpacingInitOnce, &initDefaultCurrencySpacing, status);
    if (U_FAILURE(status)) {
        return UnicodeSet();
    }

    const auto spacingType =
            (position == IN_CURRENCY) ? UNUM_CURRENCY_MATCH : UNUM_CURRENCY_SURROUNDING_MATCH;
    const UnicodeString& pattern =
            symbols.getPatternForCurrencySpacing(spacingType, affix == SUFFIX, status);

    if (pattern.compare(u"[:digit:]", -1) == 0) {
        return *UNISET_DIGIT;
    }
    if (pattern.compare(u"[:^S:]", -1) == 0) {
        return *UNISET_NOTS;
    }
    return UnicodeSet(pattern, status);
}
// Returns the currency spacing insert string (UNUM_CURRENCY_INSERT) of `symbols` for the given
// affix side; the second argument selects the suffix-side value when affix == SUFFIX.
UnicodeString
CurrencySpacingEnabledModifier::getInsertString(const DecimalFormatSymbols &symbols, EAffix affix,
UErrorCode &status) {
return symbols.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, affix == SUFFIX, status);
}

View File

@ -0,0 +1,249 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#ifndef NUMBERFORMAT_MODIFIERS_H
#define NUMBERFORMAT_MODIFIERS_H
#include <algorithm>
#include <cstdint>
#include <unicode/uniset.h>
#include <unicode/simpleformatter.h>
#include <standardplural.h>
#include "number_stringbuilder.h"
#include "number_types.h"
U_NAMESPACE_BEGIN namespace number {
namespace impl {
/**
 * The canonical implementation of {@link Modifier}, containing a prefix and suffix string.
 * Both strings are inserted with a single field type.
 * TODO: This is not currently being used by real code and could be removed.
 */
class ConstantAffixModifier : public Modifier, public UObject {
public:
/**
 * @param prefix String inserted before the value; copied.
 * @param suffix String inserted after the value; copied.
 * @param field  Field used to tag both strings.
 * @param strong Value reported by isStrong().
 */
ConstantAffixModifier(const UnicodeString &prefix, const UnicodeString &suffix, Field field,
bool strong)
: fPrefix(prefix), fSuffix(suffix), fField(field), fStrong(strong) {}
/** Inserts the suffix at rightIndex and the prefix at leftIndex; returns the length added. */
int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
UErrorCode &status) const override;
/** Length of the prefix in UTF-16 code units. */
int32_t getPrefixLength(UErrorCode &status) const override;
/** Number of code points in prefix and suffix together. */
int32_t getCodePointCount(UErrorCode &status) const override;
bool isStrong() const override;
private:
UnicodeString fPrefix;
UnicodeString fSuffix;
Field fField;
bool fStrong;
};
/**
 * The second primary implementation of {@link Modifier}, this one consuming a {@link SimpleFormatter}
 * pattern. The text before the pattern's argument becomes the prefix and the text after it
 * becomes the suffix.
 */
class SimpleModifier : public Modifier, public UMemory {
public:
/**
 * @param simpleFormatter Formatter with exactly one argument; its compiled pattern is copied.
 * @param field           Field used to tag the inserted text.
 * @param strong          Value reported by isStrong().
 */
SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong);
// Default constructor, needed because LongNameHandler stores an array of SimpleModifier.
// Produces a modifier with no prefix and no suffix.
SimpleModifier();
/** Wraps the value in [leftIndex, rightIndex) with prefix and suffix; returns the length added. */
int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
UErrorCode &status) const override;
/** Length of the prefix in UTF-16 code units. */
int32_t getPrefixLength(UErrorCode &status) const override;
/** Number of code points in prefix and suffix together. */
int32_t getCodePointCount(UErrorCode &status) const override;
bool isStrong() const override;
/**
 * TODO: This belongs in SimpleFormatterImpl. The only reason I haven't moved it there yet is because
 * NumberStringBuilder is an internal class and SimpleFormatterImpl feels like it should not depend on it.
 *
 * <p>
 * Formats a value that is already stored inside the string builder <code>result</code> between the indices
 * <code>startIndex</code> and <code>endIndex</code> by inserting characters before the start index and after the
 * end index.
 *
 * <p>
 * This is well-defined only for patterns with exactly one argument.
 *
 * @param result
 *            The string builder containing the value argument.
 * @param startIndex
 *            The left index of the value within the string builder.
 * @param endIndex
 *            The right index of the value within the string builder.
 * @param field
 *            The field used to tag the inserted characters.
 * @return The number of UTF-16 code units that were added to the string builder.
 */
int32_t
formatAsPrefixSuffix(NumberStringBuilder &result, int32_t startIndex, int32_t endIndex, Field field,
UErrorCode &status) const;
private:
// Compiled SimpleFormatter pattern that the prefix and suffix text are read from.
UnicodeString fCompiledPattern;
Field fField;
bool fStrong;
// Length of the prefix text, which starts at index 2 of fCompiledPattern when present.
int32_t fPrefixLength;
// Index of the suffix length code unit in fCompiledPattern; the suffix text starts one later.
int32_t fSuffixOffset;
// Length of the suffix text.
int32_t fSuffixLength;
};
/**
* An implementation of {@link Modifier} that allows for multiple types of fields in the same modifier. Constructed
* based on the contents of two {@link NumberStringBuilder} instances (one for the prefix, one for the suffix).
*
* The prefix and suffix builders are copied into this object at construction time.
*/
class ConstantMultiFieldModifier : public Modifier, public UMemory {
public:
// Copies both builders by value and records the strength flag.
ConstantMultiFieldModifier(const NumberStringBuilder &prefix, const NumberStringBuilder &suffix,
bool strong) : fPrefix(prefix), fSuffix(suffix), fStrong(strong) {}
// Modifier interface overrides (defined outside this header).
int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
UErrorCode &status) const override;
int32_t getPrefixLength(UErrorCode &status) const override;
int32_t getCodePointCount(UErrorCode &status) const override;
bool isStrong() const override;
protected:
// NOTE: In Java, these are stored as array pointers. In C++, the NumberStringBuilder is stored by
// value and is treated internally as immutable.
// Protected (not private) so that subclasses such as CurrencySpacingEnabledModifier can read them.
NumberStringBuilder fPrefix;
NumberStringBuilder fSuffix;
bool fStrong;
};
/**
* Identical to {@link ConstantMultiFieldModifier}, but supports currency spacing.
*
* Offers two entry points: the instance-based "safe" path (constructor + apply) and the static "unsafe" path
* (applyCurrencySpacing), which MutablePatternModifier::apply calls directly on a builder.
*/
class CurrencySpacingEnabledModifier : public ConstantMultiFieldModifier {
public:
/** Safe code path */
CurrencySpacingEnabledModifier(const NumberStringBuilder &prefix, const NumberStringBuilder &suffix,
bool strong, const DecimalFormatSymbols &symbols, UErrorCode &status);
int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
UErrorCode &status) const override;
/**
* Unsafe code path.
* Takes the start index and length of a prefix and of a suffix that are already present in <code>output</code>.
* NOTE(review): the return value is presumably the number of code units inserted; confirm in the definition.
*/
static int32_t
applyCurrencySpacing(NumberStringBuilder &output, int32_t prefixStart, int32_t prefixLen,
int32_t suffixStart, int32_t suffixLen, const DecimalFormatSymbols &symbols,
UErrorCode &status);
private:
// Spacing data for the safe path; presumably populated by the constructor from the symbols (confirm).
UnicodeSet fAfterPrefixUnicodeSet;
UnicodeString fAfterPrefixInsert;
UnicodeSet fBeforeSuffixUnicodeSet;
UnicodeString fBeforeSuffixInsert;
// Which affix a spacing rule applies to.
enum EAffix {
PREFIX, SUFFIX
};
// Which side of the currency/number boundary a UnicodeSet describes.
enum EPosition {
IN_CURRENCY, IN_NUMBER
};
/** Unsafe code path */
static int32_t applyCurrencySpacingAffix(NumberStringBuilder &output, int32_t index, EAffix affix,
const DecimalFormatSymbols &symbols, UErrorCode &status);
// Helpers that look up spacing data in the symbols for a given affix/position.
static UnicodeSet
getUnicodeSet(const DecimalFormatSymbols &symbols, EPosition position, EAffix affix,
UErrorCode &status);
static UnicodeString
getInsertString(const DecimalFormatSymbols &symbols, EAffix affix, UErrorCode &status);
};
/**
 * A no-op Modifier: apply() leaves the builder untouched and every length query returns 0.
 * Only the strength flag passed at construction is remembered.
 */
class EmptyModifier : public Modifier, public UMemory {
  public:
    explicit EmptyModifier(bool isStrong) : fStrong(isStrong) {}

    /** Does nothing; the parameters are intentionally unnamed because none of them is used. */
    int32_t apply(NumberStringBuilder &, int32_t, int32_t, UErrorCode &) const override {
        return 0;
    }

    /** Always 0: there is no prefix. */
    int32_t getPrefixLength(UErrorCode &) const override {
        return 0;
    }

    /** Always 0: there is neither prefix nor suffix. */
    int32_t getCodePointCount(UErrorCode &) const override {
        return 0;
    }

    /** Returns the flag supplied to the constructor. */
    bool isStrong() const override {
        return fStrong;
    }

  private:
    bool fStrong;
};
/**
 * A ParameterizedModifier by itself is NOT a Modifier. Rather, it wraps a data structure containing two or more
 * Modifiers and returns the modifier appropriate for the current situation.
 *
 * Every Modifier stored here is OWNED by this object and deleted in the destructor. Because ownership is held
 * through raw pointers, both copy construction and copy assignment are disabled; a shallow copy would lead to
 * the same Modifier being deleted twice.
 */
class ParameterizedModifier : public UMemory {
  public:
    // NOTE: mods is zero-initialized (to nullptr)
    ParameterizedModifier() : mods() {
    }

    // No copying! (Neither construction nor assignment; see the class comment.)
    ParameterizedModifier(const ParameterizedModifier &other) = delete;

    ParameterizedModifier &operator=(const ParameterizedModifier &other) = delete;

    /** Deletes every adopted modifier; unused slots are nullptr, which is safe to delete. */
    ~ParameterizedModifier() {
        for (const Modifier *mod : mods) {
            delete mod;
        }
    }

    /** Both modifiers are ADOPTED. They occupy slots 0 (positive) and 1 (negative). */
    void adoptPositiveNegativeModifiers(const Modifier *positive, const Modifier *negative) {
        mods[0] = positive;
        mods[1] = negative;
    }

    /** The modifier is ADOPTED. */
    void adoptSignPluralModifier(bool isNegative, StandardPlural::Form plural, const Modifier *mod) {
        mods[getModIndex(isNegative, plural)] = mod;
    }

    /** Returns a reference to the modifier; no ownership change. */
    const Modifier *getModifier(bool isNegative) const {
        return mods[isNegative ? 1 : 0];
    }

    /** Returns a reference to the modifier; no ownership change. */
    const Modifier *getModifier(bool isNegative, StandardPlural::Form plural) const {
        return mods[getModIndex(isNegative, plural)];
    }

  private:
    // Two slots (positive, negative) per plural form.
    const Modifier *mods[2 * StandardPlural::COUNT];

    /** Maps (sign, plural form) to a slot: even index = positive, odd index = negative. */
    inline static int32_t getModIndex(bool isNegative, StandardPlural::Form plural) {
        return static_cast<int32_t>(plural) * 2 + (isNegative ? 1 : 0);
    }
};
} // namespace impl
} // namespace number
U_NAMESPACE_END
#endif //NUMBERFORMAT_MODIFIERS_H

View File

@ -0,0 +1,68 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/numberformatter.h"
#include "number_types.h"
using namespace icu::number;
using namespace icu::number::impl;
/**
 * Builds scientific notation settings: engineering interval 1, no required minimum integer digits,
 * at least 1 exponent digit, automatic exponent sign display.
 */
ScientificNotation Notation::scientific() {
    // NOTE: ISO C++ does not allow C99 designated initializers, so each field is assigned individually.
    ScientificSettings sci;
    sci.fExponentSignDisplay = UNUM_SIGN_AUTO;
    sci.fMinExponentDigits = 1;
    sci.fRequireMinInt = false;
    sci.fEngineeringInterval = 1;

    NotationUnion payload;
    payload.scientific = sci;
    return {NTN_SCIENTIFIC, payload};
}
/**
 * Builds engineering notation settings: identical to scientific() except that the engineering
 * interval is 3 instead of 1.
 */
ScientificNotation Notation::engineering() {
    ScientificSettings eng;
    eng.fExponentSignDisplay = UNUM_SIGN_AUTO;
    eng.fMinExponentDigits = 1;
    eng.fRequireMinInt = false;
    eng.fEngineeringInterval = 3;

    NotationUnion payload;
    payload.scientific = eng;
    return {NTN_SCIENTIFIC, payload};
}
/** Builds a compact notation whose style is CompactStyle::UNUM_SHORT. */
Notation Notation::compactShort() {
    NotationUnion payload;
    payload.compactStyle = CompactStyle::UNUM_SHORT;
    return {NTN_COMPACT, payload};
}
/** Builds a compact notation whose style is CompactStyle::UNUM_LONG. */
Notation Notation::compactLong() {
    NotationUnion payload;
    payload.compactStyle = CompactStyle::UNUM_LONG;
    return {NTN_COMPACT, payload};
}
/** Returns a default-constructed Notation (no scientific or compact settings are supplied here). */
Notation Notation::simple() {
return {};
}
/**
 * Returns a copy of this notation with a different minimum number of exponent digits.
 *
 * @param minExponentDigits Accepted when 0 <= value < kMaxIntFracSig.
 * @return The updated notation, or a notation constructed from
 *         U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR when the argument is out of range.
 */
ScientificNotation
ScientificNotation::withMinExponentDigits(int32_t minExponentDigits) const {
    const bool inRange = minExponentDigits >= 0 && minExponentDigits < kMaxIntFracSig;
    if (!inRange) {
        return {U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR};
    }

    // Start from the current settings so that all other fields carry over unchanged.
    ScientificSettings updated = fUnion.scientific;
    updated.fMinExponentDigits = static_cast<int8_t>(minExponentDigits);
    NotationUnion payload = {updated};
    return {NTN_SCIENTIFIC, payload};
}
/**
 * Returns a copy of this notation with a different exponent sign display; all other scientific
 * settings are carried over from the current instance.
 */
ScientificNotation
ScientificNotation::withExponentSignDisplay(UNumberSignDisplay exponentSignDisplay) const {
    ScientificSettings updated = fUnion.scientific;
    updated.fExponentSignDisplay = exponentSignDisplay;
    NotationUnion payload = {updated};
    return {NTN_SCIENTIFIC, payload};
}

View File

@ -0,0 +1,76 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/numberformatter.h"
#include "number_types.h"
#include "number_stringbuilder.h"
using namespace icu::number::impl;
namespace {

/**
 * Inserts paddingCp into the builder at the given index, requiredPadding times.
 *
 * @return U16_LENGTH(paddingCp) multiplied by requiredPadding, i.e. the number of UTF-16 code units
 *         that the requested padding occupies.
 */
int32_t
addPaddingHelper(UChar32 paddingCp, int32_t requiredPadding, NumberStringBuilder &string, int32_t index,
                 UErrorCode &status) {
    // TODO: If appending to the end, this will cause actual insertion operations. Improve.
    for (int32_t remaining = requiredPadding; remaining > 0; --remaining) {
        string.insertCodePoint(index, paddingCp, UNUM_FIELD_COUNT, status);
    }
    return requiredPadding * U16_LENGTH(paddingCp);
}

}
/** Creates a padder with a target width, a padding code point, and a pad position. */
Padder::Padder(UChar32 cp, int32_t width, UNumberFormatPadPosition position)
        : fWidth(width) {
    fUnion.padding.fPosition = position;
    fUnion.padding.fCp = cp;
}
// Width-only constructor: fUnion is left untouched. Used below by none() (width -1) and by
// codePoints() for its out-of-range result.
Padder::Padder(int32_t width) : fWidth(width) {}
// Returns a Padder whose width is -1. padAndApply computes requiredPadding as fWidth minus the
// content lengths, so a negative width yields "no padding required" whenever those lengths are non-negative.
Padder Padder::none() {
return {-1};
}
/**
 * Creates a padder that pads with the given code point up to targetWidth at the given position.
 *
 * @return The padder, or a Padder constructed from U_NUMBER_PADDING_WIDTH_OUT_OF_RANGE_ERROR when
 *         targetWidth is negative.
 */
Padder Padder::codePoints(UChar32 cp, int32_t targetWidth, UNumberFormatPadPosition position) {
    // TODO: Validate the code point?
    if (targetWidth < 0) {
        return {U_NUMBER_PADDING_WIDTH_OUT_OF_RANGE_ERROR};
    }
    return {cp, targetWidth, position};
}
/**
 * Applies mod1 and then mod2 around the content of the builder, inserting padding code points so that
 * the total code point count reaches fWidth.
 *
 * Padding that belongs between the affixes and the number (AFTER_PREFIX / BEFORE_SUFFIX) is inserted
 * before the modifiers are applied; padding that belongs outside the affixes (BEFORE_PREFIX /
 * AFTER_SUFFIX) is inserted afterwards.
 *
 * @return The running total of the values returned by the modifiers and the padding helper.
 */
int32_t Padder::padAndApply(const impl::Modifier &mod1, const impl::Modifier &mod2,
                            impl::NumberStringBuilder &string, int32_t leftIndex, int32_t rightIndex,
                            UErrorCode &status) const {
    int32_t modLength = mod1.getCodePointCount(status) + mod2.getCodePointCount(status);
    int32_t requiredPadding = fWidth - modLength - string.codePointCount();
    U_ASSERT(leftIndex == 0 &&
             rightIndex == string.length()); // fix the previous line to remove this assertion

    int added = 0;
    if (requiredPadding <= 0) {
        // Padding is not required.
        added += mod1.apply(string, leftIndex, rightIndex, status);
        added += mod2.apply(string, leftIndex, rightIndex + added, status);
        return added;
    }

    const PadPosition where = fUnion.padding.fPosition;
    const UChar32 padCp = fUnion.padding.fCp;

    // Inner padding goes in first so that the modifiers end up wrapping it.
    switch (where) {
        case UNUM_PAD_AFTER_PREFIX:
            added += addPaddingHelper(padCp, requiredPadding, string, leftIndex, status);
            break;
        case UNUM_PAD_BEFORE_SUFFIX:
            added += addPaddingHelper(padCp, requiredPadding, string, rightIndex + added, status);
            break;
        default:
            break;
    }

    added += mod1.apply(string, leftIndex, rightIndex + added, status);
    added += mod2.apply(string, leftIndex, rightIndex + added, status);

    // Outer padding goes in last so that it surrounds the already-applied modifiers.
    switch (where) {
        case UNUM_PAD_BEFORE_PREFIX:
            added += addPaddingHelper(padCp, requiredPadding, string, leftIndex, status);
            break;
        case UNUM_PAD_AFTER_SUFFIX:
            added += addPaddingHelper(padCp, requiredPadding, string, rightIndex + added, status);
            break;
        default:
            break;
    }

    return added;
}

View File

@ -0,0 +1,342 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include <cstring.h>
#include "number_patternmodifier.h"
#include "unicode/dcfmtsym.h"
#include "unicode/ucurr.h"
using namespace icu::number::impl;
MutablePatternModifier::MutablePatternModifier(bool isStrong) : fStrong(isStrong) {}
// Stores a non-owning pointer to the affix pattern provider. needsPlurals() and
// enterCharSequenceMode() dereference it, so it must be set before those are used.
void MutablePatternModifier::setPatternInfo(const AffixPatternProvider *patternInfo) {
this->patternInfo = patternInfo;
}
// Records the sign display mode and whether '%' in the pattern is to be rendered as per-mille
// (both are consulted by enterCharSequenceMode() / charAt()).
void MutablePatternModifier::setPatternAttributes(UNumberSignDisplay signDisplay, bool perMille) {
this->signDisplay = signDisplay;
this->perMilleReplacesPercent = perMille;
}
/**
 * Sets the locale-specific details used when substituting symbols into the affixes.
 *
 * setPatternInfo() must have been called first, because the sanity check below calls needsPlurals(),
 * which reads the pattern info.
 *
 * @param symbols   Non-owning pointer to the symbols to use.
 * @param currency  Currency whose ISO code is copied into this object.
 * @param unitWidth Width used to render currencies.
 * @param rules     Non-owning plural rules; must be non-null exactly when needsPlurals() is true.
 */
void
MutablePatternModifier::setSymbols(const DecimalFormatSymbols *symbols, const CurrencyUnit &currency,
                                   const UNumberUnitWidth unitWidth, const PluralRules *rules) {
    // Plural rules are required if and only if the pattern needs a plural keyword.
    U_ASSERT((rules != nullptr) == needsPlurals());
    this->symbols = symbols;
    // Copies four char16_t units from the ISO code buffer (three letters plus the terminator).
    uprv_memcpy(static_cast<char16_t *>(this->currencyCode),
                currency.getISOCurrency(),
                sizeof(char16_t) * 4);
    this->unitWidth = unitWidth;
    this->rules = rules;
}
// Records the sign and plural form of the number being formatted. Callers in this file pass
// StandardPlural::Form::COUNT as the plural when no plural form applies.
void MutablePatternModifier::setNumberProperties(bool isNegative, StandardPlural::Form plural) {
this->isNegative = isNegative;
this->plural = plural;
}
bool MutablePatternModifier::needsPlurals() const {
UErrorCode statusLocal = U_ZERO_ERROR;
return patternInfo->containsSymbolType(AffixPatternType::TYPE_CURRENCY_TRIPLE, statusLocal);
// Silently ignore any error codes.
}
// Convenience wrapper: creates the immutable modifier with a null parent. Because
// ImmutablePatternModifier::processQuantity dereferences its parent, the result must only be used
// through applyToMicros(), not as a link in a chain.
ImmutablePatternModifier *MutablePatternModifier::createImmutable(UErrorCode &status) {
return createImmutableAndChain(nullptr, status);
}
/**
 * Creates an immutable modifier that behaves like this instance, by pre-building one constant
 * modifier per sign (and per plural form, when the pattern needs plurals). The number properties of
 * this instance are overwritten in the process.
 *
 * @param parent The generator the result chains to; may be nullptr (see createImmutable()).
 * @param status Set to U_MEMORY_ALLOCATION_ERROR if an allocation fails.
 * @return A new heap object owned by the caller, or nullptr on failure.
 */
ImmutablePatternModifier *
MutablePatternModifier::createImmutableAndChain(const MicroPropsGenerator *parent, UErrorCode &status) {
    // TODO: Move StandardPlural VALUES to standardplural.h
    static const StandardPlural::Form STANDARD_PLURAL_VALUES[] = {
            StandardPlural::Form::ZERO,
            StandardPlural::Form::ONE,
            StandardPlural::Form::TWO,
            StandardPlural::Form::FEW,
            StandardPlural::Form::MANY,
            StandardPlural::Form::OTHER};

    auto pm = new ParameterizedModifier();
    if (pm == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }

    // The immutable modifier only receives plural rules when the pattern actually needs them.
    const PluralRules *rulesForResult = nullptr;
    if (needsPlurals()) {
        // Slower path when we require the plural keyword.
        for (StandardPlural::Form plural : STANDARD_PLURAL_VALUES) {
            setNumberProperties(false, plural);
            pm->adoptSignPluralModifier(false, plural, createConstantModifier(status));
            setNumberProperties(true, plural);
            pm->adoptSignPluralModifier(true, plural, createConstantModifier(status));
        }
        rulesForResult = rules;
    } else {
        // Faster path when plural keyword is not needed.
        setNumberProperties(false, StandardPlural::Form::COUNT);
        Modifier *positive = createConstantModifier(status);
        setNumberProperties(true, StandardPlural::Form::COUNT);
        Modifier *negative = createConstantModifier(status);
        pm->adoptPositiveNegativeModifiers(positive, negative);
    }

    if (U_FAILURE(status)) {
        delete pm;  // also deletes any modifiers adopted so far
        return nullptr;
    }

    auto result = new ImmutablePatternModifier(pm, rulesForResult, parent);  // adopts pm
    if (result == nullptr) {
        // Nothing adopted pm, so release it here and report the failure instead of leaking silently.
        delete pm;
        status = U_MEMORY_ALLOCATION_ERROR;
    }
    return result;
}
/**
 * Renders the current prefix and suffix into two fresh builders and wraps them in a constant
 * modifier; the currency-spacing variant is used when the pattern has a currency sign.
 *
 * @param status Set to U_MEMORY_ALLOCATION_ERROR if the modifier cannot be allocated (unless an
 *               earlier error is already recorded).
 * @return A new heap object owned by the caller, or nullptr if allocation failed.
 */
ConstantMultiFieldModifier *MutablePatternModifier::createConstantModifier(UErrorCode &status) {
    NumberStringBuilder a;
    NumberStringBuilder b;
    insertPrefix(a, 0, status);
    insertSuffix(b, 0, status);

    ConstantMultiFieldModifier *result;
    if (patternInfo->hasCurrencySign()) {
        result = new CurrencySpacingEnabledModifier(a, b, fStrong, *symbols, status);
    } else {
        result = new ConstantMultiFieldModifier(a, b, fStrong);
    }
    // Surface allocation failure through status so that callers checking U_FAILURE notice it.
    if (result == nullptr && U_SUCCESS(status)) {
        status = U_MEMORY_ALLOCATION_ERROR;
    }
    return result;
}
// Stores the three pointers. pm is held in a LocalPointer member (see the class declaration), so it
// is adopted; rules and parent are plain pointers and are not owned.
ImmutablePatternModifier::ImmutablePatternModifier(ParameterizedModifier *pm, const PluralRules *rules,
const MicroPropsGenerator *parent)
: pm(pm), rules(rules), parent(parent) {}
// Chain step: lets the parent generator process the quantity first, then selects the middle modifier.
// parent is dereferenced unconditionally, so instances created with a null parent (createImmutable)
// must not be used through this method.
void ImmutablePatternModifier::processQuantity(DecimalQuantity &quantity, MicroProps &micros,
UErrorCode &status) const {
parent->processQuantity(quantity, micros, status);
applyToMicros(micros, quantity);
}
/**
 * Picks the pre-built modifier matching the quantity's sign (and plural form, when plural rules are
 * present) and stores it in micros.modMiddle. The quantity itself is not modified; plural selection
 * works on a copy.
 */
void ImmutablePatternModifier::applyToMicros(MicroProps &micros, DecimalQuantity &quantity) const {
    if (rules == nullptr) {
        // No plural rules: only the sign matters.
        micros.modMiddle = pm->getModifier(quantity.isNegative());
        return;
    }

    // TODO: Fix this. Avoid the copy.
    DecimalQuantity rounded(quantity);
    rounded.roundToInfinity();
    const StandardPlural::Form pluralForm = rounded.getStandardPlural(rules);
    micros.modMiddle = pm->getModifier(quantity.isNegative(), pluralForm);
}
/**
* Used by the unsafe code path. Records the (non-owning) parent generator that processQuantity()
* delegates to, and returns this object so it can serve as the next link.
*/
MicroPropsGenerator &MutablePatternModifier::addToChain(const MicroPropsGenerator *parent) {
this->parent = parent;
return *this;
}
/**
 * Chain step for the unsafe code path: runs the parent generator, updates this object's number
 * properties from the quantity, and installs this object as micros.modMiddle.
 *
 * When plurals are needed, the plural form is taken from a rounded copy of the quantity; the
 * quantity passed in is not rounded here.
 */
void MutablePatternModifier::processQuantity(DecimalQuantity &fq, MicroProps &micros,
                                             UErrorCode &status) const {
    parent->processQuantity(fq, micros, status);

    // The unsafe code path performs self-mutation, so we need a const_cast.
    // This method needs to be const because it overrides a const method in the parent class.
    auto self = const_cast<MutablePatternModifier *>(this);

    StandardPlural::Form pluralForm = StandardPlural::Form::COUNT;
    if (needsPlurals()) {
        // TODO: Fix this. Avoid the copy.
        DecimalQuantity copy(fq);
        micros.rounding.apply(copy, status);
        pluralForm = copy.getStandardPlural(rules);
    }
    self->setNumberProperties(fq.isNegative(), pluralForm);

    micros.modMiddle = this;
}
/**
 * Inserts the prefix at leftIndex and the suffix at rightIndex (shifted by the prefix just
 * inserted), then runs currency spacing over both.
 *
 * @return The sum of the values returned by the prefix and suffix insertions; whatever
 *         applyCurrencySpacing returns is not included.
 */
int32_t MutablePatternModifier::apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
                                      UErrorCode &status) const {
    // The unsafe code path performs self-mutation, so we need a const_cast.
    // This method needs to be const because it overrides a const method in the parent class.
    auto self = const_cast<MutablePatternModifier *>(this);

    const int32_t prefixLen = self->insertPrefix(output, leftIndex, status);
    const int32_t suffixStart = rightIndex + prefixLen;
    const int32_t suffixLen = self->insertSuffix(output, suffixStart, status);

    CurrencySpacingEnabledModifier::applyCurrencySpacing(
            output, leftIndex, prefixLen, suffixStart, suffixLen, *symbols, status);
    return prefixLen + suffixLen;
}
/**
 * Returns the unescaped code point count of the prefix, computed by temporarily exposing the prefix
 * through the CharSequence interface.
 */
int32_t MutablePatternModifier::getPrefixLength(UErrorCode &status) const {
    // The unsafe code path performs self-mutation, so we need a const_cast.
    // This method needs to be const because it overrides a const method in the parent class.
    auto self = const_cast<MutablePatternModifier *>(this);

    // Enter and exit CharSequence Mode to get the length.
    self->enterCharSequenceMode(true);
    int prefixCount = AffixUtils::unescapedCodePointCount(*this, *this, status);
    self->exitCharSequenceMode();
    return prefixCount;
}
/**
 * Returns the unescaped code point count of the prefix plus that of the suffix. Each affix is
 * measured in its own CharSequence-mode session, prefix first.
 */
int32_t MutablePatternModifier::getCodePointCount(UErrorCode &status) const {
    // The unsafe code path performs self-mutation, so we need a const_cast.
    // This method needs to be const because it overrides a const method in the parent class.
    auto self = const_cast<MutablePatternModifier *>(this);

    int total = 0;

    // Prefix length.
    self->enterCharSequenceMode(true);
    total += AffixUtils::unescapedCodePointCount(*this, *this, status);
    self->exitCharSequenceMode();

    // Suffix length.
    self->enterCharSequenceMode(false);
    total += AffixUtils::unescapedCodePointCount(*this, *this, status);
    self->exitCharSequenceMode();

    return total;
}
// Returns the strength flag fixed at construction.
bool MutablePatternModifier::isStrong() const {
return fStrong;
}
/**
 * Unescapes the prefix into sb at the given position, using this object both as the character
 * source and as the symbol provider.
 *
 * @return The value returned by AffixUtils::unescape.
 */
int32_t MutablePatternModifier::insertPrefix(NumberStringBuilder &sb, int position, UErrorCode &status) {
    enterCharSequenceMode(true);
    int inserted = AffixUtils::unescape(*this, sb, position, *this, status);
    exitCharSequenceMode();
    return inserted;
}
/**
 * Unescapes the suffix into sb at the given position, using this object both as the character
 * source and as the symbol provider.
 *
 * @return The value returned by AffixUtils::unescape.
 */
int32_t MutablePatternModifier::insertSuffix(NumberStringBuilder &sb, int position, UErrorCode &status) {
    enterCharSequenceMode(false);
    int inserted = AffixUtils::unescape(*this, sb, position, *this, status);
    exitCharSequenceMode();
    return inserted;
}
/**
 * Returns the string that replaces a given symbol type in an affix pattern, based on the symbols,
 * currency code, unit width and plural form currently set on this object.
 *
 * Error codes from the currency name lookups are discarded.
 */
UnicodeString MutablePatternModifier::getSymbol(AffixPatternType type) const {
    switch (type) {
        case AffixPatternType::TYPE_MINUS_SIGN:
            return symbols->getSymbol(DecimalFormatSymbols::kMinusSignSymbol);
        case AffixPatternType::TYPE_PLUS_SIGN:
            return symbols->getSymbol(DecimalFormatSymbols::kPlusSignSymbol);
        case AffixPatternType::TYPE_PERCENT:
            return symbols->getSymbol(DecimalFormatSymbols::kPercentSymbol);
        case AffixPatternType::TYPE_PERMILLE:
            return symbols->getSymbol(DecimalFormatSymbols::kPerMillSymbol);

        case AffixPatternType::TYPE_CURRENCY_SINGLE: {
            // UnitWidth ISO and HIDDEN overrides the singular currency symbol.
            if (unitWidth == UNumberUnitWidth::UNUM_UNIT_WIDTH_HIDDEN) {
                return UnicodeString();
            }
            if (unitWidth == UNumberUnitWidth::UNUM_UNIT_WIDTH_ISO_CODE) {
                return UnicodeString(currencyCode, 3);
            }
            UErrorCode localStatus = U_ZERO_ERROR;
            UBool isChoiceFormat = FALSE;
            int32_t nameLen = 0;
            const char16_t *name = ucurr_getName(
                    currencyCode,
                    symbols->getLocale().getName(),
                    UCurrNameStyle::UCURR_SYMBOL_NAME,
                    &isChoiceFormat,
                    &nameLen,
                    &localStatus);
            return UnicodeString(name, nameLen);
        }

        case AffixPatternType::TYPE_CURRENCY_DOUBLE:
            return UnicodeString(currencyCode, 3);

        case AffixPatternType::TYPE_CURRENCY_TRIPLE: {
            // NOTE: This is the code path only for patterns containing "¤¤¤".
            // Plural currencies set via the API are formatted in LongNameHandler.
            // This code path is used by DecimalFormat via CurrencyPluralInfo.
            U_ASSERT(plural != StandardPlural::Form::COUNT);
            UErrorCode localStatus = U_ZERO_ERROR;
            UBool isChoiceFormat = FALSE;
            int32_t nameLen = 0;
            const char16_t *name = ucurr_getPluralName(
                    currencyCode,
                    symbols->getLocale().getName(),
                    &isChoiceFormat,
                    StandardPlural::getKeyword(plural),
                    &nameLen,
                    &localStatus);
            return UnicodeString(name, nameLen);
        }

        // Both of these render as U+FFFD.
        case AffixPatternType::TYPE_CURRENCY_QUAD:
        case AffixPatternType::TYPE_CURRENCY_QUINT:
            return UnicodeString(u"\uFFFD");

        default:
            U_ASSERT(false);
            return UnicodeString();
    }
}
/** This method contains the heart of the logic for rendering LDML affix strings. */
void MutablePatternModifier::enterCharSequenceMode(bool isPrefix) {
U_ASSERT(!inCharSequenceMode);
inCharSequenceMode = true;
// Should the output render '+' where '-' would normally appear in the pattern?
plusReplacesMinusSign = !isNegative && (
signDisplay == UNUM_SIGN_ALWAYS ||
signDisplay == UNUM_SIGN_ACCOUNTING_ALWAYS) &&
patternInfo->positiveHasPlusSign() == false;
// Should we use the affix from the negative subpattern? (If not, we will use the positive subpattern.)
bool useNegativeAffixPattern = patternInfo->hasNegativeSubpattern() && (
isNegative || (patternInfo->negativeHasMinusSign() && plusReplacesMinusSign));
// Resolve the flags for the affix pattern.
fFlags = 0;
if (useNegativeAffixPattern) {
fFlags |= AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN;
}
if (isPrefix) {
fFlags |= AffixPatternProvider::AFFIX_PREFIX;
}
if (plural != StandardPlural::Form::COUNT) {
U_ASSERT(plural == (AffixPatternProvider::AFFIX_PLURAL_MASK & plural));
fFlags |= plural;
}
// Should we prepend a sign to the pattern?
if (!isPrefix || useNegativeAffixPattern) {
prependSign = false;
} else if (isNegative) {
prependSign = signDisplay != UNUM_SIGN_NEVER;
} else {
prependSign = plusReplacesMinusSign;
}
// Finally, compute the length of the affix pattern.
fLength = patternInfo->length(fFlags) + (prependSign ? 1 : 0);
}
/**
 * Ends a CharSequence-mode session started by enterCharSequenceMode(). Must only be called while a
 * session is active.
 */
void MutablePatternModifier::exitCharSequenceMode() {
    // The trailing semicolon matters: when U_ASSERT expands to an expression, omitting it would
    // fuse the assertion with the next statement.
    U_ASSERT(inCharSequenceMode);
    inCharSequenceMode = false;
}
// CharSequence interface: returns the length computed by enterCharSequenceMode() (affix pattern
// length plus one if a sign is prepended). Only valid while in CharSequence mode.
int32_t MutablePatternModifier::length() const {
U_ASSERT(inCharSequenceMode);
return fLength;
}
/**
 * CharSequence interface: returns the code unit at the given index of the affix currently exposed
 * by enterCharSequenceMode(), after applying the sign and percent substitutions.
 *
 * When a sign is prepended, index 0 is a synthetic '-' and the pattern characters are shifted by one.
 * Only valid while in CharSequence mode.
 */
char16_t MutablePatternModifier::charAt(int32_t index) const {
    U_ASSERT(inCharSequenceMode);

    char16_t candidate;
    if (prependSign && index == 0) {
        candidate = '-';
    } else if (prependSign) {
        candidate = patternInfo->charAt(fFlags, index - 1);
    } else {
        candidate = patternInfo->charAt(fFlags, index);
    }

    if (plusReplacesMinusSign && candidate == '-') {
        return '+';
    }
    if (perMilleReplacesPercent && candidate == '%') {
        // U+2030 PER MILLE SIGN, written as an escape so the source does not depend on file encoding.
        return u'\u2030';
    }
    return candidate;
}
// CharSequence interface stub: not expected to be called. Asserts in debug builds and returns an
// empty string otherwise.
UnicodeString MutablePatternModifier::toUnicodeString() const {
// Never called by AffixUtils
U_ASSERT(false);
return UnicodeString();
}

View File

@ -0,0 +1,234 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#ifndef NUMBERFORMAT_MUTABLEPATTERNMODIFIER_H
#define NUMBERFORMAT_MUTABLEPATTERNMODIFIER_H
#include <standardplural.h>
#include "unicode/numberformatter.h"
#include "number_patternstring.h"
#include "number_types.h"
#include "number_modifiers.h"
#include "number_utils.h"
U_NAMESPACE_BEGIN
namespace number {
namespace impl {
/**
* A MicroPropsGenerator that selects one of a fixed set of pre-built modifiers (held in a
* ParameterizedModifier) based on the sign, and optionally the plural form, of the quantity.
* Instances are created by MutablePatternModifier::createImmutable / createImmutableAndChain.
*/
class ImmutablePatternModifier : public MicroPropsGenerator {
public:
// pm is adopted (it is stored in a LocalPointer); rules and parent are not owned.
ImmutablePatternModifier(ParameterizedModifier *pm, const PluralRules *rules,
const MicroPropsGenerator *parent);
~ImmutablePatternModifier() override = default;
// Runs the parent generator, then applyToMicros(). Requires a non-null parent.
void processQuantity(DecimalQuantity &, MicroProps &micros, UErrorCode &status) const override;
// Sets micros.modMiddle to the modifier matching the quantity; does not touch the parent.
void applyToMicros(MicroProps &micros, DecimalQuantity &quantity) const;
private:
const LocalPointer<ParameterizedModifier> pm;
// When null, only the sign of the quantity is used to select the modifier.
const PluralRules *rules;
const MicroPropsGenerator *parent;
};
/**
* This class is a {@link Modifier} that wraps a decimal format pattern. It applies the pattern's affixes in
* {@link Modifier#apply}.
*
* <p>
* In addition to being a Modifier, this class contains the business logic for substituting the correct locale symbols
* into the affixes of the decimal format pattern.
*
* <p>
* In order to use this class, create a new instance and call the following four setters: {@link #setPatternInfo},
* {@link #setPatternAttributes}, {@link #setSymbols}, and {@link #setNumberProperties}. After calling these four
* setters, the instance will be ready for use as a Modifier.
*
* <p>
* This is a MUTABLE, NON-THREAD-SAFE class designed for performance. Do NOT save references to this or attempt to use
* it from multiple threads! Instead, you can obtain a safe, immutable decimal format pattern modifier by calling
* {@link MutablePatternModifier#createImmutable}, in effect treating this instance as a builder for the immutable
* variant.
*/
class MutablePatternModifier
: public MicroPropsGenerator, public Modifier, public SymbolProvider, public CharSequence {
public:
~MutablePatternModifier() override = default;
/**
* @param isStrong
* Whether the modifier should be considered strong. For more information, see
* {@link Modifier#isStrong()}. Most of the time, decimal format pattern modifiers should be considered
* as non-strong.
*/
explicit MutablePatternModifier(bool isStrong);
/**
* Sets a reference to the parsed decimal format pattern, usually obtained from
* {@link PatternStringParser#parseToPatternInfo(String)}, but any implementation of {@link AffixPatternProvider} is
* accepted.
*
* <p>
* The pointer is stored, not copied; the provider must outlive its use by this object.
*/
void setPatternInfo(const AffixPatternProvider *patternInfo);
/**
* Sets attributes that imply changes to the literal interpretation of the pattern string affixes.
*
* @param signDisplay
* Whether to force a plus sign on positive numbers.
* @param perMille
* Whether to substitute the percent sign in the pattern with a permille sign.
*/
void setPatternAttributes(UNumberSignDisplay signDisplay, bool perMille);
/**
* Sets locale-specific details that affect the symbols substituted into the pattern string affixes.
*
* @param symbols
* The desired instance of DecimalFormatSymbols.
* @param currency
* The currency to be used when substituting currency values into the affixes.
* @param unitWidth
* The width used to render currencies.
* @param rules
* Required if the triple currency sign, "¤¤¤", appears in the pattern, which can be determined from the
* convenience method {@link #needsPlurals()}.
*/
void
setSymbols(const DecimalFormatSymbols *symbols, const CurrencyUnit &currency, UNumberUnitWidth unitWidth,
const PluralRules *rules);
/**
* Sets attributes of the current number being processed.
*
* @param isNegative
* Whether the number is negative.
* @param plural
* The plural form of the number, required only if the pattern contains the triple currency sign, "¤¤¤"
* (and as indicated by {@link #needsPlurals()}).
*/
void setNumberProperties(bool isNegative, StandardPlural::Form plural);
/**
* Returns true if the pattern represented by this MutablePatternModifier requires a plural keyword in order to
* localize. This is currently true only if there is a currency long name placeholder in the pattern ("¤¤¤").
*/
bool needsPlurals() const;
/**
* Creates a new quantity-dependent Modifier that behaves the same as the current instance, but which is immutable
* and can be saved for future use. The number properties in the current instance are mutated; all other properties
* are left untouched.
*
* <p>
* The resulting modifier cannot be used in a QuantityChain (it is created with a null parent).
*
* <p>
* CREATES A NEW HEAP OBJECT; THE CALLER GETS OWNERSHIP.
*
* @return An immutable that supports both positive and negative numbers.
*/
ImmutablePatternModifier *createImmutable(UErrorCode &status);
/**
* Creates a new quantity-dependent Modifier that behaves the same as the current instance, but which is immutable
* and can be saved for future use. The number properties in the current instance are mutated; all other properties
* are left untouched.
*
* <p>
* CREATES A NEW HEAP OBJECT; THE CALLER GETS OWNERSHIP.
*
* @param parent
* The QuantityChain to which to chain this immutable.
* @return An immutable that supports both positive and negative numbers.
*/
ImmutablePatternModifier *
createImmutableAndChain(const MicroPropsGenerator *parent, UErrorCode &status);
/**
* Used by the unsafe code path: stores the parent generator and returns this object as the next link.
*/
MicroPropsGenerator &addToChain(const MicroPropsGenerator *parent);
/**
* Runs the parent generator, updates the number properties of this object from the quantity, and sets this
* object as the middle modifier in <code>micros</code>. Although declared const, it mutates this object.
*/
void processQuantity(DecimalQuantity &, MicroProps &micros, UErrorCode &status) const override;
// Modifier interface. These are declared const but temporarily mutate the transient CharSequence fields.
int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
UErrorCode &status) const override;
int32_t getPrefixLength(UErrorCode &status) const override;
int32_t getCodePointCount(UErrorCode &status) const override;
bool isStrong() const override;
/**
* Returns the string that substitutes a given symbol type in a pattern.
*/
UnicodeString getSymbol(AffixPatternType type) const override;
// CharSequence interface; length() and charAt() are only valid between enterCharSequenceMode() and
// exitCharSequenceMode().
int32_t length() const override;
char16_t charAt(int32_t index) const override;
// Use default implementation of codePointAt
UnicodeString toUnicodeString() const override;
private:
// Modifier details
const bool fStrong;
// Pattern details
const AffixPatternProvider *patternInfo;
UNumberSignDisplay signDisplay;
bool perMilleReplacesPercent;
// Symbol details
const DecimalFormatSymbols *symbols;
UNumberUnitWidth unitWidth;
char16_t currencyCode[4];
const PluralRules *rules;
// Number details
bool isNegative;
StandardPlural::Form plural;
// QuantityChain details
const MicroPropsGenerator *parent;
// Transient CharSequence fields
bool inCharSequenceMode;
int32_t fFlags;
int32_t fLength;
bool prependSign;
bool plusReplacesMinusSign;
/**
* Uses the current properties to create a single {@link ConstantMultiFieldModifier} with currency spacing support
* if required.
*
* <p>
* CREATES A NEW HEAP OBJECT; THE CALLER GETS OWNERSHIP.
*
* <p>
* Unlike the Java version, no working NumberStringBuilder objects are passed in; the implementation creates its
* own.
*
* @param status
* ICU error code, in/out.
* @return The constant modifier object.
*/
ConstantMultiFieldModifier *createConstantModifier(UErrorCode &status);
// Unescape the current prefix / suffix into sb at the given position.
int32_t insertPrefix(NumberStringBuilder &sb, int position, UErrorCode &status);
int32_t insertSuffix(NumberStringBuilder &sb, int position, UErrorCode &status);
// Begin / end exposing the prefix (isPrefix == true) or suffix through the CharSequence interface.
void enterCharSequenceMode(bool isPrefix);
void exitCharSequenceMode();
};
} // namespace impl
} // namespace number
U_NAMESPACE_END
#endif //NUMBERFORMAT_MUTABLEPATTERNMODIFIER_H

View File

@ -0,0 +1,831 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include <uassert.h>
#include "number_patternstring.h"
#include "unicode/utf16.h"
#include "number_utils.h"
using namespace icu::number::impl;
// Parses patternString into patternInfo by delegating to ParsedPatternInfo::consumePattern.
// Errors are reported through status.
void PatternParser::parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo& patternInfo, UErrorCode &status) {
patternInfo.consumePattern(patternString, status);
}
/**
 * Parses a pattern string into a freshly default-constructed DecimalFormatProperties and returns it
 * by value. Errors are reported through status.
 */
DecimalFormatProperties
PatternParser::parseToProperties(const UnicodeString& pattern, IgnoreRounding ignoreRounding,
                                 UErrorCode &status) {
    DecimalFormatProperties parsed;
    parseToExistingPropertiesImpl(pattern, parsed, ignoreRounding, status);
    return parsed;
}
// Parses a pattern string into the given properties object.
// NOTE(review): `properties` is taken BY VALUE, so anything parseToExistingPropertiesImpl writes
// into it is discarded when this function returns and the caller's object is never updated. This
// looks unintended; the fix would be to take `DecimalFormatProperties&`, which also requires
// changing the declaration in the header.
void PatternParser::parseToExistingProperties(const UnicodeString& pattern, DecimalFormatProperties properties,
IgnoreRounding ignoreRounding, UErrorCode &status) {
parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status);
}
/**
 * Returns the code unit at `index` within the affix selected by `flags`. The index is relative to
 * the start of that affix; an out-of-range index trips an assertion in debug builds only.
 */
char16_t ParsedPatternInfo::charAt(int32_t flags, int32_t index) const {
    const Endpoints &range = getEndpoints(flags);
    U_ASSERT(index >= 0 && index < range.end - range.start);
    return pattern.charAt(range.start + index);
}
/** Returns the number of code units in the affix selected by `flags`. */
int32_t ParsedPatternInfo::length(int32_t flags) const {
    const Endpoints &range = getEndpoints(flags);
    return range.end - range.start;
}
// Length of a span: end minus start.
int32_t ParsedPatternInfo::getLengthFromEndpoints(const Endpoints &endpoints) {
return endpoints.end - endpoints.start;
}
/**
 * Returns a new string holding the affix selected by `flags`, or an empty string when that affix
 * has no characters.
 */
UnicodeString ParsedPatternInfo::getString(int32_t flags) const {
    const Endpoints &range = getEndpoints(flags);
    if (range.start != range.end) {
        // Create a new UnicodeString
        return UnicodeString(pattern, range.start, range.end - range.start);
    }
    return UnicodeString();
}
/**
 * Maps affix flags to the matching span of the pattern string.
 *
 * AFFIX_NEGATIVE_SUBPATTERN selects the subpattern; within it, AFFIX_PADDING takes precedence over
 * AFFIX_PREFIX, and the suffix is returned when neither is set.
 */
const Endpoints &ParsedPatternInfo::getEndpoints(int32_t flags) const {
    const bool wantsNegative = (flags & AFFIX_NEGATIVE_SUBPATTERN) != 0;
    const bool wantsPadding = (flags & AFFIX_PADDING) != 0;
    const bool wantsPrefix = (flags & AFFIX_PREFIX) != 0;

    const auto &subpattern = wantsNegative ? negative : positive;
    if (wantsPadding) {
        return subpattern.paddingEndpoints;
    }
    if (wantsPrefix) {
        return subpattern.prefixEndpoints;
    }
    return subpattern.suffixEndpoints;
}
// Returns the hasPlusSign flag of the positive subpattern.
bool ParsedPatternInfo::positiveHasPlusSign() const {
return positive.hasPlusSign;
}
// True when consumePattern() found a non-empty subpattern after a ';'.
bool ParsedPatternInfo::hasNegativeSubpattern() const {
return fHasNegativeSubpattern;
}
// Returns the hasMinusSign flag of the negative subpattern (not gated on hasNegativeSubpattern()).
bool ParsedPatternInfo::negativeHasMinusSign() const {
return negative.hasMinusSign;
}
/**
 * True when the positive subpattern has a currency sign, or when a negative subpattern exists and
 * has one.
 */
bool ParsedPatternInfo::hasCurrencySign() const {
    if (positive.hasCurrencySign) {
        return true;
    }
    return fHasNegativeSubpattern && negative.hasCurrencySign;
}
// Asks AffixUtils whether the given symbol type occurs in the pattern. Note that the WHOLE pattern
// string is passed, not just the affix spans.
bool ParsedPatternInfo::containsSymbolType(AffixPatternType type, UErrorCode &status) const {
return AffixUtils::containsType(UnicodeStringCharSequence(pattern), type, status);
}
/////////////////////////////////////////////////////
/// BEGIN RECURSIVE DESCENT PARSER IMPLEMENTATION ///
/////////////////////////////////////////////////////
/**
 * Returns the code point at the current offset without consuming it, or -1 when the offset is
 * exactly at the end of the pattern.
 */
UChar32 ParsedPatternInfo::ParserState::peek() {
    const bool atEnd = offset == pattern.length();
    return atEnd ? -1 : pattern.char32At(offset);
}
/**
 * Consumes and returns the code point at the current offset, advancing by its UTF-16 length.
 *
 * At the end of the pattern this returns -1 and leaves the offset unchanged. (Advancing there would
 * move the offset past the end, because U16_LENGTH(-1) evaluates to 2, after which peek() could no
 * longer detect the end of input.)
 */
UChar32 ParsedPatternInfo::ParserState::next() {
    UChar32 codePoint = peek();
    if (codePoint >= 0) {
        offset += U16_LENGTH(codePoint);
    }
    return codePoint;
}
/**
 * Entry point of the recursive descent parser.
 *
 * Grammar: pattern := subpattern (';' subpattern)?
 *
 * Stores a copy of the pattern string, parses the positive subpattern and, if a ';' is followed
 * by more input, the negative subpattern. Any input left over afterwards is reported as
 * U_UNQUOTED_SPECIAL. Does nothing if status already indicates failure.
 */
void ParsedPatternInfo::consumePattern(const UnicodeString& patternString, UErrorCode &status) {
    if (U_FAILURE(status)) { return; }
    this->pattern = patternString;

    // Positive subpattern (always present).
    currentSubpattern = &positive;
    consumeSubpattern(status);
    if (U_FAILURE(status)) { return; }

    const bool hasSeparator = (state.peek() == ';');
    if (hasSeparator) {
        state.next(); // consume the ';'
        // A trailing ';' with nothing after it does not count as a negative subpattern.
        const bool negativeIsEmpty = (state.peek() == -1);
        if (!negativeIsEmpty) {
            fHasNegativeSubpattern = true;
            currentSubpattern = &negative;
            consumeSubpattern(status);
            if (U_FAILURE(status)) { return; }
        }
    }

    // Everything must have been consumed by now.
    if (state.peek() != -1) {
        state.toParseException(u"Found unquoted special character");
        status = U_UNQUOTED_SPECIAL;
    }
}
/**
 * Parses one subpattern into *currentSubpattern.
 *
 * Grammar: subpattern := literals? number exponent? literals?
 *
 * A pad specifier is tried at each of the four legal positions (before/after prefix,
 * before/after suffix). Parsing stops at the first step that sets a failing status.
 */
void ParsedPatternInfo::consumeSubpattern(UErrorCode &status) {
    ParsedSubpatternInfo &sub = *currentSubpattern;

    // Prefix, with optional padding on either side.
    consumePadding(PadPosition::UNUM_PAD_BEFORE_PREFIX, status);
    if (U_FAILURE(status)) { return; }
    consumeAffix(sub.prefixEndpoints, status);
    if (U_FAILURE(status)) { return; }
    consumePadding(PadPosition::UNUM_PAD_AFTER_PREFIX, status);
    if (U_FAILURE(status)) { return; }

    // Number format and optional exponent.
    consumeFormat(status);
    if (U_FAILURE(status)) { return; }
    consumeExponent(status);
    if (U_FAILURE(status)) { return; }

    // Suffix, with optional padding on either side.
    consumePadding(PadPosition::UNUM_PAD_BEFORE_SUFFIX, status);
    if (U_FAILURE(status)) { return; }
    consumeAffix(sub.suffixEndpoints, status);
    if (U_FAILURE(status)) { return; }
    consumePadding(PadPosition::UNUM_PAD_AFTER_SUFFIX, status);
}
/**
 * Parses an optional pad specifier ('*' followed by one literal) at the current position.
 *
 * If the next code point is not '*', nothing happens. A second pad specifier within the same
 * subpattern sets U_MULTIPLE_PAD_SPECIFIERS. Otherwise the pad position is recorded and the
 * padding endpoints are set around the literal that follows the '*'.
 *
 * @param paddingLocation The pad position to record if a specifier is found here.
 */
void ParsedPatternInfo::consumePadding(PadPosition paddingLocation, UErrorCode &status) {
    const bool startsPadSpecifier = (state.peek() == '*');
    if (!startsPadSpecifier) {
        return;
    }

    ParsedSubpatternInfo &sub = *currentSubpattern;
    if (!sub.paddingLocation.isNull()) {
        state.toParseException(u"Cannot have multiple pad specifiers");
        status = U_MULTIPLE_PAD_SPECIFIERS;
        return;
    }

    sub.paddingLocation = paddingLocation;
    state.next(); // consume the '*'

    // The end offset is recorded even if consumeLiteral() fails; callers inspect status.
    sub.paddingEndpoints.start = state.offset;
    consumeLiteral(status);
    sub.paddingEndpoints.end = state.offset;
}
/**
 * Parses a prefix or suffix: a possibly empty run of literals.
 *
 * Grammar: literals := { literal }
 *
 * The run ends at the first code point that cannot appear unquoted in a literal ('#', '@',
 * ';', '*', '.', ',', an ASCII digit) or at the end of input. While scanning, the unquoted
 * symbols '%', per-mille, currency sign, '-' and '+' set the corresponding flags on the
 * current subpattern. Only the code point at the start of each literal is examined, so the
 * contents of a quoted sequence do not set any flag.
 *
 * The non-ASCII symbols are written as \u escapes so that the case labels do not depend on
 * the encoding in which this source file is stored or compiled.
 *
 * @param endpoints Receives the start offset immediately and the end offset once the run has
 *                  been consumed; the end offset is not written if a literal fails to parse.
 * @param status    Set to a failure code by consumeLiteral() on malformed input.
 */
void ParsedPatternInfo::consumeAffix(Endpoints &endpoints, UErrorCode &status) {
    endpoints.start = state.offset;

    bool atBoundary = false;
    while (!atBoundary) {
        switch (state.peek()) {
            case '#':
            case '@':
            case ';':
            case '*':
            case '.':
            case ',':
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
            case -1:
                // Characters that cannot appear unquoted in a literal: the affix ends here.
                atBoundary = true;
                continue; // re-evaluates the loop condition and leaves the loop

            case '%':
                currentSubpattern->hasPercentSign = true;
                break;

            case u'\u2030': // PER MILLE SIGN
                currentSubpattern->hasPerMilleSign = true;
                break;

            case u'\u00A4': // CURRENCY SIGN
                currentSubpattern->hasCurrencySign = true;
                break;

            case '-':
                currentSubpattern->hasMinusSign = true;
                break;

            case '+':
                currentSubpattern->hasPlusSign = true;
                break;

            default:
                break;
        }

        consumeLiteral(status);
        if (U_FAILURE(status)) { return; }
    }

    endpoints.end = state.offset;
}
/**
 * Consumes exactly one literal: either a single unquoted code point or a complete quoted
 * sequence (opening quote, any number of code points, closing quote).
 *
 * Sets U_PATTERN_SYNTAX_ERROR if the input ends where a literal is expected or before a quoted
 * sequence is closed.
 */
void ParsedPatternInfo::consumeLiteral(UErrorCode &status) {
    const UChar32 first = state.peek();

    if (first == -1) {
        state.toParseException(u"Expected unquoted literal but found EOL");
        status = U_PATTERN_SYNTAX_ERROR;
        return;
    }

    if (first != '\'') {
        // consume a non-quoted literal character
        state.next();
        return;
    }

    state.next(); // consume the starting quote
    for (UChar32 cp = state.peek(); cp != '\''; cp = state.peek()) {
        if (cp == -1) {
            state.toParseException(u"Expected quoted literal but found EOL");
            status = U_PATTERN_SYNTAX_ERROR;
            return;
        }
        state.next(); // consume a quoted character
    }
    state.next(); // consume the ending quote
}
/**
 * Parses the number part of a subpattern: the integer format, optionally followed by a
 * decimal point and the fraction format. The decimal point counts toward widthExceptAffixes.
 */
void ParsedPatternInfo::consumeFormat(UErrorCode &status) {
    consumeIntegerFormat(status);
    if (U_FAILURE(status)) { return; }

    const bool hasDecimalPoint = (state.peek() == '.');
    if (!hasDecimalPoint) {
        return;
    }

    state.next(); // consume the decimal point
    ParsedSubpatternInfo &sub = *currentSubpattern;
    sub.hasDecimal = true;
    sub.widthExceptAffixes++;
    consumeFractionFormat(status);
}
/**
 * Parses the integer part of the number format: any run of ',', '#', '@' and ASCII digits.
 *
 * Every consumed symbol adds one to widthExceptAffixes. groupingSizes is maintained as packed
 * 16-bit counters: each '#', '@' or digit increments the lowest counter and each ',' shifts
 * the whole value left by 16 bits. Digits other than leading zeros are appended to the
 * rounding quantity. Ordering violations ('#' after a digit, mixing '@' with digits, '#'
 * nested inside a run of '@') set U_UNEXPECTED_TOKEN.
 *
 * After the run, a trailing ',' sets U_UNEXPECTED_TOKEN and two adjacent ',' set
 * U_PATTERN_SYNTAX_ERROR.
 */
void ParsedPatternInfo::consumeIntegerFormat(UErrorCode &status) {
    // Convenience reference:
    ParsedSubpatternInfo &sub = *currentSubpattern;

    bool finished = false;
    while (!finished) {
        const UChar32 cp = state.peek();
        switch (cp) {
            case ',':
                sub.widthExceptAffixes++;
                sub.groupingSizes <<= 16;
                break;

            case '#':
                if (sub.integerNumerals > 0) {
                    state.toParseException(u"# cannot follow 0 before decimal point");
                    status = U_UNEXPECTED_TOKEN;
                    return;
                }
                sub.widthExceptAffixes++;
                sub.groupingSizes++;
                if (sub.integerAtSigns > 0) {
                    sub.integerTrailingHashSigns++;
                } else {
                    sub.integerLeadingHashSigns++;
                }
                sub.integerTotal++;
                break;

            case '@':
                if (sub.integerNumerals > 0) {
                    state.toParseException(u"Cannot mix 0 and @");
                    status = U_UNEXPECTED_TOKEN;
                    return;
                }
                if (sub.integerTrailingHashSigns > 0) {
                    state.toParseException(u"Cannot nest # inside of a run of @");
                    status = U_UNEXPECTED_TOKEN;
                    return;
                }
                sub.widthExceptAffixes++;
                sub.groupingSizes++;
                sub.integerAtSigns++;
                sub.integerTotal++;
                break;

            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
                if (sub.integerAtSigns > 0) {
                    state.toParseException(u"Cannot mix @ and 0");
                    status = U_UNEXPECTED_TOKEN;
                    return;
                }
                sub.widthExceptAffixes++;
                sub.groupingSizes++;
                sub.integerNumerals++;
                sub.integerTotal++;
                // Leading zeros are skipped; once the rounding quantity is non-zero every
                // digit (including '0') is appended.
                if (!sub.rounding.isZero() || cp != '0') {
                    sub.rounding.appendDigit(static_cast<int8_t>(cp - '0'), 0, true);
                }
                break;

            default:
                // Not part of the integer format: leave the symbol unconsumed.
                finished = true;
                continue;
        }
        state.next(); // consume the symbol
    }

    // Disallow patterns with a trailing ',' or with two ',' next to each other
    auto grouping1 = static_cast<int16_t> (sub.groupingSizes & 0xffff);
    auto grouping2 = static_cast<int16_t> ((sub.groupingSizes >> 16) & 0xffff);
    auto grouping3 = static_cast<int16_t> ((sub.groupingSizes >> 32) & 0xffff);
    if (grouping1 == 0 && grouping2 != -1) {
        state.toParseException(u"Trailing grouping separator is invalid");
        status = U_UNEXPECTED_TOKEN;
        return;
    }
    if (grouping2 == 0 && grouping3 != -1) {
        state.toParseException(u"Grouping width of zero is invalid");
        status = U_PATTERN_SYNTAX_ERROR;
        return;
    }
}
/**
 * Parses the fraction part of the number format: any run of '#' and ASCII digits.
 *
 * Every consumed symbol adds one to widthExceptAffixes and fractionTotal. Zeros and '#' are
 * only counted; when a non-zero digit arrives it is appended to the rounding quantity together
 * with the number of symbols counted since the previous non-zero digit. A digit after a '#'
 * sets U_UNEXPECTED_TOKEN. Parsing stops, without consuming, at the first other code point.
 */
void ParsedPatternInfo::consumeFractionFormat(UErrorCode &status) {
    // Convenience reference:
    ParsedSubpatternInfo &sub = *currentSubpattern;
    int32_t pendingZeros = 0;

    // The increment expression consumes the symbol handled in the loop body.
    for (;; state.next()) {
        const UChar32 cp = state.peek();

        if (cp == '#') {
            sub.widthExceptAffixes++;
            sub.fractionHashSigns++;
            sub.fractionTotal++;
            pendingZeros++;
        } else if (cp >= '0' && cp <= '9') {
            if (sub.fractionHashSigns > 0) {
                state.toParseException(u"0 cannot follow # after decimal point");
                status = U_UNEXPECTED_TOKEN;
                return;
            }
            sub.widthExceptAffixes++;
            sub.fractionNumerals++;
            sub.fractionTotal++;
            if (cp == '0') {
                pendingZeros++;
            } else {
                sub.rounding.appendDigit(static_cast<int8_t>(cp - '0'), pendingZeros, false);
                pendingZeros = 0;
            }
        } else {
            return;
        }
    }
}
/**
 * Parses an optional exponent: 'E', an optional '+', and any number of '0'.
 *
 * Does nothing unless the next code point is 'E'. Sets U_MALFORMED_EXPONENTIAL_PATTERN if the
 * integer format contained a grouping separator (the second 16-bit slot of groupingSizes is no
 * longer 0xffff). Each consumed symbol adds one to widthExceptAffixes.
 */
void ParsedPatternInfo::consumeExponent(UErrorCode &status) {
    if (state.peek() != 'E') {
        return;
    }

    // Convenience reference:
    ParsedSubpatternInfo &sub = *currentSubpattern;

    const bool sawGroupingSeparator = (sub.groupingSizes & 0xffff0000L) != 0xffff0000L;
    if (sawGroupingSeparator) {
        state.toParseException(u"Cannot have grouping separator in scientific notation");
        status = U_MALFORMED_EXPONENTIAL_PATTERN;
        return;
    }

    state.next(); // consume the E
    sub.widthExceptAffixes += 1;

    if (state.peek() == '+') {
        state.next(); // consume the +
        sub.exponentHasPlusSign = true;
        sub.widthExceptAffixes += 1;
    }

    for (; state.peek() == '0'; state.next()) {
        // the loop increment consumes the 0
        sub.exponentZeros++;
        sub.widthExceptAffixes += 1;
    }
}
///////////////////////////////////////////////////
/// END RECURSIVE DESCENT PARSER IMPLEMENTATION ///
///////////////////////////////////////////////////
void
PatternParser::parseToExistingPropertiesImpl(const UnicodeString& pattern, DecimalFormatProperties &properties,
IgnoreRounding ignoreRounding, UErrorCode &status) {
if (pattern.length() == 0) {
// Backwards compatibility requires that we reset to the default values.
// TODO: Only overwrite the properties that "saveToProperties" normally touches?
properties.clear();
return;
}
ParsedPatternInfo patternInfo;
parseToPatternInfo(pattern, patternInfo, status);
if (U_FAILURE(status)) { return; }
patternInfoToProperties(properties, patternInfo, ignoreRounding, status);
}
/**
 * Translates the raw parse result into DecimalFormatProperties.
 *
 * Apart from the negative affixes, only the positive subpattern is read. The following groups
 * of properties are written: grouping sizes, fraction/significant-digit/rounding settings,
 * decimalSeparatorAlwaysShown, exponent and integer-digit settings, padding, the four affix
 * patterns, and the magnitude multiplier.
 *
 * @param properties      The property bag to write to.
 * @param patternInfo     The parse result to read from.
 * @param _ignoreRounding Controls whether the fraction and rounding-increment settings are
 *                        taken from the pattern or left unset (-1 / 0.0).
 * @param status          Passed to AffixUtils::estimateLength() when padding is present.
 */
void PatternParser::patternInfoToProperties(DecimalFormatProperties &properties,
                                            ParsedPatternInfo patternInfo,
                                            IgnoreRounding _ignoreRounding, UErrorCode &status) {
    // Note that most data from "negative" is ignored per the specification of DecimalFormat.
    const ParsedSubpatternInfo &positive = patternInfo.positive;

    bool ignoreRounding;
    switch (_ignoreRounding) {
        case IGNORE_ROUNDING_NEVER:
            ignoreRounding = false;
            break;
        case IGNORE_ROUNDING_IF_CURRENCY:
            ignoreRounding = positive.hasCurrencySign;
            break;
        default:
            U_ASSERT(_ignoreRounding == IGNORE_ROUNDING_ALWAYS);
            ignoreRounding = true;
            break;
    }

    // Grouping settings: a slot still holding -1 means the corresponding ',' never appeared.
    auto grouping1 = static_cast<int16_t> (positive.groupingSizes & 0xffff);
    auto grouping2 = static_cast<int16_t> ((positive.groupingSizes >> 16) & 0xffff);
    auto grouping3 = static_cast<int16_t> ((positive.groupingSizes >> 32) & 0xffff);
    properties.groupingSize = (grouping2 != -1) ? grouping1 : -1;
    properties.secondaryGroupingSize = (grouping3 != -1) ? grouping2 : -1;

    // For backwards compatibility, require that the pattern emit at least one min digit.
    int minInt, minFrac;
    const bool noIntegerSymbols = (positive.integerTotal == 0 && positive.fractionTotal > 0);
    const bool noNumerals = (positive.integerNumerals == 0 && positive.fractionNumerals == 0);
    if (noIntegerSymbols) {
        // patterns like ".##"
        minInt = 0;
        minFrac = uprv_max(1, positive.fractionNumerals);
    } else if (noNumerals) {
        // patterns like "#.##"
        minInt = 1;
        minFrac = 0;
    } else {
        minInt = positive.integerNumerals;
        minFrac = positive.fractionNumerals;
    }

    // Rounding settings
    // Don't set basic rounding when there is a currency sign; defer to CurrencyUsage
    if (positive.integerAtSigns > 0) {
        // Significant-digit patterns ('@') override fraction and increment rounding.
        properties.minimumFractionDigits = -1;
        properties.maximumFractionDigits = -1;
        properties.roundingIncrement = 0.0;
        properties.minimumSignificantDigits = positive.integerAtSigns;
        properties.maximumSignificantDigits =
                positive.integerAtSigns + positive.integerTrailingHashSigns;
    } else {
        if (ignoreRounding) {
            properties.minimumFractionDigits = -1;
            properties.maximumFractionDigits = -1;
            properties.roundingIncrement = 0.0;
        } else {
            properties.minimumFractionDigits = minFrac;
            properties.maximumFractionDigits = positive.fractionTotal;
            // A rounding increment is only set when the pattern contained non-zero digits.
            properties.roundingIncrement =
                    positive.rounding.isZero() ? 0.0 : positive.rounding.toDouble();
        }
        properties.minimumSignificantDigits = -1;
        properties.maximumSignificantDigits = -1;
    }

    // If the pattern ends with a '.' then force the decimal point.
    properties.decimalSeparatorAlwaysShown = (positive.hasDecimal && positive.fractionTotal == 0);

    // Scientific notation settings
    const bool isScientific = (positive.exponentZeros > 0);
    if (!isScientific) {
        properties.exponentSignAlwaysShown = false;
        properties.minimumExponentDigits = -1;
        properties.minimumIntegerDigits = minInt;
        properties.maximumIntegerDigits = -1;
    } else {
        properties.exponentSignAlwaysShown = positive.exponentHasPlusSign;
        properties.minimumExponentDigits = positive.exponentZeros;
        if (positive.integerAtSigns == 0) {
            // patterns without '@' can define max integer digits, used for engineering notation
            properties.minimumIntegerDigits = positive.integerNumerals;
            properties.maximumIntegerDigits = positive.integerTotal;
        } else {
            // patterns with '@' cannot define max integer digits
            properties.minimumIntegerDigits = 1;
            properties.maximumIntegerDigits = -1;
        }
    }

    // Compute the affix patterns (required for both padding and affixes)
    UnicodeString posPrefix = patternInfo.getString(AffixPatternProvider::AFFIX_PREFIX);
    UnicodeString posSuffix = patternInfo.getString(0);

    // Padding settings
    if (positive.paddingLocation.isNull()) {
        properties.formatWidth = -1;
        properties.padString.setToBogus();
        properties.padPosition.nullify();
    } else {
        // The width of the positive prefix and suffix templates are included in the padding
        int paddingWidth = positive.widthExceptAffixes;
        paddingWidth += AffixUtils::estimateLength(UnicodeStringCharSequence(posPrefix), status);
        paddingWidth += AffixUtils::estimateLength(UnicodeStringCharSequence(posSuffix), status);
        properties.formatWidth = paddingWidth;

        UnicodeString rawPaddingString = patternInfo.getString(AffixPatternProvider::AFFIX_PADDING);
        const int32_t rawLength = rawPaddingString.length();
        if (rawLength == 1) {
            properties.padString = rawPaddingString;
        } else if (rawLength == 2) {
            // Two code units starting with a quote are treated as an escaped quote.
            if (rawPaddingString.charAt(0) == '\'') {
                properties.padString.setTo(u"'", -1);
            } else {
                properties.padString = rawPaddingString;
            }
        } else {
            // Drop the first and the last code unit (the surrounding quotes).
            properties.padString = UnicodeString(rawPaddingString, 1, rawLength - 2);
        }
        properties.padPosition = positive.paddingLocation;
    }

    // Set the affixes
    // Always assign, even if the prefixes are empty, especially in the case of the
    // negative prefix pattern, to prevent default values from overriding the pattern.
    properties.positivePrefixPattern = posPrefix;
    properties.positiveSuffixPattern = posSuffix;
    if (!patternInfo.fHasNegativeSubpattern) {
        properties.negativePrefixPattern.setToBogus();
        properties.negativeSuffixPattern.setToBogus();
    } else {
        properties.negativePrefixPattern = patternInfo.getString(
                AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN | AffixPatternProvider::AFFIX_PREFIX);
        properties.negativeSuffixPattern = patternInfo.getString(
                AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN);
    }

    // Set the magnitude multiplier: percent takes priority over per-mille.
    if (positive.hasPercentSign) {
        properties.magnitudeMultiplier = 2;
    } else if (positive.hasPerMilleSign) {
        properties.magnitudeMultiplier = 3;
    } else {
        properties.magnitudeMultiplier = 0;
    }
}
///////////////////////////////////////////////////////////////////
/// End PatternStringParser.java; begin PatternStringUtils.java ///
///////////////////////////////////////////////////////////////////
/**
 * Serializes a property bag into a decimal format pattern string.
 *
 * The pattern is assembled in this order: positive prefix, digit format (grouping, integer and
 * fraction digits, decimal point), optional exponent, positive suffix, optional padding, and,
 * when the negative affixes require it, a ';' followed by the negative subpattern.
 *
 * Numeric properties are clamped to 100 before use so that huge values cannot produce an
 * unboundedly long pattern.
 *
 * @param properties The property bag to serialize.
 * @param status     Receives errors from rounding the increment and from reading the pad
 *                   position; on such a failure the partially built pattern is returned.
 * @return The pattern string built from the properties.
 */
UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatProperties &properties,
                                                            UErrorCode &status) {
    UnicodeString sb;

    // Convenience references
    // The uprv_min() calls prevent DoS
    int dosMax = 100;
    // Note the naming: "groupingSize" holds the secondary size, "firstGroupingSize" the primary.
    int groupingSize = uprv_min(properties.secondaryGroupingSize, dosMax);
    int firstGroupingSize = uprv_min(properties.groupingSize, dosMax);
    int paddingWidth = uprv_min(properties.formatWidth, dosMax);
    NullableValue<PadPosition> paddingLocation = properties.padPosition;
    UnicodeString paddingString = properties.padString;
    int minInt = uprv_max(uprv_min(properties.minimumIntegerDigits, dosMax), 0);
    int maxInt = uprv_min(properties.maximumIntegerDigits, dosMax);
    int minFrac = uprv_max(uprv_min(properties.minimumFractionDigits, dosMax), 0);
    int maxFrac = uprv_min(properties.maximumFractionDigits, dosMax);
    int minSig = uprv_min(properties.minimumSignificantDigits, dosMax);
    int maxSig = uprv_min(properties.maximumSignificantDigits, dosMax);
    bool alwaysShowDecimal = properties.decimalSeparatorAlwaysShown;
    int exponentDigits = uprv_min(properties.minimumExponentDigits, dosMax);
    bool exponentShowPlusSign = properties.exponentSignAlwaysShown;
    UnicodeString pp = properties.positivePrefix;
    UnicodeString ppp = properties.positivePrefixPattern;
    UnicodeString ps = properties.positiveSuffix;
    UnicodeString psp = properties.positiveSuffixPattern;
    UnicodeString np = properties.negativePrefix;
    UnicodeString npp = properties.negativePrefixPattern;
    UnicodeString ns = properties.negativeSuffix;
    UnicodeString nsp = properties.negativeSuffixPattern;

    // Prefixes
    if (!ppp.isBogus()) {
        sb.append(ppp);
    }
    sb.append(AffixUtils::escape(UnicodeStringCharSequence(pp)));
    int afterPrefixPos = sb.length();

    // Figure out the grouping sizes.
    int grouping1, grouping2, grouping;
    if (groupingSize != uprv_min(dosMax, -1) && firstGroupingSize != uprv_min(dosMax, -1) &&
        groupingSize != firstGroupingSize) {
        grouping = groupingSize;
        grouping1 = groupingSize;
        grouping2 = firstGroupingSize;
    } else if (groupingSize != uprv_min(dosMax, -1)) {
        grouping = groupingSize;
        grouping1 = 0;
        grouping2 = groupingSize;
    } else if (firstGroupingSize != uprv_min(dosMax, -1)) {
        grouping = groupingSize;
        grouping1 = 0;
        grouping2 = firstGroupingSize;
    } else {
        grouping = 0;
        grouping1 = 0;
        grouping2 = 0;
    }
    int groupingLength = grouping1 + grouping2 + 1;

    // Figure out the digits we need to put in the pattern.
    double roundingInterval = properties.roundingIncrement;
    UnicodeString digitsString;
    int digitsStringScale = 0;
    if (maxSig != uprv_min(dosMax, -1)) {
        // Significant Digits.
        while (digitsString.length() < minSig) {
            digitsString.append('@');
        }
        while (digitsString.length() < maxSig) {
            digitsString.append('#');
        }
    } else if (roundingInterval != 0.0) {
        // Rounding Interval.
        digitsStringScale = minFrac;
        // TODO: Check for DoS here?
        DecimalQuantity incrementQuantity;
        incrementQuantity.setToDouble(roundingInterval);
        incrementQuantity.adjustMagnitude(minFrac);
        incrementQuantity.roundToMagnitude(0, kDefaultMode, status);
        UnicodeString str = incrementQuantity.toPlainString();
        if (str.charAt(0) == '-') {
            // TODO: Unsupported operation exception or fail silently?
            digitsString.append(str, 1, str.length() - 1);
        } else {
            digitsString.append(str);
        }
    }
    while (digitsString.length() + digitsStringScale < minInt) {
        digitsString.insert(0, '0');
    }
    while (-digitsStringScale < minFrac) {
        digitsString.append('0');
        digitsStringScale--;
    }

    // Write the digits to the string builder
    int m0 = uprv_max(groupingLength, digitsString.length() + digitsStringScale);
    m0 = (maxInt != dosMax) ? uprv_max(maxInt, m0) - 1 : m0 - 1;
    int mN = (maxFrac != dosMax) ? uprv_min(-maxFrac, digitsStringScale) : digitsStringScale;
    for (int magnitude = m0; magnitude >= mN; magnitude--) {
        int di = digitsString.length() + digitsStringScale - magnitude - 1;
        if (di < 0 || di >= digitsString.length()) {
            sb.append('#');
        } else {
            sb.append(digitsString.charAt(di));
        }
        if (magnitude > grouping2 && grouping > 0 && (magnitude - grouping2) % grouping == 0) {
            sb.append(',');
        } else if (magnitude > 0 && magnitude == grouping2) {
            sb.append(',');
        } else if (magnitude == 0 && (alwaysShowDecimal || mN < 0)) {
            sb.append('.');
        }
    }

    // Exponential notation
    if (exponentDigits != uprv_min(dosMax, -1)) {
        sb.append('E');
        if (exponentShowPlusSign) {
            sb.append('+');
        }
        for (int i = 0; i < exponentDigits; i++) {
            sb.append('0');
        }
    }

    // Suffixes
    int beforeSuffixPos = sb.length();
    if (!psp.isBogus()) {
        sb.append(psp);
    }
    sb.append(AffixUtils::escape(UnicodeStringCharSequence(ps)));

    // Resolve Padding
    if (paddingWidth != -1 && !paddingLocation.isNull()) {
        // Widen the digit format with '#' until the pattern reaches the requested width.
        while (paddingWidth - sb.length() > 0) {
            sb.insert(afterPrefixPos, '#');
            beforeSuffixPos++;
        }
        int addedLength;
        switch (paddingLocation.get(status)) {
            case PadPosition::UNUM_PAD_BEFORE_PREFIX:
                addedLength = escapePaddingString(paddingString, sb, 0, status);
                sb.insert(0, '*');
                afterPrefixPos += addedLength + 1;
                beforeSuffixPos += addedLength + 1;
                break;
            case PadPosition::UNUM_PAD_AFTER_PREFIX:
                addedLength = escapePaddingString(paddingString, sb, afterPrefixPos, status);
                sb.insert(afterPrefixPos, '*');
                afterPrefixPos += addedLength + 1;
                beforeSuffixPos += addedLength + 1;
                break;
            case PadPosition::UNUM_PAD_BEFORE_SUFFIX:
                escapePaddingString(paddingString, sb, beforeSuffixPos, status);
                sb.insert(beforeSuffixPos, '*');
                break;
            case PadPosition::UNUM_PAD_AFTER_SUFFIX:
                sb.append('*');
                escapePaddingString(paddingString, sb, sb.length(), status);
                break;
        }
        if (U_FAILURE(status)) { return sb; }
    }

    // Negative affixes
    // Ignore if the negative prefix pattern is "-" and the negative suffix is empty
    if (!np.isBogus() || !ns.isBogus() || (npp.isBogus() && !nsp.isBogus()) ||
        (!npp.isBogus() && (npp.length() != 1 || npp.charAt(0) != '-' || nsp.length() != 0))) {
        sb.append(';');
        if (!npp.isBogus()) {
            sb.append(npp);
        }
        sb.append(AffixUtils::escape(UnicodeStringCharSequence(np)));
        // Copy the positive digit format into the negative.
        // This is optional; the pattern is the same as if '#' were appended here instead.
        //
        // UnicodeString's (source, start, length) overloads take a LENGTH as the third
        // argument, not an end index, so the span is [afterPrefixPos, beforeSuffixPos).
        // The span is copied into a separate string first so that sb is never used as both
        // the source and the destination of the same append.
        const UnicodeString positiveDigitFormat(sb, afterPrefixPos, beforeSuffixPos - afterPrefixPos);
        sb.append(positiveDigitFormat);
        if (!nsp.isBogus()) {
            sb.append(nsp);
        }
        sb.append(AffixUtils::escape(UnicodeStringCharSequence(ns)));
    }

    return sb;
}
/**
 * Inserts the pattern-escaped form of a padding string into output at startIndex.
 *
 * An empty input is replaced by kFallbackPaddingString. A one-unit input is inserted as is,
 * except that a lone quote becomes two quotes. Longer input is wrapped in quotes, with every
 * quote inside it doubled.
 *
 * @param input      The raw padding string (taken by value; may be replaced by the fallback).
 * @param output     The string to insert into.
 * @param startIndex The index in output at which the escaped padding string begins.
 * @param status     Unused.
 * @return The number of chars inserted.
 */
int PatternStringUtils::escapePaddingString(UnicodeString input, UnicodeString& output, int startIndex,
                                            UErrorCode &status) {
    (void)status;
    if (input.length() == 0) {
        input.setTo(kFallbackPaddingString, -1);
    }
    const int lengthBefore = output.length();

    if (input.length() != 1) {
        // Multi-unit padding: emit 'text' with inner quotes doubled.
        int cursor = startIndex;
        output.insert(cursor, '\'');
        cursor += 1;
        const int inputLength = input.length();
        for (int i = 0; i < inputLength; i++) {
            // it's okay to deal in chars here because the quote mark is the only interesting thing.
            const char16_t unit = input.charAt(i);
            if (unit == '\'') {
                output.insert(cursor, u"''", -1);
                cursor += 2;
            } else {
                output.insert(cursor, unit);
                cursor += 1;
            }
        }
        output.insert(cursor, '\'');
    } else if (input.compare(u"'", -1) == 0) {
        // A single quote must be doubled to stand for itself.
        output.insert(startIndex, u"''", -1);
    } else {
        output.insert(startIndex, input);
    }

    return output.length() - lengthBefore;
}

View File

@ -0,0 +1,257 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#ifndef NUMBERFORMAT_PATTERNPARSER_H
#define NUMBERFORMAT_PATTERNPARSER_H
#include <cstdint>
#include <unicode/unum.h>
#include <unicode/unistr.h>
#include "number_types.h"
#include "number_decimalquantity.h"
#include "number_decimfmtprops.h"
#include "number_affixutils.h"
U_NAMESPACE_BEGIN namespace number {
namespace impl {
// Forward declaration
class PatternParser;
/**
 * A pair of offsets into a pattern string marking where a span starts and where it ends.
 * Both offsets default to zero, which describes an empty span.
 */
struct Endpoints {
    int32_t start{0};
    int32_t end{0};
};
// Raw information collected by the recursive descent parser for one subpattern
// (the part before or after the ';' in a pattern string).
struct ParsedSubpatternInfo {
// Packed 16-bit counters. The lowest 16 bits count the integer-format symbols seen since the
// most recent ','; each ',' shifts the whole value left by 16 bits. The initial value leaves
// the second and third slots at 0xffff (-1 when read as int16_t), which readers treat as
// "no grouping separator seen at this position".
int64_t groupingSizes = 0x0000ffffffff0000L;
// Number of '#' in the integer format seen before any '@'.
int32_t integerLeadingHashSigns = 0;
// Number of '#' in the integer format seen after at least one '@'.
int32_t integerTrailingHashSigns = 0;
// Number of ASCII digits ('0'-'9') in the integer format.
int32_t integerNumerals = 0;
// Number of '@' in the integer format.
int32_t integerAtSigns = 0;
// Total number of '#', '@' and digit symbols in the integer format.
int32_t integerTotal = 0; // for convenience
// Number of ASCII digits ('0'-'9') in the fraction format.
int32_t fractionNumerals = 0;
// Number of '#' in the fraction format.
int32_t fractionHashSigns = 0;
// Total number of '#' and digit symbols in the fraction format.
int32_t fractionTotal = 0; // for convenience
// True if a '.' followed the integer format.
bool hasDecimal = false;
// Number of symbols consumed by the number format and exponent (digits, '#', '@', ',', '.',
// 'E', exponent '+', exponent '0'); affixes and padding are not included.
int32_t widthExceptAffixes = 0;
// Where the pad specifier ('*') appeared; null if the subpattern has none.
NullableValue<UNumberFormatPadPosition> paddingLocation;
// Accumulates the digits written in the pattern (leading integer zeros are skipped);
// non-zero when the pattern specifies a rounding increment.
DecimalQuantity rounding;
// True if the exponent 'E' was followed by '+'.
bool exponentHasPlusSign = false;
// Number of '0' following the exponent 'E' (and optional '+').
int32_t exponentZeros = 0;
// The following flags are set when the corresponding symbol starts an unquoted literal in the
// prefix or suffix.
bool hasPercentSign = false;
bool hasPerMilleSign = false;
bool hasCurrencySign = false;
bool hasMinusSign = false;
bool hasPlusSign = false;
// Offsets into the pattern string delimiting the prefix, the suffix, and the literal that
// follows the pad specifier.
Endpoints prefixEndpoints;
Endpoints suffixEndpoints;
Endpoints paddingEndpoints;
};
// Holds a pattern string together with the raw results of parsing it, and exposes the affixes
// of that pattern through the AffixPatternProvider interface.
//
// NOTE(review): ParserState keeps a reference to the `pattern` member of the object it was
// constructed in. A copy made with the implicitly generated copy constructor would therefore
// carry a `state` that still refers to the source object's pattern - confirm that copies are
// never used for parsing, or that they do not outlive the original.
struct ParsedPatternInfo : public AffixPatternProvider {
// The pattern string that was parsed; all Endpoints index into this string.
UnicodeString pattern;
// Parse results for the subpattern before the ';'.
ParsedSubpatternInfo positive;
// Parse results for the subpattern after the ';'; only meaningful if one was present.
ParsedSubpatternInfo negative;
ParsedPatternInfo() : state(this->pattern), currentSubpattern(nullptr) {}
~ParsedPatternInfo() override = default;
// Returns endpoints.end - endpoints.start.
static int32_t getLengthFromEndpoints(const Endpoints &endpoints);
char16_t charAt(int32_t flags, int32_t index) const override;
int32_t length(int32_t flags) const override;
// Returns a copy of the span of the pattern selected by the AFFIX_* flags.
UnicodeString getString(int32_t flags) const;
bool positiveHasPlusSign() const override;
bool hasNegativeSubpattern() const override;
bool negativeHasMinusSign() const override;
bool hasCurrencySign() const override;
bool containsSymbolType(AffixPatternType type, UErrorCode &status) const override;
private:
// Cursor over the pattern string used by the consume*() methods.
struct ParserState {
const UnicodeString &pattern; // reference to the parent
// Current position in the pattern, in UTF-16 code units.
int32_t offset = 0;
explicit ParserState(const UnicodeString &_pattern) : pattern(_pattern) {};
// Returns the code point at the current offset, or -1 at the end of the pattern.
UChar32 peek();
// Returns the code point at the current offset and advances past it.
UChar32 next();
// TODO: We don't currently do anything with the message string.
// This method is here as a shell for Java compatibility.
inline void toParseException(const char16_t *message) { (void)message; }
} state;
// NOTE: In Java, these are written as pure functions.
// In C++, they're written as methods.
// The behavior is the same.
// Mutable transient pointer: the subpattern (positive or negative) currently being parsed.
ParsedSubpatternInfo *currentSubpattern;
// In Java, "negative == null" tells us whether or not we had a negative subpattern.
// In C++, we need to remember in another boolean.
bool fHasNegativeSubpattern = false;
// Maps AFFIX_* flags to the endpoints of the prefix, suffix or padding of a subpattern.
const Endpoints &getEndpoints(int32_t flags) const;
/** Run the recursive descent parser. */
void consumePattern(const UnicodeString &patternString, UErrorCode &status);
void consumeSubpattern(UErrorCode &status);
void consumePadding(PadPosition paddingLocation, UErrorCode &status);
void consumeAffix(Endpoints &endpoints, UErrorCode &status);
void consumeLiteral(UErrorCode &status);
void consumeFormat(UErrorCode &status);
void consumeIntegerFormat(UErrorCode &status);
void consumeFractionFormat(UErrorCode &status);
void consumeExponent(UErrorCode &status);
friend class PatternParser;
};
// Static entry points for parsing decimal format pattern strings.
class PatternParser {
public:
/**
* Runs the recursive descent parser on the given pattern string, filling a data structure with raw information
* about the pattern string.
*
* <p>
* To obtain a more useful form of the data, consider using parseToProperties() instead.
*
* TODO: Change argument type to const char16_t* instead of UnicodeString?
*
* @param patternString
* The LDML decimal format pattern (Excel-style pattern) to parse.
* @param patternInfo
* Receives the results of the parse.
* @param status
* Receives an error code if there is a syntax error in the pattern string.
*/
static void
parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo &patternInfo, UErrorCode &status);
// Selects whether rounding information from the pattern is applied: never ignored, ignored
// only if the positive subpattern has a currency sign, or always ignored.
enum IgnoreRounding {
IGNORE_ROUNDING_NEVER = 0, IGNORE_ROUNDING_IF_CURRENCY = 1, IGNORE_ROUNDING_ALWAYS = 2
};
/**
* Parses a pattern string into a new property bag.
*
* @param pattern
* The pattern string, like "#,##0.00"
* @param ignoreRounding
* Whether to leave out rounding information (minFrac, maxFrac, and rounding increment) when parsing the
* pattern. This may be desirable if a custom rounding mode, such as CurrencyUsage, is to be used
* instead.
* @param status
* Receives an error code if there is a syntax error in the pattern string.
* @return A property bag object.
*/
static DecimalFormatProperties
parseToProperties(const UnicodeString& pattern, IgnoreRounding ignoreRounding, UErrorCode &status);
/**
* Parses a pattern string into an existing property bag. All properties that can be encoded into a pattern string
* will be overwritten with either their default value or with the value coming from the pattern string. Properties
* that cannot be encoded into a pattern string, such as rounding mode, are not modified.
*
* NOTE(review): `properties` is declared by value here, so the caller's object cannot be modified through this
* signature, which contradicts the description above. Confirm against the definition whether a reference was
* intended.
*
* @param pattern
* The pattern string, like "#,##0.00"
* @param properties
* The property bag object to overwrite.
* @param ignoreRounding
* See parseToProperties().
* @param status
* Receives an error code if there was a syntax error in the pattern string.
*/
static void parseToExistingProperties(const UnicodeString& pattern, DecimalFormatProperties properties,
IgnoreRounding ignoreRounding, UErrorCode &status);
private:
// Shared implementation: resets `properties` for an empty pattern, otherwise parses the
// pattern and converts the result into `properties`.
static void
parseToExistingPropertiesImpl(const UnicodeString& pattern, DecimalFormatProperties &properties,
IgnoreRounding ignoreRounding, UErrorCode &status);
/** Finalizes the temporary data stored in the ParsedPatternInfo to the Properties. */
static void
patternInfoToProperties(DecimalFormatProperties &properties, ParsedPatternInfo patternInfo,
IgnoreRounding _ignoreRounding, UErrorCode &status);
};
// Static helpers that produce or transform pattern strings.
class PatternStringUtils {
public:
/**
* Creates a pattern string from a property bag.
*
* <p>
* Since pattern strings support only a subset of the functionality available in a property bag, a new property bag
* created from the string returned by this function may not be the same as the original property bag.
*
* @param properties
* The property bag to serialize.
* @param status
* Receives an error code if building the pattern fails.
* @return A pattern string approximately serializing the property bag.
*/
static UnicodeString
propertiesToPatternString(const DecimalFormatProperties &properties, UErrorCode &status);
/**
* Converts a pattern between standard notation and localized notation. Localized notation means that instead of
* using generic placeholders in the pattern, you use the corresponding locale-specific characters instead. For
* example, in locale <em>fr-FR</em>, the period in the pattern "0.000" means "decimal" in standard notation (as it
* does in every other locale), but it means "grouping" in localized notation.
*
* <p>
* A greedy string-substitution strategy is used to substitute locale symbols. If two symbols are ambiguous or have
* the same prefix, the result is not well-defined.
*
* <p>
* Locale symbols are not allowed to contain the ASCII quote character.
*
* <p>
* This method is provided for backwards compatibility and should not be used in any new code.
*
* TODO(C++): This method is not yet implemented.
*
* @param input
* The pattern to convert.
* @param symbols
* The symbols corresponding to the localized pattern.
* @param toLocalized
* true to convert from standard to localized notation; false to convert from localized to standard
* notation.
* @param status
* Error code in/out parameter.
* @return The pattern expressed in the other notation.
*/
static UnicodeString
convertLocalized(UnicodeString input, DecimalFormatSymbols symbols, bool toLocalized,
UErrorCode &status);
private:
/**
* Inserts the pattern-escaped form of the padding string `input` into `output` at `startIndex`.
*
* @return The number of chars inserted.
*/
static int
escapePaddingString(UnicodeString input, UnicodeString &output, int startIndex, UErrorCode &status);
};
} // namespace impl
} // namespace number
U_NAMESPACE_END
#endif //NUMBERFORMAT_PATTERNPARSER_H

View File

@ -0,0 +1,20 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#ifndef NUMBERFORMAT_NUMFMTTER_RESULTS_H
#define NUMBERFORMAT_NUMFMTTER_RESULTS_H
// FIXME: Remove this file?
#include "number_types.h"
#include "number_decimalquantity.h"
#include "number_stringbuilder.h"
U_NAMESPACE_BEGIN namespace number {
namespace impl {
} // namespace impl
} // namespace number
U_NAMESPACE_END
#endif //NUMBERFORMAT_NUMFMTTER_RESULTS_H

View File

@ -0,0 +1,339 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include <uassert.h>
#include "unicode/numberformatter.h"
#include "number_types.h"
#include "number_decimalquantity.h"
using namespace icu::number;
using namespace icu::number::impl;
namespace {
/**
 * Converts a maximum fraction digit count into a rounding magnitude.
 * A count of -1 maps to INT32_MIN; any other count maps to its negation.
 */
int32_t getRoundingMagnitudeFraction(int maxFrac) {
    const bool unlimited = (maxFrac == -1);
    return unlimited ? INT32_MIN : -maxFrac;
}
/**
 * Converts a maximum significant digit count into a rounding magnitude for the given value.
 * A count of -1 maps to INT32_MIN. Otherwise the result is magnitude - maxSig + 1, where the
 * magnitude of a zero value is taken to be 0.
 */
int32_t getRoundingMagnitudeSignificant(const DecimalQuantity &value, int maxSig) {
    if (maxSig == -1) {
        return INT32_MIN;
    }
    int magnitude = 0;
    if (!value.isZero()) {
        magnitude = value.getMagnitude();
    }
    return magnitude - maxSig + 1;
}
/**
 * Converts a minimum fraction digit count into a display magnitude.
 * A count of 0 maps to INT32_MAX; any other count maps to its negation.
 */
int32_t getDisplayMagnitudeFraction(int minFrac) {
    const bool noMinimum = (minFrac == 0);
    return noMinimum ? INT32_MAX : -minFrac;
}
/**
 * Converts a minimum significant digit count into a display magnitude for the given value:
 * magnitude - minSig + 1, where the magnitude of a zero value is taken to be 0.
 */
int32_t getDisplayMagnitudeSignificant(const DecimalQuantity &value, int minSig) {
    int magnitude = 0;
    if (!value.isZero()) {
        magnitude = value.getMagnitude();
    }
    return magnitude - minSig + 1;
}
}
// Factory: builds a Rounder of type RND_NONE with an empty-initialized settings union and the
// default rounding mode.
Rounder Rounder::unlimited() {
return Rounder(RND_NONE, {}, kDefaultMode);
}
// Factory: a fraction rounder with both the minimum and the maximum fraction digits set to zero.
FractionRounder Rounder::integer() {
return constructFraction(0, 0);
}
/**
 * Factory: a fraction rounder whose minimum and maximum fraction digits are the same value.
 *
 * @param minMaxFractionPlaces Digit count; must lie in [0, kMaxIntFracSig].
 * @return The rounder, or an error-state rounder carrying
 *         U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR when the argument is out of range.
 */
FractionRounder Rounder::fixedFraction(int32_t minMaxFractionPlaces) {
    const bool outOfRange = minMaxFractionPlaces < 0 || minMaxFractionPlaces > kMaxIntFracSig;
    if (outOfRange) {
        return {U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR};
    }
    return constructFraction(minMaxFractionPlaces, minMaxFractionPlaces);
}
/**
 * Factory: a fraction rounder with a minimum number of fraction digits and no maximum
 * (the maximum is stored as -1).
 *
 * @param minFractionPlaces Digit count; must lie in [0, kMaxIntFracSig].
 * @return The rounder, or an error-state rounder carrying
 *         U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR when the argument is out of range.
 */
FractionRounder Rounder::minFraction(int32_t minFractionPlaces) {
    const bool outOfRange = minFractionPlaces < 0 || minFractionPlaces > kMaxIntFracSig;
    if (outOfRange) {
        return {U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR};
    }
    return constructFraction(minFractionPlaces, -1);
}
/**
 * Factory: a fraction rounder with a maximum number of fraction digits and a minimum of zero.
 *
 * @param maxFractionPlaces Digit count; must lie in [0, kMaxIntFracSig].
 * @return The rounder, or an error-state rounder carrying
 *         U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR when the argument is out of range.
 */
FractionRounder Rounder::maxFraction(int32_t maxFractionPlaces) {
    const bool outOfRange = maxFractionPlaces < 0 || maxFractionPlaces > kMaxIntFracSig;
    if (outOfRange) {
        return {U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR};
    }
    return constructFraction(0, maxFractionPlaces);
}
/**
 * Factory: a fraction rounder with independent minimum and maximum fraction digit counts.
 *
 * The arguments are accepted when 0 <= minFractionPlaces <= maxFractionPlaces <= kMaxIntFracSig.
 *
 * @param minFractionPlaces Minimum fraction digits.
 * @param maxFractionPlaces Maximum fraction digits.
 * @return The rounder, or an error-state rounder carrying
 *         U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR when the arguments are rejected.
 */
FractionRounder Rounder::minMaxFraction(int32_t minFractionPlaces, int32_t maxFractionPlaces) {
    const bool rejected = minFractionPlaces < 0
                          || maxFractionPlaces > kMaxIntFracSig
                          || minFractionPlaces > maxFractionPlaces;
    if (rejected) {
        return {U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR};
    }
    return constructFraction(minFractionPlaces, maxFractionPlaces);
}
/**
 * Factory: a significant-digits rounder whose minimum and maximum digit counts are the same.
 *
 * @param minMaxSignificantDigits Digit count; accepted when in [0, kMaxIntFracSig].
 *        NOTE(review): zero is accepted here; confirm whether zero significant digits is intended.
 * @return The rounder, or an error-state rounder carrying
 *         U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR when the argument is out of range.
 */
Rounder Rounder::fixedDigits(int32_t minMaxSignificantDigits) {
    const bool outOfRange =
            minMaxSignificantDigits < 0 || minMaxSignificantDigits > kMaxIntFracSig;
    if (outOfRange) {
        return {U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR};
    }
    return constructSignificant(minMaxSignificantDigits, minMaxSignificantDigits);
}
/**
 * Factory: a significant-digits rounder with a minimum digit count and no maximum
 * (the maximum is stored as -1).
 *
 * @param minSignificantDigits Digit count; accepted when in [0, kMaxIntFracSig].
 * @return The rounder, or an error-state rounder carrying
 *         U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR when the argument is out of range.
 */
Rounder Rounder::minDigits(int32_t minSignificantDigits) {
    const bool outOfRange = minSignificantDigits < 0 || minSignificantDigits > kMaxIntFracSig;
    if (outOfRange) {
        return {U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR};
    }
    return constructSignificant(minSignificantDigits, -1);
}
/**
 * Factory: a significant-digits rounder with a maximum digit count and a minimum of zero.
 *
 * @param maxSignificantDigits Digit count; accepted when in [0, kMaxIntFracSig].
 * @return The rounder, or an error-state rounder carrying
 *         U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR when the argument is out of range.
 */
Rounder Rounder::maxDigits(int32_t maxSignificantDigits) {
    const bool outOfRange = maxSignificantDigits < 0 || maxSignificantDigits > kMaxIntFracSig;
    if (outOfRange) {
        return {U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR};
    }
    return constructSignificant(0, maxSignificantDigits);
}
/**
 * Factory: a significant-digits rounder with independent minimum and maximum digit counts.
 *
 * The arguments are accepted when
 * 0 <= minSignificantDigits <= maxSignificantDigits <= kMaxIntFracSig.
 *
 * @param minSignificantDigits Minimum significant digits.
 * @param maxSignificantDigits Maximum significant digits.
 * @return The rounder, or an error-state rounder carrying
 *         U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR when the arguments are rejected.
 */
Rounder Rounder::minMaxDigits(int32_t minSignificantDigits, int32_t maxSignificantDigits) {
    const bool rejected = minSignificantDigits < 0
                          || maxSignificantDigits > kMaxIntFracSig
                          || minSignificantDigits > maxSignificantDigits;
    if (rejected) {
        return {U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR};
    }
    return constructSignificant(minSignificantDigits, maxSignificantDigits);
}
/**
 * Factory: a rounder that rounds to a multiple of the given increment, with a minimum fraction
 * length of zero.
 *
 * @param roundingIncrement The increment; must compare greater than 0.0 (NaN is rejected).
 * @return The rounder, or an error-state rounder carrying
 *         U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR when the increment is not positive.
 */
IncrementRounder Rounder::increment(double roundingIncrement) {
    // Written as a negated "greater than" so that NaN also takes the error branch.
    if (!(roundingIncrement > 0.0)) {
        return {U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR};
    }
    return constructIncrement(roundingIncrement, 0);
}
// Factory: a currency rounder that records the requested currency usage. The concrete rounding
// settings are resolved later by withCurrency(), once the currency itself is known.
CurrencyRounder Rounder::currency(UCurrencyUsage currencyUsage) {
return constructCurrency(currencyUsage);
}
/**
 * Returns a copy of this rounder that uses the given rounding mode.
 * A rounder in the error state is returned unchanged.
 */
Rounder Rounder::withMode(RoundingMode roundingMode) const {
    if (fType != RND_ERROR) {
        return {fType, fUnion, roundingMode};
    }
    // no-op in error state
    return *this;
}
/**
 * Combines this fraction rounder with a minimum number of significant digits
 * (the maximum significant digits is stored as -1).
 *
 * @param minSignificantDigits Digit count; accepted when in [0, kMaxIntFracSig].
 * @return The combined rounder; this rounder unchanged if it is already in the error state; or
 *         an error-state rounder carrying U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR.
 */
Rounder FractionRounder::withMinDigits(int32_t minSignificantDigits) const {
    if (fType == RND_ERROR) {
        // no-op in error state
        return *this;
    }
    const bool inRange = 0 <= minSignificantDigits && minSignificantDigits <= kMaxIntFracSig;
    if (!inRange) {
        return {U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR};
    }
    return constructFractionSignificant(*this, minSignificantDigits, -1);
}
/**
 * Combines this fraction rounder with a maximum number of significant digits
 * (the minimum significant digits is stored as -1).
 *
 * @param maxSignificantDigits Digit count; accepted when in [0, kMaxIntFracSig].
 * @return The combined rounder; this rounder unchanged if it is already in the error state; or
 *         an error-state rounder carrying U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR.
 */
Rounder FractionRounder::withMaxDigits(int32_t maxSignificantDigits) const {
    if (fType == RND_ERROR) {
        // no-op in error state
        return *this;
    }
    const bool inRange = 0 <= maxSignificantDigits && maxSignificantDigits <= kMaxIntFracSig;
    if (!inRange) {
        return {U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR};
    }
    return constructFractionSignificant(*this, -1, maxSignificantDigits);
}
// Private method on base class.
//
// Resolves a currency rounder into concrete settings for the given currency: looks up the
// rounding increment and the default fraction digits for the stored currency usage, then builds
// an increment rounder when the increment is non-zero and a fixed-fraction rounder otherwise.
// A rounder already in the error state is returned unchanged. Lookup failures are reported
// through `status`; this method does not inspect `status` itself.
Rounder Rounder::withCurrency(const CurrencyUnit &currency, UErrorCode &status) const {
    if (fType == RND_ERROR) {
        // no-op in error state
        return *this;
    }
    U_ASSERT(fType == RND_CURRENCY);

    const char16_t *isoCode = currency.getISOCurrency();
    double increment = ucurr_getRoundingIncrementForUsage(isoCode, fUnion.currencyUsage, &status);
    int32_t fractionDigits = ucurr_getDefaultFractionDigitsForUsage(
            isoCode, fUnion.currencyUsage, &status);

    if (increment == 0.0) {
        return constructFraction(fractionDigits, fractionDigits);
    }
    return constructIncrement(increment, fractionDigits);
}
// Public method on CurrencyRounder subclass.
//
// Resolves this currency rounder for the given currency using a local error code. If the
// resolution reports a failure, the failure code is returned wrapped in an error-state rounder.
Rounder CurrencyRounder::withCurrency(const CurrencyUnit &currency) const {
    UErrorCode localStatus = U_ZERO_ERROR;
    Rounder resolved = Rounder::withCurrency(currency, localStatus);
    if (U_SUCCESS(localStatus)) {
        return resolved;
    }
    return {localStatus};
}
/**
 * Returns an increment rounder with the same increment as this one and the given minimum
 * fraction length.
 *
 * @param minFrac Minimum fraction digits; accepted when in [0, kMaxIntFracSig].
 * @return The new rounder; this rounder unchanged if it is already in the error state; or an
 *         error-state rounder carrying U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR.
 */
Rounder IncrementRounder::withMinFraction(int32_t minFrac) const {
    if (fType == RND_ERROR) {
        // no-op in error state
        return *this;
    }
    const bool inRange = 0 <= minFrac && minFrac <= kMaxIntFracSig;
    if (!inRange) {
        return {U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR};
    }
    return constructIncrement(fUnion.increment.fIncrement, minFrac);
}
/**
 * Builds an RND_FRACTION rounder with the default rounding mode.
 * The fraction limits are narrowed to int8_t; both significant-digit limits are stored as -1.
 */
FractionRounder Rounder::constructFraction(int32_t minFrac, int32_t maxFrac) {
    RounderUnion union_;
    union_.fracSig.fMinFrac = static_cast<int8_t>(minFrac);
    union_.fracSig.fMaxFrac = static_cast<int8_t>(maxFrac);
    union_.fracSig.fMinSig = -1;
    union_.fracSig.fMaxSig = -1;
    return {RND_FRACTION, union_, kDefaultMode};
}
/**
 * Builds an RND_SIGNIFICANT rounder with the default rounding mode.
 * The significant-digit limits are narrowed to int8_t; both fraction limits are stored as -1.
 */
Rounder Rounder::constructSignificant(int32_t minSig, int32_t maxSig) {
    RounderUnion union_;
    union_.fracSig.fMinFrac = -1;
    union_.fracSig.fMaxFrac = -1;
    union_.fracSig.fMinSig = static_cast<int8_t>(minSig);
    union_.fracSig.fMaxSig = static_cast<int8_t>(maxSig);
    return {RND_SIGNIFICANT, union_, kDefaultMode};
}
/**
 * Builds an RND_FRACTION_SIGNIFICANT rounder with the default rounding mode.
 * The fraction limits are taken from `base`; the significant-digit limits are overwritten with
 * the given values (narrowed to int8_t).
 */
Rounder
Rounder::constructFractionSignificant(const FractionRounder &base, int32_t minSig, int32_t maxSig) {
    RounderUnion union_;
    union_.fracSig = base.fUnion.fracSig;
    union_.fracSig.fMinSig = static_cast<int8_t>(minSig);
    union_.fracSig.fMaxSig = static_cast<int8_t>(maxSig);
    return {RND_FRACTION_SIGNIFICANT, union_, kDefaultMode};
}
/**
 * Builds an RND_INCREMENT rounder with the default rounding mode, storing the increment and
 * the minimum fraction length.
 */
IncrementRounder Rounder::constructIncrement(double increment, int32_t minFrac) {
    RounderUnion union_;
    union_.increment.fIncrement = increment;
    union_.increment.fMinFrac = minFrac;
    return {RND_INCREMENT, union_, kDefaultMode};
}
// Builds an RND_CURRENCY rounder with the default rounding mode; only the currency usage is
// stored in the settings union.
CurrencyRounder Rounder::constructCurrency(UCurrencyUsage usage) {
RounderUnion union_;
union_.currencyUsage = usage;
return {RND_CURRENCY, union_, kDefaultMode};
}
// Builds an RND_PASS_THROUGH rounder with the default rounding mode. apply() leaves the
// quantity untouched for this type.
Rounder Rounder::constructPassThrough() {
RounderUnion union_;
union_.errorCode = U_ZERO_ERROR; // initialize the variable
return {RND_PASS_THROUGH, union_, kDefaultMode};
}
/**
 * Resolves a currency rounder in place against the given currency.
 * Rounders of any other type are left unchanged. Lookup errors are reported through `status`.
 */
void Rounder::setLocaleData(const CurrencyUnit &currency, UErrorCode &status) {
    if (fType != RND_CURRENCY) {
        return;
    }
    *this = withCurrency(currency, status);
}
// Scales `input` by a power of ten chosen by `producer` for the quantity's magnitude, then
// rounds it with apply(). If rounding bumps the quantity up by one magnitude (and it did not
// become zero), the original value is restored and the scale-and-round step is redone using
// the multiplier for the next-higher magnitude.
//
// Precondition (asserted): `input` is not zero.
// Returns the multiplier (power of ten) that was finally applied to `input`.
int32_t
Rounder::chooseMultiplierAndApply(impl::DecimalQuantity &input, const impl::MultiplierProducer &producer,
UErrorCode &status) {
// TODO: Make a better and more efficient implementation.
// TODO: Avoid the object creation here.
// Keep the unmodified value so the operation can be retried below.
DecimalQuantity copy(input);
U_ASSERT(!input.isZero());
int32_t magnitude = input.getMagnitude();
int32_t multiplier = producer.getMultiplier(magnitude);
input.adjustMagnitude(multiplier);
apply(input, status);
// If the number turned to zero when rounding, do not re-attempt the rounding.
if (!input.isZero() && input.getMagnitude() == magnitude + multiplier + 1) {
// Rounding carried into a new leading digit: retry as if the input had that magnitude.
magnitude += 1;
input = copy;
multiplier = producer.getMultiplier(magnitude);
input.adjustMagnitude(multiplier);
U_ASSERT(input.getMagnitude() == magnitude + multiplier - 1);
apply(input, status);
U_ASSERT(input.getMagnitude() == magnitude + multiplier);
}
return multiplier;
}
/**
 * This is the method that contains the actual rounding logic.
 *
 * Rounds `value` in place according to this rounder's type and settings, then sets the
 * fraction length that should be displayed.
 *
 * @param value  The quantity to round; modified in place.
 * @param status Set to U_INTERNAL_PROGRAM_ERROR if this rounder is bogus or in the error state;
 *               may also be set by the underlying rounding operations.
 */
void Rounder::apply(impl::DecimalQuantity &value, UErrorCode& status) const {
    switch (fType) {
        case RND_BOGUS:
        case RND_ERROR:
            // Errors should be caught before the apply() method is called
            status = U_INTERNAL_PROGRAM_ERROR;
            break;

        case RND_NONE:
            value.roundToInfinity();
            break;

        case RND_FRACTION:
            value.roundToMagnitude(
                    getRoundingMagnitudeFraction(fUnion.fracSig.fMaxFrac), fRoundingMode, status);
            value.setFractionLength(
                    uprv_max(0, -getDisplayMagnitudeFraction(fUnion.fracSig.fMinFrac)), INT32_MAX);
            break;

        case RND_SIGNIFICANT:
            value.roundToMagnitude(
                    getRoundingMagnitudeSignificant(value, fUnion.fracSig.fMaxSig),
                    fRoundingMode,
                    status);
            // The display magnitude is computed after rounding, from the rounded value.
            value.setFractionLength(
                    uprv_max(0, -getDisplayMagnitudeSignificant(value, fUnion.fracSig.fMinSig)),
                    INT32_MAX);
            break;

        case RND_FRACTION_SIGNIFICANT: {
            int32_t displayMag = getDisplayMagnitudeFraction(fUnion.fracSig.fMinFrac);
            int32_t roundingMag = getRoundingMagnitudeFraction(fUnion.fracSig.fMaxFrac);
            if (fUnion.fracSig.fMinSig == -1) {
                // Max Sig override
                int32_t candidate = getRoundingMagnitudeSignificant(value, fUnion.fracSig.fMaxSig);
                roundingMag = uprv_max(roundingMag, candidate);
            } else {
                // Min Sig override
                int32_t candidate = getDisplayMagnitudeSignificant(value, fUnion.fracSig.fMinSig);
                roundingMag = uprv_min(roundingMag, candidate);
            }
            value.roundToMagnitude(roundingMag, fRoundingMode, status);
            value.setFractionLength(uprv_max(0, -displayMag), INT32_MAX);
            break;
        }

        case RND_INCREMENT:
            value.roundToIncrement(fUnion.increment.fIncrement, fRoundingMode, status);
            value.setFractionLength(fUnion.increment.fMinFrac, fUnion.increment.fMinFrac);
            break;

        case RND_CURRENCY:
            // Call .withCurrency() before .apply()!
            U_ASSERT(false);
            // Explicit break: previously this case fell through implicitly into
            // RND_PASS_THROUGH. Behavior in builds without assertions is unchanged (no-op).
            break;

        case RND_PASS_THROUGH:
            break;
    }
}
/**
 * Sets the displayed fraction length of a zero quantity to (minimum significant digits - minInt).
 * This method is intended for the one specific purpose of helping print "00.000E0".
 *
 * Preconditions (asserted): this rounder is of type RND_SIGNIFICANT and `value` is zero.
 * The error code is taken by value and is not used.
 */
void Rounder::apply(impl::DecimalQuantity &value, int32_t minInt, UErrorCode /*status*/) {
    U_ASSERT(fType == RND_SIGNIFICANT);
    U_ASSERT(value.isZero());
    const int32_t minFractionDigits = fUnion.fracSig.fMinSig - minInt;
    value.setFractionLength(minFractionDigits, INT32_MAX);
}

View File

@ -0,0 +1,136 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#ifndef NUMBERFORMAT_ROUNDINGUTILS_H
#define NUMBERFORMAT_ROUNDINGUTILS_H
#include "number_types.h"
U_NAMESPACE_BEGIN
namespace number {
namespace impl {
namespace roundingutils {
// Classifies the part of a quantity that lies to the right of the rounding magnitude.
// getRoundingDirection() resolves LOWER, MIDPOINT and UPPER for the half-way rounding modes;
// the two EDGE values are not resolved there.
// NOTE(review): the exact meaning of the EDGE values is defined by the code that produces
// them, which is not in this header -- confirm before relying on it.
enum Section {
SECTION_LOWER_EDGE = -1,
SECTION_UPPER_EDGE = -2,
SECTION_LOWER = 1, // closer to zero than the midpoint
SECTION_MIDPOINT = 2, // exactly halfway between two digits
SECTION_UPPER = 3 // closer to infinity than the midpoint
};
/**
 * Converts a rounding mode and metadata about the quantity being rounded to a boolean
 * determining whether the value should be rounded toward infinity or toward zero.
 *
 * UP, DOWN, CEILING and FLOOR are decided without looking at `section`. HALFUP, HALFDOWN and
 * HALFEVEN are decided from `section`; they differ only in how SECTION_MIDPOINT is resolved.
 *
 * @param isEven Whether the digit immediately before the rounding magnitude is even.
 * @param isNegative Whether the quantity is negative.
 * @param section Whether the part of the quantity to the right of the rounding magnitude is
 *     exactly halfway between two digits (SECTION_MIDPOINT), in the lower part, closer to zero
 *     (SECTION_LOWER), or in the upper part, closer to infinity (SECTION_UPPER).
 * @param roundingMode The rounding mode to apply.
 * @param status Error code, set to U_FORMAT_INEXACT_ERROR when no decision can be made: the
 *     rounding mode is not one of the seven handled here, or a half-way mode is combined
 *     with a section other than LOWER, MIDPOINT or UPPER.
 * @return true if the number should be rounded toward zero; false if it should be rounded
 *     toward infinity. Returns false when `status` is set to an error.
 */
inline bool
getRoundingDirection(bool isEven, bool isNegative, Section section, RoundingMode roundingMode,
                     UErrorCode &status) {
    // Directed modes: the answer does not depend on the discarded digits.
    if (roundingMode == RoundingMode::UNUM_ROUND_UP) {
        // round away from zero
        return false;
    }
    if (roundingMode == RoundingMode::UNUM_ROUND_DOWN) {
        // round toward zero
        return true;
    }
    if (roundingMode == RoundingMode::UNUM_ROUND_CEILING) {
        // round toward positive infinity
        return isNegative;
    }
    if (roundingMode == RoundingMode::UNUM_ROUND_FLOOR) {
        // round toward negative infinity
        return !isNegative;
    }

    // Half-way modes: only the midpoint is treated differently between them.
    const bool isHalfMode = roundingMode == RoundingMode::UNUM_ROUND_HALFUP
                            || roundingMode == RoundingMode::UNUM_ROUND_HALFDOWN
                            || roundingMode == RoundingMode::UNUM_ROUND_HALFEVEN;
    if (isHalfMode) {
        if (section == SECTION_LOWER) {
            return true;
        }
        if (section == SECTION_UPPER) {
            return false;
        }
        if (section == SECTION_MIDPOINT) {
            if (roundingMode == RoundingMode::UNUM_ROUND_HALFUP) {
                return false;
            }
            if (roundingMode == RoundingMode::UNUM_ROUND_HALFDOWN) {
                return true;
            }
            return isEven;
        }
    }

    status = U_FORMAT_INEXACT_ERROR;
    return false;
}
/**
 * Gets whether the given rounding mode's rounding boundary is at the midpoint. The rounding
 * boundary is the point at which a number switches from being rounded down to being rounded
 * up. For UP, DOWN, CEILING and FLOOR the boundary is at the "edge" and this function returns
 * false; for every other value it returns true.
 *
 * @param roundingMode The integer value of the RoundingMode.
 * @return false if the rounding mode is UP, DOWN, CEILING or FLOOR; true otherwise.
 */
inline bool roundsAtMidpoint(int roundingMode) {
    const bool roundsAtEdge = roundingMode == RoundingMode::UNUM_ROUND_UP
                              || roundingMode == RoundingMode::UNUM_ROUND_DOWN
                              || roundingMode == RoundingMode::UNUM_ROUND_CEILING
                              || roundingMode == RoundingMode::UNUM_ROUND_FLOOR;
    return !roundsAtEdge;
}
} // namespace roundingutils
} // namespace impl
} // namespace number
U_NAMESPACE_END
#endif //NUMBERFORMAT_ROUNDINGUTILS_H

View File

@ -0,0 +1,129 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include <cstdlib>
#include "number_scientific.h"
#include "number_utils.h"
#include "number_stringbuilder.h"
#include "unicode/unum.h"
using namespace icu::number::impl;
// NOTE: The object lifecycle of ScientificModifier and ScientificHandler differ greatly in Java and C++.
//
// During formatting, we need to provide an object with state (the exponent) as the inner modifier.
//
// In Java, where the priority is put on reducing object creations, the unsafe code path re-uses the
// ScientificHandler as a ScientificModifier, and the safe code path pre-computes 25 ScientificModifier
// instances. This scheme reduces the number of object creations by 1 in both safe and unsafe.
//
// In C++, MicroProps provides a pre-allocated ScientificModifier, and ScientificHandler simply populates
// the state (the exponent) into that ScientificModifier. There is no difference between safe and unsafe.
// Creates an unset modifier: exponent 0 and no handler. set() must be called before apply(),
// which dereferences the handler.
ScientificModifier::ScientificModifier() : fExponent(0), fHandler(nullptr) {}
/**
 * Stores the exponent to render and the handler that supplies symbols and settings.
 * Asserts that no handler has been stored yet: ScientificModifier should be set only once.
 */
void ScientificModifier::set(int32_t exponent, const ScientificHandler *handler) {
    U_ASSERT(fHandler == nullptr);
    fHandler = handler;
    fExponent = exponent;
}
/**
 * Inserts the exponent part (exponent separator, optional sign, exponent digits) into `output`
 * at `rightIndex`.
 *
 * @param output     The string builder to modify.
 * @param rightIndex The index at which the exponent part is inserted.
 * @param status     Error code passed through to the string builder insertions.
 * @return The number of UTF-16 code units inserted.
 */
int32_t ScientificModifier::apply(NumberStringBuilder &output, int32_t /*leftIndex*/, int32_t rightIndex,
                                  UErrorCode &status) const {
    // FIXME: Localized exponent separator location.
    int i = rightIndex;
    // Append the exponent separator and sign
    i += output.insert(
            i,
            fHandler->fSymbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kExponentialSymbol),
            UNUM_EXPONENT_SYMBOL_FIELD,
            status);
    if (fExponent < 0 && fHandler->fSettings.fExponentSignDisplay != UNUM_SIGN_NEVER) {
        i += output.insert(
                i,
                fHandler->fSymbols
                        ->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kMinusSignSymbol),
                UNUM_EXPONENT_SIGN_FIELD,
                status);
    } else if (fExponent >= 0 && fHandler->fSettings.fExponentSignDisplay == UNUM_SIGN_ALWAYS) {
        i += output.insert(
                i,
                fHandler->fSymbols
                        ->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kPlusSignSymbol),
                UNUM_EXPONENT_SIGN_FIELD,
                status);
    }
    // Append the exponent digits (using a simple inline algorithm).
    // Digits are produced least-significant first, so each new digit is inserted at the start
    // of the digit run. The start index is remembered explicitly: deriving it as "i - j" is
    // only correct when every digit string is exactly one code unit long, and would interleave
    // (or split) digit strings that occupy more than one UTF-16 code unit.
    const int32_t digitsStart = i;
    int32_t disp = std::abs(fExponent);
    for (int j = 0; j < fHandler->fSettings.fMinExponentDigits || disp > 0; j++, disp /= 10) {
        auto d = static_cast<int8_t>(disp % 10);
        const UnicodeString &digitString = getDigitFromSymbols(d, *fHandler->fSymbols);
        i += output.insert(digitsStart, digitString, UNUM_EXPONENT_FIELD, status);
    }
    return i - rightIndex;
}
int32_t ScientificModifier::getPrefixLength(UErrorCode &status) const {
(void)status;
// TODO: Localized exponent separator location.
return 0;
}
int32_t ScientificModifier::getCodePointCount(UErrorCode &status) const {
(void)status;
// This method is not used for strong modifiers.
U_ASSERT(false);
return 0;
}
// Reports this modifier as "strong"; the result is unconditionally true.
bool ScientificModifier::isStrong() const {
// Scientific is always strong
return true;
}
// Binds the handler to the scientific settings inside `notation`, the symbols, and the parent
// generator. None of the three is copied: fSettings refers into `*notation`, and the other two
// are stored as raw pointers, so all of them must outlive this handler.
ScientificHandler::ScientificHandler(const Notation *notation, const DecimalFormatSymbols *symbols,
const MicroPropsGenerator *parent) : fSettings(
notation->fUnion.scientific), fSymbols(symbols), fParent(parent) {}
/**
 * Runs the parent generator, then rounds `quantity` for scientific notation and records the
 * resulting exponent in the ScientificModifier owned by `micros`, which becomes the inner
 * modifier.
 *
 * @param quantity The quantity to process; rounded (and, if non-zero, rescaled) in place.
 * @param micros   Receives the parent's output and the scientific inner modifier.
 * @param status   Error code; nothing further is done if the parent reports a failure.
 */
void ScientificHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micros,
                                        UErrorCode &status) const {
    fParent->processQuantity(quantity, micros, status);
    if (U_FAILURE(status)) { return; }

    // Treat zero as if it had magnitude 0
    int32_t exponent = 0;
    if (!quantity.isZero()) {
        exponent = -micros.rounding.chooseMultiplierAndApply(quantity, *this, status);
    } else if (fSettings.fRequireMinInt && micros.rounding.fType == Rounder::RND_SIGNIFICANT) {
        // Show "00.000E0" on pattern "00.000E0"
        micros.rounding.apply(quantity, fSettings.fEngineeringInterval, status);
    } else {
        micros.rounding.apply(quantity, status);
    }

    // Use MicroProps's helper ScientificModifier and save it as the modInner.
    ScientificModifier &scientificMod = micros.helpers.scientificModifier;
    scientificMod.set(exponent, this);
    micros.modInner = &scientificMod;
}
/**
 * Computes the power of ten by which a quantity of the given magnitude is shifted so that the
 * desired number of integer digits is shown.
 *
 * @param magnitude The magnitude of the quantity.
 * @return (digitsShown - magnitude - 1), where digitsShown depends on the settings as
 *         described in the body.
 */
int32_t ScientificHandler::getMultiplier(int32_t magnitude) const {
    const int32_t interval = fSettings.fEngineeringInterval;

    // Default: patterns like "0.00E0" and "@@@E0" show a single integer digit.
    int32_t digitsShown = 1;
    if (fSettings.fRequireMinInt) {
        // For patterns like "000.00E0" and ".00E0"
        digitsShown = interval;
    } else if (interval > 1) {
        // For patterns like "##0.00"
        // The double modulo keeps the remainder non-negative for negative magnitudes.
        digitsShown = ((magnitude % interval + interval) % interval) + 1;
    }
    return digitsShown - magnitude - 1;
}

View File

@ -0,0 +1,57 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#ifndef NUMBERFORMAT_NUMFMTTER_SCIENTIFIC_H
#define NUMBERFORMAT_NUMFMTTER_SCIENTIFIC_H
#include "number_types.h"
U_NAMESPACE_BEGIN namespace number {
namespace impl {
// Forward-declare
class ScientificHandler;
// Modifier that renders the exponent part of a number in scientific notation. It holds the
// exponent value and a non-owning pointer to the ScientificHandler that supplies the symbols
// and settings used when rendering.
class ScientificModifier : public UMemory, public Modifier {
public:
// Creates an unset modifier (exponent 0, no handler).
ScientificModifier();
// Stores the exponent and the handler; the handler pointer is not owned.
void set(int32_t exponent, const ScientificHandler *handler);
// Inserts the exponent part into `output` at `rightIndex`; returns the number of code units
// inserted.
int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
UErrorCode &status) const override;
int32_t getPrefixLength(UErrorCode &status) const override;
int32_t getCodePointCount(UErrorCode &status) const override;
bool isStrong() const override;
private:
// The exponent to render.
int32_t fExponent;
// Source of symbols and settings; not owned.
const ScientificHandler *fHandler;
};
// Micro-props generator for scientific notation. It also acts as the MultiplierProducer that
// decides by which power of ten a quantity is shifted before rounding.
class ScientificHandler : public UMemory, public MicroPropsGenerator, public MultiplierProducer {
public:
// None of the arguments is copied or owned; all must outlive the handler.
ScientificHandler(const Notation *notation, const DecimalFormatSymbols *symbols,
const MicroPropsGenerator *parent);
void
processQuantity(DecimalQuantity &quantity, MicroProps &micros, UErrorCode &status) const override;
int32_t getMultiplier(int32_t magnitude) const override;
private:
// Reference (not a copy) to the scientific settings of the Notation given to the constructor.
const Notation::ScientificSettings& fSettings;
// Not owned.
const DecimalFormatSymbols *fSymbols;
// Not owned; invoked first by processQuantity().
const MicroPropsGenerator *fParent;
// ScientificModifier reads fSettings and fSymbols directly.
friend class ScientificModifier;
};
} // namespace impl
} // namespace number
U_NAMESPACE_END
#endif //NUMBERFORMAT_NUMFMTTER_SCIENTIFIC_H

View File

@ -0,0 +1,432 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "number_stringbuilder.h"
#include <unicode/utf16.h>
#include <uvectr32.h>
using namespace icu::number::impl;
// Default constructor: all state comes from the in-class member initializers.
NumberStringBuilder::NumberStringBuilder() = default;
/**
 * Releases the heap buffers, if the builder has switched from inline storage to the heap.
 */
NumberStringBuilder::~NumberStringBuilder() {
    if (!fUsingHeap) {
        return;
    }
    uprv_free(fChars.heap.ptr);
    uprv_free(fFields.heap.ptr);
}
// Copy constructor: delegates to the copy-assignment operator. The assignment operator reads
// fUsingHeap to decide whether to free buffers, so this depends on that member's in-class
// initializer having run first.
NumberStringBuilder::NumberStringBuilder(const NumberStringBuilder &other) {
*this = other;
}
// Copy assignment: releases any heap buffers held by this builder, allocates new heap buffers
// when the source's capacity exceeds DEFAULT_CAPACITY, and then copies the source's entire
// character and field buffers together with its zero offset and length.
//
// No error code is available here: if allocation fails, this builder is reset to a
// default-constructed (empty) state and returned without reporting the failure.
NumberStringBuilder &NumberStringBuilder::operator=(const NumberStringBuilder &other) {
// Check for self-assignment
if (this == &other) {
return *this;
}
// Continue with deallocation and copying
if (fUsingHeap) {
uprv_free(fChars.heap.ptr);
uprv_free(fFields.heap.ptr);
fUsingHeap = false;
}
int32_t capacity = other.getCapacity();
if (capacity > DEFAULT_CAPACITY) {
// FIXME: uprv_malloc
// C++ note: malloc appears in two places: here and in prepareForInsertHelper.
auto newChars = static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * capacity));
auto newFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * capacity));
if (newChars == nullptr || newFields == nullptr) {
// UErrorCode is not available; fail silently.
uprv_free(newChars);
uprv_free(newFields);
*this = NumberStringBuilder(); // can't fail
return *this;
}
fUsingHeap = true;
fChars.heap.capacity = capacity;
fChars.heap.ptr = newChars;
fFields.heap.capacity = capacity;
fFields.heap.ptr = newFields;
}
// The whole buffer (not just the used region) is copied, so fZero can be taken over as-is.
uprv_memcpy(getCharPtr(), other.getCharPtr(), sizeof(char16_t) * capacity);
uprv_memcpy(getFieldPtr(), other.getFieldPtr(), sizeof(Field) * capacity);
fZero = other.fZero;
fLength = other.fLength;
return *this;
}
// Returns the stored length (fLength); the buffers are indexed in char16_t units.
int32_t NumberStringBuilder::length() const {
return fLength;
}
// Returns the number of code points in the used region of the character buffer, as counted by
// u_countChar32().
int32_t NumberStringBuilder::codePointCount() const {
return u_countChar32(getCharPtr() + fZero, fLength);
}
/**
 * Returns the code point at the start of the string, or -1 if the string is empty.
 */
UChar32 NumberStringBuilder::getFirstCodePoint() const {
    if (fLength == 0) {
        return -1;
    }
    const char16_t *text = getCharPtr() + fZero;
    UChar32 first;
    U16_GET(text, 0, 0, fLength, first);
    return first;
}
/**
 * Returns the code point at the end of the string, or -1 if the string is empty.
 */
UChar32 NumberStringBuilder::getLastCodePoint() const {
    if (fLength == 0) {
        return -1;
    }
    const char16_t *text = getCharPtr() + fZero;
    // Step back from the end to the first code unit of the last code point.
    int32_t lastStart = fLength;
    U16_BACK_1(text, 0, lastStart);
    UChar32 last;
    U16_GET(text, 0, lastStart, fLength, last);
    return last;
}
// Returns the code point that contains the code unit at `index`, where `index` is relative to
// the start of the string. The index is not range-checked here.
UChar32 NumberStringBuilder::codePointAt(int32_t index) const {
UChar32 cp;
U16_GET(getCharPtr() + fZero, 0, index, fLength, cp);
return cp;
}
/**
 * Returns the code point that ends immediately before `index`, where `index` is relative to
 * the start of the string. The index is not range-checked here.
 */
UChar32 NumberStringBuilder::codePointBefore(int32_t index) const {
    const char16_t *text = getCharPtr() + fZero;
    int32_t previousStart = index;
    U16_BACK_1(text, 0, previousStart);
    UChar32 previous;
    U16_GET(text, 0, previousStart, fLength, previous);
    return previous;
}
/**
 * Empties the string: the length becomes zero and the zero offset is moved back to the middle
 * of the current buffer. The buffer itself is kept as is.
 *
 * @return This builder, for chaining.
 */
NumberStringBuilder &NumberStringBuilder::clear() {
    // TODO: Reset the heap here?
    fLength = 0;
    fZero = getCapacity() / 2;
    return *this;
}
// Appends a code point with the given field at the end of the string; see insertCodePoint()
// for the return value.
int32_t NumberStringBuilder::appendCodePoint(UChar32 codePoint, Field field, UErrorCode &status) {
return insertCodePoint(fLength, codePoint, field, status);
}
int32_t
NumberStringBuilder::insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status) {
int32_t count = U16_LENGTH(codePoint);
int32_t position = prepareForInsert(index, count, status);
if (U_FAILURE(status)) {
return count;
}
if (count == 1) {
getCharPtr()[position] = (char16_t) codePoint;
getFieldPtr()[position] = field;
} else {
getCharPtr()[position] = U16_LEAD(codePoint);
getCharPtr()[position + 1] = U16_TRAIL(codePoint);
getFieldPtr()[position] = getFieldPtr()[position + 1] = field;
}
return count;
}
// Appends `unistr`, tagged with `field`, at the end of the string; see insert() for the return
// value.
int32_t NumberStringBuilder::append(const UnicodeString &unistr, Field field, UErrorCode &status) {
return insert(fLength, unistr, field, status);
}
int32_t NumberStringBuilder::insert(int32_t index, const UnicodeString &unistr, Field field,
UErrorCode &status) {
if (unistr.length() == 0) {
// Nothing to insert.
return 0;
} else if (unistr.length() == 1) {
// Fast path: insert using insertCodePoint.
return insertCodePoint(index, unistr.charAt(0), field, status);
} else {
return insert(index, unistr, 0, unistr.length(), field, status);
}
}
int32_t
NumberStringBuilder::insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end,
Field field, UErrorCode &status) {
int32_t count = end - start;
int32_t position = prepareForInsert(index, count, status);
if (U_FAILURE(status)) {
return count;
}
for (int32_t i = 0; i < count; i++) {
getCharPtr()[position + i] = unistr.charAt(start + i);
getFieldPtr()[position + i] = field;
}
return count;
}
// Appends the contents (characters and fields) of `other` at the end of this string; see
// insert() for the return value and error behavior.
int32_t NumberStringBuilder::append(const NumberStringBuilder &other, UErrorCode &status) {
return insert(fLength, other, status);
}
int32_t
NumberStringBuilder::insert(int32_t index, const NumberStringBuilder &other, UErrorCode &status) {
if (this == &other) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
int32_t count = other.fLength;
if (count == 0) {
// Nothing to insert.
return 0;
}
int32_t position = prepareForInsert(index, count, status);
if (U_FAILURE(status)) {
return count;
}
for (int32_t i = 0; i < count; i++) {
getCharPtr()[position + i] = other.charAt(i);
getFieldPtr()[position + i] = other.fieldAt(i);
}
return count;
}
/**
 * Opens a gap of `count` code units at `index` (relative to the start of the string) and
 * updates the length accordingly.
 *
 * Inserting at the very start or the very end is handled here when the buffer already has
 * room on that side; every other case is delegated to prepareForInsertHelper().
 *
 * @return The absolute buffer position of the first code unit of the gap.
 */
int32_t NumberStringBuilder::prepareForInsert(int32_t index, int32_t count, UErrorCode &status) {
    const bool fitsAtFront = index == 0 && fZero - count >= 0;
    if (fitsAtFront) {
        // Append to start
        fZero -= count;
        fLength += count;
        return fZero;
    }
    const bool fitsAtBack = index == fLength && fZero + fLength + count < getCapacity();
    if (fitsAtBack) {
        // Append to end
        const int32_t gapStart = fZero + fLength;
        fLength += count;
        return gapStart;
    }
    // Move chars around and/or allocate more space
    return prepareForInsertHelper(index, count, status);
}
// Slow path of prepareForInsert(): opens a gap of `count` code units at `index` (relative to
// the start of the string), either by moving to new, larger heap buffers or by shifting the
// contents within the existing buffers. In both cases the string, including the gap, ends up
// centered in the buffer.
//
// Returns the absolute buffer position of the gap. On allocation failure, sets `status` to
// U_MEMORY_ALLOCATION_ERROR and returns -1, leaving the builder unchanged.
int32_t NumberStringBuilder::prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status) {
int32_t oldCapacity = getCapacity();
int32_t oldZero = fZero;
char16_t *oldChars = getCharPtr();
Field *oldFields = getFieldPtr();
if (fLength + count > oldCapacity) {
// Not enough room: grow to twice the new length.
int32_t newCapacity = (fLength + count) * 2;
int32_t newZero = newCapacity / 2 - (fLength + count) / 2;
// C++ note: malloc appears in two places: here and in the assignment operator.
auto newChars = static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * newCapacity));
auto newFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * newCapacity));
if (newChars == nullptr || newFields == nullptr) {
uprv_free(newChars);
uprv_free(newFields);
status = U_MEMORY_ALLOCATION_ERROR;
return -1;
}
// First copy the prefix and then the suffix, leaving room for the new chars that the
// caller wants to insert.
// C++ note: memcpy is OK because the src and dest do not overlap.
uprv_memcpy(newChars + newZero, oldChars + oldZero, sizeof(char16_t) * index);
uprv_memcpy(newChars + newZero + index + count,
oldChars + oldZero + index,
sizeof(char16_t) * (fLength - index));
uprv_memcpy(newFields + newZero, oldFields + oldZero, sizeof(Field) * index);
uprv_memcpy(newFields + newZero + index + count,
oldFields + oldZero + index,
sizeof(Field) * (fLength - index));
// The old buffers are freed only if they were heap-allocated (not the inline arrays).
if (fUsingHeap) {
uprv_free(oldChars);
uprv_free(oldFields);
}
fUsingHeap = true;
fChars.heap.ptr = newChars;
fChars.heap.capacity = newCapacity;
fFields.heap.ptr = newFields;
fFields.heap.capacity = newCapacity;
fZero = newZero;
fLength += count;
} else {
// Enough room overall: re-center the string within the existing buffers.
int32_t newZero = oldCapacity / 2 - (fLength + count) / 2;
// C++ note: memmove is required because src and dest may overlap.
// First copy the entire string to the location of the prefix, and then move the suffix
// to make room for the new chars that the caller wants to insert.
uprv_memmove(oldChars + newZero, oldChars + oldZero, sizeof(char16_t) * fLength);
uprv_memmove(oldChars + newZero + index + count,
oldChars + newZero + index,
sizeof(char16_t) * (fLength - index));
uprv_memmove(oldFields + newZero, oldFields + oldZero, sizeof(Field) * fLength);
uprv_memmove(oldFields + newZero + index + count,
oldFields + newZero + index,
sizeof(Field) * (fLength - index));
fZero = newZero;
fLength += count;
}
return fZero + index;
}
// Returns a UnicodeString constructed from the used region of the character buffer; field
// information is not included.
UnicodeString NumberStringBuilder::toUnicodeString() const {
return UnicodeString(getCharPtr() + fZero, fLength);
}
/**
 * Builds a human-readable dump of the builder for debugging, of the form
 * "<NumberStringBuilder [chars] [fields]>". The second bracket holds one marker character per
 * code unit, identifying that unit's field: 'n' for UNUM_FIELD_COUNT (no field), '?' for
 * fields without a dedicated marker, and the markers listed in the switch below otherwise.
 */
UnicodeString NumberStringBuilder::toDebugString() const {
    UnicodeString sb;
    sb.append(u"<NumberStringBuilder [", -1);
    sb.append(toUnicodeString());
    sb.append(u"] [", -1);
    for (int i = 0; i < fLength; i++) {
        if (fieldAt(i) == UNUM_FIELD_COUNT) {
            sb.append(u'n');
        } else {
            char16_t c;
            switch (fieldAt(i)) {
                case UNUM_SIGN_FIELD:
                    c = u'-';
                    break;
                case UNUM_INTEGER_FIELD:
                    c = u'i';
                    break;
                case UNUM_FRACTION_FIELD:
                    c = u'f';
                    break;
                case UNUM_EXPONENT_FIELD:
                    c = u'e';
                    break;
                case UNUM_EXPONENT_SIGN_FIELD:
                    c = u'+';
                    break;
                case UNUM_EXPONENT_SYMBOL_FIELD:
                    c = u'E';
                    break;
                case UNUM_DECIMAL_SEPARATOR_FIELD:
                    c = u'.';
                    break;
                case UNUM_GROUPING_SEPARATOR_FIELD:
                    c = u',';
                    break;
                case UNUM_PERCENT_FIELD:
                    c = u'%';
                    break;
                case UNUM_PERMILL_FIELD:
                    // U+2030 PER MILLE SIGN, written as an escape so that the literal survives
                    // source-encoding problems (an empty character literal does not compile).
                    c = u'\u2030';
                    break;
                case UNUM_CURRENCY_FIELD:
                    c = u'$';
                    break;
                default:
                    c = u'?';
                    break;
            }
            sb.append(c);
        }
    }
    sb.append(u"]>", -1);
    return sb;
}
// Returns a pointer to the first code unit of the string inside the internal buffer. The
// pointer refers to the builder's own storage; no terminator is written by this method.
const char16_t *NumberStringBuilder::chars() const {
return getCharPtr() + fZero;
}
/**
 * Compares this builder with `other` code unit by code unit.
 *
 * @return true if both have the same length and, at every index, the same character and the
 *         same field.
 */
bool NumberStringBuilder::contentEquals(const NumberStringBuilder &other) const {
    if (fLength != other.fLength) {
        return false;
    }
    int32_t matched = 0;
    while (matched < fLength
           && charAt(matched) == other.charAt(matched)
           && fieldAt(matched) == other.fieldAt(matched)) {
        ++matched;
    }
    return matched == fLength;
}
// Fills `fp` with the begin and end index of the first run of the field that `fp` asks for.
// All indices written to `fp` are relative to the start of the string plus `offset`.
//
// Does nothing if `fp` requests FieldPosition::DONT_CARE. Sets `status` to
// U_ILLEGAL_ARGUMENT_ERROR if the requested field is outside [0, UNUM_FIELD_COUNT).
// If the field does not occur, `fp` is left untouched, except for UNUM_FRACTION_FIELD (see the
// end of the function).
void NumberStringBuilder::populateFieldPosition(FieldPosition &fp, int32_t offset, UErrorCode &status) const {
int32_t rawField = fp.getField();
if (rawField == FieldPosition::DONT_CARE) {
return;
}
if (rawField < 0 || rawField >= UNUM_FIELD_COUNT) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
auto field = static_cast<Field>(rawField);
bool seenStart = false;
int32_t fractionStart = -1;
// The loop runs one step past the last code unit; on that final step _field stays at
// UNUM_FIELD_COUNT so that a run reaching the end of the string is still closed.
for (int i = fZero; i <= fZero + fLength; i++) {
Field _field = UNUM_FIELD_COUNT;
if (i < fZero + fLength) {
_field = getFieldPtr()[i];
}
if (seenStart && field != _field) {
// Special case: GROUPING_SEPARATOR counts as an INTEGER.
if (field == UNUM_INTEGER_FIELD && _field == UNUM_GROUPING_SEPARATOR_FIELD) {
continue;
}
fp.setEndIndex(i - fZero + offset);
break;
} else if (!seenStart && field == _field) {
fp.setBeginIndex(i - fZero + offset);
seenStart = true;
}
// Track the position just after the last integer digit or decimal separator seen so far.
if (_field == UNUM_INTEGER_FIELD || _field == UNUM_DECIMAL_SEPARATOR_FIELD) {
fractionStart = i - fZero + 1;
}
}
// Backwards compatibility: FRACTION needs to start after INTEGER if empty
if (field == UNUM_FRACTION_FIELD && !seenStart) {
fp.setBeginIndex(fractionStart + offset);
fp.setEndIndex(fractionStart + offset);
}
}
/**
 * Collects every field run in the string and hands the result to `fpi`.
 *
 * Each run is recorded as three consecutive integers: the field, the start index, and the end
 * index (exclusive), all relative to the start of the string. A grouping separator that occurs
 * inside an integer run is recorded as its own one-unit entry without ending the integer run.
 *
 * @param fpi    Receives (and adopts) the collected data.
 * @param status Set to U_MEMORY_ALLOCATION_ERROR if the working vector cannot be allocated;
 *               also carries failures from the vector and from the iterator.
 */
void NumberStringBuilder::populateFieldPositionIterator(FieldPositionIterator &fpi, UErrorCode &status) const {
    // TODO: Set an initial capacity on uvec?
    // The status-taking LocalPointer constructor reports a failed allocation through `status`,
    // so the vector is never dereferenced when `new` returned null.
    LocalPointer<UVector32> uvec(new UVector32(status), status);
    if (U_FAILURE(status)) {
        return;
    }

    Field current = UNUM_FIELD_COUNT;
    int32_t currentStart = -1;
    for (int32_t i = 0; i < fLength; i++) {
        Field field = fieldAt(i);
        if (current == UNUM_INTEGER_FIELD && field == UNUM_GROUPING_SEPARATOR_FIELD) {
            // Special case: GROUPING_SEPARATOR counts as an INTEGER.
            // Add the field, followed by the start index, followed by the end index to uvec.
            uvec->addElement(UNUM_GROUPING_SEPARATOR_FIELD, status);
            uvec->addElement(i, status);
            uvec->addElement(i + 1, status);
        } else if (current != field) {
            if (current != UNUM_FIELD_COUNT) {
                // Add the field, followed by the start index, followed by the end index to uvec.
                uvec->addElement(current, status);
                uvec->addElement(currentStart, status);
                uvec->addElement(i, status);
            }
            current = field;
            currentStart = i;
        }
        if (U_FAILURE(status)) {
            return;
        }
    }
    if (current != UNUM_FIELD_COUNT) {
        // Add the field, followed by the start index, followed by the end index to uvec.
        uvec->addElement(current, status);
        uvec->addElement(currentStart, status);
        uvec->addElement(fLength, status);
    }

    // Give uvec to the FieldPositionIterator, which adopts it.
    fpi.setData(uvec.orphan(), status);
}

View File

@ -0,0 +1,130 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#ifndef NUMBERFORMAT_NUMBERSTRINGBUILDER_H
#define NUMBERFORMAT_NUMBERSTRINGBUILDER_H
#include <cstdint>
#include <unicode/numfmt.h>
#include <unicode/ustring.h>
#include <cstring>
#include <uassert.h>
#include "number_types.h"
U_NAMESPACE_BEGIN namespace number {
namespace impl {
/**
 * A string builder that keeps two parallel arrays: the UTF-16 code units and, for each code
 * unit, a Field value. The accessors below read element (fZero + index) of the active
 * array, i.e. the logical content starts at offset fZero inside the buffer.
 *
 * Storage is either an inline array of DEFAULT_CAPACITY elements or a heap array; fUsingHeap
 * selects which member of the ValueOrHeapArray unions is active.
 */
class NumberStringBuilder : public UMemory {
private:
// Number of elements in the inline (non-heap) arrays.
static const int32_t DEFAULT_CAPACITY = 40;
// Either an inline array or a heap pointer plus its capacity. The union does not record
// which member is active; the owning object tracks that in fUsingHeap.
template<typename T>
union ValueOrHeapArray {
T value[DEFAULT_CAPACITY];
struct {
T *ptr;
int32_t capacity;
} heap;
};
public:
NumberStringBuilder();
~NumberStringBuilder();
NumberStringBuilder(const NumberStringBuilder &other);
NumberStringBuilder &operator=(const NumberStringBuilder &other);
int32_t length() const;
int32_t codePointCount() const;
// Returns the code unit at the given logical index; the index must be in [0, fLength).
inline char16_t charAt(int32_t index) const {
U_ASSERT(index >= 0);
U_ASSERT(index < fLength);
return getCharPtr()[fZero + index];
}
// Returns the field attached to the code unit at the given logical index; the index must
// be in [0, fLength).
inline Field fieldAt(int32_t index) const {
U_ASSERT(index >= 0);
U_ASSERT(index < fLength);
return getFieldPtr()[fZero + index];
}
UChar32 getFirstCodePoint() const;
UChar32 getLastCodePoint() const;
UChar32 codePointAt(int32_t index) const;
UChar32 codePointBefore(int32_t index) const;
NumberStringBuilder &clear();
// NOTE(review): the int32_t results of the append/insert family are presumably the number
// of code units added -- confirm against number_stringbuilder.cpp.
int32_t appendCodePoint(UChar32 codePoint, Field field, UErrorCode &status);
int32_t insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status);
int32_t append(const UnicodeString &unistr, Field field, UErrorCode &status);
int32_t insert(int32_t index, const UnicodeString &unistr, Field field, UErrorCode &status);
int32_t insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end, Field field,
UErrorCode &status);
int32_t append(const NumberStringBuilder &other, UErrorCode &status);
int32_t insert(int32_t index, const NumberStringBuilder &other, UErrorCode &status);
UnicodeString toUnicodeString() const;
UnicodeString toDebugString() const;
const char16_t *chars() const;
bool contentEquals(const NumberStringBuilder &other) const;
void populateFieldPosition(FieldPosition &fp, int32_t offset, UErrorCode &status) const;
void populateFieldPositionIterator(FieldPositionIterator &fpi, UErrorCode &status) const;
private:
// True when the heap members of fChars/fFields are the active union members.
bool fUsingHeap = false;
ValueOrHeapArray<char16_t> fChars;
ValueOrHeapArray<Field> fFields;
// Offset of logical index 0 inside the buffers. Starts in the middle of the inline array
// (presumably to leave room for insertions at the front -- confirm in the .cpp).
int32_t fZero = DEFAULT_CAPACITY / 2;
// Number of code units currently stored.
int32_t fLength = 0;
// The four accessors below return the base of the active array (heap or inline).
inline char16_t *getCharPtr() {
return fUsingHeap ? fChars.heap.ptr : fChars.value;
}
inline const char16_t *getCharPtr() const {
return fUsingHeap ? fChars.heap.ptr : fChars.value;
}
inline Field *getFieldPtr() {
return fUsingHeap ? fFields.heap.ptr : fFields.value;
}
inline const Field *getFieldPtr() const {
return fUsingHeap ? fFields.heap.ptr : fFields.value;
}
// Capacity is read from fChars only; NOTE(review): fFields is presumably kept at the same
// capacity -- confirm in the .cpp.
inline int32_t getCapacity() const {
return fUsingHeap ? fChars.heap.capacity : DEFAULT_CAPACITY;
}
int32_t prepareForInsert(int32_t index, int32_t count, UErrorCode &status);
int32_t prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status);
};
} // namespace impl
} // namespace number
U_NAMESPACE_END
#endif //NUMBERFORMAT_NUMBERSTRINGBUILDER_H

View File

@ -0,0 +1,282 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#ifndef NUMBERFORMAT_INTERNALS_H
#define NUMBERFORMAT_INTERNALS_H
#include <cstdint>
#include <unicode/decimfmt.h>
#include <unicode/unum.h>
#include <unicode/numsys.h>
#include "unicode/numberformatter.h"
#include <unicode/utf16.h>
#include <uassert.h>
U_NAMESPACE_BEGIN
namespace number {
namespace impl {
// Typedef several enums for brevity and for easier comparison to Java.
typedef UNumberFormatFields Field;
typedef UNumberFormatRoundingMode RoundingMode;
typedef UNumberFormatPadPosition PadPosition;
typedef UNumberCompactStyle CompactStyle;

// ICU4J Equivalent: RoundingUtils.MAX_INT_FRAC_SIG
static constexpr int32_t kMaxIntFracSig = 100;

// ICU4J Equivalent: RoundingUtils.DEFAULT_ROUNDING_MODE
// Uses the supported enumerator name. UNUM_FOUND_HALFEVEN is a deprecated, misspelled alias
// of the same value and is not available when deprecated API is hidden.
static constexpr RoundingMode kDefaultMode = RoundingMode::UNUM_ROUND_HALFEVEN;

// ICU4J Equivalent: Padder.FALLBACK_PADDING_STRING
static constexpr char16_t kFallbackPaddingString[] = u" ";

// ICU4J Equivalent: NumberFormatterImpl.DEFAULT_CURRENCY
static constexpr char16_t kDefaultCurrency[] = u"XXX";

// FIXME: New error codes:
// Until dedicated codes exist, both conditions are reported as U_ILLEGAL_ARGUMENT_ERROR.
static constexpr UErrorCode U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR = U_ILLEGAL_ARGUMENT_ERROR;
static constexpr UErrorCode U_NUMBER_PADDING_WIDTH_OUT_OF_RANGE_ERROR = U_ILLEGAL_ARGUMENT_ERROR;

// Forward declarations:
class Modifier;
class MutablePatternModifier;
class DecimalQuantity;
class NumberStringBuilder;
struct MicroProps;
// Kinds of token that can occur in an affix pattern. TYPE_CODEPOINT (0) marks a literal
// character; every symbol kind has a distinct negative value.
enum AffixPatternType {
// Represents a literal character; the value is stored in the code point field.
TYPE_CODEPOINT = 0,
// Represents a minus sign symbol '-'.
TYPE_MINUS_SIGN = -1,
// Represents a plus sign symbol '+'.
TYPE_PLUS_SIGN = -2,
// Represents a percent sign symbol '%'.
TYPE_PERCENT = -3,
// Represents a permille sign symbol '‰'.
TYPE_PERMILLE = -4,
// Represents a single currency symbol '¤'.
TYPE_CURRENCY_SINGLE = -5,
// Represents a double currency symbol '¤¤'.
TYPE_CURRENCY_DOUBLE = -6,
// Represents a triple currency symbol '¤¤¤'.
TYPE_CURRENCY_TRIPLE = -7,
// Represents a quadruple currency symbol '¤¤¤¤'.
TYPE_CURRENCY_QUAD = -8,
// Represents a quintuple currency symbol '¤¤¤¤¤'.
TYPE_CURRENCY_QUINT = -9,
// Represents a sequence of six or more currency symbols.
// Note that the value is not contiguous with the preceding enumerators.
TYPE_CURRENCY_OVERFLOW = -15
};
// Selects between the decimal and the currency flavor of compact formatting.
// NOTE(review): presumably chooses which compact pattern data set is loaded -- confirm in
// number_compact.cpp.
enum CompactType {
TYPE_DECIMAL,
TYPE_CURRENCY
};
// TODO: Should this be moved somewhere else, maybe where other ICU classes can use it?
/**
 * Abstract read-only view of a sequence of UTF-16 code units.
 *
 * Implementations must provide length(), charAt() and toUnicodeString(); codePointAt() has a
 * default implementation built on top of charAt().
 */
class CharSequence {
  public:
    virtual ~CharSequence() = default;

    /** @return The number of UTF-16 code units in the sequence. */
    virtual int32_t length() const = 0;

    /** @return The UTF-16 code unit at the given index. */
    virtual char16_t charAt(int32_t index) const = 0;

    /**
     * Returns the code point that starts at the given index.
     *
     * A lead surrogate is combined with the following code unit only when that unit is a
     * trail surrogate; in every other case (including an unpaired lead surrogate) the single
     * code unit at the index is returned unchanged.
     */
    virtual UChar32 codePointAt(int32_t index) const {
        // Default implementation; can be overridden with a more efficient version
        char16_t leading = charAt(index);
        if (U16_IS_LEAD(leading) && length() > index + 1) {
            char16_t trailing = charAt(index + 1);
            // U16_GET_SUPPLEMENTARY is only defined for a real lead/trail pair.
            if (U16_IS_TRAIL(trailing)) {
                return U16_GET_SUPPLEMENTARY(leading, trailing);
            }
        }
        return leading;
    }

    /** @return The contents of the sequence as a UnicodeString. */
    virtual UnicodeString toUnicodeString() const = 0;
};
// Interface giving access to the affixes (prefix/suffix text) of a number pattern.
// The "flags" argument of charAt()/length() is a bit set built from the AFFIX_* constants
// below; NOTE(review): the exact meaning of each bit is defined by the implementations,
// which are not visible here.
class AffixPatternProvider {
public:
// Low 8 bits of the flags (presumably a plural form -- confirm against implementations).
static const int32_t AFFIX_PLURAL_MASK = 0xff;
// Flag bits that can be OR-ed into the flags argument.
static const int32_t AFFIX_PREFIX = 0x100;
static const int32_t AFFIX_NEGATIVE_SUBPATTERN = 0x200;
static const int32_t AFFIX_PADDING = 0x400;
virtual ~AffixPatternProvider() = default;
// Returns code unit i of the affix selected by flags.
virtual char16_t charAt(int flags, int i) const = 0;
// Returns the length of the affix selected by flags.
virtual int length(int flags) const = 0;
virtual bool hasCurrencySign() const = 0;
virtual bool positiveHasPlusSign() const = 0;
virtual bool hasNegativeSubpattern() const = 0;
virtual bool negativeHasMinusSign() const = 0;
virtual bool containsSymbolType(AffixPatternType, UErrorCode &) const = 0;
};
/**
* A Modifier is an object that can be passed through the formatting pipeline until it is finally applied to the string
* builder. A Modifier usually contains a prefix and a suffix that are applied, but it could contain something else,
* like a SimpleFormatter pattern.
*
* A Modifier is usually immutable, except in cases such as MutablePatternModifier, which are mutable for performance
* reasons.
* NOTE(review): the text ported from Java named "MurkyModifier" here; no such class is declared in this header, so
* the forward-declared MutablePatternModifier is assumed to be the intended class -- confirm.
*/
class Modifier {
public:
virtual ~Modifier() = default;
/**
* Apply this Modifier to the string builder.
*
* @param output
* The string builder to which to apply this modifier.
* @param leftIndex
* The left index of the string within the builder. Equal to 0 when only one number is being formatted.
* @param rightIndex
* The right index of the string within the string builder. Equal to length when only one number is being
* formatted.
* @param status
* Error code in/out parameter.
* @return The number of characters (UTF-16 code units) that were added to the string builder.
*/
virtual int32_t
apply(NumberStringBuilder &output, int leftIndex, int rightIndex, UErrorCode &status) const = 0;
/**
* Gets the length of the prefix. This information can be used in combination with apply() to extract the
* prefix and suffix strings.
*
* @return The number of characters (UTF-16 code units) in the prefix.
*/
virtual int32_t getPrefixLength(UErrorCode& status) const = 0;
/**
* Returns the number of code points in the modifier, prefix plus suffix.
*/
virtual int32_t getCodePointCount(UErrorCode &status) const = 0;
/**
* Whether this modifier is strong. If a modifier is strong, it should always be applied immediately and not allowed
* to bubble up. With regard to padding, strong modifiers are considered to be on the inside of the prefix and
* suffix.
*
* @return Whether the modifier is strong.
*/
virtual bool isStrong() const = 0;
};
/**
* This interface is used when all number formatting settings, including the locale, are known, except for the quantity
* itself. The processQuantity() method performs the final step in the number processing pipeline: it uses the
* quantity to generate a finalized MicroProps, which can be used to render the number to output.
*
* <p>
* In other words, this interface is used for the parts of number processing that are <em>quantity-dependent</em>.
*
* <p>
* In order to allow for multiple different objects to all mutate the same MicroProps, a "chain" of MicroPropsGenerators
* are linked together, and each one is responsible for manipulating a certain quantity-dependent part of the
* MicroProps. At the tail of the linked list is a base instance of MicroProps with properties that are not
* quantity-dependent. Each element in the linked list calls processQuantity() on its "parent", then does its
* work, and then returns the result.
*
* @author sffc
*
*/
class MicroPropsGenerator {
public:
virtual ~MicroPropsGenerator() = default;
/**
* Considers the given DecimalQuantity, optionally mutates it, and populates a MicroProps.
* The result is delivered through the micros output parameter; the method itself returns nothing.
*
* @param quantity
* The quantity for consideration and optional mutation.
* @param micros
* The MicroProps instance to populate.
* @param status
* Error code in/out parameter.
*/
virtual void processQuantity(DecimalQuantity& quantity, MicroProps& micros, UErrorCode& status) const = 0;
};
// Interface that maps a magnitude to a multiplier.
// NOTE(review): presumably both values are powers of ten (exponents) rather than plain
// factors -- confirm against the implementations, which are not visible here.
class MultiplierProducer {
public:
virtual ~MultiplierProducer() = default;
virtual int32_t getMultiplier(int32_t magnitude) const = 0;
};
/**
 * Holds either a value of type T or the "null" state.
 *
 * The wrapped value is stored inline; a null instance still contains a T object, which is
 * value-initialized by the default constructor so that it never holds indeterminate data.
 */
template<typename T>
class NullableValue {
  public:
    /** Creates a null instance. */
    NullableValue() : fNull(true), fValue() {}

    NullableValue(const NullableValue<T> &other) = default;

    /** Creates a non-null instance holding a copy of the given value. */
    explicit NullableValue(const T &other) : fNull(false), fValue(other) {}

    NullableValue<T> &operator=(const NullableValue<T> &other) = default;

    /** Stores a copy of the given value and clears the null state. */
    NullableValue<T> &operator=(const T &other) {
        fValue = other;
        fNull = false;
        return *this;
    }

    /**
     * Two instances are equal when both are null, or when both are non-null and their
     * values compare equal. A null instance never equals a non-null one, regardless of
     * what the null instance's stored value happens to be.
     */
    bool operator==(const NullableValue &other) const {
        if (fNull || other.fNull) {
            return fNull == other.fNull;
        }
        return fValue == other.fValue;
    }

    /** Switches to the null state; the stored value is left in place. */
    void nullify() {
        // TODO: It might be nice to call the destructor here.
        fNull = true;
    }

    bool isNull() const {
        return fNull;
    }

    /**
     * Returns a copy of the stored value. If this instance is null, status is set to
     * U_UNDEFINED_VARIABLE and whatever value is currently stored is returned.
     */
    T get(UErrorCode &status) const {
        if (fNull) {
            status = U_UNDEFINED_VARIABLE;
        }
        return fValue;
    }

  private:
    bool fNull;
    T fValue;
};
} // namespace impl
} // namespace number
U_NAMESPACE_END
#endif //NUMBERFORMAT_INTERNALS_H

View File

@ -0,0 +1,125 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#ifndef NUMBERFORMAT_NUMFMTTER_UTILS_H
#define NUMBERFORMAT_NUMFMTTER_UTILS_H
#include "unicode/numberformatter.h"
#include "number_types.h"
#include "number_decimalquantity.h"
#include "number_scientific.h"
#include "number_patternstring.h"
#include "number_modifiers.h"
U_NAMESPACE_BEGIN namespace number {
namespace impl {
class UnicodeStringCharSequence : public CharSequence {
public:
explicit UnicodeStringCharSequence(const UnicodeString &other) {
fStr = other;
}
~UnicodeStringCharSequence() override = default;
int32_t length() const override {
return fStr.length();
}
char16_t charAt(int32_t index) const override {
return fStr.charAt(index);
}
UChar32 codePointAt(int32_t index) const override {
return fStr.char32At(index);
}
UnicodeString toUnicodeString() const override {
// Allocate a UnicodeString of the correct length
UnicodeString output(length(), 0, -1);
for (int32_t i = 0; i < length(); i++) {
output.append(charAt(i));
}
return output;
}
private:
UnicodeString fStr;
};
// Bundle of resolved formatting settings that is passed along the MicroPropsGenerator chain.
// MicroProps is itself a MicroPropsGenerator: its processQuantity() ignores the quantity and
// simply delivers this object's contents into the output parameter.
struct MicroProps : public MicroPropsGenerator {
// NOTE: All of these fields are properly initialized in NumberFormatterImpl.
// (They have no in-class initializers, so a default-constructed MicroProps leaves the
// enum, bool and pointer members below unset.)
Rounder rounding;
Grouper grouping;
Padder padding;
IntegerWidth integerWidth;
UNumberSignDisplay sign;
UNumberDecimalSeparatorDisplay decimal;
bool useCurrency;
// Note: This struct has no direct ownership of the following pointers.
const DecimalFormatSymbols *symbols;
const Modifier *modOuter;
const Modifier *modMiddle;
const Modifier *modInner;
// The following "helper" fields may optionally be used during the MicroPropsGenerator.
// They live here to retain memory.
struct {
ScientificModifier scientificModifier;
EmptyModifier emptyWeakModifier{false};
EmptyModifier emptyStrongModifier{true};
} helpers;
MicroProps() = default;
MicroProps(const MicroProps &other) = default;
MicroProps &operator=(const MicroProps &other) = default;
// Populates micros from this object. The quantity argument is unused and status is never
// modified.
void processQuantity(DecimalQuantity &, MicroProps &micros, UErrorCode &status) const override {
(void)status;
if (this == &micros) {
// Unsafe path: no need to perform a copy.
// The output aliases this object, so only the "exhausted" flag is set (through the
// non-const alias); the assertions check that this path is taken at most once.
U_ASSERT(!exhausted);
micros.exhausted = true;
U_ASSERT(exhausted);
} else {
// Safe path: copy self into the output micros.
micros = *this;
}
}
private:
// Internal fields:
// Set once the unsafe (aliasing) path of processQuantity() has run.
bool exhausted = false;
};
/**
* This struct provides the result of the number formatting pipeline to FormattedNumber.
*
* The DecimalQuantity is not currently being used by FormattedNumber, but at some point it could be used
* to add a toDecNumber() or similar method.
*/
struct NumberFormatterResults : public UMemory {
// The quantity that was formatted (see the note above about its current use).
DecimalQuantity quantity;
// The formatted output, including per-code-unit field annotations.
NumberStringBuilder string;
};
/**
 * Looks up the localized string for a single decimal digit.
 *
 * Digit 0 maps to kZeroDigitSymbol; any other digit d maps to the symbol whose enum value
 * is kOneDigitSymbol + d - 1. No range check is performed on the argument.
 *
 * @param digit   The digit value to look up.
 * @param symbols The symbols object to read from.
 * @return A copy of the symbol string.
 */
inline const UnicodeString getDigitFromSymbols(int8_t digit, const DecimalFormatSymbols &symbols) {
    // TODO: Implement DecimalFormatSymbols.getCodePointZero()?
    using Symbol = DecimalFormatSymbols::ENumberFormatSymbol;
    const Symbol which = (digit == 0)
            ? Symbol::kZeroDigitSymbol
            : static_cast<Symbol>(Symbol::kOneDigitSymbol + digit - 1);
    return symbols.getSymbol(which);
}
} // namespace impl
} // namespace number
U_NAMESPACE_END
#endif //NUMBERFORMAT_NUMFMTTER_UTILS_H

File diff suppressed because it is too large Load Diff

View File

@ -61,7 +61,11 @@ windttst.o winnmtst.o winutil.o csdetest.o tzrulets.o tzoffloc.o tzfmttst.o ssea
tufmtts.o itspoof.o simplethread.o bidiconf.o locnmtst.o dcfmtest.o alphaindextst.o listformattertest.o genderinfotest.o compactdecimalformattest.o regiontst.o \
reldatefmttest.o simpleformattertest.o measfmttest.o numfmtspectest.o unifiedcachetest.o quantityformattertest.o \
scientificnumberformattertest.o datadrivennumberformattestsuite.o \
numberformattesttuple.o numberformat2test.o pluralmaptest.o
numberformattesttuple.o numberformat2test.o pluralmaptest.o \
numbertest_affixutils.o numbertest_stringbuilder.o
# FIXME
#numbertest_api.o numbertest_decimalquantity.o \
#numbertest_modifiers.o numbertest_patternmodifier.o numbertest_patternstring.o \
DEPS = $(OBJECTS:.o=.d)

View File

@ -59,6 +59,7 @@
#include "dcfmtest.h" // DecimalFormatTest
#include "listformattertest.h" // ListFormatterTest
#include "regiontst.h" // RegionTest
#include "numbertest.h" // All NumberFormatter tests
extern IntlTest *createCompactDecimalFormatTest();
extern IntlTest *createGenderInfoTest();
@ -204,7 +205,7 @@ void IntlTestFormat::runIndexedTest( int32_t index, UBool exec, const char* &nam
callTest(*test, par);
}
break;
case 49:
case 49:
name = "ScientificNumberFormatterTest";
if (exec) {
logln("ScientificNumberFormatterTest test---");
@ -213,8 +214,8 @@ void IntlTestFormat::runIndexedTest( int32_t index, UBool exec, const char* &nam
callTest(*test, par);
}
break;
case 50:
name = "NumberFormat2Test";
case 50:
name = "NumberFormat2Test";
if (exec) {
logln("NumberFormat2Test test---");
logln((UnicodeString)"");
@ -222,6 +223,8 @@ void IntlTestFormat::runIndexedTest( int32_t index, UBool exec, const char* &nam
callTest(*test, par);
}
break;
TESTCLASS(51,AffixUtilsTest);
TESTCLASS(52,NumberStringBuilderTest);
default: name = ""; break; //needed to end loop
}
if (exec) {

View File

@ -0,0 +1,50 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#pragma once
#include "number_stringbuilder.h"
#include "intltest.h"
#include "number_affixutils.h"
using namespace icu::number;
using namespace icu::number::impl;
// Test suite for AffixUtils (escaping, unescaping and symbol replacement in affix patterns).
class AffixUtilsTest : public IntlTest {
public:
void testEscape();
void testUnescape();
void testContainsReplaceType();
void testInvalid();
void testUnescapeWithSymbolProvider();
// Test dispatcher used by the IntlTest framework.
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
private:
// Unescapes input into a fresh NumberStringBuilder using the given provider and returns the
// resulting string.
UnicodeString unescapeWithDefaults(const SymbolProvider &defaultProvider, UnicodeString input,
UErrorCode &status);
};
// Test suite for NumberStringBuilder.
class NumberStringBuilderTest : public IntlTest {
public:
void testInsertAppendUnicodeString();
void testInsertAppendCodePoint();
void testCopy();
void testFields();
void testUnlimitedCapacity();
void testCodePoints();
// Test dispatcher used by the IntlTest framework.
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
private:
// NOTE(review): presumably asserts that the builder's contents equal the string; the
// definition is not visible here.
void assertEqualsImpl(const UnicodeString &a, const NumberStringBuilder &b);
};

View File

@ -0,0 +1,242 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "putilimp.h"
#include "unicode/dcfmtsym.h"
#include "numbertest.h"
#include "number_utils.h"
using namespace icu::number::impl;
/**
 * SymbolProvider used by the tests in this file. Plus, percent and permille come from the
 * DecimalFormatSymbols of locale "ar_SA"; the minus sign and the currency symbols are
 * hard-coded test values.
 */
class DefaultSymbolProvider : public SymbolProvider {
    DecimalFormatSymbols fSymbols;

  public:
    DefaultSymbolProvider(UErrorCode &status) : fSymbols(Locale("ar_SA"), status) {}

    virtual UnicodeString getSymbol(AffixPatternType type) const {
        switch (type) {
            case TYPE_MINUS_SIGN:
                return u"";
            case TYPE_PLUS_SIGN:
                return fSymbols.getConstSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kPlusSignSymbol);
            case TYPE_PERCENT:
                return fSymbols.getConstSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kPercentSymbol);
            case TYPE_PERMILLE:
                return fSymbols.getConstSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kPerMillSymbol);
            case TYPE_CURRENCY_SINGLE:
                return u"$";
            case TYPE_CURRENCY_DOUBLE:
                return u"XXX";
            case TYPE_CURRENCY_TRIPLE:
                return u"long name";
            case TYPE_CURRENCY_QUAD:
                return u"\uFFFD";
            case TYPE_CURRENCY_QUINT:
                // TODO: Add support for narrow currency symbols here.
                return u"\uFFFD";
            case TYPE_CURRENCY_OVERFLOW:
                return u"\uFFFD";
            default:
                U_ASSERT(false);
                // Silence compiler warnings. An explicit empty string is returned because a
                // bare 0 would convert to a string containing the single character U+0000.
                return UnicodeString();
        }
    }
};
// Test dispatcher: logs a banner when executing and registers each test method through the
// TESTCASE_AUTO macros. The registration order below determines the test indices.
void AffixUtilsTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char *) {
if (exec) {
logln("TestSuite AffixUtilsTest: ");
}
TESTCASE_AUTO_BEGIN;
TESTCASE_AUTO(testEscape);
TESTCASE_AUTO(testUnescape);
TESTCASE_AUTO(testContainsReplaceType);
TESTCASE_AUTO(testInvalid);
TESTCASE_AUTO(testUnescapeWithSymbolProvider);
TESTCASE_AUTO_END;
}
void AffixUtilsTest::testEscape() {
static const char16_t *cases[][2] = {{u"", u""},
{u"abc", u"abc"},
{u"-", u"'-'"},
{u"-!", u"'-'!"},
{u"", u""},
{u"---", u"'---'"},
{u"-%-", u"'-%-'"},
{u"'", u"''"},
{u"-'", u"'-'''"},
{u"-'-", u"'-''-'"},
{u"a-'-", u"a'-''-'"}};
for (auto &cas : cases) {
UnicodeString input(cas[0]);
UnicodeString expected(cas[1]);
UnicodeString result = AffixUtils::escape(UnicodeStringCharSequence(input));
assertEquals(input, expected, result);
}
}
void AffixUtilsTest::testUnescape() {
static struct TestCase {
const char16_t *input;
bool currency;
int32_t expectedLength;
const char16_t *output;
} cases[] = {{u"", false, 0, u""},
{u"abc", false, 3, u"abc"},
{u"-", false, 1, u""},
{u"-!", false, 2, u"!"},
{u"+", false, 1, u"\u061C+"},
{u"+!", false, 2, u"\u061C+!"},
{u"", false, 1, u"؉"},
{u"‰!", false, 2, u"؉!"},
{u"-x", false, 2, u"x"},
{u"'-'x", false, 2, u"-x"},
{u"'--''-'-x", false, 6, u"--'-x"},
{u"''", false, 1, u"'"},
{u"''''", false, 2, u"''"},
{u"''''''", false, 3, u"'''"},
{u"''x''", false, 3, u"'x'"},
{u"¤", true, 1, u"$"},
{u"¤¤", true, 2, u"XXX"},
{u"¤¤¤", true, 3, u"long name"},
{u"¤¤¤¤", true, 4, u"\uFFFD"},
{u"¤¤¤¤¤", true, 5, u"\uFFFD"},
{u"¤¤¤¤¤¤", true, 6, u"\uFFFD"},
{u"¤¤¤a¤¤¤¤", true, 8, u"long namea\uFFFD"},
{u"a¤¤¤¤b¤¤¤¤¤c", true, 12, u"a\uFFFDb\uFFFDc"},
{u"¤!", true, 2, u"$!"},
{u"¤¤!", true, 3, u"XXX!"},
{u"¤¤¤!", true, 4, u"long name!"},
{u"-¤¤", true, 3, u"XXX"},
{u"¤¤-", true, 3, u"XXX"},
{u"'¤'", false, 1, u"¤"},
{u"%", false, 1, u"٪\u061C"},
{u"'%'", false, 1, u"%"},
{u"¤'-'%", true, 3, u"$-٪\u061C"},
{u"#0#@#*#;#", false, 9, u"#0#@#*#;#"}};
UErrorCode status = U_ZERO_ERROR;
DefaultSymbolProvider defaultProvider(status);
assertSuccess("Constructing DefaultSymbolProvider", status);
for (TestCase cas : cases) {
UnicodeString input(cas.input);
UnicodeString output(cas.output);
assertEquals(input, cas.currency, AffixUtils::hasCurrencySymbols(UnicodeStringCharSequence(input), status));
assertSuccess("Spot 1", status);
assertEquals(input, cas.expectedLength, AffixUtils::estimateLength(UnicodeStringCharSequence(input), status));
assertSuccess("Spot 2", status);
UnicodeString actual = unescapeWithDefaults(defaultProvider, input, status);
assertSuccess("Spot 3", status);
assertEquals(input, output, actual);
int32_t ulength = AffixUtils::unescapedCodePointCount(UnicodeStringCharSequence(input), defaultProvider, status);
assertSuccess("Spot 4", status);
assertEquals(input, output.countChar32(), ulength);
}
}
void AffixUtilsTest::testContainsReplaceType() {
static struct TestCase {
const char16_t *input;
bool hasMinusSign;
const char16_t *output;
} cases[] = {{u"", false, u""},
{u"-", true, u"+"},
{u"-a", true, u"+a"},
{u"a-", true, u"a+"},
{u"a-b", true, u"a+b"},
{u"--", true, u"++"},
{u"x", false, u"x"}};
UErrorCode status = U_ZERO_ERROR;
for (TestCase cas : cases) {
UnicodeString input(cas.input);
bool hasMinusSign = cas.hasMinusSign;
UnicodeString output(cas.output);
assertEquals(
input, hasMinusSign, AffixUtils::containsType(UnicodeStringCharSequence(input), TYPE_MINUS_SIGN, status));
assertSuccess("Spot 1", status);
assertEquals(
input, output, AffixUtils::replaceType(UnicodeStringCharSequence(input), TYPE_MINUS_SIGN, u'+', status));
assertSuccess("Spot 2", status);
}
}
void AffixUtilsTest::testInvalid() {
static const char16_t *invalidExamples[] = {
u"'", u"x'", u"'x", u"'x''", u"''x'"};
UErrorCode status = U_ZERO_ERROR;
DefaultSymbolProvider defaultProvider(status);
assertSuccess("Constructing DefaultSymbolProvider", status);
for (const char16_t *strPtr : invalidExamples) {
UnicodeString str(strPtr);
status = U_ZERO_ERROR;
AffixUtils::hasCurrencySymbols(UnicodeStringCharSequence(str), status);
assertEquals("Should set error code spot 1", status, U_ILLEGAL_ARGUMENT_ERROR);
status = U_ZERO_ERROR;
AffixUtils::estimateLength(UnicodeStringCharSequence(str), status);
assertEquals("Should set error code spot 2", status, U_ILLEGAL_ARGUMENT_ERROR);
status = U_ZERO_ERROR;
unescapeWithDefaults(defaultProvider, str, status);
assertEquals("Should set error code spot 3", status, U_ILLEGAL_ARGUMENT_ERROR);
}
}
// SymbolProvider that renders each symbol type as the decimal string of the absolute value
// of its enum constant (for example TYPE_MINUS_SIGN, which is -1, becomes "1").
class NumericSymbolProvider : public SymbolProvider {
  public:
    virtual UnicodeString getSymbol(AffixPatternType type) const {
        if (type < 0) {
            return Int64ToUnicodeString(-type);
        }
        return Int64ToUnicodeString(type);
    }
};
void AffixUtilsTest::testUnescapeWithSymbolProvider() {
static const char16_t* cases[][2] = {
{u"", u""},
{u"-", u"1"},
{u"'-'", u"-"},
{u"- + % ‰ ¤ ¤¤ ¤¤¤ ¤¤¤¤ ¤¤¤¤¤", u"1 2 3 4 5 6 7 8 9"},
{u"'¤¤¤¤¤¤'", u"¤¤¤¤¤¤"},
{u"¤¤¤¤¤¤", u"\uFFFD"}
};
NumericSymbolProvider provider;
UErrorCode status = U_ZERO_ERROR;
NumberStringBuilder sb;
for (auto cas : cases) {
UnicodeString input(cas[0]);
UnicodeString expected(cas[1]);
sb.clear();
AffixUtils::unescape(UnicodeStringCharSequence(input), sb, 0, provider, status);
assertSuccess("Spot 1", status);
assertEquals(input, expected, sb.toUnicodeString());
}
// Test insertion position
sb.clear();
sb.append(u"abcdefg", UNUM_FIELD_COUNT, status);
assertSuccess("Spot 2", status);
AffixUtils::unescape(UnicodeStringCharSequence(UnicodeString(u"-+%")), sb, 4, provider, status);
assertSuccess("Spot 3", status);
assertEquals(u"Symbol provider into middle", u"abcd123efg", sb.toUnicodeString());
}
// Unescapes the input at position 0 of an empty builder using the given provider, asserts
// that unescape() reported the builder's resulting length, and returns the built string.
UnicodeString AffixUtilsTest::unescapeWithDefaults(const SymbolProvider &defaultProvider,
                                                   UnicodeString input, UErrorCode &status) {
    NumberStringBuilder builder;
    const UnicodeStringCharSequence pattern(input);
    const int32_t reported = AffixUtils::unescape(pattern, builder, 0, defaultProvider, status);
    assertEquals("Return value of unescape", builder.length(), reported);
    return builder.toUnicodeString();
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,280 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "number_decimalquantity.h"
#include "math.h"
#include <cmath>
using namespace icu::number::impl;
// Test suite for DecimalQuantity.
class DecimalQuantityTest : public IntlTest {
public:
void testDecimalQuantityBehaviorStandalone();
void testSwitchStorage();
void testAppend();
void testConvertToAccurateDouble();
void testUseApproximateDoubleWhenAble();
// Test dispatcher used by the IntlTest framework.
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
private:
// Reports an error with the given message when a and b differ by more than a small
// relative tolerance.
void assertDoubleEquals(const char *message, double a, double b);
// Reports an error when the quantity's checkHealth() result is not bogus.
void assertHealth(const DecimalQuantity &fq);
// Checks the quantity's toString() against the expected text, then its health.
void assertToStringAndHealth(const DecimalQuantity &fq, const UnicodeString &expected);
// Round-trips d through a DecimalQuantity before and after roundToInfinity().
void checkDoubleBehavior(double d, bool explicitRequired);
};
// Test dispatcher: logs a banner when executing and registers each test method through the
// TESTCASE_AUTO macros. The registration order below determines the test indices.
void DecimalQuantityTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char *) {
if (exec) {
logln("TestSuite DecimalQuantityTest: ");
}
TESTCASE_AUTO_BEGIN;
TESTCASE_AUTO(testDecimalQuantityBehaviorStandalone);
TESTCASE_AUTO(testSwitchStorage);
TESTCASE_AUTO(testAppend);
TESTCASE_AUTO(testConvertToAccurateDouble);
TESTCASE_AUTO(testUseApproximateDoubleWhenAble);
TESTCASE_AUTO_END;
}
// Reports an error with the given message unless a and b are equal within a relative
// tolerance of 1e-6 (measured against |a|).
//
// Special values are handled explicitly, because every comparison involving NaN is false and
// an infinite tolerance would accept anything:
//   - exactly equal values (including equal infinities) pass;
//   - two NaNs pass;
//   - any other combination involving NaN or an infinity fails.
void DecimalQuantityTest::assertDoubleEquals(const char *message, double a, double b) {
    if (a == b) {
        return;
    }
    if (std::isnan(a) && std::isnan(b)) {
        return;
    }
    if (!std::isfinite(a) || !std::isfinite(b)) {
        errln(message);
        return;
    }

    const double diff = std::fabs(a - b);
    const double bound = std::fabs(a) * 1e-6;
    if (diff > bound) {
        errln(message);
    }
}
void DecimalQuantityTest::assertHealth(const DecimalQuantity &fq) {
UnicodeString health = fq.checkHealth();
if (!health.isBogus()) {
errln(UnicodeString("HEALTH FAILURE: ") + fq.toString());
}
}
void
DecimalQuantityTest::assertToStringAndHealth(const DecimalQuantity &fq, const UnicodeString &expected) {
UnicodeString actual = fq.toString();
assertEquals("DecimalQuantity toString failed", expected, actual);
assertHealth(fq);
}
void DecimalQuantityTest::checkDoubleBehavior(double d, bool explicitRequired) {
DecimalQuantity fq;
fq.setToDouble(d);
if (explicitRequired) {
assertTrue("Should be using approximate double", !fq.isExplicitExactDouble());
}
assertDoubleEquals("Initial construction from hard double", d, fq.toDouble());
fq.roundToInfinity();
if (explicitRequired) {
assertTrue("Should not be using approximate double", fq.isExplicitExactDouble());
}
assertDoubleEquals("After conversion to exact BCD (double)", d, fq.toDouble());
}
// Walks a single DecimalQuantity through a series of mutations and checks its debug string
// and health after each step.
void DecimalQuantityTest::testDecimalQuantityBehaviorStandalone() {
    UErrorCode status = U_ZERO_ERROR;
    DecimalQuantity fq;
    assertToStringAndHealth(fq, u"<DecimalQuantity 999:0:0:-999 long 0E0>");
    fq.setToInt(51423);
    assertToStringAndHealth(fq, "<DecimalQuantity 999:0:0:-999 long 51423E0>");
    fq.adjustMagnitude(-3);
    assertToStringAndHealth(fq, "<DecimalQuantity 999:0:0:-999 long 51423E-3>");
    fq.setToLong(999999999999000L);
    assertToStringAndHealth(fq, "<DecimalQuantity 999:0:0:-999 long 999999999999E3>");
    fq.setIntegerLength(2, 5);
    assertToStringAndHealth(fq, "<DecimalQuantity 5:2:0:-999 long 999999999999E3>");
    fq.setFractionLength(3, 6);
    assertToStringAndHealth(fq, "<DecimalQuantity 5:2:-3:-6 long 999999999999E3>");
    fq.setToDouble(987.654321);
    assertToStringAndHealth(fq, "<DecimalQuantity 5:2:-3:-6 long 987654321E-6>");
    fq.roundToInfinity();
    assertToStringAndHealth(fq, "<DecimalQuantity 5:2:-3:-6 long 987654321E-6>");
    // RoundingMode is a typedef of UNumberFormatRoundingMode, whose half-even enumerator is
    // UNUM_ROUND_HALFEVEN.
    fq.roundToIncrement(0.005, RoundingMode::UNUM_ROUND_HALFEVEN, status);
    assertSuccess("Rounding to increment", status);
    assertToStringAndHealth(fq, "<DecimalQuantity 5:2:-3:-6 long 987655E-3>");
    fq.roundToMagnitude(-2, RoundingMode::UNUM_ROUND_HALFEVEN, status);
    assertSuccess("Rounding to magnitude", status);
    assertToStringAndHealth(fq, "<DecimalQuantity 5:2:-3:-6 long 98766E-2>");
}
// Checks that the quantity switches its internal storage (as reported by isUsingBytes())
// to the byte array when a 17th digit is appended, and back again after rounding away digits.
void DecimalQuantityTest::testSwitchStorage() {
    UErrorCode status = U_ZERO_ERROR;
    DecimalQuantity fq;

    fq.setToLong(1234123412341234L);
    assertFalse("Should not be using byte array", fq.isUsingBytes());
    assertEquals("Failed on initialize", "1234123412341234E0", fq.toNumberString());
    assertHealth(fq);

    // Long -> Bytes
    fq.appendDigit(5, 0, true);
    assertTrue("Should be using byte array", fq.isUsingBytes());
    assertEquals("Failed on multiply", "12341234123412345E0", fq.toNumberString());
    assertHealth(fq);

    // Bytes -> Long
    // RoundingMode is a typedef of UNumberFormatRoundingMode, whose half-even enumerator is
    // UNUM_ROUND_HALFEVEN.
    fq.roundToMagnitude(5, RoundingMode::UNUM_ROUND_HALFEVEN, status);
    assertSuccess("Rounding to magnitude", status);
    assertFalse("Should not be using byte array", fq.isUsingBytes());
    assertEquals("Failed on round", "123412341234E5", fq.toNumberString());
    assertHealth(fq);
}
// Appends digits one at a time and checks toNumberString() and health after each step.
// NOTE(review): judging by the expected strings, the second appendDigit() argument is a
// count of zeros inserted before the digit and the third selects integer (true) versus
// fraction (false) placement -- confirm against number_decimalquantity.h.
void DecimalQuantityTest::testAppend() {
DecimalQuantity fq;
fq.appendDigit(1, 0, true);
assertEquals("Failed on append", "1E0", fq.toNumberString());
assertHealth(fq);
fq.appendDigit(2, 0, true);
assertEquals("Failed on append", "12E0", fq.toNumberString());
assertHealth(fq);
fq.appendDigit(3, 1, true);
assertEquals("Failed on append", "1203E0", fq.toNumberString());
assertHealth(fq);
fq.appendDigit(0, 1, true);
assertEquals("Failed on append", "1203E2", fq.toNumberString());
assertHealth(fq);
fq.appendDigit(4, 0, true);
assertEquals("Failed on append", "1203004E0", fq.toNumberString());
assertHealth(fq);
fq.appendDigit(0, 0, true);
assertEquals("Failed on append", "1203004E1", fq.toNumberString());
assertHealth(fq);
fq.appendDigit(5, 0, false);
assertEquals("Failed on append", "120300405E-1", fq.toNumberString());
assertHealth(fq);
fq.appendDigit(6, 0, false);
assertEquals("Failed on append", "1203004056E-2", fq.toNumberString());
assertHealth(fq);
fq.appendDigit(7, 3, false);
assertEquals("Failed on append", "12030040560007E-6", fq.toNumberString());
assertHealth(fq);
// Append ten more 8s in the fraction position; the expected exponent runs from -7 to -16.
UnicodeString baseExpected("12030040560007");
for (int i = 0; i < 10; i++) {
fq.appendDigit(8, 0, false);
baseExpected.append('8');
UnicodeString expected(baseExpected);
expected.append("E-");
// From i == 3 on, the exponent has two digits (10..16): emit the leading '1' first.
if (i >= 3) {
expected.append('1');
}
// Last digit of the exponent (7 + i), as an ASCII character.
expected.append(((7 + i) % 10) + '0');
assertEquals("Failed on append", expected, fq.toNumberString());
assertHealth(fq);
}
fq.appendDigit(9, 2, false);
baseExpected.append("009");
UnicodeString expected(baseExpected);
expected.append("E-19");
assertEquals("Failed on append", expected, fq.toNumberString());
assertHealth(fq);
}
// Round-trips doubles through DecimalQuantity via checkDoubleBehavior(): a list of
// hard-to-convert values (with the exact-double checks enabled), a list of integer-valued
// doubles, NaN and the infinities, and finally one million doubles built from random bytes.
void DecimalQuantityTest::testConvertToAccurateDouble() {
// based on https://github.com/google/double-conversion/issues/28
static double hardDoubles[] = {
1651087494906221570.0,
-5074790912492772E-327,
83602530019752571E-327,
2.207817077636718750000000000000,
1.818351745605468750000000000000,
3.941719055175781250000000000000,
3.738609313964843750000000000000,
3.967735290527343750000000000000,
1.328025817871093750000000000000,
3.920967102050781250000000000000,
1.015235900878906250000000000000,
1.335227966308593750000000000000,
1.344520568847656250000000000000,
2.879127502441406250000000000000,
3.695838928222656250000000000000,
1.845344543457031250000000000000,
3.793952941894531250000000000000,
3.211402893066406250000000000000,
2.565971374511718750000000000000,
0.965156555175781250000000000000,
2.700004577636718750000000000000,
0.767097473144531250000000000000,
1.780448913574218750000000000000,
2.624839782714843750000000000000,
1.305290222167968750000000000000,
3.834922790527343750000000000000,};
static double integerDoubles[] = {
51423,
51423e10,
4.503599627370496E15,
6.789512076111555E15,
9.007199254740991E15,
9.007199254740992E15};
// Hard doubles are checked with explicitRequired = true; integer doubles only for the
// round-trip value.
for (double d : hardDoubles) {
checkDoubleBehavior(d, true);
}
for (double d : integerDoubles) {
checkDoubleBehavior(d, false);
}
assertDoubleEquals("NaN check failed", NAN, DecimalQuantity().setToDouble(NAN).toDouble());
assertDoubleEquals(
"Inf check failed", INFINITY, DecimalQuantity().setToDouble(INFINITY).toDouble());
assertDoubleEquals(
"-Inf check failed", -INFINITY, DecimalQuantity().setToDouble(-INFINITY).toDouble());
// Generate random doubles
// rand() is not seeded here, so the sequence depends on the C library's default seed.
for (int32_t i = 0; i < 1000000; i++) {
uint8_t bytes[8];
for (int32_t j = 0; j < 8; j++) {
bytes[j] = static_cast<uint8_t>(rand() % 256);
}
// Reinterpret the eight random bytes as a double.
double d;
uprv_memcpy(&d, bytes, 8);
// Skip bit patterns that decode to NaN or an infinity.
if (std::isnan(d) || !std::isfinite(d)) { continue; }
checkDoubleBehavior(d, false);
}
}
void DecimalQuantityTest::testUseApproximateDoubleWhenAble() {
struct TestCase {
double d;
int32_t maxFrac;
RoundingMode roundingMode;
bool usesExact;
} cases[] = {{1.2345678, 1, RoundingMode::kRoundHalfEven, false},
{1.2345678, 7, RoundingMode::kRoundHalfEven, false},
{1.2345678, 12, RoundingMode::kRoundHalfEven, false},
{1.2345678, 13, RoundingMode::kRoundHalfEven, true},
{1.235, 1, RoundingMode::kRoundHalfEven, false},
{1.235, 2, RoundingMode::kRoundHalfEven, true},
{1.235, 3, RoundingMode::kRoundHalfEven, false},
{1.000000000000001, 0, RoundingMode::kRoundHalfEven, false},
{1.000000000000001, 0, RoundingMode::kRoundCeiling, true},
{1.235, 1, RoundingMode::kRoundCeiling, false},
{1.235, 2, RoundingMode::kRoundCeiling, false},
{1.235, 3, RoundingMode::kRoundCeiling, true}};
UErrorCode status = U_ZERO_ERROR;
for (TestCase cas : cases) {
DecimalQuantity fq;
fq.setToDouble(cas.d);
assertTrue("Should be using approximate double", !fq.isExplicitExactDouble());
fq.roundToMagnitude(-cas.maxFrac, cas.roundingMode, status);
assertSuccess("Rounding to magnitude", status);
if (cas.usesExact != fq.isExplicitExactDouble()) {
errln(UnicodeString(u"Using approximate double after rounding: ") + fq.toString());
}
}
}

View File

@ -0,0 +1,197 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include <putilimp.h>
#include "intltest.h"
#include "number_stringbuilder.h"
#include "number_modifiers.h"
using namespace icu::number::impl;
/**
 * IntlTest suite covering ConstantAffixModifier, ConstantMultiFieldModifier,
 * SimpleModifier and CurrencySpacingEnabledModifier.
 */
class ModifiersTest : public IntlTest {
  public:
    void testConstantAffixModifier();
    void testConstantMultiFieldModifier();
    void testSimpleModifier();
    void testCurrencySpacingEnabledModifier();

    /** Dispatches the test cases above by index. */
    void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = nullptr);

  private:
    /**
     * Applies mod to a builder holding only "|" and checks the prefix length,
     * strength and resulting debug string.
     */
    void assertModifierEquals(const Modifier &mod, int32_t expectedPrefixLength, bool expectedStrong,
                              UnicodeString expectedChars, UnicodeString expectedFields,
                              UErrorCode &status);

    /** Same checks as above, but applied to the caller-supplied builder sb. */
    void assertModifierEquals(const Modifier &mod, NumberStringBuilder &sb, int32_t expectedPrefixLength,
                              bool expectedStrong, UnicodeString expectedChars,
                              UnicodeString expectedFields, UErrorCode &status);
};
// Test dispatcher for ModifiersTest. The fourth (unnamed) parameter is unused.
// NOTE(review): the TESTCASE_AUTO* macros are defined outside this file; they
// presumably use index, exec and name to select and run one test per call —
// confirm against their definition before renaming any parameter.
void ModifiersTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char *) {
if (exec) {
// Log the suite banner only when tests are actually being executed.
logln("TestSuite ModifiersTest: ");
}
TESTCASE_AUTO_BEGIN;
TESTCASE_AUTO(testConstantAffixModifier);
TESTCASE_AUTO(testConstantMultiFieldModifier);
TESTCASE_AUTO(testSimpleModifier);
TESTCASE_AUTO(testCurrencySpacingEnabledModifier);
TESTCASE_AUTO_END;
}
void ModifiersTest::testConstantAffixModifier() {
UErrorCode status = U_ZERO_ERROR;
ConstantAffixModifier mod0(u"", u"", UNUM_PERCENT_FIELD, true);
assertModifierEquals(mod0, 0, true, u"|", u"n", status);
assertSuccess("Spot 1", status);
ConstantAffixModifier mod1(u"a📻", u"b", UNUM_PERCENT_FIELD, true);
assertModifierEquals(mod1, 3, true, u"a📻|b", u"%%%n%", status);
assertSuccess("Spot 2", status);
}
void ModifiersTest::testConstantMultiFieldModifier() {
UErrorCode status = U_ZERO_ERROR;
NumberStringBuilder prefix;
NumberStringBuilder suffix;
ConstantMultiFieldModifier mod1(prefix, suffix, true);
assertModifierEquals(mod1, 0, true, u"|", u"n", status);
assertSuccess("Spot 1", status);
prefix.append(u"a📻", UNUM_PERCENT_FIELD, status);
suffix.append(u"b", UNUM_CURRENCY_FIELD, status);
ConstantMultiFieldModifier mod2(prefix, suffix, true);
assertModifierEquals(mod2, 3, true, u"a📻|b", u"%%%n$", status);
assertSuccess("Spot 2", status);
// Make sure the first modifier is still the same (that it stayed constant)
assertModifierEquals(mod1, 0, true, u"|", u"n", status);
assertSuccess("Spot 3", status);
}
/**
 * Table-driven test of SimpleModifier. Each pattern is compiled with
 * SimpleFormatter, checked on the "|" placeholder, and then applied to
 * several base strings at different left/right insertion indices.
 */
void ModifiersTest::testSimpleModifier() {
    static const int32_t NUM_CASES = 5;
    static const int32_t NUM_OUTPUTS = 4;

    // One pattern per case.
    static const char16_t *patterns[] = {u"{0}", u"X{0}Y", u"XX{0}YYY", u"{0}YY", u"XX📺XX{0}"};

    // Base strings with the [leftIndex, rightIndex) range the modifier is applied around.
    static const struct {
        const char16_t *baseString;
        int32_t leftIndex;
        int32_t rightIndex;
    } outputs[NUM_OUTPUTS] = {{u"", 0, 0}, {u"a📻bcde", 0, 0}, {u"a📻bcde", 4, 4}, {u"a📻bcde", 3, 5}};

    // Expected prefix length (in code units) per pattern.
    static const int32_t prefixLens[] = {0, 1, 2, 0, 6};

    // Expected {chars, fields} per pattern when applied to the "|" placeholder.
    static const char16_t *expectedCharFields[][2] = {{u"|", u"n"},
                                                      {u"X|Y", u"%n%"},
                                                      {u"XX|YYY", u"%%n%%%"},
                                                      {u"|YY", u"n%%"},
                                                      {u"XX📺XX|", u"%%%%%%n"}};

    // Expected result strings, indexed [output][case].
    static const char16_t *expecteds[][NUM_CASES] = {
            {u"", u"XY", u"XXYYY", u"YY", u"XX📺XX"},
            {u"a📻bcde", u"XYa📻bcde", u"XXYYYa📻bcde", u"YYa📻bcde", u"XX📺XXa📻bcde"},
            {u"a📻bcde", u"a📻bXYcde", u"a📻bXXYYYcde", u"a📻bYYcde", u"a📻bXX📺XXcde"},
            {u"a📻bcde", u"a📻XbcYde", u"a📻XXbcYYYde", u"a📻bcYYde", u"a📻XX📺XXbcde"}};

    UErrorCode status = U_ZERO_ERROR;
    for (int32_t caseIdx = 0; caseIdx < NUM_CASES; caseIdx++) {
        const UnicodeString pattern(patterns[caseIdx]);
        SimpleFormatter compiledFormatter(pattern, 1, 1, status);
        assertSuccess("Spot 1", status);

        SimpleModifier mod(compiledFormatter, UNUM_PERCENT_FIELD, false);
        assertModifierEquals(
                mod,
                prefixLens[caseIdx],
                false,
                expectedCharFields[caseIdx][0],
                expectedCharFields[caseIdx][1],
                status);
        assertSuccess("Spot 2", status);

        // Test strange insertion positions
        for (int32_t outputIdx = 0; outputIdx < NUM_OUTPUTS; outputIdx++) {
            const auto &target = outputs[outputIdx];
            NumberStringBuilder output;
            output.append(target.baseString, UNUM_FIELD_COUNT, status);
            mod.apply(output, target.leftIndex, target.rightIndex, status);

            UnicodeString expected = expecteds[outputIdx][caseIdx];
            UnicodeString actual = output.toUnicodeString();
            assertEquals("Strange insertion position", expected, actual);
            assertSuccess("Spot 3", status);
        }
    }
}
// Exercises CurrencySpacingEnabledModifier with "en" symbols: an empty modifier,
// a "USD" prefix applied next to digits (a U+00A0 is expected between them),
// the static applyCurrencySpacing() path, and a custom surrounding-match pattern.
// The prefix/suffix builders and the symbols object are mutated between steps,
// so statement order matters.
void ModifiersTest::testCurrencySpacingEnabledModifier() {
UErrorCode status = U_ZERO_ERROR;
DecimalFormatSymbols symbols(Locale("en"), status);
assertSuccess("Spot 1", status);
NumberStringBuilder prefix;
NumberStringBuilder suffix;
// mod1 is built while prefix and suffix are both still empty.
CurrencySpacingEnabledModifier mod1(prefix, suffix, true, symbols, status);
assertSuccess("Spot 2", status);
assertModifierEquals(mod1, 0, true, u"|", u"n", status);
assertSuccess("Spot 3", status);
prefix.append(u"USD", UNUM_CURRENCY_FIELD, status);
assertSuccess("Spot 4", status);
// mod2 sees the "USD" prefix; applied to "|" no spacing character is expected.
CurrencySpacingEnabledModifier mod2(prefix, suffix, true, symbols, status);
assertSuccess("Spot 5", status);
assertModifierEquals(mod2, 3, true, u"USD|", u"$$$n", status);
assertSuccess("Spot 6", status);
// Test the default currency spacing rules
NumberStringBuilder sb;
sb.append("123", UNUM_INTEGER_FIELD, status);
assertSuccess("Spot 7", status);
// sb1 is a copy so that sb stays pristine for the second code path below.
NumberStringBuilder sb1(sb);
assertModifierEquals(mod2, sb1, 3, true, u"USD\u00A0123", u"$$$niii", status);
assertSuccess("Spot 8", status);
// Compare with the unsafe code path
NumberStringBuilder sb2(sb);
sb2.insert(0, "USD", UNUM_CURRENCY_FIELD, status);
assertSuccess("Spot 9", status);
// NOTE(review): the integer arguments are presumably prefix start 0, prefix length 3,
// suffix start 6, suffix length 0 — confirm against applyCurrencySpacing's declaration.
CurrencySpacingEnabledModifier::applyCurrencySpacing(sb2, 0, 3, 6, 0, symbols, status);
assertSuccess("Spot 10", status);
// Both code paths must produce identical builder contents.
assertTrue(sb1.toDebugString() + " vs " + sb2.toDebugString(), sb1.contentEquals(sb2));
// Test custom patterns
// The following line means that the last char of the number should be a | (rather than a digit)
symbols.setPatternForCurrencySpacing(UNUM_CURRENCY_SURROUNDING_MATCH, true, u"[|]");
suffix.append("XYZ", UNUM_CURRENCY_FIELD, status);
assertSuccess("Spot 11", status);
// mod3 picks up the modified symbols plus the new "XYZ" suffix.
CurrencySpacingEnabledModifier mod3(prefix, suffix, true, symbols, status);
assertSuccess("Spot 12", status);
assertModifierEquals(mod3, 3, true, u"USD|\u00A0XYZ", u"$$$nn$$$", status);
assertSuccess("Spot 13", status);
}
void ModifiersTest::assertModifierEquals(const Modifier &mod, int32_t expectedPrefixLength,
bool expectedStrong, UnicodeString expectedChars,
UnicodeString expectedFields, UErrorCode &status) {
NumberStringBuilder sb;
sb.appendCodePoint('|', UNUM_FIELD_COUNT, status);
assertModifierEquals(
mod, sb, expectedPrefixLength, expectedStrong, expectedChars, expectedFields, status);
}
void ModifiersTest::assertModifierEquals(const Modifier &mod, NumberStringBuilder &sb,
int32_t expectedPrefixLength, bool expectedStrong,
UnicodeString expectedChars, UnicodeString expectedFields,
UErrorCode &status) {
int32_t oldCount = sb.codePointCount();
mod.apply(sb, 0, sb.length(), status);
assertEquals("Prefix length", expectedPrefixLength, mod.getPrefixLength(status));
assertEquals("Strong", expectedStrong, mod.isStrong());
if (dynamic_cast<const CurrencySpacingEnabledModifier*>(&mod) == nullptr) {
// i.e., if mod is not a CurrencySpacingEnabledModifier
assertEquals("Code point count equals actual code point count",
sb.codePointCount() - oldCount, mod.getCodePointCount(status));
}
UnicodeString debugString;
debugString.append(u"<NumberStringBuilder [");
debugString.append(expectedChars);
debugString.append(u"] [");
debugString.append(expectedFields);
debugString.append(u"]>");
assertEquals("Debug string", debugString, sb.toDebugString());
}

View File

@ -0,0 +1,119 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "intltest.h"
/**
 * IntlTest suite for MutablePatternModifier and the immutable modifier it can create.
 */
class PatternStringTest : public IntlTest {
  public:
    // The test methods need an explicit return type; a member function
    // declaration without one is ill-formed C++.
    void testBasic();
    void testMutableEqualsImmutable();

  private:
    /** Returns the prefix that mod produces when applied to an empty builder. */
    static UnicodeString getPrefix(const MutablePatternModifier &mod, UErrorCode &status);

    /** Returns the suffix that mod produces when applied to an empty builder. */
    static UnicodeString getSuffix(const MutablePatternModifier &mod, UErrorCode &status);
};
// Checks the prefix and suffix produced by one MutablePatternModifier for the
// patterns "a0b" and "a0b;c-0d" while the sign display and number properties
// are changed between assertions. The same mod object is reconfigured step by
// step, so each expectation depends on all earlier setter calls.
void PatternStringTest::testBasic() {
UErrorCode status = U_ZERO_ERROR;
MutablePatternModifier mod(false);
ParsedPatternInfo patternInfo;
PatternParser::parseToPatternInfo(u"a0b", patternInfo, status);
assertSuccess("Spot 1", status);
// mod receives a pointer to patternInfo, which stays alive for the rest of the function.
mod.setPatternInfo(&patternInfo);
mod.setPatternAttributes(UNUM_SIGN_AUTO, false);
DecimalFormatSymbols symbols(Locale::getEnglish(), status);
CurrencyUnit currency(u"USD", status);
assertSuccess("Spot 2", status);
mod.setSymbols(&symbols, currency, UNUM_UNIT_WIDTH_SHORT, nullptr);
// NOTE(review): the first argument of setNumberProperties is presumably "is negative";
// the expectations below ("+a" after false, "-a" after true) are consistent with that — confirm.
mod.setNumberProperties(false, StandardPlural::Form::COUNT);
assertEquals("Pattern a0b", u"a", getPrefix(mod, status));
assertEquals("Pattern a0b", u"b", getSuffix(mod, status));
mod.setPatternAttributes(UNUM_SIGN_ALWAYS, false);
assertEquals("Pattern a0b", u"+a", getPrefix(mod, status));
assertEquals("Pattern a0b", u"b", getSuffix(mod, status));
mod.setNumberProperties(true, StandardPlural::Form::COUNT);
assertEquals("Pattern a0b", u"-a", getPrefix(mod, status));
assertEquals("Pattern a0b", u"b", getSuffix(mod, status));
mod.setPatternAttributes(UNUM_SIGN_NEVER, false);
assertEquals("Pattern a0b", u"a", getPrefix(mod, status));
assertEquals("Pattern a0b", u"b", getSuffix(mod, status));
assertSuccess("Spot 3", status);
// Second half: the same modifier with a pattern that has an explicit second subpattern.
ParsedPatternInfo patternInfo2;
PatternParser::parseToPatternInfo(u"a0b;c-0d", patternInfo2, status);
assertSuccess("Spot 4", status);
mod.setPatternInfo(&patternInfo2);
mod.setPatternAttributes(UNUM_SIGN_AUTO, false);
mod.setNumberProperties(false, StandardPlural::Form::COUNT);
assertEquals("Pattern a0b;c-0d", u"a", getPrefix(mod, status));
assertEquals("Pattern a0b;c-0d", u"b", getSuffix(mod, status));
mod.setPatternAttributes(UNUM_SIGN_ALWAYS, false);
// With UNUM_SIGN_ALWAYS the "c"/"d" affixes of the second subpattern are expected, with "+" in the prefix.
assertEquals("Pattern a0b;c-0d", u"c+", getPrefix(mod, status));
assertEquals("Pattern a0b;c-0d", u"d", getSuffix(mod, status));
mod.setNumberProperties(true, StandardPlural::Form::COUNT);
assertEquals("Pattern a0b;c-0d", u"c-", getPrefix(mod, status));
assertEquals("Pattern a0b;c-0d", u"d", getSuffix(mod, status));
mod.setPatternAttributes(UNUM_SIGN_NEVER, false);
// The expectation below pins current behavior; the original author left it as an open question.
assertEquals(
"Pattern a0b;c-0d",
u"c-",
getPrefix(mod, status)); // TODO: What should this behavior be?
assertEquals("Pattern a0b;c-0d", u"d", getSuffix(mod, status));
assertSuccess("Spot 5", status);
}
void PatternStringTest::testMutableEqualsImmutable() {
UErrorCode status = U_ZERO_ERROR;
MutablePatternModifier mod(false);
ParsedPatternInfo patternInfo;
PatternParser::parseToPatternInfo("a0b;c-0d", patternInfo, status);
assertSuccess("Spot 1", status);
mod.setPatternInfo(&patternInfo);
mod.setPatternAttributes(UNUM_SIGN_AUTO, false);
DecimalFormatSymbols symbols(Locale::getEnglish(), status);
CurrencyUnit currency(u"USD", status);
assertSuccess("Spot 2", status);
mod.setSymbols(&symbols, currency, UNUM_UNIT_WIDTH_SHORT, nullptr);
DecimalQuantity fq;
fq.setToInt(1);
NumberStringBuilder nsb1;
MicroProps micros1;
mod.addToChain(&micros1);
mod.processQuantity(fq, micros1, status);
micros1.modMiddle->apply(nsb1, 0, 0, status);
assertSuccess("Spot 3", status);
NumberStringBuilder nsb2;
MicroProps micros2;
ImmutablePatternModifier *immutable = mod.createImmutable(status);
immutable->applyToMicros(micros2, fq);
micros2.modMiddle->apply(nsb2, 0, 0, status);
assertSuccess("Spot 4", status);
NumberStringBuilder nsb3;
MicroProps micros3;
mod.addToChain(&micros3);
mod.setPatternAttributes(UNUM_SIGN_ALWAYS, false);
mod.processQuantity(fq, micros3, status);
micros3.modMiddle->apply(nsb3, 0, 0, status);
assertSuccess("Spot 5", status);
assertTrue(nsb1.toUnicodeString() + " vs " + nsb2.toUnicodeString(), nsb1.contentEquals(nsb2));
assertFalse(nsb1.toUnicodeString() + " vs " + nsb3.toUnicodeString(), nsb1.contentEquals(nsb3));
}
/**
 * Applies mod to an empty builder and returns the first getPrefixLength()
 * code units of the result, i.e. the prefix the modifier inserted.
 *
 * Note: `static` appears only on the in-class declaration; repeating it on
 * this out-of-class definition is ill-formed, so it is omitted here.
 *
 * @param mod    The modifier to apply.
 * @param status ICU error code passed through to the modifier.
 * @return The prefix portion of the modifier's output.
 */
UnicodeString PatternStringTest::getPrefix(const MutablePatternModifier &mod, UErrorCode &status) {
    NumberStringBuilder nsb;
    mod.apply(nsb, 0, 0, status);
    int32_t prefixLength = mod.getPrefixLength(status);
    return UnicodeString(nsb.toUnicodeString(), 0, prefixLength);
}
/**
 * Applies mod to an empty builder and returns everything after the first
 * getPrefixLength() code units of the result, i.e. the suffix the modifier
 * inserted.
 *
 * Note: `static` appears only on the in-class declaration; repeating it on
 * this out-of-class definition is ill-formed, so it is omitted here.
 *
 * @param mod    The modifier to apply.
 * @param status ICU error code passed through to the modifier.
 * @return The suffix portion of the modifier's output.
 */
UnicodeString PatternStringTest::getSuffix(const MutablePatternModifier &mod, UErrorCode &status) {
    NumberStringBuilder nsb;
    mod.apply(nsb, 0, 0, status);
    int32_t prefixLength = mod.getPrefixLength(status);
    // UnicodeString(src, start, length): take the remainder after the prefix.
    return UnicodeString(nsb.toUnicodeString(), prefixLength, nsb.length() - prefixLength);
}

View File

@ -0,0 +1,74 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/**
 * Round-trips each input pattern through PatternParser::parseToProperties()
 * and PatternStringUtils::propertiesToPatternString() and compares the result
 * with the expected normalized pattern.
 */
void testToPatternSimple() {
    // Each row is {input pattern, expected pattern after the round trip}.
    const char16_t *cases[][2] = {{u"#", u"0"},
                                  {u"0", u"0"},
                                  {u"#0", u"0"},
                                  {u"###", u"0"},
                                  {u"0.##", u"0.##"},
                                  {u"0.00", u"0.00"},
                                  {u"0.00#", u"0.00#"},
                                  {u"#E0", u"#E0"},
                                  {u"0E0", u"0E0"},
                                  {u"#00E00", u"#00E00"},
                                  {u"#,##0", u"#,##0"},
                                  {u"#;#", u"0;0"},
                                  // ignore a negative prefix pattern of '-' since that is the default:
                                  {u"#;-#", u"0"},
                                  {u"**##0", u"**##0"},
                                  {u"*'x'##0", u"*x##0"},
                                  {u"a''b0", u"a''b0"},
                                  {u"*''##0", u"*''##0"},
                                  {u"*📺##0", u"*'📺'##0"},
                                  {u"*'நி'##0", u"*'நி'##0"},};

    UErrorCode status = U_ZERO_ERROR;
    for (const auto &row : cases) {
        UnicodeString input(row[0]);
        UnicodeString output(row[1]);

        DecimalFormatProperties properties = PatternParser::parseToProperties(
                input, PatternParser::IGNORE_ROUNDING_NEVER, status);
        assertSuccess(input, status);

        UnicodeString actual = PatternStringUtils::propertiesToPatternString(properties, status);
        assertEquals(input, output, actual);
    }
}
void testExceptionOnInvalid() {
static const char16_t *invalidPatterns[] = {
u"#.#.#",
u"0#",
u"0#.",
u".#0",
u"0#.#0",
u"@0",
u"0@",
u"0,",
u"0,,",
u"0,,0",
u"0,,0,",
u"#,##0E0"};
for (auto pattern : invalidPatterns) {
UErrorCode status = U_ZERO_ERROR;
ParsedPatternInfo patternInfo;
PatternParser::parseToPatternInfo(pattern, patternInfo, status);
assertTrue(pattern, U_FAILURE(status));
}
}
/**
 * Regression test: parsing "0;" (an empty subpattern after the semicolon)
 * must produce the same properties as parsing "0".
 */
void testBug13117() {
    UErrorCode status = U_ZERO_ERROR;

    DecimalFormatProperties expected =
            PatternParser::parseToProperties(u"0", PatternParser::IGNORE_ROUNDING_NEVER, status);
    DecimalFormatProperties actual =
            PatternParser::parseToProperties(u"0;", PatternParser::IGNORE_ROUNDING_NEVER, status);

    // Both parses share one status; it is checked once after the second call.
    assertSuccess("Spot 1", status);
    assertTrue("Should not consume negative subpattern", expected == actual);
}

View File

@ -0,0 +1,231 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "putilimp.h"
#include "numbertest.h"
// Shared sample inputs iterated by the NumberStringBuilder tests below: the empty
// string, ASCII text, strings containing supplementary (two-code-unit) characters,
// and one long string.
// NOTE(review): per its own wording the last entry is meant to push the builder
// onto heap storage — confirm against NumberStringBuilder's capacity handling.
static const char16_t *EXAMPLE_STRINGS[] = {
u"",
u"xyz",
u"The quick brown fox jumps over the lazy dog",
u"😁",
u"mixed 😇 and ASCII",
u"with combining characters like 🇦🇧🇨🇩",
u"A very very very very very very very very very very long string to force heap"};
// Test dispatcher for NumberStringBuilderTest. The fourth (unnamed) parameter is unused.
// NOTE(review): the TESTCASE_AUTO* macros are defined outside this file; they
// presumably use index, exec and name to select and run one test per call —
// confirm against their definition before renaming any parameter.
void NumberStringBuilderTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char *) {
if (exec) {
// Log the suite banner only when tests are actually being executed.
logln("TestSuite NumberStringBuilderTest: ");
}
TESTCASE_AUTO_BEGIN;
TESTCASE_AUTO(testInsertAppendUnicodeString);
TESTCASE_AUTO(testInsertAppendCodePoint);
TESTCASE_AUTO(testCopy);
TESTCASE_AUTO(testFields);
TESTCASE_AUTO(testUnlimitedCapacity);
TESTCASE_AUTO(testCodePoints);
TESTCASE_AUTO_END;
}
void NumberStringBuilderTest::testInsertAppendUnicodeString() {
UErrorCode status = U_ZERO_ERROR;
UnicodeString sb1;
NumberStringBuilder sb2;
for (const char16_t* strPtr : EXAMPLE_STRINGS) {
UnicodeString str(strPtr);
NumberStringBuilder sb3;
sb1.append(str);
// Note: UNUM_FIELD_COUNT is like passing null in Java
sb2.append(str, UNUM_FIELD_COUNT, status);
assertSuccess("Appending to sb2", status);
sb3.append(str, UNUM_FIELD_COUNT, status);
assertSuccess("Appending to sb3", status);
assertEqualsImpl(sb1, sb2);
assertEqualsImpl(str, sb3);
UnicodeString sb4;
NumberStringBuilder sb5;
sb4.append(u"😇");
sb4.append(str);
sb4.append(u"xx");
sb5.append(u"😇xx", UNUM_FIELD_COUNT, status);
assertSuccess("Appending to sb5", status);
sb5.insert(2, str, UNUM_FIELD_COUNT, status);
assertSuccess("Inserting into sb5", status);
assertEqualsImpl(sb4, sb5);
int start = uprv_min(1, str.length());
int end = uprv_min(10, str.length());
sb4.insert(3, str, start, end - start); // UnicodeString uses length instead of end index
sb5.insert(3, str, start, end, UNUM_FIELD_COUNT, status);
assertSuccess("Inserting into sb5 again", status);
assertEqualsImpl(sb4, sb5);
UnicodeString sb4cp(sb4);
NumberStringBuilder sb5cp(sb5);
sb4.append(sb4cp);
sb5.append(sb5cp, status);
assertSuccess("Appending again to sb5", status);
assertEqualsImpl(sb4, sb5);
}
}
/**
 * Appends and inserts single code points, including BMP boundary values and
 * supplementary code points, and compares the builder with a UnicodeString
 * that received the same operations.
 */
void NumberStringBuilderTest::testInsertAppendCodePoint() {
    static const UChar32 cases[] = {
            0, 1, 60, 127, 128, 0x7fff, 0x8000, 0xffff, 0x10000, 0x1f000, 0x10ffff};

    UErrorCode status = U_ZERO_ERROR;

    // These two accumulate across all code points.
    UnicodeString plainAccum;
    NumberStringBuilder builderAccum;

    for (UChar32 codePoint : cases) {
        NumberStringBuilder singleBuilder;
        plainAccum.append(codePoint);
        builderAccum.appendCodePoint(codePoint, UNUM_FIELD_COUNT, status);
        assertSuccess("Appending to sb2", status);
        singleBuilder.appendCodePoint(codePoint, UNUM_FIELD_COUNT, status);
        assertSuccess("Appending to sb3", status);
        assertEqualsImpl(plainAccum, builderAccum);

        // A builder holding exactly one code point: one or two code units.
        assertEquals("Length of sb3", U16_LENGTH(codePoint), singleBuilder.length());
        assertEquals("Code point count of sb3", 1, singleBuilder.codePointCount());
        assertEquals(
                "First code unit in sb3",
                U_IS_SUPPLEMENTARY(codePoint) ? U16_LEAD(codePoint)
                                              : static_cast<char16_t>(codePoint),
                singleBuilder.charAt(0));

        // Insert in the middle (index 2 is right after the emoji).
        UnicodeString plainMixed;
        NumberStringBuilder builderMixed;
        plainMixed.append(u"😇xx");
        plainMixed.insert(2, codePoint);
        builderMixed.append(u"😇xx", UNUM_FIELD_COUNT, status);
        assertSuccess("Appending to sb5", status);
        builderMixed.insertCodePoint(2, codePoint, UNUM_FIELD_COUNT, status);
        assertSuccess("Inserting into sb5", status);
        assertEqualsImpl(plainMixed, builderMixed);
    }
}
void NumberStringBuilderTest::testCopy() {
UErrorCode status = U_ZERO_ERROR;
for (UnicodeString str : EXAMPLE_STRINGS) {
NumberStringBuilder sb1;
sb1.append(str, UNUM_FIELD_COUNT, status);
assertSuccess("Appending to sb1 first time", status);
NumberStringBuilder sb2(sb1);
assertTrue("Content should equal itself", sb1.contentEquals(sb2));
sb1.append("12345", UNUM_FIELD_COUNT, status);
assertSuccess("Appending to sb1 second time", status);
assertFalse("Content should no longer equal itself", sb1.contentEquals(sb2));
}
}
// Checks per-code-unit field tracking in NumberStringBuilder: appends each example
// string twice with different fields, verifies fieldAt() and populateFieldPosition(),
// inserts one integer-field code point, self-appends a copy, and counts the fields.
// The final counts depend on the exact order of the mutations below.
void NumberStringBuilderTest::testFields() {
UErrorCode status = U_ZERO_ERROR;
// Note: This is a C++11 for loop that calls the UnicodeString constructor on each iteration.
for (UnicodeString str : EXAMPLE_STRINGS) {
NumberStringBuilder sb;
// First copy carries no field (UNUM_FIELD_COUNT), second copy carries the currency field.
sb.append(str, UNUM_FIELD_COUNT, status);
assertSuccess("Appending to sb", status);
sb.append(str, UNUM_CURRENCY_FIELD, status);
assertSuccess("Appending to sb", status);
assertEquals("Reference string copied twice", str.length() * 2, sb.length());
for (int32_t i = 0; i < str.length(); i++) {
assertEquals("Null field first", UNUM_FIELD_COUNT, sb.fieldAt(i));
assertEquals("Currency field second", UNUM_CURRENCY_FIELD, sb.fieldAt(i + str.length()));
}
// Very basic FieldPosition test. More robust tests happen in NumberFormatTest.
// Let NumberFormatTest also take care of FieldPositionIterator material.
FieldPosition fp(UNUM_CURRENCY_FIELD);
sb.populateFieldPosition(fp, 0, status);
assertSuccess("Populating the FieldPosition", status);
// The currency run is expected to span exactly the second copy of str.
assertEquals("Currency start position", str.length(), fp.getBeginIndex());
assertEquals("Currency end position", str.length() * 2, fp.getEndIndex());
if (str.length() > 0) {
// Insert code point 100 at index 2 with the integer field; skipped for the empty example.
sb.insertCodePoint(2, 100, UNUM_INTEGER_FIELD, status);
assertSuccess("Inserting code point into sb", status);
assertEquals("New length", str.length() * 2 + 1, sb.length());
assertEquals("Integer field", UNUM_INTEGER_FIELD, sb.fieldAt(2));
}
// Snapshot the builder, then append the snapshot so the content is doubled.
NumberStringBuilder old(sb);
sb.append(old, status);
assertSuccess("Appending to myself", status);
int32_t numNull = 0;
int32_t numCurr = 0;
int32_t numInt = 0;
for (int32_t i = 0; i < sb.length(); i++) {
UNumberFormatFields field = sb.fieldAt(i);
// After doubling, position i must carry the same field as position (i mod old length) in the snapshot.
assertEquals("Field should equal location in old", old.fieldAt(i % old.length()), field);
if (field == UNUM_FIELD_COUNT) {
numNull++;
} else if (field == UNUM_CURRENCY_FIELD) {
numCurr++;
} else if (field == UNUM_INTEGER_FIELD) {
numInt++;
} else {
errln("Encountered unknown field");
}
}
// Each field run of str.length() units now occurs twice; the inserted integer code point also occurs twice.
assertEquals("Number of null fields", str.length() * 2, numNull);
assertEquals("Number of currency fields", numNull, numCurr);
assertEquals("Number of integer fields", str.length() > 0 ? 2 : 0, numInt);
}
}
/**
 * Appends 1000 code points one at a time and checks after every step that the
 * append succeeded and the builder's length grew by exactly one.
 *
 * The length assertions pass the expected value first and the actual value
 * second, matching the (message, expected, actual) order used by the other
 * assertEquals calls in this file, so failure output is labeled correctly.
 */
void NumberStringBuilderTest::testUnlimitedCapacity() {
    UErrorCode status = U_ZERO_ERROR;
    NumberStringBuilder builder;
    // The builder should never fail upon repeated appends.
    for (int i = 0; i < 1000; i++) {
        UnicodeString message("Iteration #");
        message += Int64ToUnicodeString(i);
        assertEquals(message, i, builder.length());
        builder.appendCodePoint('x', UNUM_FIELD_COUNT, status);
        assertSuccess(message, status);
        assertEquals(message, i + 1, builder.length());
    }
}
void NumberStringBuilderTest::testCodePoints() {
UErrorCode status = U_ZERO_ERROR;
NumberStringBuilder nsb;
assertEquals("First is -1 on empty string", -1, nsb.getFirstCodePoint());
assertEquals("Last is -1 on empty string", -1, nsb.getLastCodePoint());
assertEquals("Length is 0 on empty string", 0, nsb.codePointCount());
nsb.append(u"q", UNUM_FIELD_COUNT, status);
assertSuccess("Spot 1", status);
assertEquals("First is q", u'q', nsb.getFirstCodePoint());
assertEquals("Last is q", u'q', nsb.getLastCodePoint());
assertEquals("0th is q", u'q', nsb.codePointAt(0));
assertEquals("Before 1st is q", u'q', nsb.codePointBefore(1));
assertEquals("Code point count is 1", 1, nsb.codePointCount());
// 🚀 is two char16s
nsb.append(u"🚀", UNUM_FIELD_COUNT, status);
assertSuccess("Spot 2" ,status);
assertEquals("First is still q", u'q', nsb.getFirstCodePoint());
assertEquals("Last is space ship", 128640, nsb.getLastCodePoint());
assertEquals("1st is space ship", 128640, nsb.codePointAt(1));
assertEquals("Before 1st is q", u'q', nsb.codePointBefore(1));
assertEquals("Before 3rd is space ship", 128640, nsb.codePointBefore(3));
assertEquals("Code point count is 2", 2, nsb.codePointCount());
}
/**
 * Asserts that builder b has the same length, code point count and code units
 * as string a. The per-unit comparison is skipped when the lengths differ.
 *
 * @param a The reference string.
 * @param b The builder expected to hold the same content.
 */
void NumberStringBuilderTest::assertEqualsImpl(const UnicodeString &a, const NumberStringBuilder &b) {
    // TODO: Why won't this compile without the IntlTest:: qualifier?
    IntlTest::assertEquals("Lengths should be the same", a.length(), b.length());
    IntlTest::assertEquals("Code point counts should be the same", a.countChar32(), b.codePointCount());

    const bool sameLength = (a.length() == b.length());
    if (!sameLength) {
        return;
    }

    for (int32_t pos = 0; pos < a.length(); pos++) {
        IntlTest::assertEquals(
                UnicodeString("Char at position ") + Int64ToUnicodeString(pos) +
                UnicodeString(" in string ") + a,
                a.charAt(pos),
                b.charAt(pos));
    }
}