From a01fc429cb9fa034edf6cfbe9f69c85bc0469565 Mon Sep 17 00:00:00 2001 From: Shane Carr Date: Wed, 27 Sep 2017 00:25:20 +0000 Subject: [PATCH] ICU-13177 Initial commit of ICU4C NumberFormatter X-SVN-Rev: 40465 --- icu4c/source/i18n/Makefile.in | 8 +- icu4c/source/i18n/number_affixutils.cpp | 390 ++++ icu4c/source/i18n/number_affixutils.h | 214 ++ icu4c/source/i18n/number_compact.cpp | 315 +++ icu4c/source/i18n/number_compact.h | 86 + icu4c/source/i18n/number_decimalquantity.cpp | 995 +++++++++ icu4c/source/i18n/number_decimalquantity.h | 432 ++++ icu4c/source/i18n/number_decimfmtprops.cpp | 94 + icu4c/source/i18n/number_decimfmtprops.h | 77 + icu4c/source/i18n/number_fluent.cpp | 318 +++ icu4c/source/i18n/number_formatimpl.cpp | 455 ++++ icu4c/source/i18n/number_formatimpl.h | 120 ++ icu4c/source/i18n/number_grouping.cpp | 47 + icu4c/source/i18n/number_integerwidth.cpp | 41 + icu4c/source/i18n/number_longnames.cpp | 157 ++ icu4c/source/i18n/number_longnames.h | 43 + icu4c/source/i18n/number_modifiers.cpp | 294 +++ icu4c/source/i18n/number_modifiers.h | 249 +++ icu4c/source/i18n/number_notation.cpp | 68 + icu4c/source/i18n/number_padding.cpp | 76 + icu4c/source/i18n/number_patternmodifier.cpp | 342 ++++ icu4c/source/i18n/number_patternmodifier.h | 234 +++ icu4c/source/i18n/number_patternstring.cpp | 831 ++++++++ icu4c/source/i18n/number_patternstring.h | 257 +++ icu4c/source/i18n/number_results.h | 20 + icu4c/source/i18n/number_rounding.cpp | 339 +++ icu4c/source/i18n/number_roundingutils.h | 136 ++ icu4c/source/i18n/number_scientific.cpp | 129 ++ icu4c/source/i18n/number_scientific.h | 57 + icu4c/source/i18n/number_stringbuilder.cpp | 432 ++++ icu4c/source/i18n/number_stringbuilder.h | 130 ++ icu4c/source/i18n/number_types.h | 282 +++ icu4c/source/i18n/number_utils.h | 125 ++ icu4c/source/i18n/unicode/numberformatter.h | 1824 +++++++++++++++++ icu4c/source/test/intltest/Makefile.in | 6 +- icu4c/source/test/intltest/itformat.cpp | 9 +- 
icu4c/source/test/intltest/numbertest.h | 50 + .../test/intltest/numbertest_affixutils.cpp | 242 +++ icu4c/source/test/intltest/numbertest_api.cpp | 1552 ++++++++++++++ .../intltest/numbertest_decimalquantity.cpp | 280 +++ .../test/intltest/numbertest_modifiers.cpp | 197 ++ .../intltest/numbertest_patternmodifier.cpp | 119 ++ .../intltest/numbertest_patternstring.cpp | 74 + .../intltest/numbertest_stringbuilder.cpp | 231 +++ 44 files changed, 12372 insertions(+), 5 deletions(-) create mode 100644 icu4c/source/i18n/number_affixutils.cpp create mode 100644 icu4c/source/i18n/number_affixutils.h create mode 100644 icu4c/source/i18n/number_compact.cpp create mode 100644 icu4c/source/i18n/number_compact.h create mode 100644 icu4c/source/i18n/number_decimalquantity.cpp create mode 100644 icu4c/source/i18n/number_decimalquantity.h create mode 100644 icu4c/source/i18n/number_decimfmtprops.cpp create mode 100644 icu4c/source/i18n/number_decimfmtprops.h create mode 100644 icu4c/source/i18n/number_fluent.cpp create mode 100644 icu4c/source/i18n/number_formatimpl.cpp create mode 100644 icu4c/source/i18n/number_formatimpl.h create mode 100644 icu4c/source/i18n/number_grouping.cpp create mode 100644 icu4c/source/i18n/number_integerwidth.cpp create mode 100644 icu4c/source/i18n/number_longnames.cpp create mode 100644 icu4c/source/i18n/number_longnames.h create mode 100644 icu4c/source/i18n/number_modifiers.cpp create mode 100644 icu4c/source/i18n/number_modifiers.h create mode 100644 icu4c/source/i18n/number_notation.cpp create mode 100644 icu4c/source/i18n/number_padding.cpp create mode 100644 icu4c/source/i18n/number_patternmodifier.cpp create mode 100644 icu4c/source/i18n/number_patternmodifier.h create mode 100644 icu4c/source/i18n/number_patternstring.cpp create mode 100644 icu4c/source/i18n/number_patternstring.h create mode 100644 icu4c/source/i18n/number_results.h create mode 100644 icu4c/source/i18n/number_rounding.cpp create mode 100644 
icu4c/source/i18n/number_roundingutils.h create mode 100644 icu4c/source/i18n/number_scientific.cpp create mode 100644 icu4c/source/i18n/number_scientific.h create mode 100644 icu4c/source/i18n/number_stringbuilder.cpp create mode 100644 icu4c/source/i18n/number_stringbuilder.h create mode 100644 icu4c/source/i18n/number_types.h create mode 100644 icu4c/source/i18n/number_utils.h create mode 100644 icu4c/source/i18n/unicode/numberformatter.h create mode 100644 icu4c/source/test/intltest/numbertest.h create mode 100644 icu4c/source/test/intltest/numbertest_affixutils.cpp create mode 100644 icu4c/source/test/intltest/numbertest_api.cpp create mode 100644 icu4c/source/test/intltest/numbertest_decimalquantity.cpp create mode 100644 icu4c/source/test/intltest/numbertest_modifiers.cpp create mode 100644 icu4c/source/test/intltest/numbertest_patternmodifier.cpp create mode 100644 icu4c/source/test/intltest/numbertest_patternstring.cpp create mode 100644 icu4c/source/test/intltest/numbertest_stringbuilder.cpp diff --git a/icu4c/source/i18n/Makefile.in b/icu4c/source/i18n/Makefile.in index fa0e47c053..dda6050af5 100644 --- a/icu4c/source/i18n/Makefile.in +++ b/icu4c/source/i18n/Makefile.in @@ -102,7 +102,13 @@ digitinterval.o digitformatter.o digitaffix.o valueformatter.o \ digitaffixesandpadding.o pluralaffix.o precision.o \ affixpatternparser.o smallintformatter.o decimfmtimpl.o \ visibledigits.o dayperiodrules.o \ -nounit.o +nounit.o \ +number_affixutils.o number_compact.o number_decimalquantity.o \ +number_decimfmtprops.o number_fluent.o number_formatimpl.o number_grouping.o \ +number_integerwidth.o number_longnames.o number_modifiers.o number_notation.o \ +number_padding.o number_patternmodifier.o number_patternstring.o \ +number_rounding.o number_scientific.o number_stringbuilder.o + ## Header files to install HEADERS = $(srcdir)/unicode/*.h diff --git a/icu4c/source/i18n/number_affixutils.cpp b/icu4c/source/i18n/number_affixutils.cpp new file mode 100644 index 
0000000000..85af1d18d2 --- /dev/null +++ b/icu4c/source/i18n/number_affixutils.cpp @@ -0,0 +1,390 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "number_affixutils.h" +#include "unicode/utf16.h" + +using namespace icu::number::impl; + +int32_t AffixUtils::estimateLength(const CharSequence &patternString, UErrorCode &status) { + AffixPatternState state = STATE_BASE; + int32_t offset = 0; + int32_t length = 0; + for (; offset < patternString.length();) { + UChar32 cp = patternString.codePointAt(offset); + + switch (state) { + case STATE_BASE: + if (cp == '\'') { + // First quote + state = STATE_FIRST_QUOTE; + } else { + // Unquoted symbol + length++; + } + break; + case STATE_FIRST_QUOTE: + if (cp == '\'') { + // Repeated quote + length++; + state = STATE_BASE; + } else { + // Quoted code point + length++; + state = STATE_INSIDE_QUOTE; + } + break; + case STATE_INSIDE_QUOTE: + if (cp == '\'') { + // End of quoted sequence + state = STATE_AFTER_QUOTE; + } else { + // Quoted code point + length++; + } + break; + case STATE_AFTER_QUOTE: + if (cp == '\'') { + // Double quote inside of quoted sequence + length++; + state = STATE_INSIDE_QUOTE; + } else { + // Unquoted symbol + length++; + } + break; + default: + U_ASSERT(false); + } + + offset += U16_LENGTH(cp); + } + + switch (state) { + case STATE_FIRST_QUOTE: + case STATE_INSIDE_QUOTE: + status = U_ILLEGAL_ARGUMENT_ERROR; + default: + break; + } + + return length; +} + +UnicodeString AffixUtils::escape(const CharSequence &input) { + AffixPatternState state = STATE_BASE; + int32_t offset = 0; + UnicodeString output; + for (; offset < input.length();) { + int32_t cp = input.codePointAt(offset); + + switch (cp) { + case '\'': + output.append(u"''", -1); + break; + + case '-': + case '+': + case '%': + case u'‰': + case u'¤': + if (state == STATE_BASE) { + output.append('\''); + output.append(cp); + state = STATE_INSIDE_QUOTE; + } else { + 
output.append(cp); + } + break; + + default: + if (state == STATE_INSIDE_QUOTE) { + output.append('\''); + output.append(cp); + state = STATE_BASE; + } else { + output.append(cp); + } + break; + } + offset += U16_LENGTH(cp); + } + + if (state == STATE_INSIDE_QUOTE) { + output.append('\''); + } + + return output; +} + +Field AffixUtils::getFieldForType(AffixPatternType type) { + switch (type) { + case TYPE_MINUS_SIGN: + return Field::UNUM_SIGN_FIELD; + case TYPE_PLUS_SIGN: + return Field::UNUM_SIGN_FIELD; + case TYPE_PERCENT: + return Field::UNUM_PERCENT_FIELD; + case TYPE_PERMILLE: + return Field::UNUM_PERMILL_FIELD; + case TYPE_CURRENCY_SINGLE: + return Field::UNUM_CURRENCY_FIELD; + case TYPE_CURRENCY_DOUBLE: + return Field::UNUM_CURRENCY_FIELD; + case TYPE_CURRENCY_TRIPLE: + return Field::UNUM_CURRENCY_FIELD; + case TYPE_CURRENCY_QUAD: + return Field::UNUM_CURRENCY_FIELD; + case TYPE_CURRENCY_QUINT: + return Field::UNUM_CURRENCY_FIELD; + case TYPE_CURRENCY_OVERFLOW: + return Field::UNUM_CURRENCY_FIELD; + default: + U_ASSERT(false); + return Field::UNUM_FIELD_COUNT; // suppress "control reaches end of non-void function" + } +} + +int32_t +AffixUtils::unescape(const CharSequence &affixPattern, NumberStringBuilder &output, int32_t position, + const SymbolProvider &provider, UErrorCode &status) { + int32_t length = 0; + AffixTag tag = {0}; + while (hasNext(tag, affixPattern)) { + tag = nextToken(tag, affixPattern, status); + if (tag.type == TYPE_CURRENCY_OVERFLOW) { + // Don't go to the provider for this special case + length += output.insertCodePoint(position + length, 0xFFFD, UNUM_CURRENCY_FIELD, status); + } else if (tag.type < 0) { + length += output.insert( + position + length, provider.getSymbol(tag.type), getFieldForType(tag.type), status); + } else { + length += output.insertCodePoint(position + length, tag.codePoint, UNUM_FIELD_COUNT, status); + } + } + return length; +} + +int32_t AffixUtils::unescapedCodePointCount(const CharSequence &affixPattern, + const 
SymbolProvider &provider, UErrorCode &status) { + int32_t length = 0; + AffixTag tag = {0}; + while (hasNext(tag, affixPattern)) { + tag = nextToken(tag, affixPattern, status); + if (tag.type == TYPE_CURRENCY_OVERFLOW) { + length += 1; + } else if (tag.type < 0) { + length += provider.getSymbol(tag.type).length(); + } else { + length += U16_LENGTH(tag.codePoint); + } + } + return length; +} + +bool +AffixUtils::containsType(const CharSequence &affixPattern, AffixPatternType type, UErrorCode &status) { + if (affixPattern.length() == 0) { + return false; + } + AffixTag tag = {0}; + while (hasNext(tag, affixPattern)) { + tag = nextToken(tag, affixPattern, status); + if (tag.type == type) { + return true; + } + } + return false; +} + +bool AffixUtils::hasCurrencySymbols(const CharSequence &affixPattern, UErrorCode &status) { + if (affixPattern.length() == 0) { + return false; + } + AffixTag tag = {0}; + while (hasNext(tag, affixPattern)) { + tag = nextToken(tag, affixPattern, status); + if (tag.type < 0 && getFieldForType(tag.type) == UNUM_CURRENCY_FIELD) { + return true; + } + } + return false; +} + +UnicodeString AffixUtils::replaceType(const CharSequence &affixPattern, AffixPatternType type, + char16_t replacementChar, UErrorCode &status) { + UnicodeString output = affixPattern.toUnicodeString(); + if (affixPattern.length() == 0) { + return output; + }; + AffixTag tag = {0}; + while (hasNext(tag, affixPattern)) { + tag = nextToken(tag, affixPattern, status); + if (tag.type == type) { + output.replace(tag.offset - 1, 1, replacementChar); + } + } + return output; +} + +AffixTag AffixUtils::nextToken(AffixTag tag, const CharSequence &patternString, UErrorCode &status) { + int32_t offset = tag.offset; + int32_t state = tag.state; + for (; offset < patternString.length();) { + UChar32 cp = patternString.codePointAt(offset); + int32_t count = U16_LENGTH(cp); + + switch (state) { + case STATE_BASE: + switch (cp) { + case '\'': + state = STATE_FIRST_QUOTE; + offset += 
count; + // continue to the next code point + break; + case '-': + return makeTag(offset + count, TYPE_MINUS_SIGN, STATE_BASE, 0); + case '+': + return makeTag(offset + count, TYPE_PLUS_SIGN, STATE_BASE, 0); + case u'%': + return makeTag(offset + count, TYPE_PERCENT, STATE_BASE, 0); + case u'‰': + return makeTag(offset + count, TYPE_PERMILLE, STATE_BASE, 0); + case u'¤': + state = STATE_FIRST_CURR; + offset += count; + // continue to the next code point + break; + default: + return makeTag(offset + count, TYPE_CODEPOINT, STATE_BASE, cp); + } + break; + case STATE_FIRST_QUOTE: + if (cp == '\'') { + return makeTag(offset + count, TYPE_CODEPOINT, STATE_BASE, cp); + } else { + return makeTag(offset + count, TYPE_CODEPOINT, STATE_INSIDE_QUOTE, cp); + } + case STATE_INSIDE_QUOTE: + if (cp == '\'') { + state = STATE_AFTER_QUOTE; + offset += count; + // continue to the next code point + break; + } else { + return makeTag(offset + count, TYPE_CODEPOINT, STATE_INSIDE_QUOTE, cp); + } + case STATE_AFTER_QUOTE: + if (cp == '\'') { + return makeTag(offset + count, TYPE_CODEPOINT, STATE_INSIDE_QUOTE, cp); + } else { + state = STATE_BASE; + // re-evaluate this code point + break; + } + case STATE_FIRST_CURR: + if (cp == u'¤') { + state = STATE_SECOND_CURR; + offset += count; + // continue to the next code point + break; + } else { + return makeTag(offset, TYPE_CURRENCY_SINGLE, STATE_BASE, 0); + } + case STATE_SECOND_CURR: + if (cp == u'¤') { + state = STATE_THIRD_CURR; + offset += count; + // continue to the next code point + break; + } else { + return makeTag(offset, TYPE_CURRENCY_DOUBLE, STATE_BASE, 0); + } + case STATE_THIRD_CURR: + if (cp == u'¤') { + state = STATE_FOURTH_CURR; + offset += count; + // continue to the next code point + break; + } else { + return makeTag(offset, TYPE_CURRENCY_TRIPLE, STATE_BASE, 0); + } + case STATE_FOURTH_CURR: + if (cp == u'¤') { + state = STATE_FIFTH_CURR; + offset += count; + // continue to the next code point + break; + } else { + return 
makeTag(offset, TYPE_CURRENCY_QUAD, STATE_BASE, 0); + } + case STATE_FIFTH_CURR: + if (cp == u'¤') { + state = STATE_OVERFLOW_CURR; + offset += count; + // continue to the next code point + break; + } else { + return makeTag(offset, TYPE_CURRENCY_QUINT, STATE_BASE, 0); + } + case STATE_OVERFLOW_CURR: + if (cp == u'¤') { + offset += count; + // continue to the next code point and loop back to this state + break; + } else { + return makeTag(offset, TYPE_CURRENCY_OVERFLOW, STATE_BASE, 0); + } + default: + U_ASSERT(false); + } + } + // End of string + switch (state) { + case STATE_BASE: + // No more tokens in string. + return {-1}; + case STATE_FIRST_QUOTE: + case STATE_INSIDE_QUOTE: + // For consistent behavior with the JDK and ICU 58, set an error here. + status = U_ILLEGAL_ARGUMENT_ERROR; + return {-1}; + case STATE_AFTER_QUOTE: + // No more tokens in string. + return {-1}; + case STATE_FIRST_CURR: + return makeTag(offset, TYPE_CURRENCY_SINGLE, STATE_BASE, 0); + case STATE_SECOND_CURR: + return makeTag(offset, TYPE_CURRENCY_DOUBLE, STATE_BASE, 0); + case STATE_THIRD_CURR: + return makeTag(offset, TYPE_CURRENCY_TRIPLE, STATE_BASE, 0); + case STATE_FOURTH_CURR: + return makeTag(offset, TYPE_CURRENCY_QUAD, STATE_BASE, 0); + case STATE_FIFTH_CURR: + return makeTag(offset, TYPE_CURRENCY_QUINT, STATE_BASE, 0); + case STATE_OVERFLOW_CURR: + return makeTag(offset, TYPE_CURRENCY_OVERFLOW, STATE_BASE, 0); + default: + U_ASSERT(false); + return {-1}; // suppress "control reaches end of non-void function" + } +} + +bool AffixUtils::hasNext(const AffixTag &tag, const CharSequence &string) { + // First check for the {-1} and {0} initializer syntax. + if (tag.offset < 0) { + return false; + } else if (tag.offset == 0) { + return string.length() > 0; + } + // The rest of the fields are safe to use now. + // Special case: the last character in string is an end quote. 
+ if (tag.state == STATE_INSIDE_QUOTE && tag.offset == string.length() - 1 && + string.charAt(tag.offset) == '\'') { + return false; + } else if (tag.state != STATE_BASE) { + return true; + } else { + return tag.offset < string.length(); + } +} diff --git a/icu4c/source/i18n/number_affixutils.h b/icu4c/source/i18n/number_affixutils.h new file mode 100644 index 0000000000..5fc2719cb5 --- /dev/null +++ b/icu4c/source/i18n/number_affixutils.h @@ -0,0 +1,214 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef NUMBERFORMAT_AFFIXPATTERNUTILS_H +#define NUMBERFORMAT_AFFIXPATTERNUTILS_H + +#include +#include "number_types.h" +#include "unicode/stringpiece.h" +#include "unicode/unistr.h" +#include "number_stringbuilder.h" + +U_NAMESPACE_BEGIN namespace number { +namespace impl { + +enum AffixPatternState { + STATE_BASE = 0, + STATE_FIRST_QUOTE = 1, + STATE_INSIDE_QUOTE = 2, + STATE_AFTER_QUOTE = 3, + STATE_FIRST_CURR = 4, + STATE_SECOND_CURR = 5, + STATE_THIRD_CURR = 6, + STATE_FOURTH_CURR = 7, + STATE_FIFTH_CURR = 8, + STATE_OVERFLOW_CURR = 9 +}; + +// enum AffixPatternType defined in internals.h + +struct AffixTag { + int32_t offset; + UChar32 codePoint; + AffixPatternState state; + AffixPatternType type; + + AffixTag(int32_t offset) : offset(offset) {} + + AffixTag(int32_t offset, UChar32 codePoint, AffixPatternState state, AffixPatternType type) + : offset(offset), codePoint(codePoint), state(state), type(type) + {} +}; + +class SymbolProvider { + public: + // TODO: Could this be more efficient if it returned by reference? + virtual UnicodeString getSymbol(AffixPatternType type) const = 0; +}; + +/** + * Performs manipulations on affix patterns: the prefix and suffix strings associated with a decimal + * format pattern. For example: + * + * + * + * + * + * + * + *
Affix Pattern | Example Unescaped (Formatted) String
abc | abc
ab- | ab−
ab'-' | ab-
ab'' | ab'
+ * + * To manually iterate over tokens in a literal string, use the following pattern, which is designed + * to be efficient. + * + *
+ * long tag = 0L;
+ * while (AffixPatternUtils.hasNext(tag, patternString)) {
+ *   tag = AffixPatternUtils.nextToken(tag, patternString);
+ *   int typeOrCp = AffixPatternUtils.getTypeOrCp(tag);
+ *   switch (typeOrCp) {
+ *     case AffixPatternUtils.TYPE_MINUS_SIGN:
+ *       // Current token is a minus sign.
+ *       break;
+ *     case AffixPatternUtils.TYPE_PLUS_SIGN:
+ *       // Current token is a plus sign.
+ *       break;
+ *     case AffixPatternUtils.TYPE_PERCENT:
+ *       // Current token is a percent sign.
+ *       break;
+ *     // ... other types ...
+ *     default:
+ *       // Current token is an arbitrary code point.
+ *       // The variable typeOrCp is the code point.
+ *       break;
+ *   }
+ * }
+ * 
 */
class AffixUtils {

  public:

    /**
     * Estimates the number of code points present in an unescaped version of the affix pattern string
     * (one that would be returned by unescape()), assuming that all interpolated symbols
     * consume one code point and that currencies consume as many code points as their symbol width.
     * Used for computing padding width.
     *
     * @param patternString The original string whose width will be estimated.
     * @param status Set to U_ILLEGAL_ARGUMENT_ERROR if the pattern ends inside a quoted sequence.
     * @return The length of the unescaped string.
     */
    static int32_t estimateLength(const CharSequence &patternString, UErrorCode &status);

    /**
     * Takes a string and escapes (quotes) characters that have special meaning in the affix pattern
     * syntax. This function does not reverse-lookup symbols.
     *
     * Example input: "-$x"; example output: "'-'$x"
     *
     * @param input The string to be escaped.
     * @return The resulting UnicodeString.
     */
    static UnicodeString escape(const CharSequence &input);

    /**
     * Returns the field used to annotate a symbol token of the given type: the sign field for
     * minus/plus, the percent and permille fields, and the currency field for all currency types.
     *
     * @param type A symbol token type (not TYPE_CODEPOINT).
     */
    static Field getFieldForType(AffixPatternType type);

    /**
     * Executes the unescape state machine. Replaces the unquoted characters "-", "+", "%", "‰", and
     * "¤" with the corresponding symbols provided by the SymbolProvider, and inserts the
     * result into the NumberStringBuilder at the requested location.
     *
     * Example input: "'-'¤x"; example output: "-$x"
     *
     * @param affixPattern The original string to be unescaped.
     * @param output The NumberStringBuilder to mutate with the result.
     * @param position The index into the NumberStringBuilder at which to insert the string.
     * @param provider An object to generate locale symbols.
     * @param status Passed through to the tokenizer and to the NumberStringBuilder insert calls.
     * @return The sum of the values returned by the NumberStringBuilder insert calls
     *         (presumably the number of chars inserted; confirm against NumberStringBuilder).
     */
    static int32_t
    unescape(const CharSequence &affixPattern, NumberStringBuilder &output, int32_t position,
             const SymbolProvider &provider, UErrorCode &status);

    /**
     * Same as unescape(), but only calculates the length. More efficient than unescape()
     * if you only need the length but not the string itself.
     *
     * NOTE(review): despite the name, the implementation adds UnicodeString::length() and
     * U16_LENGTH() values, which look like UTF-16 code unit counts; confirm the intended unit.
     *
     * @param affixPattern The original string to be unescaped.
     * @param provider An object to generate locale symbols.
     * @return The same return value as if you called unescape().
     */
    static int32_t unescapedCodePointCount(const CharSequence &affixPattern,
                                           const SymbolProvider &provider, UErrorCode &status);

    /**
     * Checks whether the given affix pattern contains at least one token of the given type, which is
     * one of the "TYPE_" constants of AffixPatternType.
     *
     * @param affixPattern The affix pattern to check.
     * @param type The token type.
     * @return true if the affix pattern contains the given token type; false otherwise.
     */
    static bool
    containsType(const CharSequence &affixPattern, AffixPatternType type, UErrorCode &status);

    /**
     * Checks whether the specified affix pattern has any unquoted currency symbols ("¤").
     *
     * @param affixPattern The string to check for currency symbols.
     * @return true if the literal has at least one unquoted currency symbol; false otherwise.
     */
    static bool hasCurrencySymbols(const CharSequence &affixPattern, UErrorCode &status);

    /**
     * Replaces all occurrences of tokens with the given type with the given replacement char.
     *
     * @param affixPattern The source affix pattern (does not get modified).
     * @param type The token type.
     * @param replacementChar The char to substitute in place of chars of the given token type.
     * @return A string containing the new affix pattern.
     */
    static UnicodeString
    replaceType(const CharSequence &affixPattern, AffixPatternType type, char16_t replacementChar,
                UErrorCode &status);

    /**
     * Returns the next token from the affix pattern.
     *
     * @param tag The tag of the previous token, used for keeping track of state from token to
     *     token. The initial value should be {0}.
     * @param patternString The affix pattern.
     * @param status Set to U_ILLEGAL_ARGUMENT_ERROR if the pattern ends inside a quoted sequence.
     * @return The tag to pass to the next call of this method to retrieve the following token,
     *     or a tag whose offset is -1 if there were no more tokens in the affix pattern.
     * @see #hasNext
     */
    static AffixTag nextToken(AffixTag tag, const CharSequence &patternString, UErrorCode &status);

    /**
     * Returns whether the affix pattern string has any more tokens to be retrieved from a call to
     * nextToken().
     *
     * @param tag The tag of the previous token, as returned by nextToken().
     * @param string The affix pattern.
     * @return true if there are more tokens to consume; false otherwise.
     */
    static bool hasNext(const AffixTag &tag, const CharSequence &string);

  private:
    /**
     * Encodes the given values into a tag struct.
     * The order of the arguments is consistent with Java, but the order of the stored
     * fields is not necessarily the same.
     */
    static inline AffixTag
    makeTag(int32_t offset, AffixPatternType type, AffixPatternState state, UChar32 cp) {
        // Note the reordering: AffixTag stores (offset, codePoint, state, type).
        return {offset, cp, state, type};
    }
};

} // namespace impl
} // namespace number
U_NAMESPACE_END


#endif //NUMBERFORMAT_AFFIXPATTERNUTILS_H
diff --git a/icu4c/source/i18n/number_compact.cpp b/icu4c/source/i18n/number_compact.cpp new file mode 100644 index 0000000000..be3abca701 --- /dev/null +++ b/icu4c/source/i18n/number_compact.cpp @@ -0,0 +1,315 @@
// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html + +#include "resource.h" +#include "number_compact.h" +#include "unicode/ustring.h" +#include "unicode/ures.h" +#include +#include +#include + +using namespace icu::number::impl; + +namespace { + +// A dummy object used when a "0" compact decimal entry is encountered. This is necessary +// in order to prevent falling back to root. Object equality ("==") is intended. +const UChar *USE_FALLBACK = u""; + +/** Produces a string like "NumberElements/latn/patternsShort/decimalFormat". */ +void getResourceBundleKey(const char *nsName, CompactStyle compactStyle, CompactType compactType, + CharString &sb, UErrorCode &status) { + sb.clear(); + sb.append("NumberElements/", status); + sb.append(nsName, status); + sb.append(compactStyle == CompactStyle::UNUM_SHORT ? "/patternsShort" : "/patternsLong", status); + sb.append(compactType == CompactType::TYPE_DECIMAL ? "/decimalFormat" : "/currencyFormat", status); +} + +int32_t getIndex(int32_t magnitude, StandardPlural::Form plural) { + return magnitude * StandardPlural::COUNT + plural; +} + +int32_t countZeros(const UChar *patternString, int32_t patternLength) { + // NOTE: This strategy for computing the number of zeros is a hack for efficiency. + // It could break if there are any 0s that aren't part of the main pattern. + int32_t numZeros = 0; + for (int32_t i = 0; i < patternLength; i++) { + if (patternString[i] == u'0') { + numZeros++; + } else if (numZeros > 0) { + break; // zeros should always be contiguous + } + } + return numZeros; +} + +} // namespace + +// NOTE: patterns and multipliers both get zero-initialized. 
+CompactData::CompactData() : patterns(), multipliers(), largestMagnitude(0), isEmpty(TRUE) { +} + +void CompactData::populate(const Locale &locale, const char *nsName, CompactStyle compactStyle, + CompactType compactType, UErrorCode &status) { + CompactDataSink sink(*this); + LocalUResourceBundlePointer rb(ures_open(nullptr, locale.getName(), &status)); + + bool nsIsLatn = strcmp(nsName, "latn") == 0; + bool compactIsShort = compactStyle == CompactStyle::UNUM_SHORT; + + // Fall back to latn numbering system and/or short compact style. + CharString resourceKey; + getResourceBundleKey(nsName, compactStyle, compactType, resourceKey, status); + UErrorCode localStatus = U_ZERO_ERROR; + ures_getAllItemsWithFallback(rb.getAlias(), resourceKey.data(), sink, localStatus); + if (isEmpty && !nsIsLatn) { + getResourceBundleKey("latn", compactStyle, compactType, resourceKey, status); + localStatus = U_ZERO_ERROR; + ures_getAllItemsWithFallback(rb.getAlias(), resourceKey.data(), sink, localStatus); + } + if (isEmpty && !compactIsShort) { + getResourceBundleKey(nsName, CompactStyle::UNUM_SHORT, compactType, resourceKey, status); + localStatus = U_ZERO_ERROR; + ures_getAllItemsWithFallback(rb.getAlias(), resourceKey.data(), sink, localStatus); + } + if (isEmpty && !nsIsLatn && !compactIsShort) { + getResourceBundleKey("latn", CompactStyle::UNUM_SHORT, compactType, resourceKey, status); + localStatus = U_ZERO_ERROR; + ures_getAllItemsWithFallback(rb.getAlias(), resourceKey.data(), sink, localStatus); + } + + // The last fallback should be guaranteed to return data. 
+ if (isEmpty) { + status = U_INTERNAL_PROGRAM_ERROR; + } +} + +int32_t CompactData::getMultiplier(int32_t magnitude) const { + if (magnitude < 0) { + return 0; + } + if (magnitude > largestMagnitude) { + magnitude = largestMagnitude; + } + return multipliers[magnitude]; +} + +const UChar *CompactData::getPattern(int32_t magnitude, StandardPlural::Form plural) const { + if (magnitude < 0) { + return nullptr; + } + if (magnitude > largestMagnitude) { + magnitude = largestMagnitude; + } + const UChar *patternString = patterns[getIndex(magnitude, plural)]; + if (patternString == nullptr && plural != StandardPlural::OTHER) { + // Fall back to "other" plural variant + patternString = patterns[getIndex(magnitude, StandardPlural::OTHER)]; + } + if (patternString == USE_FALLBACK) { // == is intended + // Return null if USE_FALLBACK is present + patternString = nullptr; + } + return patternString; +} + +void CompactData::getUniquePatterns(UVector &output, UErrorCode &status) const { + U_ASSERT(output.isEmpty()); + // NOTE: In C++, this is done more manually with a UVector. + // In Java, we can take advantage of JDK HashSet. + for (auto pattern : patterns) { + if (pattern == nullptr || pattern == USE_FALLBACK) { + continue; + } + + // Insert pattern into the UVector if the UVector does not already contain the pattern. + // Search the UVector from the end since identical patterns are likely to be adjacent. + for (int32_t i = output.size() - 1; i >= 0; i--) { + if (u_strcmp(pattern, static_cast(output[i])) == 0) { + goto continue_outer; + } + } + + // The string was not found; add it to the UVector. + // ANDY: This requires a const_cast. Why? 
+ output.addElement(const_cast(pattern), status); + + continue_outer: + continue; + } +} + +void CompactData::CompactDataSink::put(const char *key, ResourceValue &value, UBool /*noFallback*/, + UErrorCode &status) { + // traverse into the table of powers of ten + ResourceTable powersOfTenTable = value.getTable(status); + if (U_FAILURE(status)) { return; } + for (int i3 = 0; powersOfTenTable.getKeyAndValue(i3, key, value); ++i3) { + + // Assumes that the keys are always of the form "10000" where the magnitude is the + // length of the key minus one. We expect magnitudes to be less than MAX_DIGITS. + auto magnitude = static_cast (strlen(key) - 1); + int8_t multiplier = data.multipliers[magnitude]; + U_ASSERT(magnitude < COMPACT_MAX_DIGITS); + + // Iterate over the plural variants ("one", "other", etc) + ResourceTable pluralVariantsTable = value.getTable(status); + if (U_FAILURE(status)) { return; } + for (int i4 = 0; pluralVariantsTable.getKeyAndValue(i4, key, value); ++i4) { + + // Skip this magnitude/plural if we already have it from a child locale. + // Note: This also skips USE_FALLBACK entries. + StandardPlural::Form plural = StandardPlural::fromString(key, status); + if (U_FAILURE(status)) { return; } + if (data.patterns[getIndex(magnitude, plural)] != nullptr) { + continue; + } + + // The value "0" means that we need to use the default pattern and not fall back + // to parent locales. Example locale where this is relevant: 'it'. + int32_t patternLength; + const UChar *patternString = value.getString(patternLength, status); + if (U_FAILURE(status)) { return; } + if (u_strcmp(patternString, u"0") == 0) { + patternString = USE_FALLBACK; + patternLength = 0; + } + + // Save the pattern string. We will parse it lazily. + data.patterns[getIndex(magnitude, plural)] = patternString; + + // If necessary, compute the multiplier: the difference between the magnitude + // and the number of zeros in the pattern. 
+ if (multiplier == 0) { + int32_t numZeros = countZeros(patternString, patternLength); + if (numZeros > 0) { // numZeros==0 in certain cases, like Somali "Kun" + multiplier = static_cast (numZeros - magnitude - 1); + } + } + } + + // Save the multiplier. + if (data.multipliers[magnitude] == 0) { + data.multipliers[magnitude] = multiplier; + if (magnitude > data.largestMagnitude) { + data.largestMagnitude = magnitude; + } + data.isEmpty = false; + } else { + U_ASSERT(data.multipliers[magnitude] == multiplier); + } + } +} + +/////////////////////////////////////////////////////////// +/// END OF CompactData.java; BEGIN CompactNotation.java /// +/////////////////////////////////////////////////////////// + +CompactHandler::CompactHandler(CompactStyle compactStyle, const Locale &locale, const char *nsName, + CompactType compactType, const PluralRules *rules, + MutablePatternModifier *buildReference, const MicroPropsGenerator *parent, + UErrorCode &status) + : rules(rules), parent(parent) { + data.populate(locale, nsName, compactStyle, compactType, status); + if (U_FAILURE(status)) { return; } + if (buildReference != nullptr) { + // Safe code path + precomputeAllModifiers(*buildReference, status); + safe = TRUE; + } else { + // Unsafe code path + safe = FALSE; + } +} + +CompactHandler::~CompactHandler() { + for (int32_t i = 0; i < precomputedModsLength; i++) { + delete precomputedMods[i].mod; + } +} + +void CompactHandler::precomputeAllModifiers(MutablePatternModifier &buildReference, UErrorCode &status) { + // Initial capacity of 12 for 0K, 00K, 000K, ...M, ...B, and ...T + UVector allPatterns(12, status); + if (U_FAILURE(status)) { return; } + data.getUniquePatterns(allPatterns, status); + if (U_FAILURE(status)) { return; } + + // C++ only: ensure that precomputedMods has room. 
+ precomputedModsLength = allPatterns.size(); + if (precomputedMods.getCapacity() < precomputedModsLength) { + precomputedMods.resize(allPatterns.size(), status); + if (U_FAILURE(status)) { return; } + } + + for (int32_t i = 0; i < precomputedModsLength; i++) { + auto patternString = static_cast(allPatterns[i]); + UnicodeString hello(patternString); + CompactModInfo &info = precomputedMods[i]; + ParsedPatternInfo patternInfo; + PatternParser::parseToPatternInfo(UnicodeString(patternString), patternInfo, status); + if (U_FAILURE(status)) { return; } + buildReference.setPatternInfo(&patternInfo); + info.mod = buildReference.createImmutable(status); + if (U_FAILURE(status)) { return; } + info.numDigits = patternInfo.positive.integerTotal; + info.patternString = patternString; + } +} + +void CompactHandler::processQuantity(DecimalQuantity &quantity, MicroProps µs, + UErrorCode &status) const { + parent->processQuantity(quantity, micros, status); + if (U_FAILURE(status)) { return; } + + // Treat zero as if it had magnitude 0 + int magnitude; + if (quantity.isZero()) { + magnitude = 0; + micros.rounding.apply(quantity, status); + } else { + // TODO: Revisit chooseMultiplierAndApply + int multiplier = micros.rounding.chooseMultiplierAndApply(quantity, data, status); + magnitude = quantity.isZero() ? 0 : quantity.getMagnitude(); + magnitude -= multiplier; + } + + StandardPlural::Form plural = quantity.getStandardPlural(rules); + const UChar *patternString = data.getPattern(magnitude, plural); + int numDigits = -1; + if (patternString == nullptr) { + // Use the default (non-compact) modifier. + // No need to take any action. + } else if (safe) { + // Safe code path. + // Java uses a hash set here for O(1) lookup. C++ uses a linear search. + // TODO: Benchmark this and maybe change to a binary search or hash table. 
+ int32_t i = 0; + for (; i < precomputedModsLength; i++) { + const CompactModInfo &info = precomputedMods[i]; + if (u_strcmp(patternString, info.patternString) == 0) { + info.mod->applyToMicros(micros, quantity); + numDigits = info.numDigits; + break; + } + } + // It should be guaranteed that we found the entry. + U_ASSERT(i < precomputedModsLength); + } else { + // Unsafe code path. + // Overwrite the PatternInfo in the existing modMiddle. + // C++ Note: Use unsafePatternInfo for proper lifecycle. + ParsedPatternInfo &patternInfo = const_cast(this)->unsafePatternInfo; + PatternParser::parseToPatternInfo(UnicodeString(patternString), patternInfo, status); + dynamic_cast(const_cast(micros.modMiddle))->setPatternInfo( + &patternInfo); + numDigits = patternInfo.positive.integerTotal; + } + + // FIXME: Deal with numDigits == 0 (Awaiting a test case) + + // We already performed rounding. Do not perform it again. + micros.rounding = Rounder::constructPassThrough(); +} diff --git a/icu4c/source/i18n/number_compact.h b/icu4c/source/i18n/number_compact.h new file mode 100644 index 0000000000..a0de81ffa6 --- /dev/null +++ b/icu4c/source/i18n/number_compact.h @@ -0,0 +1,86 @@ +// © 2017 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef NUMBERFORMAT_NUMFMTTER_COMPACT_H +#define NUMBERFORMAT_NUMFMTTER_COMPACT_H + +#include +#include "number_types.h" +#include "unicode/unum.h" +#include "uvector.h" +#include "resource.h" +#include "number_patternmodifier.h" + +U_NAMESPACE_BEGIN namespace number { +namespace impl { + +static const int32_t COMPACT_MAX_DIGITS = 15; + +class CompactData : public MultiplierProducer { + public: + CompactData(); + + void populate(const Locale &locale, const char *nsName, CompactStyle compactStyle, + CompactType compactType, UErrorCode &status); + + int32_t getMultiplier(int32_t magnitude) const override; + + const UChar *getPattern(int32_t magnitude, StandardPlural::Form plural) const; + + void getUniquePatterns(UVector &output, UErrorCode &status) const; + + private: + const UChar *patterns[(COMPACT_MAX_DIGITS + 1) * StandardPlural::COUNT]; + int8_t multipliers[COMPACT_MAX_DIGITS + 1]; + int8_t largestMagnitude; + UBool isEmpty; + + class CompactDataSink : public ResourceSink { + public: + explicit CompactDataSink(CompactData &data) : data(data) {} + + void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override; + + private: + CompactData &data; + }; +}; + +struct CompactModInfo { + const ImmutablePatternModifier *mod; + const UChar* patternString; + int32_t numDigits; +}; + +class CompactHandler : public MicroPropsGenerator, public UMemory { + public: + CompactHandler(CompactStyle compactStyle, const Locale &locale, const char *nsName, + CompactType compactType, const PluralRules *rules, + MutablePatternModifier *buildReference, const MicroPropsGenerator *parent, + UErrorCode &status); + + ~CompactHandler() override; + + void + processQuantity(DecimalQuantity &quantity, MicroProps µs, UErrorCode &status) const override; + + private: + const PluralRules *rules; + const MicroPropsGenerator *parent; + // Initial capacity of 12 for 0K, 00K, 000K, ...M, ...B, 
and ...T + MaybeStackArray precomputedMods; + int32_t precomputedModsLength = 0; + CompactData data; + ParsedPatternInfo unsafePatternInfo; + UBool safe; + + /** Used by the safe code path */ + void precomputeAllModifiers(MutablePatternModifier &buildReference, UErrorCode &status); +}; + + +} // namespace impl +} // namespace number +U_NAMESPACE_END + +#endif //NUMBERFORMAT_NUMFMTTER_COMPACT_H diff --git a/icu4c/source/i18n/number_decimalquantity.cpp b/icu4c/source/i18n/number_decimalquantity.cpp new file mode 100644 index 0000000000..a4112d09b6 --- /dev/null +++ b/icu4c/source/i18n/number_decimalquantity.cpp @@ -0,0 +1,995 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include +#include +#include +#include +#include +#include "number_decimalquantity.h" +#include "decContext.h" +#include "decNumber.h" +#include "number_roundingutils.h" +#include "unicode/plurrule.h" + +using namespace icu::number::impl; + +namespace { + +int8_t NEGATIVE_FLAG = 1; +int8_t INFINITY_FLAG = 2; +int8_t NAN_FLAG = 4; + +/** Helper function to convert a decNumber-compatible string into a decNumber. */ +void stringToDecNumber(StringPiece n, decNumber &dn) { + decContext set; + uprv_decContextDefault(&set, DEC_INIT_BASE); + uprv_decContextSetRounding(&set, DEC_ROUND_HALF_EVEN); + set.traps = 0; // no traps, thank you + set.digits = 34; // work with up to 34 digits + uprv_decNumberFromString(&dn, n.data(), &set); + U_ASSERT(DECDPUN == 1); +} + +/** Helper function for safe subtraction (no overflow). 
/**
 * Saturating subtraction: returns a - b clamped to the int32_t range.
 *
 * The difference is formed in 64-bit arithmetic, where it cannot overflow, and then clamped.
 * Subtracting in int32_t first and inspecting the result afterwards would already be undefined
 * behavior whenever the true difference does not fit.
 *
 * @return INT32_MAX if a - b > INT32_MAX, INT32_MIN if a - b < INT32_MIN, otherwise a - b.
 */
inline int32_t safeSubtract(int32_t a, int32_t b) {
    const int64_t diff = static_cast<int64_t>(a) - static_cast<int64_t>(b);
    if (diff > INT32_MAX) { return INT32_MAX; }
    if (diff < INT32_MIN) { return INT32_MIN; }
    return static_cast<int32_t>(diff);
}
+ U_ASSERT(minFrac >= 0); + U_ASSERT(maxFrac >= minFrac); + + // Save values into internal state + // Negation is safe for minFrac/maxFrac because -Integer.MAX_VALUE > Integer.MIN_VALUE + rReqPos = -minFrac; + rOptPos = -maxFrac; +} + +uint64_t DecimalQuantity::getPositionFingerprint() const { + uint64_t fingerprint = 0; + fingerprint ^= lOptPos; + fingerprint ^= (lReqPos << 16); + fingerprint ^= ((long) rReqPos << 32); + fingerprint ^= ((long) rOptPos << 48); + return fingerprint; +} + +void DecimalQuantity::roundToIncrement(double roundingIncrement, RoundingMode roundingMode, + UErrorCode& status) { + // TODO: This is innefficient. Improve? + // TODO: Should we convert to decNumber instead? + double temp = toDouble(); + temp /= roundingIncrement; + setToDouble(temp); + roundToMagnitude(0, roundingMode, status); + temp = toDouble(); + temp *= roundingIncrement; + setToDouble(temp); +} + +void DecimalQuantity::multiplyBy(int32_t multiplicand) { + if (isInfinite() || isZero() || isNaN()) { + return; + } + // TODO: Should we convert to decNumber instead? + double temp = toDouble(); + temp *= multiplicand; + setToDouble(temp); +} + +int32_t DecimalQuantity::getMagnitude() const { + U_ASSERT(precision != 0); + return scale + precision - 1; +} + +void DecimalQuantity::adjustMagnitude(int32_t delta) { + if (precision != 0) { + scale += delta; + origDelta += delta; + } +} + +StandardPlural::Form DecimalQuantity::getStandardPlural(const PluralRules *rules) const { + if (rules == nullptr) { + // Fail gracefully if the user didn't provide a PluralRules + return StandardPlural::Form::OTHER; + } else { + UnicodeString ruleString = rules->select(*this); + return StandardPlural::orOtherFromString(ruleString); + } +} + +double DecimalQuantity::getPluralOperand(PluralOperand operand) const { + // If this assertion fails, you need to call roundToInfinity() or some other rounding method. + // See the comment at the top of this file explaining the "isApproximate" field. 
+ U_ASSERT(!isApproximate); + + switch (operand) { + case PLURAL_OPERAND_I: + return toLong(); + case PLURAL_OPERAND_F: + return toFractionLong(true); + case PLURAL_OPERAND_T: + return toFractionLong(false); + case PLURAL_OPERAND_V: + return fractionCount(); + case PLURAL_OPERAND_W: + return fractionCountWithoutTrailingZeros(); + default: + return std::abs(toDouble()); + } +} + +int32_t DecimalQuantity::getUpperDisplayMagnitude() const { + // If this assertion fails, you need to call roundToInfinity() or some other rounding method. + // See the comment in the header file explaining the "isApproximate" field. + U_ASSERT(!isApproximate); + + int32_t magnitude = scale + precision; + int32_t result = (lReqPos > magnitude) ? lReqPos : (lOptPos < magnitude) ? lOptPos : magnitude; + return result - 1; +} + +int32_t DecimalQuantity::getLowerDisplayMagnitude() const { + // If this assertion fails, you need to call roundToInfinity() or some other rounding method. + // See the comment in the header file explaining the "isApproximate" field. + U_ASSERT(!isApproximate); + + int32_t magnitude = scale; + int32_t result = (rReqPos < magnitude) ? rReqPos : (rOptPos > magnitude) ? rOptPos : magnitude; + return result; +} + +int8_t DecimalQuantity::getDigit(int32_t magnitude) const { + // If this assertion fails, you need to call roundToInfinity() or some other rounding method. + // See the comment at the top of this file explaining the "isApproximate" field. + U_ASSERT(!isApproximate); + + return getDigitPos(magnitude - scale); +} + +int32_t DecimalQuantity::fractionCount() const { + return -getLowerDisplayMagnitude(); +} + +int32_t DecimalQuantity::fractionCountWithoutTrailingZeros() const { + return -scale > 0 ? 
-scale : 0; // max(-scale, 0) +} + +bool DecimalQuantity::isNegative() const { + return (flags & NEGATIVE_FLAG) != 0; +} + +bool DecimalQuantity::isInfinite() const { + return (flags & INFINITY_FLAG) != 0; +} + +bool DecimalQuantity::isNaN() const { + return (flags & NAN_FLAG) != 0; +} + +bool DecimalQuantity::isZero() const { + return precision == 0; +} + +DecimalQuantity &DecimalQuantity::setToInt(int32_t n) { + setBcdToZero(); + flags = 0; + if (n < 0) { + flags |= NEGATIVE_FLAG; + n = -n; + } + if (n != 0) { + _setToInt(n); + compact(); + } + return *this; +} + +void DecimalQuantity::_setToInt(int32_t n) { + if (n == INT32_MIN) { + readLongToBcd(-static_cast(n)); + } else { + readIntToBcd(n); + } +} + +DecimalQuantity &DecimalQuantity::setToLong(int64_t n) { + setBcdToZero(); + flags = 0; + if (n < 0) { + flags |= NEGATIVE_FLAG; + n = -n; + } + if (n != 0) { + _setToLong(n); + compact(); + } + return *this; +} + +void DecimalQuantity::_setToLong(int64_t n) { + if (n == INT64_MIN) { + static const char *int64minStr = "9.223372036854775808E+18"; + decNumber dn; + stringToDecNumber(int64minStr, dn); + readDecNumberToBcd(&dn); + } else if (n <= INT32_MAX) { + readIntToBcd(static_cast(n)); + } else { + readLongToBcd(n); + } +} + +DecimalQuantity &DecimalQuantity::setToDouble(double n) { + setBcdToZero(); + flags = 0; + // signbit() from handles +0.0 vs -0.0 + if (std::signbit(n) != 0) { + flags |= NEGATIVE_FLAG; + n = -n; + } + if (std::isnan(n) != 0) { + flags |= NAN_FLAG; + } else if (std::isfinite(n) == 0) { + flags |= INFINITY_FLAG; + } else if (n != 0) { + _setToDoubleFast(n); + compact(); + } + return *this; +} + +void DecimalQuantity::_setToDoubleFast(double n) { + isApproximate = true; + origDouble = n; + origDelta = 0; + + // Make sure the double is an IEEE 754 double. If not, fall back to the slow path right now. + // TODO: Make a fast path for other types of doubles. 
+ if (!std::numeric_limits::is_iec559) { + convertToAccurateDouble(); + // Turn off the approximate double flag, since the value is now exact. + isApproximate = false; + origDouble = 0.0; + return; + } + + // To get the bits from the double, use memcpy, which takes care of endianness. + uint64_t ieeeBits; + uprv_memcpy(&ieeeBits, &n, sizeof(n)); + int32_t exponent = static_cast((ieeeBits & 0x7ff0000000000000L) >> 52) - 0x3ff; + + // Not all integers can be represented exactly for exponent > 52 + if (exponent <= 52 && static_cast(n) == n) { + _setToLong(static_cast(n)); + return; + } + + // 3.3219... is log2(10) + auto fracLength = static_cast ((52 - exponent) / 3.32192809489); + if (fracLength >= 0) { + int32_t i = fracLength; + // 1e22 is the largest exact double. + for (; i >= 22; i -= 22) n *= 1e22; + n *= DOUBLE_MULTIPLIERS[i]; + } else { + int32_t i = fracLength; + // 1e22 is the largest exact double. + for (; i <= -22; i += 22) n /= 1e22; + n /= DOUBLE_MULTIPLIERS[-i]; + } + auto result = static_cast(round(n)); + if (result != 0) { + _setToLong(result); + scale -= fracLength; + } +} + +void DecimalQuantity::convertToAccurateDouble() { + double n = origDouble; + U_ASSERT(n != 0); + int32_t delta = origDelta; + setBcdToZero(); + + // Call the slow oracle function (Double.toString in Java, sprintf in C++). + // The constant DBL_DIG defines a platform-specific number of digits in a double. + // However, this tends to be too low (see #11318). Instead, we always use 14 digits. + char dstr[14 + 8]; // Extra space for '+', '.', e+NNN, and '\0' + sprintf(dstr, "%+1.14e", n); + + // uprv_decNumberFromString() will parse the string expecting '.' as a + // decimal separator, however sprintf() can use ',' in certain locales. + // Overwrite a ',' with '.' here before proceeding. 
+ char *decimalSeparator = strchr(dstr, ','); + if (decimalSeparator != nullptr) { + *decimalSeparator = '.'; + } + + decNumber dn; + stringToDecNumber(dstr, dn); + _setToDecNumber(&dn); + + scale += delta; + explicitExactDouble = true; +} + +DecimalQuantity &DecimalQuantity::setToDecNumber(StringPiece n) { + setBcdToZero(); + flags = 0; + + decNumber dn; + stringToDecNumber(n, dn); + + // The code path for decNumber is modeled after BigDecimal in Java. + if (decNumberIsNegative(&dn)) { + flags |= NEGATIVE_FLAG; + } + if (!decNumberIsZero(&dn)) { + _setToDecNumber(&dn); + } + return *this; +} + +void DecimalQuantity::_setToDecNumber(decNumber *n) { + // Java fastpaths for ints here. In C++, just always read directly from the decNumber. + readDecNumberToBcd(n); + compact(); +} + +int64_t DecimalQuantity::toLong() const { + int64_t result = 0L; + for (int32_t magnitude = scale + precision - 1; magnitude >= 0; magnitude--) { + result = result * 10 + getDigitPos(magnitude - scale); + } + return result; +} + +int64_t DecimalQuantity::toFractionLong(bool includeTrailingZeros) const { + int64_t result = 0L; + int32_t magnitude = -1; + for (; (magnitude >= scale || (includeTrailingZeros && magnitude >= rReqPos)) && + magnitude >= rOptPos; magnitude--) { + result = result * 10 + getDigitPos(magnitude - scale); + } + return result; +} + +double DecimalQuantity::toDouble() const { + if (isApproximate) { + return toDoubleFromOriginal(); + } + + if (isNaN()) { + return NAN; + } else if (isInfinite()) { + return isNegative() ? -INFINITY : INFINITY; + } + + int64_t tempLong = 0L; + int32_t lostDigits = precision - (precision < 17 ? precision : 17); + for (int shift = precision - 1; shift >= lostDigits; shift--) { + tempLong = tempLong * 10 + getDigitPos(shift); + } + double result = tempLong; + int32_t _scale = scale + lostDigits; + if (_scale >= 0) { + // 1e22 is the largest exact double. 
+ int32_t i = _scale; + for (; i >= 22; i -= 22) result *= 1e22; + result *= DOUBLE_MULTIPLIERS[i]; + } else { + // 1e22 is the largest exact double. + int32_t i = _scale; + for (; i <= -22; i += 22) result /= 1e22; + result /= DOUBLE_MULTIPLIERS[-i]; + } + if (isNegative()) { result = -result; } + return result; +} + +double DecimalQuantity::toDoubleFromOriginal() const { + double result = origDouble; + int32_t delta = origDelta; + if (delta >= 0) { + // 1e22 is the largest exact double. + for (; delta >= 22; delta -= 22) result *= 1e22; + result *= DOUBLE_MULTIPLIERS[delta]; + } else { + // 1e22 is the largest exact double. + for (; delta <= -22; delta += 22) result /= 1e22; + result /= DOUBLE_MULTIPLIERS[-delta]; + } + if (isNegative()) { result *= -1; } + return result; +} + +void DecimalQuantity::roundToMagnitude(int32_t magnitude, RoundingMode roundingMode, UErrorCode& status) { + // The position in the BCD at which rounding will be performed; digits to the right of position + // will be rounded away. + // TODO: Andy: There was a test failure because of integer overflow here. Should I do + // "safe subtraction" everywhere in the code? What's the nicest way to do it? + int position = safeSubtract(magnitude, scale); + + if (position <= 0 && !isApproximate) { + // All digits are to the left of the rounding magnitude. + } else if (precision == 0) { + // No rounding for zero. + } else { + // Perform rounding logic. + // "leading" = most significant digit to the right of rounding + // "trailing" = least significant digit to the left of rounding + int8_t leadingDigit = getDigitPos(safeSubtract(position, 1)); + int8_t trailingDigit = getDigitPos(position); + + // Compute which section of the number we are in. 
+ // EDGE means we are at the bottom or top edge, like 1.000 or 1.999 (used by doubles) + // LOWER means we are between the bottom edge and the midpoint, like 1.391 + // MIDPOINT means we are exactly in the middle, like 1.500 + // UPPER means we are between the midpoint and the top edge, like 1.916 + roundingutils::Section section = roundingutils::SECTION_MIDPOINT; + if (!isApproximate) { + if (leadingDigit < 5) { + section = roundingutils::SECTION_LOWER; + } else if (leadingDigit > 5) { + section = roundingutils::SECTION_UPPER; + } else { + for (int p = safeSubtract(position, 2); p >= 0; p--) { + if (getDigitPos(p) != 0) { + section = roundingutils::SECTION_UPPER; + break; + } + } + } + } else { + int32_t p = safeSubtract(position, 2); + int32_t minP = uprv_max(0, precision - 14); + if (leadingDigit == 0) { + section = roundingutils::SECTION_LOWER_EDGE; + for (; p >= minP; p--) { + if (getDigitPos(p) != 0) { + section = roundingutils::SECTION_LOWER; + break; + } + } + } else if (leadingDigit == 4) { + for (; p >= minP; p--) { + if (getDigitPos(p) != 9) { + section = roundingutils::SECTION_LOWER; + break; + } + } + } else if (leadingDigit == 5) { + for (; p >= minP; p--) { + if (getDigitPos(p) != 0) { + section = roundingutils::SECTION_UPPER; + break; + } + } + } else if (leadingDigit == 9) { + section = roundingutils::SECTION_UPPER_EDGE; + for (; p >= minP; p--) { + if (getDigitPos(p) != 9) { + section = roundingutils::SECTION_UPPER; + break; + } + } + } else if (leadingDigit < 5) { + section = roundingutils::SECTION_LOWER; + } else { + section = roundingutils::SECTION_UPPER; + } + + bool roundsAtMidpoint = roundingutils::roundsAtMidpoint(roundingMode); + if (safeSubtract(position, 1) < precision - 14 || + (roundsAtMidpoint && section == roundingutils::SECTION_MIDPOINT) || + (!roundsAtMidpoint && section < 0 /* i.e. at upper or lower edge */)) { + // Oops! 
This means that we have to get the exact representation of the double, because + // the zone of uncertainty is along the rounding boundary. + convertToAccurateDouble(); + roundToMagnitude(magnitude, roundingMode, status); // start over + return; + } + + // Turn off the approximate double flag, since the value is now confirmed to be exact. + isApproximate = false; + origDouble = 0.0; + origDelta = 0; + + if (position <= 0) { + // All digits are to the left of the rounding magnitude. + return; + } + + // Good to continue rounding. + if (section == -1) { section = roundingutils::SECTION_LOWER; } + if (section == -2) { section = roundingutils::SECTION_UPPER; } + } + + bool roundDown = roundingutils::getRoundingDirection((trailingDigit % 2) == 0, + isNegative(), + section, + roundingMode, + status); + if (U_FAILURE(status)) { + return; + } + + // Perform truncation + if (position >= precision) { + setBcdToZero(); + scale = magnitude; + } else { + shiftRight(position); + } + + // Bubble the result to the higher digits + if (!roundDown) { + if (trailingDigit == 9) { + int bubblePos = 0; + // Note: in the long implementation, the most digits BCD can have at this point is 15, + // so bubblePos <= 15 and getDigitPos(bubblePos) is safe. + for (; getDigitPos(bubblePos) == 9; bubblePos++) {} + shiftRight(bubblePos); // shift off the trailing 9s + } + int8_t digit0 = getDigitPos(0); + U_ASSERT(digit0 != 9); + setDigitPos(0, static_cast(digit0 + 1)); + precision += 1; // in case an extra digit got added + } + + compact(); + } +} + +void DecimalQuantity::roundToInfinity() { + if (isApproximate) { + convertToAccurateDouble(); + } +} + +void DecimalQuantity::appendDigit(int8_t value, int32_t leadingZeros, bool appendAsInteger) { + U_ASSERT(leadingZeros >= 0); + + // Zero requires special handling to maintain the invariant that the least-significant digit + // in the BCD is nonzero. 
+ if (value == 0) { + if (appendAsInteger && precision != 0) { + scale += leadingZeros + 1; + } + return; + } + + // Deal with trailing zeros + if (scale > 0) { + leadingZeros += scale; + if (appendAsInteger) { + scale = 0; + } + } + + // Append digit + shiftLeft(leadingZeros + 1); + setDigitPos(0, value); + + // Fix scale if in integer mode + if (appendAsInteger) { + scale += leadingZeros + 1; + } +} + +UnicodeString DecimalQuantity::toPlainString() const { + UnicodeString sb; + if (isNegative()) { + sb.append('-'); + } + for (int m = getUpperDisplayMagnitude(); m >= getLowerDisplayMagnitude(); m--) { + sb.append(getDigit(m) + '0'); + if (m == 0) { sb.append('.'); } + } + return sb; +} + +//////////////////////////////////////////////////// +/// End of DecimalQuantity_AbstractBCD.java /// +/// Start of DecimalQuantity_DualStorageBCD.java /// +//////////////////////////////////////////////////// + +int8_t DecimalQuantity::getDigitPos(int32_t position) const { + if (usingBytes) { + if (position < 0 || position > precision) { return 0; } + return fBCD.bcdBytes.ptr[position]; + } else { + if (position < 0 || position >= 16) { return 0; } + return (int8_t) ((fBCD.bcdLong >> (position * 4)) & 0xf); + } +} + +void DecimalQuantity::setDigitPos(int32_t position, int8_t value) { + U_ASSERT(position >= 0); + if (usingBytes) { + ensureCapacity(position + 1); + fBCD.bcdBytes.ptr[position] = value; + } else if (position >= 16) { + switchStorage(); + ensureCapacity(position + 1); + fBCD.bcdBytes.ptr[position] = value; + } else { + int shift = position * 4; + fBCD.bcdLong = (fBCD.bcdLong & ~(0xfL << shift)) | ((long) value << shift); + } +} + +void DecimalQuantity::shiftLeft(int32_t numDigits) { + if (!usingBytes && precision + numDigits > 16) { + switchStorage(); + } + if (usingBytes) { + ensureCapacity(precision + numDigits); + int i = precision + numDigits - 1; + for (; i >= numDigits; i--) { + fBCD.bcdBytes.ptr[i] = fBCD.bcdBytes.ptr[i - numDigits]; + } + for (; i >= 0; i--) 
{ + fBCD.bcdBytes.ptr[i] = 0; + } + } else { + fBCD.bcdLong <<= (numDigits * 4); + } + scale -= numDigits; + precision += numDigits; +} + +void DecimalQuantity::shiftRight(int32_t numDigits) { + if (usingBytes) { + int i = 0; + for (; i < precision - numDigits; i++) { + fBCD.bcdBytes.ptr[i] = fBCD.bcdBytes.ptr[i + numDigits]; + } + for (; i < precision; i++) { + fBCD.bcdBytes.ptr[i] = 0; + } + } else { + fBCD.bcdLong >>= (numDigits * 4); + } + scale += numDigits; + precision -= numDigits; +} + +void DecimalQuantity::setBcdToZero() { + if (usingBytes) { + delete[] fBCD.bcdBytes.ptr; + fBCD.bcdBytes.ptr = nullptr; + usingBytes = false; + } + fBCD.bcdLong = 0L; + scale = 0; + precision = 0; + isApproximate = false; + origDouble = 0; + origDelta = 0; +} + +void DecimalQuantity::readIntToBcd(int32_t n) { + U_ASSERT(n != 0); + // ints always fit inside the long implementation. + uint64_t result = 0L; + int i = 16; + for (; n != 0; n /= 10, i--) { + result = (result >> 4) + ((static_cast(n) % 10) << 60); + } + U_ASSERT(!usingBytes); + fBCD.bcdLong = result >> (i * 4); + scale = 0; + precision = 16 - i; +} + +void DecimalQuantity::readLongToBcd(int64_t n) { + U_ASSERT(n != 0); + if (n >= 10000000000000000L) { + ensureCapacity(); + int i = 0; + for (; n != 0L; n /= 10L, i++) { + fBCD.bcdBytes.ptr[i] = static_cast(n % 10); + } + U_ASSERT(usingBytes); + scale = 0; + precision = i; + } else { + uint64_t result = 0L; + int i = 16; + for (; n != 0L; n /= 10L, i--) { + result = (result >> 4) + ((n % 10) << 60); + } + U_ASSERT(i >= 0); + U_ASSERT(!usingBytes); + fBCD.bcdLong = result >> (i * 4); + scale = 0; + precision = 16 - i; + } +} + +void DecimalQuantity::readDecNumberToBcd(decNumber *dn) { + if (dn->digits > 16) { + ensureCapacity(dn->digits); + for (int32_t i = 0; i < dn->digits; i++) { + fBCD.bcdBytes.ptr[i] = dn->lsu[i]; + } + } else { + uint64_t result = 0L; + for (int32_t i = 0; i < dn->digits; i++) { + result |= static_cast(dn->lsu[i]) << (4 * i); + } + fBCD.bcdLong = 
result; + } + scale = dn->exponent; + precision = dn->digits; +} + +void DecimalQuantity::compact() { + if (usingBytes) { + int32_t delta = 0; + for (; delta < precision && fBCD.bcdBytes.ptr[delta] == 0; delta++); + if (delta == precision) { + // Number is zero + setBcdToZero(); + return; + } else { + // Remove trailing zeros + shiftRight(delta); + } + + // Compute precision + int32_t leading = precision - 1; + for (; leading >= 0 && fBCD.bcdBytes.ptr[leading] == 0; leading--); + precision = leading + 1; + + // Switch storage mechanism if possible + if (precision <= 16) { + switchStorage(); + } + + } else { + if (fBCD.bcdLong == 0L) { + // Number is zero + setBcdToZero(); + return; + } + + // Compact the number (remove trailing zeros) + // TODO: Use a more efficient algorithm here and below. There is a logarithmic one. + int32_t delta = 0; + for (; delta < precision && getDigitPos(delta) == 0; delta++); + fBCD.bcdLong >>= delta * 4; + scale += delta; + + // Compute precision + int32_t leading = precision - 1; + for (; leading >= 0 && getDigitPos(leading) == 0; leading--); + precision = leading + 1; + } +} + +void DecimalQuantity::ensureCapacity() { + ensureCapacity(40); +} + +void DecimalQuantity::ensureCapacity(int32_t capacity) { + if (capacity == 0) { return; } + int32_t oldCapacity = usingBytes ? fBCD.bcdBytes.len : 0; + if (!usingBytes) { + // TODO: There is nothing being done to check for memory allocation failures. 
+        fBCD.bcdBytes.ptr = new int8_t[capacity];
+        fBCD.bcdBytes.len = capacity;
+        // Initialize the byte array to zeros (this is done automatically in Java)
+        uprv_memset(fBCD.bcdBytes.ptr, 0, capacity * sizeof(int8_t));
+    } else if (oldCapacity < capacity) {
+        auto bcd1 = new int8_t[capacity * 2];
+        uprv_memcpy(bcd1, fBCD.bcdBytes.ptr, oldCapacity * sizeof(int8_t));
+        // Initialize the rest of the NEW byte array to zeros (this is done automatically in Java).
+        // The target must be bcd1, not the old array, which is deleted on the next line; the whole
+        // tail is cleared because the recorded length below is capacity * 2.
+        uprv_memset(bcd1 + oldCapacity, 0, (capacity * 2 - oldCapacity) * sizeof(int8_t));
+        delete[] fBCD.bcdBytes.ptr;
+        fBCD.bcdBytes.ptr = bcd1;
+        fBCD.bcdBytes.len = capacity * 2;
+    }
+    usingBytes = true;
+}
+
+/**
+ * Toggles the BCD storage between the packed 64-bit long (4 bits per digit) and the byte array
+ * (one digit per byte). The first `precision` digits are carried over; digit 0 is the least
+ * significant one in both layouts.
+ */
+void DecimalQuantity::switchStorage() {
+    if (usingBytes) {
+        // Change from bytes to long
+        uint64_t bcdLong = 0L;
+        for (int i = precision - 1; i >= 0; i--) {
+            bcdLong <<= 4;
+            bcdLong |= fBCD.bcdBytes.ptr[i];
+        }
+        delete[] fBCD.bcdBytes.ptr;
+        fBCD.bcdBytes.ptr = nullptr;
+        fBCD.bcdLong = bcdLong;
+        usingBytes = false;
+    } else {
+        // Change from long to bytes
+        // Copy the long into a local variable since it will get munged when we allocate the bytes
+        uint64_t bcdLong = fBCD.bcdLong;
+        ensureCapacity();
+        for (int i = 0; i < precision; i++) {
+            fBCD.bcdBytes.ptr[i] = static_cast<int8_t>(bcdLong & 0xf);
+            bcdLong >>= 4;
+        }
+        U_ASSERT(usingBytes);
+    }
+}
+
+/**
+ * Replaces this object's BCD digits with those of `other`. The current digits are reset with
+ * setBcdToZero() first; then either the first other.precision bytes or the packed long is copied,
+ * depending on the storage mode of `other`.
+ */
+void DecimalQuantity::copyBcdFrom(const DecimalQuantity &other) {
+    setBcdToZero();
+    if (other.usingBytes) {
+        ensureCapacity(other.precision);
+        uprv_memcpy(fBCD.bcdBytes.ptr, other.fBCD.bcdBytes.ptr, other.precision * sizeof(int8_t));
+    } else {
+        fBCD.bcdLong = other.fBCD.bcdLong;
+    }
+}
+
+/**
+ * Validates the internal BCD invariants for the current storage mode.
+ *
+ * @return A static message describing the first violated invariant, or nullptr if none is violated.
+ */
+const char16_t* DecimalQuantity::checkHealth() const {
+    if (usingBytes) {
+        if (precision == 0) { return u"Zero precision but we are in byte mode"; }
+        int32_t capacity = fBCD.bcdBytes.len;
+        if (precision > capacity) { return u"Precision exceeds length of byte array"; }
+        if (getDigitPos(precision - 1) == 0) { return u"Most significant digit is zero in byte mode"; }
+        // This branch is the byte-mode check, so the message must say "byte mode".
+        if (getDigitPos(0) == 0) { return u"Least significant digit is zero in byte mode"; }
+        for (int i = 0; i < precision; i++) {
+            if (getDigitPos(i) >= 10) { return u"Digit exceeding 10 in byte array"; }
+            if (getDigitPos(i) < 0) { return u"Digit below 0 in byte array"; }
+        }
+        for (int i = precision; i < capacity; i++) {
+            if (getDigitPos(i) != 0) { return u"Nonzero digits outside of range in byte array"; }
+        }
+    } else {
+        if (precision == 0 && fBCD.bcdLong != 0) {
+            return u"Value in bcdLong even though precision is zero";
+        }
+        if (precision > 16) { return u"Precision exceeds length of long"; }
+        if (precision != 0 && getDigitPos(precision - 1) == 0) {
+            return u"Most significant digit is zero in long mode";
+        }
+        if (precision != 0 && getDigitPos(0) == 0) {
+            return u"Least significant digit is zero in long mode";
+        }
+        for (int i = 0; i < precision; i++) {
+            if (getDigitPos(i) >= 10) { return u"Digit exceeding 10 in long"; }
+            if (getDigitPos(i) < 0) { return u"Digit below 0 in long (?!)"; }
+        }
+        for (int i = precision; i < 16; i++) {
+            if (getDigitPos(i) != 0) { return u"Nonzero digits outside of range in long"; }
+        }
+    }
+
+    // No error
+    return nullptr;
+}
+
+/**
+ * Builds a debugging description containing the four display positions (clamped to +/-999), the
+ * storage mode, the digits (most significant first) and the scale. The text is formatted into a
+ * 100-byte buffer, so longer descriptions are truncated by snprintf.
+ */
+UnicodeString DecimalQuantity::toString() const {
+    auto digits = new char[precision + 1];
+    for (int32_t i = 0; i < precision; i++) {
+        digits[i] = getDigitPos(precision - i - 1) + '0';
+    }
+    digits[precision] = 0;
+    // Zero-initialized so that the widening copy below never reads indeterminate bytes.
+    char buffer8[100] = {0};
+    snprintf(
+            buffer8,
+            100,
+            "<DecimalQuantity %d:%d:%d:%d %s %s%s%d>",
+            (lOptPos > 999 ? 999 : lOptPos),
+            lReqPos,
+            rReqPos,
+            (rOptPos < -999 ? -999 : rOptPos),
+            (usingBytes ? "bytes" : "long"),
+            (precision == 0 ? "0" : digits),
+            "E",
+            scale);
+    delete[] digits;
+
+    // Convert from char to char16_t to avoid codepage conversion
+    char16_t buffer16[100];
+    for (int32_t i = 0; i < 100; i++) {
+        buffer16[i] = static_cast<char16_t>(buffer8[i]);
+    }
+    return UnicodeString(buffer16);
+}
+
+/**
+ * Returns the digits (most significant first) followed by "E" and the decimal scale.
+ */
+UnicodeString DecimalQuantity::toNumberString() const {
+    // Exponent part: 'E' + up to 11 characters for an int32_t ("-2147483648") + NUL = 13 bytes.
+    auto digits = new char[precision + 13];
+    for (int32_t i = 0; i < precision; i++) {
+        digits[i] = getDigitPos(precision - i - 1) + '0';
+    }
+    snprintf(digits + precision, 13, "E%d", scale);
+    UnicodeString ret(digits);
+    delete[] digits;
+    return ret;
+}
diff --git a/icu4c/source/i18n/number_decimalquantity.h b/icu4c/source/i18n/number_decimalquantity.h
new file mode 100644
index 0000000000..ec0d69ebda
--- /dev/null
+++ b/icu4c/source/i18n/number_decimalquantity.h
@@ -0,0 +1,432 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#ifndef NUMBERFORMAT_DECIMALQUANTITY_H
+#define NUMBERFORMAT_DECIMALQUANTITY_H
+
+#include <cstdint>
+#include <unicode/umachine.h>
+#include <decNumber.h>
+#include <standardplural.h>
+#include <plurrule_impl.h>
+#include "number_types.h"
+
+U_NAMESPACE_BEGIN namespace number {
+namespace impl {
+
+/**
+ * A class for representing a number to be processed by the decimal formatting pipeline. Includes
+ * methods for rounding, plural rules, and decimal digit extraction.
+ *

+ * <p>By design, this is NOT IMMUTABLE and NOT THREAD SAFE. It is intended to be an intermediate
+ * object holding state during a pass through the decimal formatting pipeline.
+ *
+ * <p>Represents numbers and digit display properties using Binary Coded Decimal (BCD).
+ *
+ * <p>Java has multiple implementations for testing, but C++ has only one implementation.
+ */
+class DecimalQuantity : public IFixedDecimal, public UMemory {
+  public:
+    /** Copy constructor. */
+    DecimalQuantity(const DecimalQuantity &other);
+
+    DecimalQuantity();
+
+    ~DecimalQuantity();
+
+    /**
+     * Sets this instance to be equal to another instance.
+     *
+     * @param other The instance to copy from.
+     */
+    DecimalQuantity &operator=(const DecimalQuantity &other);
+
+    /**
+     * Sets the minimum and maximum integer digits that this {@link DecimalQuantity} should generate.
+     * This method does not perform rounding.
+     *
+     * @param minInt The minimum number of integer digits.
+     * @param maxInt The maximum number of integer digits.
+     */
+    void setIntegerLength(int32_t minInt, int32_t maxInt);
+
+    /**
+     * Sets the minimum and maximum fraction digits that this {@link DecimalQuantity} should generate.
+     * This method does not perform rounding.
+     *
+     * @param minFrac The minimum number of fraction digits.
+     * @param maxFrac The maximum number of fraction digits.
+     */
+    void setFractionLength(int32_t minFrac, int32_t maxFrac);
+
+    /**
+     * Rounds the number to a specified interval, such as 0.05.
+     *
+     * <p>If rounding to a power of ten, use the more efficient {@link #roundToMagnitude} instead.
+     *
+     * @param roundingIncrement The increment to which to round.
+     * @param roundingMode The {@link RoundingMode} to use if rounding is necessary.
+     */
+    void roundToIncrement(double roundingIncrement, RoundingMode roundingMode, UErrorCode& status);
+
+    /**
+     * Rounds the number to a specified magnitude (power of ten).
+     *
+     * @param magnitude The power of ten to which to round. For example, a value of -2 will
+     *     round to 2 decimal places.
+     * @param roundingMode The {@link RoundingMode} to use if rounding is necessary.
+     */
+    void roundToMagnitude(int32_t magnitude, RoundingMode roundingMode, UErrorCode& status);
+
+    /**
+     * Rounds the number to an infinite number of decimal points. This has no effect except for
+     * forcing the double in {@link DecimalQuantity_AbstractBCD} to adopt its exact representation.
+     */
+    void roundToInfinity();
+
+    /**
+     * Multiply the internal value.
+     *
+     * @param multiplicand The value by which to multiply.
+     */
+    void multiplyBy(int32_t multiplicand);
+
+    /**
+     * Scales the number by a power of ten. For example, if the value is currently "1234.56", calling
+     * this method with delta=-3 will change the value to "1.23456".
+     *
+     * @param delta The number of magnitudes of ten to change by.
+     */
+    void adjustMagnitude(int32_t delta);
+
+    /**
+     * @return The power of ten corresponding to the most significant nonzero digit.
+     * The number must not be zero.
+     */
+    int32_t getMagnitude() const;
+
+    /** @return Whether the value represented by this {@link DecimalQuantity} is zero. */
+    bool isZero() const;
+
+    /** @return Whether the value represented by this {@link DecimalQuantity} is less than zero. */
+    bool isNegative() const;
+
+    /** @return Whether the value represented by this {@link DecimalQuantity} is infinite. */
+    bool isInfinite() const override;
+
+    /** @return Whether the value represented by this {@link DecimalQuantity} is not a number. */
+    bool isNaN() const override;
+
+    int64_t toLong() const;
+
+    int64_t toFractionLong(bool includeTrailingZeros) const;
+
+    /** @return The value contained in this {@link DecimalQuantity} approximated as a double. */
+    double toDouble() const;
+
+    DecimalQuantity &setToInt(int32_t n);
+
+    DecimalQuantity &setToLong(int64_t n);
+
+    DecimalQuantity &setToDouble(double n);
+
+    /** decNumber is similar to BigDecimal in Java. */
+
+    DecimalQuantity &setToDecNumber(StringPiece n);
+
+    /**
+     * Appends a digit, optionally with one or more leading zeros, to the end of the value represented
+     * by this DecimalQuantity.
+     *
+     * <p>The primary use of this method is to construct numbers during a parsing loop. It allows
+     * parsing to take advantage of the digit list infrastructure primarily designed for formatting.
+     *
+     * @param value The digit to append.
+     * @param leadingZeros The number of zeros to append before the digit. For example, if the value
+     *     in this instance starts as 12.3, and you append a 4 with 1 leading zero, the value becomes
+     *     12.304.
+     * @param appendAsInteger If true, increase the magnitude of existing digits to make room for the
+     *     new digit. If false, append to the end like a fraction digit. If true, there must not be
+     *     any fraction digits already in the number.
+     * @internal
+     * @deprecated This API is ICU internal only.
+     */
+    void appendDigit(int8_t value, int32_t leadingZeros, bool appendAsInteger);
+
+    /**
+     * Computes the plural form for this number based on the specified set of rules.
+     *
+     * @param rules A {@link PluralRules} object representing the set of rules.
+     * @return The {@link StandardPlural} according to the PluralRules. If the plural form is not in
+     *     the set of standard plurals, {@link StandardPlural#OTHER} is returned instead.
+     */
+    StandardPlural::Form getStandardPlural(const PluralRules *rules) const;
+
+    double getPluralOperand(PluralOperand operand) const override;
+
+    /**
+     * Gets the digit at the specified magnitude. For example, if the represented number is 12.3,
+     * getDigit(-1) returns 3, since 3 is the digit corresponding to 10^-1.
+     *
+     * @param magnitude The magnitude of the digit.
+     * @return The digit at the specified magnitude.
+     */
+    int8_t getDigit(int32_t magnitude) const;
+
+    /**
+     * Gets the largest power of ten that needs to be displayed. The value returned by this function
+     * will be bounded between minInt and maxInt.
+     *
+     * @return The highest-magnitude digit to be displayed.
+     */
+    int32_t getUpperDisplayMagnitude() const;
+
+    /**
+     * Gets the smallest power of ten that needs to be displayed. The value returned by this function
+     * will be bounded between -minFrac and -maxFrac.
+     *
+     * @return The lowest-magnitude digit to be displayed.
+     */
+    int32_t getLowerDisplayMagnitude() const;
+
+    int32_t fractionCount() const;
+
+    int32_t fractionCountWithoutTrailingZeros() const;
+
+    void clear();
+
+    /** This method is for internal testing only. */
+    uint64_t getPositionFingerprint() const;
+
+//    /**
+//     * If the given {@link FieldPosition} is a {@link UFieldPosition}, populates it with the fraction
+//     * length and fraction long value. If the argument is not a {@link UFieldPosition}, nothing
+//     * happens.
+//     *
+//     * @param fp The {@link UFieldPosition} to populate.
+//     */
+//    void populateUFieldPosition(FieldPosition fp);
+
+    /**
+     * Checks whether the bytes stored in this instance are all valid. For internal unit testing only.
+     *
+     * @return An error message if this instance is invalid, or null if this instance is healthy.
+     */
+    const char16_t* checkHealth() const;
+
+    UnicodeString toString() const;
+
+    /* Returns the string in exponential notation. */
+    UnicodeString toNumberString() const;
+
+    /* Returns the string without exponential notation. Slightly slower than toNumberString(). */
+    UnicodeString toPlainString() const;
+
+    /** Visible for testing. Read-only accessor, so it is callable on const instances. */
+    inline bool isUsingBytes() const { return usingBytes; }
+
+    /** Visible for testing. Read-only accessor, so it is callable on const instances. */
+    inline bool isExplicitExactDouble() const { return explicitExactDouble; }
+
+  private:
+    /**
+     * The power of ten corresponding to the least significant digit in the BCD. For example, if this
+     * object represents the number "3.14", the BCD will be "0x314" and the scale will be -2.
+     *
+     * <p>Note that in {@link java.math.BigDecimal}, the scale is defined differently: the number of
+     * digits after the decimal place, which is the negative of our definition of scale.
+     */
+    int32_t scale;
+
+    /**
+     * The number of digits in the BCD. For example, "1007" has BCD "0x1007" and precision 4. The
+     * maximum precision in long mode is 16 since a long can hold only 16 digits.
+     *
+     * <p>This value must be re-calculated whenever the value in bcd changes by using {@link
+     * #compact()}.
+     */
+    int32_t precision;
+
+    /**
+     * A bitmask of properties relating to the number represented by this object.
+     *
+     * @see #NEGATIVE_FLAG
+     * @see #INFINITY_FLAG
+     * @see #NAN_FLAG
+     */
+    int8_t flags;
+
+    // The following three fields relate to the double-to-ascii fast path algorithm.
+    // When a double is given to DecimalQuantityBCD, it is converted to BCD using a fast algorithm. The
+    // fast algorithm guarantees correctness to only the first ~12 digits of the double. The process
+    // of rounding the number ensures that the converted digits are correct, falling back to a slow-
+    // path algorithm if required.  Therefore, if a DecimalQuantity is constructed from a double, it
+    // is *required* that roundToMagnitude(), roundToIncrement(), or roundToInfinity() is called. If
+    // you don't round, assertions will fail in certain other methods if you try calling them.
+
+    /**
+     * Whether the value in the BCD comes from the double fast path without having been rounded to
+     * ensure correctness
+     */
+    UBool isApproximate;
+
+    /**
+     * The original number provided by the user and which is represented in BCD. Used when we need to
+     * re-compute the BCD for an exact double representation.
+     */
+    double origDouble;
+
+    /**
+     * The change in magnitude relative to the original double. Used when we need to re-compute the
+     * BCD for an exact double representation.
+     */
+    int32_t origDelta;
+
+    // Four positions: left optional '(', left required '[', right required ']', right optional ')'.
+    // These four positions determine which digits are displayed in the output string.  They do NOT
+    // affect rounding.  These positions are internal-only and can be specified only by the public
+    // endpoints like setFractionLength, setIntegerLength, and setSignificantDigits, among others.
+    //
+    //   * Digits between lReqPos and rReqPos are in the "required zone" and are always displayed.
+    //   * Digits between lOptPos and rOptPos but outside the required zone are in the "optional zone"
+    //     and are displayed unless they are trailing off the left or right edge of the number and
+    //     have a numerical value of zero.  In order to be "trailing", the digits need to be beyond
+    //     the decimal point in their respective directions.
+    //   * Digits outside of the "optional zone" are never displayed.
+    //
+    // See the table below for illustrative examples.
+    //
+    // +---------+---------+---------+---------+------------+------------------------+--------------+
+    // | lOptPos | lReqPos | rReqPos | rOptPos |   number   |       positions        | en-US string |
+    // +---------+---------+---------+---------+------------+------------------------+--------------+
+    // |    5    |    2    |   -1    |   -5    |  1234.567  |    ( 12[34.5]67  )     |  1,234.567   |
+    // |    3    |    2    |   -1    |   -5    |  1234.567  |     1(2[34.5]67  )     |   234.567    |
+    // |    3    |    2    |   -1    |   -2    |  1234.567  |      1(2[34.5]6)7      |    234.56    |
+    // |    6    |    4    |    2    |   -5    | 123456789. |  123(45[67]89.     )   |   456,789.   |
+    // |    6    |    4    |    2    |    1    | 123456789. |     123(45[67]8)9.     |   456,780.   |
+    // |   -1    |   -1    |   -3    |   -4    |  0.123456  |      0.1([23]4)56      |    .0234     |
+    // |    6    |    4    |   -2    |   -2    |    12.3    |     (  [  12.3 ])      |   0012.30    |
+    // +---------+---------+---------+---------+------------+------------------------+--------------+
+    //
+    int32_t lOptPos = INT32_MAX;
+    int32_t lReqPos = 0;
+    int32_t rReqPos = 0;
+    int32_t rOptPos = INT32_MIN;
+
+    /**
+     * The BCD of the 16 digits of the number represented by this object. Every 4 bits of the long map
+     * to one digit. For example, the number "12345" in BCD is "0x12345".
+     *
+     * <p>Whenever bcd changes internally, {@link #compact()} must be called, except in special cases
+     * like setting the digit to zero.
+     */
+    union {
+        struct {
+            int8_t *ptr;
+            int32_t len;
+        } bcdBytes;
+        uint64_t bcdLong;
+    } fBCD;
+
+    bool usingBytes = false;
+
+    /**
+     * Whether this {@link DecimalQuantity} has been explicitly converted to an exact double. true if
+     * backed by a double that was explicitly converted via convertToAccurateDouble; false otherwise.
+     * Used for testing.
+     */
+    bool explicitExactDouble = false;
+
+    /**
+     * Returns a single digit from the BCD list. No internal state is changed by calling this method.
+     *
+     * @param position The position of the digit to pop, counted in BCD units from the least
+     *     significant digit. If outside the range supported by the implementation, zero is returned.
+     * @return The digit at the specified location.
+     */
+    int8_t getDigitPos(int32_t position) const;
+
+    /**
+     * Sets the digit in the BCD list. This method only sets the digit; it is the caller's
+     * responsibility to call {@link #compact} after setting the digit.
+     *
+     * @param position The position of the digit to set, counted in BCD units from the least
+     *     significant digit. If outside the range supported by the implementation, an AssertionError
+     *     is thrown.
+     * @param value The digit to set at the specified location.
+     */
+    void setDigitPos(int32_t position, int8_t value);
+
+    /**
+     * Adds zeros to the end of the BCD list. This will result in an invalid BCD representation; it is
+     * the caller's responsibility to do further manipulation and then call {@link #compact}.
+     *
+     * @param numDigits The number of zeros to add.
+     */
+    void shiftLeft(int32_t numDigits);
+
+    void shiftRight(int32_t numDigits);
+
+    /**
+     * Sets the internal representation to zero. Clears any values stored in scale, precision,
+     * hasDouble, origDouble, origDelta, and BCD data.
+     */
+    void setBcdToZero();
+
+    /**
+     * Sets the internal BCD state to represent the value in the given int. The int is guaranteed to
+     * be positive. The internal state is guaranteed to be empty when this method is called.
+     *
+     * @param n The value to consume.
+     */
+    void readIntToBcd(int32_t n);
+
+    /**
+     * Sets the internal BCD state to represent the value in the given long. The long is guaranteed to
+     * be positive. The internal state is guaranteed to be empty when this method is called.
+     *
+     * @param n The value to consume.
+     */
+    void readLongToBcd(int64_t n);
+
+    void readDecNumberToBcd(decNumber *dn);
+
+    void copyBcdFrom(const DecimalQuantity &other);
+
+    /**
+     * Removes trailing zeros from the BCD (adjusting the scale as required) and then computes the
+     * precision. The precision is the number of digits in the number up through the greatest nonzero
+     * digit.
+     *
+     * <p>This method must always be called when bcd changes in order for assumptions to be correct in
+     * methods like {@link #fractionCount()}.
+     */
+    void compact();
+
+    void _setToInt(int32_t n);
+
+    void _setToLong(int64_t n);
+
+    void _setToDoubleFast(double n);
+
+    void _setToDecNumber(decNumber *n);
+
+    void convertToAccurateDouble();
+
+    double toDoubleFromOriginal() const;
+
+    /** Ensure that a byte array of at least 40 digits is allocated. */
+    void ensureCapacity();
+
+    void ensureCapacity(int32_t capacity);
+
+    /** Switches the internal storage mechanism between the 64-bit long and the byte array. */
+    void switchStorage();
+};
+
+} // namespace impl
+} // namespace number
+U_NAMESPACE_END
+
+
+#endif //NUMBERFORMAT_DECIMALQUANTITY_H
diff --git a/icu4c/source/i18n/number_decimfmtprops.cpp b/icu4c/source/i18n/number_decimfmtprops.cpp
new file mode 100644
index 0000000000..f4009024c6
--- /dev/null
+++ b/icu4c/source/i18n/number_decimfmtprops.cpp
@@ -0,0 +1,94 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "number_decimfmtprops.h"
+
+using namespace icu::number::impl;
+
+DecimalFormatProperties::DecimalFormatProperties() {
+    clear();  // A freshly constructed instance holds exactly the values assigned by clear().
+}
+
+void DecimalFormatProperties::clear() {  // Assigns the "unset" value to every property.
+    compactStyle.nullify();  // Nullable values: emptied.
+    currency.nullify();
+    currencyUsage.nullify();
+    padPosition.nullify();
+    roundingMode.nullify();
+    currencyPluralInfo.adoptInstead(nullptr);  // Pointer wrapper: now holds nullptr.
+    decimalPatternMatchRequired = false;  // Boolean switches: off.
+    decimalSeparatorAlwaysShown = false;
+    exponentSignAlwaysShown = false;
+    parseCaseSensitive = false;
+    parseIntegerOnly = false;
+    parseLenient = false;
+    parseNoExponent = false;
+    parseToBigDecimal = false;
+    signAlwaysShown = false;
+    formatWidth = -1;  // Widths, sizes and digit counts: -1.
+    groupingSize = -1;
+    secondaryGroupingSize = -1;
+    maximumFractionDigits = -1;
+    maximumIntegerDigits = -1;
+    maximumSignificantDigits = -1;
+    minimumExponentDigits = -1;
+    minimumFractionDigits = -1;
+    minimumGroupingDigits = -1;
+    minimumIntegerDigits = -1;
+    minimumSignificantDigits = -1;
+    magnitudeMultiplier = 0;  // Multipliers and rounding increment: zero.
+    multiplier = 0;
+    roundingIncrement = 0.0;
+    negativePrefix.setToBogus();  // Strings: bogus.
+    negativePrefixPattern.setToBogus();
+    negativeSuffix.setToBogus();
+    negativeSuffixPattern.setToBogus();
+    positivePrefix.setToBogus();
+    positivePrefixPattern.setToBogus();
+    positiveSuffix.setToBogus();
+    positiveSuffixPattern.setToBogus();
+    padString.setToBogus();
+}
+
+bool DecimalFormatProperties::operator==(const DecimalFormatProperties &other) const {
+    // Field-by-field comparison; evaluation stops at the first field that differs.
+    // NOTE(review): currencyPluralInfo is compared via getAlias(), apparently by address -- confirm.
+    return compactStyle == other.compactStyle
+        && currency == other.currency
+        && currencyPluralInfo.getAlias() == other.currencyPluralInfo.getAlias()
+        && currencyUsage == other.currencyUsage
+        && decimalPatternMatchRequired == other.decimalPatternMatchRequired
+        && decimalSeparatorAlwaysShown == other.decimalSeparatorAlwaysShown
+        && exponentSignAlwaysShown == other.exponentSignAlwaysShown
+        && formatWidth == other.formatWidth
+        && groupingSize == other.groupingSize
+        && magnitudeMultiplier == other.magnitudeMultiplier
+        && maximumFractionDigits == other.maximumFractionDigits
+        && maximumIntegerDigits == other.maximumIntegerDigits
+        && maximumSignificantDigits == other.maximumSignificantDigits
+        && minimumExponentDigits == other.minimumExponentDigits
+        && minimumFractionDigits == other.minimumFractionDigits
+        && minimumGroupingDigits == other.minimumGroupingDigits
+        && minimumIntegerDigits == other.minimumIntegerDigits
+        && minimumSignificantDigits == other.minimumSignificantDigits
+        && multiplier == other.multiplier
+        && negativePrefix == other.negativePrefix
+        && negativePrefixPattern == other.negativePrefixPattern
+        && negativeSuffix == other.negativeSuffix
+        && negativeSuffixPattern == other.negativeSuffixPattern
+        && padPosition == other.padPosition
+        && padString == other.padString
+        && parseCaseSensitive == other.parseCaseSensitive
+        && parseIntegerOnly == other.parseIntegerOnly
+        && parseLenient == other.parseLenient
+        && parseNoExponent == other.parseNoExponent
+        && parseToBigDecimal == other.parseToBigDecimal
+        && positivePrefix == other.positivePrefix
+        && positivePrefixPattern == other.positivePrefixPattern
+        && positiveSuffix == other.positiveSuffix
+        && positiveSuffixPattern == other.positiveSuffixPattern
+        && roundingIncrement == other.roundingIncrement
+        && roundingMode == other.roundingMode
+        && secondaryGroupingSize == other.secondaryGroupingSize
+        && signAlwaysShown == other.signAlwaysShown;
+}
diff --git a/icu4c/source/i18n/number_decimfmtprops.h b/icu4c/source/i18n/number_decimfmtprops.h
new file mode 100644
index 0000000000..8d8127875d
--- /dev/null
+++ 
b/icu4c/source/i18n/number_decimfmtprops.h
@@ -0,0 +1,81 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#ifndef NUMBERFORMAT_PROPERTIES_H
+#define NUMBERFORMAT_PROPERTIES_H
+
+#include "unicode/unistr.h"
+#include <cstdint>
+#include <unicode/plurrule.h>
+#include <unicode/currpinf.h>
+#include "unicode/unum.h"
+#include "number_types.h"
+
+U_NAMESPACE_BEGIN
+namespace number {
+namespace impl {
+
+/**
+ * Plain collection of decimal-format settings. The default constructor, clear(), operator== and
+ * the defaulted copy assignment are the only operations declared here.
+ */
+struct DecimalFormatProperties {
+
+  public:
+    NullableValue<UNumberCompactStyle> compactStyle;
+    NullableValue<CurrencyUnit> currency;
+    CopyableLocalPointer<CurrencyPluralInfo> currencyPluralInfo;
+    NullableValue<UCurrencyUsage> currencyUsage;
+    bool decimalPatternMatchRequired;
+    bool decimalSeparatorAlwaysShown;
+    bool exponentSignAlwaysShown;
+    int32_t formatWidth;
+    int32_t groupingSize;
+    int32_t magnitudeMultiplier;
+    int32_t maximumFractionDigits;
+    int32_t maximumIntegerDigits;
+    int32_t maximumSignificantDigits;
+    int32_t minimumExponentDigits;
+    int32_t minimumFractionDigits;
+    int32_t minimumGroupingDigits;
+    int32_t minimumIntegerDigits;
+    int32_t minimumSignificantDigits;
+    int32_t multiplier;
+    UnicodeString negativePrefix;
+    UnicodeString negativePrefixPattern;
+    UnicodeString negativeSuffix;
+    UnicodeString negativeSuffixPattern;
+    NullableValue<PadPosition> padPosition;
+    UnicodeString padString;
+    bool parseCaseSensitive;
+    bool parseIntegerOnly;
+    bool parseLenient;
+    bool parseNoExponent;
+    bool parseToBigDecimal;
+    //PluralRules pluralRules;
+    UnicodeString positivePrefix;
+    UnicodeString positivePrefixPattern;
+    UnicodeString positiveSuffix;
+    UnicodeString positiveSuffixPattern;
+    double roundingIncrement;
+    NullableValue<RoundingMode> roundingMode;
+    int32_t secondaryGroupingSize;
+    bool signAlwaysShown;
+
+    DecimalFormatProperties();
+
+    //DecimalFormatProperties(const DecimalFormatProperties &other) = default;
+
+    DecimalFormatProperties &operator=(const DecimalFormatProperties &other) = default;
+
+    bool operator==(const DecimalFormatProperties &other) const;
+
+    void clear();
+};
+
+} // namespace impl
+} // namespace number
+U_NAMESPACE_END
+
+
+#endif //NUMBERFORMAT_PROPERTIES_H
diff --git a/icu4c/source/i18n/number_fluent.cpp b/icu4c/source/i18n/number_fluent.cpp
new file mode 100644
index 0000000000..ecfd4f9fe8
--- /dev/null
+++ b/icu4c/source/i18n/number_fluent.cpp
@@ -0,0 +1,322 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include <uassert.h>
+#include "unicode/numberformatter.h"
+#include "number_decimalquantity.h"
+#include "number_results.h"
+#include "number_formatimpl.h"
+
+using namespace icu::number;
+using namespace icu::number::impl;
+
+// Fluent setters. Each one copy-constructs a Derived from *this, overwrites one field of the copy's
+// fMacros, and returns the copy by value; *this is never modified (every setter is const).
+
+template<typename Derived>
+Derived NumberFormatterSettings<Derived>::notation(const Notation &notation) const {
+    Derived copy(*this);
+    // NOTE: Slicing is OK.
+    copy.fMacros.notation = notation;
+    return copy;
+}
+
+template<typename Derived>
+Derived NumberFormatterSettings<Derived>::unit(const icu::MeasureUnit &unit) const {
+    Derived copy(*this);
+    // NOTE: Slicing occurs here. However, CurrencyUnit can be restored from MeasureUnit.
+    // TimeUnit may be affected, but TimeUnit is not as relevant to number formatting.
+    copy.fMacros.unit = unit;
+    return copy;
+}
+
+template<typename Derived>
+Derived NumberFormatterSettings<Derived>::adoptUnit(const icu::MeasureUnit *unit) const {
+    Derived copy(*this);
+    // Just copy the unit into the MacroProps by value, and delete it since we have ownership.
+    // NOTE: Slicing occurs here. However, CurrencyUnit can be restored from MeasureUnit.
+    // TimeUnit may be affected, but TimeUnit is not as relevant to number formatting.
+    // A null pointer leaves the unit of the copied settings untouched.
+    if (unit != nullptr) {
+        copy.fMacros.unit = *unit;
+        delete unit;
+    }
+    return copy;
+}
+
+template<typename Derived>
+Derived NumberFormatterSettings<Derived>::rounding(const Rounder &rounder) const {
+    Derived copy(*this);
+    // NOTE: Slicing is OK.
+    copy.fMacros.rounder = rounder;
+    return copy;
+}
+
+template<typename Derived>
+Derived NumberFormatterSettings<Derived>::grouping(const Grouper &grouper) const {
+    Derived copy(*this);
+    copy.fMacros.grouper = grouper;
+    return copy;
+}
+
+template<typename Derived>
+Derived NumberFormatterSettings<Derived>::integerWidth(const IntegerWidth &style) const {
+    Derived copy(*this);
+    copy.fMacros.integerWidth = style;
+    return copy;
+}
+
+template<typename Derived>
+Derived NumberFormatterSettings<Derived>::symbols(const DecimalFormatSymbols &symbols) const {
+    Derived copy(*this);
+    copy.fMacros.symbols.setTo(symbols);
+    return copy;
+}
+
+template<typename Derived>
+Derived NumberFormatterSettings<Derived>::adoptSymbols(const NumberingSystem *ns) const {
+    Derived copy(*this);
+    copy.fMacros.symbols.setTo(ns);
+    return copy;
+}
+
+template<typename Derived>
+Derived NumberFormatterSettings<Derived>::unitWidth(const UNumberUnitWidth &width) const {
+    Derived copy(*this);
+    copy.fMacros.unitWidth = width;
+    return copy;
+}
+
+template<typename Derived>
+Derived NumberFormatterSettings<Derived>::sign(const UNumberSignDisplay &style) const {
+    Derived copy(*this);
+    copy.fMacros.sign = style;
+    return copy;
+}
+
+template<typename Derived>
+Derived NumberFormatterSettings<Derived>::decimal(const UNumberDecimalSeparatorDisplay &style) const {
+    Derived copy(*this);
+    copy.fMacros.decimal = style;
+    return copy;
+}
+
+template<typename Derived>
+Derived NumberFormatterSettings<Derived>::padding(const Padder &padder) const {
+    Derived copy(*this);
+    copy.fMacros.padder = padder;
+    return copy;
+}
+
+template<typename Derived>
+Derived NumberFormatterSettings<Derived>::threshold(uint32_t threshold) const {
+    Derived copy(*this);
+    copy.fMacros.threshold = threshold;
+    return copy;
+}
+
+// Declare all classes that implement NumberFormatterSettings
+// See https://stackoverflow.com/a/495056/1407170
+template
+class icu::number::NumberFormatterSettings<icu::number::UnlocalizedNumberFormatter>;
+template
+class icu::number::NumberFormatterSettings<icu::number::LocalizedNumberFormatter>;
+
+
+UnlocalizedNumberFormatter NumberFormatter::with() {
+    UnlocalizedNumberFormatter result;
+    return result;
+}
+
+LocalizedNumberFormatter NumberFormatter::withLocale(const Locale &locale) {
+    return
with().locale(locale); +} + +// Make the child class constructor that takes the parent class call the parent class's copy constructor +UnlocalizedNumberFormatter::UnlocalizedNumberFormatter( + const NumberFormatterSettings &other) + : NumberFormatterSettings(other) { +} + +// Make the child class constructor that takes the parent class call the parent class's copy constructor +// For LocalizedNumberFormatter, also copy over the extra fields +LocalizedNumberFormatter::LocalizedNumberFormatter( + const NumberFormatterSettings &other) + : NumberFormatterSettings(other) { + // No additional copies required +} + +LocalizedNumberFormatter::LocalizedNumberFormatter(const MacroProps ¯os, const Locale &locale) { + fMacros = macros; + fMacros.locale = locale; +} + +LocalizedNumberFormatter UnlocalizedNumberFormatter::locale(const Locale &locale) const { + return LocalizedNumberFormatter(fMacros, locale); +} + +SymbolsWrapper::SymbolsWrapper(const SymbolsWrapper &other) { + doCopyFrom(other); +} + +SymbolsWrapper &SymbolsWrapper::operator=(const SymbolsWrapper &other) { + if (this == &other) { + return *this; + } + doCleanup(); + doCopyFrom(other); + return *this; +} + +SymbolsWrapper::~SymbolsWrapper() { + doCleanup(); +} + +void SymbolsWrapper::setTo(const DecimalFormatSymbols &dfs) { + doCleanup(); + fType = SYMPTR_DFS; + fPtr.dfs = new DecimalFormatSymbols(dfs); +} + +void SymbolsWrapper::setTo(const NumberingSystem *ns) { + doCleanup(); + fType = SYMPTR_NS; + fPtr.ns = ns; +} + +void SymbolsWrapper::doCopyFrom(const SymbolsWrapper &other) { + fType = other.fType; + switch (fType) { + case SYMPTR_NONE: + // No action necessary + break; + case SYMPTR_DFS: + // Memory allocation failures are exposed in copyErrorTo() + if (other.fPtr.dfs != nullptr) { + fPtr.dfs = new DecimalFormatSymbols(*other.fPtr.dfs); + } else { + fPtr.dfs = nullptr; + } + break; + case SYMPTR_NS: + // Memory allocation failures are exposed in copyErrorTo() + if (other.fPtr.ns != nullptr) { + fPtr.ns = 
new NumberingSystem(*other.fPtr.ns); + } else { + fPtr.ns = nullptr; + } + break; + } +} + +void SymbolsWrapper::doCleanup() { + switch (fType) { + case SYMPTR_NONE: + // No action necessary + break; + case SYMPTR_DFS: + delete fPtr.dfs; + break; + case SYMPTR_NS: + delete fPtr.ns; + break; + } +} + +bool SymbolsWrapper::isDecimalFormatSymbols() const { + return fType == SYMPTR_DFS; +} + +bool SymbolsWrapper::isNumberingSystem() const { + return fType == SYMPTR_NS; +} + +const DecimalFormatSymbols* SymbolsWrapper::getDecimalFormatSymbols() const { + U_ASSERT(fType == SYMPTR_DFS); + return fPtr.dfs; +} + +const NumberingSystem* SymbolsWrapper::getNumberingSystem() const { + U_ASSERT(fType == SYMPTR_NS); + return fPtr.ns; +} + +LocalizedNumberFormatter::~LocalizedNumberFormatter() { + delete fCompiled.load(); +} + +FormattedNumber LocalizedNumberFormatter::formatInt(int64_t value, UErrorCode &status) const { + if (U_FAILURE(status)) { return FormattedNumber(); } + auto results = new NumberFormatterResults(); + if (results == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return FormattedNumber(); + } + results->quantity.setToLong(value); + return formatImpl(results, status); +} + +FormattedNumber LocalizedNumberFormatter::formatDouble(double value, UErrorCode &status) const { + if (U_FAILURE(status)) { return FormattedNumber(); } + auto results = new NumberFormatterResults(); + if (results == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return FormattedNumber(); + } + results->quantity.setToDouble(value); + return formatImpl(results, status); +} + +FormattedNumber LocalizedNumberFormatter::formatDecimal(StringPiece value, UErrorCode &status) const { + if (U_FAILURE(status)) { return FormattedNumber(); } + auto results = new NumberFormatterResults(); + if (results == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return FormattedNumber(); + } + results->quantity.setToDecNumber(value); + return formatImpl(results, status); +} + +FormattedNumber 
+LocalizedNumberFormatter::formatImpl(impl::NumberFormatterResults *results, UErrorCode &status) const { + uint32_t currentCount = fCallCount.load(); + if (currentCount <= fMacros.threshold && fMacros.threshold > 0) { + currentCount = const_cast(this)->fCallCount.fetch_add(1) + 1; + } + const NumberFormatterImpl *compiled; + if (currentCount == fMacros.threshold && fMacros.threshold > 0) { + compiled = NumberFormatterImpl::fromMacros(fMacros, status); + U_ASSERT(fCompiled.load() == nullptr); + const_cast(this)->fCompiled.store(compiled); + compiled->apply(results->quantity, results->string, status); + } else if ((compiled = fCompiled.load()) != nullptr) { + compiled->apply(results->quantity, results->string, status); + } else { + NumberFormatterImpl::applyStatic(fMacros, results->quantity, results->string, status); + } + + return FormattedNumber(results); +} + +UnicodeString FormattedNumber::toString() const { + return fResults->string.toUnicodeString(); +} + +Appendable &FormattedNumber::appendTo(Appendable &appendable) { + appendable.appendString(fResults->string.chars(), fResults->string.length()); + return appendable; +} + +void FormattedNumber::populateFieldPosition(FieldPosition &fieldPosition, UErrorCode &status) { + fResults->string.populateFieldPosition(fieldPosition, 0, status); +} + +void +FormattedNumber::populateFieldPositionIterator(FieldPositionIterator &iterator, UErrorCode &status) { + fResults->string.populateFieldPositionIterator(iterator, status); +} + +FormattedNumber::~FormattedNumber() { + delete fResults; +} diff --git a/icu4c/source/i18n/number_formatimpl.cpp b/icu4c/source/i18n/number_formatimpl.cpp new file mode 100644 index 0000000000..6d337a19ab --- /dev/null +++ b/icu4c/source/i18n/number_formatimpl.cpp @@ -0,0 +1,455 @@ +// © 2017 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html + +#include +#include +#include +#include +#include "number_formatimpl.h" +#include "unicode/numfmt.h" +#include "number_patternstring.h" +#include "number_utils.h" +#include "unicode/numberformatter.h" +#include "unicode/dcfmtsym.h" +#include "number_scientific.h" +#include "number_compact.h" + +using namespace icu::number::impl; + +namespace { + +// NOTE: In Java, the method to get a pattern from the resource bundle exists in NumberFormat. +// In C++, we have to implement that logic here. +// TODO: Make Java and C++ consistent? + +enum CldrPatternStyle { + CLDR_PATTERN_STYLE_DECIMAL, + CLDR_PATTERN_STYLE_CURRENCY, + CLDR_PATTERN_STYLE_ACCOUNTING, + CLDR_PATTERN_STYLE_PERCENT + // TODO: Consider scientific format. +}; + +const char16_t * +doGetPattern(UResourceBundle *res, const char *nsName, const char *patternKey, UErrorCode &publicStatus, + UErrorCode &localStatus) { + // Construct the path into the resource bundle + CharString key; + key.append("NumberElements/", publicStatus); + key.append(nsName, publicStatus); + key.append("/patterns/", publicStatus); + key.append(patternKey, publicStatus); + if (U_FAILURE(publicStatus)) { + return u""; + } + return ures_getStringByKeyWithFallback(res, key.data(), nullptr, &localStatus); +} + +const char16_t *getPatternForStyle(const Locale &locale, const char *nsName, CldrPatternStyle style, + UErrorCode &status) { + const char *patternKey; + switch (style) { + case CLDR_PATTERN_STYLE_DECIMAL: + patternKey = "decimalFormat"; + break; + case CLDR_PATTERN_STYLE_CURRENCY: + patternKey = "currencyFormat"; + break; + case CLDR_PATTERN_STYLE_ACCOUNTING: + patternKey = "accountingFormat"; + break; + case CLDR_PATTERN_STYLE_PERCENT: + default: + patternKey = "percentFormat"; + break; + } + LocalUResourceBundlePointer res(ures_open(nullptr, locale.getName(), &status)); + + // Attempt to get the pattern with the native numbering system. 
+ UErrorCode localStatus = U_ZERO_ERROR; + const char16_t *pattern; + pattern = doGetPattern(res.getAlias(), nsName, patternKey, status, localStatus); + if (U_FAILURE(status)) { return u""; } + + // Fall back to latn if native numbering system does not have the right pattern + if (U_FAILURE(localStatus) && uprv_strcmp("latn", nsName) != 0) { + localStatus = U_ZERO_ERROR; + pattern = doGetPattern(res.getAlias(), "latn", patternKey, status, localStatus); + if (U_FAILURE(status)) { return u""; } + } + + return pattern; +} + +inline bool unitIsCurrency(const MeasureUnit &unit) { + return uprv_strcmp("currency", unit.getType()) == 0; +} + +inline bool unitIsNoUnit(const MeasureUnit &unit) { + return uprv_strcmp("none", unit.getType()) == 0; +} + +inline bool unitIsPercent(const MeasureUnit &unit) { + return uprv_strcmp("percent", unit.getSubtype()) == 0; +} + +inline bool unitIsPermille(const MeasureUnit &unit) { + return uprv_strcmp("permille", unit.getSubtype()) == 0; +} + +} // namespace + +NumberFormatterImpl *NumberFormatterImpl::fromMacros(const MacroProps ¯os, UErrorCode &status) { + return new NumberFormatterImpl(macros, true, status); +} + +void NumberFormatterImpl::applyStatic(const MacroProps ¯os, DecimalQuantity &inValue, + NumberStringBuilder &outString, UErrorCode &status) { + NumberFormatterImpl impl(macros, false, status); + impl.applyUnsafe(inValue, outString, status); +} + +// NOTE: C++ SPECIFIC DIFFERENCE FROM JAVA: +// The "safe" apply method uses a new MicroProps. In the MicroPropsGenerator, fMicros is copied into the new instance. +// The "unsafe" method simply re-uses fMicros, eliminating the extra copy operation. +// See MicroProps::processQuantity() for details. 
+ +void NumberFormatterImpl::apply(DecimalQuantity &inValue, NumberStringBuilder &outString, + UErrorCode &status) const { + if (U_FAILURE(status)) { return; } + MicroProps micros; + fMicroPropsGenerator->processQuantity(inValue, micros, status); + if (U_FAILURE(status)) { return; } + microsToString(micros, inValue, outString, status); +} + +void NumberFormatterImpl::applyUnsafe(DecimalQuantity &inValue, NumberStringBuilder &outString, + UErrorCode &status) { + if (U_FAILURE(status)) { return; } + fMicroPropsGenerator->processQuantity(inValue, fMicros, status); + if (U_FAILURE(status)) { return; } + microsToString(fMicros, inValue, outString, status); +} + +NumberFormatterImpl::NumberFormatterImpl(const MacroProps ¯os, bool safe, UErrorCode &status) { + fMicroPropsGenerator = macrosToMicroGenerator(macros, safe, status); +} + +////////// + +const MicroPropsGenerator * +NumberFormatterImpl::macrosToMicroGenerator(const MacroProps ¯os, bool safe, UErrorCode &status) { + const MicroPropsGenerator *chain = &fMicros; + + // Check that macros is error-free before continuing. + if (macros.copyErrorTo(status)) { + return nullptr; + } + + // TODO: Accept currency symbols from DecimalFormatSymbols? + + // Pre-compute a few values for efficiency. + bool isCurrency = unitIsCurrency(macros.unit); + bool isNoUnit = unitIsNoUnit(macros.unit); + bool isPercent = isNoUnit && unitIsPercent(macros.unit); + bool isPermille = isNoUnit && unitIsPermille(macros.unit); + bool isCldrUnit = !isCurrency && !isNoUnit; + bool isAccounting = + macros.sign == UNUM_SIGN_ACCOUNTING || macros.sign == UNUM_SIGN_ACCOUNTING_ALWAYS; + CurrencyUnit currency(kDefaultCurrency, status); + if (isCurrency) { + currency = CurrencyUnit(macros.unit, status); // Restore CurrencyUnit from MeasureUnit + } + UNumberUnitWidth unitWidth = UNUM_UNIT_WIDTH_SHORT; + if (macros.unitWidth != UNUM_UNIT_WIDTH_COUNT) { + unitWidth = macros.unitWidth; + } + + // Select the numbering system. 
+ LocalPointer nsLocal; + const NumberingSystem *ns; + if (macros.symbols.isNumberingSystem()) { + ns = macros.symbols.getNumberingSystem(); + } else { + // TODO: Is there a way to avoid creating the NumberingSystem object? + ns = NumberingSystem::createInstance(macros.locale, status); + // Give ownership to the function scope. + nsLocal.adoptInstead(ns); + } + const char *nsName = ns->getName(); + + // Load and parse the pattern string. It is used for grouping sizes and affixes only. + CldrPatternStyle patternStyle; + if (isPercent || isPermille) { + patternStyle = CLDR_PATTERN_STYLE_PERCENT; + } else if (!isCurrency || unitWidth == UNUM_UNIT_WIDTH_FULL_NAME) { + patternStyle = CLDR_PATTERN_STYLE_DECIMAL; + } else if (isAccounting) { + // NOTE: Although ACCOUNTING and ACCOUNTING_ALWAYS are only supported in currencies right now, + // the API contract allows us to add support to other units in the future. + patternStyle = CLDR_PATTERN_STYLE_ACCOUNTING; + } else { + patternStyle = CLDR_PATTERN_STYLE_CURRENCY; + } + const char16_t *pattern = getPatternForStyle(macros.locale, nsName, patternStyle, status); + auto patternInfo = new ParsedPatternInfo(); + fPatternInfo.adoptInstead(patternInfo); + PatternParser::parseToPatternInfo(UnicodeString(pattern), *patternInfo, status); + + ///////////////////////////////////////////////////////////////////////////////////// + /// START POPULATING THE DEFAULT MICROPROPS AND BUILDING THE MICROPROPS GENERATOR /// + ///////////////////////////////////////////////////////////////////////////////////// + + // Symbols + if (macros.symbols.isDecimalFormatSymbols()) { + fMicros.symbols = macros.symbols.getDecimalFormatSymbols(); + } else { + fMicros.symbols = new DecimalFormatSymbols(macros.locale, *ns, status); + // Give ownership to the NumberFormatterImpl. 
+ fSymbols.adoptInstead(fMicros.symbols); + } + + // Rounding strategy + if (!macros.rounder.isBogus()) { + fMicros.rounding = macros.rounder; + } else if (macros.notation.fType == Notation::NTN_COMPACT) { + fMicros.rounding = Rounder::integer().withMinDigits(2); + } else if (isCurrency) { + fMicros.rounding = Rounder::currency(UCURR_USAGE_STANDARD); + } else { + fMicros.rounding = Rounder::maxFraction(6); + } + fMicros.rounding.setLocaleData(currency, status); + + // Grouping strategy + if (!macros.grouper.isBogus()) { + fMicros.grouping = macros.grouper; + } else if (macros.notation.fType == Notation::NTN_COMPACT) { + // Compact notation uses minGrouping by default since ICU 59 + fMicros.grouping = Grouper::minTwoDigits(); + } else { + fMicros.grouping = Grouper::defaults(); + } + fMicros.grouping.setLocaleData(*fPatternInfo); + + // Padding strategy + if (!macros.padder.isBogus()) { + fMicros.padding = macros.padder; + } else { + fMicros.padding = Padder::none(); + } + + // Integer width + if (!macros.integerWidth.isBogus()) { + fMicros.integerWidth = macros.integerWidth; + } else { + fMicros.integerWidth = IntegerWidth::zeroFillTo(1); + } + + // Sign display + if (macros.sign != UNUM_SIGN_COUNT) { + fMicros.sign = macros.sign; + } else { + fMicros.sign = UNUM_SIGN_AUTO; + } + + // Decimal mark display + if (macros.decimal != UNUM_DECIMAL_SEPARATOR_COUNT) { + fMicros.decimal = macros.decimal; + } else { + fMicros.decimal = UNUM_DECIMAL_SEPARATOR_AUTO; + } + + // Use monetary separator symbols + fMicros.useCurrency = isCurrency; + + // Inner modifier (scientific notation) + if (macros.notation.fType == Notation::NTN_SCIENTIFIC) { + fScientificHandler.adoptInstead(new ScientificHandler(¯os.notation, fMicros.symbols, chain)); + chain = fScientificHandler.getAlias(); + } else { + // No inner modifier required + fMicros.modInner = &fMicros.helpers.emptyStrongModifier; + } + + // Middle modifier (patterns, positive/negative, currency symbols, percent) + auto 
patternModifier = new MutablePatternModifier(false); + fPatternModifier.adoptInstead(patternModifier); + patternModifier->setPatternInfo(fPatternInfo.getAlias()); + patternModifier->setPatternAttributes(fMicros.sign, isPermille); + if (patternModifier->needsPlurals()) { + patternModifier->setSymbols( + fMicros.symbols, + currency, + unitWidth, + resolvePluralRules(macros.rules, macros.locale, status)); + } else { + patternModifier->setSymbols(fMicros.symbols, currency, unitWidth, nullptr); + } + if (safe) { + fImmutablePatternModifier.adoptInstead(patternModifier->createImmutableAndChain(chain, status)); + chain = fImmutablePatternModifier.getAlias(); + } else { + patternModifier->addToChain(chain); + chain = patternModifier; + } + + // Outer modifier (CLDR units and currency long names) + if (isCldrUnit) { + fLongNameHandler.adoptInstead( + new LongNameHandler( + LongNameHandler::forMeasureUnit( + macros.locale, + macros.unit, + unitWidth, + resolvePluralRules(macros.rules, macros.locale, status), + chain, + status))); + chain = fLongNameHandler.getAlias(); + } else if (isCurrency && unitWidth == UNUM_UNIT_WIDTH_FULL_NAME) { + fLongNameHandler.adoptInstead( + new LongNameHandler( + LongNameHandler::forCurrencyLongNames( + macros.locale, + currency, + resolvePluralRules(macros.rules, macros.locale, status), + chain, + status))); + chain = fLongNameHandler.getAlias(); + } else { + // No outer modifier required + fMicros.modOuter = &fMicros.helpers.emptyWeakModifier; + } + + // Compact notation + // NOTE: Compact notation can (but might not) override the middle modifier and rounding. + // It therefore needs to go at the end of the chain. + if (macros.notation.fType == Notation::NTN_COMPACT) { + CompactType compactType = (isCurrency && unitWidth != UNUM_UNIT_WIDTH_FULL_NAME) + ? 
CompactType::TYPE_CURRENCY : CompactType::TYPE_DECIMAL; + fCompactHandler.adoptInstead( + new CompactHandler( + macros.notation.fUnion.compactStyle, + macros.locale, + nsName, + compactType, + resolvePluralRules(macros.rules, macros.locale, status), + safe ? patternModifier : nullptr, + chain, + status)); + chain = fCompactHandler.getAlias(); + } + + return chain; +} + +const PluralRules * +NumberFormatterImpl::resolvePluralRules(const PluralRules *rulesPtr, const Locale &locale, + UErrorCode &status) { + if (rulesPtr != nullptr) { + return rulesPtr; + } + // Lazily create PluralRules + if (fRules.isNull()) { + fRules.adoptInstead(PluralRules::forLocale(locale, status)); + } + return fRules.getAlias(); +} + +int32_t NumberFormatterImpl::microsToString(const MicroProps µs, DecimalQuantity &quantity, + NumberStringBuilder &string, UErrorCode &status) { + micros.rounding.apply(quantity, status); + micros.integerWidth.apply(quantity, status); + int32_t length = writeNumber(micros, quantity, string, status); + // NOTE: When range formatting is added, these modifiers can bubble up. + // For now, apply them all here at once. + // Always apply the inner modifier (which is "strong"). 
+ length += micros.modInner->apply(string, 0, length, status); + if (micros.padding.isValid()) { + length += micros.padding + .padAndApply(*micros.modMiddle, *micros.modOuter, string, 0, length, status); + } else { + length += micros.modMiddle->apply(string, 0, length, status); + length += micros.modOuter->apply(string, 0, length, status); + } + return length; +} + +int32_t NumberFormatterImpl::writeNumber(const MicroProps µs, DecimalQuantity &quantity, + NumberStringBuilder &string, UErrorCode &status) { + int32_t length = 0; + if (quantity.isInfinite()) { + length += string.insert( + length, + micros.symbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kInfinitySymbol), + UNUM_INTEGER_FIELD, + status); + + } else if (quantity.isNaN()) { + length += string.insert( + length, + micros.symbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kNaNSymbol), + UNUM_INTEGER_FIELD, + status); + + } else { + // Add the integer digits + length += writeIntegerDigits(micros, quantity, string, status); + + // Add the decimal point + if (quantity.getLowerDisplayMagnitude() < 0 || micros.decimal == UNUM_DECIMAL_SEPARATOR_ALWAYS) { + length += string.insert( + length, + micros.useCurrency ? micros.symbols->getSymbol( + DecimalFormatSymbols::ENumberFormatSymbol::kMonetarySeparatorSymbol) : micros + .symbols + ->getSymbol( + DecimalFormatSymbols::ENumberFormatSymbol::kDecimalSeparatorSymbol), + UNUM_DECIMAL_SEPARATOR_FIELD, + status); + } + + // Add the fraction digits + length += writeFractionDigits(micros, quantity, string, status); + } + + return length; +} + +int32_t NumberFormatterImpl::writeIntegerDigits(const MicroProps µs, DecimalQuantity &quantity, + NumberStringBuilder &string, UErrorCode &status) { + int length = 0; + int integerCount = quantity.getUpperDisplayMagnitude() + 1; + for (int i = 0; i < integerCount; i++) { + // Add grouping separator + if (micros.grouping.groupAtPosition(i, quantity)) { + length += string.insert( + 0, + micros.useCurrency ? 
micros.symbols->getSymbol( + DecimalFormatSymbols::ENumberFormatSymbol::kMonetaryGroupingSeparatorSymbol) + : micros.symbols->getSymbol( + DecimalFormatSymbols::ENumberFormatSymbol::kGroupingSeparatorSymbol), + UNUM_GROUPING_SEPARATOR_FIELD, + status); + } + + // Get and append the next digit value + int8_t nextDigit = quantity.getDigit(i); + length += string.insert( + 0, getDigitFromSymbols(nextDigit, *micros.symbols), UNUM_INTEGER_FIELD, status); + } + return length; +} + +int32_t NumberFormatterImpl::writeFractionDigits(const MicroProps µs, DecimalQuantity &quantity, + NumberStringBuilder &string, UErrorCode &status) { + int length = 0; + int fractionCount = -quantity.getLowerDisplayMagnitude(); + for (int i = 0; i < fractionCount; i++) { + // Get and append the next digit value + int8_t nextDigit = quantity.getDigit(-i - 1); + length += string.append( + getDigitFromSymbols(nextDigit, *micros.symbols), UNUM_FRACTION_FIELD, status); + } + return length; +} diff --git a/icu4c/source/i18n/number_formatimpl.h b/icu4c/source/i18n/number_formatimpl.h new file mode 100644 index 0000000000..59413847be --- /dev/null +++ b/icu4c/source/i18n/number_formatimpl.h @@ -0,0 +1,120 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef NUMBERFORMAT_NUMBERFORMATTERIMPL_H +#define NUMBERFORMAT_NUMBERFORMATTERIMPL_H + +#include "number_types.h" +#include "number_stringbuilder.h" +#include "number_patternstring.h" +#include "number_utils.h" +#include "number_patternmodifier.h" +#include "number_longnames.h" +#include "number_compact.h" + +U_NAMESPACE_BEGIN namespace number { +namespace impl { + +/** + * This is the "brain" of the number formatting pipeline. It ties all the pieces together, taking in a MacroProps and a + * DecimalQuantity and outputting a properly formatted number string. 
+ */ +class NumberFormatterImpl { + public: + /** + * Builds a "safe" MicroPropsGenerator, which is thread-safe and can be used repeatedly. + * The caller owns the returned NumberFormatterImpl. + */ + static NumberFormatterImpl *fromMacros(const MacroProps ¯os, UErrorCode &status); + + /** + * Builds and evaluates an "unsafe" MicroPropsGenerator, which is cheaper but can be used only once. + */ + static void + applyStatic(const MacroProps ¯os, DecimalQuantity &inValue, NumberStringBuilder &outString, + UErrorCode &status); + + /** + * Evaluates the "safe" MicroPropsGenerator created by "fromMacros". + */ + void apply(DecimalQuantity &inValue, NumberStringBuilder &outString, UErrorCode &status) const; + + private: + // Head of the MicroPropsGenerator linked list: + const MicroPropsGenerator *fMicroPropsGenerator = nullptr; + + // Tail of the list: + MicroProps fMicros; + + // Other fields possibly used by the number formatting pipeline: + // TODO: Convert some of these LocalPointers to value objects to reduce the number of news? + LocalPointer fSymbols; + LocalPointer fRules; + LocalPointer fPatternInfo; + LocalPointer fScientificHandler; + LocalPointer fPatternModifier; + LocalPointer fImmutablePatternModifier; + LocalPointer fLongNameHandler; + LocalPointer fCompactHandler; + + + NumberFormatterImpl(const MacroProps ¯os, bool safe, UErrorCode &status); + + void applyUnsafe(DecimalQuantity &inValue, NumberStringBuilder &outString, UErrorCode &status); + + /** + * If rulesPtr is non-null, return it. Otherwise, return a PluralRules owned by this object for the + * specified locale, creating it if necessary. + */ + const PluralRules * + resolvePluralRules(const PluralRules *rulesPtr, const Locale &locale, UErrorCode &status); + + /** + * Synthesizes the MacroProps into a MicroPropsGenerator. 
All information, including the locale, is encoded into the + * MicroPropsGenerator, except for the quantity itself, which is left abstract and must be provided to the returned + * MicroPropsGenerator instance. + * + * @see MicroPropsGenerator + * @param macros + * The {@link MacroProps} to consume. This method does not mutate the MacroProps instance. + * @param safe + * If true, the returned MicroPropsGenerator will be thread-safe. If false, the returned value will + * not be thread-safe, intended for a single "one-shot" use only. Building the thread-safe + * object is more expensive. + */ + const MicroPropsGenerator * + macrosToMicroGenerator(const MacroProps ¯os, bool safe, UErrorCode &status); + + /** + * Synthesizes the output string from a MicroProps and DecimalQuantity. + * + * @param micros + * The MicroProps after the quantity has been consumed. Will not be mutated. + * @param quantity + * The DecimalQuantity to be rendered. May be mutated. + * @param string + * The output string. Will be mutated. + */ + static int32_t + microsToString(const MicroProps µs, DecimalQuantity &quantity, NumberStringBuilder &string, + UErrorCode &status); + + static int32_t + writeNumber(const MicroProps µs, DecimalQuantity &quantity, NumberStringBuilder &string, + UErrorCode &status); + + static int32_t + writeIntegerDigits(const MicroProps µs, DecimalQuantity &quantity, NumberStringBuilder &string, + UErrorCode &status); + + static int32_t + writeFractionDigits(const MicroProps µs, DecimalQuantity &quantity, NumberStringBuilder &string, + UErrorCode &status); +}; + +} // namespace impl +} // namespace number +U_NAMESPACE_END + + +#endif //NUMBERFORMAT_NUMBERFORMATTERIMPL_H diff --git a/icu4c/source/i18n/number_grouping.cpp b/icu4c/source/i18n/number_grouping.cpp new file mode 100644 index 0000000000..cdab741a08 --- /dev/null +++ b/icu4c/source/i18n/number_grouping.cpp @@ -0,0 +1,47 @@ +// © 2017 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/numberformatter.h" +#include "number_patternstring.h" + +using namespace icu::number; + +Grouper Grouper::defaults() { + return {-2, -2, false}; +} + +Grouper Grouper::minTwoDigits() { + return {-2, -2, true}; +} + +Grouper Grouper::none() { + return {-1, -1, false}; +} + +void Grouper::setLocaleData(const impl::ParsedPatternInfo &patternInfo) { + if (fGrouping1 != -2) { + return; + } + auto grouping1 = static_cast (patternInfo.positive.groupingSizes & 0xffff); + auto grouping2 = static_cast ((patternInfo.positive.groupingSizes >> 16) & 0xffff); + auto grouping3 = static_cast ((patternInfo.positive.groupingSizes >> 32) & 0xffff); + if (grouping2 == -1) { + grouping1 = -1; + } + if (grouping3 == -1) { + grouping2 = grouping1; + } + fGrouping1 = grouping1; + fGrouping2 = grouping2; +} + +bool Grouper::groupAtPosition(int32_t position, const impl::DecimalQuantity &value) const { + U_ASSERT(fGrouping1 > -2); + if (fGrouping1 == -1 || fGrouping1 == 0) { + // Either -1 or 0 means "no grouping" + return false; + } + position -= fGrouping1; + return position >= 0 && (position % fGrouping2) == 0 + && value.getUpperDisplayMagnitude() - fGrouping1 + 1 >= (fMin2 ? 2 : 1); +} diff --git a/icu4c/source/i18n/number_integerwidth.cpp b/icu4c/source/i18n/number_integerwidth.cpp new file mode 100644 index 0000000000..d5a22ac680 --- /dev/null +++ b/icu4c/source/i18n/number_integerwidth.cpp @@ -0,0 +1,41 @@ +// © 2017 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/numberformatter.h" +#include "number_types.h" +#include "number_decimalquantity.h" + +using namespace icu::number; +using namespace icu::number::impl; + +IntegerWidth::IntegerWidth(int8_t minInt, int8_t maxInt) { + fUnion.minMaxInt.fMinInt = minInt; + fUnion.minMaxInt.fMaxInt = maxInt; +} + +IntegerWidth IntegerWidth::zeroFillTo(int32_t minInt) { + if (minInt >= 0 && minInt <= kMaxIntFracSig) { + return {static_cast(minInt), -1}; + } else { + return {U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR}; + } +} + +IntegerWidth IntegerWidth::truncateAt(int32_t maxInt) { + if (fHasError) { return *this; } // No-op on error + if (maxInt >= 0 && maxInt <= kMaxIntFracSig) { + return {fUnion.minMaxInt.fMinInt, static_cast(maxInt)}; + } else { + return {U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR}; + } +} + +void IntegerWidth::apply(impl::DecimalQuantity &quantity, UErrorCode &status) const { + if (fHasError) { + status = U_ILLEGAL_ARGUMENT_ERROR; + } else if (fUnion.minMaxInt.fMaxInt == -1) { + quantity.setIntegerLength(fUnion.minMaxInt.fMinInt, INT32_MAX); + } else { + quantity.setIntegerLength(fUnion.minMaxInt.fMinInt, fUnion.minMaxInt.fMaxInt); + } +} diff --git a/icu4c/source/i18n/number_longnames.cpp b/icu4c/source/i18n/number_longnames.cpp new file mode 100644 index 0000000000..458c6fdbcf --- /dev/null +++ b/icu4c/source/i18n/number_longnames.cpp @@ -0,0 +1,157 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include +#include +#include +#include +#include "number_longnames.h" +#include +#include + +using namespace icu::number::impl; + +namespace { + + +////////////////////////// +/// BEGIN DATA LOADING /// +////////////////////////// + +class PluralTableSink : public ResourceSink { + public: + explicit PluralTableSink(UnicodeString *outArray) : outArray(outArray) { + // Initialize the array to bogus strings. 
+ for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) { + outArray[i].setToBogus(); + } + } + + void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override { + ResourceTable pluralsTable = value.getTable(status); + if (U_FAILURE(status)) { return; } + for (int i = 0; pluralsTable.getKeyAndValue(i, key, value); ++i) { + // In MeasureUnit data, ignore dnam and per units for now. + if (uprv_strcmp(key, "dnam") == 0 || uprv_strcmp(key, "per") == 0) { + continue; + } + StandardPlural::Form plural = StandardPlural::fromString(key, status); + if (U_FAILURE(status)) { return; } + if (!outArray[plural].isBogus()) { + continue; + } + outArray[plural] = value.getUnicodeString(status); + if (U_FAILURE(status)) { return; } + } + } + + private: + UnicodeString *outArray; +}; + +// NOTE: outArray MUST have room for all StandardPlural values. No bounds checking is performed. + +void getMeasureData(const Locale &locale, const MeasureUnit &unit, const UNumberUnitWidth &width, + UnicodeString *outArray, UErrorCode &status) { + PluralTableSink sink(outArray); + LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status)); + if (U_FAILURE(status)) { return; } + CharString key; + key.append("units", status); + if (width == UNUM_UNIT_WIDTH_NARROW) { + key.append("Narrow", status); + } else if (width == UNUM_UNIT_WIDTH_SHORT) { + key.append("Short", status); + } + key.append("/", status); + key.append(unit.getType(), status); + key.append("/", status); + key.append(unit.getSubtype(), status); + ures_getAllItemsWithFallback(unitsBundle.getAlias(), key.data(), sink, status); +} + +void getCurrencyLongNameData(const Locale &locale, const CurrencyUnit ¤cy, UnicodeString *outArray, + UErrorCode &status) { + // In ICU4J, this method gets a CurrencyData from CurrencyData.provider. + // TODO(ICU4J): Implement this without going through CurrencyData, like in ICU4C? 
+ PluralTableSink sink(outArray); + LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_CURR, locale.getName(), &status)); + if (U_FAILURE(status)) { return; } + ures_getAllItemsWithFallback(unitsBundle.getAlias(), "CurrencyUnitPatterns", sink, status); + if (U_FAILURE(status)) { return; } + for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) { + UnicodeString &pattern = outArray[i]; + if (pattern.isBogus()) { + continue; + } + UBool isChoiceFormat = FALSE; + int32_t longNameLen = 0; + const char16_t *longName = ucurr_getPluralName( + currency.getISOCurrency(), + locale.getName(), + &isChoiceFormat, + StandardPlural::getKeyword(static_cast(i)), + &longNameLen, + &status); + // Example pattern from data: "{0} {1}" + // Example output after find-and-replace: "{0} US dollars" + pattern.findAndReplace(UnicodeString(u"{1}"), UnicodeString(longName, longNameLen)); + } +} + +//////////////////////// +/// END DATA LOADING /// +//////////////////////// + +} // namespace + +LongNameHandler +LongNameHandler::forMeasureUnit(const Locale &loc, const MeasureUnit &unit, const UNumberUnitWidth &width, + const PluralRules *rules, const MicroPropsGenerator *parent, + UErrorCode &status) { + LongNameHandler result(rules, parent); + UnicodeString simpleFormats[StandardPlural::Form::COUNT]; + getMeasureData(loc, unit, width, simpleFormats, status); + if (U_FAILURE(status)) { return result; } + // TODO: What field to use for units? 
+ simpleFormatsToModifiers(simpleFormats, UNUM_FIELD_COUNT, result.fModifiers, status); + return result; +} + +LongNameHandler LongNameHandler::forCurrencyLongNames(const Locale &loc, const CurrencyUnit ¤cy, + const PluralRules *rules, + const MicroPropsGenerator *parent, + UErrorCode &status) { + LongNameHandler result(rules, parent); + UnicodeString simpleFormats[StandardPlural::Form::COUNT]; + getCurrencyLongNameData(loc, currency, simpleFormats, status); + if (U_FAILURE(status)) { return result; } + simpleFormatsToModifiers(simpleFormats, UNUM_CURRENCY_FIELD, result.fModifiers, status); + return result; +} + +void LongNameHandler::simpleFormatsToModifiers(const UnicodeString *simpleFormats, Field field, + SimpleModifier *output, UErrorCode &status) { + for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) { + UnicodeString simpleFormat = simpleFormats[i]; + if (simpleFormat.isBogus()) { + simpleFormat = simpleFormats[StandardPlural::Form::OTHER]; + } + if (simpleFormat.isBogus()) { + // There should always be data in the "other" plural variant. + status = U_INTERNAL_PROGRAM_ERROR; + return; + } + SimpleFormatter compiledFormatter(simpleFormat, 1, 1, status); + output[i] = SimpleModifier(compiledFormatter, field, false); + } +} + +void LongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps µs, + UErrorCode &status) const { + parent->processQuantity(quantity, micros, status); + // TODO: Avoid the copy here? + DecimalQuantity copy(quantity); + micros.rounding.apply(copy, status); + micros.modOuter = &fModifiers[copy.getStandardPlural(rules)]; +} diff --git a/icu4c/source/i18n/number_longnames.h b/icu4c/source/i18n/number_longnames.h new file mode 100644 index 0000000000..bda07448a1 --- /dev/null +++ b/icu4c/source/i18n/number_longnames.h @@ -0,0 +1,43 @@ +// © 2017 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef NUMBERFORMAT_LONGNAMEHANDLER_H +#define NUMBERFORMAT_LONGNAMEHANDLER_H + +#include "unicode/uversion.h" +#include "number_utils.h" +#include "number_modifiers.h" + +U_NAMESPACE_BEGIN namespace number { +namespace impl { + +class LongNameHandler : public MicroPropsGenerator, public UObject { + public: + static LongNameHandler + forCurrencyLongNames(const Locale &loc, const CurrencyUnit ¤cy, const PluralRules *rules, + const MicroPropsGenerator *parent, UErrorCode &status); + + static LongNameHandler + forMeasureUnit(const Locale &loc, const MeasureUnit &unit, const UNumberUnitWidth &width, + const PluralRules *rules, const MicroPropsGenerator *parent, UErrorCode &status); + + void + processQuantity(DecimalQuantity &quantity, MicroProps µs, UErrorCode &status) const override; + + private: + SimpleModifier fModifiers[StandardPlural::Form::COUNT]; + const PluralRules *rules; + const MicroPropsGenerator *parent; + + LongNameHandler(const PluralRules *rules, const MicroPropsGenerator *parent) + : rules(rules), parent(parent) {} + + static void simpleFormatsToModifiers(const UnicodeString *simpleFormats, Field field, + SimpleModifier *output, UErrorCode &status); +}; + +} // namespace impl +} // namespace number +U_NAMESPACE_END + +#endif //NUMBERFORMAT_LONGNAMEHANDLER_H diff --git a/icu4c/source/i18n/number_modifiers.cpp b/icu4c/source/i18n/number_modifiers.cpp new file mode 100644 index 0000000000..6f1eae10f3 --- /dev/null +++ b/icu4c/source/i18n/number_modifiers.cpp @@ -0,0 +1,294 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include +#include +#include +#include "number_modifiers.h" + +using namespace icu::number::impl; + +namespace { + +// TODO: This is copied from simpleformatter.cpp +const int32_t ARG_NUM_LIMIT = 0x100; + +// These are the default currency spacing UnicodeSets in CLDR. 
+// Pre-compute them for performance. +// The Java unit test testCurrencySpacingPatternStability() will start failing if these change in CLDR. +icu::UInitOnce gDefaultCurrencySpacingInitOnce = U_INITONCE_INITIALIZER; + +UnicodeSet *UNISET_DIGIT = nullptr; +UnicodeSet *UNISET_NOTS = nullptr; + +UBool U_CALLCONV cleanupDefaultCurrencySpacing() { + delete UNISET_DIGIT; + UNISET_DIGIT = nullptr; + delete UNISET_NOTS; + UNISET_NOTS = nullptr; + return TRUE; +} + +void U_CALLCONV initDefaultCurrencySpacing(UErrorCode &status) { + ucln_i18n_registerCleanup(UCLN_I18N_CURRENCY_SPACING, cleanupDefaultCurrencySpacing); + UNISET_DIGIT = new UnicodeSet(UnicodeString(u"[:digit:]"), status); + UNISET_NOTS = new UnicodeSet(UnicodeString(u"[:^S:]"), status); + if (UNISET_DIGIT == nullptr || UNISET_NOTS == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + UNISET_DIGIT->freeze(); + UNISET_NOTS->freeze(); +} + +} // namespace + + +int32_t ConstantAffixModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex, + UErrorCode &status) const { + // Insert the suffix first since inserting the prefix will change the rightIndex + int length = output.insert(rightIndex, fSuffix, fField, status); + length += output.insert(leftIndex, fPrefix, fField, status); + return length; +} + +int32_t ConstantAffixModifier::getPrefixLength(UErrorCode &status) const { + (void)status; + return fPrefix.length(); +} + +int32_t ConstantAffixModifier::getCodePointCount(UErrorCode &status) const { + (void)status; + return fPrefix.countChar32() + fSuffix.countChar32(); +} + +bool ConstantAffixModifier::isStrong() const { + return fStrong; +} + +SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong) + : fCompiledPattern(simpleFormatter.getCompiledPattern()), fField(field), fStrong(strong) { + U_ASSERT(1 == + SimpleFormatter::getArgumentLimit(compiledPattern.getBuffer(), compiledPattern.length())); + if (fCompiledPattern.charAt(1) != 0) { + 
fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT; + fSuffixOffset = 3 + fPrefixLength; + } else { + fPrefixLength = 0; + fSuffixOffset = 2; + } + if (3 + fPrefixLength < fCompiledPattern.length()) { + fSuffixLength = fCompiledPattern.charAt(fSuffixOffset) - ARG_NUM_LIMIT; + } else { + fSuffixLength = 0; + } +} + +SimpleModifier::SimpleModifier() : fStrong(false), fPrefixLength(0), fSuffixLength(0) { +} + +int32_t SimpleModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex, + UErrorCode &status) const { + return formatAsPrefixSuffix(output, leftIndex, rightIndex, fField, status); +} + +int32_t SimpleModifier::getPrefixLength(UErrorCode &status) const { + (void)status; + return fPrefixLength; +} + +int32_t SimpleModifier::getCodePointCount(UErrorCode &status) const { + (void)status; + int32_t count = 0; + if (fPrefixLength > 0) { + count += fCompiledPattern.countChar32(2, fPrefixLength); + } + if (fSuffixLength > 0) { + count += fCompiledPattern.countChar32(1 + fSuffixOffset, fSuffixLength); + } + return count; +} + +bool SimpleModifier::isStrong() const { + return fStrong; +} + +int32_t +SimpleModifier::formatAsPrefixSuffix(NumberStringBuilder &result, int32_t startIndex, int32_t endIndex, + Field field, UErrorCode &status) const { + if (fPrefixLength > 0) { + result.insert(startIndex, fCompiledPattern, 2, 2 + fPrefixLength, field, status); + } + if (fSuffixLength > 0) { + result.insert( + endIndex + fPrefixLength, + fCompiledPattern, + 1 + fSuffixOffset, + 1 + fSuffixOffset + fSuffixLength, + field, + status); + } + return fPrefixLength + fSuffixLength; +} + +int32_t ConstantMultiFieldModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex, + UErrorCode &status) const { + // Insert the suffix first since inserting the prefix will change the rightIndex + int32_t length = output.insert(rightIndex, fSuffix, status); + length += output.insert(leftIndex, fPrefix, status); + return length; +} + +int32_t 
ConstantMultiFieldModifier::getPrefixLength(UErrorCode &status) const { + (void)status; + return fPrefix.length(); +} + +int32_t ConstantMultiFieldModifier::getCodePointCount(UErrorCode &status) const { + (void)status; + return fPrefix.codePointCount() + fSuffix.codePointCount(); +} + +bool ConstantMultiFieldModifier::isStrong() const { + return fStrong; +} + +CurrencySpacingEnabledModifier::CurrencySpacingEnabledModifier(const NumberStringBuilder &prefix, + const NumberStringBuilder &suffix, + bool strong, + const DecimalFormatSymbols &symbols, + UErrorCode &status) + : ConstantMultiFieldModifier(prefix, suffix, strong) { + // Check for currency spacing. Do not build the UnicodeSets unless there is + // a currency code point at a boundary. + if (prefix.length() > 0 && prefix.fieldAt(prefix.length() - 1) == UNUM_CURRENCY_FIELD) { + int prefixCp = prefix.getLastCodePoint(); + UnicodeSet prefixUnicodeSet = getUnicodeSet(symbols, IN_CURRENCY, PREFIX, status); + if (prefixUnicodeSet.contains(prefixCp)) { + fAfterPrefixUnicodeSet = getUnicodeSet(symbols, IN_NUMBER, PREFIX, status); + fAfterPrefixUnicodeSet.freeze(); + fAfterPrefixInsert = getInsertString(symbols, PREFIX, status); + } else { + fAfterPrefixUnicodeSet.setToBogus(); + fAfterPrefixInsert.setToBogus(); + } + } else { + fAfterPrefixUnicodeSet.setToBogus(); + fAfterPrefixInsert.setToBogus(); + } + if (suffix.length() > 0 && suffix.fieldAt(0) == UNUM_CURRENCY_FIELD) { + int suffixCp = suffix.getLastCodePoint(); + UnicodeSet suffixUnicodeSet = getUnicodeSet(symbols, IN_CURRENCY, SUFFIX, status); + if (suffixUnicodeSet.contains(suffixCp)) { + fBeforeSuffixUnicodeSet = getUnicodeSet(symbols, IN_NUMBER, SUFFIX, status); + fBeforeSuffixUnicodeSet.freeze(); + fBeforeSuffixInsert = getInsertString(symbols, SUFFIX, status); + } else { + fBeforeSuffixUnicodeSet.setToBogus(); + fBeforeSuffixInsert.setToBogus(); + } + } else { + fBeforeSuffixUnicodeSet.setToBogus(); + fBeforeSuffixInsert.setToBogus(); + } +} + +int32_t 
CurrencySpacingEnabledModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex, + UErrorCode &status) const { + // Currency spacing logic + int length = 0; + if (rightIndex - leftIndex > 0 && !fAfterPrefixUnicodeSet.isBogus() && + fAfterPrefixUnicodeSet.contains(output.codePointAt(leftIndex))) { + // TODO: Should we use the CURRENCY field here? + length += output.insert(leftIndex, fAfterPrefixInsert, UNUM_FIELD_COUNT, status); + } + if (rightIndex - leftIndex > 0 && !fBeforeSuffixUnicodeSet.isBogus() && + fBeforeSuffixUnicodeSet.contains(output.codePointBefore(rightIndex))) { + // TODO: Should we use the CURRENCY field here? + length += output.insert(rightIndex + length, fBeforeSuffixInsert, UNUM_FIELD_COUNT, status); + } + + // Call super for the remaining logic + length += ConstantMultiFieldModifier::apply(output, leftIndex, rightIndex + length, status); + return length; +} + +int32_t +CurrencySpacingEnabledModifier::applyCurrencySpacing(NumberStringBuilder &output, int32_t prefixStart, + int32_t prefixLen, int32_t suffixStart, + int32_t suffixLen, + const DecimalFormatSymbols &symbols, + UErrorCode &status) { + int length = 0; + bool hasPrefix = (prefixLen > 0); + bool hasSuffix = (suffixLen > 0); + bool hasNumber = (suffixStart - prefixStart - prefixLen > 0); // could be empty string + if (hasPrefix && hasNumber) { + length += applyCurrencySpacingAffix(output, prefixStart + prefixLen, PREFIX, symbols, status); + } + if (hasSuffix && hasNumber) { + length += applyCurrencySpacingAffix(output, suffixStart + length, SUFFIX, symbols, status); + } + return length; +} + +int32_t +CurrencySpacingEnabledModifier::applyCurrencySpacingAffix(NumberStringBuilder &output, int32_t index, + EAffix affix, + const DecimalFormatSymbols &symbols, + UErrorCode &status) { + // NOTE: For prefix, output.fieldAt(index-1) gets the last field type in the prefix. 
+ // This works even if the last code point in the prefix is 2 code units because the + // field value gets populated to both indices in the field array. + Field affixField = (affix == PREFIX) ? output.fieldAt(index - 1) : output.fieldAt(index); + if (affixField != UNUM_CURRENCY_FIELD) { + return 0; + } + int affixCp = (affix == PREFIX) ? output.codePointBefore(index) : output.codePointAt(index); + UnicodeSet affixUniset = getUnicodeSet(symbols, IN_CURRENCY, affix, status); + if (!affixUniset.contains(affixCp)) { + return 0; + } + int numberCp = (affix == PREFIX) ? output.codePointAt(index) : output.codePointBefore(index); + UnicodeSet numberUniset = getUnicodeSet(symbols, IN_NUMBER, affix, status); + if (!numberUniset.contains(numberCp)) { + return 0; + } + UnicodeString spacingString = getInsertString(symbols, affix, status); + + // NOTE: This next line *inserts* the spacing string, triggering an arraycopy. + // It would be more efficient if this could be done before affixes were attached, + // so that it could be prepended/appended instead of inserted. + // However, the build code path is more efficient, and this is the most natural + // place to put currency spacing in the non-build code path. + // TODO: Should we use the CURRENCY field here? + return output.insert(index, spacingString, UNUM_FIELD_COUNT, status); +} + +UnicodeSet +CurrencySpacingEnabledModifier::getUnicodeSet(const DecimalFormatSymbols &symbols, EPosition position, + EAffix affix, UErrorCode &status) { + // Ensure the static defaults are initialized: + umtx_initOnce(gDefaultCurrencySpacingInitOnce, &initDefaultCurrencySpacing, status); + if (U_FAILURE(status)) { + return UnicodeSet(); + } + + const UnicodeString& pattern = symbols.getPatternForCurrencySpacing( + position == IN_CURRENCY ? 
UNUM_CURRENCY_MATCH : UNUM_CURRENCY_SURROUNDING_MATCH, + affix == SUFFIX, + status); + if (pattern.compare(u"[:digit:]", -1) == 0) { + return *UNISET_DIGIT; + } else if (pattern.compare(u"[:^S:]", -1) == 0) { + return *UNISET_NOTS; + } else { + return UnicodeSet(pattern, status); + } +} + +UnicodeString +CurrencySpacingEnabledModifier::getInsertString(const DecimalFormatSymbols &symbols, EAffix affix, + UErrorCode &status) { + return symbols.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, affix == SUFFIX, status); +} diff --git a/icu4c/source/i18n/number_modifiers.h b/icu4c/source/i18n/number_modifiers.h new file mode 100644 index 0000000000..41637735fe --- /dev/null +++ b/icu4c/source/i18n/number_modifiers.h @@ -0,0 +1,249 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef NUMBERFORMAT_MODIFIERS_H +#define NUMBERFORMAT_MODIFIERS_H + +#include +#include +#include +#include +#include +#include "number_stringbuilder.h" +#include "number_types.h" + +U_NAMESPACE_BEGIN namespace number { +namespace impl { + +/** + * The canonical implementation of {@link Modifier}, containing a prefix and suffix string. + * TODO: This is not currently being used by real code and could be removed. 
+ */
+class ConstantAffixModifier : public Modifier, public UObject {
+  public:
+    /** Stores copies of the prefix and suffix strings, the field they are tagged with, and the strong flag. */
+    ConstantAffixModifier(const UnicodeString &prefix, const UnicodeString &suffix, Field field,
+                          bool strong)
+            : fPrefix(prefix), fSuffix(suffix), fField(field), fStrong(strong) {}
+
+    /** Inserts the suffix at rightIndex and the prefix at leftIndex; returns the code units added. */
+    int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
+                  UErrorCode &status) const override;
+
+    /** Returns the prefix length in UTF-16 code units. */
+    int32_t getPrefixLength(UErrorCode &status) const override;
+
+    /** Returns the number of code points in prefix and suffix combined. */
+    int32_t getCodePointCount(UErrorCode &status) const override;
+
+    bool isStrong() const override;
+
+  private:
+    UnicodeString fPrefix;
+    UnicodeString fSuffix;
+    Field fField;
+    bool fStrong;
+};
+
+/**
+ * The second primary implementation of {@link Modifier}, this one consuming a {@link SimpleFormatter}
+ * pattern.
+ */
+class SimpleModifier : public Modifier, public UMemory {
+  public:
+    /** Copies the formatter's compiled pattern and records where its prefix and suffix text lie. */
+    SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong);
+
+    // Default constructor for LongNameHandler.h
+    SimpleModifier();
+
+    /** Equivalent to formatAsPrefixSuffix() with this modifier's own field. */
+    int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
+                  UErrorCode &status) const override;
+
+    /** Returns the prefix length in UTF-16 code units. */
+    int32_t getPrefixLength(UErrorCode &status) const override;
+
+    /** Returns the number of code points in prefix and suffix combined. */
+    int32_t getCodePointCount(UErrorCode &status) const override;
+
+    bool isStrong() const override;
+
+    /**
+     * TODO: This belongs in SimpleFormatterImpl. The only reason I haven't moved it there yet is because
+     * DoubleSidedStringBuilder is an internal class and SimpleFormatterImpl feels like it should not depend on it.
+     *
+     * Formats a value that is already stored inside the StringBuilder <code>result</code> between the indices
+     * <code>startIndex</code> and <code>endIndex</code> by inserting characters before the start index and after the
+     * end index.
+     *
+     * This is well-defined only for patterns with exactly one argument.
+     *
+     * @param result
+     *            The StringBuilder containing the value argument.
+     * @param startIndex
+     *            The left index of the value within the string builder.
+     * @param endIndex
+     *            The right index of the value within the string builder.
+     * @param field
+     *            The field with which the inserted prefix and suffix text is tagged.
+     * @param status
+     *            Passed through to the string builder's insert operations.
+     * @return The number of UTF-16 code units that were added to the StringBuilder.
+     */
+    int32_t
+    formatAsPrefixSuffix(NumberStringBuilder &result, int32_t startIndex, int32_t endIndex, Field field,
+                         UErrorCode &status) const;
+
+  private:
+    // Copy of the SimpleFormatter's compiled pattern; the prefix text starts at index 2.
+    UnicodeString fCompiledPattern;
+    Field fField;
+    bool fStrong;
+    // Length of the prefix text in code units (0 if the pattern starts with the argument).
+    int32_t fPrefixLength;
+    // Index of the suffix's length marker in fCompiledPattern; the suffix text starts one past it.
+    int32_t fSuffixOffset;
+    // Length of the suffix text in code units (0 if nothing follows the argument).
+    int32_t fSuffixLength;
+};
+
+/**
+ * An implementation of {@link Modifier} that allows for multiple types of fields in the same modifier. Constructed
+ * based on the contents of two {@link NumberStringBuilder} instances (one for the prefix, one for the suffix).
+ */
+class ConstantMultiFieldModifier : public Modifier, public UMemory {
+  public:
+    /** Stores copies of the prefix and suffix builders together with the strong flag. */
+    ConstantMultiFieldModifier(const NumberStringBuilder &prefix, const NumberStringBuilder &suffix,
+                               bool strong) : fPrefix(prefix), fSuffix(suffix), fStrong(strong) {}
+
+    /** Inserts the suffix at rightIndex and the prefix at leftIndex; returns the code units added. */
+    int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
+                  UErrorCode &status) const override;
+
+    /** Returns the prefix length in UTF-16 code units. */
+    int32_t getPrefixLength(UErrorCode &status) const override;
+
+    /** Returns the number of code points in prefix and suffix combined. */
+    int32_t getCodePointCount(UErrorCode &status) const override;
+
+    bool isStrong() const override;
+
+  protected:
+    // NOTE: In Java, these are stored as array pointers. In C++, the NumberStringBuilder is stored by
+    // value and is treated internally as immutable.
+    NumberStringBuilder fPrefix;
+    NumberStringBuilder fSuffix;
+    bool fStrong;
+};
+
+/** Identical to {@link ConstantMultiFieldModifier}, but supports currency spacing.
*/
+class CurrencySpacingEnabledModifier : public ConstantMultiFieldModifier {
+  public:
+    /**
+     * Safe code path: the spacing sets and insert strings are looked up once here and cached in
+     * the members below; a side on which spacing cannot apply gets bogus members.
+     */
+    CurrencySpacingEnabledModifier(const NumberStringBuilder &prefix, const NumberStringBuilder &suffix,
+                                   bool strong, const DecimalFormatSymbols &symbols, UErrorCode &status);
+
+    /** Inserts any applicable currency spacing, then the prefix and suffix; returns the code units added. */
+    int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
+                  UErrorCode &status) const override;
+
+    /**
+     * Unsafe code path: applies currency spacing to a builder that already holds the prefix
+     * (prefixLen code units at prefixStart), the number, and the suffix (suffixLen code units at
+     * suffixStart). Returns the number of code units inserted.
+     */
+    static int32_t
+    applyCurrencySpacing(NumberStringBuilder &output, int32_t prefixStart, int32_t prefixLen,
+                         int32_t suffixStart, int32_t suffixLen, const DecimalFormatSymbols &symbols,
+                         UErrorCode &status);
+
+  private:
+    // Set tested against the number's first code point, and the string inserted after the prefix
+    // when it matches. Both are bogus when spacing does not apply on the prefix side.
+    UnicodeSet fAfterPrefixUnicodeSet;
+    UnicodeString fAfterPrefixInsert;
+    // Set tested against the number's last code point, and the string inserted before the suffix
+    // when it matches. Both are bogus when spacing does not apply on the suffix side.
+    UnicodeSet fBeforeSuffixUnicodeSet;
+    UnicodeString fBeforeSuffixInsert;
+
+    // Which side of the number an affix is on.
+    enum EAffix {
+        PREFIX, SUFFIX
+    };
+
+    // Which spacing set to fetch: the one matched against the currency side of the boundary
+    // (IN_CURRENCY) or the one matched against the number side (IN_NUMBER).
+    enum EPosition {
+        IN_CURRENCY, IN_NUMBER
+    };
+
+    /** Unsafe code path */
+    static int32_t applyCurrencySpacingAffix(NumberStringBuilder &output, int32_t index, EAffix affix,
+                                             const DecimalFormatSymbols &symbols, UErrorCode &status);
+
+    /** Returns the spacing set for the given position and affix side, taken from the symbols. */
+    static UnicodeSet
+    getUnicodeSet(const DecimalFormatSymbols &symbols, EPosition position, EAffix affix,
+                  UErrorCode &status);
+
+    /** Returns the spacing insert string for the given affix side, taken from the symbols. */
+    static UnicodeString
+    getInsertString(const DecimalFormatSymbols &symbols, EAffix affix, UErrorCode &status);
+};
+
+/** A Modifier that does not do anything.
*/ +class EmptyModifier : public Modifier, public UMemory { + public: + explicit EmptyModifier(bool isStrong) : fStrong(isStrong) {} + + int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex, + UErrorCode &status) const override { + (void)output; + (void)leftIndex; + (void)rightIndex; + (void)status; + return 0; + } + + int32_t getPrefixLength(UErrorCode &status) const override { + (void)status; + return 0; + } + + int32_t getCodePointCount(UErrorCode &status) const override { + (void)status; + return 0; + } + + bool isStrong() const override { + return fStrong; + } + + private: + bool fStrong; +}; + +/** + * A ParameterizedModifier by itself is NOT a Modifier. Rather, it wraps a data structure containing two or more + * Modifiers and returns the modifier appropriate for the current situation. + */ +class ParameterizedModifier : public UMemory { + public: + // NOTE: mods is zero-initialized (to nullptr) + ParameterizedModifier() : mods() { + } + + // No copying! + ParameterizedModifier(const ParameterizedModifier &other) = delete; + + ~ParameterizedModifier() { + for (const Modifier *mod : mods) { + delete mod; + } + } + + void adoptPositiveNegativeModifiers(const Modifier *positive, const Modifier *negative) { + mods[0] = positive; + mods[1] = negative; + } + + /** The modifier is ADOPTED. */ + void adoptSignPluralModifier(bool isNegative, StandardPlural::Form plural, const Modifier *mod) { + mods[getModIndex(isNegative, plural)] = mod; + } + + /** Returns a reference to the modifier; no ownership change. */ + const Modifier *getModifier(bool isNegative) const { + return mods[isNegative ? 1 : 0]; + } + + /** Returns a reference to the modifier; no ownership change. 
*/ + const Modifier *getModifier(bool isNegative, StandardPlural::Form plural) const { + return mods[getModIndex(isNegative, plural)]; + } + + private: + const Modifier *mods[2 * StandardPlural::COUNT]; + + inline static int32_t getModIndex(bool isNegative, StandardPlural::Form plural) { + return static_cast(plural) * 2 + (isNegative ? 1 : 0); + } +}; + +} // namespace impl +} // namespace number +U_NAMESPACE_END + + +#endif //NUMBERFORMAT_MODIFIERS_H diff --git a/icu4c/source/i18n/number_notation.cpp b/icu4c/source/i18n/number_notation.cpp new file mode 100644 index 0000000000..e459742ee9 --- /dev/null +++ b/icu4c/source/i18n/number_notation.cpp @@ -0,0 +1,68 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/numberformatter.h" +#include "number_types.h" + +using namespace icu::number; +using namespace icu::number::impl; + + +ScientificNotation Notation::scientific() { + // NOTE: ISO C++ does not allow C99 designated initializers. 
+ ScientificSettings settings; + settings.fEngineeringInterval = 1; + settings.fRequireMinInt = false; + settings.fMinExponentDigits = 1; + settings.fExponentSignDisplay = UNUM_SIGN_AUTO; + NotationUnion union_; + union_.scientific = settings; + return {NTN_SCIENTIFIC, union_}; +} + +ScientificNotation Notation::engineering() { + ScientificSettings settings; + settings.fEngineeringInterval = 3; + settings.fRequireMinInt = false; + settings.fMinExponentDigits = 1; + settings.fExponentSignDisplay = UNUM_SIGN_AUTO; + NotationUnion union_; + union_.scientific = settings; + return {NTN_SCIENTIFIC, union_}; +} + +Notation Notation::compactShort() { + NotationUnion union_; + union_.compactStyle = CompactStyle::UNUM_SHORT; + return {NTN_COMPACT, union_}; +} + +Notation Notation::compactLong() { + NotationUnion union_; + union_.compactStyle = CompactStyle::UNUM_LONG; + return {NTN_COMPACT, union_}; +} + +Notation Notation::simple() { + return {}; +} + +ScientificNotation +ScientificNotation::withMinExponentDigits(int32_t minExponentDigits) const { + if (minExponentDigits >= 0 && minExponentDigits < kMaxIntFracSig) { + ScientificSettings settings = fUnion.scientific; + settings.fMinExponentDigits = (int8_t) minExponentDigits; + NotationUnion union_ = {settings}; + return {NTN_SCIENTIFIC, union_}; + } else { + return {U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR}; + } +} + +ScientificNotation +ScientificNotation::withExponentSignDisplay(UNumberSignDisplay exponentSignDisplay) const { + ScientificSettings settings = fUnion.scientific; + settings.fExponentSignDisplay = exponentSignDisplay; + NotationUnion union_ = {settings}; + return {NTN_SCIENTIFIC, union_}; +} diff --git a/icu4c/source/i18n/number_padding.cpp b/icu4c/source/i18n/number_padding.cpp new file mode 100644 index 0000000000..40957950e3 --- /dev/null +++ b/icu4c/source/i18n/number_padding.cpp @@ -0,0 +1,76 @@ +// © 2017 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/numberformatter.h" +#include "number_types.h" +#include "number_stringbuilder.h" + +using namespace icu::number::impl; + +namespace { + +int32_t +addPaddingHelper(UChar32 paddingCp, int32_t requiredPadding, NumberStringBuilder &string, int32_t index, + UErrorCode &status) { + for (int32_t i = 0; i < requiredPadding; i++) { + // TODO: If appending to the end, this will cause actual insertion operations. Improve. + string.insertCodePoint(index, paddingCp, UNUM_FIELD_COUNT, status); + } + return U16_LENGTH(paddingCp) * requiredPadding; +} + +} + +Padder::Padder(UChar32 cp, int32_t width, UNumberFormatPadPosition position) : fWidth(width) { + fUnion.padding.fCp = cp; + fUnion.padding.fPosition = position; +} + +Padder::Padder(int32_t width) : fWidth(width) {} + +Padder Padder::none() { + return {-1}; +} + +Padder Padder::codePoints(UChar32 cp, int32_t targetWidth, UNumberFormatPadPosition position) { + // TODO: Validate the code point? + if (targetWidth >= 0) { + return {cp, targetWidth, position}; + } else { + return {U_NUMBER_PADDING_WIDTH_OUT_OF_RANGE_ERROR}; + } +} + +int32_t Padder::padAndApply(const impl::Modifier &mod1, const impl::Modifier &mod2, + impl::NumberStringBuilder &string, int32_t leftIndex, int32_t rightIndex, + UErrorCode &status) const { + int32_t modLength = mod1.getCodePointCount(status) + mod2.getCodePointCount(status); + int32_t requiredPadding = fWidth - modLength - string.codePointCount(); + U_ASSERT(leftIndex == 0 && + rightIndex == string.length()); // fix the previous line to remove this assertion + + int length = 0; + if (requiredPadding <= 0) { + // Padding is not required. 
+ length += mod1.apply(string, leftIndex, rightIndex, status); + length += mod2.apply(string, leftIndex, rightIndex + length, status); + return length; + } + + PadPosition position = fUnion.padding.fPosition; + UChar32 paddingCp = fUnion.padding.fCp; + if (position == UNUM_PAD_AFTER_PREFIX) { + length += addPaddingHelper(paddingCp, requiredPadding, string, leftIndex, status); + } else if (position == UNUM_PAD_BEFORE_SUFFIX) { + length += addPaddingHelper(paddingCp, requiredPadding, string, rightIndex + length, status); + } + length += mod1.apply(string, leftIndex, rightIndex + length, status); + length += mod2.apply(string, leftIndex, rightIndex + length, status); + if (position == UNUM_PAD_BEFORE_PREFIX) { + length += addPaddingHelper(paddingCp, requiredPadding, string, leftIndex, status); + } else if (position == UNUM_PAD_AFTER_SUFFIX) { + length += addPaddingHelper(paddingCp, requiredPadding, string, rightIndex + length, status); + } + + return length; +} diff --git a/icu4c/source/i18n/number_patternmodifier.cpp b/icu4c/source/i18n/number_patternmodifier.cpp new file mode 100644 index 0000000000..040a572e17 --- /dev/null +++ b/icu4c/source/i18n/number_patternmodifier.cpp @@ -0,0 +1,342 @@ +// © 2017 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html + +#include +#include "number_patternmodifier.h" +#include "unicode/dcfmtsym.h" +#include "unicode/ucurr.h" + +using namespace icu::number::impl; + +MutablePatternModifier::MutablePatternModifier(bool isStrong) : fStrong(isStrong) {} + +void MutablePatternModifier::setPatternInfo(const AffixPatternProvider *patternInfo) { + this->patternInfo = patternInfo; +} + +void MutablePatternModifier::setPatternAttributes(UNumberSignDisplay signDisplay, bool perMille) { + this->signDisplay = signDisplay; + this->perMilleReplacesPercent = perMille; +} + +void +MutablePatternModifier::setSymbols(const DecimalFormatSymbols *symbols, const CurrencyUnit ¤cy, + const UNumberUnitWidth unitWidth, const PluralRules *rules) { + U_ASSERT((rules == nullptr) == needsPlurals()); + this->symbols = symbols; + uprv_memcpy(static_cast(this->currencyCode), + currency.getISOCurrency(), + sizeof(char16_t) * 4); + this->unitWidth = unitWidth; + this->rules = rules; +} + +void MutablePatternModifier::setNumberProperties(bool isNegative, StandardPlural::Form plural) { + this->isNegative = isNegative; + this->plural = plural; +} + +bool MutablePatternModifier::needsPlurals() const { + UErrorCode statusLocal = U_ZERO_ERROR; + return patternInfo->containsSymbolType(AffixPatternType::TYPE_CURRENCY_TRIPLE, statusLocal); + // Silently ignore any error codes. 
+} + +ImmutablePatternModifier *MutablePatternModifier::createImmutable(UErrorCode &status) { + return createImmutableAndChain(nullptr, status); +} + +ImmutablePatternModifier * +MutablePatternModifier::createImmutableAndChain(const MicroPropsGenerator *parent, UErrorCode &status) { + + // TODO: Move StandardPlural VALUES to standardplural.h + static const StandardPlural::Form STANDARD_PLURAL_VALUES[] = { + StandardPlural::Form::ZERO, + StandardPlural::Form::ONE, + StandardPlural::Form::TWO, + StandardPlural::Form::FEW, + StandardPlural::Form::MANY, + StandardPlural::Form::OTHER}; + + auto pm = new ParameterizedModifier(); + if (pm == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + + if (needsPlurals()) { + // Slower path when we require the plural keyword. + for (StandardPlural::Form plural : STANDARD_PLURAL_VALUES) { + setNumberProperties(false, plural); + pm->adoptSignPluralModifier(false, plural, createConstantModifier(status)); + setNumberProperties(true, plural); + pm->adoptSignPluralModifier(true, plural, createConstantModifier(status)); + } + if (U_FAILURE(status)) { + delete pm; + return nullptr; + } + return new ImmutablePatternModifier(pm, rules, parent); // adopts pm + } else { + // Faster path when plural keyword is not needed. 
+ setNumberProperties(false, StandardPlural::Form::COUNT); + Modifier *positive = createConstantModifier(status); + setNumberProperties(true, StandardPlural::Form::COUNT); + Modifier *negative = createConstantModifier(status); + pm->adoptPositiveNegativeModifiers(positive, negative); + if (U_FAILURE(status)) { + delete pm; + return nullptr; + } + return new ImmutablePatternModifier(pm, nullptr, parent); // adopts pm + } +} + +ConstantMultiFieldModifier *MutablePatternModifier::createConstantModifier(UErrorCode &status) { + NumberStringBuilder a; + NumberStringBuilder b; + insertPrefix(a, 0, status); + insertSuffix(b, 0, status); + if (patternInfo->hasCurrencySign()) { + return new CurrencySpacingEnabledModifier(a, b, fStrong, *symbols, status); + } else { + return new ConstantMultiFieldModifier(a, b, fStrong); + } +} + +ImmutablePatternModifier::ImmutablePatternModifier(ParameterizedModifier *pm, const PluralRules *rules, + const MicroPropsGenerator *parent) + : pm(pm), rules(rules), parent(parent) {} + +void ImmutablePatternModifier::processQuantity(DecimalQuantity &quantity, MicroProps µs, + UErrorCode &status) const { + parent->processQuantity(quantity, micros, status); + applyToMicros(micros, quantity); +} + +void ImmutablePatternModifier::applyToMicros(MicroProps µs, DecimalQuantity &quantity) const { + if (rules == nullptr) { + micros.modMiddle = pm->getModifier(quantity.isNegative()); + } else { + // TODO: Fix this. Avoid the copy. + DecimalQuantity copy(quantity); + copy.roundToInfinity(); + StandardPlural::Form plural = copy.getStandardPlural(rules); + micros.modMiddle = pm->getModifier(quantity.isNegative(), plural); + } +} + +/** Used by the unsafe code path. 
*/ +MicroPropsGenerator &MutablePatternModifier::addToChain(const MicroPropsGenerator *parent) { + this->parent = parent; + return *this; +} + +void MutablePatternModifier::processQuantity(DecimalQuantity &fq, MicroProps µs, + UErrorCode &status) const { + parent->processQuantity(fq, micros, status); + // The unsafe code path performs self-mutation, so we need a const_cast. + // This method needs to be const because it overrides a const method in the parent class. + auto nonConstThis = const_cast(this); + if (needsPlurals()) { + // TODO: Fix this. Avoid the copy. + DecimalQuantity copy(fq); + micros.rounding.apply(copy, status); + nonConstThis->setNumberProperties(fq.isNegative(), copy.getStandardPlural(rules)); + } else { + nonConstThis->setNumberProperties(fq.isNegative(), StandardPlural::Form::COUNT); + } + micros.modMiddle = this; +} + +int32_t MutablePatternModifier::apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex, + UErrorCode &status) const { + // The unsafe code path performs self-mutation, so we need a const_cast. + // This method needs to be const because it overrides a const method in the parent class. + auto nonConstThis = const_cast(this); + int32_t prefixLen = nonConstThis->insertPrefix(output, leftIndex, status); + int32_t suffixLen = nonConstThis->insertSuffix(output, rightIndex + prefixLen, status); + CurrencySpacingEnabledModifier::applyCurrencySpacing( + output, leftIndex, prefixLen, rightIndex + prefixLen, suffixLen, *symbols, status); + return prefixLen + suffixLen; +} + +int32_t MutablePatternModifier::getPrefixLength(UErrorCode &status) const { + // The unsafe code path performs self-mutation, so we need a const_cast. + // This method needs to be const because it overrides a const method in the parent class. + auto nonConstThis = const_cast(this); + + // Enter and exit CharSequence Mode to get the length. 
+ nonConstThis->enterCharSequenceMode(true); + int result = AffixUtils::unescapedCodePointCount(*this, *this, status); // prefix length + nonConstThis->exitCharSequenceMode(); + return result; +} + +int32_t MutablePatternModifier::getCodePointCount(UErrorCode &status) const { + // The unsafe code path performs self-mutation, so we need a const_cast. + // This method needs to be const because it overrides a const method in the parent class. + auto nonConstThis = const_cast(this); + + // Enter and exit CharSequence Mode to get the length. + nonConstThis->enterCharSequenceMode(true); + int result = AffixUtils::unescapedCodePointCount(*this, *this, status); // prefix length + nonConstThis->exitCharSequenceMode(); + nonConstThis->enterCharSequenceMode(false); + result += AffixUtils::unescapedCodePointCount(*this, *this, status); // suffix length + nonConstThis->exitCharSequenceMode(); + return result; +} + +bool MutablePatternModifier::isStrong() const { + return fStrong; +} + +int32_t MutablePatternModifier::insertPrefix(NumberStringBuilder &sb, int position, UErrorCode &status) { + enterCharSequenceMode(true); + int length = AffixUtils::unescape(*this, sb, position, *this, status); + exitCharSequenceMode(); + return length; +} + +int32_t MutablePatternModifier::insertSuffix(NumberStringBuilder &sb, int position, UErrorCode &status) { + enterCharSequenceMode(false); + int length = AffixUtils::unescape(*this, sb, position, *this, status); + exitCharSequenceMode(); + return length; +} + +UnicodeString MutablePatternModifier::getSymbol(AffixPatternType type) const { + switch (type) { + case AffixPatternType::TYPE_MINUS_SIGN: + return symbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kMinusSignSymbol); + case AffixPatternType::TYPE_PLUS_SIGN: + return symbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kPlusSignSymbol); + case AffixPatternType::TYPE_PERCENT: + return symbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kPercentSymbol); + case 
AffixPatternType::TYPE_PERMILLE: + return symbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kPerMillSymbol); + case AffixPatternType::TYPE_CURRENCY_SINGLE: { + // UnitWidth ISO and HIDDEN overrides the singular currency symbol. + if (unitWidth == UNumberUnitWidth::UNUM_UNIT_WIDTH_ISO_CODE) { + return UnicodeString(currencyCode, 3); + } else if (unitWidth == UNumberUnitWidth::UNUM_UNIT_WIDTH_HIDDEN) { + return UnicodeString(); + } else { + UErrorCode status = U_ZERO_ERROR; + UBool isChoiceFormat = FALSE; + int32_t symbolLen = 0; + const char16_t *symbol = ucurr_getName( + currencyCode, + symbols->getLocale().getName(), + UCurrNameStyle::UCURR_SYMBOL_NAME, + &isChoiceFormat, + &symbolLen, + &status); + return UnicodeString(symbol, symbolLen); + } + } + case AffixPatternType::TYPE_CURRENCY_DOUBLE: + return UnicodeString(currencyCode, 3); + case AffixPatternType::TYPE_CURRENCY_TRIPLE: { + // NOTE: This is the code path only for patterns containing "¤¤¤". + // Plural currencies set via the API are formatted in LongNameHandler. + // This code path is used by DecimalFormat via CurrencyPluralInfo. + U_ASSERT(plural != StandardPlural::Form::COUNT); + UErrorCode status = U_ZERO_ERROR; + UBool isChoiceFormat = FALSE; + int32_t symbolLen = 0; + const char16_t *symbol = ucurr_getPluralName( + currencyCode, + symbols->getLocale().getName(), + &isChoiceFormat, + StandardPlural::getKeyword(plural), + &symbolLen, + &status); + return UnicodeString(symbol, symbolLen); + } + case AffixPatternType::TYPE_CURRENCY_QUAD: + return UnicodeString(u"\uFFFD"); + case AffixPatternType::TYPE_CURRENCY_QUINT: + return UnicodeString(u"\uFFFD"); + default: + U_ASSERT(false); + return UnicodeString(); + } +} + +/** This method contains the heart of the logic for rendering LDML affix strings. 
*/ +void MutablePatternModifier::enterCharSequenceMode(bool isPrefix) { + U_ASSERT(!inCharSequenceMode); + inCharSequenceMode = true; + + // Should the output render '+' where '-' would normally appear in the pattern? + plusReplacesMinusSign = !isNegative && ( + signDisplay == UNUM_SIGN_ALWAYS || + signDisplay == UNUM_SIGN_ACCOUNTING_ALWAYS) && + patternInfo->positiveHasPlusSign() == false; + + // Should we use the affix from the negative subpattern? (If not, we will use the positive subpattern.) + bool useNegativeAffixPattern = patternInfo->hasNegativeSubpattern() && ( + isNegative || (patternInfo->negativeHasMinusSign() && plusReplacesMinusSign)); + + // Resolve the flags for the affix pattern. + fFlags = 0; + if (useNegativeAffixPattern) { + fFlags |= AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN; + } + if (isPrefix) { + fFlags |= AffixPatternProvider::AFFIX_PREFIX; + } + if (plural != StandardPlural::Form::COUNT) { + U_ASSERT(plural == (AffixPatternProvider::AFFIX_PLURAL_MASK & plural)); + fFlags |= plural; + } + + // Should we prepend a sign to the pattern? + if (!isPrefix || useNegativeAffixPattern) { + prependSign = false; + } else if (isNegative) { + prependSign = signDisplay != UNUM_SIGN_NEVER; + } else { + prependSign = plusReplacesMinusSign; + } + + // Finally, compute the length of the affix pattern. + fLength = patternInfo->length(fFlags) + (prependSign ? 
1 : 0); +} + +void MutablePatternModifier::exitCharSequenceMode() { + U_ASSERT(inCharSequenceMode) + inCharSequenceMode = false; +} + +int32_t MutablePatternModifier::length() const { + U_ASSERT(inCharSequenceMode); + return fLength; +} + +char16_t MutablePatternModifier::charAt(int32_t index) const { + U_ASSERT(inCharSequenceMode); + char16_t candidate; + if (prependSign && index == 0) { + candidate = '-'; + } else if (prependSign) { + candidate = patternInfo->charAt(fFlags, index - 1); + } else { + candidate = patternInfo->charAt(fFlags, index); + } + if (plusReplacesMinusSign && candidate == '-') { + return '+'; + } + if (perMilleReplacesPercent && candidate == '%') { + return u'‰'; + } + return candidate; +} + +UnicodeString MutablePatternModifier::toUnicodeString() const { + // Never called by AffixUtils + U_ASSERT(false); + return UnicodeString(); +} diff --git a/icu4c/source/i18n/number_patternmodifier.h b/icu4c/source/i18n/number_patternmodifier.h new file mode 100644 index 0000000000..be2fbfbeab --- /dev/null +++ b/icu4c/source/i18n/number_patternmodifier.h @@ -0,0 +1,234 @@ +// © 2017 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef NUMBERFORMAT_MUTABLEPATTERNMODIFIER_H +#define NUMBERFORMAT_MUTABLEPATTERNMODIFIER_H + +#include +#include "unicode/numberformatter.h" +#include "number_patternstring.h" +#include "number_types.h" +#include "number_modifiers.h" +#include "number_utils.h" + +U_NAMESPACE_BEGIN +namespace number { +namespace impl { + +class ImmutablePatternModifier : public MicroPropsGenerator { + public: + ImmutablePatternModifier(ParameterizedModifier *pm, const PluralRules *rules, + const MicroPropsGenerator *parent); + + ~ImmutablePatternModifier() override = default; + + void processQuantity(DecimalQuantity &, MicroProps µs, UErrorCode &status) const override; + + void applyToMicros(MicroProps µs, DecimalQuantity &quantity) const; + + private: + const LocalPointer pm; + const PluralRules *rules; + const MicroPropsGenerator *parent; +}; + +/** + * This class is a {@link Modifier} that wraps a decimal format pattern. It applies the pattern's affixes in + * {@link Modifier#apply}. + * + *

+ * In addition to being a Modifier, this class contains the business logic for substituting the correct locale symbols + * into the affixes of the decimal format pattern. + * + *

+ * In order to use this class, create a new instance and call the following four setters: {@link #setPatternInfo}, + * {@link #setPatternAttributes}, {@link #setSymbols}, and {@link #setNumberProperties}. After calling these four + * setters, the instance will be ready for use as a Modifier. + * + *

+ * This is a MUTABLE, NON-THREAD-SAFE class designed for performance. Do NOT save references to this or attempt to use + * it from multiple threads! Instead, you can obtain a safe, immutable decimal format pattern modifier by calling + * {@link MutablePatternModifier#createImmutable}, in effect treating this instance as a builder for the immutable + * variant. + */ +class MutablePatternModifier + : public MicroPropsGenerator, public Modifier, public SymbolProvider, public CharSequence { + public: + + ~MutablePatternModifier() override = default; + + /** + * @param isStrong + * Whether the modifier should be considered strong. For more information, see + * {@link Modifier#isStrong()}. Most of the time, decimal format pattern modifiers should be considered + * as non-strong. + */ + explicit MutablePatternModifier(bool isStrong); + + /** + * Sets a reference to the parsed decimal format pattern, usually obtained from + * {@link PatternStringParser#parseToPatternInfo(String)}, but any implementation of {@link AffixPatternProvider} is + * accepted. + */ + void setPatternInfo(const AffixPatternProvider *patternInfo); + + /** + * Sets attributes that imply changes to the literal interpretation of the pattern string affixes. + * + * @param signDisplay + * Whether to force a plus sign on positive numbers. + * @param perMille + * Whether to substitute the percent sign in the pattern with a permille sign. + */ + void setPatternAttributes(UNumberSignDisplay signDisplay, bool perMille); + + /** + * Sets locale-specific details that affect the symbols substituted into the pattern string affixes. + * + * @param symbols + * The desired instance of DecimalFormatSymbols. + * @param currency + * The currency to be used when substituting currency values into the affixes. + * @param unitWidth + * The width used to render currencies. 
+ * @param rules + * Required if the triple currency sign, "¤¤¤", appears in the pattern, which can be determined from the + * convenience method {@link #needsPlurals()}. + */ + void + setSymbols(const DecimalFormatSymbols *symbols, const CurrencyUnit ¤cy, UNumberUnitWidth unitWidth, + const PluralRules *rules); + + /** + * Sets attributes of the current number being processed. + * + * @param isNegative + * Whether the number is negative. + * @param plural + * The plural form of the number, required only if the pattern contains the triple currency sign, "¤¤¤" + * (and as indicated by {@link #needsPlurals()}). + */ + void setNumberProperties(bool isNegative, StandardPlural::Form plural); + + /** + * Returns true if the pattern represented by this MurkyModifier requires a plural keyword in order to localize. + * This is currently true only if there is a currency long name placeholder in the pattern ("¤¤¤"). + */ + bool needsPlurals() const; + + /** + * Creates a new quantity-dependent Modifier that behaves the same as the current instance, but which is immutable + * and can be saved for future use. The number properties in the current instance are mutated; all other properties + * are left untouched. + * + *

+ * The resulting modifier cannot be used in a QuantityChain. + * + *

+ * CREATES A NEW HEAP OBJECT; THE CALLER GETS OWNERSHIP. + * + * @return An immutable that supports both positive and negative numbers. + */ + ImmutablePatternModifier *createImmutable(UErrorCode &status); + + /** + * Creates a new quantity-dependent Modifier that behaves the same as the current instance, but which is immutable + * and can be saved for future use. The number properties in the current instance are mutated; all other properties + * are left untouched. + * + *

+ * CREATES A NEW HEAP OBJECT; THE CALLER GETS OWNERSHIP. + * + * @param parent + * The QuantityChain to which to chain this immutable. + * @return An immutable that supports both positive and negative numbers. + */ + ImmutablePatternModifier * + createImmutableAndChain(const MicroPropsGenerator *parent, UErrorCode &status); + + MicroPropsGenerator &addToChain(const MicroPropsGenerator *parent); + + void processQuantity(DecimalQuantity &, MicroProps µs, UErrorCode &status) const override; + + int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex, + UErrorCode &status) const override; + + int32_t getPrefixLength(UErrorCode &status) const override; + + int32_t getCodePointCount(UErrorCode &status) const override; + + bool isStrong() const override; + + /** + * Returns the string that substitutes a given symbol type in a pattern. + */ + UnicodeString getSymbol(AffixPatternType type) const override; + + int32_t length() const override; + + char16_t charAt(int32_t index) const override; + + // Use default implementation of codePointAt + + UnicodeString toUnicodeString() const override; + + private: + // Modifier details + const bool fStrong; + + // Pattern details + const AffixPatternProvider *patternInfo; + UNumberSignDisplay signDisplay; + bool perMilleReplacesPercent; + + // Symbol details + const DecimalFormatSymbols *symbols; + UNumberUnitWidth unitWidth; + char16_t currencyCode[4]; + const PluralRules *rules; + + // Number details + bool isNegative; + StandardPlural::Form plural; + + // QuantityChain details + const MicroPropsGenerator *parent; + + // Transient CharSequence fields + bool inCharSequenceMode; + int32_t fFlags; + int32_t fLength; + bool prependSign; + bool plusReplacesMinusSign; + + /** + * Uses the current properties to create a single {@link ConstantMultiFieldModifier} with currency spacing support + * if required. + * + *

+ * CREATES A NEW HEAP OBJECT; THE CALLER GETS OWNERSHIP. + * + * @param a + * A working NumberStringBuilder object; passed from the outside to prevent the need to create many new + * instances if this method is called in a loop. + * @param b + * Another working NumberStringBuilder object. + * @return The constant modifier object. + */ + ConstantMultiFieldModifier *createConstantModifier(UErrorCode &status); + + int32_t insertPrefix(NumberStringBuilder &sb, int position, UErrorCode &status); + + int32_t insertSuffix(NumberStringBuilder &sb, int position, UErrorCode &status); + + void enterCharSequenceMode(bool isPrefix); + + void exitCharSequenceMode(); +}; + + +} // namespace impl +} // namespace number +U_NAMESPACE_END + +#endif //NUMBERFORMAT_MUTABLEPATTERNMODIFIER_H diff --git a/icu4c/source/i18n/number_patternstring.cpp b/icu4c/source/i18n/number_patternstring.cpp new file mode 100644 index 0000000000..4de385cc94 --- /dev/null +++ b/icu4c/source/i18n/number_patternstring.cpp @@ -0,0 +1,831 @@ +// © 2017 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html + +#include +#include "number_patternstring.h" +#include "unicode/utf16.h" +#include "number_utils.h" + +using namespace icu::number::impl; + +void PatternParser::parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo& patternInfo, UErrorCode &status) { + patternInfo.consumePattern(patternString, status); +} + +DecimalFormatProperties +PatternParser::parseToProperties(const UnicodeString& pattern, IgnoreRounding ignoreRounding, + UErrorCode &status) { + DecimalFormatProperties properties; + parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status); + return properties; +} + +void PatternParser::parseToExistingProperties(const UnicodeString& pattern, DecimalFormatProperties properties, + IgnoreRounding ignoreRounding, UErrorCode &status) { + parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status); +} + +char16_t ParsedPatternInfo::charAt(int32_t flags, int32_t index) const { + const Endpoints &endpoints = getEndpoints(flags); + if (index < 0 || index >= endpoints.end - endpoints.start) { + U_ASSERT(false); + } + return pattern.charAt(endpoints.start + index); +} + +int32_t ParsedPatternInfo::length(int32_t flags) const { + return getLengthFromEndpoints(getEndpoints(flags)); +} + +int32_t ParsedPatternInfo::getLengthFromEndpoints(const Endpoints &endpoints) { + return endpoints.end - endpoints.start; +} + +UnicodeString ParsedPatternInfo::getString(int32_t flags) const { + const Endpoints &endpoints = getEndpoints(flags); + if (endpoints.start == endpoints.end) { + return UnicodeString(); + } + // Create a new UnicodeString + return UnicodeString(pattern, endpoints.start, endpoints.end - endpoints.start); +} + +const Endpoints &ParsedPatternInfo::getEndpoints(int32_t flags) const { + bool prefix = (flags & AFFIX_PREFIX) != 0; + bool isNegative = (flags & AFFIX_NEGATIVE_SUBPATTERN) != 0; + bool padding = (flags & AFFIX_PADDING) != 0; + if (isNegative 
&& padding) { + return negative.paddingEndpoints; + } else if (padding) { + return positive.paddingEndpoints; + } else if (prefix && isNegative) { + return negative.prefixEndpoints; + } else if (prefix) { + return positive.prefixEndpoints; + } else if (isNegative) { + return negative.suffixEndpoints; + } else { + return positive.suffixEndpoints; + } +} + +bool ParsedPatternInfo::positiveHasPlusSign() const { + return positive.hasPlusSign; +} + +bool ParsedPatternInfo::hasNegativeSubpattern() const { + return fHasNegativeSubpattern; +} + +bool ParsedPatternInfo::negativeHasMinusSign() const { + return negative.hasMinusSign; +} + +bool ParsedPatternInfo::hasCurrencySign() const { + return positive.hasCurrencySign || (fHasNegativeSubpattern && negative.hasCurrencySign); +} + +bool ParsedPatternInfo::containsSymbolType(AffixPatternType type, UErrorCode &status) const { + return AffixUtils::containsType(UnicodeStringCharSequence(pattern), type, status); +} + +///////////////////////////////////////////////////// +/// BEGIN RECURSIVE DESCENT PARSER IMPLEMENTATION /// +///////////////////////////////////////////////////// + +UChar32 ParsedPatternInfo::ParserState::peek() { + if (offset == pattern.length()) { + return -1; + } else { + return pattern.char32At(offset); + } +} + +UChar32 ParsedPatternInfo::ParserState::next() { + int codePoint = peek(); + offset += U16_LENGTH(codePoint); + return codePoint; +} + +void ParsedPatternInfo::consumePattern(const UnicodeString& patternString, UErrorCode &status) { + if (U_FAILURE(status)) { return; } + this->pattern = patternString; + + // pattern := subpattern (';' subpattern)? 
+ currentSubpattern = &positive; + consumeSubpattern(status); + if (U_FAILURE(status)) { return; } + if (state.peek() == ';') { + state.next(); // consume the ';' + // Don't consume the negative subpattern if it is empty (trailing ';') + if (state.peek() != -1) { + fHasNegativeSubpattern = true; + currentSubpattern = &negative; + consumeSubpattern(status); + if (U_FAILURE(status)) { return; } + } + } + if (state.peek() != -1) { + state.toParseException(u"Found unquoted special character"); + status = U_UNQUOTED_SPECIAL; + } +} + +void ParsedPatternInfo::consumeSubpattern(UErrorCode &status) { + // subpattern := literals? number exponent? literals? + consumePadding(PadPosition::UNUM_PAD_BEFORE_PREFIX, status); + if (U_FAILURE(status)) { return; } + consumeAffix(currentSubpattern->prefixEndpoints, status); + if (U_FAILURE(status)) { return; } + consumePadding(PadPosition::UNUM_PAD_AFTER_PREFIX, status); + if (U_FAILURE(status)) { return; } + consumeFormat(status); + if (U_FAILURE(status)) { return; } + consumeExponent(status); + if (U_FAILURE(status)) { return; } + consumePadding(PadPosition::UNUM_PAD_BEFORE_SUFFIX, status); + if (U_FAILURE(status)) { return; } + consumeAffix(currentSubpattern->suffixEndpoints, status); + if (U_FAILURE(status)) { return; } + consumePadding(PadPosition::UNUM_PAD_AFTER_SUFFIX, status); + if (U_FAILURE(status)) { return; } +} + +void ParsedPatternInfo::consumePadding(PadPosition paddingLocation, UErrorCode &status) { + if (state.peek() != '*') { + return; + } + if (!currentSubpattern->paddingLocation.isNull()) { + state.toParseException(u"Cannot have multiple pad specifiers"); + status = U_MULTIPLE_PAD_SPECIFIERS; + return; + } + currentSubpattern->paddingLocation = paddingLocation; + state.next(); // consume the '*' + currentSubpattern->paddingEndpoints.start = state.offset; + consumeLiteral(status); + currentSubpattern->paddingEndpoints.end = state.offset; +} + +void ParsedPatternInfo::consumeAffix(Endpoints &endpoints, UErrorCode 
&status) { + // literals := { literal } + endpoints.start = state.offset; + while (true) { + switch (state.peek()) { + case '#': + case '@': + case ';': + case '*': + case '.': + case ',': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case -1: + // Characters that cannot appear unquoted in a literal + // break outer; + goto after_outer; + + case '%': + currentSubpattern->hasPercentSign = true; + break; + + case u'‰': + currentSubpattern->hasPerMilleSign = true; + break; + + case u'¤': + currentSubpattern->hasCurrencySign = true; + break; + + case '-': + currentSubpattern->hasMinusSign = true; + break; + + case '+': + currentSubpattern->hasPlusSign = true; + break; + + default: + break; + } + consumeLiteral(status); + if (U_FAILURE(status)) { return; } + } + after_outer: + endpoints.end = state.offset; +} + +void ParsedPatternInfo::consumeLiteral(UErrorCode &status) { + if (state.peek() == -1) { + state.toParseException(u"Expected unquoted literal but found EOL"); + status = U_PATTERN_SYNTAX_ERROR; + return; + } else if (state.peek() == '\'') { + state.next(); // consume the starting quote + while (state.peek() != '\'') { + if (state.peek() == -1) { + state.toParseException(u"Expected quoted literal but found EOL"); + status = U_PATTERN_SYNTAX_ERROR; + return; + } else { + state.next(); // consume a quoted character + } + } + state.next(); // consume the ending quote + } else { + // consume a non-quoted literal character + state.next(); + } +} + +void ParsedPatternInfo::consumeFormat(UErrorCode &status) { + consumeIntegerFormat(status); + if (U_FAILURE(status)) { return; } + if (state.peek() == '.') { + state.next(); // consume the decimal point + currentSubpattern->hasDecimal = true; + currentSubpattern->widthExceptAffixes += 1; + consumeFractionFormat(status); + if (U_FAILURE(status)) { return; } + } +} + +void ParsedPatternInfo::consumeIntegerFormat(UErrorCode &status) { + // Convenience 
reference: + ParsedSubpatternInfo &result = *currentSubpattern; + + while (true) { + switch (state.peek()) { + case ',': + result.widthExceptAffixes += 1; + result.groupingSizes <<= 16; + break; + + case '#': + if (result.integerNumerals > 0) { + state.toParseException(u"# cannot follow 0 before decimal point"); + status = U_UNEXPECTED_TOKEN; + return; + } + result.widthExceptAffixes += 1; + result.groupingSizes += 1; + if (result.integerAtSigns > 0) { + result.integerTrailingHashSigns += 1; + } else { + result.integerLeadingHashSigns += 1; + } + result.integerTotal += 1; + break; + + case '@': + if (result.integerNumerals > 0) { + state.toParseException(u"Cannot mix 0 and @"); + status = U_UNEXPECTED_TOKEN; + return; + } + if (result.integerTrailingHashSigns > 0) { + state.toParseException(u"Cannot nest # inside of a run of @"); + status = U_UNEXPECTED_TOKEN; + return; + } + result.widthExceptAffixes += 1; + result.groupingSizes += 1; + result.integerAtSigns += 1; + result.integerTotal += 1; + break; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (result.integerAtSigns > 0) { + state.toParseException(u"Cannot mix @ and 0"); + status = U_UNEXPECTED_TOKEN; + return; + } + result.widthExceptAffixes += 1; + result.groupingSizes += 1; + result.integerNumerals += 1; + result.integerTotal += 1; + if (!result.rounding.isZero() || state.peek() != '0') { + result.rounding.appendDigit(static_cast(state.peek() - '0'), 0, true); + } + break; + + default: + goto after_outer; + } + state.next(); // consume the symbol + } + + after_outer: + // Disallow patterns with a trailing ',' or with two ',' next to each other + auto grouping1 = static_cast (result.groupingSizes & 0xffff); + auto grouping2 = static_cast ((result.groupingSizes >> 16) & 0xffff); + auto grouping3 = static_cast ((result.groupingSizes >> 32) & 0xffff); + if (grouping1 == 0 && grouping2 != -1) { + state.toParseException(u"Trailing 
grouping separator is invalid"); + status = U_UNEXPECTED_TOKEN; + return; + } + if (grouping2 == 0 && grouping3 != -1) { + state.toParseException(u"Grouping width of zero is invalid"); + status = U_PATTERN_SYNTAX_ERROR; + return; + } +} + +void ParsedPatternInfo::consumeFractionFormat(UErrorCode &status) { + // Convenience reference: + ParsedSubpatternInfo &result = *currentSubpattern; + + int32_t zeroCounter = 0; + while (true) { + switch (state.peek()) { + case '#': + result.widthExceptAffixes += 1; + result.fractionHashSigns += 1; + result.fractionTotal += 1; + zeroCounter++; + break; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (result.fractionHashSigns > 0) { + state.toParseException(u"0 cannot follow # after decimal point"); + status = U_UNEXPECTED_TOKEN; + return; + } + result.widthExceptAffixes += 1; + result.fractionNumerals += 1; + result.fractionTotal += 1; + if (state.peek() == '0') { + zeroCounter++; + } else { + result.rounding + .appendDigit(static_cast(state.peek() - '0'), zeroCounter, false); + zeroCounter = 0; + } + break; + + default: + return; + } + state.next(); // consume the symbol + } +} + +void ParsedPatternInfo::consumeExponent(UErrorCode &status) { + // Convenience reference: + ParsedSubpatternInfo &result = *currentSubpattern; + + if (state.peek() != 'E') { + return; + } + if ((result.groupingSizes & 0xffff0000L) != 0xffff0000L) { + state.toParseException(u"Cannot have grouping separator in scientific notation"); + status = U_MALFORMED_EXPONENTIAL_PATTERN; + return; + } + state.next(); // consume the E + result.widthExceptAffixes++; + if (state.peek() == '+') { + state.next(); // consume the + + result.exponentHasPlusSign = true; + result.widthExceptAffixes++; + } + while (state.peek() == '0') { + state.next(); // consume the 0 + result.exponentZeros += 1; + result.widthExceptAffixes++; + } +} + +/////////////////////////////////////////////////// +/// END 
RECURSIVE DESCENT PARSER IMPLEMENTATION ///
+///////////////////////////////////////////////////
+
+/**
+ * Parses `pattern` and writes the result into `properties`.
+ * An empty pattern resets `properties` to its defaults. If parsing fails, `properties` is left
+ * untouched and `status` carries the parser's error code.
+ */
+void
+PatternParser::parseToExistingPropertiesImpl(const UnicodeString& pattern, DecimalFormatProperties &properties,
+                                             IgnoreRounding ignoreRounding, UErrorCode &status) {
+    if (pattern.length() == 0) {
+        // Backwards compatibility requires that we reset to the default values.
+        // TODO: Only overwrite the properties that "saveToProperties" normally touches?
+        properties.clear();
+        return;
+    }
+
+    ParsedPatternInfo patternInfo;
+    parseToPatternInfo(pattern, patternInfo, status);
+    if (U_FAILURE(status)) { return; }
+    patternInfoToProperties(properties, patternInfo, ignoreRounding, status);
+}
+
+/**
+ * Copies the parsed pattern data into the property bag: grouping, min/max digits, rounding,
+ * decimal separator, scientific notation, padding, affixes and the magnitude multiplier.
+ *
+ * NOTE(review): patternInfo is taken by value, so every call copies the whole parse result.
+ * A const reference would avoid the copy, but the declaration in number_patternstring.h has to
+ * change in the same commit.
+ */
+void PatternParser::patternInfoToProperties(DecimalFormatProperties &properties,
+                                            ParsedPatternInfo patternInfo,
+                                            IgnoreRounding _ignoreRounding, UErrorCode &status) {
+    // Translate from PatternParseResult to Properties.
+    // Note that most data from "negative" is ignored per the specification of DecimalFormat.
+
+    const ParsedSubpatternInfo &positive = patternInfo.positive;
+
+    bool ignoreRounding;
+    if (_ignoreRounding == IGNORE_ROUNDING_NEVER) {
+        ignoreRounding = false;
+    } else if (_ignoreRounding == IGNORE_ROUNDING_IF_CURRENCY) {
+        ignoreRounding = positive.hasCurrencySign;
+    } else {
+        U_ASSERT(_ignoreRounding == IGNORE_ROUNDING_ALWAYS);
+        ignoreRounding = true;
+    }
+
+    // Grouping settings
+    // groupingSizes packs three 16-bit fields; a field of 0xffff must read back as -1, so each
+    // one is narrowed to a signed 16-bit integer before the comparisons below.
+    auto grouping1 = static_cast<int16_t> (positive.groupingSizes & 0xffff);
+    auto grouping2 = static_cast<int16_t> ((positive.groupingSizes >> 16) & 0xffff);
+    auto grouping3 = static_cast<int16_t> ((positive.groupingSizes >> 32) & 0xffff);
+    if (grouping2 != -1) {
+        properties.groupingSize = grouping1;
+    } else {
+        properties.groupingSize = -1;
+    }
+    if (grouping3 != -1) {
+        properties.secondaryGroupingSize = grouping2;
+    } else {
+        properties.secondaryGroupingSize = -1;
+    }
+
+    // For backwards compatibility, require that the pattern emit at least one min digit.
+    int minInt, minFrac;
+    if (positive.integerTotal == 0 && positive.fractionTotal > 0) {
+        // patterns like ".##"
+        minInt = 0;
+        minFrac = uprv_max(1, positive.fractionNumerals);
+    } else if (positive.integerNumerals == 0 && positive.fractionNumerals == 0) {
+        // patterns like "#.##"
+        minInt = 1;
+        minFrac = 0;
+    } else {
+        minInt = positive.integerNumerals;
+        minFrac = positive.fractionNumerals;
+    }
+
+    // Rounding settings
+    // Don't set basic rounding when there is a currency sign; defer to CurrencyUsage
+    if (positive.integerAtSigns > 0) {
+        properties.minimumFractionDigits = -1;
+        properties.maximumFractionDigits = -1;
+        properties.roundingIncrement = 0.0;
+        properties.minimumSignificantDigits = positive.integerAtSigns;
+        properties.maximumSignificantDigits =
+                positive.integerAtSigns + positive.integerTrailingHashSigns;
+    } else if (!positive.rounding.isZero()) {
+        if (!ignoreRounding) {
+            properties.minimumFractionDigits = minFrac;
+            properties.maximumFractionDigits = positive.fractionTotal;
+            properties.roundingIncrement = positive.rounding.toDouble();
+        } else {
+            properties.minimumFractionDigits = -1;
+            properties.maximumFractionDigits = -1;
+            properties.roundingIncrement = 0.0;
+        }
+        properties.minimumSignificantDigits = -1;
+        properties.maximumSignificantDigits = -1;
+    } else {
+        if (!ignoreRounding) {
+            properties.minimumFractionDigits = minFrac;
+            properties.maximumFractionDigits = positive.fractionTotal;
+            properties.roundingIncrement = 0.0;
+        } else {
+            properties.minimumFractionDigits = -1;
+            properties.maximumFractionDigits = -1;
+            properties.roundingIncrement = 0.0;
+        }
+        properties.minimumSignificantDigits = -1;
+        properties.maximumSignificantDigits = -1;
+    }
+
+    // If the pattern ends with a '.' then force the decimal point.
+    if (positive.hasDecimal && positive.fractionTotal == 0) {
+        properties.decimalSeparatorAlwaysShown = true;
+    } else {
+        properties.decimalSeparatorAlwaysShown = false;
+    }
+
+    // Scientific notation settings
+    if (positive.exponentZeros > 0) {
+        properties.exponentSignAlwaysShown = positive.exponentHasPlusSign;
+        properties.minimumExponentDigits = positive.exponentZeros;
+        if (positive.integerAtSigns == 0) {
+            // patterns without '@' can define max integer digits, used for engineering notation
+            properties.minimumIntegerDigits = positive.integerNumerals;
+            properties.maximumIntegerDigits = positive.integerTotal;
+        } else {
+            // patterns with '@' cannot define max integer digits
+            properties.minimumIntegerDigits = 1;
+            properties.maximumIntegerDigits = -1;
+        }
+    } else {
+        properties.exponentSignAlwaysShown = false;
+        properties.minimumExponentDigits = -1;
+        properties.minimumIntegerDigits = minInt;
+        properties.maximumIntegerDigits = -1;
+    }
+
+    // Compute the affix patterns (required for both padding and affixes)
+    UnicodeString posPrefix = patternInfo.getString(AffixPatternProvider::AFFIX_PREFIX);
+    UnicodeString posSuffix = patternInfo.getString(0);
+
+    // Padding settings
+    if (!positive.paddingLocation.isNull()) {
+        // The width of the positive prefix and suffix templates are included in the padding
+        int paddingWidth =
+                positive.widthExceptAffixes + AffixUtils::estimateLength(UnicodeStringCharSequence(posPrefix), status) +
+                AffixUtils::estimateLength(UnicodeStringCharSequence(posSuffix), status);
+        properties.formatWidth = paddingWidth;
+        UnicodeString rawPaddingString = patternInfo.getString(AffixPatternProvider::AFFIX_PADDING);
+        if (rawPaddingString.length() == 1) {
+            properties.padString = rawPaddingString;
+        } else if (rawPaddingString.length() == 2) {
+            if (rawPaddingString.charAt(0) == '\'') {
+                properties.padString.setTo(u"'", -1);
+            } else {
+                properties.padString = rawPaddingString;
+            }
+        } else {
+            properties.padString = 
UnicodeString(rawPaddingString, 1, rawPaddingString.length() - 2); + } + properties.padPosition = positive.paddingLocation; + } else { + properties.formatWidth = -1; + properties.padString.setToBogus(); + properties.padPosition.nullify(); + } + + // Set the affixes + // Always call the setter, even if the prefixes are empty, especially in the case of the + // negative prefix pattern, to prevent default values from overriding the pattern. + properties.positivePrefixPattern = posPrefix; + properties.positiveSuffixPattern = posSuffix; + if (patternInfo.fHasNegativeSubpattern) { + properties.negativePrefixPattern = patternInfo.getString( + AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN | AffixPatternProvider::AFFIX_PREFIX); + properties.negativeSuffixPattern = patternInfo.getString( + AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN); + } else { + properties.negativePrefixPattern.setToBogus(); + properties.negativeSuffixPattern.setToBogus(); + } + + // Set the magnitude multiplier + if (positive.hasPercentSign) { + properties.magnitudeMultiplier = 2; + } else if (positive.hasPerMilleSign) { + properties.magnitudeMultiplier = 3; + } else { + properties.magnitudeMultiplier = 0; + } +} + +/////////////////////////////////////////////////////////////////// +/// End PatternStringParser.java; begin PatternStringUtils.java /// +/////////////////////////////////////////////////////////////////// + +UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatProperties &properties, + UErrorCode &status) { + UnicodeString sb; + + // Convenience references + // The uprv_min() calls prevent DoS + int dosMax = 100; + int groupingSize = uprv_min(properties.secondaryGroupingSize, dosMax); + int firstGroupingSize = uprv_min(properties.groupingSize, dosMax); + int paddingWidth = uprv_min(properties.formatWidth, dosMax); + NullableValue paddingLocation = properties.padPosition; + UnicodeString paddingString = properties.padString; + int minInt = 
uprv_max(uprv_min(properties.minimumIntegerDigits, dosMax), 0); + int maxInt = uprv_min(properties.maximumIntegerDigits, dosMax); + int minFrac = uprv_max(uprv_min(properties.minimumFractionDigits, dosMax), 0); + int maxFrac = uprv_min(properties.maximumFractionDigits, dosMax); + int minSig = uprv_min(properties.minimumSignificantDigits, dosMax); + int maxSig = uprv_min(properties.maximumSignificantDigits, dosMax); + bool alwaysShowDecimal = properties.decimalSeparatorAlwaysShown; + int exponentDigits = uprv_min(properties.minimumExponentDigits, dosMax); + bool exponentShowPlusSign = properties.exponentSignAlwaysShown; + UnicodeString pp = properties.positivePrefix; + UnicodeString ppp = properties.positivePrefixPattern; + UnicodeString ps = properties.positiveSuffix; + UnicodeString psp = properties.positiveSuffixPattern; + UnicodeString np = properties.negativePrefix; + UnicodeString npp = properties.negativePrefixPattern; + UnicodeString ns = properties.negativeSuffix; + UnicodeString nsp = properties.negativeSuffixPattern; + + // Prefixes + if (!ppp.isBogus()) { + sb.append(ppp); + } + sb.append(AffixUtils::escape(UnicodeStringCharSequence(pp))); + int afterPrefixPos = sb.length(); + + // Figure out the grouping sizes. + int grouping1, grouping2, grouping; + if (groupingSize != uprv_min(dosMax, -1) && firstGroupingSize != uprv_min(dosMax, -1) && + groupingSize != firstGroupingSize) { + grouping = groupingSize; + grouping1 = groupingSize; + grouping2 = firstGroupingSize; + } else if (groupingSize != uprv_min(dosMax, -1)) { + grouping = groupingSize; + grouping1 = 0; + grouping2 = groupingSize; + } else if (firstGroupingSize != uprv_min(dosMax, -1)) { + grouping = groupingSize; + grouping1 = 0; + grouping2 = firstGroupingSize; + } else { + grouping = 0; + grouping1 = 0; + grouping2 = 0; + } + int groupingLength = grouping1 + grouping2 + 1; + + // Figure out the digits we need to put in the pattern. 
+ double roundingInterval = properties.roundingIncrement; + UnicodeString digitsString; + int digitsStringScale = 0; + if (maxSig != uprv_min(dosMax, -1)) { + // Significant Digits. + while (digitsString.length() < minSig) { + digitsString.append('@'); + } + while (digitsString.length() < maxSig) { + digitsString.append('#'); + } + } else if (roundingInterval != 0.0) { + // Rounding Interval. + digitsStringScale = minFrac; + // TODO: Check for DoS here? + DecimalQuantity incrementQuantity; + incrementQuantity.setToDouble(roundingInterval); + incrementQuantity.adjustMagnitude(minFrac); + incrementQuantity.roundToMagnitude(0, kDefaultMode, status); + UnicodeString str = incrementQuantity.toPlainString(); + if (str.charAt(0) == '-') { + // TODO: Unsupported operation exception or fail silently? + digitsString.append(str, 1, str.length() - 1); + } else { + digitsString.append(str); + } + } + while (digitsString.length() + digitsStringScale < minInt) { + digitsString.insert(0, '0'); + } + while (-digitsStringScale < minFrac) { + digitsString.append('0'); + digitsStringScale--; + } + + // Write the digits to the string builder + int m0 = uprv_max(groupingLength, digitsString.length() + digitsStringScale); + m0 = (maxInt != dosMax) ? uprv_max(maxInt, m0) - 1 : m0 - 1; + int mN = (maxFrac != dosMax) ? 
uprv_min(-maxFrac, digitsStringScale) : digitsStringScale; + for (int magnitude = m0; magnitude >= mN; magnitude--) { + int di = digitsString.length() + digitsStringScale - magnitude - 1; + if (di < 0 || di >= digitsString.length()) { + sb.append('#'); + } else { + sb.append(digitsString.charAt(di)); + } + if (magnitude > grouping2 && grouping > 0 && (magnitude - grouping2) % grouping == 0) { + sb.append(','); + } else if (magnitude > 0 && magnitude == grouping2) { + sb.append(','); + } else if (magnitude == 0 && (alwaysShowDecimal || mN < 0)) { + sb.append('.'); + } + } + + // Exponential notation + if (exponentDigits != uprv_min(dosMax, -1)) { + sb.append('E'); + if (exponentShowPlusSign) { + sb.append('+'); + } + for (int i = 0; i < exponentDigits; i++) { + sb.append('0'); + } + } + + // Suffixes + int beforeSuffixPos = sb.length(); + if (!psp.isBogus()) { + sb.append(psp); + } + sb.append(AffixUtils::escape(UnicodeStringCharSequence(ps))); + + // Resolve Padding + if (paddingWidth != -1 && !paddingLocation.isNull()) { + while (paddingWidth - sb.length() > 0) { + sb.insert(afterPrefixPos, '#'); + beforeSuffixPos++; + } + int addedLength; + switch (paddingLocation.get(status)) { + case PadPosition::UNUM_PAD_BEFORE_PREFIX: + addedLength = escapePaddingString(paddingString, sb, 0, status); + sb.insert(0, '*'); + afterPrefixPos += addedLength + 1; + beforeSuffixPos += addedLength + 1; + break; + case PadPosition::UNUM_PAD_AFTER_PREFIX: + addedLength = escapePaddingString(paddingString, sb, afterPrefixPos, status); + sb.insert(afterPrefixPos, '*'); + afterPrefixPos += addedLength + 1; + beforeSuffixPos += addedLength + 1; + break; + case PadPosition::UNUM_PAD_BEFORE_SUFFIX: + escapePaddingString(paddingString, sb, beforeSuffixPos, status); + sb.insert(beforeSuffixPos, '*'); + break; + case PadPosition::UNUM_PAD_AFTER_SUFFIX: + sb.append('*'); + escapePaddingString(paddingString, sb, sb.length(), status); + break; + } + if (U_FAILURE(status)) { return sb; } + } + + // 
Negative affixes + // Ignore if the negative prefix pattern is "-" and the negative suffix is empty + if (!np.isBogus() || !ns.isBogus() || (npp.isBogus() && !nsp.isBogus()) || + (!npp.isBogus() && (npp.length() != 1 || npp.charAt(0) != '-' || nsp.length() != 0))) { + sb.append(';'); + if (!npp.isBogus()) { + sb.append(npp); + } + sb.append(AffixUtils::escape(UnicodeStringCharSequence(np))); + // Copy the positive digit format into the negative. + // This is optional; the pattern is the same as if '#' were appended here instead. + sb.append(sb, afterPrefixPos, beforeSuffixPos); + if (!nsp.isBogus()) { + sb.append(nsp); + } + sb.append(AffixUtils::escape(UnicodeStringCharSequence(ns))); + } + + return sb; +} + +int PatternStringUtils::escapePaddingString(UnicodeString input, UnicodeString& output, int startIndex, + UErrorCode &status) { + (void)status; + if (input.length() == 0) { + input.setTo(kFallbackPaddingString, -1); + } + int startLength = output.length(); + if (input.length() == 1) { + if (input.compare(u"'", -1) == 0) { + output.insert(startIndex, u"''", -1); + } else { + output.insert(startIndex, input); + } + } else { + output.insert(startIndex, '\''); + int offset = 1; + for (int i = 0; i < input.length(); i++) { + // it's okay to deal in chars here because the quote mark is the only interesting thing. + char16_t ch = input.charAt(i); + if (ch == '\'') { + output.insert(startIndex + offset, u"''", -1); + offset += 2; + } else { + output.insert(startIndex + offset, ch); + offset += 1; + } + } + output.insert(startIndex + offset, '\''); + } + return output.length() - startLength; +} diff --git a/icu4c/source/i18n/number_patternstring.h b/icu4c/source/i18n/number_patternstring.h new file mode 100644 index 0000000000..921f68b7d9 --- /dev/null +++ b/icu4c/source/i18n/number_patternstring.h @@ -0,0 +1,257 @@ +// © 2017 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef NUMBERFORMAT_PATTERNPARSER_H +#define NUMBERFORMAT_PATTERNPARSER_H + + +#include +#include +#include +#include "number_types.h" +#include "number_decimalquantity.h" +#include "number_decimfmtprops.h" +#include "number_affixutils.h" + +U_NAMESPACE_BEGIN namespace number { +namespace impl { + +// Forward declaration +class PatternParser; + +struct Endpoints { + int32_t start = 0; + int32_t end = 0; +}; + +struct ParsedSubpatternInfo { + int64_t groupingSizes = 0x0000ffffffff0000L; + int32_t integerLeadingHashSigns = 0; + int32_t integerTrailingHashSigns = 0; + int32_t integerNumerals = 0; + int32_t integerAtSigns = 0; + int32_t integerTotal = 0; // for convenience + int32_t fractionNumerals = 0; + int32_t fractionHashSigns = 0; + int32_t fractionTotal = 0; // for convenience + bool hasDecimal = false; + int32_t widthExceptAffixes = 0; + NullableValue paddingLocation; + DecimalQuantity rounding; + bool exponentHasPlusSign = false; + int32_t exponentZeros = 0; + bool hasPercentSign = false; + bool hasPerMilleSign = false; + bool hasCurrencySign = false; + bool hasMinusSign = false; + bool hasPlusSign = false; + + Endpoints prefixEndpoints; + Endpoints suffixEndpoints; + Endpoints paddingEndpoints; +}; + +struct ParsedPatternInfo : public AffixPatternProvider { + UnicodeString pattern; + ParsedSubpatternInfo positive; + ParsedSubpatternInfo negative; + + ParsedPatternInfo() : state(this->pattern), currentSubpattern(nullptr) {} + + ~ParsedPatternInfo() override = default; + + static int32_t getLengthFromEndpoints(const Endpoints &endpoints); + + char16_t charAt(int32_t flags, int32_t index) const override; + + int32_t length(int32_t flags) const override; + + UnicodeString getString(int32_t flags) const; + + bool positiveHasPlusSign() const override; + + bool hasNegativeSubpattern() const override; + + bool negativeHasMinusSign() const override; + + bool hasCurrencySign() const override; + + 
bool containsSymbolType(AffixPatternType type, UErrorCode &status) const override; + + private: + struct ParserState { + const UnicodeString &pattern; // reference to the parent + int32_t offset = 0; + + explicit ParserState(const UnicodeString &_pattern) : pattern(_pattern) {}; + + UChar32 peek(); + + UChar32 next(); + + // TODO: We don't currently do anything with the message string. + // This method is here as a shell for Java compatibility. + inline void toParseException(const char16_t *message) { (void)message; } + } state; + + // NOTE: In Java, these are written as pure functions. + // In C++, they're written as methods. + // The behavior is the same. + + // Mutable transient pointer: + ParsedSubpatternInfo *currentSubpattern; + + // In Java, "negative == null" tells us whether or not we had a negative subpattern. + // In C++, we need to remember in another boolean. + bool fHasNegativeSubpattern = false; + + const Endpoints &getEndpoints(int32_t flags) const; + + /** Run the recursive descent parser. */ + void consumePattern(const UnicodeString &patternString, UErrorCode &status); + + void consumeSubpattern(UErrorCode &status); + + void consumePadding(PadPosition paddingLocation, UErrorCode &status); + + void consumeAffix(Endpoints &endpoints, UErrorCode &status); + + void consumeLiteral(UErrorCode &status); + + void consumeFormat(UErrorCode &status); + + void consumeIntegerFormat(UErrorCode &status); + + void consumeFractionFormat(UErrorCode &status); + + void consumeExponent(UErrorCode &status); + + friend class PatternParser; +}; + +class PatternParser { + public: + /** + * Runs the recursive descent parser on the given pattern string, returning a data structure with raw information + * about the pattern string. + * + *

+ * To obtain a more useful form of the data, consider using {@link #parseToProperties} instead. + * + * TODO: Change argument type to const char16_t* instead of UnicodeString? + * + * @param patternString + * The LDML decimal format pattern (Excel-style pattern) to parse. + * @return The results of the parse. + */ + static void + parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo &patternInfo, UErrorCode &status); + + enum IgnoreRounding { + IGNORE_ROUNDING_NEVER = 0, IGNORE_ROUNDING_IF_CURRENCY = 1, IGNORE_ROUNDING_ALWAYS = 2 + }; + + /** + * Parses a pattern string into a new property bag. + * + * @param pattern + * The pattern string, like "#,##0.00" + * @param ignoreRounding + * Whether to leave out rounding information (minFrac, maxFrac, and rounding increment) when parsing the + * pattern. This may be desirable if a custom rounding mode, such as CurrencyUsage, is to be used + * instead. + * @return A property bag object. + * @throws IllegalArgumentException + * If there is a syntax error in the pattern string. + */ + static DecimalFormatProperties + parseToProperties(const UnicodeString& pattern, IgnoreRounding ignoreRounding, UErrorCode &status); + + /** + * Parses a pattern string into an existing property bag. All properties that can be encoded into a pattern string + * will be overwritten with either their default value or with the value coming from the pattern string. Properties + * that cannot be encoded into a pattern string, such as rounding mode, are not modified. + * + * @param pattern + * The pattern string, like "#,##0.00" + * @param properties + * The property bag object to overwrite. + * @param ignoreRounding + * See {@link #parseToProperties(String pattern, int ignoreRounding)}. + * @throws IllegalArgumentException + * If there was a syntax error in the pattern string. 
+ */ + static void parseToExistingProperties(const UnicodeString& pattern, DecimalFormatProperties properties, + IgnoreRounding ignoreRounding, UErrorCode &status); + + private: + static void + parseToExistingPropertiesImpl(const UnicodeString& pattern, DecimalFormatProperties &properties, + IgnoreRounding ignoreRounding, UErrorCode &status); + + /** Finalizes the temporary data stored in the ParsedPatternInfo to the Properties. */ + static void + patternInfoToProperties(DecimalFormatProperties &properties, ParsedPatternInfo patternInfo, + IgnoreRounding _ignoreRounding, UErrorCode &status); +}; + +class PatternStringUtils { + public: + /** + * Creates a pattern string from a property bag. + * + *

+ * Since pattern strings support only a subset of the functionality available in a property bag, a new property bag + * created from the string returned by this function may not be the same as the original property bag. + * + * @param properties + * The property bag to serialize. + * @return A pattern string approximately serializing the property bag. + */ + static UnicodeString + propertiesToPatternString(const DecimalFormatProperties &properties, UErrorCode &status); + + + /** + * Converts a pattern between standard notation and localized notation. Localized notation means that instead of + * using generic placeholders in the pattern, you use the corresponding locale-specific characters instead. For + * example, in locale fr-FR, the period in the pattern "0.000" means "decimal" in standard notation (as it + * does in every other locale), but it means "grouping" in localized notation. + * + *

+ * A greedy string-substitution strategy is used to substitute locale symbols. If two symbols are ambiguous or have + * the same prefix, the result is not well-defined. + * + *

+ * Locale symbols are not allowed to contain the ASCII quote character. + * + *

+ * This method is provided for backwards compatibility and should not be used in any new code. + * + * TODO(C++): This method is not yet implemented. + * + * @param input + * The pattern to convert. + * @param symbols + * The symbols corresponding to the localized pattern. + * @param toLocalized + * true to convert from standard to localized notation; false to convert from localized to standard + * notation. + * @return The pattern expressed in the other notation. + */ + static UnicodeString + convertLocalized(UnicodeString input, DecimalFormatSymbols symbols, bool toLocalized, + UErrorCode &status); + + private: + /** @return The number of chars inserted. */ + static int + escapePaddingString(UnicodeString input, UnicodeString &output, int startIndex, UErrorCode &status); +}; + +} // namespace impl +} // namespace number +U_NAMESPACE_END + + +#endif //NUMBERFORMAT_PATTERNPARSER_H diff --git a/icu4c/source/i18n/number_results.h b/icu4c/source/i18n/number_results.h new file mode 100644 index 0000000000..0ef9ff3db3 --- /dev/null +++ b/icu4c/source/i18n/number_results.h @@ -0,0 +1,20 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef NUMBERFORMAT_NUMFMTTER_RESULTS_H +#define NUMBERFORMAT_NUMFMTTER_RESULTS_H + +// FIXME: Remove this file? + +#include "number_types.h" +#include "number_decimalquantity.h" +#include "number_stringbuilder.h" + +U_NAMESPACE_BEGIN namespace number { +namespace impl { + +} // namespace impl +} // namespace number +U_NAMESPACE_END + +#endif //NUMBERFORMAT_NUMFMTTER_RESULTS_H diff --git a/icu4c/source/i18n/number_rounding.cpp b/icu4c/source/i18n/number_rounding.cpp new file mode 100644 index 0000000000..52229f9ad3 --- /dev/null +++ b/icu4c/source/i18n/number_rounding.cpp @@ -0,0 +1,339 @@ +// © 2017 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// NOTE(review): the target of the first #include below was lost when this copy of the patch was
+// extracted (angle-bracket text stripped) - restore it from upstream.
+#include
+#include "unicode/numberformatter.h"
+#include "number_types.h"
+#include "number_decimalquantity.h"
+
+using namespace icu::number;
+using namespace icu::number::impl;
+
+namespace {
+
+// Magnitude to round to for a maximum-fraction setting: -maxFrac, or INT32_MIN when maxFrac is -1.
+int32_t getRoundingMagnitudeFraction(int maxFrac) {
+    if (maxFrac == -1) {
+        return INT32_MIN;
+    }
+    return -maxFrac;
+}
+
+// Magnitude to round to so that at most maxSig significant digits remain, counted from the
+// value's own magnitude (0 for a zero value); INT32_MIN when maxSig is -1.
+int32_t getRoundingMagnitudeSignificant(const DecimalQuantity &value, int maxSig) {
+    if (maxSig == -1) {
+        return INT32_MIN;
+    }
+    int magnitude = value.isZero() ? 0 : value.getMagnitude();
+    return magnitude - maxSig + 1;
+}
+
+// Lowest magnitude that must be displayed for a minimum-fraction setting: -minFrac, or INT32_MAX
+// when minFrac is 0.
+int32_t getDisplayMagnitudeFraction(int minFrac) {
+    if (minFrac == 0) {
+        return INT32_MAX;
+    }
+    return -minFrac;
+}
+
+// Lowest magnitude that must be displayed so that minSig significant digits are shown.
+int32_t getDisplayMagnitudeSignificant(const DecimalQuantity &value, int minSig) {
+    int magnitude = value.isZero() ? 0 : value.getMagnitude();
+    return magnitude - minSig + 1;
+}
+
+}
+
+
+Rounder Rounder::unlimited() {
+    return Rounder(RND_NONE, {}, kDefaultMode);
+}
+
+FractionRounder Rounder::integer() {
+    return constructFraction(0, 0);
+}
+
+// The fraction factories below accept values in [0, kMaxIntFracSig]; anything else yields a
+// rounder in the U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR state.
+FractionRounder Rounder::fixedFraction(int32_t minMaxFractionPlaces) {
+    if (minMaxFractionPlaces >= 0 && minMaxFractionPlaces <= kMaxIntFracSig) {
+        return constructFraction(minMaxFractionPlaces, minMaxFractionPlaces);
+    } else {
+        return {U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR};
+    }
+}
+
+FractionRounder Rounder::minFraction(int32_t minFractionPlaces) {
+    if (minFractionPlaces >= 0 && minFractionPlaces <= kMaxIntFracSig) {
+        return constructFraction(minFractionPlaces, -1);
+    } else {
+        return {U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR};
+    }
+}
+
+FractionRounder Rounder::maxFraction(int32_t maxFractionPlaces) {
+    if (maxFractionPlaces >= 0 && maxFractionPlaces <= kMaxIntFracSig) {
+        return constructFraction(0, maxFractionPlaces);
+    } else {
+        return {U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR};
+    }
+}
+
+FractionRounder Rounder::minMaxFraction(int32_t minFractionPlaces, int32_t maxFractionPlaces) 
{
+    // 0 <= min <= max <= kMaxIntFracSig, checked with three comparisons.
+    if (minFractionPlaces >= 0 && maxFractionPlaces <= kMaxIntFracSig &&
+        minFractionPlaces <= maxFractionPlaces) {
+        return constructFraction(minFractionPlaces, maxFractionPlaces);
+    } else {
+        return {U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR};
+    }
+}
+
+// The significant-digit factories below accept values in [0, kMaxIntFracSig]; anything else
+// yields a rounder in the U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR state.
+Rounder Rounder::fixedDigits(int32_t minMaxSignificantDigits) {
+    if (minMaxSignificantDigits >= 0 && minMaxSignificantDigits <= kMaxIntFracSig) {
+        return constructSignificant(minMaxSignificantDigits, minMaxSignificantDigits);
+    } else {
+        return {U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR};
+    }
+}
+
+Rounder Rounder::minDigits(int32_t minSignificantDigits) {
+    if (minSignificantDigits >= 0 && minSignificantDigits <= kMaxIntFracSig) {
+        return constructSignificant(minSignificantDigits, -1);
+    } else {
+        return {U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR};
+    }
+}
+
+Rounder Rounder::maxDigits(int32_t maxSignificantDigits) {
+    if (maxSignificantDigits >= 0 && maxSignificantDigits <= kMaxIntFracSig) {
+        return constructSignificant(0, maxSignificantDigits);
+    } else {
+        return {U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR};
+    }
+}
+
+Rounder Rounder::minMaxDigits(int32_t minSignificantDigits, int32_t maxSignificantDigits) {
+    if (minSignificantDigits >= 0 && maxSignificantDigits <= kMaxIntFracSig &&
+        minSignificantDigits <= maxSignificantDigits) {
+        return constructSignificant(minSignificantDigits, maxSignificantDigits);
+    } else {
+        return {U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR};
+    }
+}
+
+// Only strictly positive increments are accepted (a NaN increment also fails the comparison).
+IncrementRounder Rounder::increment(double roundingIncrement) {
+    if (roundingIncrement > 0.0) {
+        return constructIncrement(roundingIncrement, 0);
+    } else {
+        return {U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR};
+    }
+}
+
+CurrencyRounder Rounder::currency(UCurrencyUsage currencyUsage) {
+    return constructCurrency(currencyUsage);
+}
+
+// Returns a copy of this rounder with the same type and settings but the given rounding mode.
+Rounder Rounder::withMode(RoundingMode roundingMode) const {
+    if (fType == RND_ERROR) { return *this; } // no-op in error state
+    return {fType, fUnion, roundingMode};
+}
+
+Rounder 
FractionRounder::withMinDigits(int32_t minSignificantDigits) const { + if (fType == RND_ERROR) { return *this; } // no-op in error state + if (minSignificantDigits >= 0 && minSignificantDigits <= kMaxIntFracSig) { + return constructFractionSignificant(*this, minSignificantDigits, -1); + } else { + return {U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR}; + } +} + +Rounder FractionRounder::withMaxDigits(int32_t maxSignificantDigits) const { + if (fType == RND_ERROR) { return *this; } // no-op in error state + if (maxSignificantDigits >= 0 && maxSignificantDigits <= kMaxIntFracSig) { + return constructFractionSignificant(*this, -1, maxSignificantDigits); + } else { + return {U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR}; + } +} + +// Private method on base class +Rounder Rounder::withCurrency(const CurrencyUnit ¤cy, UErrorCode &status) const { + if (fType == RND_ERROR) { return *this; } // no-op in error state + U_ASSERT(fType == RND_CURRENCY); + const char16_t *isoCode = currency.getISOCurrency(); + double increment = ucurr_getRoundingIncrementForUsage(isoCode, fUnion.currencyUsage, &status); + int32_t minMaxFrac = ucurr_getDefaultFractionDigitsForUsage( + isoCode, fUnion.currencyUsage, &status); + if (increment != 0.0) { + return constructIncrement(increment, minMaxFrac); + } else { + return constructFraction(minMaxFrac, minMaxFrac); + } +} + +// Public method on CurrencyRounder subclass +Rounder CurrencyRounder::withCurrency(const CurrencyUnit ¤cy) const { + UErrorCode localStatus = U_ZERO_ERROR; + Rounder result = Rounder::withCurrency(currency, localStatus); + if (U_FAILURE(localStatus)) { + return {localStatus}; + } + return result; +} + +Rounder IncrementRounder::withMinFraction(int32_t minFrac) const { + if (fType == RND_ERROR) { return *this; } // no-op in error state + if (minFrac >= 0 && minFrac <= kMaxIntFracSig) { + return constructIncrement(fUnion.increment.fIncrement, minFrac); + } else { + return {U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR}; + } +} + +FractionRounder 
Rounder::constructFraction(int32_t minFrac, int32_t maxFrac) {
+    // The explicit cast targets were stripped from this copy of the patch; casting to the
+    // destination field's own type reproduces the narrowing without guessing that type.
+    FractionSignificantSettings settings;
+    settings.fMinFrac = static_cast<decltype(settings.fMinFrac)>(minFrac);
+    settings.fMaxFrac = static_cast<decltype(settings.fMaxFrac)>(maxFrac);
+    settings.fMinSig = -1;
+    settings.fMaxSig = -1;
+    RounderUnion union_;
+    union_.fracSig = settings;
+    return {RND_FRACTION, union_, kDefaultMode};
+}
+
+Rounder Rounder::constructSignificant(int32_t minSig, int32_t maxSig) {
+    FractionSignificantSettings settings;
+    settings.fMinFrac = -1;
+    settings.fMaxFrac = -1;
+    settings.fMinSig = static_cast<decltype(settings.fMinSig)>(minSig);
+    settings.fMaxSig = static_cast<decltype(settings.fMaxSig)>(maxSig);
+    RounderUnion union_;
+    union_.fracSig = settings;
+    return {RND_SIGNIFICANT, union_, kDefaultMode};
+}
+
+// Starts from the fraction settings of `base` and overrides only the significant-digit fields.
+Rounder
+Rounder::constructFractionSignificant(const FractionRounder &base, int32_t minSig, int32_t maxSig) {
+    FractionSignificantSettings settings = base.fUnion.fracSig;
+    settings.fMinSig = static_cast<decltype(settings.fMinSig)>(minSig);
+    settings.fMaxSig = static_cast<decltype(settings.fMaxSig)>(maxSig);
+    RounderUnion union_;
+    union_.fracSig = settings;
+    return {RND_FRACTION_SIGNIFICANT, union_, kDefaultMode};
+}
+
+IncrementRounder Rounder::constructIncrement(double increment, int32_t minFrac) {
+    IncrementSettings settings;
+    settings.fIncrement = increment;
+    settings.fMinFrac = minFrac;
+    RounderUnion union_;
+    union_.increment = settings;
+    return {RND_INCREMENT, union_, kDefaultMode};
+}
+
+CurrencyRounder Rounder::constructCurrency(UCurrencyUsage usage) {
+    RounderUnion union_;
+    union_.currencyUsage = usage;
+    return {RND_CURRENCY, union_, kDefaultMode};
+}
+
+Rounder Rounder::constructPassThrough() {
+    RounderUnion union_;
+    union_.errorCode = U_ZERO_ERROR; // initialize the variable
+    return {RND_PASS_THROUGH, union_, kDefaultMode};
+}
+
+// Replaces a currency rounder with the concrete rounder for `currency`; other types are untouched.
+void Rounder::setLocaleData(const CurrencyUnit &currency, UErrorCode &status) {
+    if (fType == RND_CURRENCY) {
+        *this = withCurrency(currency, status);
+    }
+}
+
+int32_t
+Rounder::chooseMultiplierAndApply(impl::DecimalQuantity &input, const impl::MultiplierProducer &producer,
+                                  
UErrorCode &status) {
+    // Asks `producer` for the magnitude adjustment matching the input's magnitude, applies it and
+    // rounds. If rounding carried into one more digit, the choice is redone from a saved copy
+    // with the magnitude incremented by one. Returns the adjustment that was finally applied.
+    // TODO: Make a better and more efficient implementation.
+    // TODO: Avoid the object creation here.
+    DecimalQuantity copy(input);
+
+    U_ASSERT(!input.isZero());
+    int32_t magnitude = input.getMagnitude();
+    int32_t multiplier = producer.getMultiplier(magnitude);
+    input.adjustMagnitude(multiplier);
+    apply(input, status);
+
+    // If the number turned to zero when rounding, do not re-attempt the rounding.
+    if (!input.isZero() && input.getMagnitude() == magnitude + multiplier + 1) {
+        magnitude += 1;
+        input = copy;
+        multiplier = producer.getMultiplier(magnitude);
+        input.adjustMagnitude(multiplier);
+        U_ASSERT(input.getMagnitude() == magnitude + multiplier - 1);
+        apply(input, status);
+        U_ASSERT(input.getMagnitude() == magnitude + multiplier);
+    }
+
+    return multiplier;
+}
+
+/** This is the method that contains the actual rounding logic. */
+void Rounder::apply(impl::DecimalQuantity &value, UErrorCode& status) const {
+    switch (fType) {
+        case RND_BOGUS:
+        case RND_ERROR:
+            // Errors should be caught before the apply() method is called
+            status = U_INTERNAL_PROGRAM_ERROR;
+            break;
+
+        case RND_NONE:
+            value.roundToInfinity();
+            break;
+
+        case RND_FRACTION:
+            // Round at the max-fraction magnitude, then make sure min-fraction digits are shown.
+            value.roundToMagnitude(
+                    getRoundingMagnitudeFraction(fUnion.fracSig.fMaxFrac), fRoundingMode, status);
+            value.setFractionLength(
+                    uprv_max(0, -getDisplayMagnitudeFraction(fUnion.fracSig.fMinFrac)), INT32_MAX);
+            break;
+
+        case RND_SIGNIFICANT:
+            value.roundToMagnitude(
+                    getRoundingMagnitudeSignificant(value, fUnion.fracSig.fMaxSig),
+                    fRoundingMode,
+                    status);
+            value.setFractionLength(
+                    uprv_max(0, -getDisplayMagnitudeSignificant(value, fUnion.fracSig.fMinSig)),
+                    INT32_MAX);
+            break;
+
+        case RND_FRACTION_SIGNIFICANT: {
+            int32_t displayMag = getDisplayMagnitudeFraction(fUnion.fracSig.fMinFrac);
+            int32_t roundingMag = getRoundingMagnitudeFraction(fUnion.fracSig.fMaxFrac);
+            if (fUnion.fracSig.fMinSig == -1) {
+                // Max Sig override
+                int32_t candidate = 
getRoundingMagnitudeSignificant(value, fUnion.fracSig.fMaxSig); + roundingMag = uprv_max(roundingMag, candidate); + } else { + // Min Sig override + int32_t candidate = getDisplayMagnitudeSignificant(value, fUnion.fracSig.fMinSig); + roundingMag = uprv_min(roundingMag, candidate); + } + value.roundToMagnitude(roundingMag, fRoundingMode, status); + value.setFractionLength(uprv_max(0, -displayMag), INT32_MAX); + break; + } + + case RND_INCREMENT: + value.roundToIncrement(fUnion.increment.fIncrement, fRoundingMode, status); + value.setFractionLength(fUnion.increment.fMinFrac, fUnion.increment.fMinFrac); + break; + + case RND_CURRENCY: + // Call .withCurrency() before .apply()! + U_ASSERT(false); + + case RND_PASS_THROUGH: + break; + } +} + +void Rounder::apply(impl::DecimalQuantity &value, int32_t minInt, UErrorCode /*status*/) { + // This method is intended for the one specific purpose of helping print "00.000E0". + U_ASSERT(fType == RND_SIGNIFICANT); + U_ASSERT(value.isZero()); + value.setFractionLength(fUnion.fracSig.fMinSig - minInt, INT32_MAX); +} diff --git a/icu4c/source/i18n/number_roundingutils.h b/icu4c/source/i18n/number_roundingutils.h new file mode 100644 index 0000000000..6f68b1ac48 --- /dev/null +++ b/icu4c/source/i18n/number_roundingutils.h @@ -0,0 +1,136 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef NUMBERFORMAT_ROUNDINGUTILS_H +#define NUMBERFORMAT_ROUNDINGUTILS_H + +#include "number_types.h" + +U_NAMESPACE_BEGIN +namespace number { +namespace impl { +namespace roundingutils { + +enum Section { + SECTION_LOWER_EDGE = -1, + SECTION_UPPER_EDGE = -2, + SECTION_LOWER = 1, + SECTION_MIDPOINT = 2, + SECTION_UPPER = 3 +}; + +/** + * Converts a rounding mode and metadata about the quantity being rounded to a boolean determining + * whether the value should be rounded toward infinity or toward zero. + * + *

The parameters are of type int because benchmarks on an x86-64 processor against OpenJDK + * showed that ints were demonstrably faster than enums in switch statements. + * + * @param isEven Whether the digit immediately before the rounding magnitude is even. + * @param isNegative Whether the quantity is negative. + * @param section Whether the part of the quantity to the right of the rounding magnitude is + * exactly halfway between two digits, whether it is in the lower part (closer to zero), or + * whether it is in the upper part (closer to infinity). See {@link #SECTION_LOWER}, {@link + * #SECTION_MIDPOINT}, and {@link #SECTION_UPPER}. + * @param roundingMode The integer version of the {@link RoundingMode}, which you can get via + * {@link RoundingMode#ordinal}. + * @param status Error code, set to U_FORMAT_INEXACT_ERROR if the rounding mode is kRoundUnnecessary. + * @return true if the number should be rounded toward zero; false if it should be rounded toward + * infinity. + */ +inline bool +getRoundingDirection(bool isEven, bool isNegative, Section section, RoundingMode roundingMode, + UErrorCode &status) { + switch (roundingMode) { + case RoundingMode::UNUM_ROUND_UP: + // round away from zero + return false; + + case RoundingMode::UNUM_ROUND_DOWN: + // round toward zero + return true; + + case RoundingMode::UNUM_ROUND_CEILING: + // round toward positive infinity + return isNegative; + + case RoundingMode::UNUM_ROUND_FLOOR: + // round toward negative infinity + return !isNegative; + + case RoundingMode::UNUM_ROUND_HALFUP: + switch (section) { + case SECTION_MIDPOINT: + return false; + case SECTION_LOWER: + return true; + case SECTION_UPPER: + return false; + default: + break; + } + break; + + case RoundingMode::UNUM_ROUND_HALFDOWN: + switch (section) { + case SECTION_MIDPOINT: + return true; + case SECTION_LOWER: + return true; + case SECTION_UPPER: + return false; + default: + break; + } + break; + + case RoundingMode::UNUM_ROUND_HALFEVEN: + switch (section) 
{ + case SECTION_MIDPOINT: + return isEven; + case SECTION_LOWER: + return true; + case SECTION_UPPER: + return false; + default: + break; + } + break; + + default: + break; + } + + status = U_FORMAT_INEXACT_ERROR; + return false; +} + +/** + * Gets whether the given rounding mode's rounding boundary is at the midpoint. The rounding + * boundary is the point at which a number switches from being rounded down to being rounded up. + * For example, with rounding mode HALF_EVEN, HALF_UP, or HALF_DOWN, the rounding boundary is at + * the midpoint, and this function would return true. However, for UP, DOWN, CEILING, and FLOOR, + * the rounding boundary is at the "edge", and this function would return false. + * + * @param roundingMode The integer version of the {@link RoundingMode}. + * @return true if rounding mode is HALF_EVEN, HALF_UP, or HALF_DOWN; false otherwise. + */ +inline bool roundsAtMidpoint(int roundingMode) { + switch (roundingMode) { + case RoundingMode::UNUM_ROUND_UP: + case RoundingMode::UNUM_ROUND_DOWN: + case RoundingMode::UNUM_ROUND_CEILING: + case RoundingMode::UNUM_ROUND_FLOOR: + return false; + + default: + return true; + } +} + +} // namespace roundingutils +} // namespace impl +} // namespace number +U_NAMESPACE_END + +#endif //NUMBERFORMAT_ROUNDINGUTILS_H diff --git a/icu4c/source/i18n/number_scientific.cpp b/icu4c/source/i18n/number_scientific.cpp new file mode 100644 index 0000000000..8482347417 --- /dev/null +++ b/icu4c/source/i18n/number_scientific.cpp @@ -0,0 +1,129 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include +#include "number_scientific.h" +#include "number_utils.h" +#include "number_stringbuilder.h" +#include "unicode/unum.h" + +using namespace icu::number::impl; + +// NOTE: The object lifecycle of ScientificModifier and ScientificHandler differ greatly in Java and C++. 
+//
+// During formatting, we need to provide an object with state (the exponent) as the inner modifier.
+//
+// In Java, where the priority is put on reducing object creations, the unsafe code path re-uses the
+// ScientificHandler as a ScientificModifier, and the safe code path pre-computes 25 ScientificModifier
+// instances.  This scheme reduces the number of object creations by 1 in both safe and unsafe.
+//
+// In C++, MicroProps provides a pre-allocated ScientificModifier, and ScientificHandler simply populates
+// the state (the exponent) into that ScientificModifier.  There is no difference between safe and unsafe.
+
+ScientificModifier::ScientificModifier() : fExponent(0), fHandler(nullptr) {}
+
+void ScientificModifier::set(int32_t exponent, const ScientificHandler *handler) {
+    // ScientificModifier should be set only once.
+    U_ASSERT(fHandler == nullptr);
+    fExponent = exponent;
+    fHandler = handler;
+}
+
+int32_t ScientificModifier::apply(NumberStringBuilder &output, int32_t /*leftIndex*/, int32_t rightIndex,
+                                  UErrorCode &status) const {
+    // FIXME: Localized exponent separator location.
+    int i = rightIndex; // running insertion point; everything is appended after the number
+    // Append the exponent separator and sign
+    i += output.insert(
+            i,
+            fHandler->fSymbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kExponentialSymbol),
+            UNUM_EXPONENT_SYMBOL_FIELD,
+            status);
+    if (fExponent < 0 && fHandler->fSettings.fExponentSignDisplay != UNUM_SIGN_NEVER) {
+        i += output.insert(
+                i,
+                fHandler->fSymbols
+                        ->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kMinusSignSymbol),
+                UNUM_EXPONENT_SIGN_FIELD,
+                status);
+    } else if (fExponent >= 0 && fHandler->fSettings.fExponentSignDisplay == UNUM_SIGN_ALWAYS) {
+        i += output.insert(
+                i,
+                fHandler->fSymbols
+                        ->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kPlusSignSymbol),
+                UNUM_EXPONENT_SIGN_FIELD,
+                status);
+    }
+    // Append the exponent digits, least significant first, each at the fixed start of the digit run
+    int32_t disp = std::abs(fExponent);
+    for (int j = 0, digitsStart = i; j < fHandler->fSettings.fMinExponentDigits || disp > 0; j++, disp /= 10) {
+        auto d = static_cast<int8_t>(disp % 10); // NOTE(review): cast target was lost in extraction; int8_t assumed — confirm
+        const UnicodeString &digitString = getDigitFromSymbols(d, *fHandler->fSymbols);
+        i += output.insert(digitsStart, digitString, UNUM_EXPONENT_FIELD, status); // was "i - j": wrong if a digit is longer than one code unit
+    }
+    return i - rightIndex;
+}
+
+int32_t ScientificModifier::getPrefixLength(UErrorCode &status) const {
+    (void)status;
+    // TODO: Localized exponent separator location.
+    return 0;
+}
+
+int32_t ScientificModifier::getCodePointCount(UErrorCode &status) const {
+    (void)status;
+    // This method is not used for strong modifiers.
+    U_ASSERT(false);
+    return 0;
+}
+
+bool ScientificModifier::isStrong() const {
+    // Scientific is always strong
+    return true;
+}
+
+ScientificHandler::ScientificHandler(const Notation *notation, const DecimalFormatSymbols *symbols,
+                                     const MicroPropsGenerator *parent) : fSettings(
+        notation->fUnion.scientific), fSymbols(symbols), fParent(parent) {}
+
+void ScientificHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micros,
+                                        UErrorCode &status) const {
+    fParent->processQuantity(quantity, micros, status);
+    if (U_FAILURE(status)) { return; }
+
+    // Treat zero as if it had magnitude 0
+    int32_t exponent;
+    if (quantity.isZero()) {
+        if (fSettings.fRequireMinInt && micros.rounding.fType == Rounder::RND_SIGNIFICANT) {
+            // Show "00.000E0" on pattern "00.000E0"
+            micros.rounding.apply(quantity, fSettings.fEngineeringInterval, status);
+            exponent = 0;
+        } else {
+            micros.rounding.apply(quantity, status);
+            exponent = 0;
+        }
+    } else {
+        exponent = -micros.rounding.chooseMultiplierAndApply(quantity, *this, status);
+    }
+
+    // Use MicroProps's helper ScientificModifier and save it as the modInner.
+    ScientificModifier &mod = micros.helpers.scientificModifier;
+    mod.set(exponent, this);
+    micros.modInner = &mod;
+}
+
+int32_t ScientificHandler::getMultiplier(int32_t magnitude) const {
+    int32_t interval = fSettings.fEngineeringInterval;
+    int32_t digitsShown;
+    if (fSettings.fRequireMinInt) {
+        // For patterns like "000.00E0" and ".00E0"
+        digitsShown = interval;
+    } else if (interval <= 1) {
+        // For patterns like "0.00E0" and "@@@E0"
+        digitsShown = 1;
+    } else {
+        // For patterns like "##0.00"
+        digitsShown = ((magnitude % interval + interval) % interval) + 1; // double modulo keeps the result non-negative
+    }
+    return digitsShown - magnitude - 1;
+}
diff --git a/icu4c/source/i18n/number_scientific.h b/icu4c/source/i18n/number_scientific.h
new file mode 100644
index 0000000000..77737fe3d2
--- /dev/null
+++ b/icu4c/source/i18n/number_scientific.h
@@ -0,0 +1,57 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#ifndef NUMBERFORMAT_NUMFMTTER_SCIENTIFIC_H
+#define NUMBERFORMAT_NUMFMTTER_SCIENTIFIC_H
+
+#include "number_types.h"
+
+U_NAMESPACE_BEGIN namespace number {
+namespace impl {
+
+// Forward-declare
+class ScientificHandler;
+
+class ScientificModifier : public UMemory, public Modifier {
+  public:
+    ScientificModifier();
+
+    void set(int32_t exponent, const ScientificHandler *handler);
+
+    int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
+                  UErrorCode &status) const override;
+
+    int32_t getPrefixLength(UErrorCode &status) const override;
+
+    int32_t getCodePointCount(UErrorCode &status) const override;
+
+    bool isStrong() const override;
+
+  private:
+    int32_t fExponent;
+    const ScientificHandler *fHandler; // not owned; nullptr until set() is called
+};
+
+class ScientificHandler : public UMemory, public MicroPropsGenerator, public MultiplierProducer {
+  public:
+    ScientificHandler(const Notation *notation, const DecimalFormatSymbols *symbols,
+                      const MicroPropsGenerator *parent);
+
+    void
+    processQuantity(DecimalQuantity &quantity, MicroProps &micros, UErrorCode &status) const override;
+
+    int32_t getMultiplier(int32_t magnitude) const override;
+
+  private:
+    const Notation::ScientificSettings& fSettings; // reference into the Notation passed to the constructor
+    const DecimalFormatSymbols *fSymbols;
+    const MicroPropsGenerator *fParent;
+
+    friend class ScientificModifier;
+};
+
+} // namespace impl
+} // namespace number
+U_NAMESPACE_END
+
+#endif //NUMBERFORMAT_NUMFMTTER_SCIENTIFIC_H
diff --git a/icu4c/source/i18n/number_stringbuilder.cpp b/icu4c/source/i18n/number_stringbuilder.cpp
new file mode 100644
index 0000000000..1703c97b6d
--- /dev/null
+++ b/icu4c/source/i18n/number_stringbuilder.cpp
@@ -0,0 +1,432 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "number_stringbuilder.h"
+#include <unicode/utf16.h> // NOTE(review): both <...> header names were lost in extraction; restored from usage (U16_* macros, UVector32) — confirm
+#include <uvectr32.h>
+
+using namespace icu::number::impl;
+
+NumberStringBuilder::NumberStringBuilder() = default;
+
+NumberStringBuilder::~NumberStringBuilder() {
+    if (fUsingHeap) {
+        uprv_free(fChars.heap.ptr);
+        uprv_free(fFields.heap.ptr);
+    }
+}
+
+NumberStringBuilder::NumberStringBuilder(const NumberStringBuilder &other) {
+    *this = other;
+}
+
+NumberStringBuilder &NumberStringBuilder::operator=(const NumberStringBuilder &other) {
+    // Check for self-assignment
+    if (this == &other) {
+        return *this;
+    }
+
+    // Continue with deallocation and copying
+    if (fUsingHeap) {
+        uprv_free(fChars.heap.ptr);
+        uprv_free(fFields.heap.ptr);
+        fUsingHeap = false;
+    }
+
+    int32_t capacity = other.getCapacity();
+    if (capacity > DEFAULT_CAPACITY) {
+        // FIXME: uprv_malloc
+        // C++ note: malloc appears in two places: here and in prepareForInsertHelper.
+        auto newChars = static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * capacity));
+        auto newFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * capacity));
+        if (newChars == nullptr || newFields == nullptr) {
+            // UErrorCode is not available; fail silently.
+            uprv_free(newChars);
+            uprv_free(newFields);
+            *this = NumberStringBuilder();  // can't fail
+            return *this;
+        }
+
+        fUsingHeap = true;
+        fChars.heap.capacity = capacity;
+        fChars.heap.ptr = newChars;
+        fFields.heap.capacity = capacity;
+        fFields.heap.ptr = newFields;
+    }
+
+    uprv_memcpy(getCharPtr(), other.getCharPtr(), sizeof(char16_t) * capacity);
+    uprv_memcpy(getFieldPtr(), other.getFieldPtr(), sizeof(Field) * capacity);
+
+    fZero = other.fZero;
+    fLength = other.fLength;
+    return *this;
+}
+
+int32_t NumberStringBuilder::length() const {
+    return fLength;
+}
+
+int32_t NumberStringBuilder::codePointCount() const {
+    return u_countChar32(getCharPtr() + fZero, fLength);
+}
+
+UChar32 NumberStringBuilder::getFirstCodePoint() const {
+    if (fLength == 0) {
+        return -1; // empty builder: no code point to return
+    }
+    UChar32 cp;
+    U16_GET(getCharPtr() + fZero, 0, 0, fLength, cp);
+    return cp;
+}
+
+UChar32 NumberStringBuilder::getLastCodePoint() const {
+    if (fLength == 0) {
+        return -1; // empty builder: no code point to return
+    }
+    int32_t offset = fLength;
+    U16_BACK_1(getCharPtr() + fZero, 0, offset);
+    UChar32 cp;
+    U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp);
+    return cp;
+}
+
+UChar32 NumberStringBuilder::codePointAt(int32_t index) const {
+    UChar32 cp;
+    U16_GET(getCharPtr() + fZero, 0, index, fLength, cp);
+    return cp;
+}
+
+UChar32 NumberStringBuilder::codePointBefore(int32_t index) const {
+    int32_t offset = index;
+    U16_BACK_1(getCharPtr() + fZero, 0, offset);
+    UChar32 cp;
+    U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp);
+    return cp;
+}
+
+NumberStringBuilder &NumberStringBuilder::clear() {
+    // TODO: Reset the heap here?
+    fZero = getCapacity() / 2; // restart in the middle so both prepends and appends have room
+    fLength = 0;
+    return *this;
+}
+
+int32_t NumberStringBuilder::appendCodePoint(UChar32 codePoint, Field field, UErrorCode &status) {
+    return insertCodePoint(fLength, codePoint, field, status);
+}
+
+int32_t
+NumberStringBuilder::insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status) {
+    int32_t count = U16_LENGTH(codePoint);
+    int32_t position = prepareForInsert(index, count, status);
+    if (U_FAILURE(status)) {
+        return count;
+    }
+    if (count == 1) {
+        getCharPtr()[position] = static_cast<char16_t>(codePoint);
+        getFieldPtr()[position] = field;
+    } else {
+        getCharPtr()[position] = U16_LEAD(codePoint);
+        getCharPtr()[position + 1] = U16_TRAIL(codePoint);
+        getFieldPtr()[position] = getFieldPtr()[position + 1] = field;
+    }
+    return count;
+}
+
+int32_t NumberStringBuilder::append(const UnicodeString &unistr, Field field, UErrorCode &status) {
+    return insert(fLength, unistr, field, status);
+}
+
+int32_t NumberStringBuilder::insert(int32_t index, const UnicodeString &unistr, Field field,
+                                    UErrorCode &status) {
+    if (unistr.length() == 0) {
+        // Nothing to insert.
+        return 0;
+    } else if (unistr.length() == 1) {
+        // Fast path: insert using insertCodePoint.
+        return insertCodePoint(index, unistr.charAt(0), field, status);
+    } else {
+        return insert(index, unistr, 0, unistr.length(), field, status);
+    }
+}
+
+int32_t
+NumberStringBuilder::insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end,
+                            Field field, UErrorCode &status) {
+    int32_t count = end - start;
+    int32_t position = prepareForInsert(index, count, status);
+    if (U_FAILURE(status)) {
+        return count;
+    }
+    for (int32_t i = 0; i < count; i++) {
+        getCharPtr()[position + i] = unistr.charAt(start + i);
+        getFieldPtr()[position + i] = field;
+    }
+    return count;
+}
+
+int32_t NumberStringBuilder::append(const NumberStringBuilder &other, UErrorCode &status) {
+    return insert(fLength, other, status);
+}
+
+int32_t
+NumberStringBuilder::insert(int32_t index, const NumberStringBuilder &other, UErrorCode &status) {
+    if (this == &other) {
+        status = U_ILLEGAL_ARGUMENT_ERROR; // inserting a builder into itself is rejected
+        return 0;
+    }
+    int32_t count = other.fLength;
+    if (count == 0) {
+        // Nothing to insert.
+        return 0;
+    }
+    int32_t position = prepareForInsert(index, count, status);
+    if (U_FAILURE(status)) {
+        return count;
+    }
+    for (int32_t i = 0; i < count; i++) {
+        getCharPtr()[position + i] = other.charAt(i);
+        getFieldPtr()[position + i] = other.fieldAt(i);
+    }
+    return count;
+}
+
+int32_t NumberStringBuilder::prepareForInsert(int32_t index, int32_t count, UErrorCode &status) {
+    if (index == 0 && fZero - count >= 0) {
+        // Append to start
+        fZero -= count;
+        fLength += count;
+        return fZero;
+    } else if (index == fLength && fZero + fLength + count < getCapacity()) {
+        // Append to end
+        fLength += count;
+        return fZero + fLength - count;
+    } else {
+        // Move chars around and/or allocate more space
+        return prepareForInsertHelper(index, count, status);
+    }
+}
+
+int32_t NumberStringBuilder::prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status) {
+    int32_t oldCapacity = getCapacity();
+    int32_t oldZero = fZero;
+    char16_t *oldChars = getCharPtr();
+    Field *oldFields = getFieldPtr();
+    if (fLength + count > oldCapacity) {
+        int32_t newCapacity = (fLength + count) * 2;
+        int32_t newZero = newCapacity / 2 - (fLength + count) / 2;
+
+        // C++ note: malloc appears in two places: here and in the assignment operator.
+        auto newChars = static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * newCapacity));
+        auto newFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * newCapacity));
+        if (newChars == nullptr || newFields == nullptr) {
+            uprv_free(newChars);
+            uprv_free(newFields);
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return -1;
+        }
+
+        // First copy the prefix and then the suffix, leaving room for the new chars that the
+        // caller wants to insert.
+        // C++ note: memcpy is OK because the src and dest do not overlap.
+        uprv_memcpy(newChars + newZero, oldChars + oldZero, sizeof(char16_t) * index);
+        uprv_memcpy(newChars + newZero + index + count,
+                oldChars + oldZero + index,
+                sizeof(char16_t) * (fLength - index));
+        uprv_memcpy(newFields + newZero, oldFields + oldZero, sizeof(Field) * index);
+        uprv_memcpy(newFields + newZero + index + count,
+                oldFields + oldZero + index,
+                sizeof(Field) * (fLength - index));
+
+        if (fUsingHeap) {
+            uprv_free(oldChars);
+            uprv_free(oldFields);
+        }
+        fUsingHeap = true;
+        fChars.heap.ptr = newChars;
+        fChars.heap.capacity = newCapacity;
+        fFields.heap.ptr = newFields;
+        fFields.heap.capacity = newCapacity;
+        fZero = newZero;
+        fLength += count;
+    } else {
+        int32_t newZero = oldCapacity / 2 - (fLength + count) / 2;
+
+        // C++ note: memmove is required because src and dest may overlap.
+        // First copy the entire string to the location of the prefix, and then move the suffix
+        // to make room for the new chars that the caller wants to insert.
+        uprv_memmove(oldChars + newZero, oldChars + oldZero, sizeof(char16_t) * fLength);
+        uprv_memmove(oldChars + newZero + index + count,
+                oldChars + newZero + index,
+                sizeof(char16_t) * (fLength - index));
+        uprv_memmove(oldFields + newZero, oldFields + oldZero, sizeof(Field) * fLength);
+        uprv_memmove(oldFields + newZero + index + count,
+                oldFields + newZero + index,
+                sizeof(Field) * (fLength - index));
+
+        fZero = newZero;
+        fLength += count;
+    }
+    return fZero + index;
+}
+
+UnicodeString NumberStringBuilder::toUnicodeString() const {
+    return UnicodeString(getCharPtr() + fZero, fLength);
+}
+
+UnicodeString NumberStringBuilder::toDebugString() const {
+    UnicodeString sb;
+    sb.append(u"", -1); // NOTE(review): the literal and the lines after it were lost in extraction — restore from upstream
+    return sb;
+}
+
+const char16_t *NumberStringBuilder::chars() const {
+    return getCharPtr() + fZero;
+}
+
+bool NumberStringBuilder::contentEquals(const NumberStringBuilder &other) const {
+    if (fLength != other.fLength) {
+        return false;
+    }
+    for (int32_t i = 0; i < fLength; i++) {
+        if (charAt(i) != other.charAt(i) || fieldAt(i) != other.fieldAt(i)) {
+            return false;
+        }
+    }
+    return true;
+}
+
+void NumberStringBuilder::populateFieldPosition(FieldPosition &fp, int32_t offset, UErrorCode &status) const {
+    int32_t rawField = fp.getField();
+
+    if (rawField == FieldPosition::DONT_CARE) {
+        return;
+    }
+
+    if (rawField < 0 || rawField >= UNUM_FIELD_COUNT) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    auto field = static_cast<Field>(rawField);
+
+    bool seenStart = false;
+    int32_t fractionStart = -1;
+    for (int i = fZero; i <= fZero + fLength; i++) { // one step past the end so a field ending at the end is closed
+        Field _field = UNUM_FIELD_COUNT;
+        if (i < fZero + fLength) {
+            _field = getFieldPtr()[i];
+        }
+        if (seenStart && field != _field) {
+            // Special case: GROUPING_SEPARATOR counts as an INTEGER.
+            if (field == UNUM_INTEGER_FIELD && _field == UNUM_GROUPING_SEPARATOR_FIELD) {
+                continue;
+            }
+            fp.setEndIndex(i - fZero + offset);
+            break;
+        } else if (!seenStart && field == _field) {
+            fp.setBeginIndex(i - fZero + offset);
+            seenStart = true;
+        }
+        if (_field == UNUM_INTEGER_FIELD || _field == UNUM_DECIMAL_SEPARATOR_FIELD) {
+            fractionStart = i - fZero + 1;
+        }
+    }
+
+    // Backwards compatibility: FRACTION needs to start after INTEGER if empty
+    if (field == UNUM_FRACTION_FIELD && !seenStart) {
+        fp.setBeginIndex(fractionStart + offset);
+        fp.setEndIndex(fractionStart + offset);
+    }
+}
+
+void NumberStringBuilder::populateFieldPositionIterator(FieldPositionIterator &fpi, UErrorCode &status) const {
+    // TODO: Set an initial capacity on uvec?
+    LocalPointer<UVector32> uvec(new UVector32(status), status); // two-arg form reports a failed allocation via status
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    Field current = UNUM_FIELD_COUNT;
+    int32_t currentStart = -1;
+    for (int32_t i = 0; i < fLength; i++) {
+        Field field = fieldAt(i);
+        if (current == UNUM_INTEGER_FIELD && field == UNUM_GROUPING_SEPARATOR_FIELD) {
+            // Special case: GROUPING_SEPARATOR counts as an INTEGER.
+            // Add the field, followed by the start index, followed by the end index to uvec.
+            uvec->addElement(UNUM_GROUPING_SEPARATOR_FIELD, status);
+            uvec->addElement(i, status);
+            uvec->addElement(i + 1, status);
+        } else if (current != field) {
+            if (current != UNUM_FIELD_COUNT) {
+                // Add the field, followed by the start index, followed by the end index to uvec.
+                uvec->addElement(current, status);
+                uvec->addElement(currentStart, status);
+                uvec->addElement(i, status);
+            }
+            current = field;
+            currentStart = i;
+        }
+        if (U_FAILURE(status)) {
+            return;
+        }
+    }
+    if (current != UNUM_FIELD_COUNT) {
+        // Add the field, followed by the start index, followed by the end index to uvec.
+        uvec->addElement(current, status);
+        uvec->addElement(currentStart, status);
+        uvec->addElement(fLength, status);
+    }
+
+    // Give uvec to the FieldPositionIterator, which adopts it.
+    fpi.setData(uvec.orphan(), status);
+}
diff --git a/icu4c/source/i18n/number_stringbuilder.h b/icu4c/source/i18n/number_stringbuilder.h
new file mode 100644
index 0000000000..aab11ce78c
--- /dev/null
+++ b/icu4c/source/i18n/number_stringbuilder.h
@@ -0,0 +1,130 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#ifndef NUMBERFORMAT_NUMBERSTRINGBUILDER_H
+#define NUMBERFORMAT_NUMBERSTRINGBUILDER_H
+
+
+#include <cstdint> // NOTE(review): the five <...> header names below were lost in extraction; restored from usage — confirm
+#include <unicode/numfmt.h>
+#include <unicode/ustring.h>
+#include <cstring.h>
+#include <uassert.h>
+#include "number_types.h"
+
+U_NAMESPACE_BEGIN namespace number {
+namespace impl {
+
+class NumberStringBuilder : public UMemory {
+  private:
+    static const int32_t DEFAULT_CAPACITY = 40;
+
+    template<typename T>
+    union ValueOrHeapArray {
+        T value[DEFAULT_CAPACITY];
+        struct {
+            T *ptr;
+            int32_t capacity;
+        } heap;
+    };
+
+  public:
+    NumberStringBuilder();
+
+    ~NumberStringBuilder();
+
+    NumberStringBuilder(const NumberStringBuilder &other);
+
+    NumberStringBuilder &operator=(const NumberStringBuilder &other);
+
+    int32_t length() const;
+
+    int32_t codePointCount() const;
+
+    inline char16_t charAt(int32_t index) const {
+        U_ASSERT(index >= 0);
+        U_ASSERT(index < fLength);
+        return getCharPtr()[fZero + index];
+    }
+
+    inline Field fieldAt(int32_t index) const {
+        U_ASSERT(index >= 0);
+        U_ASSERT(index < fLength);
+        return getFieldPtr()[fZero + index];
+    }
+
+    UChar32 getFirstCodePoint() const;
+
+    UChar32 getLastCodePoint() const;
+
+    UChar32 codePointAt(int32_t index) const;
+
+    UChar32 codePointBefore(int32_t index) const;
+
+    NumberStringBuilder &clear();
+
+    int32_t appendCodePoint(UChar32 codePoint, Field field, UErrorCode &status);
+
+    int32_t insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status);
+
+    int32_t append(const UnicodeString &unistr, Field field, UErrorCode &status);
+
+    int32_t insert(int32_t index, const UnicodeString &unistr, Field field, UErrorCode &status);
+
+    int32_t insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end, Field field,
+                   UErrorCode &status);
+
+    int32_t append(const NumberStringBuilder &other, UErrorCode &status);
+
+    int32_t insert(int32_t index, const NumberStringBuilder &other, UErrorCode &status);
+
+    UnicodeString toUnicodeString() const;
+
+    UnicodeString toDebugString() const;
+
+    const char16_t *chars() const;
+
+    bool contentEquals(const NumberStringBuilder &other) const;
+
+    void populateFieldPosition(FieldPosition &fp, int32_t offset, UErrorCode &status) const;
+
+    void populateFieldPositionIterator(FieldPositionIterator &fpi, UErrorCode &status) const;
+
+  private:
+    bool fUsingHeap = false;
+    ValueOrHeapArray<char16_t> fChars;
+    ValueOrHeapArray<Field> fFields;
+    int32_t fZero = DEFAULT_CAPACITY / 2; // index of the first used slot; content starts mid-buffer
+    int32_t fLength = 0;
+
+    inline char16_t *getCharPtr() {
+        return fUsingHeap ? fChars.heap.ptr : fChars.value;
+    }
+
+    inline const char16_t *getCharPtr() const {
+        return fUsingHeap ? fChars.heap.ptr : fChars.value;
+    }
+
+    inline Field *getFieldPtr() {
+        return fUsingHeap ? fFields.heap.ptr : fFields.value;
+    }
+
+    inline const Field *getFieldPtr() const {
+        return fUsingHeap ? fFields.heap.ptr : fFields.value;
+    }
+
+    inline int32_t getCapacity() const {
+        return fUsingHeap ? fChars.heap.capacity : DEFAULT_CAPACITY;
+    }
+
+    int32_t prepareForInsert(int32_t index, int32_t count, UErrorCode &status);
+
+    int32_t prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status);
+};
+
+} // namespace impl
+} // namespace number
+U_NAMESPACE_END
+
+
+#endif //NUMBERFORMAT_NUMBERSTRINGBUILDER_H
diff --git a/icu4c/source/i18n/number_types.h b/icu4c/source/i18n/number_types.h
new file mode 100644
index 0000000000..a9d3cab8ab
--- /dev/null
+++ b/icu4c/source/i18n/number_types.h
@@ -0,0 +1,282 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#ifndef NUMBERFORMAT_INTERNALS_H
+#define NUMBERFORMAT_INTERNALS_H
+
+#include <cstdint> // NOTE(review): the six <...> header names in this group were lost in extraction; restored from usage — confirm
+#include <unicode/decimfmt.h>
+#include <unicode/unum.h>
+#include <unicode/numsys.h>
+#include "unicode/numberformatter.h"
+#include <unicode/utf16.h>
+#include <uassert.h>
+
+U_NAMESPACE_BEGIN
+namespace number {
+namespace impl {
+
+// Typedef several enums for brevity and for easier comparison to Java.
+
+typedef UNumberFormatFields Field;
+
+typedef UNumberFormatRoundingMode RoundingMode;
+
+typedef UNumberFormatPadPosition PadPosition;
+
+typedef UNumberCompactStyle CompactStyle;
+
+// ICU4J Equivalent: RoundingUtils.MAX_INT_FRAC_SIG
+static constexpr int32_t kMaxIntFracSig = 100;
+
+// ICU4J Equivalent: RoundingUtils.DEFAULT_ROUNDING_MODE
+static constexpr RoundingMode kDefaultMode = RoundingMode::UNUM_ROUND_HALFEVEN;
+
+// ICU4J Equivalent: Padder.FALLBACK_PADDING_STRING
+static constexpr char16_t kFallbackPaddingString[] = u" ";
+
+// ICU4J Equivalent: NumberFormatterImpl.DEFAULT_CURRENCY
+static constexpr char16_t kDefaultCurrency[] = u"XXX";
+
+// FIXME: New error codes:
+static constexpr UErrorCode U_NUMBER_DIGIT_WIDTH_OUT_OF_RANGE_ERROR = U_ILLEGAL_ARGUMENT_ERROR;
+static constexpr UErrorCode U_NUMBER_PADDING_WIDTH_OUT_OF_RANGE_ERROR = U_ILLEGAL_ARGUMENT_ERROR;
+
+// Forward declarations:
+
+class Modifier;
+class MutablePatternModifier;
+class DecimalQuantity;
+class NumberStringBuilder;
+struct MicroProps;
+
+
+enum AffixPatternType {
+    // Represents a literal character; the value is stored in the code point field.
+    TYPE_CODEPOINT = 0,
+
+    // Represents a minus sign symbol '-'.
+    TYPE_MINUS_SIGN = -1,
+
+    // Represents a plus sign symbol '+'.
+    TYPE_PLUS_SIGN = -2,
+
+    // Represents a percent sign symbol '%'.
+    TYPE_PERCENT = -3,
+
+    // Represents a permille sign symbol '‰'.
+    TYPE_PERMILLE = -4,
+
+    // Represents a single currency symbol '¤'.
+    TYPE_CURRENCY_SINGLE = -5,
+
+    // Represents a double currency symbol '¤¤'.
+    TYPE_CURRENCY_DOUBLE = -6,
+
+    // Represents a triple currency symbol '¤¤¤'.
+    TYPE_CURRENCY_TRIPLE = -7,
+
+    // Represents a quadruple currency symbol '¤¤¤¤'.
+    TYPE_CURRENCY_QUAD = -8,
+
+    // Represents a quintuple currency symbol '¤¤¤¤¤'.
+    TYPE_CURRENCY_QUINT = -9,
+
+    // Represents a sequence of six or more currency symbols.
+    TYPE_CURRENCY_OVERFLOW = -15
+};
+
+enum CompactType {
+    TYPE_DECIMAL,
+    TYPE_CURRENCY
+};
+
+
+// TODO: Should this be moved somewhere else, maybe where other ICU classes can use it?
+class CharSequence {
+public:
+    virtual ~CharSequence() = default;
+
+    virtual int32_t length() const = 0;
+
+    virtual char16_t charAt(int32_t index) const = 0;
+
+    virtual UChar32 codePointAt(int32_t index) const {
+        // Default implementation; can be overridden with a more efficient version
+        char16_t leading = charAt(index);
+        if (U16_IS_LEAD(leading) && length() > index + 1 && U16_IS_TRAIL(charAt(index + 1))) {
+            char16_t trailing = charAt(index + 1);
+            return U16_GET_SUPPLEMENTARY(leading, trailing);
+        } else {
+            return leading; // BMP code unit, or an unpaired surrogate returned as-is
+        }
+    }
+
+    virtual UnicodeString toUnicodeString() const = 0;
+};
+
+class AffixPatternProvider {
+  public:
+    static const int32_t AFFIX_PLURAL_MASK = 0xff;
+    static const int32_t AFFIX_PREFIX = 0x100;
+    static const int32_t AFFIX_NEGATIVE_SUBPATTERN = 0x200;
+    static const int32_t AFFIX_PADDING = 0x400;
+
+    virtual ~AffixPatternProvider() = default;
+
+    virtual char16_t charAt(int flags, int i) const = 0;
+
+    virtual int length(int flags) const = 0;
+
+    virtual bool hasCurrencySign() const = 0;
+
+    virtual bool positiveHasPlusSign() const = 0;
+
+    virtual bool hasNegativeSubpattern() const = 0;
+
+    virtual bool negativeHasMinusSign() const = 0;
+
+    virtual bool containsSymbolType(AffixPatternType, UErrorCode &) const = 0;
+};
+
+/**
+ * A Modifier is an object that can be passed through the formatting pipeline until it is finally applied to the string
+ * builder. A Modifier usually contains a prefix and a suffix that are applied, but it could contain something else,
+ * like a {@link com.ibm.icu.text.SimpleFormatter} pattern.
+ *
+ * A Modifier is usually immutable, except in cases such as {@link MurkyModifier}, which are mutable for performance
+ * reasons.
+ */
+class Modifier {
+  public:
+    virtual ~Modifier() = default;
+
+    /**
+     * Apply this Modifier to the string builder.
+     *
+     * @param output
+     *            The string builder to which to apply this modifier.
+     * @param leftIndex
+     *            The left index of the string within the builder. Equal to 0 when only one number is being formatted.
+     * @param rightIndex
+     *            The right index of the string within the string builder. Equal to length when only one number is being
+     *            formatted.
+     * @return The number of characters (UTF-16 code units) that were added to the string builder.
+     */
+    virtual int32_t
+    apply(NumberStringBuilder &output, int leftIndex, int rightIndex, UErrorCode &status) const = 0;
+
+    /**
+     * Gets the length of the prefix. This information can be used in combination with {@link #apply} to extract the
+     * prefix and suffix strings.
+     *
+     * @return The number of characters (UTF-16 code units) in the prefix.
+     */
+    virtual int32_t getPrefixLength(UErrorCode& status) const = 0;
+
+    /**
+     * Returns the number of code points in the modifier, prefix plus suffix.
+     */
+    virtual int32_t getCodePointCount(UErrorCode &status) const = 0;
+
+    /**
+     * Whether this modifier is strong. If a modifier is strong, it should always be applied immediately and not allowed
+     * to bubble up. With regard to padding, strong modifiers are considered to be on the inside of the prefix and
+     * suffix.
+     *
+     * @return Whether the modifier is strong.
+     */
+    virtual bool isStrong() const = 0;
+};
+
+/**
+ * This interface is used when all number formatting settings, including the locale, are known, except for the quantity
+ * itself. The {@link #processQuantity} method performs the final step in the number processing pipeline: it uses the
+ * quantity to generate a finalized {@link MicroProps}, which can be used to render the number to output.
+ *
+ * <p>
+ * In other words, this interface is used for the parts of number processing that are quantity-dependent.
+ *
+ * <p>
+ * In order to allow for multiple different objects to all mutate the same MicroProps, a "chain" of MicroPropsGenerators
+ * are linked together, and each one is responsible for manipulating a certain quantity-dependent part of the
+ * MicroProps. At the tail of the linked list is a base instance of {@link MicroProps} with properties that are not
+ * quantity-dependent. Each element in the linked list calls {@link #processQuantity} on its "parent", then does its
+ * work, and then returns the result.
+ *
+ * @author sffc
+ *
+ */
+class MicroPropsGenerator {
+  public:
+    virtual ~MicroPropsGenerator() = default;
+
+    /**
+     * Considers the given {@link DecimalQuantity}, optionally mutates it, and populates a {@link MicroProps}.
+     *
+     * @param quantity
+     *            The quantity for consideration and optional mutation.
+     * @param micros
+     *            The MicroProps instance to populate.
+     * @param status ICU error code (in/out). The result is delivered through micros; nothing is returned.
+     */
+    virtual void processQuantity(DecimalQuantity& quantity, MicroProps& micros, UErrorCode& status) const = 0;
+};
+
+class MultiplierProducer {
+  public:
+    virtual ~MultiplierProducer() = default;
+
+    virtual int32_t getMultiplier(int32_t magnitude) const = 0;
+};
+
+template<typename T>
+class NullableValue {
+  public:
+    NullableValue() : fNull(true), fValue() {} // value-initialize so get() and copies never read an indeterminate T
+
+    NullableValue(const NullableValue &other) = default;
+
+    explicit NullableValue(const T &other) {
+        fValue = other;
+        fNull = false;
+    }
+
+    NullableValue &operator=(const NullableValue &other) = default;
+
+    NullableValue &operator=(const T &other) {
+        fValue = other;
+        fNull = false;
+        return *this;
+    }
+
+    bool operator==(const NullableValue &other) const {
+        return fNull ? other.fNull : (!other.fNull && fValue == other.fValue); // a null never equals a non-null
+    }
+
+    void nullify() {
+        // TODO: It might be nice to call the destructor here.
+        fNull = true;
+    }
+
+    bool isNull() const {
+        return fNull;
+    }
+
+    T get(UErrorCode &status) const {
+        if (fNull) {
+            status = U_UNDEFINED_VARIABLE;
+        }
+        return fValue; // still returned when null; callers must check status
+    }
+
+  private:
+    bool fNull;
+    T fValue;
+};
+
+} // namespace impl
+} // namespace number
+U_NAMESPACE_END
+
+#endif //NUMBERFORMAT_INTERNALS_H
diff --git a/icu4c/source/i18n/number_utils.h b/icu4c/source/i18n/number_utils.h
new file mode 100644
index 0000000000..608363628f
--- /dev/null
+++ b/icu4c/source/i18n/number_utils.h
@@ -0,0 +1,125 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#ifndef NUMBERFORMAT_NUMFMTTER_UTILS_H
+#define NUMBERFORMAT_NUMFMTTER_UTILS_H
+
+#include "unicode/numberformatter.h"
+#include "number_types.h"
+#include "number_decimalquantity.h"
+#include "number_scientific.h"
+#include "number_patternstring.h"
+#include "number_modifiers.h"
+
+U_NAMESPACE_BEGIN namespace number {
+namespace impl {
+
+class UnicodeStringCharSequence : public CharSequence {
+  public:
+    explicit UnicodeStringCharSequence(const UnicodeString &other) {
+        fStr = other;
+    }
+
+    ~UnicodeStringCharSequence() override = default;
+
+    int32_t length() const override {
+        return fStr.length();
+    }
+
+    char16_t charAt(int32_t index) const override {
+        return fStr.charAt(index);
+    }
+
+    UChar32 codePointAt(int32_t index) const override {
+        return fStr.char32At(index);
+    }
+
+    UnicodeString toUnicodeString() const override {
+        // Allocate a UnicodeString of the correct length
+        UnicodeString output(length(), 0, -1);
+        for (int32_t i = 0; i < length(); i++) {
+            output.append(charAt(i));
+        }
+        return output;
+    }
+
+  private:
+    UnicodeString fStr;
+};
+
+struct MicroProps : public MicroPropsGenerator {
+
+    // NOTE: All of these fields are properly initialized in NumberFormatterImpl.
+ Rounder rounding; + Grouper grouping; + Padder padding; + IntegerWidth integerWidth; + UNumberSignDisplay sign; + UNumberDecimalSeparatorDisplay decimal; + bool useCurrency; + + // Note: This struct has no direct ownership of the following pointers. + const DecimalFormatSymbols *symbols; + const Modifier *modOuter; + const Modifier *modMiddle; + const Modifier *modInner; + + // The following "helper" fields may optionally be used during the MicroPropsGenerator. + // They live here to retain memory. + struct { + ScientificModifier scientificModifier; + EmptyModifier emptyWeakModifier{false}; + EmptyModifier emptyStrongModifier{true}; + } helpers; + + + MicroProps() = default; + + MicroProps(const MicroProps &other) = default; + + MicroProps &operator=(const MicroProps &other) = default; + + void processQuantity(DecimalQuantity &, MicroProps &micros, UErrorCode &status) const override { + (void)status; // status is intentionally unused here + if (this == &micros) { + // Unsafe path: no need to perform a copy. + U_ASSERT(!exhausted); + micros.exhausted = true; + U_ASSERT(exhausted); + } else { + // Safe path: copy self into the output micros. + micros = *this; + } + } + + private: + // Internal fields: + bool exhausted = false; +}; + +/** + * This struct provides the result of the number formatting pipeline to FormattedNumber. + * + * The DecimalQuantity is not currently being used by FormattedNumber, but at some point it could be used + * to add a toDecNumber() or similar method. + */ +struct NumberFormatterResults : public UMemory { + DecimalQuantity quantity; + NumberStringBuilder string; +}; + +inline const UnicodeString getDigitFromSymbols(int8_t digit, const DecimalFormatSymbols &symbols) { + // TODO: Implement DecimalFormatSymbols.getCodePointZero()? 
+ if (digit == 0) { + return symbols.getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kZeroDigitSymbol); + } else { + return symbols.getSymbol(static_cast<DecimalFormatSymbols::ENumberFormatSymbol>( + DecimalFormatSymbols::ENumberFormatSymbol::kOneDigitSymbol + digit - 1)); + } +} + +} // namespace impl +} // namespace number +U_NAMESPACE_END + +#endif //NUMBERFORMAT_NUMFMTTER_UTILS_H diff --git a/icu4c/source/i18n/unicode/numberformatter.h b/icu4c/source/i18n/unicode/numberformatter.h new file mode 100644 index 0000000000..b86bc89fc5 --- /dev/null +++ b/icu4c/source/i18n/unicode/numberformatter.h @@ -0,0 +1,1824 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef NUMBERFORMAT_HEADERS_H +#define NUMBERFORMAT_HEADERS_H + +#include +#include +#include +#include +#include +#include +#include + +/** + * An enum declaring how to render units, including currencies. Example outputs when formatting 123 USD and 123 + * meters in en-CA: + * + *

+ *

+ * + *

+ * * The narrow format for currencies is not currently supported; this is a known issue that will be fixed in a + * future version. See #11666 for more information. + * + *

+ * This enum is similar to {@link com.ibm.icu.text.MeasureFormat.FormatWidth}. + * + * @draft ICU 60 + */ +typedef enum UNumberUnitWidth { + /** + * Print an abbreviated version of the unit name. Similar to SHORT, but always use the shortest available + * abbreviation or symbol. This option can be used when the context hints at the identity of the unit. For more + * information on the difference between NARROW and SHORT, see SHORT. + * + *

+ * In CLDR, this option corresponds to the "Narrow" format for measure units and the "¤¤¤¤¤" placeholder for + * currencies. + * + * @draft ICU 60 + */ + UNUM_UNIT_WIDTH_NARROW, + + /** + * Print an abbreviated version of the unit name. Similar to NARROW, but use a slightly wider abbreviation or + * symbol when there may be ambiguity. This is the default behavior. + * + *

+ * For example, in es-US, the SHORT form for Fahrenheit is "{0} °F", but the NARROW form is "{0}°", + * since Fahrenheit is the customary unit for temperature in that locale. + * + *

+ * In CLDR, this option corresponds to the "Short" format for measure units and the "¤" placeholder for + * currencies. + * + * @draft ICU 60 + */ + UNUM_UNIT_WIDTH_SHORT, + + /** + * Print the full name of the unit, without any abbreviations. + * + *

+ * In CLDR, this option corresponds to the default format for measure units and the "¤¤¤" placeholder for + * currencies. + * + * @draft ICU 60 + */ + UNUM_UNIT_WIDTH_FULL_NAME, + + /** + * Use the three-digit ISO XXX code in place of the symbol for displaying currencies. The behavior of this + * option is currently undefined for use with measure units. + * + *

+ * In CLDR, this option corresponds to the "¤¤" placeholder for currencies. + * + * @draft ICU 60 + */ + UNUM_UNIT_WIDTH_ISO_CODE, + + /** + * Format the number according to the specified unit, but do not display the unit. For currencies, apply + * monetary symbols and formats as with SHORT, but omit the currency symbol. For measure units, the behavior is + * equivalent to not specifying the unit at all. + * + * @draft ICU 60 + */ + UNUM_UNIT_WIDTH_HIDDEN, + +#ifndef U_HIDE_INTERNAL_API + /** + * One more than the highest UNumberUnitWidth value. + * + * @internal ICU 60: The numeric value may change over time; see ICU ticket #12420. + */ + UNUM_UNIT_WIDTH_COUNT +#endif /* U_HIDE_INTERNAL_API */ +} UNumberUnitWidth; + +/** + * An enum declaring how to denote positive and negative numbers. Example outputs when formatting 123 and -123 in + * en-US: + * + *

+ *

+ * + *

+ * The exact format, including the position and the code point of the sign, differ by locale. + * + * @draft ICU 60 + */ +typedef enum UNumberSignDisplay { + /** + * Show the minus sign on negative numbers, and do not show the sign on positive numbers. This is the default + * behavior. + * + * @draft ICU 60 + */ + UNUM_SIGN_AUTO, + + /** + * Show the minus sign on negative numbers and the plus sign on positive numbers. + * + * @draft ICU 60 + */ + UNUM_SIGN_ALWAYS, + + /** + * Do not show the sign on positive or negative numbers. + * + * @draft ICU 60 + */ + UNUM_SIGN_NEVER, + + /** + * Use the locale-dependent accounting format on negative numbers, and do not show the sign on positive numbers. + * + *

+ * The accounting format is defined in CLDR and varies by locale; in many Western locales, the format is a pair + * of parentheses around the number. + * + *

+ * Note: Since CLDR defines the accounting format in the monetary context only, this option falls back to the + * AUTO sign display strategy when formatting without a currency unit. This limitation may be lifted in the + * future. + * + * @draft ICU 60 + */ + UNUM_SIGN_ACCOUNTING, + + /** + * Use the locale-dependent accounting format on negative numbers, and show the plus sign on positive numbers. + * For more information on the accounting format, see the ACCOUNTING sign display strategy. + * + * @draft ICU 60 + */ + UNUM_SIGN_ACCOUNTING_ALWAYS, + +#ifndef U_HIDE_INTERNAL_API + /** + * One more than the highest UNumberSignDisplay value. + * + * @internal ICU 60: The numeric value may change over time; see ICU ticket #12420. + */ + UNUM_SIGN_COUNT +#endif /* U_HIDE_INTERNAL_API */ +} UNumberSignDisplay; + +/** + * An enum declaring how to render the decimal separator. + * + *

+ *

+ */ +typedef enum UNumberDecimalSeparatorDisplay { + /** + * Show the decimal separator when there are one or more digits to display after the separator, and do not show + * it otherwise. This is the default behavior. + * + * @draft ICU 60 + */ + UNUM_DECIMAL_SEPARATOR_AUTO, + + /** + * Always show the decimal separator, even if there are no digits to display after the separator. + * + * @draft ICU 60 + */ + UNUM_DECIMAL_SEPARATOR_ALWAYS, + +#ifndef U_HIDE_INTERNAL_API + /** + * One more than the highest UNumberDecimalSeparatorDisplay value. + * + * @internal ICU 60: The numeric value may change over time; see ICU ticket #12420. + */ + UNUM_DECIMAL_SEPARATOR_COUNT +#endif /* U_HIDE_INTERNAL_API */ +} UNumberDecimalMarkDisplay; + +U_NAMESPACE_BEGIN namespace number { // icu::number + +// Forward declarations: +class UnlocalizedNumberFormatter; +class LocalizedNumberFormatter; +class FormattedNumber; +class Notation; +class ScientificNotation; +class Rounder; +class FractionRounder; +class CurrencyRounder; +class IncrementRounder; +class Grouper; +class IntegerWidth; + +#ifndef U_HIDE_INTERNAL_API +namespace impl { + +// Forward declarations: +class Padder; +struct MacroProps; +struct MicroProps; +class DecimalQuantity; +struct NumberFormatterResults; +class NumberFormatterImpl; +struct ParsedPatternInfo; +class ScientificModifier; +class MultiplierProducer; +class MutablePatternModifier; +class LongNameHandler; +class ScientificHandler; +class CompactHandler; +class Modifier; +class NumberStringBuilder; + +} // namespace impl +#endif /* U_HIDE_INTERNAL_API */ + +// Reserve extra names in case they are added as classes in the future: +typedef Notation CompactNotation; +typedef Notation SimpleNotation; + +/** + * A class that defines the notation style to be used when formatting numbers in NumberFormatter. 
+ * + * @draft ICU 60 + */ +class Notation : public UMemory { + public: + /** + * Print the number using scientific notation (also known as scientific form, standard index form, or standard form + * in the UK). The format for scientific notation varies by locale; for example, many Western locales display the + * number in the form "#E0", where the number is displayed with one digit before the decimal separator, zero or more + * digits after the decimal separator, and the corresponding power of 10 displayed after the "E". + * + *

+ * Example outputs in en-US when printing 8.765E4 through 8.765E-3: + * + *

+     * 8.765E4
+     * 8.765E3
+     * 8.765E2
+     * 8.765E1
+     * 8.765E0
+     * 8.765E-1
+     * 8.765E-2
+     * 8.765E-3
+     * 0E0
+     * 
+ * + * @return A ScientificNotation for chaining or passing to the NumberFormatter notation() setter. + * @draft ICU 60 + */ + static ScientificNotation scientific(); + + /** + * Print the number using engineering notation, a variant of scientific notation in which the exponent must be + * divisible by 3. + * + *

+ * Example outputs in en-US when printing 8.765E4 through 8.765E-3: + * + *

+     * 87.65E3
+     * 8.765E3
+     * 876.5E0
+     * 87.65E0
+     * 8.765E0
+     * 876.5E-3
+     * 87.65E-3
+     * 8.765E-3
+     * 0E0
+     * 
+ * + * @return A ScientificNotation for chaining or passing to the NumberFormatter notation() setter. + * @draft ICU 60 + */ + static ScientificNotation engineering(); + + /** + * Print the number using short-form compact notation. + * + *

+ * Compact notation, defined in Unicode Technical Standard #35 Part 3 Section 2.4.1, prints numbers with + * localized prefixes or suffixes corresponding to different powers of ten. Compact notation is similar to + * engineering notation in how it scales numbers. + * + *

+ * Compact notation is ideal for displaying large numbers (over ~1000) to humans while at the same time minimizing + * screen real estate. + * + *

+ * In short form, the powers of ten are abbreviated. In en-US, the abbreviations are "K" for thousands, "M" + * for millions, "B" for billions, and "T" for trillions. Example outputs in en-US when printing 8.765E7 + * through 8.765E0: + * + *

+     * 88M
+     * 8.8M
+     * 876K
+     * 88K
+     * 8.8K
+     * 876
+     * 88
+     * 8.8
+     * 
+ * + *

+ * When compact notation is specified without an explicit rounding strategy, numbers are rounded off to the closest + * integer after scaling the number by the corresponding power of 10, but with a digit shown after the decimal + * separator if there is only one digit before the decimal separator. The default compact notation rounding strategy + * is equivalent to: + * + *

+     * Rounder::integer().withMinDigits(2)
+     * 
+ * + * @return A CompactNotation for passing to the NumberFormatter notation() setter. + * @draft ICU 60 + */ + static CompactNotation compactShort(); + + /** + * Print the number using long-form compact notation. For more information on compact notation, see + * {@link #compactShort}. + * + *

+ * In long form, the powers of ten are spelled out fully. Example outputs in en-US when printing 8.765E7 + * through 8.765E0: + * + *

+     * 88 million
+     * 8.8 million
+     * 876 thousand
+     * 88 thousand
+     * 8.8 thousand
+     * 876
+     * 88
+     * 8.8
+     * 
+ * + * @return A CompactNotation for passing to the NumberFormatter notation() setter. + * @draft ICU 60 + */ + static CompactNotation compactLong(); + + /** + * Print the number using simple notation without any scaling by powers of ten. This is the default behavior. + * + *

+ * Since this is the default behavior, this method needs to be called only when it is necessary to override a + * previous setting. + * + *

+ * Example outputs in en-US when printing 8.765E7 through 8.765E0: + * + *

+     * 87,650,000
+     * 8,765,000
+     * 876,500
+     * 87,650
+     * 8,765
+     * 876.5
+     * 87.65
+     * 8.765
+     * 
+ * + * @return A SimpleNotation for passing to the NumberFormatter notation() setter. + * @draft ICU 60 + */ + static SimpleNotation simple(); + + private: + enum NotationType { + NTN_SCIENTIFIC, NTN_COMPACT, NTN_SIMPLE, NTN_ERROR + } fType; + + union NotationUnion { + // For NTN_SCIENTIFIC + struct ScientificSettings { + int8_t fEngineeringInterval; + bool fRequireMinInt; + int8_t fMinExponentDigits; + UNumberSignDisplay fExponentSignDisplay; + } scientific; + + // For NTN_COMPACT + UNumberCompactStyle compactStyle; + + // For NTN_ERROR + UErrorCode errorCode; + } fUnion; + + typedef NotationUnion::ScientificSettings ScientificSettings; + + Notation(const NotationType &type, const NotationUnion &union_) : fType(type), fUnion(union_) {} + + Notation(UErrorCode errorCode) : fType(NTN_ERROR) { + fUnion.errorCode = errorCode; + } + + Notation() : fType(NTN_SIMPLE), fUnion() {} + + UBool copyErrorTo(UErrorCode &status) const { + if (fType == NTN_ERROR) { + status = fUnion.errorCode; + return TRUE; + } + return FALSE; + } + + // To allow MacroProps to initialize empty instances: + friend struct impl::MacroProps; + friend class ScientificNotation; + + // To allow implementation to access internal types: + friend class impl::NumberFormatterImpl; + friend class impl::ScientificModifier; + friend class impl::ScientificHandler; +}; + +/** + * A class that defines the scientific notation style to be used when formatting numbers in NumberFormatter. + * + *

+ * To create a ScientificNotation, use one of the factory methods in {@link Notation}. + * + * @draft ICU 60 + */ +class ScientificNotation : public Notation { + public: + /** + * Sets the minimum number of digits to show in the exponent of scientific notation, padding with zeros if + * necessary. Useful for fixed-width display. + * + *

+ * For example, with minExponentDigits=2, the number 123 will be printed as "1.23E02" in en-US instead of + * the default "1.23E2". + * + * @param minExponentDigits + * The minimum number of digits to show in the exponent. + * @return A ScientificNotation, for chaining. + * @draft ICU 60 + */ + ScientificNotation withMinExponentDigits(int32_t minExponentDigits) const; + + /** + * Sets whether to show the sign on positive and negative exponents in scientific notation. The default is AUTO, + * showing the minus sign but not the plus sign. + * + *

+ * For example, with exponentSignDisplay=ALWAYS, the number 123 will be printed as "1.23E+2" in en-US + * instead of the default "1.23E2". + * + * @param exponentSignDisplay + * The strategy for displaying the sign in the exponent. + * @return A ScientificNotation, for chaining. + * @draft ICU 60 + */ + ScientificNotation withExponentSignDisplay(UNumberSignDisplay exponentSignDisplay) const; + + private: + // Inherit constructor + using Notation::Notation; + + friend class Notation; +}; + +// Reserve extra names in case they are added as classes in the future: +typedef Rounder DigitRounder; + +/** + * A class that defines the rounding strategy to be used when formatting numbers in NumberFormatter. + * + *

+ * To create a Rounder, use one of the factory methods. + * + * @draft ICU 60 + */ +class Rounder : public UMemory { + + public: + /** + * Show all available digits to full precision. + * + *

+ * NOTE: When formatting a double, this method, along with {@link #minFraction} and + * {@link #minDigits}, will trigger complex algorithm similar to Dragon4 to determine the low-order digits + * and the number of digits to display based on the value of the double. If the number of fraction places or + * significant digits can be bounded, consider using {@link #maxFraction} or {@link #maxDigits} instead to maximize + * performance. For more information, read the following blog post. + * + *

+ * http://www.serpentine.com/blog/2011/06/29/here-be-dragons-advances-in-problems-you-didnt-even-know-you-had/ + * + * @return A Rounder for chaining or passing to the NumberFormatter rounding() setter. + * @draft ICU 60 + */ + static Rounder unlimited(); + + /** + * Show numbers rounded if necessary to the nearest integer. + * + * @return A FractionRounder for chaining or passing to the NumberFormatter rounding() setter. + * @draft ICU 60 + */ + static FractionRounder integer(); + + /** + * Show numbers rounded if necessary to a certain number of fraction places (numerals after the decimal separator). + * Additionally, pad with zeros to ensure that this number of places are always shown. + * + *

+ * Example output with minMaxFractionPlaces = 3: + * + *

+ * 87,650.000
+ * 8,765.000
+ * 876.500
+ * 87.650
+ * 8.765
+ * 0.876
+ * 0.088
+ * 0.009
+ * 0.000 (zero) + * + *

+ * This method is equivalent to {@link #minMaxFraction} with both arguments equal. + * + * @param minMaxFractionPlaces + * The minimum and maximum number of numerals to display after the decimal separator (rounding if too + * long or padding with zeros if too short). + * @return A FractionRounder for chaining or passing to the NumberFormatter rounding() setter. + * @draft ICU 60 + */ + static FractionRounder fixedFraction(int32_t minMaxFractionPlaces); + + /** + * Always show at least a certain number of fraction places after the decimal separator, padding with zeros if + * necessary. Do not perform rounding (display numbers to their full precision). + * + *

+ * NOTE: If you are formatting doubles, see the performance note in {@link #unlimited}. + * + * @param minFractionPlaces + * The minimum number of numerals to display after the decimal separator (padding with zeros if + * necessary). + * @return A FractionRounder for chaining or passing to the NumberFormatter rounding() setter. + * @draft ICU 60 + */ + static FractionRounder minFraction(int32_t minFractionPlaces); + + /** + * Show numbers rounded if necessary to a certain number of fraction places (numerals after the decimal separator). + * Unlike the other fraction rounding strategies, this strategy does not pad zeros to the end of the + * number. + * + * @param maxFractionPlaces + * The maximum number of numerals to display after the decimal mark (rounding if necessary). + * @return A FractionRounder for chaining or passing to the NumberFormatter rounding() setter. + * @draft ICU 60 + */ + static FractionRounder maxFraction(int32_t maxFractionPlaces); + + /** + * Show numbers rounded if necessary to a certain number of fraction places (numerals after the decimal separator); + * in addition, always show at least a certain number of places after the decimal separator, padding with zeros if + * necessary. + * + * @param minFractionPlaces + * The minimum number of numerals to display after the decimal separator (padding with zeros if + * necessary). + * @param maxFractionPlaces + * The maximum number of numerals to display after the decimal separator (rounding if necessary). + * @return A FractionRounder for chaining or passing to the NumberFormatter rounding() setter. + * @draft ICU 60 + */ + static FractionRounder minMaxFraction(int32_t minFractionPlaces, int32_t maxFractionPlaces); + + /** + * Show numbers rounded if necessary to a certain number of significant digits or significant figures. Additionally, + * pad with zeros to ensure that this number of significant digits/figures are always shown. + * + *

+ * This method is equivalent to {@link #minMaxDigits} with both arguments equal. + * + * @param minMaxSignificantDigits + * The minimum and maximum number of significant digits to display (rounding if too long or padding with + * zeros if too short). + * @return A Rounder for chaining or passing to the NumberFormatter rounding() setter. + * @draft ICU 60 + */ + static DigitRounder fixedDigits(int32_t minMaxSignificantDigits); + + /** + * Always show at least a certain number of significant digits/figures, padding with zeros if necessary. Do not + * perform rounding (display numbers to their full precision). + * + *

+ * NOTE: If you are formatting doubles, see the performance note in {@link #unlimited}. + * + * @param minSignificantDigits + * The minimum number of significant digits to display (padding with zeros if too short). + * @return A Rounder for chaining or passing to the NumberFormatter rounding() setter. + * @draft ICU 60 + */ + static DigitRounder minDigits(int32_t minSignificantDigits); + + /** + * Show numbers rounded if necessary to a certain number of significant digits/figures. + * + * @param maxSignificantDigits + * The maximum number of significant digits to display (rounding if too long). + * @return A Rounder for chaining or passing to the NumberFormatter rounding() setter. + * @draft ICU 60 + */ + static DigitRounder maxDigits(int32_t maxSignificantDigits); + + /** + * Show numbers rounded if necessary to a certain number of significant digits/figures; in addition, always show at + * least a certain number of significant digits, padding with zeros if necessary. + * + * @param minSignificantDigits + * The minimum number of significant digits to display (padding with zeros if necessary). + * @param maxSignificantDigits + * The maximum number of significant digits to display (rounding if necessary). + * @return A Rounder for chaining or passing to the NumberFormatter rounding() setter. + * @draft ICU 60 + */ + static DigitRounder minMaxDigits(int32_t minSignificantDigits, int32_t maxSignificantDigits); + + /** + * Show numbers rounded if necessary to the closest multiple of a certain rounding increment. For example, if the + * rounding increment is 0.5, then round 1.2 to 1 and round 1.3 to 1.5. + * + *

+ * In order to ensure that numbers are padded to the appropriate number of fraction places, call + * withMinFraction() on the return value of this method. + * For example, to round to the nearest 0.5 and always display 2 numerals after the + * decimal separator (to display 1.2 as "1.00" and 1.3 as "1.50"), you can run: + * + *

+     * Rounder::increment(0.5).withMinFraction(2)
+     * 
+ * + * @param roundingIncrement + * The increment to which to round numbers. + * @return A Rounder for chaining or passing to the NumberFormatter rounding() setter. + * @draft ICU 60 + */ + static IncrementRounder increment(double roundingIncrement); + + /** + * Show numbers rounded and padded according to the rules for the currency unit. The most common rounding settings + * for currencies include Rounder.fixedFraction(2), Rounder.integer(), and + * Rounder.increment(0.05) for cash transactions ("nickel rounding"). + * + *

+ * The exact rounding details will be resolved at runtime based on the currency unit specified in the + * NumberFormatter chain. To round according to the rules for one currency while displaying the symbol for another + * currency, the withCurrency() method can be called on the return value of this method. + * + * @param currencyUsage + * Either STANDARD (for digital transactions) or CASH (for transactions where the rounding increment may + * be limited by the available denominations of cash or coins). + * @return A CurrencyRounder for chaining or passing to the NumberFormatter rounding() setter. + * @draft ICU 60 + */ + static CurrencyRounder currency(UCurrencyUsage currencyUsage); + + /** + * Sets the rounding mode to use when picking the direction to round (up or down). Common values + * include HALF_EVEN, HALF_UP, and FLOOR. The default is HALF_EVEN. + * + * @param roundingMode + * The RoundingMode to use. + * @return A Rounder for passing to the NumberFormatter rounding() setter. + * @draft ICU 60 + */ + Rounder withMode(UNumberFormatRoundingMode roundingMode) const; + + private: + enum RounderType { + RND_BOGUS, + RND_NONE, + RND_FRACTION, + RND_SIGNIFICANT, + RND_FRACTION_SIGNIFICANT, + RND_INCREMENT, + RND_CURRENCY, + RND_PASS_THROUGH, + RND_ERROR + } fType; + + union RounderUnion { + struct FractionSignificantSettings { + // For RND_FRACTION, RND_SIGNIFICANT, and RND_FRACTION_SIGNIFICANT + int8_t fMinFrac; + int8_t fMaxFrac; + int8_t fMinSig; + int8_t fMaxSig; + } fracSig; + struct IncrementSettings { + double fIncrement; + int32_t fMinFrac; + } increment; // For RND_INCREMENT + UCurrencyUsage currencyUsage; // For RND_CURRENCY + UErrorCode errorCode; // For RND_ERROR + } fUnion; + + typedef RounderUnion::FractionSignificantSettings FractionSignificantSettings; + typedef RounderUnion::IncrementSettings IncrementSettings; + + UNumberFormatRoundingMode fRoundingMode; + + Rounder(const RounderType &type, const RounderUnion &union_, UNumberFormatRoundingMode 
roundingMode) + : fType(type), fUnion(union_), fRoundingMode(roundingMode) {} + + Rounder(UErrorCode errorCode) : fType(RND_ERROR) { + fUnion.errorCode = errorCode; + } + + Rounder() : fType(RND_BOGUS) {} + + bool isBogus() const { + return fType == RND_BOGUS; + } + + UBool copyErrorTo(UErrorCode &status) const { + if (fType == RND_ERROR) { + status = fUnion.errorCode; + return TRUE; + } + return FALSE; + } + + // On the parent type so that this method can be called internally on Rounder instances. + Rounder withCurrency(const CurrencyUnit ¤cy, UErrorCode &status) const; + + /** NON-CONST: mutates the current instance. */ + void setLocaleData(const CurrencyUnit ¤cy, UErrorCode &status); + + void apply(impl::DecimalQuantity &value, UErrorCode &status) const; + + /** Version of {@link #apply} that obeys minInt constraints. Used for scientific notation compatibility mode. */ + void apply(impl::DecimalQuantity &value, int32_t minInt, UErrorCode status); + + int32_t + chooseMultiplierAndApply(impl::DecimalQuantity &input, const impl::MultiplierProducer &producer, + UErrorCode &status); + + static FractionRounder constructFraction(int32_t minFrac, int32_t maxFrac); + + static Rounder constructSignificant(int32_t minSig, int32_t maxSig); + + static Rounder + constructFractionSignificant(const FractionRounder &base, int32_t minSig, int32_t maxSig); + + static IncrementRounder constructIncrement(double increment, int32_t minFrac); + + static CurrencyRounder constructCurrency(UCurrencyUsage usage); + + static Rounder constructPassThrough(); + + // To allow MacroProps/MicroProps to initialize bogus instances: + friend struct impl::MacroProps; + friend struct impl::MicroProps; + + // To allow NumberFormatterImpl to access isBogus() and other internal methods: + friend class impl::NumberFormatterImpl; + + // To give access to apply() and chooseMultiplierAndApply(): + friend class impl::MutablePatternModifier; + friend class impl::LongNameHandler; + friend class 
impl::ScientificHandler; + friend class impl::CompactHandler; + + // To allow child classes to call private methods: + friend class FractionRounder; + friend class CurrencyRounder; + friend class IncrementRounder; +}; + +/** + * A class that defines a rounding strategy based on a number of fraction places and optionally significant digits to be + * used when formatting numbers in NumberFormatter. + * + *

+ * To create a FractionRounder, use one of the factory methods on Rounder. + * + * @draft ICU 60 + */ +class FractionRounder : public Rounder { + public: + /** + * Ensure that no less than this number of significant digits are retained when rounding according to fraction + * rules. + * + *

+ * For example, with integer rounding, the number 3.141 becomes "3". However, with minimum figures set to 2, 3.141 + * becomes "3.1" instead. + * + *

+ * This setting does not affect the number of trailing zeros. For example, 3.01 would print as "3", not "3.0". + * + * @param minSignificantDigits + * The number of significant figures to guarantee. + * @return A Rounder for chaining or passing to the NumberFormatter rounding() setter. + * @draft ICU 60 + */ + Rounder withMinDigits(int32_t minSignificantDigits) const; + + /** + * Ensure that no more than this number of significant digits are retained when rounding according to fraction + * rules. + * + *

+ * For example, with integer rounding, the number 123.4 becomes "123". However, with maximum figures set to 2, 123.4 + * becomes "120" instead. + * + *

+ * This setting does not affect the number of trailing zeros. For example, with fixed fraction of 2, 123.4 would + * become "120.00". + * + * @param maxSignificantDigits + * Round the number to no more than this number of significant figures. + * @return A Rounder for chaining or passing to the NumberFormatter rounding() setter. + * @draft ICU 60 + */ + Rounder withMaxDigits(int32_t maxSignificantDigits) const; + + private: + // Inherit constructor + using Rounder::Rounder; + + // To allow parent class to call this class's constructor: + friend class Rounder; +}; + +/** + * A class that defines a rounding strategy parameterized by a currency to be used when formatting numbers in + * NumberFormatter. + * + *

+ * To create a CurrencyRounder, use one of the factory methods on Rounder. + * + * @draft ICU 60 + */ +class CurrencyRounder : public Rounder { + public: + /** + * Associates a currency with this rounding strategy. + * + *

+ * Calling this method is not required, because the currency specified in unit() + * is automatically applied to currency rounding strategies. However, + * this method enables you to override that automatic association. + * + *

+     * This method also enables numbers to be formatted using currency rounding rules without explicitly using a
+     * currency format.
+     *
+     * @param currency
+     *            The currency to associate with this rounding strategy.
+     * @return A Rounder for chaining or passing to the NumberFormatter rounding() setter.
+     * @draft ICU 60
+     */
+    Rounder withCurrency(const CurrencyUnit &currency) const;
+
+  private:
+    // Inherit constructor
+    using Rounder::Rounder;
+
+    // To allow parent class to call this class's constructor:
+    friend class Rounder;
+};
+
+/**
+ * A class that defines a rounding strategy parameterized by a rounding increment to be used when formatting numbers in
+ * NumberFormatter.
+ *
+ *

+ * To create an IncrementRounder, use one of the factory methods on Rounder. + * + * @draft ICU 60 + */ +class IncrementRounder : public Rounder { + public: + /** + * Specifies the minimum number of fraction digits to render after the decimal separator, padding with zeros if + * necessary. By default, no trailing zeros are added. + * + *

+ * For example, if the rounding increment is 0.5 and minFrac is 2, then the resulting strings include "0.00", + * "0.50", "1.00", and "1.50". + * + *

+     * Note: In ICU4J, this functionality is accomplished via the scale of the BigDecimal rounding increment.
+     *
+     * @param minFrac The minimum number of digits after the decimal separator.
+     * @return A Rounder for chaining or passing to the NumberFormatter rounding() setter.
+     */
+    Rounder withMinFraction(int32_t minFrac) const;
+
+  private:
+    // Inherit constructor
+    using Rounder::Rounder;
+
+    // To allow parent class to call this class's constructor:
+    friend class Rounder;
+};
+
+/**
+ * A value class describing a digit-grouping strategy. Instances are obtained from the static factory
+ * methods below; the constructors are private and reachable only through the friends listed at the
+ * bottom of the class.
+ */
+class Grouper : public UMemory {
+  public:
+    static Grouper defaults();
+
+    static Grouper minTwoDigits();
+
+    static Grouper none();
+
+  private:
+    int8_t fGrouping1; // -3 means "bogus"; -2 means "needs locale data"; -1 means "no grouping"
+    int8_t fGrouping2;
+    bool fMin2;
+
+    Grouper(int8_t grouping1, int8_t grouping2, bool min2)
+            : fGrouping1(grouping1), fGrouping2(grouping2), fMin2(min2) {}
+
+    // Bogus (unset) instance. Every member is initialized, not only the sentinel in fGrouping1, so
+    // that copying a default-constructed Grouper never reads an indeterminate int8_t or bool.
+    Grouper() : fGrouping1(-3), fGrouping2(0), fMin2(false) {}
+
+    /** Whether this instance is the unset placeholder produced by the default constructor. */
+    bool isBogus() const {
+        return fGrouping1 == -3;
+    }
+
+    /** NON-CONST: mutates the current instance. */
+    void setLocaleData(const impl::ParsedPatternInfo &patternInfo);
+
+    bool groupAtPosition(int32_t position, const impl::DecimalQuantity &value) const;
+
+    // To allow MacroProps/MicroProps to initialize empty instances:
+    friend struct impl::MacroProps;
+    friend struct impl::MicroProps;
+
+    // To allow NumberFormatterImpl to access isBogus() and perform other operations:
+    friend class impl::NumberFormatterImpl;
+};
+
+/**
+ * A class that defines the strategy for padding and truncating integers before the decimal separator.
+ *
+ *

+ * To create an IntegerWidth, use one of the factory methods. + * + * @draft ICU 60 + * @see NumberFormatter + */ +class IntegerWidth : public UMemory { + public: + /** + * Pad numbers at the beginning with zeros to guarantee a certain number of numerals before the decimal separator. + * + *

+ * For example, with minInt=3, the number 55 will get printed as "055". + * + * @param minInt + * The minimum number of places before the decimal separator. + * @return An IntegerWidth for chaining or passing to the NumberFormatter integerWidth() setter. + * @draft ICU 60 + * @see NumberFormatter + */ + static IntegerWidth zeroFillTo(int32_t minInt); + + /** + * Truncate numbers exceeding a certain number of numerals before the decimal separator. + * + * For example, with maxInt=3, the number 1234 will get printed as "234". + * + * @param maxInt + * The maximum number of places before the decimal separator. + * @return An IntegerWidth for passing to the NumberFormatter integerWidth() setter. + * @draft ICU 60 + * @see NumberFormatter + */ + IntegerWidth truncateAt(int32_t maxInt); + + private: + union { + struct { + int8_t fMinInt; + int8_t fMaxInt; + } minMaxInt; + UErrorCode errorCode; + } fUnion; + bool fHasError = false; + + IntegerWidth(int8_t minInt, int8_t maxInt); + + IntegerWidth(UErrorCode errorCode) { // NOLINT + fUnion.errorCode = errorCode; + fHasError = true; + } + + IntegerWidth() { // NOLINT + fUnion.minMaxInt.fMinInt = -1; + } + + bool isBogus() const { + return !fHasError && fUnion.minMaxInt.fMinInt == -1; + } + + UBool copyErrorTo(UErrorCode &status) const { + if (fHasError) { + status = fUnion.errorCode; + return TRUE; + } + return FALSE; + } + + void apply(impl::DecimalQuantity &quantity, UErrorCode &status) const; + + // To allow MacroProps/MicroProps to initialize empty instances: + friend struct impl::MacroProps; + friend struct impl::MicroProps; + + // To allow NumberFormatterImpl to access isBogus() and perform other operations: + friend class impl::NumberFormatterImpl; +}; + +#ifndef U_HIDE_INTERNAL_API +namespace impl { + +/** + * Use a default threshold of 3. This means that the third time .format() is called, the data structures get built + * using the "safe" code path. 
+ * The first two calls to .format() will trigger the unsafe code path.
+ *
+ * @internal
+ */
+// const: this is a header-level default, not per-translation-unit mutable state.
+static const uint32_t DEFAULT_THRESHOLD = 3;
+
+/** @internal */
+class SymbolsWrapper : public UMemory {
+  public:
+    /** @internal */
+    SymbolsWrapper() : fType(SYMPTR_NONE), fPtr{nullptr} {}
+
+    /** @internal */
+    SymbolsWrapper(const SymbolsWrapper &other);
+
+    /** @internal */
+    ~SymbolsWrapper();
+
+    /** @internal */
+    SymbolsWrapper &operator=(const SymbolsWrapper &other);
+
+    /**
+     * The provided object is copied, but we do not adopt it.
+     * @internal
+     */
+    void setTo(const DecimalFormatSymbols &dfs);
+
+    /**
+     * Adopt the provided object.
+     * @internal
+     */
+    void setTo(const NumberingSystem *ns);
+
+    /**
+     * Whether the object is currently holding a DecimalFormatSymbols.
+     * @internal
+     */
+    bool isDecimalFormatSymbols() const;
+
+    /**
+     * Whether the object is currently holding a NumberingSystem.
+     * @internal
+     */
+    bool isNumberingSystem() const;
+
+    /**
+     * Get the DecimalFormatSymbols pointer. No ownership change.
+     * @internal
+     */
+    const DecimalFormatSymbols *getDecimalFormatSymbols() const;
+
+    /**
+     * Get the NumberingSystem pointer. No ownership change.
+     * @internal
+     */
+    const NumberingSystem *getNumberingSystem() const;
+
+    /**
+     * Reports U_MEMORY_ALLOCATION_ERROR when the wrapper is tagged as holding an object but the
+     * corresponding pointer is null.
+     * @return TRUE if status was set, FALSE otherwise.
+     * @internal
+     */
+    UBool copyErrorTo(UErrorCode &status) const {
+        if (fType == SYMPTR_DFS && fPtr.dfs == nullptr) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return TRUE;
+        } else if (fType == SYMPTR_NS && fPtr.ns == nullptr) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return TRUE;
+        }
+        return FALSE;
+    }
+
+  private:
+    // Tag selecting the active member of fPtr.
+    enum SymbolsPointerType {
+        SYMPTR_NONE, SYMPTR_DFS, SYMPTR_NS
+    } fType;
+
+    union {
+        const DecimalFormatSymbols *dfs;
+        const NumberingSystem *ns;
+    } fPtr;
+
+    void doCopyFrom(const SymbolsWrapper &other);
+
+    void doCleanup();
+};
+
+/** @internal */
+class Padder : public UMemory {
+  public:
+    /** @internal */
+    static Padder none();
+
+    /** @internal */
+    static Padder codePoints(UChar32 cp, int32_t targetWidth, UNumberFormatPadPosition position);
+
+  private:
+    // The width is a count and the pad character is a code point; the declared types now follow the
+    // (UChar32 cp, int32_t width, ...) constructor below.
+    int32_t fWidth;  // -3 = error; -2 = bogus; -1 = no padding
+    union {
+        struct {
+            UChar32 fCp;
+            UNumberFormatPadPosition fPosition;
+        } padding;
+        UErrorCode errorCode;
+    } fUnion;
+
+    Padder(UChar32 cp, int32_t width, UNumberFormatPadPosition position);
+
+    Padder(int32_t width);
+
+    Padder(UErrorCode errorCode) : fWidth(-3) { // NOLINT
+        fUnion.errorCode = errorCode;
+    }
+
+    Padder() : fWidth(-2) {} // NOLINT
+
+    bool isBogus() const {
+        return fWidth == -2;
+    }
+
+    // Copies the stored error code into status when this instance is in the error state (-3).
+    UBool copyErrorTo(UErrorCode &status) const {
+        if (fWidth == -3) {
+            status = fUnion.errorCode;
+            return TRUE;
+        }
+        return FALSE;
+    }
+
+    // True only for a strictly positive width; all sentinel values (and 0) are not valid.
+    bool isValid() const {
+        return fWidth > 0;
+    }
+
+    int32_t padAndApply(const impl::Modifier &mod1, const impl::Modifier &mod2,
+                        impl::NumberStringBuilder &string, int32_t leftIndex, int32_t rightIndex,
+                        UErrorCode &status) const;
+
+    // To allow MacroProps/MicroProps to initialize empty instances:
+    friend struct MacroProps;
+    friend struct MicroProps;
+
+    // To allow NumberFormatterImpl to access isBogus() and perform other operations:
+    friend class impl::NumberFormatterImpl;
+};
+
+/** @internal */
+struct MacroProps : public UMemory { + /** @internal */ + Notation notation; + + /** @internal */ + MeasureUnit unit; // = NoUnit::base(); + + /** @internal */ + Rounder rounder; // = Rounder(); (bogus) + + /** @internal */ + Grouper grouper; // = Grouper(); (bogus) + + /** @internal */ + Padder padder; // = Padder(); (bogus) + + /** @internal */ + IntegerWidth integerWidth; // = IntegerWidth(); (bogus) + + /** @internal */ + SymbolsWrapper symbols; + + // UNUM_XYZ_COUNT denotes null (bogus) values. + + /** @internal */ + UNumberUnitWidth unitWidth = UNUM_UNIT_WIDTH_COUNT; + + /** @internal */ + UNumberSignDisplay sign = UNUM_SIGN_COUNT; + + /** @internal */ + UNumberDecimalSeparatorDisplay decimal = UNUM_DECIMAL_SEPARATOR_COUNT; + + /** @internal */ + PluralRules *rules = nullptr; // no ownership + + /** @internal */ + uint32_t threshold = DEFAULT_THRESHOLD; + Locale locale; + + /** + * Check all members for errors. + * @internal + */ + bool copyErrorTo(UErrorCode &status) const { + return notation.copyErrorTo(status) || rounder.copyErrorTo(status) || + padder.copyErrorTo(status) || integerWidth.copyErrorTo(status) || + symbols.copyErrorTo(status); + } +}; + +} // namespace impl +#endif /* U_HIDE_INTERNAL_API */ + +/** + * An abstract base class for specifying settings related to number formatting. This class is implemented by + * {@link UnlocalizedNumberFormatter} and {@link LocalizedNumberFormatter}. + */ +template +class NumberFormatterSettings { + public: + /** + * Specifies the notation style (simple, scientific, or compact) for rendering numbers. + * + *

+ * + *

+ * All notation styles will be properly localized with locale data, and all notation styles are compatible with + * units, rounding strategies, and other number formatter settings. + * + *

+ * Pass this method the return value of a {@link Notation} factory method. For example: + * + *

+     * NumberFormatter::with().notation(Notation::compactShort())
+     * 
+     *
+     * The default is to use simple notation.
+     *
+     * @param notation
+     *            The notation strategy to use.
+     * @return The fluent chain.
+     * @see Notation
+     * @draft ICU 60
+     */
+    Derived notation(const Notation &notation) const;
+
+    /**
+     * Specifies the unit (unit of measure, currency, or percent) to associate with rendered numbers.
+     *
+     *
+     *
+     *

+ * All units will be properly localized with locale data, and all units are compatible with notation styles, + * rounding strategies, and other number formatter settings. + * + *

+ * Pass this method any instance of {@link MeasureUnit}. For units of measure: + * + *

+     * NumberFormatter::with().adoptUnit(MeasureUnit::createMeter(status))
+     * 
+ * + * Currency: + * + *
+     * NumberFormatter::with().unit(CurrencyUnit(u"USD", status))
+     * 
+ * + * Percent: + * + *
+     * NumberFormatter::with().unit(NoUnit::percent())
+     * 
+ * + * The default is to render without units (equivalent to NoUnit.base()). + * + * @param unit + * The unit to render. + * @return The fluent chain. + * @see MeasureUnit + * @see Currency + * @see NoUnit + * @draft ICU 60 + */ + Derived unit(const icu::MeasureUnit &unit) const; + + /** + * Like unit(), but takes ownership of a pointer. Convenient for use with the MeasureFormat factory + * methods, which return pointers that need ownership. + * + * @param unit + * The unit to render. + * @return The fluent chain. + * @see #unit + * @see MeasureUnit + * @draft ICU 60 + */ + Derived adoptUnit(const icu::MeasureUnit *unit) const; + + /** + * Specifies the rounding strategy to use when formatting numbers. + * + * + * + *

+ * Pass this method the return value of one of the factory methods on {@link Rounder}. For example: + * + *

+     * NumberFormatter::with().rounding(Rounder::fixedFraction(2))
+     * 
+ * + * The default is to not perform rounding. + * + * @param rounder + * The rounding strategy to use. + * @return The fluent chain. + * @see Rounder + * @provisional This API might change or be removed in a future release. + * @draft ICU 60 + */ + Derived rounding(const Rounder &rounder) const; + +#ifndef U_HIDE_INTERNAL_API + + /** + * Specifies the grouping strategy to use when formatting numbers. + * + * + * + *

+ * The exact grouping widths will be chosen based on the locale. + * + *

+ * Pass this method the return value of one of the factory methods on {@link Grouper}. For example: + * + *

+     * NumberFormatter::with().grouping(Grouper::minTwoDigits())
+     * 
+ * + * The default is to perform grouping without concern for the minimum grouping digits. + * + * @param grouper + * The grouping strategy to use. + * @return The fluent chain. + * @see Grouper + * @see Notation + * @internal + * @internal ICU 60: This API is technical preview. + */ + Derived grouping(const Grouper &grouper) const; + +#endif /* U_HIDE_INTERNAL_API */ + + /** + * Specifies the minimum and maximum number of digits to render before the decimal mark. + * + * + * + *

+ * Pass this method the return value of {@link IntegerWidth#zeroFillTo(int)}. For example: + * + *

+     * NumberFormatter::with().integerWidth(IntegerWidth::zeroFillTo(2))
+     * 
+ * + * The default is to have one minimum integer digit. + * + * @param style + * The integer width to use. + * @return The fluent chain. + * @see IntegerWidth + * @draft ICU 60 + */ + Derived integerWidth(const IntegerWidth &style) const; + + /** + * Specifies the symbols (decimal separator, grouping separator, percent sign, numerals, etc.) to use when rendering + * numbers. + * + * + * + *

+ * Pass this method an instance of {@link DecimalFormatSymbols}. For example: + * + *

+     * NumberFormatter::with().symbols(DecimalFormatSymbols(Locale("de_CH"), status))
+     * 
+ * + *

+ * Note: DecimalFormatSymbols automatically chooses the best numbering system based on the locale. + * In the examples above, the first three are using the Latin numbering system, and the fourth is using the Myanmar + * numbering system. + * + *

+ * Note: The instance of DecimalFormatSymbols will be copied: changes made to the symbols object + * after passing it into the fluent chain will not be seen. + * + *

+ * Note: Calling this method will override the NumberingSystem previously specified in + * {@link #symbols(NumberingSystem)}. + * + *

+ * The default is to choose the symbols based on the locale specified in the fluent chain. + * + * @param symbols + * The DecimalFormatSymbols to use. + * @return The fluent chain. + * @see DecimalFormatSymbols + * @draft ICU 60 + */ + Derived symbols(const DecimalFormatSymbols &symbols) const; + + /** + * Specifies that the given numbering system should be used when fetching symbols. + * + *

+ * + *

+ * Pass this method an instance of {@link NumberingSystem}. For example, to force the locale to always use the Latin + * alphabet numbering system (ASCII digits): + * + *

+     * NumberFormatter::with().adoptSymbols(NumberingSystem::createInstanceByName("latn", status))
+     * 
+ * + *

+ * Note: Calling this method will override the DecimalFormatSymbols previously specified in + * {@link #symbols(DecimalFormatSymbols)}. + * + *

+ * The default is to choose the best numbering system for the locale. + * + *

+     * This method takes ownership of a pointer in order to work nicely with the NumberingSystem factory methods.
+     *
+     * @param symbols
+     *            The NumberingSystem to use. Ownership of the pointer passes to this method.
+     * @return The fluent chain.
+     * @see NumberingSystem
+     * @draft ICU 60
+     */
+    Derived adoptSymbols(const NumberingSystem *symbols) const;
+
+    /**
+     * Sets the width of the unit (measure unit or currency). Most common values:
+     *
+     *

+ * + *

+ * Pass an element from the {@link UNumberUnitWidth} enum to this setter. For example: + * + *

+     * NumberFormatter::with().unitWidth(UNumberUnitWidth::UNUM_UNIT_WIDTH_FULL_NAME)
+     * 
+ * + *

+     * The default is the SHORT width.
+     *
+     * @param width
+     *            The width to use when rendering numbers.
+     * @return The fluent chain
+     * @see UNumberUnitWidth
+     * @draft ICU 60
+     */
+    Derived unitWidth(const UNumberUnitWidth &width) const;
+
+    /**
+     * Sets the plus/minus sign display strategy. Most common values:
+     *
+     *

+ * + *

+ * Pass an element from the {@link UNumberSignDisplay} enum to this setter. For example: + * + *

+     * NumberFormatter::with().sign(UNumberSignDisplay::UNUM_SIGN_ALWAYS)
+     * 
+ * + *

+     * The default is AUTO sign display.
+     *
+     * @param width
+     *            The sign display strategy to use when rendering numbers.
+     *            NOTE(review): the parameter name "width" looks copied from unitWidth(); it carries a
+     *            UNumberSignDisplay value, not a width.
+     * @return The fluent chain
+     * @see UNumberSignDisplay
+     * @provisional This API might change or be removed in a future release.
+     * @draft ICU 60
+     */
+    Derived sign(const UNumberSignDisplay &width) const;
+
+    /**
+     * Sets the decimal separator display strategy. This affects integer numbers with no fraction part. Most common
+     * values:
+     *
+     *

+ * + *

+ * Pass an element from the {@link UNumberDecimalSeparatorDisplay} enum to this setter. For example: + * + *

+     * NumberFormatter::with().decimal(UNumberDecimalSeparatorDisplay::UNUM_DECIMAL_SEPARATOR_ALWAYS)
+     * 
+ * + *

+     * The default is AUTO decimal separator display.
+     *
+     * @param width
+     *            The decimal separator display strategy to use when rendering numbers.
+     *            NOTE(review): the parameter name "width" looks copied from unitWidth(); it carries a
+     *            UNumberDecimalSeparatorDisplay value, not a width.
+     * @return The fluent chain
+     * @see UNumberDecimalSeparatorDisplay
+     * @provisional This API might change or be removed in a future release.
+     * @draft ICU 60
+     */
+    Derived decimal(const UNumberDecimalSeparatorDisplay &width) const;
+
+#ifndef U_HIDE_INTERNAL_API
+
+    /**
+     * Set the padding strategy. May be added to ICU 61; see #13338.
+     *
+     * @param padder
+     *            The padding strategy to use.
+     * @internal ICU 60: This API is ICU internal only.
+     */
+    Derived padding(const impl::Padder &padder) const;
+
+    /**
+     * Internal fluent setter to support a custom regulation threshold. A threshold of 1 causes the data structures to
+     * be built right away. A threshold of 0 prevents the data structures from being built.
+     *
+     * @param threshold
+     *            The regulation threshold described above.
+     * @internal ICU 60: This API is ICU internal only.
+     */
+    Derived threshold(uint32_t threshold) const;
+
+#endif /* U_HIDE_INTERNAL_API */
+
+    /**
+     * Sets the UErrorCode if an error occurred in the fluent chain.
+     * Preserves older error codes in the outErrorCode.
+     * @param outErrorCode
+     *            In/out status; left untouched if it already holds a failure.
+     * @return TRUE if U_FAILURE(outErrorCode)
+     * @draft ICU 60
+     */
+    UBool copyErrorTo(UErrorCode &outErrorCode) const {
+        if (U_FAILURE(outErrorCode)) {
+            // Do not overwrite the older error code
+            return TRUE;
+        }
+        // The bool result is ignored on purpose: the return value is derived from outErrorCode itself.
+        fMacros.copyErrorTo(outErrorCode);
+        return U_FAILURE(outErrorCode);
+    }
+
+  protected:
+    impl::MacroProps fMacros;
+
+  private:
+    // Don't construct me directly!  Use (Un)LocalizedNumberFormatter.
+    NumberFormatterSettings() = default;
+
+    friend class LocalizedNumberFormatter;
+    friend class UnlocalizedNumberFormatter;
+};
+
+/**
+ * A NumberFormatter that does not yet have a locale. In order to format numbers, a locale must be specified.
+ * + * @see NumberFormatter + * @draft ICU 60 + */ +class UnlocalizedNumberFormatter + : public NumberFormatterSettings, public UMemory { + + public: + LocalizedNumberFormatter locale(const icu::Locale &locale) const; + + // Make default copy constructor call the NumberFormatterSettings copy constructor. + UnlocalizedNumberFormatter(const UnlocalizedNumberFormatter &other) : UnlocalizedNumberFormatter( + static_cast &>(other)) {} + + private: + UnlocalizedNumberFormatter() = default; + + explicit UnlocalizedNumberFormatter( + const NumberFormatterSettings &other); + + // To give the fluent setters access to this class's constructor: + friend class NumberFormatterSettings; + + // To give NumberFormatter::with() access to this class's constructor: + friend class NumberFormatter; +}; + +class LocalizedNumberFormatter + : public NumberFormatterSettings, public UMemory { + public: + /** + * Format the given integer number to a string using the settings specified in the NumberFormatter fluent + * setting chain. + * + * @param input + * The number to format. + * @param status + * Set to an ErrorCode if one occured in the setter chain or during formatting. + * @return A FormattedNumber object; call .toString() to get the string. + * @draft ICU 60 + */ + FormattedNumber formatInt(int64_t value, UErrorCode &status) const; + + /** + * Format the given float or double to a string using the settings specified in the NumberFormatter fluent setting + * chain. + * + * @param input + * The number to format. + * @param status + * Set to an ErrorCode if one occured in the setter chain or during formatting. + * @return A FormattedNumber object; call .toString() to get the string. + * @draft ICU 60 + */ + FormattedNumber formatDouble(double value, UErrorCode &status) const; + + /** + * Format the given decimal number to a string using the settings + * specified in the NumberFormatter fluent setting chain. 
+ * The syntax of the unformatted number is a "numeric string" + * as defined in the Decimal Arithmetic Specification, available at + * http://speleotrove.com/decimal + * + * @param input + * The number to format. + * @param status + * Set to an ErrorCode if one occured in the setter chain or during formatting. + * @return A FormattedNumber object; call .toString() to get the string. + * @draft ICU 60 + */ + FormattedNumber formatDecimal(StringPiece value, UErrorCode &status) const; + + // Make default copy constructor call the NumberFormatterSettings copy constructor. + LocalizedNumberFormatter(const LocalizedNumberFormatter &other) : LocalizedNumberFormatter( + static_cast &>(other)) {} + + ~LocalizedNumberFormatter(); + + private: + std::atomic fCompiled{nullptr}; + std::atomic fCallCount{0}; + + LocalizedNumberFormatter() = default; + + explicit LocalizedNumberFormatter(const NumberFormatterSettings &other); + + LocalizedNumberFormatter(const impl::MacroProps ¯os, const Locale &locale); + + /** + * This is the core entrypoint to the number formatting pipeline. It performs self-regulation: a static code path + * for the first few calls, and compiling a more efficient data structure if called repeatedly. + * + *

+ * This function is very hot, being called in every call to the number formatting pipeline. + * + * @param fq + * The quantity to be formatted. + * @return The formatted number result. + */ + FormattedNumber formatImpl(impl::NumberFormatterResults *results, UErrorCode &status) const; + + // To give the fluent setters access to this class's constructor: + friend class NumberFormatterSettings; + + // To give UnlocalizedNumberFormatter::locale() access to this class's constructor: + friend class UnlocalizedNumberFormatter; +}; + +class FormattedNumber : public UMemory { + public: + UnicodeString toString() const; + + Appendable &appendTo(Appendable &appendable); + + void populateFieldPosition(FieldPosition &fieldPosition, UErrorCode &status); + + void populateFieldPositionIterator(FieldPositionIterator &iterator, UErrorCode &status); + + ~FormattedNumber(); + + private: + // Can't use LocalPointer because NumberFormatterResults is forward-declared + const impl::NumberFormatterResults *fResults; + + // Default constructor for error states + FormattedNumber() : fResults(nullptr) {} + + explicit FormattedNumber(impl::NumberFormatterResults *results) : fResults(results) {} + + // To give LocalizedNumberFormatter format methods access to this class's constructor: + friend class LocalizedNumberFormatter; +}; + +/** + * The NumberFormatter class cannot be constructed directly; use one of the factory methods. + * + * @draft ICU 60 + */ +class NumberFormatter final { + public: + /** + * Call this method at the beginning of a NumberFormatter fluent chain in which the locale is not currently known at + * the call site. + * + * @return An {@link UnlocalizedNumberFormatter}, to be used for chaining. + * @draft ICU 60 + */ + static UnlocalizedNumberFormatter with(); + + /** + * Call this method at the beginning of a NumberFormatter fluent chain in which the locale is known at the call + * site. 
+ * + * @param locale + * The locale from which to load formats and symbols for number formatting. + * @return A {@link LocalizedNumberFormatter}, to be used for chaining. + * @draft ICU 60 + */ + static LocalizedNumberFormatter withLocale(const Locale &locale); + + // Don't construct me! + NumberFormatter() = delete; +}; + +} // namespace number +U_NAMESPACE_END + +#endif //NUMBERFORMAT_HEADERS_H diff --git a/icu4c/source/test/intltest/Makefile.in b/icu4c/source/test/intltest/Makefile.in index c4ba088f4f..0527a4b050 100644 --- a/icu4c/source/test/intltest/Makefile.in +++ b/icu4c/source/test/intltest/Makefile.in @@ -61,7 +61,11 @@ windttst.o winnmtst.o winutil.o csdetest.o tzrulets.o tzoffloc.o tzfmttst.o ssea tufmtts.o itspoof.o simplethread.o bidiconf.o locnmtst.o dcfmtest.o alphaindextst.o listformattertest.o genderinfotest.o compactdecimalformattest.o regiontst.o \ reldatefmttest.o simpleformattertest.o measfmttest.o numfmtspectest.o unifiedcachetest.o quantityformattertest.o \ scientificnumberformattertest.o datadrivennumberformattestsuite.o \ -numberformattesttuple.o numberformat2test.o pluralmaptest.o +numberformattesttuple.o numberformat2test.o pluralmaptest.o \ +numbertest_affixutils.o numbertest_stringbuilder.o +# FIXME +#numbertest_api.o numbertest_decimalquantity.o \ +#numbertest_modifiers.o numbertest_patternmodifier.o numbertest_patternstring.o \ DEPS = $(OBJECTS:.o=.d) diff --git a/icu4c/source/test/intltest/itformat.cpp b/icu4c/source/test/intltest/itformat.cpp index ae5ae0c5df..21092659a7 100644 --- a/icu4c/source/test/intltest/itformat.cpp +++ b/icu4c/source/test/intltest/itformat.cpp @@ -59,6 +59,7 @@ #include "dcfmtest.h" // DecimalFormatTest #include "listformattertest.h" // ListFormatterTest #include "regiontst.h" // RegionTest +#include "numbertest.h" // All NumberFormatter tests extern IntlTest *createCompactDecimalFormatTest(); extern IntlTest *createGenderInfoTest(); @@ -204,7 +205,7 @@ void IntlTestFormat::runIndexedTest( int32_t index, 
UBool exec, const char* &nam callTest(*test, par); } break; - case 49: + case 49: name = "ScientificNumberFormatterTest"; if (exec) { logln("ScientificNumberFormatterTest test---"); @@ -213,8 +214,8 @@ void IntlTestFormat::runIndexedTest( int32_t index, UBool exec, const char* &nam callTest(*test, par); } break; - case 50: - name = "NumberFormat2Test"; + case 50: + name = "NumberFormat2Test"; if (exec) { logln("NumberFormat2Test test---"); logln((UnicodeString)""); @@ -222,6 +223,8 @@ void IntlTestFormat::runIndexedTest( int32_t index, UBool exec, const char* &nam callTest(*test, par); } break; + TESTCLASS(51,AffixUtilsTest); + TESTCLASS(52,NumberStringBuilderTest); default: name = ""; break; //needed to end loop } if (exec) { diff --git a/icu4c/source/test/intltest/numbertest.h b/icu4c/source/test/intltest/numbertest.h new file mode 100644 index 0000000000..1b37795220 --- /dev/null +++ b/icu4c/source/test/intltest/numbertest.h @@ -0,0 +1,50 @@ +// © 2017 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html + +#pragma once + +#include "number_stringbuilder.h" +#include "intltest.h" +#include "number_affixutils.h" + +using namespace icu::number; +using namespace icu::number::impl; + +class AffixUtilsTest : public IntlTest { + public: + void testEscape(); + + void testUnescape(); + + void testContainsReplaceType(); + + void testInvalid(); + + void testUnescapeWithSymbolProvider(); + + void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0); + + private: + UnicodeString unescapeWithDefaults(const SymbolProvider &defaultProvider, UnicodeString input, + UErrorCode &status); +}; + +class NumberStringBuilderTest : public IntlTest { + public: + void testInsertAppendUnicodeString(); + + void testInsertAppendCodePoint(); + + void testCopy(); + + void testFields(); + + void testUnlimitedCapacity(); + + void testCodePoints(); + + void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0); + + private: + void assertEqualsImpl(const UnicodeString &a, const NumberStringBuilder &b); +}; diff --git a/icu4c/source/test/intltest/numbertest_affixutils.cpp b/icu4c/source/test/intltest/numbertest_affixutils.cpp new file mode 100644 index 0000000000..aefa351301 --- /dev/null +++ b/icu4c/source/test/intltest/numbertest_affixutils.cpp @@ -0,0 +1,242 @@ +// © 2017 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html + +#include "putilimp.h" +#include "unicode/dcfmtsym.h" +#include "numbertest.h" +#include "number_utils.h" + +using namespace icu::number::impl; + +class DefaultSymbolProvider : public SymbolProvider { + DecimalFormatSymbols fSymbols; + + public: + DefaultSymbolProvider(UErrorCode &status) : fSymbols(Locale("ar_SA"), status) {} + + virtual UnicodeString getSymbol(AffixPatternType type) const { + switch (type) { + case TYPE_MINUS_SIGN: + return u"−"; + case TYPE_PLUS_SIGN: + return fSymbols.getConstSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kPlusSignSymbol); + case TYPE_PERCENT: + return fSymbols.getConstSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kPercentSymbol); + case TYPE_PERMILLE: + return fSymbols.getConstSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kPerMillSymbol); + case TYPE_CURRENCY_SINGLE: + return u"$"; + case TYPE_CURRENCY_DOUBLE: + return u"XXX"; + case TYPE_CURRENCY_TRIPLE: + return u"long name"; + case TYPE_CURRENCY_QUAD: + return u"\uFFFD"; + case TYPE_CURRENCY_QUINT: + // TODO: Add support for narrow currency symbols here. 
+ return u"\uFFFD"; + case TYPE_CURRENCY_OVERFLOW: + return u"\uFFFD"; + default: + U_ASSERT(false); + return 0; // silence compiler warnings + } + } +}; + +void AffixUtilsTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char *) { + if (exec) { + logln("TestSuite AffixUtilsTest: "); + } + TESTCASE_AUTO_BEGIN; + TESTCASE_AUTO(testEscape); + TESTCASE_AUTO(testUnescape); + TESTCASE_AUTO(testContainsReplaceType); + TESTCASE_AUTO(testInvalid); + TESTCASE_AUTO(testUnescapeWithSymbolProvider); + TESTCASE_AUTO_END; +} + +void AffixUtilsTest::testEscape() { + static const char16_t *cases[][2] = {{u"", u""}, + {u"abc", u"abc"}, + {u"-", u"'-'"}, + {u"-!", u"'-'!"}, + {u"−", u"−"}, + {u"---", u"'---'"}, + {u"-%-", u"'-%-'"}, + {u"'", u"''"}, + {u"-'", u"'-'''"}, + {u"-'-", u"'-''-'"}, + {u"a-'-", u"a'-''-'"}}; + + for (auto &cas : cases) { + UnicodeString input(cas[0]); + UnicodeString expected(cas[1]); + UnicodeString result = AffixUtils::escape(UnicodeStringCharSequence(input)); + assertEquals(input, expected, result); + } +} + +void AffixUtilsTest::testUnescape() { + static struct TestCase { + const char16_t *input; + bool currency; + int32_t expectedLength; + const char16_t *output; + } cases[] = {{u"", false, 0, u""}, + {u"abc", false, 3, u"abc"}, + {u"-", false, 1, u"−"}, + {u"-!", false, 2, u"−!"}, + {u"+", false, 1, u"\u061C+"}, + {u"+!", false, 2, u"\u061C+!"}, + {u"‰", false, 1, u"؉"}, + {u"‰!", false, 2, u"؉!"}, + {u"-x", false, 2, u"−x"}, + {u"'-'x", false, 2, u"-x"}, + {u"'--''-'-x", false, 6, u"--'-−x"}, + {u"''", false, 1, u"'"}, + {u"''''", false, 2, u"''"}, + {u"''''''", false, 3, u"'''"}, + {u"''x''", false, 3, u"'x'"}, + {u"¤", true, 1, u"$"}, + {u"¤¤", true, 2, u"XXX"}, + {u"¤¤¤", true, 3, u"long name"}, + {u"¤¤¤¤", true, 4, u"\uFFFD"}, + {u"¤¤¤¤¤", true, 5, u"\uFFFD"}, + {u"¤¤¤¤¤¤", true, 6, u"\uFFFD"}, + {u"¤¤¤a¤¤¤¤", true, 8, u"long namea\uFFFD"}, + {u"a¤¤¤¤b¤¤¤¤¤c", true, 12, u"a\uFFFDb\uFFFDc"}, + {u"¤!", true, 2, u"$!"}, + {u"¤¤!", 
true, 3, u"XXX!"}, + {u"¤¤¤!", true, 4, u"long name!"}, + {u"-¤¤", true, 3, u"−XXX"}, + {u"¤¤-", true, 3, u"XXX−"}, + {u"'¤'", false, 1, u"¤"}, + {u"%", false, 1, u"٪\u061C"}, + {u"'%'", false, 1, u"%"}, + {u"¤'-'%", true, 3, u"$-٪\u061C"}, + {u"#0#@#*#;#", false, 9, u"#0#@#*#;#"}}; + + UErrorCode status = U_ZERO_ERROR; + DefaultSymbolProvider defaultProvider(status); + assertSuccess("Constructing DefaultSymbolProvider", status); + + for (TestCase cas : cases) { + UnicodeString input(cas.input); + UnicodeString output(cas.output); + + assertEquals(input, cas.currency, AffixUtils::hasCurrencySymbols(UnicodeStringCharSequence(input), status)); + assertSuccess("Spot 1", status); + assertEquals(input, cas.expectedLength, AffixUtils::estimateLength(UnicodeStringCharSequence(input), status)); + assertSuccess("Spot 2", status); + + UnicodeString actual = unescapeWithDefaults(defaultProvider, input, status); + assertSuccess("Spot 3", status); + assertEquals(input, output, actual); + + int32_t ulength = AffixUtils::unescapedCodePointCount(UnicodeStringCharSequence(input), defaultProvider, status); + assertSuccess("Spot 4", status); + assertEquals(input, output.countChar32(), ulength); + } +} + +void AffixUtilsTest::testContainsReplaceType() { + static struct TestCase { + const char16_t *input; + bool hasMinusSign; + const char16_t *output; + } cases[] = {{u"", false, u""}, + {u"-", true, u"+"}, + {u"-a", true, u"+a"}, + {u"a-", true, u"a+"}, + {u"a-b", true, u"a+b"}, + {u"--", true, u"++"}, + {u"x", false, u"x"}}; + + UErrorCode status = U_ZERO_ERROR; + for (TestCase cas : cases) { + UnicodeString input(cas.input); + bool hasMinusSign = cas.hasMinusSign; + UnicodeString output(cas.output); + + assertEquals( + input, hasMinusSign, AffixUtils::containsType(UnicodeStringCharSequence(input), TYPE_MINUS_SIGN, status)); + assertSuccess("Spot 1", status); + assertEquals( + input, output, AffixUtils::replaceType(UnicodeStringCharSequence(input), TYPE_MINUS_SIGN, u'+', status)); + 
assertSuccess("Spot 2", status); + } +} + +void AffixUtilsTest::testInvalid() { + static const char16_t *invalidExamples[] = { + u"'", u"x'", u"'x", u"'x''", u"''x'"}; + + UErrorCode status = U_ZERO_ERROR; + DefaultSymbolProvider defaultProvider(status); + assertSuccess("Constructing DefaultSymbolProvider", status); + + for (const char16_t *strPtr : invalidExamples) { + UnicodeString str(strPtr); + + status = U_ZERO_ERROR; + AffixUtils::hasCurrencySymbols(UnicodeStringCharSequence(str), status); + assertEquals("Should set error code spot 1", status, U_ILLEGAL_ARGUMENT_ERROR); + + status = U_ZERO_ERROR; + AffixUtils::estimateLength(UnicodeStringCharSequence(str), status); + assertEquals("Should set error code spot 2", status, U_ILLEGAL_ARGUMENT_ERROR); + + status = U_ZERO_ERROR; + unescapeWithDefaults(defaultProvider, str, status); + assertEquals("Should set error code spot 3", status, U_ILLEGAL_ARGUMENT_ERROR); + } +} + +class NumericSymbolProvider : public SymbolProvider { + public: + virtual UnicodeString getSymbol(AffixPatternType type) const { + return Int64ToUnicodeString(type < 0 ? 
-type : type); + } +}; + +void AffixUtilsTest::testUnescapeWithSymbolProvider() { + static const char16_t* cases[][2] = { + {u"", u""}, + {u"-", u"1"}, + {u"'-'", u"-"}, + {u"- + % ‰ ¤ ¤¤ ¤¤¤ ¤¤¤¤ ¤¤¤¤¤", u"1 2 3 4 5 6 7 8 9"}, + {u"'¤¤¤¤¤¤'", u"¤¤¤¤¤¤"}, + {u"¤¤¤¤¤¤", u"\uFFFD"} + }; + + NumericSymbolProvider provider; + + UErrorCode status = U_ZERO_ERROR; + NumberStringBuilder sb; + for (auto cas : cases) { + UnicodeString input(cas[0]); + UnicodeString expected(cas[1]); + sb.clear(); + AffixUtils::unescape(UnicodeStringCharSequence(input), sb, 0, provider, status); + assertSuccess("Spot 1", status); + assertEquals(input, expected, sb.toUnicodeString()); + } + + // Test insertion position + sb.clear(); + sb.append(u"abcdefg", UNUM_FIELD_COUNT, status); + assertSuccess("Spot 2", status); + AffixUtils::unescape(UnicodeStringCharSequence(UnicodeString(u"-+%")), sb, 4, provider, status); + assertSuccess("Spot 3", status); + assertEquals(u"Symbol provider into middle", u"abcd123efg", sb.toUnicodeString()); +} + +UnicodeString AffixUtilsTest::unescapeWithDefaults(const SymbolProvider &defaultProvider, + UnicodeString input, UErrorCode &status) { + NumberStringBuilder nsb; + int32_t length = AffixUtils::unescape(UnicodeStringCharSequence(input), nsb, 0, defaultProvider, status); + assertEquals("Return value of unescape", nsb.length(), length); + return nsb.toUnicodeString(); +} diff --git a/icu4c/source/test/intltest/numbertest_api.cpp b/icu4c/source/test/intltest/numbertest_api.cpp new file mode 100644 index 0000000000..ec2fb98f93 --- /dev/null +++ b/icu4c/source/test/intltest/numbertest_api.cpp @@ -0,0 +1,1552 @@ +// © 2017 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html + +#define TEMP_TEST_HELPERS_NO_PRINT_PASS 1 + +#include +#include +#include +#include +#include "temp_test_helpers.h" +#include "unicode/numberformatter.h" +#include "number_types.h" + +#include +#include "unicode/ustream.h" + +using namespace icu; +using namespace icu::number; +using namespace icu::number::impl; + +class numbertest_api { + public: + explicit numbertest_api(UErrorCode &status) : USD(u"USD", status), GBP(u"GBP", status), + CZK(u"CZK", status), CAD(u"CAD", status), + FRENCH_SYMBOLS(Locale::getFrench(), status), + SWISS_SYMBOLS(Locale("de-CH"), status), + MYANMAR_SYMBOLS(Locale("my"), status) { + MeasureUnit *unit = MeasureUnit::createMeter(status); + METER = *unit; + delete unit; + unit = MeasureUnit::createDay(status); + DAY = *unit; + delete unit; + unit = MeasureUnit::createSquareMeter(status); + SQUARE_METER = *unit; + delete unit; + unit = MeasureUnit::createFahrenheit(status); + FAHRENHEIT = *unit; + delete unit; + + NumberingSystem *ns = NumberingSystem::createInstanceByName("mathsanb", status); + MATHSANB = *ns; + delete ns; + ns = NumberingSystem::createInstanceByName("latn", status); + LATN = *ns; + delete ns; + } + + void notationSimple(); + + void notationScientific(); + + void notationCompact(); + + void unitMeasure(); + + void unitCurrency(); + + void unitPercent(); + + void roundingFraction(); + + void roundingFigures(); + + void roundingFractionFigures(); + + void roundingOther(); + + void grouping(); + + void padding(); + + void integerWidth(); + + void symbols(); + + // TODO: Add this method if currency symbols override support is added. 
+ //void symbolsOverride(); + + void sign(); + + void decimal(); + + void locale(); + + void errors(); + + private: + CurrencyUnit USD; + CurrencyUnit GBP; + CurrencyUnit CZK; + CurrencyUnit CAD; + + MeasureUnit METER; + MeasureUnit DAY; + MeasureUnit SQUARE_METER; + MeasureUnit FAHRENHEIT; + + NumberingSystem MATHSANB; + NumberingSystem LATN; + + DecimalFormatSymbols FRENCH_SYMBOLS; + DecimalFormatSymbols SWISS_SYMBOLS; + DecimalFormatSymbols MYANMAR_SYMBOLS; + + void assertFormatDescending(const UnicodeString &message, const UnlocalizedNumberFormatter &f, + const Locale &locale, ...); + + void assertFormatDescendingBig(const UnicodeString &message, const UnlocalizedNumberFormatter &f, + const Locale &locale, ...); + + void assertFormatSingle(const UnicodeString &message, const UnlocalizedNumberFormatter &f, + const Locale &locale, double input, const UnicodeString &expected); +}; + +void numbertest_api::notationSimple() { + assertFormatDescending( + u"Basic", + NumberFormatter::with(), + Locale::getEnglish(), + u"87,650", + u"8,765", + u"876.5", + u"87.65", + u"8.765", + u"0.8765", + u"0.08765", + u"0.008765", + u"0"); + + assertFormatSingle( + u"Basic with Negative Sign", + NumberFormatter::with(), + Locale::getEnglish(), + -9876543.21, + u"-9,876,543.21"); +} + + +void numbertest_api::notationScientific() { + assertFormatDescending( + u"Scientific", + NumberFormatter::with().notation(Notation::scientific()), + Locale::getEnglish(), + u"8.765E4", + u"8.765E3", + u"8.765E2", + u"8.765E1", + u"8.765E0", + u"8.765E-1", + u"8.765E-2", + u"8.765E-3", + u"0E0"); + + assertFormatDescending( + u"Engineering", + NumberFormatter::with().notation(Notation::engineering()), + Locale::getEnglish(), + u"87.65E3", + u"8.765E3", + u"876.5E0", + u"87.65E0", + u"8.765E0", + u"876.5E-3", + u"87.65E-3", + u"8.765E-3", + u"0E0"); + + assertFormatDescending( + u"Scientific sign always shown", + NumberFormatter::with().notation( + 
Notation::scientific().withExponentSignDisplay(UNumberSignDisplay::UNUM_SIGN_ALWAYS)), + Locale::getEnglish(), + u"8.765E+4", + u"8.765E+3", + u"8.765E+2", + u"8.765E+1", + u"8.765E+0", + u"8.765E-1", + u"8.765E-2", + u"8.765E-3", + u"0E+0"); + + assertFormatDescending( + u"Scientific min exponent digits", + NumberFormatter::with().notation(Notation::scientific().withMinExponentDigits(2)), + Locale::getEnglish(), + u"8.765E04", + u"8.765E03", + u"8.765E02", + u"8.765E01", + u"8.765E00", + u"8.765E-01", + u"8.765E-02", + u"8.765E-03", + u"0E00"); + + assertFormatSingle( + u"Scientific Negative", + NumberFormatter::with().notation(Notation::scientific()), + Locale::getEnglish(), + -1000000, + u"-1E6"); +} + +void numbertest_api::notationCompact() { + assertFormatDescending( + u"Compact Short", + NumberFormatter::with().notation(Notation::compactShort()), + Locale::getEnglish(), + u"88K", + u"8.8K", + u"876", + u"88", + u"8.8", + u"0.88", + u"0.088", + u"0.0088", + u"0"); + + assertFormatDescending( + u"Compact Long", + NumberFormatter::with().notation(Notation::compactLong()), + Locale::getEnglish(), + u"88 thousand", + u"8.8 thousand", + u"876", + u"88", + u"8.8", + u"0.88", + u"0.088", + u"0.0088", + u"0"); + + assertFormatDescending( + u"Compact Short Currency", + NumberFormatter::with().notation(Notation::compactShort()).unit(USD), + Locale::getEnglish(), + u"$88K", + u"$8.8K", + u"$876", + u"$88", + u"$8.8", + u"$0.88", + u"$0.088", + u"$0.0088", + u"$0"); + + assertFormatDescending( + u"Compact Short with ISO Currency", + NumberFormatter::with().notation(Notation::compactShort()) + .unit(USD) + .unitWidth(UNumberUnitWidth::UNUM_UNIT_WIDTH_ISO_CODE), + Locale::getEnglish(), + u"USD 88K", + u"USD 8.8K", + u"USD 876", + u"USD 88", + u"USD 8.8", + u"USD 0.88", + u"USD 0.088", + u"USD 0.0088", + u"USD 0"); + + assertFormatDescending( + u"Compact Short with Long Name Currency", + NumberFormatter::with().notation(Notation::compactShort()) + .unit(USD) + 
.unitWidth(UNumberUnitWidth::UNUM_UNIT_WIDTH_FULL_NAME), + Locale::getEnglish(), + u"88K US dollars", + u"8.8K US dollars", + u"876 US dollars", + u"88 US dollars", + u"8.8 US dollars", + u"0.88 US dollars", + u"0.088 US dollars", + u"0.0088 US dollars", + u"0 US dollars"); + + // Note: Most locales don't have compact long currency, so this currently falls back to short. + // This test case should be fixed when proper compact long currency patterns are added. + assertFormatDescending( + u"Compact Long Currency", + NumberFormatter::with().notation(Notation::compactLong()).unit(USD), + Locale::getEnglish(), + u"$88K", // should be something like "$88 thousand" + u"$8.8K", + u"$876", + u"$88", + u"$8.8", + u"$0.88", + u"$0.088", + u"$0.0088", + u"$0"); + + // Note: Most locales don't have compact long currency, so this currently falls back to short. + // This test case should be fixed when proper compact long currency patterns are added. + assertFormatDescending( + u"Compact Long with ISO Currency", + NumberFormatter::with().notation(Notation::compactLong()) + .unit(USD) + .unitWidth(UNumberUnitWidth::UNUM_UNIT_WIDTH_ISO_CODE), + Locale::getEnglish(), + u"USD 88K", // should be something like "USD 88 thousand" + u"USD 8.8K", + u"USD 876", + u"USD 88", + u"USD 8.8", + u"USD 0.88", + u"USD 0.088", + u"USD 0.0088", + u"USD 0"); + + // TODO: This behavior could be improved and should be revisited. 
+ assertFormatDescending( + u"Compact Long with Long Name Currency", + NumberFormatter::with().notation(Notation::compactLong()) + .unit(USD) + .unitWidth(UNumberUnitWidth::UNUM_UNIT_WIDTH_FULL_NAME), + Locale::getEnglish(), + u"88 thousand US dollars", + u"8.8 thousand US dollars", + u"876 US dollars", + u"88 US dollars", + u"8.8 US dollars", + u"0.88 US dollars", + u"0.088 US dollars", + u"0.0088 US dollars", + u"0 US dollars"); + + assertFormatSingle( + u"Compact Plural One", + NumberFormatter::with().notation(Notation::compactLong()), + Locale::createFromName("es"), + 1000000, + u"1 millón"); + + assertFormatSingle( + u"Compact Plural Other", + NumberFormatter::with().notation(Notation::compactLong()), + Locale::createFromName("es"), + 2000000, + u"2 millones"); + + assertFormatSingle( + u"Compact with Negative Sign", + NumberFormatter::with().notation(Notation::compactShort()), + Locale::getEnglish(), + -9876543.21, + u"-9.9M"); + + assertFormatSingle( + u"Compact Rounding", + NumberFormatter::with().notation(Notation::compactShort()), + Locale::getEnglish(), + 990000, + u"990K"); + + assertFormatSingle( + u"Compact Rounding", + NumberFormatter::with().notation(Notation::compactShort()), + Locale::getEnglish(), + 999000, + u"999K"); + + assertFormatSingle( + u"Compact Rounding", + NumberFormatter::with().notation(Notation::compactShort()), + Locale::getEnglish(), + 999900, + u"1M"); + + assertFormatSingle( + u"Compact Rounding", + NumberFormatter::with().notation(Notation::compactShort()), + Locale::getEnglish(), + 9900000, + u"9.9M"); + + assertFormatSingle( + u"Compact Rounding", + NumberFormatter::with().notation(Notation::compactShort()), + Locale::getEnglish(), + 9990000, + u"10M"); +} + +void numbertest_api::unitMeasure() { + assertFormatDescending( + u"Meters Short", + NumberFormatter::with().adoptUnit(new MeasureUnit(METER)), + Locale::getEnglish(), + u"87,650 m", + u"8,765 m", + u"876.5 m", + u"87.65 m", + u"8.765 m", + u"0.8765 m", + u"0.08765 m", + 
u"0.008765 m", + u"0 m"); + + assertFormatDescending( + u"Meters Long", + NumberFormatter::with().adoptUnit(new MeasureUnit(METER)) + .unitWidth(UNumberUnitWidth::UNUM_UNIT_WIDTH_FULL_NAME), + Locale::getEnglish(), + u"87,650 meters", + u"8,765 meters", + u"876.5 meters", + u"87.65 meters", + u"8.765 meters", + u"0.8765 meters", + u"0.08765 meters", + u"0.008765 meters", + u"0 meters"); + + assertFormatDescending( + u"Compact Meters Long", + NumberFormatter::with().notation(Notation::compactLong()) + .adoptUnit(new MeasureUnit(METER)) + .unitWidth(UNumberUnitWidth::UNUM_UNIT_WIDTH_FULL_NAME), + Locale::getEnglish(), + u"88 thousand meters", + u"8.8 thousand meters", + u"876 meters", + u"88 meters", + u"8.8 meters", + u"0.88 meters", + u"0.088 meters", + u"0.0088 meters", + u"0 meters"); + +// TODO: Implement Measure in C++ +// assertFormatSingleMeasure( +// u"Meters with Measure Input", +// NumberFormatter::with().unitWidth(UNumberUnitWidth::UNUM_UNIT_WIDTH_FULL_NAME), +// Locale::getEnglish(), +// new Measure(5.43, new MeasureUnit(METER)), +// u"5.43 meters"); + +// TODO: Implement Measure in C++ +// assertFormatSingleMeasure( +// u"Measure format method takes precedence over fluent chain", +// NumberFormatter::with().adoptUnit(new MeasureUnit(METER)), +// Locale::getEnglish(), +// new Measure(5.43, USD), +// u"$5.43"); + + assertFormatSingle( + u"Meters with Negative Sign", + NumberFormatter::with().adoptUnit(new MeasureUnit(METER)), + Locale::getEnglish(), + -9876543.21, + u"-9,876,543.21 m"); + + // The locale string "सान" appears only in brx.txt: + assertFormatSingle( + u"Interesting Data Fallback 1", + NumberFormatter::with().adoptUnit(new MeasureUnit(DAY)) + .unitWidth(UNumberUnitWidth::UNUM_UNIT_WIDTH_FULL_NAME), + Locale::createFromName("brx"), + 5.43, + u"5.43 सान"); + + // Requires following the alias from unitsNarrow to unitsShort: + assertFormatSingle( + u"Interesting Data Fallback 2", + NumberFormatter::with().adoptUnit(new MeasureUnit(DAY)) + 
.unitWidth(UNumberUnitWidth::UNUM_UNIT_WIDTH_NARROW), + Locale::createFromName("brx"), + 5.43, + u"5.43 d"); + + // en_001.txt has a unitsNarrow/area/square-meter table, but table does not contain the OTHER unit, + // requiring fallback to the root. + assertFormatSingle( + u"Interesting Data Fallback 3", + NumberFormatter::with().adoptUnit(new MeasureUnit(SQUARE_METER)) + .unitWidth(UNumberUnitWidth::UNUM_UNIT_WIDTH_NARROW), + Locale::createFromName("en-GB"), + 5.43, + u"5.43 m²"); + + // es_US has "{0}°" for unitsNarrow/temperature/FAHRENHEIT. + // NOTE: This example is in the documentation. + assertFormatSingle( + u"Difference between Narrow and Short (Narrow Version)", + NumberFormatter::with().adoptUnit(new MeasureUnit(FAHRENHEIT)) + .unitWidth(UNUM_UNIT_WIDTH_NARROW), + Locale("es-US"), + 5.43, + u"5.43°"); + + assertFormatSingle( + u"Difference between Narrow and Short (Short Version)", + NumberFormatter::with().adoptUnit(new MeasureUnit(FAHRENHEIT)) + .unitWidth(UNUM_UNIT_WIDTH_SHORT), + Locale("es-US"), + 5.43, + u"5.43 °F"); +} + +void numbertest_api::unitCurrency() { + assertFormatDescending( + u"Currency", + NumberFormatter::with().unit(GBP), + Locale::getEnglish(), + u"£87,650.00", + u"£8,765.00", + u"£876.50", + u"£87.65", + u"£8.76", + u"£0.88", + u"£0.09", + u"£0.01", + u"£0.00"); + + assertFormatDescending( + u"Currency ISO", + NumberFormatter::with().unit(GBP).unitWidth(UNumberUnitWidth::UNUM_UNIT_WIDTH_ISO_CODE), + Locale::getEnglish(), + u"GBP 87,650.00", + u"GBP 8,765.00", + u"GBP 876.50", + u"GBP 87.65", + u"GBP 8.76", + u"GBP 0.88", + u"GBP 0.09", + u"GBP 0.01", + u"GBP 0.00"); + + assertFormatDescending( + u"Currency Long Name", + NumberFormatter::with().unit(GBP).unitWidth(UNumberUnitWidth::UNUM_UNIT_WIDTH_FULL_NAME), + Locale::getEnglish(), + u"87,650.00 British pounds", + u"8,765.00 British pounds", + u"876.50 British pounds", + u"87.65 British pounds", + u"8.76 British pounds", + u"0.88 British pounds", + u"0.09 British pounds", + u"0.01 
British pounds", + u"0.00 British pounds"); + + assertFormatDescending( + u"Currency Hidden", + NumberFormatter::with().unit(GBP).unitWidth(UNUM_UNIT_WIDTH_HIDDEN), + Locale::getEnglish(), + u"87,650.00", + u"8,765.00", + u"876.50", + u"87.65", + u"8.76", + u"0.88", + u"0.09", + u"0.01", + u"0.00"); + +// TODO: Implement Measure in C++ +// assertFormatSingleMeasure( +// u"Currency with CurrencyAmount Input", +// NumberFormatter::with(), +// Locale::getEnglish(), +// new CurrencyAmount(5.43, GBP), +// u"£5.43"); + +// TODO: Enable this test when DecimalFormat wrapper is done. +// assertFormatSingle( +// u"Currency Long Name from Pattern Syntax", NumberFormatter.fromDecimalFormat( +// PatternStringParser.parseToProperties("0 ¤¤¤"), +// DecimalFormatSymbols.getInstance(Locale::getEnglish()), +// null).unit(GBP), Locale::getEnglish(), 1234567.89, u"1234568 British pounds"); + + assertFormatSingle( + u"Currency with Negative Sign", + NumberFormatter::with().unit(GBP), + Locale::getEnglish(), + -9876543.21, + u"-£9,876,543.21"); +} + +void numbertest_api::unitPercent() { + assertFormatDescending( + u"Percent", + NumberFormatter::with().unit(NoUnit::percent()), + Locale::getEnglish(), + u"87,650%", + u"8,765%", + u"876.5%", + u"87.65%", + u"8.765%", + u"0.8765%", + u"0.08765%", + u"0.008765%", + u"0%"); + + assertFormatDescending( + u"Permille", + NumberFormatter::with().unit(NoUnit::permille()), + Locale::getEnglish(), + u"87,650‰", + u"8,765‰", + u"876.5‰", + u"87.65‰", + u"8.765‰", + u"0.8765‰", + u"0.08765‰", + u"0.008765‰", + u"0‰"); + + assertFormatSingle( + u"NoUnit Base", + NumberFormatter::with().unit(NoUnit::base()), + Locale::getEnglish(), + 51423, + u"51,423"); + + assertFormatSingle( + u"Percent with Negative Sign", + NumberFormatter::with().unit(NoUnit::percent()), + Locale::getEnglish(), + -98.7654321, + u"-98.765432%"); +} + +void numbertest_api::roundingFraction() { + assertFormatDescending( + u"Integer", + 
NumberFormatter::with().rounding(Rounder::integer()), + Locale::getEnglish(), + u"87,650", + u"8,765", + u"876", + u"88", + u"9", + u"1", + u"0", + u"0", + u"0"); + + assertFormatDescending( + u"Fixed Fraction", + NumberFormatter::with().rounding(Rounder::fixedFraction(3)), + Locale::getEnglish(), + u"87,650.000", + u"8,765.000", + u"876.500", + u"87.650", + u"8.765", + u"0.876", + u"0.088", + u"0.009", + u"0.000"); + + assertFormatDescending( + u"Min Fraction", + NumberFormatter::with().rounding(Rounder::minFraction(1)), + Locale::getEnglish(), + u"87,650.0", + u"8,765.0", + u"876.5", + u"87.65", + u"8.765", + u"0.8765", + u"0.08765", + u"0.008765", + u"0.0"); + + assertFormatDescending( + u"Max Fraction", + NumberFormatter::with().rounding(Rounder::maxFraction(1)), + Locale::getEnglish(), + u"87,650", + u"8,765", + u"876.5", + u"87.6", + u"8.8", + u"0.9", + u"0.1", + u"0", + u"0"); + + assertFormatDescending( + u"Min/Max Fraction", + NumberFormatter::with().rounding(Rounder::minMaxFraction(1, 3)), + Locale::getEnglish(), + u"87,650.0", + u"8,765.0", + u"876.5", + u"87.65", + u"8.765", + u"0.876", + u"0.088", + u"0.009", + u"0.0"); +} + +void numbertest_api::roundingFigures() { + assertFormatSingle( + u"Fixed Significant", + NumberFormatter::with().rounding(Rounder::fixedDigits(3)), + Locale::getEnglish(), + -98, + u"-98.0"); + + assertFormatSingle( + u"Fixed Significant Rounding", + NumberFormatter::with().rounding(Rounder::fixedDigits(3)), + Locale::getEnglish(), + -98.7654321, + u"-98.8"); + + assertFormatSingle( + u"Fixed Significant Zero", + NumberFormatter::with().rounding(Rounder::fixedDigits(3)), + Locale::getEnglish(), + 0, + u"0.00"); + + assertFormatSingle( + u"Min Significant", + NumberFormatter::with().rounding(Rounder::minDigits(2)), + Locale::getEnglish(), + -9, + u"-9.0"); + + assertFormatSingle( + u"Max Significant", + NumberFormatter::with().rounding(Rounder::maxDigits(4)), + Locale::getEnglish(), + 98.7654321, + u"98.77"); + + 
assertFormatSingle( + u"Min/Max Significant", + NumberFormatter::with().rounding(Rounder::minMaxDigits(3, 4)), + Locale::getEnglish(), + 9.99999, + u"10.0"); +} + +void numbertest_api::roundingFractionFigures() { + assertFormatDescending( + u"Basic Significant", // for comparison + NumberFormatter::with().rounding(Rounder::maxDigits(2)), + Locale::getEnglish(), + u"88,000", + u"8,800", + u"880", + u"88", + u"8.8", + u"0.88", + u"0.088", + u"0.0088", + u"0"); + + assertFormatDescending( + u"FracSig minMaxFrac minSig", + NumberFormatter::with().rounding(Rounder::minMaxFraction(1, 2).withMinDigits(3)), + Locale::getEnglish(), + u"87,650.0", + u"8,765.0", + u"876.5", + u"87.65", + u"8.76", + u"0.876", // minSig beats maxFrac + u"0.0876", // minSig beats maxFrac + u"0.00876", // minSig beats maxFrac + u"0.0"); + + assertFormatDescending( + u"FracSig minMaxFrac maxSig A", + NumberFormatter::with().rounding(Rounder::minMaxFraction(1, 3).withMaxDigits(2)), + Locale::getEnglish(), + u"88,000.0", // maxSig beats maxFrac + u"8,800.0", // maxSig beats maxFrac + u"880.0", // maxSig beats maxFrac + u"88.0", // maxSig beats maxFrac + u"8.8", // maxSig beats maxFrac + u"0.88", // maxSig beats maxFrac + u"0.088", + u"0.009", + u"0.0"); + + assertFormatDescending( + u"FracSig minMaxFrac maxSig B", + NumberFormatter::with().rounding(Rounder::fixedFraction(2).withMaxDigits(2)), + Locale::getEnglish(), + u"88,000.00", // maxSig beats maxFrac + u"8,800.00", // maxSig beats maxFrac + u"880.00", // maxSig beats maxFrac + u"88.00", // maxSig beats maxFrac + u"8.80", // maxSig beats maxFrac + u"0.88", + u"0.09", + u"0.01", + u"0.00"); +} + +void numbertest_api::roundingOther() { + assertFormatDescending( + u"Rounding None", + NumberFormatter::with().rounding(Rounder::unlimited()), + Locale::getEnglish(), + u"87,650", + u"8,765", + u"876.5", + u"87.65", + u"8.765", + u"0.8765", + u"0.08765", + u"0.008765", + u"0"); + + assertFormatDescending( + u"Increment", + 
NumberFormatter::with().rounding(Rounder::increment(0.5).withMinFraction(1)), + Locale::getEnglish(), + u"87,650.0", + u"8,765.0", + u"876.5", + u"87.5", + u"9.0", + u"1.0", + u"0.0", + u"0.0", + u"0.0"); + + assertFormatDescending( + u"Increment with Min Fraction", + NumberFormatter::with().rounding(Rounder::increment(0.5).withMinFraction(2)), + Locale::getEnglish(), + u"87,650.00", + u"8,765.00", + u"876.50", + u"87.50", + u"9.00", + u"1.00", + u"0.00", + u"0.00", + u"0.00"); + + assertFormatDescending( + u"Currency Standard", + NumberFormatter::with().rounding(Rounder::currency(UCurrencyUsage::UCURR_USAGE_STANDARD)) + .unit(CZK), + Locale::getEnglish(), + u"CZK 87,650.00", + u"CZK 8,765.00", + u"CZK 876.50", + u"CZK 87.65", + u"CZK 8.76", + u"CZK 0.88", + u"CZK 0.09", + u"CZK 0.01", + u"CZK 0.00"); + + assertFormatDescending( + u"Currency Cash", + NumberFormatter::with().rounding(Rounder::currency(UCurrencyUsage::UCURR_USAGE_CASH)) + .unit(CZK), + Locale::getEnglish(), + u"CZK 87,650", + u"CZK 8,765", + u"CZK 876", + u"CZK 88", + u"CZK 9", + u"CZK 1", + u"CZK 0", + u"CZK 0", + u"CZK 0"); + + assertFormatDescending( + u"Currency Cash with Nickel Rounding", + NumberFormatter::with().rounding(Rounder::currency(UCurrencyUsage::UCURR_USAGE_CASH)) + .unit(CAD), + Locale::getEnglish(), + u"CA$87,650.00", + u"CA$8,765.00", + u"CA$876.50", + u"CA$87.65", + u"CA$8.75", + u"CA$0.90", + u"CA$0.10", + u"CA$0.00", + u"CA$0.00"); + + assertFormatDescending( + u"Currency not in top-level fluent chain", + NumberFormatter::with().rounding( + Rounder::currency(UCurrencyUsage::UCURR_USAGE_CASH).withCurrency(CZK)), + Locale::getEnglish(), + u"87,650", + u"8,765", + u"876", + u"88", + u"9", + u"1", + u"0", + u"0", + u"0"); + + // NOTE: Other tests cover the behavior of the other rounding modes. 
+ assertFormatDescending( + u"Rounding Mode CEILING", + NumberFormatter::with().rounding(Rounder::integer().withMode(UNumberFormatRoundingMode::UNUM_ROUND_CEILING)), + Locale::getEnglish(), + u"87,650", + u"8,765", + u"877", + u"88", + u"9", + u"1", + u"1", + u"1", + u"0"); +} + +void numbertest_api::grouping() { + assertFormatDescendingBig( + u"Western Grouping", + NumberFormatter::with().grouping(Grouper::defaults()), + Locale::getEnglish(), + u"87,650,000", + u"8,765,000", + u"876,500", + u"87,650", + u"8,765", + u"876.5", + u"87.65", + u"8.765", + u"0"); + + assertFormatDescendingBig( + u"Indic Grouping", + NumberFormatter::with().grouping(Grouper::defaults()), + Locale("en-IN"), + u"8,76,50,000", + u"87,65,000", + u"8,76,500", + u"87,650", + u"8,765", + u"876.5", + u"87.65", + u"8.765", + u"0"); + + assertFormatDescendingBig( + u"Western Grouping, Wide", + NumberFormatter::with().grouping(Grouper::minTwoDigits()), + Locale::getEnglish(), + u"87,650,000", + u"8,765,000", + u"876,500", + u"87,650", + u"8765", + u"876.5", + u"87.65", + u"8.765", + u"0"); + + assertFormatDescendingBig( + u"Indic Grouping, Wide", + NumberFormatter::with().grouping(Grouper::minTwoDigits()), + Locale("en-IN"), + u"8,76,50,000", + u"87,65,000", + u"8,76,500", + u"87,650", + u"8765", + u"876.5", + u"87.65", + u"8.765", + u"0"); + + assertFormatDescendingBig( + u"No Grouping", + NumberFormatter::with().grouping(Grouper::none()), + Locale("en-IN"), + u"87650000", + u"8765000", + u"876500", + u"87650", + u"8765", + u"876.5", + u"87.65", + u"8.765", + u"0"); +} + +void numbertest_api::padding() { + assertFormatDescending( + u"Padding", + NumberFormatter::with().padding(Padder::none()), + Locale::getEnglish(), + u"87,650", + u"8,765", + u"876.5", + u"87.65", + u"8.765", + u"0.8765", + u"0.08765", + u"0.008765", + u"0"); + + assertFormatDescending( + u"Padding", + NumberFormatter::with().padding( + Padder::codePoints( + '*', 8, PadPosition::UNUM_PAD_AFTER_PREFIX)), + Locale::getEnglish(), + 
u"**87,650", + u"***8,765", + u"***876.5", + u"***87.65", + u"***8.765", + u"**0.8765", + u"*0.08765", + u"0.008765", + u"*******0"); + + assertFormatDescending( + u"Padding with code points", + NumberFormatter::with().padding( + Padder::codePoints( + 0x101E4, 8, PadPosition::UNUM_PAD_AFTER_PREFIX)), + Locale::getEnglish(), + u"𐇤𐇤87,650", + u"𐇤𐇤𐇤8,765", + u"𐇤𐇤𐇤876.5", + u"𐇤𐇤𐇤87.65", + u"𐇤𐇤𐇤8.765", + u"𐇤𐇤0.8765", + u"𐇤0.08765", + u"0.008765", + u"𐇤𐇤𐇤𐇤𐇤𐇤𐇤0"); + + assertFormatDescending( + u"Padding with wide digits", + NumberFormatter::with().padding( + Padder::codePoints( + '*', 8, PadPosition::UNUM_PAD_AFTER_PREFIX)) + .adoptSymbols(new NumberingSystem(MATHSANB)), + Locale::getEnglish(), + u"**𝟴𝟳,𝟲𝟱𝟬", + u"***𝟴,𝟳𝟲𝟱", + u"***𝟴𝟳𝟲.𝟱", + u"***𝟴𝟳.𝟲𝟱", + u"***𝟴.𝟳𝟲𝟱", + u"**𝟬.𝟴𝟳𝟲𝟱", + u"*𝟬.𝟬𝟴𝟳𝟲𝟱", + u"𝟬.𝟬𝟬𝟴𝟳𝟲𝟱", + u"*******𝟬"); + + assertFormatDescending( + u"Padding with currency spacing", + NumberFormatter::with().padding( + Padder::codePoints( + '*', 10, PadPosition::UNUM_PAD_AFTER_PREFIX)) + .unit(GBP) + .unitWidth(UNumberUnitWidth::UNUM_UNIT_WIDTH_ISO_CODE), + Locale::getEnglish(), + u"GBP 87,650.00", + u"GBP 8,765.00", + u"GBP*876.50", + u"GBP**87.65", + u"GBP***8.76", + u"GBP***0.88", + u"GBP***0.09", + u"GBP***0.01", + u"GBP***0.00"); + + assertFormatSingle( + u"Pad Before Prefix", + NumberFormatter::with().padding( + Padder::codePoints( + '*', 8, PadPosition::UNUM_PAD_BEFORE_PREFIX)), + Locale::getEnglish(), + -88.88, + u"**-88.88"); + + assertFormatSingle( + u"Pad After Prefix", + NumberFormatter::with().padding( + Padder::codePoints( + '*', 8, PadPosition::UNUM_PAD_AFTER_PREFIX)), + Locale::getEnglish(), + -88.88, + u"-**88.88"); + + assertFormatSingle( + u"Pad Before Suffix", + NumberFormatter::with().padding( + Padder::codePoints( + '*', 8, PadPosition::UNUM_PAD_BEFORE_SUFFIX)).unit(NoUnit::percent()), + Locale::getEnglish(), + 88.88, + u"88.88**%"); + + assertFormatSingle( + u"Pad After Suffix", + NumberFormatter::with().padding( + Padder::codePoints( + '*', 
8, PadPosition::UNUM_PAD_AFTER_SUFFIX)).unit(NoUnit::percent()), + Locale::getEnglish(), + 88.88, + u"88.88%**"); + + assertFormatSingle( + u"Currency Spacing with Zero Digit Padding Broken", + NumberFormatter::with().padding( + Padder::codePoints( + '0', 12, PadPosition::UNUM_PAD_AFTER_PREFIX)) + .unit(GBP) + .unitWidth(UNumberUnitWidth::UNUM_UNIT_WIDTH_ISO_CODE), + Locale::getEnglish(), + 514.23, + u"GBP 000514.23"); // TODO: This is broken; it renders too wide (13 instead of 12). +} + +void numbertest_api::integerWidth() { + assertFormatDescending( + u"Integer Width Default", + NumberFormatter::with().integerWidth(IntegerWidth::zeroFillTo(1)), + Locale::getEnglish(), + u"87,650", + u"8,765", + u"876.5", + u"87.65", + u"8.765", + u"0.8765", + u"0.08765", + u"0.008765", + u"0"); + + assertFormatDescending( + u"Integer Width Zero Fill 0", + NumberFormatter::with().integerWidth(IntegerWidth::zeroFillTo(0)), + Locale::getEnglish(), + u"87,650", + u"8,765", + u"876.5", + u"87.65", + u"8.765", + u".8765", + u".08765", + u".008765", + u""); // TODO: Avoid the empty string here? 
+ + assertFormatDescending( + u"Integer Width Zero Fill 3", + NumberFormatter::with().integerWidth(IntegerWidth::zeroFillTo(3)), + Locale::getEnglish(), + u"87,650", + u"8,765", + u"876.5", + u"087.65", + u"008.765", + u"000.8765", + u"000.08765", + u"000.008765", + u"000"); + + assertFormatDescending( + u"Integer Width Max 3", + NumberFormatter::with().integerWidth(IntegerWidth::zeroFillTo(1).truncateAt(3)), + Locale::getEnglish(), + u"650", + u"765", + u"876.5", + u"87.65", + u"8.765", + u"0.8765", + u"0.08765", + u"0.008765", + u"0"); + + assertFormatDescending( + u"Integer Width Fixed 2", + NumberFormatter::with().integerWidth(IntegerWidth::zeroFillTo(2).truncateAt(2)), + Locale::getEnglish(), + u"50", + u"65", + u"76.5", + u"87.65", + u"08.765", + u"00.8765", + u"00.08765", + u"00.008765", + u"00"); +} + +void numbertest_api::symbols() { + assertFormatDescending( + u"French Symbols with Japanese Data 1", + NumberFormatter::with().symbols(FRENCH_SYMBOLS), + Locale::getJapan(), + u"87 650", + u"8 765", + u"876,5", + u"87,65", + u"8,765", + u"0,8765", + u"0,08765", + u"0,008765", + u"0"); + + assertFormatSingle( + u"French Symbols with Japanese Data 2", + NumberFormatter::with().notation(Notation::compactShort()).symbols(FRENCH_SYMBOLS), + Locale::getJapan(), + 12345, + u"1,2\u4E07"); + + assertFormatDescending( + u"Latin Numbering System with Arabic Data", + NumberFormatter::with().adoptSymbols(new NumberingSystem(LATN)).unit(USD), + Locale("ar"), + u"US$ 87,650.00", + u"US$ 8,765.00", + u"US$ 876.50", + u"US$ 87.65", + u"US$ 8.76", + u"US$ 0.88", + u"US$ 0.09", + u"US$ 0.01", + u"US$ 0.00"); + + assertFormatDescending( + u"Math Numbering System with French Data", + NumberFormatter::with().adoptSymbols(new NumberingSystem(MATHSANB)), + Locale::getFrench(), + u"𝟴𝟳 𝟲𝟱𝟬", + u"𝟴 𝟳𝟲𝟱", + u"𝟴𝟳𝟲,𝟱", + u"𝟴𝟳,𝟲𝟱", + u"𝟴,𝟳𝟲𝟱", + u"𝟬,𝟴𝟳𝟲𝟱", + u"𝟬,𝟬𝟴𝟳𝟲𝟱", + u"𝟬,𝟬𝟬𝟴𝟳𝟲𝟱", + u"𝟬"); + + assertFormatSingle( + u"Swiss Symbols (used in documentation)", + 
NumberFormatter::with().symbols(SWISS_SYMBOLS), + Locale::getEnglish(), + 12345.67, + u"12’345.67"); + + assertFormatSingle( + u"Myanmar Symbols (used in documentation)", + NumberFormatter::with().symbols(MYANMAR_SYMBOLS), + Locale::getEnglish(), + 12345.67, + u"\u1041\u1042,\u1043\u1044\u1045.\u1046\u1047"); + + // NOTE: Locale ar puts ¤ after the number in NS arab but before the number in NS latn. + + assertFormatSingle( + u"Currency symbol should precede number in ar with NS latn", + NumberFormatter::with().adoptSymbols(new NumberingSystem(LATN)).unit(USD), + Locale("ar"), + 12345.67, + u"US$ 12,345.67"); + + assertFormatSingle( + u"Currency symbol should precede number in ar@numbers=latn", + NumberFormatter::with().unit(USD), + Locale("ar@numbers=latn"), + 12345.67, + u"US$ 12,345.67"); + + assertFormatSingle( + u"Currency symbol should follow number in ar with NS arab", + NumberFormatter::with().unit(USD), + Locale("ar"), + 12345.67, + u"١٢٬٣٤٥٫٦٧ US$"); + + assertFormatSingle( + u"Currency symbol should follow number in ar@numbers=arab", + NumberFormatter::with().unit(USD), + Locale("ar@numbers=arab"), + 12345.67, + u"١٢٬٣٤٥٫٦٧ US$"); + + UErrorCode status = U_ZERO_ERROR; + DecimalFormatSymbols symbols = SWISS_SYMBOLS; + UnlocalizedNumberFormatter f = NumberFormatter::with().symbols(symbols); + symbols.setSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kGroupingSeparatorSymbol, u"!", status); + assertFormatSingle( + u"Symbols object should be copied", f, Locale::getEnglish(), 12345.67, u"12’345.67"); + + assertFormatSingle( + u"The last symbols setter wins", + NumberFormatter::with().symbols(symbols).adoptSymbols(new NumberingSystem(LATN)), + Locale::getEnglish(), + 12345.67, + u"12,345.67"); + + assertFormatSingle( + u"The last symbols setter wins", + NumberFormatter::with().adoptSymbols(new NumberingSystem(LATN)).symbols(symbols), + Locale::getEnglish(), + 12345.67, + u"12!345.67"); +} + +// TODO: Enable if/when currency symbol override is added. 
+//void NumberFormatterTest::symbolsOverride() { +// DecimalFormatSymbols dfs = DecimalFormatSymbols.getInstance(Locale::getEnglish()); +// dfs.setCurrencySymbol("@"); +// dfs.setInternationalCurrencySymbol("foo"); +// assertFormatSingle( +// u"Custom Short Currency Symbol", +// NumberFormatter::with().unit(Currency.getInstance("XXX")).symbols(dfs), +// Locale::getEnglish(), +// 12.3, +// u"@ 12.30"); +//} + +void numbertest_api::sign() { + assertFormatSingle( + u"Sign Auto Positive", + NumberFormatter::with().sign(UNumberSignDisplay::UNUM_SIGN_AUTO), + Locale::getEnglish(), + 444444, + u"444,444"); + + assertFormatSingle( + u"Sign Auto Negative", + NumberFormatter::with().sign(UNumberSignDisplay::UNUM_SIGN_AUTO), + Locale::getEnglish(), + -444444, + u"-444,444"); + + assertFormatSingle( + u"Sign Always Positive", + NumberFormatter::with().sign(UNumberSignDisplay::UNUM_SIGN_ALWAYS), + Locale::getEnglish(), + 444444, + u"+444,444"); + + assertFormatSingle( + u"Sign Always Negative", + NumberFormatter::with().sign(UNumberSignDisplay::UNUM_SIGN_ALWAYS), + Locale::getEnglish(), + -444444, + u"-444,444"); + + assertFormatSingle( + u"Sign Never Positive", + NumberFormatter::with().sign(UNumberSignDisplay::UNUM_SIGN_NEVER), + Locale::getEnglish(), + 444444, + u"444,444"); + + assertFormatSingle( + u"Sign Never Negative", + NumberFormatter::with().sign(UNumberSignDisplay::UNUM_SIGN_NEVER), + Locale::getEnglish(), + -444444, + u"444,444"); + + assertFormatSingle( + u"Sign Accounting Positive", + NumberFormatter::with().sign(UNumberSignDisplay::UNUM_SIGN_ACCOUNTING).unit(USD), + Locale::getEnglish(), + 444444, + u"$444,444.00"); + + assertFormatSingle( + u"Sign Accounting Negative", + NumberFormatter::with().sign(UNumberSignDisplay::UNUM_SIGN_ACCOUNTING).unit(USD), + Locale::getEnglish(), + -444444, + u"($444,444.00)"); + + assertFormatSingle( + u"Sign Accounting-Always Positive", + NumberFormatter::with().sign(UNumberSignDisplay::UNUM_SIGN_ACCOUNTING_ALWAYS).unit(USD), + 
Locale::getEnglish(), + 444444, + u"+$444,444.00"); + + assertFormatSingle( + u"Sign Accounting-Always Negative", + NumberFormatter::with().sign(UNumberSignDisplay::UNUM_SIGN_ACCOUNTING_ALWAYS).unit(USD), + Locale::getEnglish(), + -444444, + u"($444,444.00)"); + + assertFormatSingle( + u"Sign Accounting Negative Hidden", + NumberFormatter::with().sign(UNumberSignDisplay::UNUM_SIGN_ACCOUNTING) + .unit(USD) + .unitWidth(UNUM_UNIT_WIDTH_HIDDEN), + Locale::getEnglish(), + -444444, + u"(444,444.00)"); +} + +void numbertest_api::decimal() { + assertFormatDescending( + u"Decimal Default", + NumberFormatter::with().decimal(UNumberDecimalSeparatorDisplay::UNUM_DECIMAL_SEPARATOR_AUTO), + Locale::getEnglish(), + u"87,650", + u"8,765", + u"876.5", + u"87.65", + u"8.765", + u"0.8765", + u"0.08765", + u"0.008765", + u"0"); + + assertFormatDescending( + u"Decimal Always Shown", + NumberFormatter::with().decimal(UNumberDecimalSeparatorDisplay::UNUM_DECIMAL_SEPARATOR_ALWAYS), + Locale::getEnglish(), + u"87,650.", + u"8,765.", + u"876.5", + u"87.65", + u"8.765", + u"0.8765", + u"0.08765", + u"0.008765", + u"0."); +} + +void numbertest_api::locale() { + // Coverage for the locale setters. 
+ UErrorCode status = U_ZERO_ERROR; + UnicodeString actual = NumberFormatter::withLocale(Locale::getFrench()).formatInt(1234, status) + .toString(); + assertEquals("Locale withLocale()", u"1 234", actual); +} + +void numbertest_api::errors() { + LocalizedNumberFormatter lnf = NumberFormatter::withLocale(Locale::getEnglish()).rounding( + Rounder::fixedFraction( + -1)); + + { + UErrorCode status = U_ZERO_ERROR; + lnf.formatInt(1, status); + assertEquals( + "Should fail with U_ILLEGAL_ARGUMENT_ERROR since rounder is not legal", + U_ILLEGAL_ARGUMENT_ERROR, + status); + } + + { + UErrorCode status = U_ZERO_ERROR; + lnf.copyErrorTo(status); + assertEquals( + "Should fail with U_ILLEGAL_ARGUMENT_ERROR since rounder is not legal", + U_ILLEGAL_ARGUMENT_ERROR, + status); + } +} + + +void numbertest_api::assertFormatDescending(const UnicodeString &message, + const UnlocalizedNumberFormatter &f, + const Locale &locale, ...) { + va_list args; + // TODO: Fix this? "warning: 'va_start' has undefined behavior with reference types [-Wvarargs]" + va_start(args, locale); + static double inputs[] = {87650, 8765, 876.5, 87.65, 8.765, 0.8765, 0.08765, 0.008765, 0}; + const LocalizedNumberFormatter l1 = f.threshold(0).locale(locale); // no self-regulation + const LocalizedNumberFormatter l2 = f.threshold(1).locale(locale); // all self-regulation + UErrorCode status = U_ZERO_ERROR; + for (int16_t i = 0; i < 9; i++) { + char16_t caseNumber = u'0' + i; + double d = inputs[i]; + UnicodeString expected = va_arg(args, const char16_t*); + UnicodeString actual1 = l1.formatDouble(d, status).toString(); + assertSuccess(message + u": Unsafe Path: " + caseNumber, status); + assertEquals(message + u": Unsafe Path: " + caseNumber, expected, actual1); + UnicodeString actual2 = l2.formatDouble(d, status).toString(); + assertSuccess(message + u": Safe Path: " + caseNumber, status); + assertEquals(message + u": Safe Path: " + caseNumber, expected, actual2); + } +} + +void 
numbertest_api::assertFormatDescendingBig(const UnicodeString &message, + const UnlocalizedNumberFormatter &f, + const Locale &locale, ...) { + va_list args; + // TODO: Fix this? "warning: 'va_start' has undefined behavior with reference types [-Wvarargs]" + va_start(args, locale); + static double inputs[] = {87650000, 8765000, 876500, 87650, 8765, 876.5, 87.65, 8.765, 0}; + const LocalizedNumberFormatter l1 = f.threshold(0).locale(locale); // no self-regulation + const LocalizedNumberFormatter l2 = f.threshold(1).locale(locale); // all self-regulation + UErrorCode status = U_ZERO_ERROR; + for (int16_t i = 0; i < 9; i++) { + char16_t caseNumber = u'0' + i; + double d = inputs[i]; + UnicodeString expected = va_arg(args, const char16_t*); + UnicodeString actual1 = l1.formatDouble(d, status).toString(); + assertSuccess(message + u": Unsafe Path: " + caseNumber, status); + assertEquals(message + u": Unsafe Path: " + caseNumber, expected, actual1); + UnicodeString actual2 = l2.formatDouble(d, status).toString(); + assertSuccess(message + u": Safe Path: " + caseNumber, status); + assertEquals(message + u": Safe Path: " + caseNumber, expected, actual2); + } +} + +void numbertest_api::assertFormatSingle(const UnicodeString &message, + const UnlocalizedNumberFormatter &f, const Locale &locale, + double input, const UnicodeString &expected) { + const LocalizedNumberFormatter l1 = f.threshold(0).locale(locale); // no self-regulation + const LocalizedNumberFormatter l2 = f.threshold(1).locale(locale); // all self-regulation + UErrorCode status = U_ZERO_ERROR; + UnicodeString actual1 = l1.formatDouble(input, status).toString(); + assertSuccess(message + u": Unsafe Path", status); + assertEquals(message + u": Unsafe Path", expected, actual1); + UnicodeString actual2 = l2.formatDouble(input, status).toString(); + assertSuccess(message + u": Safe Path", status); + assertEquals(message + u": Safe Path", expected, actual2); +} + +int main() { + UErrorCode status = U_ZERO_ERROR; + 
numbertest_api test(status); + test.notationSimple(); + test.notationScientific(); + test.notationCompact(); + test.unitMeasure(); + test.unitCurrency(); + test.unitPercent(); + test.roundingFraction(); + test.roundingFigures(); + test.roundingFractionFigures(); + test.roundingOther(); + test.grouping(); + test.padding(); + test.integerWidth(); + test.symbols(); + test.sign(); + test.decimal(); + test.locale(); + test.errors(); + + u_cleanup(); +} diff --git a/icu4c/source/test/intltest/numbertest_decimalquantity.cpp b/icu4c/source/test/intltest/numbertest_decimalquantity.cpp new file mode 100644 index 0000000000..670ecb3035 --- /dev/null +++ b/icu4c/source/test/intltest/numbertest_decimalquantity.cpp @@ -0,0 +1,280 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "number_decimalquantity.h" +#include "math.h" +#include <cmath> // std::isnan / std::isfinite used by the tests below + +using namespace icu::number::impl; + +class DecimalQuantityTest : public IntlTest { + public: + void testDecimalQuantityBehaviorStandalone(); + + void testSwitchStorage(); + + void testAppend(); + + void testConvertToAccurateDouble(); + + void testUseApproximateDoubleWhenAble(); + + void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0); + + private: + void assertDoubleEquals(const char *message, double a, double b); + + void assertHealth(const DecimalQuantity &fq); + + void assertToStringAndHealth(const DecimalQuantity &fq, const UnicodeString &expected); + + void checkDoubleBehavior(double d, bool explicitRequired); +}; + +void DecimalQuantityTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char *) { + if (exec) { + logln("TestSuite DecimalQuantityTest: "); + } + TESTCASE_AUTO_BEGIN; + TESTCASE_AUTO(testDecimalQuantityBehaviorStandalone); + TESTCASE_AUTO(testSwitchStorage); + TESTCASE_AUTO(testAppend); + TESTCASE_AUTO(testConvertToAccurateDouble); + TESTCASE_AUTO(testUseApproximateDoubleWhenAble); + TESTCASE_AUTO_END; +} + 
+void DecimalQuantityTest::assertDoubleEquals(const char *message, double a, double b) { + if (a == b) { + return; + } + + double diff = a - b; + diff = diff < 0 ? -diff : diff; + double bound = a < 0 ? -a * 1e-6 : a * 1e-6; + if (diff > bound) { + errln(message); + } +} + +void DecimalQuantityTest::assertHealth(const DecimalQuantity &fq) { + UnicodeString health = fq.checkHealth(); + if (!health.isBogus()) { + errln(UnicodeString("HEALTH FAILURE: ") + fq.toString()); + } +} + +void +DecimalQuantityTest::assertToStringAndHealth(const DecimalQuantity &fq, const UnicodeString &expected) { + UnicodeString actual = fq.toString(); + assertEquals("DecimalQuantity toString failed", expected, actual); + assertHealth(fq); +} + +void DecimalQuantityTest::checkDoubleBehavior(double d, bool explicitRequired) { + DecimalQuantity fq; + fq.setToDouble(d); + if (explicitRequired) { + assertTrue("Should be using approximate double", !fq.isExplicitExactDouble()); + } + assertDoubleEquals("Initial construction from hard double", d, fq.toDouble()); + fq.roundToInfinity(); + if (explicitRequired) { + assertTrue("Should not be using approximate double", fq.isExplicitExactDouble()); + } + assertDoubleEquals("After conversion to exact BCD (double)", d, fq.toDouble()); +} + +void DecimalQuantityTest::testDecimalQuantityBehaviorStandalone() { + UErrorCode status = U_ZERO_ERROR; + DecimalQuantity fq; + assertToStringAndHealth(fq, u""); + fq.setToInt(51423); + assertToStringAndHealth(fq, ""); + fq.adjustMagnitude(-3); + assertToStringAndHealth(fq, ""); + fq.setToLong(999999999999000L); + assertToStringAndHealth(fq, ""); + fq.setIntegerLength(2, 5); + assertToStringAndHealth(fq, ""); + fq.setFractionLength(3, 6); + assertToStringAndHealth(fq, ""); + fq.setToDouble(987.654321); + assertToStringAndHealth(fq, ""); + fq.roundToInfinity(); + assertToStringAndHealth(fq, ""); + fq.roundToIncrement(0.005, RoundingMode::kRoundHalfEven, status); + assertSuccess("Rounding to increment", status); + 
assertToStringAndHealth(fq, ""); + fq.roundToMagnitude(-2, RoundingMode::kRoundHalfEven, status); + assertSuccess("Rounding to magnitude", status); + assertToStringAndHealth(fq, ""); +} + +void DecimalQuantityTest::testSwitchStorage() { + UErrorCode status = U_ZERO_ERROR; + DecimalQuantity fq; + + fq.setToLong(1234123412341234L); + assertFalse("Should not be using byte array", fq.isUsingBytes()); + assertEquals("Failed on initialize", "1234123412341234E0", fq.toNumberString()); + assertHealth(fq); + // Long -> Bytes + fq.appendDigit(5, 0, true); + assertTrue("Should be using byte array", fq.isUsingBytes()); + assertEquals("Failed on multiply", "12341234123412345E0", fq.toNumberString()); + assertHealth(fq); + // Bytes -> Long + fq.roundToMagnitude(5, RoundingMode::kRoundHalfEven, status); + assertSuccess("Rounding to magnitude", status); + assertFalse("Should not be using byte array", fq.isUsingBytes()); + assertEquals("Failed on round", "123412341234E5", fq.toNumberString()); + assertHealth(fq); +} + +void DecimalQuantityTest::testAppend() { + DecimalQuantity fq; + fq.appendDigit(1, 0, true); + assertEquals("Failed on append", "1E0", fq.toNumberString()); + assertHealth(fq); + fq.appendDigit(2, 0, true); + assertEquals("Failed on append", "12E0", fq.toNumberString()); + assertHealth(fq); + fq.appendDigit(3, 1, true); + assertEquals("Failed on append", "1203E0", fq.toNumberString()); + assertHealth(fq); + fq.appendDigit(0, 1, true); + assertEquals("Failed on append", "1203E2", fq.toNumberString()); + assertHealth(fq); + fq.appendDigit(4, 0, true); + assertEquals("Failed on append", "1203004E0", fq.toNumberString()); + assertHealth(fq); + fq.appendDigit(0, 0, true); + assertEquals("Failed on append", "1203004E1", fq.toNumberString()); + assertHealth(fq); + fq.appendDigit(5, 0, false); + assertEquals("Failed on append", "120300405E-1", fq.toNumberString()); + assertHealth(fq); + fq.appendDigit(6, 0, false); + assertEquals("Failed on append", "1203004056E-2", 
fq.toNumberString()); + assertHealth(fq); + fq.appendDigit(7, 3, false); + assertEquals("Failed on append", "12030040560007E-6", fq.toNumberString()); + assertHealth(fq); + UnicodeString baseExpected("12030040560007"); + for (int i = 0; i < 10; i++) { + fq.appendDigit(8, 0, false); + baseExpected.append('8'); + UnicodeString expected(baseExpected); + expected.append("E-"); + if (i >= 3) { + expected.append('1'); + } + expected.append(((7 + i) % 10) + '0'); + assertEquals("Failed on append", expected, fq.toNumberString()); + assertHealth(fq); + } + fq.appendDigit(9, 2, false); + baseExpected.append("009"); + UnicodeString expected(baseExpected); + expected.append("E-19"); + assertEquals("Failed on append", expected, fq.toNumberString()); + assertHealth(fq); +} + +void DecimalQuantityTest::testConvertToAccurateDouble() { + // based on https://github.com/google/double-conversion/issues/28 + static double hardDoubles[] = { + 1651087494906221570.0, + -5074790912492772E-327, + 83602530019752571E-327, + 2.207817077636718750000000000000, + 1.818351745605468750000000000000, + 3.941719055175781250000000000000, + 3.738609313964843750000000000000, + 3.967735290527343750000000000000, + 1.328025817871093750000000000000, + 3.920967102050781250000000000000, + 1.015235900878906250000000000000, + 1.335227966308593750000000000000, + 1.344520568847656250000000000000, + 2.879127502441406250000000000000, + 3.695838928222656250000000000000, + 1.845344543457031250000000000000, + 3.793952941894531250000000000000, + 3.211402893066406250000000000000, + 2.565971374511718750000000000000, + 0.965156555175781250000000000000, + 2.700004577636718750000000000000, + 0.767097473144531250000000000000, + 1.780448913574218750000000000000, + 2.624839782714843750000000000000, + 1.305290222167968750000000000000, + 3.834922790527343750000000000000,}; + + static double integerDoubles[] = { + 51423, + 51423e10, + 4.503599627370496E15, + 6.789512076111555E15, + 9.007199254740991E15, + 9.007199254740992E15}; + 
+ for (double d : hardDoubles) { + checkDoubleBehavior(d, true); + } + + for (double d : integerDoubles) { + checkDoubleBehavior(d, false); + } + + assertDoubleEquals("NaN check failed", NAN, DecimalQuantity().setToDouble(NAN).toDouble()); + assertDoubleEquals( + "Inf check failed", INFINITY, DecimalQuantity().setToDouble(INFINITY).toDouble()); + assertDoubleEquals( + "-Inf check failed", -INFINITY, DecimalQuantity().setToDouble(-INFINITY).toDouble()); + + // Generate random doubles + for (int32_t i = 0; i < 1000000; i++) { + uint8_t bytes[8]; + for (int32_t j = 0; j < 8; j++) { + // The cast target matches the element type of bytes. + bytes[j] = static_cast<uint8_t>(rand() % 256); + } + double d; + uprv_memcpy(&d, bytes, 8); + if (std::isnan(d) || !std::isfinite(d)) { continue; } + checkDoubleBehavior(d, false); + } +} + +void DecimalQuantityTest::testUseApproximateDoubleWhenAble() { + struct TestCase { + double d; + int32_t maxFrac; + RoundingMode roundingMode; + bool usesExact; + } cases[] = {{1.2345678, 1, RoundingMode::kRoundHalfEven, false}, + {1.2345678, 7, RoundingMode::kRoundHalfEven, false}, + {1.2345678, 12, RoundingMode::kRoundHalfEven, false}, + {1.2345678, 13, RoundingMode::kRoundHalfEven, true}, + {1.235, 1, RoundingMode::kRoundHalfEven, false}, + {1.235, 2, RoundingMode::kRoundHalfEven, true}, + {1.235, 3, RoundingMode::kRoundHalfEven, false}, + {1.000000000000001, 0, RoundingMode::kRoundHalfEven, false}, + {1.000000000000001, 0, RoundingMode::kRoundCeiling, true}, + {1.235, 1, RoundingMode::kRoundCeiling, false}, + {1.235, 2, RoundingMode::kRoundCeiling, false}, + {1.235, 3, RoundingMode::kRoundCeiling, true}}; + + UErrorCode status = U_ZERO_ERROR; + for (TestCase cas : cases) { + DecimalQuantity fq; + fq.setToDouble(cas.d); + assertTrue("Should be using approximate double", !fq.isExplicitExactDouble()); + fq.roundToMagnitude(-cas.maxFrac, cas.roundingMode, status); + assertSuccess("Rounding to magnitude", status); + if (cas.usesExact != fq.isExplicitExactDouble()) { + errln(UnicodeString(u"Using approximate 
double after rounding: ") + fq.toString()); + } + } +} diff --git a/icu4c/source/test/intltest/numbertest_modifiers.cpp b/icu4c/source/test/intltest/numbertest_modifiers.cpp new file mode 100644 index 0000000000..8d57b5f772 --- /dev/null +++ b/icu4c/source/test/intltest/numbertest_modifiers.cpp @@ -0,0 +1,197 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include +#include "intltest.h" +#include "number_stringbuilder.h" +#include "number_modifiers.h" + +using namespace icu::number::impl; + +class ModifiersTest : public IntlTest { + public: + void testConstantAffixModifier(); + + void testConstantMultiFieldModifier(); + + void testSimpleModifier(); + + void testCurrencySpacingEnabledModifier(); + + void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0); + + private: + void assertModifierEquals(const Modifier &mod, int32_t expectedPrefixLength, bool expectedStrong, + UnicodeString expectedChars, UnicodeString expectedFields, + UErrorCode &status); + + void assertModifierEquals(const Modifier &mod, NumberStringBuilder &sb, int32_t expectedPrefixLength, + bool expectedStrong, UnicodeString expectedChars, + UnicodeString expectedFields, UErrorCode &status); +}; + +void ModifiersTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char *) { + if (exec) { + logln("TestSuite ModifiersTest: "); + } + TESTCASE_AUTO_BEGIN; + TESTCASE_AUTO(testConstantAffixModifier); + TESTCASE_AUTO(testConstantMultiFieldModifier); + TESTCASE_AUTO(testSimpleModifier); + TESTCASE_AUTO(testCurrencySpacingEnabledModifier); + TESTCASE_AUTO_END; +} + +void ModifiersTest::testConstantAffixModifier() { + UErrorCode status = U_ZERO_ERROR; + ConstantAffixModifier mod0(u"", u"", UNUM_PERCENT_FIELD, true); + assertModifierEquals(mod0, 0, true, u"|", u"n", status); + assertSuccess("Spot 1", status); + + ConstantAffixModifier mod1(u"a📻", u"b", UNUM_PERCENT_FIELD, true); + 
assertModifierEquals(mod1, 3, true, u"a📻|b", u"%%%n%", status); + assertSuccess("Spot 2", status); +} + +void ModifiersTest::testConstantMultiFieldModifier() { + UErrorCode status = U_ZERO_ERROR; + NumberStringBuilder prefix; + NumberStringBuilder suffix; + ConstantMultiFieldModifier mod1(prefix, suffix, true); + assertModifierEquals(mod1, 0, true, u"|", u"n", status); + assertSuccess("Spot 1", status); + + prefix.append(u"a📻", UNUM_PERCENT_FIELD, status); + suffix.append(u"b", UNUM_CURRENCY_FIELD, status); + ConstantMultiFieldModifier mod2(prefix, suffix, true); + assertModifierEquals(mod2, 3, true, u"a📻|b", u"%%%n$", status); + assertSuccess("Spot 2", status); + + // Make sure the first modifier is still the same (that it stayed constant) + assertModifierEquals(mod1, 0, true, u"|", u"n", status); + assertSuccess("Spot 3", status); +} + +void ModifiersTest::testSimpleModifier() { + static const int32_t NUM_CASES = 5; + static const int32_t NUM_OUTPUTS = 4; + static const char16_t *patterns[] = {u"{0}", u"X{0}Y", u"XX{0}YYY", u"{0}YY", u"XX📺XX{0}"}; + static const struct { + const char16_t *baseString; + int32_t leftIndex; + int32_t rightIndex; + } outputs[NUM_OUTPUTS] = {{u"", 0, 0}, {u"a📻bcde", 0, 0}, {u"a📻bcde", 4, 4}, {u"a📻bcde", 3, 5}}; + static const int32_t prefixLens[] = {0, 1, 2, 0, 6}; + static const char16_t *expectedCharFields[][2] = {{u"|", u"n"}, + {u"X|Y", u"%n%"}, + {u"XX|YYY", u"%%n%%%"}, + {u"|YY", u"n%%"}, + {u"XX📺XX|", u"%%%%%%n"}}; + static const char16_t *expecteds[][NUM_CASES] = // force auto-format line break + {{ + u"", u"XY", u"XXYYY", u"YY", u"XX📺XX"}, { + u"a📻bcde", u"XYa📻bcde", u"XXYYYa📻bcde", u"YYa📻bcde", u"XX📺XXa📻bcde"}, { + u"a📻bcde", u"a📻bXYcde", u"a📻bXXYYYcde", u"a📻bYYcde", u"a📻bXX📺XXcde"}, { + u"a📻bcde", u"a📻XbcYde", u"a📻XXbcYYYde", u"a📻bcYYde", u"a📻XX📺XXbcde"}}; + + UErrorCode status = U_ZERO_ERROR; + for (int32_t i = 0; i < NUM_CASES; i++) { + const UnicodeString pattern(patterns[i]); + SimpleFormatter compiledFormatter(pattern, 
1, 1, status); + assertSuccess("Spot 1", status); + SimpleModifier mod(compiledFormatter, UNUM_PERCENT_FIELD, false); + assertModifierEquals( + mod, prefixLens[i], false, expectedCharFields[i][0], expectedCharFields[i][1], status); + assertSuccess("Spot 2", status); + + // Test strange insertion positions + for (int32_t j = 0; j < NUM_OUTPUTS; j++) { + NumberStringBuilder output; + output.append(outputs[j].baseString, UNUM_FIELD_COUNT, status); + mod.apply(output, outputs[j].leftIndex, outputs[j].rightIndex, status); + UnicodeString expected = expecteds[j][i]; + UnicodeString actual = output.toUnicodeString(); + assertEquals("Strange insertion position", expected, actual); + assertSuccess("Spot 3", status); + } + } +} + +void ModifiersTest::testCurrencySpacingEnabledModifier() { + UErrorCode status = U_ZERO_ERROR; + DecimalFormatSymbols symbols(Locale("en"), status); + assertSuccess("Spot 1", status); + + NumberStringBuilder prefix; + NumberStringBuilder suffix; + CurrencySpacingEnabledModifier mod1(prefix, suffix, true, symbols, status); + assertSuccess("Spot 2", status); + assertModifierEquals(mod1, 0, true, u"|", u"n", status); + assertSuccess("Spot 3", status); + + prefix.append(u"USD", UNUM_CURRENCY_FIELD, status); + assertSuccess("Spot 4", status); + CurrencySpacingEnabledModifier mod2(prefix, suffix, true, symbols, status); + assertSuccess("Spot 5", status); + assertModifierEquals(mod2, 3, true, u"USD|", u"$$$n", status); + assertSuccess("Spot 6", status); + + // Test the default currency spacing rules + NumberStringBuilder sb; + sb.append("123", UNUM_INTEGER_FIELD, status); + assertSuccess("Spot 7", status); + NumberStringBuilder sb1(sb); + assertModifierEquals(mod2, sb1, 3, true, u"USD\u00A0123", u"$$$niii", status); + assertSuccess("Spot 8", status); + + // Compare with the unsafe code path + NumberStringBuilder sb2(sb); + sb2.insert(0, "USD", UNUM_CURRENCY_FIELD, status); + assertSuccess("Spot 9", status); + 
CurrencySpacingEnabledModifier::applyCurrencySpacing(sb2, 0, 3, 6, 0, symbols, status); + assertSuccess("Spot 10", status); + assertTrue(sb1.toDebugString() + " vs " + sb2.toDebugString(), sb1.contentEquals(sb2)); + + // Test custom patterns + // The following line means that the last char of the number should be a | (rather than a digit) + symbols.setPatternForCurrencySpacing(UNUM_CURRENCY_SURROUNDING_MATCH, true, u"[|]"); + suffix.append("XYZ", UNUM_CURRENCY_FIELD, status); + assertSuccess("Spot 11", status); + CurrencySpacingEnabledModifier mod3(prefix, suffix, true, symbols, status); + assertSuccess("Spot 12", status); + assertModifierEquals(mod3, 3, true, u"USD|\u00A0XYZ", u"$$$nn$$$", status); + assertSuccess("Spot 13", status); +} + +void ModifiersTest::assertModifierEquals(const Modifier &mod, int32_t expectedPrefixLength, + bool expectedStrong, UnicodeString expectedChars, + UnicodeString expectedFields, UErrorCode &status) { + NumberStringBuilder sb; + sb.appendCodePoint('|', UNUM_FIELD_COUNT, status); + assertModifierEquals( + mod, sb, expectedPrefixLength, expectedStrong, expectedChars, expectedFields, status); + +} + +void ModifiersTest::assertModifierEquals(const Modifier &mod, NumberStringBuilder &sb, + int32_t expectedPrefixLength, bool expectedStrong, + UnicodeString expectedChars, UnicodeString expectedFields, + UErrorCode &status) { + int32_t oldCount = sb.codePointCount(); + mod.apply(sb, 0, sb.length(), status); + assertEquals("Prefix length", expectedPrefixLength, mod.getPrefixLength(status)); + assertEquals("Strong", expectedStrong, mod.isStrong()); + if (dynamic_cast(&mod) == nullptr) { + // i.e., if mod is not a CurrencySpacingEnabledModifier + assertEquals("Code point count equals actual code point count", + sb.codePointCount() - oldCount, mod.getCodePointCount(status)); + } + + UnicodeString debugString; + debugString.append(u""); + assertEquals("Debug string", debugString, sb.toDebugString()); +} diff --git 
a/icu4c/source/test/intltest/numbertest_patternmodifier.cpp b/icu4c/source/test/intltest/numbertest_patternmodifier.cpp
new file mode 100644
index 0000000000..9cdce1f651
--- /dev/null
+++ b/icu4c/source/test/intltest/numbertest_patternmodifier.cpp
@@ -0,0 +1,119 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "intltest.h"
+// NOTE(review): the two number_* headers and the using-directive below are presumed to be
+// what brings the impl types used in this file into scope (numbertest_modifiers.cpp in this
+// same patch uses the same directive) - confirm which headers actually declare them.
+#include "number_patternmodifier.h"
+#include "number_patternstring.h"
+
+using namespace icu::number::impl;
+
+// Tests for MutablePatternModifier and the ImmutablePatternModifier created from it.
+class PatternStringTest : public IntlTest {
+  public:
+    void testBasic();
+    void testMutableEqualsImmutable();
+
+  private:
+    // Applies mod to an empty builder and returns the part before the prefix length.
+    static UnicodeString getPrefix(const MutablePatternModifier &mod, UErrorCode &status);
+    // Applies mod to an empty builder and returns the part from the prefix length onward.
+    static UnicodeString getSuffix(const MutablePatternModifier &mod, UErrorCode &status);
+};
+
+// Walks one MutablePatternModifier through the sign-display settings AUTO, ALWAYS and
+// NEVER for positive and negative numbers, first with pattern "a0b" and then with
+// "a0b;c-0d", checking the generated prefix and suffix each time.
+void PatternStringTest::testBasic() {
+    UErrorCode status = U_ZERO_ERROR;
+    MutablePatternModifier mod(false);
+    ParsedPatternInfo patternInfo;
+    PatternParser::parseToPatternInfo(u"a0b", patternInfo, status);
+    assertSuccess("Spot 1", status);
+    mod.setPatternInfo(&patternInfo);
+    mod.setPatternAttributes(UNUM_SIGN_AUTO, false);
+    DecimalFormatSymbols symbols(Locale::getEnglish(), status);
+    CurrencyUnit currency(u"USD", status);
+    assertSuccess("Spot 2", status);
+    mod.setSymbols(&symbols, currency, UNUM_UNIT_WIDTH_SHORT, nullptr);
+
+    mod.setNumberProperties(false, StandardPlural::Form::COUNT);
+    assertEquals("Pattern a0b", u"a", getPrefix(mod, status));
+    assertEquals("Pattern a0b", u"b", getSuffix(mod, status));
+    mod.setPatternAttributes(UNUM_SIGN_ALWAYS, false);
+    assertEquals("Pattern a0b", u"+a", getPrefix(mod, status));
+    assertEquals("Pattern a0b", u"b", getSuffix(mod, status));
+    mod.setNumberProperties(true, StandardPlural::Form::COUNT);
+    assertEquals("Pattern a0b", u"-a", getPrefix(mod, status));
+    assertEquals("Pattern a0b", u"b", getSuffix(mod, status));
+    mod.setPatternAttributes(UNUM_SIGN_NEVER, false);
+    assertEquals("Pattern a0b", u"a", getPrefix(mod, status));
+    assertEquals("Pattern a0b", u"b", getSuffix(mod, status));
+    assertSuccess("Spot 3", status);
+
+    ParsedPatternInfo patternInfo2;
+    PatternParser::parseToPatternInfo(u"a0b;c-0d", patternInfo2, status);
+    assertSuccess("Spot 4", status);
+    mod.setPatternInfo(&patternInfo2);
+    mod.setPatternAttributes(UNUM_SIGN_AUTO, false);
+    mod.setNumberProperties(false, StandardPlural::Form::COUNT);
+    assertEquals("Pattern a0b;c-0d", u"a", getPrefix(mod, status));
+    assertEquals("Pattern a0b;c-0d", u"b", getSuffix(mod, status));
+    mod.setPatternAttributes(UNUM_SIGN_ALWAYS, false);
+    assertEquals("Pattern a0b;c-0d", u"c+", getPrefix(mod, status));
+    assertEquals("Pattern a0b;c-0d", u"d", getSuffix(mod, status));
+    mod.setNumberProperties(true, StandardPlural::Form::COUNT);
+    assertEquals("Pattern a0b;c-0d", u"c-", getPrefix(mod, status));
+    assertEquals("Pattern a0b;c-0d", u"d", getSuffix(mod, status));
+    mod.setPatternAttributes(UNUM_SIGN_NEVER, false);
+    assertEquals(
+            "Pattern a0b;c-0d",
+            u"c-",
+            getPrefix(mod, status)); // TODO: What should this behavior be?
+    assertEquals("Pattern a0b;c-0d", u"d", getSuffix(mod, status));
+    assertSuccess("Spot 5", status);
+}
+
+// Formats the same quantity three ways - through the mutable modifier, through the
+// immutable modifier created from it, and through the mutable modifier after switching it
+// to UNUM_SIGN_ALWAYS - and expects the first two to match and the third to differ.
+void PatternStringTest::testMutableEqualsImmutable() {
+    UErrorCode status = U_ZERO_ERROR;
+    MutablePatternModifier mod(false);
+    ParsedPatternInfo patternInfo;
+    PatternParser::parseToPatternInfo(u"a0b;c-0d", patternInfo, status);
+    assertSuccess("Spot 1", status);
+    mod.setPatternInfo(&patternInfo);
+    mod.setPatternAttributes(UNUM_SIGN_AUTO, false);
+    DecimalFormatSymbols symbols(Locale::getEnglish(), status);
+    CurrencyUnit currency(u"USD", status);
+    assertSuccess("Spot 2", status);
+    mod.setSymbols(&symbols, currency, UNUM_UNIT_WIDTH_SHORT, nullptr);
+    DecimalQuantity fq;
+    fq.setToInt(1);
+
+    NumberStringBuilder nsb1;
+    MicroProps micros1;
+    // "&micros1" had been entity-decoded to a micro sign during extraction.
+    mod.addToChain(&micros1);
+    mod.processQuantity(fq, micros1, status);
+    micros1.modMiddle->apply(nsb1, 0, 0, status);
+    assertSuccess("Spot 3", status);
+
+    NumberStringBuilder nsb2;
+    MicroProps micros2;
+    ImmutablePatternModifier *immutable = mod.createImmutable(status);
+    if (immutable == nullptr) {
+        // Dereferencing below would crash; report and stop instead.
+        errln("createImmutable() returned nullptr");
+        return;
+    }
+    immutable->applyToMicros(micros2, fq);
+    micros2.modMiddle->apply(nsb2, 0, 0, status);
+    assertSuccess("Spot 4", status);
+
+    NumberStringBuilder nsb3;
+    MicroProps micros3;
+    mod.addToChain(&micros3);
+    mod.setPatternAttributes(UNUM_SIGN_ALWAYS, false);
+    mod.processQuantity(fq, micros3, status);
+    micros3.modMiddle->apply(nsb3, 0, 0, status);
+    assertSuccess("Spot 5", status);
+
+    assertTrue(nsb1.toUnicodeString() + " vs " + nsb2.toUnicodeString(), nsb1.contentEquals(nsb2));
+    assertFalse(nsb1.toUnicodeString() + " vs " + nsb3.toUnicodeString(), nsb1.contentEquals(nsb3));
+
+    // NOTE(review): the caller is presumed to own the object returned by createImmutable()
+    // - confirm. It is released last because micros2.modMiddle may point into it.
+    delete immutable;
+}
+
+// Applies mod to an empty builder and returns the first getPrefixLength() code units.
+// ('static' belongs only on the in-class declaration, not on this definition.)
+UnicodeString PatternStringTest::getPrefix(const MutablePatternModifier &mod, UErrorCode &status) {
+    NumberStringBuilder nsb;
+    mod.apply(nsb, 0, 0, status);
+    int32_t prefixLength = mod.getPrefixLength(status);
+    return UnicodeString(nsb.toUnicodeString(), 0, prefixLength);
+}
+
+// Applies mod to an empty builder and returns everything after the first
+// getPrefixLength() code units.
+UnicodeString PatternStringTest::getSuffix(const MutablePatternModifier &mod, UErrorCode &status) {
+    NumberStringBuilder nsb;
+    mod.apply(nsb, 0, 0, status);
+    int32_t prefixLength = mod.getPrefixLength(status);
+    return UnicodeString(nsb.toUnicodeString(), prefixLength, nsb.length() - prefixLength);
+}
diff --git a/icu4c/source/test/intltest/numbertest_patternstring.cpp b/icu4c/source/test/intltest/numbertest_patternstring.cpp
new file mode 100644
index 0000000000..c92b95adfd
--- /dev/null
+++ b/icu4c/source/test/intltest/numbertest_patternstring.cpp
@@ -0,0 +1,74 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// NOTE(review): the functions in this file are written as free functions but call
+// assertSuccess/assertEquals/assertTrue, and no headers are included; presumably they are
+// meant to become members of an IntlTest subclass - confirm before adding to the build.
+
+// Round-trips each input pattern through PatternParser::parseToProperties() and
+// PatternStringUtils::propertiesToPatternString() and compares against the expected
+// normalized pattern. Each row is {input pattern, expected output pattern}.
+void testToPatternSimple() {
+    const char16_t *cases[][2] = {{u"#", u"0"},
+                                  {u"0", u"0"},
+                                  {u"#0", u"0"},
+                                  {u"###", u"0"},
+                                  {u"0.##", u"0.##"},
+                                  {u"0.00", u"0.00"},
+                                  {u"0.00#", u"0.00#"},
+                                  {u"#E0", u"#E0"},
+                                  {u"0E0", u"0E0"},
+                                  {u"#00E00", u"#00E00"},
+                                  {u"#,##0", u"#,##0"},
+                                  {u"#;#", u"0;0"},
+            // ignore a negative prefix pattern of '-' since that is the default:
+                                  {u"#;-#", u"0"},
+                                  {u"**##0", u"**##0"},
+                                  {u"*'x'##0", u"*x##0"},
+                                  {u"a''b0", u"a''b0"},
+                                  {u"*''##0", u"*''##0"},
+                                  {u"*📺##0", u"*'📺'##0"},
+                                  {u"*'நி'##0", u"*'நி'##0"},};
+
+    for (const char16_t **cas : cases) {
+        // A fresh status per case: a failure left in a shared status would be passed into
+        // the calls for every later case and make them fail too.
+        UErrorCode status = U_ZERO_ERROR;
+        UnicodeString input(cas[0]);
+        UnicodeString output(cas[1]);
+
+        DecimalFormatProperties properties = PatternParser::parseToProperties(
+                input, PatternParser::IGNORE_ROUNDING_NEVER, status);
+        assertSuccess(input, status);
+        UnicodeString actual = PatternStringUtils::propertiesToPatternString(properties, status);
+        // The conversion back to a string also reports through status; check it as well.
+        assertSuccess(input, status);
+        assertEquals(input, output, actual);
+    }
+}
+
+// Verifies that PatternParser::parseToPatternInfo() sets a failure status for each of a
+// list of malformed patterns.
+void testExceptionOnInvalid() {
+    static const char16_t *invalidPatterns[] = {
+            u"#.#.#",
+            u"0#",
+            u"0#.",
+            u".#0",
+            u"0#.#0",
+            u"@0",
+            u"0@",
+            u"0,",
+            u"0,,",
+            u"0,,0",
+            u"0,,0,",
+            u"#,##0E0"};
+
+    for (auto pattern : invalidPatterns) {
+        UErrorCode status = U_ZERO_ERROR;
+        ParsedPatternInfo patternInfo;
+        PatternParser::parseToPatternInfo(pattern, patternInfo, status);
+        assertTrue(pattern, U_FAILURE(status));
+    }
+}
+
+// Regression test: pattern "0;" (empty negative subpattern) must parse to the same
+// properties as plain "0".
+void testBug13117() {
+    UErrorCode status = U_ZERO_ERROR;
+    DecimalFormatProperties expected = PatternParser::parseToProperties(
+            u"0",
+            PatternParser::IGNORE_ROUNDING_NEVER,
+            status);
+    DecimalFormatProperties actual = PatternParser::parseToProperties(
+            u"0;",
+            PatternParser::IGNORE_ROUNDING_NEVER,
+            status);
+    assertSuccess("Spot 1", status);
+    assertTrue("Should not consume negative subpattern", expected == actual);
+}
diff --git
a/icu4c/source/test/intltest/numbertest_stringbuilder.cpp b/icu4c/source/test/intltest/numbertest_stringbuilder.cpp
new file mode 100644
index 0000000000..d6a87152af
--- /dev/null
+++ b/icu4c/source/test/intltest/numbertest_stringbuilder.cpp
@@ -0,0 +1,231 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "putilimp.h"
+#include "numbertest.h"
+
+// Sample inputs shared by the tests below: empty, short ASCII, longer ASCII, a lone
+// supplementary code point, mixed BMP/supplementary text, and a long string.
+static const char16_t *EXAMPLE_STRINGS[] = {
+        u"",
+        u"xyz",
+        u"The quick brown fox jumps over the lazy dog",
+        u"😁",
+        u"mixed 😇 and ASCII",
+        u"with combining characters like 🇦🇧🇨🇩",
+        u"A very very very very very very very very very very long string to force heap"};
+
+// intltest dispatch hook: registers the test methods of this suite.
+void NumberStringBuilderTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char *) {
+    if (exec) {
+        logln("TestSuite NumberStringBuilderTest: ");
+    }
+    TESTCASE_AUTO_BEGIN;
+        TESTCASE_AUTO(testInsertAppendUnicodeString);
+        TESTCASE_AUTO(testInsertAppendCodePoint);
+        TESTCASE_AUTO(testCopy);
+        TESTCASE_AUTO(testFields);
+        TESTCASE_AUTO(testUnlimitedCapacity);
+        TESTCASE_AUTO(testCodePoints);
+    TESTCASE_AUTO_END;
+}
+
+// Performs the same append/insert operations on a UnicodeString and on a
+// NumberStringBuilder and checks after each step that both hold the same text.
+// sb1/sb2 accumulate across all example strings; sb3, sb4 and sb5 are fresh per string.
+void NumberStringBuilderTest::testInsertAppendUnicodeString() {
+    UErrorCode status = U_ZERO_ERROR;
+    UnicodeString sb1;
+    NumberStringBuilder sb2;
+    for (const char16_t* strPtr : EXAMPLE_STRINGS) {
+        UnicodeString str(strPtr);
+
+        NumberStringBuilder sb3;
+        sb1.append(str);
+        // Note: UNUM_FIELD_COUNT is like passing null in Java
+        sb2.append(str, UNUM_FIELD_COUNT, status);
+        assertSuccess("Appending to sb2", status);
+        sb3.append(str, UNUM_FIELD_COUNT, status);
+        assertSuccess("Appending to sb3", status);
+        assertEqualsImpl(sb1, sb2);
+        assertEqualsImpl(str, sb3);
+
+        UnicodeString sb4;
+        NumberStringBuilder sb5;
+        sb4.append(u"😇");
+        sb4.append(str);
+        sb4.append(u"xx");
+        sb5.append(u"😇xx", UNUM_FIELD_COUNT, status);
+        assertSuccess("Appending to sb5", status);
+        // Index 2 is directly after the emoji, which occupies two code units.
+        sb5.insert(2, str, UNUM_FIELD_COUNT, status);
+        assertSuccess("Inserting into sb5", status);
+        assertEqualsImpl(sb4, sb5);
+
+        int32_t start = uprv_min(1, str.length());
+        int32_t end = uprv_min(10, str.length());
+        sb4.insert(3, str, start, end - start); // UnicodeString uses length instead of end index
+        sb5.insert(3, str, start, end, UNUM_FIELD_COUNT, status);
+        assertSuccess("Inserting into sb5 again", status);
+        assertEqualsImpl(sb4, sb5);
+
+        UnicodeString sb4cp(sb4);
+        NumberStringBuilder sb5cp(sb5);
+        sb4.append(sb4cp);
+        sb5.append(sb5cp, status);
+        assertSuccess("Appending again to sb5", status);
+        assertEqualsImpl(sb4, sb5);
+    }
+}
+
+// Same idea as testInsertAppendUnicodeString but for single code points, covering BMP
+// boundaries and supplementary code points up to U+10FFFF.
+void NumberStringBuilderTest::testInsertAppendCodePoint() {
+    static const UChar32 cases[] = {
+            0, 1, 60, 127, 128, 0x7fff, 0x8000, 0xffff, 0x10000, 0x1f000, 0x10ffff};
+    UErrorCode status = U_ZERO_ERROR;
+    UnicodeString sb1;
+    NumberStringBuilder sb2;
+    for (UChar32 cas : cases) {
+        NumberStringBuilder sb3;
+        sb1.append(cas);
+        sb2.appendCodePoint(cas, UNUM_FIELD_COUNT, status);
+        assertSuccess("Appending to sb2", status);
+        sb3.appendCodePoint(cas, UNUM_FIELD_COUNT, status);
+        assertSuccess("Appending to sb3", status);
+        assertEqualsImpl(sb1, sb2);
+        assertEquals("Length of sb3", U16_LENGTH(cas), sb3.length());
+        assertEquals("Code point count of sb3", 1, sb3.codePointCount());
+        // A supplementary code point is stored as a surrogate pair, so its first code
+        // unit is the lead surrogate rather than the code point itself.
+        assertEquals(
+                "First code unit in sb3",
+                !U_IS_SUPPLEMENTARY(cas) ? static_cast<char16_t>(cas) : U16_LEAD(cas),
+                sb3.charAt(0));
+
+        UnicodeString sb4;
+        NumberStringBuilder sb5;
+        sb4.append(u"😇xx");
+        sb4.insert(2, cas);
+        sb5.append(u"😇xx", UNUM_FIELD_COUNT, status);
+        assertSuccess("Appending to sb5", status);
+        sb5.insertCodePoint(2, cas, UNUM_FIELD_COUNT, status);
+        assertSuccess("Inserting into sb5", status);
+        assertEqualsImpl(sb4, sb5);
+    }
+}
+
+// Checks that a copy-constructed builder equals its source, and stops being equal once
+// the source is appended to.
+void NumberStringBuilderTest::testCopy() {
+    UErrorCode status = U_ZERO_ERROR;
+    for (const char16_t *strPtr : EXAMPLE_STRINGS) {
+        UnicodeString str(strPtr);
+        NumberStringBuilder sb1;
+        sb1.append(str, UNUM_FIELD_COUNT, status);
+        assertSuccess("Appending to sb1 first time", status);
+        NumberStringBuilder sb2(sb1);
+        assertTrue("Content should equal itself", sb1.contentEquals(sb2));
+
+        sb1.append("12345", UNUM_FIELD_COUNT, status);
+        assertSuccess("Appending to sb1 second time", status);
+        assertFalse("Content should no longer equal itself", sb1.contentEquals(sb2));
+    }
+}
+
+// Appends each example string twice (once with the null field UNUM_FIELD_COUNT, once as
+// UNUM_CURRENCY_FIELD), then checks per-index fields, FieldPosition population, field
+// tracking across insertCodePoint(), and field counts after appending a copy to itself.
+void NumberStringBuilderTest::testFields() {
+    UErrorCode status = U_ZERO_ERROR;
+    for (const char16_t *strPtr : EXAMPLE_STRINGS) {
+        UnicodeString str(strPtr);
+        NumberStringBuilder sb;
+        sb.append(str, UNUM_FIELD_COUNT, status);
+        assertSuccess("Appending to sb", status);
+        sb.append(str, UNUM_CURRENCY_FIELD, status);
+        assertSuccess("Appending to sb", status);
+        assertEquals("Reference string copied twice", str.length() * 2, sb.length());
+        for (int32_t i = 0; i < str.length(); i++) {
+            assertEquals("Null field first", UNUM_FIELD_COUNT, sb.fieldAt(i));
+            assertEquals("Currency field second", UNUM_CURRENCY_FIELD, sb.fieldAt(i + str.length()));
+        }
+
+        // Very basic FieldPosition test. More robust tests happen in NumberFormatTest.
+        // Let NumberFormatTest also take care of FieldPositionIterator material.
+        FieldPosition fp(UNUM_CURRENCY_FIELD);
+        sb.populateFieldPosition(fp, 0, status);
+        assertSuccess("Populating the FieldPosition", status);
+        assertEquals("Currency start position", str.length(), fp.getBeginIndex());
+        assertEquals("Currency end position", str.length() * 2, fp.getEndIndex());
+
+        if (str.length() > 0) {
+            // 100 is the code point 'd'; it adds exactly one code unit at index 2.
+            sb.insertCodePoint(2, 100, UNUM_INTEGER_FIELD, status);
+            assertSuccess("Inserting code point into sb", status);
+            assertEquals("New length", str.length() * 2 + 1, sb.length());
+            assertEquals("Integer field", UNUM_INTEGER_FIELD, sb.fieldAt(2));
+        }
+
+        NumberStringBuilder old(sb);
+        sb.append(old, status);
+        assertSuccess("Appending to myself", status);
+        int32_t numNull = 0;
+        int32_t numCurr = 0;
+        int32_t numInt = 0;
+        for (int32_t i = 0; i < sb.length(); i++) {
+            UNumberFormatFields field = sb.fieldAt(i);
+            assertEquals("Field should equal location in old", old.fieldAt(i % old.length()), field);
+            if (field == UNUM_FIELD_COUNT) {
+                numNull++;
+            } else if (field == UNUM_CURRENCY_FIELD) {
+                numCurr++;
+            } else if (field == UNUM_INTEGER_FIELD) {
+                numInt++;
+            } else {
+                errln("Encountered unknown field");
+            }
+        }
+        // The builder now holds two copies of the original content, so each count doubles.
+        assertEquals("Number of null fields", str.length() * 2, numNull);
+        assertEquals("Number of currency fields", numNull, numCurr);
+        assertEquals("Number of integer fields", str.length() > 0 ? 2 : 0, numInt);
+    }
+}
+
+// Appends 1000 code points one at a time and checks the length before and after each one.
+void NumberStringBuilderTest::testUnlimitedCapacity() {
+    UErrorCode status = U_ZERO_ERROR;
+    NumberStringBuilder builder;
+    // The builder should never fail upon repeated appends.
+    for (int32_t i = 0; i < 1000; i++) {
+        UnicodeString message("Iteration #");
+        message += Int64ToUnicodeString(i);
+        // Argument order is (message, expected, actual), as in the other assertions in
+        // this file; the loop counter is the expected length.
+        assertEquals(message, i, builder.length());
+        builder.appendCodePoint('x', UNUM_FIELD_COUNT, status);
+        assertSuccess(message, status);
+        assertEquals(message, i + 1, builder.length());
+    }
+}
+
+// Checks the code point accessors on an empty builder, after appending a BMP character,
+// and after appending a supplementary character (two code units).
+void NumberStringBuilderTest::testCodePoints() {
+    UErrorCode status = U_ZERO_ERROR;
+    NumberStringBuilder nsb;
+    assertEquals("First is -1 on empty string", -1, nsb.getFirstCodePoint());
+    assertEquals("Last is -1 on empty string", -1, nsb.getLastCodePoint());
+    assertEquals("Length is 0 on empty string", 0, nsb.codePointCount());
+
+    nsb.append(u"q", UNUM_FIELD_COUNT, status);
+    assertSuccess("Spot 1", status);
+    assertEquals("First is q", u'q', nsb.getFirstCodePoint());
+    assertEquals("Last is q", u'q', nsb.getLastCodePoint());
+    assertEquals("0th is q", u'q', nsb.codePointAt(0));
+    assertEquals("Before 1st is q", u'q', nsb.codePointBefore(1));
+    assertEquals("Code point count is 1", 1, nsb.codePointCount());
+
+    // 🚀 is two char16s
+    nsb.append(u"🚀", UNUM_FIELD_COUNT, status);
+    assertSuccess("Spot 2", status);
+    assertEquals("First is still q", u'q', nsb.getFirstCodePoint());
+    assertEquals("Last is space ship", 128640, nsb.getLastCodePoint());
+    assertEquals("1st is space ship", 128640, nsb.codePointAt(1));
+    assertEquals("Before 1st is q", u'q', nsb.codePointBefore(1));
+    assertEquals("Before 3rd is space ship", 128640, nsb.codePointBefore(3));
+    assertEquals("Code point count is 2", 2, nsb.codePointCount());
+}
+
+// Asserts that a UnicodeString and a NumberStringBuilder have the same length, the same
+// code point count and, if the lengths match, the same code unit at every index.
+void NumberStringBuilderTest::assertEqualsImpl(const UnicodeString &a, const NumberStringBuilder &b) {
+    // TODO: Why won't this compile without the IntlTest:: qualifier?
+    IntlTest::assertEquals("Lengths should be the same", a.length(), b.length());
+    IntlTest::assertEquals("Code point counts should be the same", a.countChar32(), b.codePointCount());
+
+    if (a.length() != b.length()) {
+        // Comparing index by index would read past the end of the shorter one.
+        return;
+    }
+
+    for (int32_t i = 0; i < a.length(); i++) {
+        IntlTest::assertEquals(
+                UnicodeString("Char at position ") + Int64ToUnicodeString(i) +
+                UnicodeString(" in string ") + a, a.charAt(i), b.charAt(i));
+    }
+}