ICU-13634 Refactoring the two separate currency matchers into a single unified CombinedCurrencyMatcher. Allows for easy implementation of currency spacing (included in this changeset) and possibly other currency-related parsing features in the future.
X-SVN-Rev: 41181
This commit is contained in:
parent
7f9de6f1db
commit
921355c6f0
@ -190,7 +190,7 @@ NumberParseMatcher& AffixTokenMatcherWarehouse::permille() {
|
||||
}
|
||||
|
||||
NumberParseMatcher& AffixTokenMatcherWarehouse::currency(UErrorCode& status) {
|
||||
return fCurrency = {{fSetupData->locale, status}, {fSetupData->currencySymbols, status}};
|
||||
return fCurrency = {fSetupData->currencySymbols, fSetupData->dfs, status};
|
||||
}
|
||||
|
||||
IgnorablesMatcher& AffixTokenMatcherWarehouse::ignorables() {
|
||||
|
@ -125,7 +125,7 @@ class AffixTokenMatcherWarehouse : public UMemory {
|
||||
PlusSignMatcher fPlusSign;
|
||||
PercentMatcher fPercent;
|
||||
PermilleMatcher fPermille;
|
||||
CurrencyAnyMatcher fCurrency;
|
||||
CombinedCurrencyMatcher fCurrency;
|
||||
|
||||
// Use a child class for code point matchers, since it requires non-default operators.
|
||||
CodePointMatcherWarehouse fCodePoints;
|
||||
|
@ -18,44 +18,6 @@ using namespace icu::numparse;
|
||||
using namespace icu::numparse::impl;
|
||||
|
||||
|
||||
bool AnyMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
|
||||
int32_t initialOffset = segment.getOffset();
|
||||
bool maybeMore = false;
|
||||
|
||||
// NOTE: The range-based for loop calls the virtual begin() and end() methods.
|
||||
for (auto& matcher : *this) {
|
||||
maybeMore = maybeMore || matcher->match(segment, result, status);
|
||||
if (segment.getOffset() != initialOffset) {
|
||||
// Match succeeded.
|
||||
// NOTE: Except for a couple edge cases, if a matcher accepted string A, then it will
|
||||
// accept any string starting with A. Therefore, there is no possibility that matchers
|
||||
// later in the list may be evaluated on longer strings, and we can exit the loop here.
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// None of the matchers succeeded.
|
||||
return maybeMore;
|
||||
}
|
||||
|
||||
bool AnyMatcher::smokeTest(const StringSegment& segment) const {
|
||||
// NOTE: The range-based for loop calls the virtual begin() and end() methods.
|
||||
for (auto& matcher : *this) {
|
||||
if (matcher->smokeTest(segment)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void AnyMatcher::postProcess(ParsedNumber& result) const {
|
||||
// NOTE: The range-based for loop calls the virtual begin() and end() methods.
|
||||
for (auto& matcher : *this) {
|
||||
matcher->postProcess(result);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool SeriesMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
|
||||
ParsedNumber backup(result);
|
||||
|
||||
|
@ -29,27 +29,29 @@ class CompositionMatcher : public NumberParseMatcher {
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Composes a number of matchers, and succeeds if any of the matchers succeed. Always greedily chooses
|
||||
* the first matcher in the list to succeed.
|
||||
*
|
||||
* NOTE: In C++, this is a base class, unlike ICU4J, which uses a factory-style interface.
|
||||
*
|
||||
* @author sffc
|
||||
* @see SeriesMatcher
|
||||
*/
|
||||
class AnyMatcher : public CompositionMatcher {
|
||||
public:
|
||||
bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
|
||||
|
||||
bool smokeTest(const StringSegment& segment) const override;
|
||||
|
||||
void postProcess(ParsedNumber& result) const override;
|
||||
|
||||
protected:
|
||||
// No construction except by subclasses!
|
||||
AnyMatcher() = default;
|
||||
};
|
||||
// NOTE: AnyMatcher is no longer being used. The previous definition is shown below.
|
||||
// The implementation can be found in SVN source control, deleted around March 30, 2018.
|
||||
///**
|
||||
// * Composes a number of matchers, and succeeds if any of the matchers succeed. Always greedily chooses
|
||||
// * the first matcher in the list to succeed.
|
||||
// *
|
||||
// * NOTE: In C++, this is a base class, unlike ICU4J, which uses a factory-style interface.
|
||||
// *
|
||||
// * @author sffc
|
||||
// * @see SeriesMatcher
|
||||
// */
|
||||
//class AnyMatcher : public CompositionMatcher {
|
||||
// public:
|
||||
// bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
|
||||
//
|
||||
// bool smokeTest(const StringSegment& segment) const override;
|
||||
//
|
||||
// void postProcess(ParsedNumber& result) const override;
|
||||
//
|
||||
// protected:
|
||||
// // No construction except by subclasses!
|
||||
// AnyMatcher() = default;
|
||||
//};
|
||||
|
||||
|
||||
/**
|
||||
|
@ -20,19 +20,83 @@ using namespace icu::numparse;
|
||||
using namespace icu::numparse::impl;
|
||||
|
||||
|
||||
CurrencyNamesMatcher::CurrencyNamesMatcher(const Locale& locale, UErrorCode& status)
|
||||
: fLocaleName(locale.getName(), -1, status) {
|
||||
CombinedCurrencyMatcher::CombinedCurrencyMatcher(const CurrencySymbols& currencySymbols,
|
||||
const DecimalFormatSymbols& dfs, UErrorCode& status)
|
||||
: fCurrency1(currencySymbols.getCurrencySymbol(status)),
|
||||
fCurrency2(currencySymbols.getIntlCurrencySymbol(status)),
|
||||
afterPrefixInsert(dfs.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, false, status)),
|
||||
beforeSuffixInsert(dfs.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, true, status)),
|
||||
fLocaleName(dfs.getLocale().getName(), -1, status) {
|
||||
utils::copyCurrencyCode(fCurrencyCode, currencySymbols.getIsoCode());
|
||||
|
||||
// Compute the full set of characters that could be the first in a currency to allow for
|
||||
// efficient smoke test.
|
||||
fLeadCodePoints.add(fCurrency1.char32At(0));
|
||||
fLeadCodePoints.add(fCurrency2.char32At(0));
|
||||
fLeadCodePoints.add(beforeSuffixInsert.char32At(0));
|
||||
uprv_currencyLeads(fLocaleName.data(), fLeadCodePoints, status);
|
||||
// Always apply case mapping closure for currencies
|
||||
fLeadCodePoints.closeOver(USET_ADD_CASE_MAPPINGS);
|
||||
fLeadCodePoints.freeze();
|
||||
}
|
||||
|
||||
bool CurrencyNamesMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
|
||||
bool CombinedCurrencyMatcher::match(StringSegment& segment, ParsedNumber& result,
|
||||
UErrorCode& status) const {
|
||||
if (result.currencyCode[0] != 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Try to match a currency spacing separator.
|
||||
int32_t initialOffset = segment.getOffset();
|
||||
bool maybeMore = false;
|
||||
if (result.seenNumber()) {
|
||||
int32_t overlap = segment.getCommonPrefixLength(beforeSuffixInsert);
|
||||
if (overlap == beforeSuffixInsert.length()) {
|
||||
segment.adjustOffset(overlap);
|
||||
// Note: let currency spacing be a weak match. Don't update chars consumed.
|
||||
}
|
||||
maybeMore = maybeMore || overlap == segment.length();
|
||||
}
|
||||
|
||||
// Match the currency string, and reset if we didn't find one.
|
||||
maybeMore = maybeMore || matchCurrency(segment, result, status);
|
||||
if (result.currencyCode[0] == 0) {
|
||||
segment.setOffset(initialOffset);
|
||||
return maybeMore;
|
||||
}
|
||||
|
||||
// Try to match a currency spacing separator.
|
||||
if (!result.seenNumber()) {
|
||||
int32_t overlap = segment.getCommonPrefixLength(afterPrefixInsert);
|
||||
if (overlap == afterPrefixInsert.length()) {
|
||||
segment.adjustOffset(overlap);
|
||||
// Note: let currency spacing be a weak match. Don't update chars consumed.
|
||||
}
|
||||
maybeMore = maybeMore || overlap == segment.length();
|
||||
}
|
||||
|
||||
return maybeMore;
|
||||
}
|
||||
|
||||
bool CombinedCurrencyMatcher::matchCurrency(StringSegment& segment, ParsedNumber& result,
|
||||
UErrorCode& status) const {
|
||||
|
||||
int32_t overlap1 = segment.getCommonPrefixLength(fCurrency1);
|
||||
if (overlap1 == fCurrency1.length()) {
|
||||
utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
|
||||
segment.adjustOffset(overlap1);
|
||||
result.setCharsConsumed(segment);
|
||||
return segment.length() == 0;
|
||||
}
|
||||
|
||||
int32_t overlap2 = segment.getCommonPrefixLength(fCurrency2);
|
||||
if (overlap2 == fCurrency2.length()) {
|
||||
utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
|
||||
segment.adjustOffset(overlap2);
|
||||
result.setCharsConsumed(segment);
|
||||
return segment.length() == 0;
|
||||
}
|
||||
|
||||
// NOTE: This call site should be improved with #13584.
|
||||
const UnicodeString segmentString = segment.toTempUnicodeString();
|
||||
|
||||
@ -48,9 +112,6 @@ bool CurrencyNamesMatcher::match(StringSegment& segment, ParsedNumber& result, U
|
||||
result.currencyCode,
|
||||
status);
|
||||
|
||||
// Possible partial match
|
||||
bool partialMatch = partialMatchLen == segment.length();
|
||||
|
||||
if (U_SUCCESS(status) && ppos.getIndex() != 0) {
|
||||
// Complete match.
|
||||
// NOTE: The currency code should already be saved in the ParsedNumber.
|
||||
@ -58,91 +119,16 @@ bool CurrencyNamesMatcher::match(StringSegment& segment, ParsedNumber& result, U
|
||||
result.setCharsConsumed(segment);
|
||||
}
|
||||
|
||||
return partialMatch;
|
||||
return overlap1 == segment.length() || overlap2 == segment.length() ||
|
||||
partialMatchLen == segment.length();
|
||||
}
|
||||
|
||||
bool CurrencyNamesMatcher::smokeTest(const StringSegment& segment) const {
|
||||
bool CombinedCurrencyMatcher::smokeTest(const StringSegment& segment) const {
|
||||
return segment.startsWith(fLeadCodePoints);
|
||||
}
|
||||
|
||||
UnicodeString CurrencyNamesMatcher::toString() const {
|
||||
return u"<CurrencyNames>";
|
||||
}
|
||||
|
||||
|
||||
CurrencyCustomMatcher::CurrencyCustomMatcher(const CurrencySymbols& currencySymbols, UErrorCode& status)
|
||||
: fCurrency1(currencySymbols.getCurrencySymbol(status)),
|
||||
fCurrency2(currencySymbols.getIntlCurrencySymbol(status)) {
|
||||
utils::copyCurrencyCode(fCurrencyCode, currencySymbols.getIsoCode());
|
||||
}
|
||||
|
||||
bool CurrencyCustomMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode&) const {
|
||||
if (result.currencyCode[0] != 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
int overlap1 = segment.getCommonPrefixLength(fCurrency1);
|
||||
if (overlap1 == fCurrency1.length()) {
|
||||
utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
|
||||
segment.adjustOffset(overlap1);
|
||||
result.setCharsConsumed(segment);
|
||||
}
|
||||
|
||||
int overlap2 = segment.getCommonPrefixLength(fCurrency2);
|
||||
if (overlap2 == fCurrency2.length()) {
|
||||
utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
|
||||
segment.adjustOffset(overlap2);
|
||||
result.setCharsConsumed(segment);
|
||||
}
|
||||
|
||||
return overlap1 == segment.length() || overlap2 == segment.length();
|
||||
}
|
||||
|
||||
bool CurrencyCustomMatcher::smokeTest(const StringSegment& segment) const {
|
||||
return segment.startsWith(fCurrency1)
|
||||
|| segment.startsWith(fCurrency2);
|
||||
}
|
||||
|
||||
UnicodeString CurrencyCustomMatcher::toString() const {
|
||||
return u"<CurrencyCustom>";
|
||||
}
|
||||
|
||||
|
||||
CurrencyAnyMatcher::CurrencyAnyMatcher() {
|
||||
fMatcherArray[0] = &fNamesMatcher;
|
||||
fMatcherArray[1] = &fCustomMatcher;
|
||||
}
|
||||
|
||||
CurrencyAnyMatcher::CurrencyAnyMatcher(CurrencyNamesMatcher namesMatcher,
|
||||
CurrencyCustomMatcher customMatcher)
|
||||
: fNamesMatcher(std::move(namesMatcher)), fCustomMatcher(std::move(customMatcher)) {
|
||||
fMatcherArray[0] = &fNamesMatcher;
|
||||
fMatcherArray[1] = &fCustomMatcher;
|
||||
}
|
||||
|
||||
CurrencyAnyMatcher::CurrencyAnyMatcher(CurrencyAnyMatcher&& src) U_NOEXCEPT
|
||||
: fNamesMatcher(std::move(src.fNamesMatcher)), fCustomMatcher(std::move(src.fCustomMatcher)) {
|
||||
fMatcherArray[0] = &fNamesMatcher;
|
||||
fMatcherArray[1] = &fCustomMatcher;
|
||||
}
|
||||
|
||||
CurrencyAnyMatcher& CurrencyAnyMatcher::operator=(CurrencyAnyMatcher&& src) U_NOEXCEPT {
|
||||
fNamesMatcher = std::move(src.fNamesMatcher);
|
||||
fCustomMatcher = std::move(src.fCustomMatcher);
|
||||
// Note: do NOT move fMatcherArray
|
||||
return *this;
|
||||
}
|
||||
|
||||
const NumberParseMatcher* const* CurrencyAnyMatcher::begin() const {
|
||||
return fMatcherArray;
|
||||
}
|
||||
|
||||
const NumberParseMatcher* const* CurrencyAnyMatcher::end() const {
|
||||
return fMatcherArray + 2;
|
||||
}
|
||||
|
||||
UnicodeString CurrencyAnyMatcher::toString() const {
|
||||
return u"<CurrencyAny>";
|
||||
UnicodeString CombinedCurrencyMatcher::toString() const {
|
||||
return u"<CombinedCurrencyMatcher>";
|
||||
}
|
||||
|
||||
|
||||
|
@ -19,38 +19,21 @@ namespace impl {
|
||||
using ::icu::number::impl::CurrencySymbols;
|
||||
|
||||
/**
|
||||
* Matches currencies according to all available strings in locale data.
|
||||
* Matches a currency, either a custom currency or one from the data bundle. The class is called
|
||||
* "combined" to emphasize that the currency string may come from one of multiple sources.
|
||||
*
|
||||
* The implementation of this class is different between J and C. See #13584 for a follow-up.
|
||||
* Will match currency spacing either before or after the number depending on whether we are currently in
|
||||
* the prefix or suffix.
|
||||
*
|
||||
* The implementation of this class is slightly different between J and C. See #13584 for a follow-up.
|
||||
*
|
||||
* @author sffc
|
||||
*/
|
||||
class CurrencyNamesMatcher : public NumberParseMatcher, public UMemory {
|
||||
class CombinedCurrencyMatcher : public NumberParseMatcher, public UMemory {
|
||||
public:
|
||||
CurrencyNamesMatcher() = default; // WARNING: Leaves the object in an unusable state
|
||||
CombinedCurrencyMatcher() = default; // WARNING: Leaves the object in an unusable state
|
||||
|
||||
CurrencyNamesMatcher(const Locale& locale, UErrorCode& status);
|
||||
|
||||
bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
|
||||
|
||||
bool smokeTest(const StringSegment& segment) const override;
|
||||
|
||||
UnicodeString toString() const override;
|
||||
|
||||
private:
|
||||
// We could use Locale instead of CharString here, but
|
||||
// Locale has a non-trivial default constructor.
|
||||
CharString fLocaleName;
|
||||
|
||||
UnicodeSet fLeadCodePoints;
|
||||
};
|
||||
|
||||
|
||||
class CurrencyCustomMatcher : public NumberParseMatcher, public UMemory {
|
||||
public:
|
||||
CurrencyCustomMatcher() = default; // WARNING: Leaves the object in an unusable state
|
||||
|
||||
CurrencyCustomMatcher(const CurrencySymbols& currencySymbols, UErrorCode& status);
|
||||
CombinedCurrencyMatcher(const CurrencySymbols& currencySymbols, const DecimalFormatSymbols& dfs, UErrorCode& status);
|
||||
|
||||
bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
|
||||
|
||||
@ -62,36 +45,18 @@ class CurrencyCustomMatcher : public NumberParseMatcher, public UMemory {
|
||||
UChar fCurrencyCode[4];
|
||||
UnicodeString fCurrency1;
|
||||
UnicodeString fCurrency2;
|
||||
};
|
||||
|
||||
UnicodeString afterPrefixInsert;
|
||||
UnicodeString beforeSuffixInsert;
|
||||
|
||||
/**
|
||||
* An implementation of AnyMatcher, allowing for either currency data or locale currency matches.
|
||||
*/
|
||||
class CurrencyAnyMatcher : public AnyMatcher, public UMemory {
|
||||
public:
|
||||
CurrencyAnyMatcher(); // WARNING: Leaves the object in an unusable state
|
||||
// We could use Locale instead of CharString here, but
|
||||
// Locale has a non-trivial default constructor.
|
||||
CharString fLocaleName;
|
||||
|
||||
CurrencyAnyMatcher(CurrencyNamesMatcher namesMatcher, CurrencyCustomMatcher customMatcher);
|
||||
UnicodeSet fLeadCodePoints;
|
||||
|
||||
// Needs custom move constructor/operator since constructor is nontrivial
|
||||
|
||||
CurrencyAnyMatcher(CurrencyAnyMatcher&& src) U_NOEXCEPT;
|
||||
|
||||
CurrencyAnyMatcher& operator=(CurrencyAnyMatcher&& src) U_NOEXCEPT;
|
||||
|
||||
UnicodeString toString() const override;
|
||||
|
||||
protected:
|
||||
const NumberParseMatcher* const* begin() const override;
|
||||
|
||||
const NumberParseMatcher* const* end() const override;
|
||||
|
||||
private:
|
||||
CurrencyNamesMatcher fNamesMatcher;
|
||||
CurrencyCustomMatcher fCustomMatcher;
|
||||
|
||||
const NumberParseMatcher* fMatcherArray[2];
|
||||
/** Matches the currency string without concern for currency spacing. */
|
||||
bool matchCurrency(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const;
|
||||
};
|
||||
|
||||
|
||||
|
@ -69,7 +69,7 @@ NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString&
|
||||
parser->addMatcher(parser->fLocalMatchers.infinity = {symbols});
|
||||
parser->addMatcher(parser->fLocalMatchers.padding = {u"@"});
|
||||
parser->addMatcher(parser->fLocalMatchers.scientific = {symbols, grouper});
|
||||
parser->addMatcher(parser->fLocalMatchers.currencyNames = {locale, status});
|
||||
parser->addMatcher(parser->fLocalMatchers.currency = {currencySymbols, symbols, status});
|
||||
// parser.addMatcher(new RequireNumberMatcher());
|
||||
|
||||
parser->freeze();
|
||||
@ -136,8 +136,7 @@ NumberParserImpl::createParserFromProperties(const number::impl::DecimalFormatPr
|
||||
////////////////////////
|
||||
|
||||
if (parseCurrency || patternInfo.hasCurrencySign()) {
|
||||
parser->addMatcher(parser->fLocalMatchers.currencyCustom = {currencySymbols, status});
|
||||
parser->addMatcher(parser->fLocalMatchers.currencyNames = {locale, status});
|
||||
parser->addMatcher(parser->fLocalMatchers.currency = {currencySymbols, symbols, status});
|
||||
}
|
||||
|
||||
///////////////////////////////
|
||||
|
@ -68,8 +68,7 @@ class NumberParserImpl : public MutableMatcherCollection {
|
||||
PlusSignMatcher plusSign;
|
||||
DecimalMatcher decimal;
|
||||
ScientificMatcher scientific;
|
||||
CurrencyNamesMatcher currencyNames;
|
||||
CurrencyCustomMatcher currencyCustom;
|
||||
CombinedCurrencyMatcher currency;
|
||||
AffixMatcherWarehouse affixMatcherWarehouse;
|
||||
AffixTokenMatcherWarehouse affixTokenMatcherWarehouse;
|
||||
} fLocalMatchers;
|
||||
|
@ -226,9 +226,10 @@ class NumberParserTest : public IntlTest {
|
||||
void testBasic();
|
||||
void testLocaleFi();
|
||||
void testSeriesMatcher();
|
||||
void testCurrencyAnyMatcher();
|
||||
void testCombinedCurrencyMatcher();
|
||||
void testAffixPatternMatcher();
|
||||
void testGroupingDisabled();
|
||||
void testCaseFolding();
|
||||
|
||||
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
|
||||
};
|
||||
|
@ -23,7 +23,7 @@ void NumberParserTest::runIndexedTest(int32_t index, UBool exec, const char*& na
|
||||
TESTCASE_AUTO_BEGIN;
|
||||
TESTCASE_AUTO(testBasic);
|
||||
TESTCASE_AUTO(testSeriesMatcher);
|
||||
TESTCASE_AUTO(testCurrencyAnyMatcher);
|
||||
TESTCASE_AUTO(testCombinedCurrencyMatcher);
|
||||
TESTCASE_AUTO(testAffixPatternMatcher);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
@ -211,8 +211,8 @@ void NumberParserTest::testSeriesMatcher() {
|
||||
}
|
||||
}
|
||||
|
||||
void NumberParserTest::testCurrencyAnyMatcher() {
|
||||
IcuTestErrorCode status(*this, "testCurrencyAnyMatcher");
|
||||
void NumberParserTest::testCombinedCurrencyMatcher() {
|
||||
IcuTestErrorCode status(*this, "testCombinedCurrencyMatcher");
|
||||
|
||||
IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES);
|
||||
Locale locale = Locale::getEnglish();
|
||||
|
@ -16,12 +16,12 @@ rt: "0.###" 1.0 "1"
|
||||
# Basics
|
||||
fp: "0.####" 0.10005 "0.1" 0.1
|
||||
fp: - 0.10006 "0.1001" 0.1001
|
||||
pat: - "#0.####"
|
||||
pat: - "0.####"
|
||||
fp: "#.####" 0.10005 "0.1" 0.1
|
||||
pat: - "#0.####"
|
||||
pat: - "0.####"
|
||||
|
||||
rt: "0" 1234 "1234"
|
||||
pat: - "#0"
|
||||
pat: - "0"
|
||||
|
||||
# Significant digits
|
||||
fp: "@@@" 1.234567 "1.23" 1.23
|
||||
@ -79,12 +79,12 @@ fpc: - 1234.56/JPY "\u00A51,235" 1235/JPY
|
||||
# ISO codes that overlap display names (QQQ vs. Q)
|
||||
# recognize real ISO name in parsing, so, can not use fake name as QQQ
|
||||
#fpc: - 123/QQQ "QQQ123.00" 123/QQQ # QQQ is fake
|
||||
fpc: - 123/GTQ "GTQ123.00" 123/GTQ
|
||||
fpc: - 123/GTQ "GTQ 123.00" 123/GTQ
|
||||
# ChoiceFormat-based display names
|
||||
fpc: - 1/INR "\u20b91.00" 1/INR
|
||||
fpc: - 2/INR "\u20b92.00" 2/INR
|
||||
# Display names with shared prefix (YDD vs. Y)
|
||||
fpc: - 100/YDD "YDD100.00" 100/YDD
|
||||
fpc: - 100/YDD "YDD 100.00" 100/YDD
|
||||
fpc: - 100/CNY "CN\u00a5100.00" 100/CNY
|
||||
|
||||
# Regression Tests bug#7914
|
||||
|
@ -33,12 +33,8 @@ public class AffixTokenMatcherFactory {
|
||||
return PermilleMatcher.getInstance(symbols);
|
||||
}
|
||||
|
||||
public AnyMatcher currency() {
|
||||
AnyMatcher any = new AnyMatcher();
|
||||
any.addMatcher(CurrencyCustomMatcher.getInstance(currency, locale));
|
||||
any.addMatcher(CurrencyNamesMatcher.getInstance(locale));
|
||||
any.freeze();
|
||||
return any;
|
||||
public CombinedCurrencyMatcher currency() {
|
||||
return CombinedCurrencyMatcher.getInstance(currency, symbols);
|
||||
}
|
||||
|
||||
public IgnorablesMatcher ignorables() {
|
||||
|
@ -1,92 +0,0 @@
|
||||
// © 2018 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.impl.number.parse;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import com.ibm.icu.impl.StringSegment;
|
||||
|
||||
/**
|
||||
* Composes a number of matchers, and succeeds if any of the matchers succeed. Always greedily chooses
|
||||
* the first matcher in the list to succeed.
|
||||
*
|
||||
* @author sffc
|
||||
* @see SeriesMatcher
|
||||
*/
|
||||
public class AnyMatcher implements NumberParseMatcher {
|
||||
|
||||
protected List<NumberParseMatcher> matchers = null;
|
||||
protected boolean frozen = false;
|
||||
|
||||
public void addMatcher(NumberParseMatcher matcher) {
|
||||
assert !frozen;
|
||||
if (matchers == null) {
|
||||
matchers = new ArrayList<NumberParseMatcher>();
|
||||
}
|
||||
matchers.add(matcher);
|
||||
}
|
||||
|
||||
public void freeze() {
|
||||
frozen = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean match(StringSegment segment, ParsedNumber result) {
|
||||
assert frozen;
|
||||
if (matchers == null) {
|
||||
return false;
|
||||
}
|
||||
|
||||
int initialOffset = segment.getOffset();
|
||||
boolean maybeMore = false;
|
||||
for (int i = 0; i < matchers.size(); i++) {
|
||||
NumberParseMatcher matcher = matchers.get(i);
|
||||
maybeMore = maybeMore || matcher.match(segment, result);
|
||||
if (segment.getOffset() != initialOffset) {
|
||||
// Match succeeded.
|
||||
// NOTE: Except for a couple edge cases, if a matcher accepted string A, then it will
|
||||
// accept any string starting with A. Therefore, there is no possibility that matchers
|
||||
// later in the list may be evaluated on longer strings, and we can exit the loop here.
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// None of the matchers succeeded.
|
||||
return maybeMore;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean smokeTest(StringSegment segment) {
|
||||
assert frozen;
|
||||
if (matchers == null) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (int i = 0; i < matchers.size(); i++) {
|
||||
if (matchers.get(i).smokeTest(segment)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void postProcess(ParsedNumber result) {
|
||||
assert frozen;
|
||||
if (matchers == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (int i = 0; i < matchers.size(); i++) {
|
||||
NumberParseMatcher matcher = matchers.get(i);
|
||||
matcher.postProcess(result);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "<AnyMatcher " + matchers + ">";
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,157 @@
|
||||
// © 2018 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.impl.number.parse;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
import com.ibm.icu.impl.StringSegment;
|
||||
import com.ibm.icu.impl.TextTrieMap;
|
||||
import com.ibm.icu.text.DecimalFormatSymbols;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.icu.util.Currency;
|
||||
import com.ibm.icu.util.Currency.CurrencyStringInfo;
|
||||
|
||||
/**
|
||||
* Matches a currency, either a custom currency or one from the data bundle. The class is called
|
||||
* "combined" to emphasize that the currency string may come from one of multiple sources.
|
||||
*
|
||||
* Will match currency spacing either before or after the number depending on whether we are currently in
|
||||
* the prefix or suffix.
|
||||
*
|
||||
* The implementation of this class is slightly different between J and C. See #13584 for a follow-up.
|
||||
*
|
||||
* @author sffc
|
||||
*/
|
||||
public class CombinedCurrencyMatcher implements NumberParseMatcher {
|
||||
|
||||
private final String isoCode;
|
||||
private final String currency1;
|
||||
private final String currency2;
|
||||
|
||||
private final String afterPrefixInsert;
|
||||
private final String beforeSuffixInsert;
|
||||
|
||||
private final TextTrieMap<CurrencyStringInfo> longNameTrie;
|
||||
private final TextTrieMap<CurrencyStringInfo> symbolTrie;
|
||||
|
||||
private final UnicodeSet leadCodePoints;
|
||||
|
||||
public static CombinedCurrencyMatcher getInstance(Currency currency, DecimalFormatSymbols dfs) {
|
||||
// TODO: Cache these instances. They are somewhat expensive.
|
||||
return new CombinedCurrencyMatcher(currency, dfs);
|
||||
}
|
||||
|
||||
private CombinedCurrencyMatcher(Currency currency, DecimalFormatSymbols dfs) {
|
||||
this.isoCode = currency.getSubtype();
|
||||
this.currency1 = currency.getSymbol(dfs.getULocale());
|
||||
this.currency2 = currency.getCurrencyCode();
|
||||
|
||||
afterPrefixInsert = dfs
|
||||
.getPatternForCurrencySpacing(DecimalFormatSymbols.CURRENCY_SPC_INSERT, false);
|
||||
beforeSuffixInsert = dfs
|
||||
.getPatternForCurrencySpacing(DecimalFormatSymbols.CURRENCY_SPC_INSERT, true);
|
||||
|
||||
// TODO: Currency trie does not currently have an option for case folding. It defaults to use
|
||||
// case folding on long-names but not symbols.
|
||||
longNameTrie = Currency.getParsingTrie(dfs.getULocale(), Currency.LONG_NAME);
|
||||
symbolTrie = Currency.getParsingTrie(dfs.getULocale(), Currency.SYMBOL_NAME);
|
||||
|
||||
// Compute the full set of characters that could be the first in a currency to allow for
|
||||
// efficient smoke test.
|
||||
leadCodePoints = new UnicodeSet();
|
||||
leadCodePoints.add(currency1.codePointAt(0));
|
||||
leadCodePoints.add(currency2.codePointAt(0));
|
||||
leadCodePoints.add(beforeSuffixInsert.codePointAt(0));
|
||||
longNameTrie.putLeadCodePoints(leadCodePoints);
|
||||
symbolTrie.putLeadCodePoints(leadCodePoints);
|
||||
// Always apply case mapping closure for currencies
|
||||
leadCodePoints.closeOver(UnicodeSet.ADD_CASE_MAPPINGS);
|
||||
leadCodePoints.freeze();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean match(StringSegment segment, ParsedNumber result) {
|
||||
if (result.currencyCode != null) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Try to match a currency spacing separator.
|
||||
int initialOffset = segment.getOffset();
|
||||
boolean maybeMore = false;
|
||||
if (result.seenNumber()) {
|
||||
int overlap = segment.getCommonPrefixLength(beforeSuffixInsert);
|
||||
if (overlap == beforeSuffixInsert.length()) {
|
||||
segment.adjustOffset(overlap);
|
||||
// Note: let currency spacing be a weak match. Don't update chars consumed.
|
||||
}
|
||||
maybeMore = maybeMore || overlap == segment.length();
|
||||
}
|
||||
|
||||
// Match the currency string, and reset if we didn't find one.
|
||||
maybeMore = maybeMore || matchCurrency(segment, result);
|
||||
if (result.currencyCode == null) {
|
||||
segment.setOffset(initialOffset);
|
||||
return maybeMore;
|
||||
}
|
||||
|
||||
// Try to match a currency spacing separator.
|
||||
if (!result.seenNumber()) {
|
||||
int overlap = segment.getCommonPrefixLength(afterPrefixInsert);
|
||||
if (overlap == afterPrefixInsert.length()) {
|
||||
segment.adjustOffset(overlap);
|
||||
// Note: let currency spacing be a weak match. Don't update chars consumed.
|
||||
}
|
||||
maybeMore = maybeMore || overlap == segment.length();
|
||||
}
|
||||
|
||||
return maybeMore;
|
||||
}
|
||||
|
||||
/** Matches the currency string without concern for currency spacing. */
|
||||
private boolean matchCurrency(StringSegment segment, ParsedNumber result) {
|
||||
int overlap1 = segment.getCommonPrefixLength(currency1);
|
||||
if (overlap1 == currency1.length()) {
|
||||
result.currencyCode = isoCode;
|
||||
segment.adjustOffset(overlap1);
|
||||
result.setCharsConsumed(segment);
|
||||
return segment.length() == 0;
|
||||
}
|
||||
|
||||
int overlap2 = segment.getCommonPrefixLength(currency2);
|
||||
if (overlap2 == currency2.length()) {
|
||||
result.currencyCode = isoCode;
|
||||
segment.adjustOffset(overlap2);
|
||||
result.setCharsConsumed(segment);
|
||||
return segment.length() == 0;
|
||||
}
|
||||
|
||||
TextTrieMap.Output trieOutput = new TextTrieMap.Output();
|
||||
Iterator<CurrencyStringInfo> values = longNameTrie.get(segment, 0, trieOutput);
|
||||
if (values == null) {
|
||||
values = symbolTrie.get(segment, 0, trieOutput);
|
||||
}
|
||||
if (values != null) {
|
||||
result.currencyCode = values.next().getISOCode();
|
||||
segment.adjustOffset(trieOutput.matchLength);
|
||||
result.setCharsConsumed(segment);
|
||||
}
|
||||
|
||||
return overlap1 == segment.length() || overlap2 == segment.length() || trieOutput.partialMatch;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean smokeTest(StringSegment segment) {
|
||||
return segment.startsWith(leadCodePoints);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void postProcess(ParsedNumber result) {
|
||||
// No-op
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "<CombinedCurrencyMatcher " + isoCode + ">";
|
||||
}
|
||||
|
||||
}
|
@ -1,67 +0,0 @@
|
||||
// © 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.impl.number.parse;
|
||||
|
||||
import com.ibm.icu.impl.StringSegment;
|
||||
import com.ibm.icu.util.Currency;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
/**
|
||||
* A matcher for a single currency instance (not the full trie).
|
||||
*/
|
||||
public class CurrencyCustomMatcher implements NumberParseMatcher {
|
||||
|
||||
private final String isoCode;
|
||||
private final String currency1;
|
||||
private final String currency2;
|
||||
|
||||
public static CurrencyCustomMatcher getInstance(Currency currency, ULocale loc) {
|
||||
return new CurrencyCustomMatcher(currency.getSubtype(),
|
||||
currency.getSymbol(loc),
|
||||
currency.getCurrencyCode());
|
||||
}
|
||||
|
||||
private CurrencyCustomMatcher(String isoCode, String currency1, String currency2) {
|
||||
this.isoCode = isoCode;
|
||||
this.currency1 = currency1;
|
||||
this.currency2 = currency2;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean match(StringSegment segment, ParsedNumber result) {
|
||||
if (result.currencyCode != null) {
|
||||
return false;
|
||||
}
|
||||
|
||||
int overlap1 = segment.getCommonPrefixLength(currency1);
|
||||
if (overlap1 == currency1.length()) {
|
||||
result.currencyCode = isoCode;
|
||||
segment.adjustOffset(overlap1);
|
||||
result.setCharsConsumed(segment);
|
||||
}
|
||||
|
||||
int overlap2 = segment.getCommonPrefixLength(currency2);
|
||||
if (overlap2 == currency2.length()) {
|
||||
result.currencyCode = isoCode;
|
||||
segment.adjustOffset(overlap2);
|
||||
result.setCharsConsumed(segment);
|
||||
}
|
||||
|
||||
return overlap1 == segment.length() || overlap2 == segment.length();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean smokeTest(StringSegment segment) {
|
||||
return segment.startsWith(currency1) || segment.startsWith(currency2);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void postProcess(ParsedNumber result) {
|
||||
// No-op
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "<CurrencyMatcher " + isoCode + ">";
|
||||
}
|
||||
}
|
@ -1,82 +0,0 @@
|
||||
// © 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.impl.number.parse;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
import com.ibm.icu.impl.StringSegment;
|
||||
import com.ibm.icu.impl.TextTrieMap;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.icu.util.Currency;
|
||||
import com.ibm.icu.util.Currency.CurrencyStringInfo;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
/**
|
||||
* Matches currencies according to all available strings in locale data.
|
||||
*
|
||||
* The implementation of this class is different between J and C. See #13584 for a follow-up.
|
||||
*
|
||||
* @author sffc
|
||||
*/
|
||||
public class CurrencyNamesMatcher implements NumberParseMatcher {
|
||||
|
||||
private final TextTrieMap<CurrencyStringInfo> longNameTrie;
|
||||
private final TextTrieMap<CurrencyStringInfo> symbolTrie;
|
||||
|
||||
private final UnicodeSet leadCodePoints;
|
||||
|
||||
public static CurrencyNamesMatcher getInstance(ULocale locale) {
|
||||
// TODO: Pre-compute some of the more popular locales?
|
||||
return new CurrencyNamesMatcher(locale);
|
||||
}
|
||||
|
||||
private CurrencyNamesMatcher(ULocale locale) {
|
||||
// TODO: Currency trie does not currently have an option for case folding. It defaults to use
|
||||
// case folding on long-names but not symbols.
|
||||
longNameTrie = Currency.getParsingTrie(locale, Currency.LONG_NAME);
|
||||
symbolTrie = Currency.getParsingTrie(locale, Currency.SYMBOL_NAME);
|
||||
|
||||
// Compute the full set of characters that could be the first in a currency to allow for
|
||||
// efficient smoke test.
|
||||
leadCodePoints = new UnicodeSet();
|
||||
longNameTrie.putLeadCodePoints(leadCodePoints);
|
||||
symbolTrie.putLeadCodePoints(leadCodePoints);
|
||||
// Always apply case mapping closure for currencies
|
||||
leadCodePoints.closeOver(UnicodeSet.ADD_CASE_MAPPINGS);
|
||||
leadCodePoints.freeze();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean match(StringSegment segment, ParsedNumber result) {
|
||||
if (result.currencyCode != null) {
|
||||
return false;
|
||||
}
|
||||
|
||||
TextTrieMap.Output trieOutput = new TextTrieMap.Output();
|
||||
Iterator<CurrencyStringInfo> values = longNameTrie.get(segment, 0, trieOutput);
|
||||
if (values == null) {
|
||||
values = symbolTrie.get(segment, 0, trieOutput);
|
||||
}
|
||||
if (values != null) {
|
||||
result.currencyCode = values.next().getISOCode();
|
||||
segment.adjustOffset(trieOutput.matchLength);
|
||||
result.setCharsConsumed(segment);
|
||||
}
|
||||
return trieOutput.partialMatch;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean smokeTest(StringSegment segment) {
|
||||
return segment.startsWith(leadCodePoints);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void postProcess(ParsedNumber result) {
|
||||
// No-op
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "<CurrencyTrieMatcher>";
|
||||
}
|
||||
}
|
@ -37,11 +37,12 @@ public class NumberParserImpl {
|
||||
public static NumberParserImpl createSimpleParser(ULocale locale, String pattern, int parseFlags) {
|
||||
|
||||
NumberParserImpl parser = new NumberParserImpl(parseFlags);
|
||||
Currency currency = Currency.getInstance("USD");
|
||||
DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(locale);
|
||||
IgnorablesMatcher ignorables = IgnorablesMatcher.DEFAULT;
|
||||
|
||||
AffixTokenMatcherFactory factory = new AffixTokenMatcherFactory();
|
||||
factory.currency = Currency.getInstance("USD");
|
||||
factory.currency = currency;
|
||||
factory.symbols = symbols;
|
||||
factory.ignorables = ignorables;
|
||||
factory.locale = locale;
|
||||
@ -61,7 +62,7 @@ public class NumberParserImpl {
|
||||
parser.addMatcher(InfinityMatcher.getInstance(symbols));
|
||||
parser.addMatcher(PaddingMatcher.getInstance("@"));
|
||||
parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper));
|
||||
parser.addMatcher(CurrencyNamesMatcher.getInstance(locale));
|
||||
parser.addMatcher(CombinedCurrencyMatcher.getInstance(currency, symbols));
|
||||
parser.addMatcher(new RequireNumberValidator());
|
||||
|
||||
parser.freeze();
|
||||
@ -185,8 +186,7 @@ public class NumberParserImpl {
|
||||
////////////////////////
|
||||
|
||||
if (parseCurrency || patternInfo.hasCurrencySign()) {
|
||||
parser.addMatcher(CurrencyCustomMatcher.getInstance(currency, locale));
|
||||
parser.addMatcher(CurrencyNamesMatcher.getInstance(locale));
|
||||
parser.addMatcher(CombinedCurrencyMatcher.getInstance(currency, symbols));
|
||||
}
|
||||
|
||||
///////////////////////////////
|
||||
|
@ -5974,4 +5974,14 @@ public class NumberFormatTest extends TestFmwk {
|
||||
df.setParseStrict(true);
|
||||
expect2(df, 0.5, "50x%");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseIsoStrict() {
|
||||
DecimalFormatSymbols dfs = DecimalFormatSymbols.getInstance(ULocale.ENGLISH);
|
||||
DecimalFormat df = new DecimalFormat("¤¤0;-0¤¤", dfs);
|
||||
df.setCurrency(Currency.getInstance("USD"));
|
||||
df.setParseStrict(true);
|
||||
expect2(df, 45, "USD 45.00");
|
||||
expect2(df, -45, "-45.00 USD");
|
||||
}
|
||||
}
|
||||
|
@ -13,7 +13,7 @@ import com.ibm.icu.impl.number.CustomSymbolCurrency;
|
||||
import com.ibm.icu.impl.number.DecimalFormatProperties;
|
||||
import com.ibm.icu.impl.number.parse.AffixPatternMatcher;
|
||||
import com.ibm.icu.impl.number.parse.AffixTokenMatcherFactory;
|
||||
import com.ibm.icu.impl.number.parse.AnyMatcher;
|
||||
import com.ibm.icu.impl.number.parse.CombinedCurrencyMatcher;
|
||||
import com.ibm.icu.impl.number.parse.IgnorablesMatcher;
|
||||
import com.ibm.icu.impl.number.parse.MinusSignMatcher;
|
||||
import com.ibm.icu.impl.number.parse.NumberParserImpl;
|
||||
@ -229,12 +229,13 @@ public class NumberParserTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCurrencyAnyMatcher() {
|
||||
public void testCombinedCurrencyMatcher() {
|
||||
AffixTokenMatcherFactory factory = new AffixTokenMatcherFactory();
|
||||
factory.locale = ULocale.ENGLISH;
|
||||
CustomSymbolCurrency currency = new CustomSymbolCurrency("ICU", "IU$", "ICU");
|
||||
factory.currency = currency;
|
||||
AnyMatcher matcher = factory.currency();
|
||||
factory.symbols = DecimalFormatSymbols.getInstance(ULocale.ENGLISH);
|
||||
CombinedCurrencyMatcher matcher = factory.currency();
|
||||
|
||||
Object[][] cases = new Object[][] {
|
||||
{ "", null },
|
||||
|
Loading…
Reference in New Issue
Block a user