ICU-13634 Refactoring the two separate currency matchers into a single unified CombinedCurrencyMatcher. Allows for easy implementation of currency spacing (included in this changeset) and possibly other currency-related parsing features in the future.

X-SVN-Rev: 41181
This commit is contained in:
Shane Carr 2018-03-31 05:18:51 +00:00
parent 7f9de6f1db
commit 921355c6f0
19 changed files with 304 additions and 467 deletions

View File

@ -190,7 +190,7 @@ NumberParseMatcher& AffixTokenMatcherWarehouse::permille() {
}
NumberParseMatcher& AffixTokenMatcherWarehouse::currency(UErrorCode& status) {
return fCurrency = {{fSetupData->locale, status}, {fSetupData->currencySymbols, status}};
return fCurrency = {fSetupData->currencySymbols, fSetupData->dfs, status};
}
IgnorablesMatcher& AffixTokenMatcherWarehouse::ignorables() {

View File

@ -125,7 +125,7 @@ class AffixTokenMatcherWarehouse : public UMemory {
PlusSignMatcher fPlusSign;
PercentMatcher fPercent;
PermilleMatcher fPermille;
CurrencyAnyMatcher fCurrency;
CombinedCurrencyMatcher fCurrency;
// Use a child class for code point matchers, since it requires non-default operators.
CodePointMatcherWarehouse fCodePoints;

View File

@ -18,44 +18,6 @@ using namespace icu::numparse;
using namespace icu::numparse::impl;
/**
 * Offers the segment to each composed matcher in list order and stops as soon as one of them
 * advances the segment offset.
 *
 * @return the "maybe more" flag reported by the matchers that were consulted.
 */
bool AnyMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
    const int32_t startOffset = segment.getOffset();
    bool couldMatchMore = false;

    // NOTE: begin() and end() are virtual; fetch each exactly once, as a range-based for would.
    auto cursor = begin();
    const auto last = end();
    for (; cursor != last; ++cursor) {
        // Once a matcher has reported "maybe more", later matchers are no longer invoked
        // (this mirrors the short-circuit of `maybeMore || matcher->match(...)`).
        if (!couldMatchMore) {
            couldMatchMore = (*cursor)->match(segment, result, status);
        }
        if (segment.getOffset() != startOffset) {
            // A matcher consumed input. Except for a couple of edge cases, a matcher that accepts
            // string A also accepts any string starting with A, so matchers later in the list
            // cannot do better on a longer string and the scan can stop here.
            break;
        }
    }

    // Reached both when a matcher consumed input and when none did.
    return couldMatchMore;
}
bool AnyMatcher::smokeTest(const StringSegment& segment) const {
// NOTE: The range-based for loop calls the virtual begin() and end() methods.
for (auto& matcher : *this) {
if (matcher->smokeTest(segment)) {
return true;
}
}
return false;
}
/** Forwards post-processing to every composed matcher, in list order. */
void AnyMatcher::postProcess(ParsedNumber& result) const {
    // NOTE: begin() and end() are virtual; fetch each exactly once, as a range-based for would.
    auto cursor = begin();
    const auto last = end();
    for (; cursor != last; ++cursor) {
        (*cursor)->postProcess(result);
    }
}
bool SeriesMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
ParsedNumber backup(result);

View File

@ -29,27 +29,29 @@ class CompositionMatcher : public NumberParseMatcher {
};
/**
* Composes a number of matchers, and succeeds if any of the matchers succeed. Always greedily chooses
* the first matcher in the list to succeed.
*
* NOTE: In C++, this is a base class, unlike ICU4J, which uses a factory-style interface.
*
* @author sffc
* @see SeriesMatcher
*/
class AnyMatcher : public CompositionMatcher {
public:
// Tries the composed matchers in order; stops at the first one that advances the segment offset.
bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
// True if at least one composed matcher's smoke test accepts the segment.
bool smokeTest(const StringSegment& segment) const override;
// Calls postProcess on every composed matcher.
void postProcess(ParsedNumber& result) const override;
protected:
// No construction except by subclasses!
AnyMatcher() = default;
};
// NOTE: AnyMatcher is no longer being used. The previous definition is shown below.
// The implementation can be found in SVN source control, deleted around March 30, 2018.
///**
// * Composes a number of matchers, and succeeds if any of the matchers succeed. Always greedily chooses
// * the first matcher in the list to succeed.
// *
// * NOTE: In C++, this is a base class, unlike ICU4J, which uses a factory-style interface.
// *
// * @author sffc
// * @see SeriesMatcher
// */
//class AnyMatcher : public CompositionMatcher {
// public:
// bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
//
// bool smokeTest(const StringSegment& segment) const override;
//
// void postProcess(ParsedNumber& result) const override;
//
// protected:
// // No construction except by subclasses!
// AnyMatcher() = default;
//};
/**

View File

@ -20,19 +20,83 @@ using namespace icu::numparse;
using namespace icu::numparse::impl;
CurrencyNamesMatcher::CurrencyNamesMatcher(const Locale& locale, UErrorCode& status)
: fLocaleName(locale.getName(), -1, status) {
// Builds a matcher from the custom currency strings in `currencySymbols` and the currency-spacing
// insert strings and locale taken from `dfs`. Errors from the queried APIs are reported through
// `status`; this constructor does not check `status` itself before continuing.
CombinedCurrencyMatcher::CombinedCurrencyMatcher(const CurrencySymbols& currencySymbols,
const DecimalFormatSymbols& dfs, UErrorCode& status)
: fCurrency1(currencySymbols.getCurrencySymbol(status)),
fCurrency2(currencySymbols.getIntlCurrencySymbol(status)),
afterPrefixInsert(dfs.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, false, status)),
beforeSuffixInsert(dfs.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, true, status)),
fLocaleName(dfs.getLocale().getName(), -1, status) {
// Remember the ISO code to report when one of the two custom strings matches.
utils::copyCurrencyCode(fCurrencyCode, currencySymbols.getIsoCode());
// Compute the full set of characters that could be the first in a currency to allow for
// efficient smoke test.
// NOTE(review): char32At(0) is called unconditionally; if any of these strings can be empty,
// confirm what code point ends up in the set.
fLeadCodePoints.add(fCurrency1.char32At(0));
fLeadCodePoints.add(fCurrency2.char32At(0));
// Only beforeSuffixInsert is added: match() tries it at the very start of the segment, whereas
// afterPrefixInsert is only tried after a currency string has already been matched.
fLeadCodePoints.add(beforeSuffixInsert.char32At(0));
// Adds lead code points for the locale named by fLocaleName to the set.
uprv_currencyLeads(fLocaleName.data(), fLeadCodePoints, status);
// Always apply case mapping closure for currencies
fLeadCodePoints.closeOver(USET_ADD_CASE_MAPPINGS);
// Freeze last, after every addition above.
fLeadCodePoints.freeze();
}
bool CurrencyNamesMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
/**
 * Matches a currency string, together with the currency-spacing insert that may sit between the
 * currency and the number: before the currency when a number has already been seen (suffix
 * position), after the currency otherwise (prefix position).
 *
 * The segment offset is restored if no currency is found, so spacing alone is never consumed.
 *
 * @return true if a longer input might still produce a (longer) match.
 */
bool CombinedCurrencyMatcher::match(StringSegment& segment, ParsedNumber& result,
                                    UErrorCode& status) const {
    // Only one currency per parsed number.
    if (result.currencyCode[0] != 0) {
        return false;
    }

    // Try to match a currency spacing separator (suffix position).
    const int32_t initialOffset = segment.getOffset();
    bool maybeMore = false;
    if (result.seenNumber()) {
        const int32_t overlap = segment.getCommonPrefixLength(beforeSuffixInsert);
        // The partial-match test must look at the segment length BEFORE the offset is advanced.
        // Comparing after adjustOffset() measures the text following the spacing, which reported
        // a bogus partial match (and skipped the currency lookup below) whenever that text
        // happened to be as long as the spacing itself, e.g. spacing + "$".
        maybeMore = maybeMore || overlap == segment.length();
        if (overlap == beforeSuffixInsert.length()) {
            segment.adjustOffset(overlap);
            // Note: let currency spacing be a weak match. Don't update chars consumed.
        }
    }

    // Match the currency string, and reset if we didn't find one.
    maybeMore = maybeMore || matchCurrency(segment, result, status);
    if (result.currencyCode[0] == 0) {
        segment.setOffset(initialOffset);
        return maybeMore;
    }

    // Try to match a currency spacing separator (prefix position).
    if (!result.seenNumber()) {
        const int32_t overlap = segment.getCommonPrefixLength(afterPrefixInsert);
        // Same rule as above: decide on the partial match before consuming the spacing.
        maybeMore = maybeMore || overlap == segment.length();
        if (overlap == afterPrefixInsert.length()) {
            segment.adjustOffset(overlap);
            // Note: let currency spacing be a weak match. Don't update chars consumed.
        }
    }

    return maybeMore;
}
bool CombinedCurrencyMatcher::matchCurrency(StringSegment& segment, ParsedNumber& result,
UErrorCode& status) const {
int32_t overlap1 = segment.getCommonPrefixLength(fCurrency1);
if (overlap1 == fCurrency1.length()) {
utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
segment.adjustOffset(overlap1);
result.setCharsConsumed(segment);
return segment.length() == 0;
}
int32_t overlap2 = segment.getCommonPrefixLength(fCurrency2);
if (overlap2 == fCurrency2.length()) {
utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
segment.adjustOffset(overlap2);
result.setCharsConsumed(segment);
return segment.length() == 0;
}
// NOTE: This call site should be improved with #13584.
const UnicodeString segmentString = segment.toTempUnicodeString();
@ -48,9 +112,6 @@ bool CurrencyNamesMatcher::match(StringSegment& segment, ParsedNumber& result, U
result.currencyCode,
status);
// Possible partial match
bool partialMatch = partialMatchLen == segment.length();
if (U_SUCCESS(status) && ppos.getIndex() != 0) {
// Complete match.
// NOTE: The currency code should already be saved in the ParsedNumber.
@ -58,91 +119,16 @@ bool CurrencyNamesMatcher::match(StringSegment& segment, ParsedNumber& result, U
result.setCharsConsumed(segment);
}
return partialMatch;
return overlap1 == segment.length() || overlap2 == segment.length() ||
partialMatchLen == segment.length();
}
bool CurrencyNamesMatcher::smokeTest(const StringSegment& segment) const {
bool CombinedCurrencyMatcher::smokeTest(const StringSegment& segment) const {
return segment.startsWith(fLeadCodePoints);
}
UnicodeString CurrencyNamesMatcher::toString() const {
return u"<CurrencyNames>";
}
// Captures the two strings this matcher recognizes (the currency symbol and the international
// currency symbol from `currencySymbols`) and the ISO code reported when either one matches.
CurrencyCustomMatcher::CurrencyCustomMatcher(const CurrencySymbols& currencySymbols, UErrorCode& status)
: fCurrency1(currencySymbols.getCurrencySymbol(status)),
fCurrency2(currencySymbols.getIntlCurrencySymbol(status)) {
utils::copyCurrencyCode(fCurrencyCode, currencySymbols.getIsoCode());
}
/**
 * Matches one of the two custom currency strings at the start of the segment.
 *
 * At most ONE currency string is consumed per call. Without the early returns, a match of
 * fCurrency1 fell through to a second attempt with fCurrency2 on the remaining text (so
 * "<symbol><iso>" was swallowed as a single currency), and the final partial-match comparison was
 * made against the already-advanced segment.
 *
 * @return true if a longer input might still produce a match.
 */
bool CurrencyCustomMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode&) const {
    // Only one currency per parsed number.
    if (result.currencyCode[0] != 0) {
        return false;
    }

    const int32_t overlap1 = segment.getCommonPrefixLength(fCurrency1);
    if (overlap1 == fCurrency1.length()) {
        utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
        segment.adjustOffset(overlap1);
        result.setCharsConsumed(segment);
        // Input ran out right after the currency: more text could extend the match.
        return segment.length() == 0;
    }

    const int32_t overlap2 = segment.getCommonPrefixLength(fCurrency2);
    if (overlap2 == fCurrency2.length()) {
        utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
        segment.adjustOffset(overlap2);
        result.setCharsConsumed(segment);
        return segment.length() == 0;
    }

    // No full match: report whether the whole segment is a proper prefix of either string.
    return overlap1 == segment.length() || overlap2 == segment.length();
}
/** Cheap pre-check: accepts the segment if it starts with either custom currency string. */
bool CurrencyCustomMatcher::smokeTest(const StringSegment& segment) const {
    if (segment.startsWith(fCurrency1)) {
        return true;
    }
    return segment.startsWith(fCurrency2);
}
// Returns a fixed label identifying this matcher type; it does not include the currency strings.
UnicodeString CurrencyCustomMatcher::toString() const {
return u"<CurrencyCustom>";
}
// Default constructor: the two member matchers are default-constructed; fMatcherArray is pointed
// at them so that begin()/end() expose exactly these two matchers.
CurrencyAnyMatcher::CurrencyAnyMatcher() {
fMatcherArray[0] = &fNamesMatcher;
fMatcherArray[1] = &fCustomMatcher;
}
// Takes ownership of the two given matchers (moved into the members) and wires fMatcherArray to
// this object's own copies. The names matcher comes first, so it is the first one tried.
CurrencyAnyMatcher::CurrencyAnyMatcher(CurrencyNamesMatcher namesMatcher,
CurrencyCustomMatcher customMatcher)
: fNamesMatcher(std::move(namesMatcher)), fCustomMatcher(std::move(customMatcher)) {
fMatcherArray[0] = &fNamesMatcher;
fMatcherArray[1] = &fCustomMatcher;
}
// Move constructor: moves the member matchers, then rebuilds fMatcherArray from this object's own
// member addresses rather than copying src's pointers, which refer to src's members.
CurrencyAnyMatcher::CurrencyAnyMatcher(CurrencyAnyMatcher&& src) U_NOEXCEPT
: fNamesMatcher(std::move(src.fNamesMatcher)), fCustomMatcher(std::move(src.fCustomMatcher)) {
fMatcherArray[0] = &fNamesMatcher;
fMatcherArray[1] = &fCustomMatcher;
}
// Move assignment: only the member matchers are moved. fMatcherArray already points at this
// object's members (set by whichever constructor ran) and must keep doing so.
CurrencyAnyMatcher& CurrencyAnyMatcher::operator=(CurrencyAnyMatcher&& src) U_NOEXCEPT {
fNamesMatcher = std::move(src.fNamesMatcher);
fCustomMatcher = std::move(src.fCustomMatcher);
// Note: do NOT move fMatcherArray
return *this;
}
// Start of the two-element matcher list.
const NumberParseMatcher* const* CurrencyAnyMatcher::begin() const {
return fMatcherArray;
}
// One past the last element; the 2 must stay in sync with the size of fMatcherArray.
const NumberParseMatcher* const* CurrencyAnyMatcher::end() const {
return fMatcherArray + 2;
}
UnicodeString CurrencyAnyMatcher::toString() const {
return u"<CurrencyAny>";
UnicodeString CombinedCurrencyMatcher::toString() const {
return u"<CombinedCurrencyMatcher>";
}

View File

@ -19,38 +19,21 @@ namespace impl {
using ::icu::number::impl::CurrencySymbols;
/**
* Matches currencies according to all available strings in locale data.
* Matches a currency, either a custom currency or one from the data bundle. The class is called
* "combined" to emphasize that the currency string may come from one of multiple sources.
*
* The implementation of this class is different between J and C. See #13584 for a follow-up.
* Will match currency spacing either before or after the number depending on whether we are currently in
* the prefix or suffix.
*
* The implementation of this class is slightly different between J and C. See #13584 for a follow-up.
*
* @author sffc
*/
class CurrencyNamesMatcher : public NumberParseMatcher, public UMemory {
class CombinedCurrencyMatcher : public NumberParseMatcher, public UMemory {
public:
CurrencyNamesMatcher() = default; // WARNING: Leaves the object in an unusable state
CombinedCurrencyMatcher() = default; // WARNING: Leaves the object in an unusable state
CurrencyNamesMatcher(const Locale& locale, UErrorCode& status);
bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
bool smokeTest(const StringSegment& segment) const override;
UnicodeString toString() const override;
private:
// We could use Locale instead of CharString here, but
// Locale has a non-trivial default constructor.
CharString fLocaleName;
UnicodeSet fLeadCodePoints;
};
class CurrencyCustomMatcher : public NumberParseMatcher, public UMemory {
public:
CurrencyCustomMatcher() = default; // WARNING: Leaves the object in an unusable state
CurrencyCustomMatcher(const CurrencySymbols& currencySymbols, UErrorCode& status);
CombinedCurrencyMatcher(const CurrencySymbols& currencySymbols, const DecimalFormatSymbols& dfs, UErrorCode& status);
bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
@ -62,36 +45,18 @@ class CurrencyCustomMatcher : public NumberParseMatcher, public UMemory {
UChar fCurrencyCode[4];
UnicodeString fCurrency1;
UnicodeString fCurrency2;
};
UnicodeString afterPrefixInsert;
UnicodeString beforeSuffixInsert;
/**
* An implementation of AnyMatcher, allowing for either currency data or locale currency matches.
*/
class CurrencyAnyMatcher : public AnyMatcher, public UMemory {
public:
CurrencyAnyMatcher(); // WARNING: Leaves the object in an unusable state
// We could use Locale instead of CharString here, but
// Locale has a non-trivial default constructor.
CharString fLocaleName;
CurrencyAnyMatcher(CurrencyNamesMatcher namesMatcher, CurrencyCustomMatcher customMatcher);
UnicodeSet fLeadCodePoints;
// Needs custom move constructor/operator since constructor is nontrivial
CurrencyAnyMatcher(CurrencyAnyMatcher&& src) U_NOEXCEPT;
CurrencyAnyMatcher& operator=(CurrencyAnyMatcher&& src) U_NOEXCEPT;
UnicodeString toString() const override;
protected:
const NumberParseMatcher* const* begin() const override;
const NumberParseMatcher* const* end() const override;
private:
CurrencyNamesMatcher fNamesMatcher;
CurrencyCustomMatcher fCustomMatcher;
const NumberParseMatcher* fMatcherArray[2];
/** Matches the currency string without concern for currency spacing. */
bool matchCurrency(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const;
};

View File

@ -69,7 +69,7 @@ NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString&
parser->addMatcher(parser->fLocalMatchers.infinity = {symbols});
parser->addMatcher(parser->fLocalMatchers.padding = {u"@"});
parser->addMatcher(parser->fLocalMatchers.scientific = {symbols, grouper});
parser->addMatcher(parser->fLocalMatchers.currencyNames = {locale, status});
parser->addMatcher(parser->fLocalMatchers.currency = {currencySymbols, symbols, status});
// parser.addMatcher(new RequireNumberMatcher());
parser->freeze();
@ -136,8 +136,7 @@ NumberParserImpl::createParserFromProperties(const number::impl::DecimalFormatPr
////////////////////////
if (parseCurrency || patternInfo.hasCurrencySign()) {
parser->addMatcher(parser->fLocalMatchers.currencyCustom = {currencySymbols, status});
parser->addMatcher(parser->fLocalMatchers.currencyNames = {locale, status});
parser->addMatcher(parser->fLocalMatchers.currency = {currencySymbols, symbols, status});
}
///////////////////////////////

View File

@ -68,8 +68,7 @@ class NumberParserImpl : public MutableMatcherCollection {
PlusSignMatcher plusSign;
DecimalMatcher decimal;
ScientificMatcher scientific;
CurrencyNamesMatcher currencyNames;
CurrencyCustomMatcher currencyCustom;
CombinedCurrencyMatcher currency;
AffixMatcherWarehouse affixMatcherWarehouse;
AffixTokenMatcherWarehouse affixTokenMatcherWarehouse;
} fLocalMatchers;

View File

@ -226,9 +226,10 @@ class NumberParserTest : public IntlTest {
void testBasic();
void testLocaleFi();
void testSeriesMatcher();
void testCurrencyAnyMatcher();
void testCombinedCurrencyMatcher();
void testAffixPatternMatcher();
void testGroupingDisabled();
void testCaseFolding();
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
};

View File

@ -23,7 +23,7 @@ void NumberParserTest::runIndexedTest(int32_t index, UBool exec, const char*& na
TESTCASE_AUTO_BEGIN;
TESTCASE_AUTO(testBasic);
TESTCASE_AUTO(testSeriesMatcher);
TESTCASE_AUTO(testCurrencyAnyMatcher);
TESTCASE_AUTO(testCombinedCurrencyMatcher);
TESTCASE_AUTO(testAffixPatternMatcher);
TESTCASE_AUTO_END;
}
@ -211,8 +211,8 @@ void NumberParserTest::testSeriesMatcher() {
}
}
void NumberParserTest::testCurrencyAnyMatcher() {
IcuTestErrorCode status(*this, "testCurrencyAnyMatcher");
void NumberParserTest::testCombinedCurrencyMatcher() {
IcuTestErrorCode status(*this, "testCombinedCurrencyMatcher");
IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES);
Locale locale = Locale::getEnglish();

View File

@ -16,12 +16,12 @@ rt: "0.###" 1.0 "1"
# Basics
fp: "0.####" 0.10005 "0.1" 0.1
fp: - 0.10006 "0.1001" 0.1001
pat: - "#0.####"
pat: - "0.####"
fp: "#.####" 0.10005 "0.1" 0.1
pat: - "#0.####"
pat: - "0.####"
rt: "0" 1234 "1234"
pat: - "#0"
pat: - "0"
# Significant digits
fp: "@@@" 1.234567 "1.23" 1.23
@ -79,12 +79,12 @@ fpc: - 1234.56/JPY "\u00A51,235" 1235/JPY
# ISO codes that overlap display names (QQQ vs. Q)
# recognize real ISO name in parsing, so, can not use fake name as QQQ
#fpc: - 123/QQQ "QQQ123.00" 123/QQQ # QQQ is fake
fpc: - 123/GTQ "GTQ123.00" 123/GTQ
fpc: - 123/GTQ "GTQ 123.00" 123/GTQ
# ChoiceFormat-based display names
fpc: - 1/INR "\u20b91.00" 1/INR
fpc: - 2/INR "\u20b92.00" 2/INR
# Display names with shared prefix (YDD vs. Y)
fpc: - 100/YDD "YDD100.00" 100/YDD
fpc: - 100/YDD "YDD 100.00" 100/YDD
fpc: - 100/CNY "CN\u00a5100.00" 100/CNY
# Regression Tests bug#7914

View File

@ -33,12 +33,8 @@ public class AffixTokenMatcherFactory {
return PermilleMatcher.getInstance(symbols);
}
public AnyMatcher currency() {
AnyMatcher any = new AnyMatcher();
any.addMatcher(CurrencyCustomMatcher.getInstance(currency, locale));
any.addMatcher(CurrencyNamesMatcher.getInstance(locale));
any.freeze();
return any;
public CombinedCurrencyMatcher currency() {
return CombinedCurrencyMatcher.getInstance(currency, symbols);
}
public IgnorablesMatcher ignorables() {

View File

@ -1,92 +0,0 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
import java.util.ArrayList;
import java.util.List;
import com.ibm.icu.impl.StringSegment;
/**
* Composes a number of matchers, and succeeds if any of the matchers succeed. Always greedily chooses
* the first matcher in the list to succeed.
*
* @author sffc
* @see SeriesMatcher
*/
public class AnyMatcher implements NumberParseMatcher {

    /** Composed matchers in priority order; stays null until the first matcher is added. */
    protected List<NumberParseMatcher> matchers = null;

    /** Set by {@link #freeze()}; matchers may only be added before, and used after, freezing. */
    protected boolean frozen = false;

    /** Appends a matcher to the end of the list. Must be called before {@link #freeze()}. */
    public void addMatcher(NumberParseMatcher matcher) {
        assert !frozen;
        if (matchers == null) {
            matchers = new ArrayList<NumberParseMatcher>();
        }
        matchers.add(matcher);
    }

    /** Marks the matcher list as complete. */
    public void freeze() {
        frozen = true;
    }

    /**
     * Offers the segment to each matcher in order and stops as soon as one of them advances the
     * segment offset.
     *
     * @return the "maybe more" flag reported by the matchers that were consulted.
     */
    @Override
    public boolean match(StringSegment segment, ParsedNumber result) {
        assert frozen;
        boolean couldMatchMore = false;
        if (matchers != null) {
            final int startOffset = segment.getOffset();
            for (NumberParseMatcher candidate : matchers) {
                // Once a matcher has reported "maybe more", later matchers are no longer invoked
                // (this mirrors the short-circuit of `maybeMore || matcher.match(...)`).
                if (!couldMatchMore) {
                    couldMatchMore = candidate.match(segment, result);
                }
                if (segment.getOffset() != startOffset) {
                    // A matcher consumed input. Except for a couple of edge cases, a matcher that
                    // accepts string A also accepts any string starting with A, so matchers later
                    // in the list cannot do better on a longer string and the scan can stop here.
                    break;
                }
            }
        }
        // Reached both when a matcher consumed input and when none did.
        return couldMatchMore;
    }

    /** Returns true as soon as any composed matcher's smoke test accepts the segment. */
    @Override
    public boolean smokeTest(StringSegment segment) {
        assert frozen;
        if (matchers != null) {
            for (NumberParseMatcher candidate : matchers) {
                if (candidate.smokeTest(segment)) {
                    return true;
                }
            }
        }
        return false;
    }

    /** Forwards post-processing to every composed matcher, in list order. */
    @Override
    public void postProcess(ParsedNumber result) {
        assert frozen;
        if (matchers != null) {
            for (NumberParseMatcher candidate : matchers) {
                candidate.postProcess(result);
            }
        }
    }

    @Override
    public String toString() {
        return "<AnyMatcher " + matchers + ">";
    }
}

View File

@ -0,0 +1,157 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
import java.util.Iterator;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.impl.TextTrieMap;
import com.ibm.icu.text.DecimalFormatSymbols;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.Currency;
import com.ibm.icu.util.Currency.CurrencyStringInfo;
/**
* Matches a currency, either a custom currency or one from the data bundle. The class is called
* "combined" to emphasize that the currency string may come from one of multiple sources.
*
* Will match currency spacing either before or after the number depending on whether we are currently in
* the prefix or suffix.
*
* The implementation of this class is slightly different between J and C. See #13584 for a follow-up.
*
* @author sffc
*/
public class CombinedCurrencyMatcher implements NumberParseMatcher {

    /** ISO code reported when one of the two custom strings matches. */
    private final String isoCode;
    /** Custom currency strings: the locale symbol and the currency code. */
    private final String currency1;
    private final String currency2;

    /** Currency spacing inserts: after a prefix currency / before a suffix currency. */
    private final String afterPrefixInsert;
    private final String beforeSuffixInsert;

    /** Locale data tries, consulted when neither custom string matches. */
    private final TextTrieMap<CurrencyStringInfo> longNameTrie;
    private final TextTrieMap<CurrencyStringInfo> symbolTrie;

    /** Every code point that can start a match; used by {@link #smokeTest}. */
    private final UnicodeSet leadCodePoints;

    public static CombinedCurrencyMatcher getInstance(Currency currency, DecimalFormatSymbols dfs) {
        // TODO: Cache these instances. They are somewhat expensive.
        return new CombinedCurrencyMatcher(currency, dfs);
    }

    private CombinedCurrencyMatcher(Currency currency, DecimalFormatSymbols dfs) {
        this.isoCode = currency.getSubtype();
        this.currency1 = currency.getSymbol(dfs.getULocale());
        this.currency2 = currency.getCurrencyCode();

        afterPrefixInsert = dfs
                .getPatternForCurrencySpacing(DecimalFormatSymbols.CURRENCY_SPC_INSERT, false);
        beforeSuffixInsert = dfs
                .getPatternForCurrencySpacing(DecimalFormatSymbols.CURRENCY_SPC_INSERT, true);

        // TODO: Currency trie does not currently have an option for case folding. It defaults to use
        // case folding on long-names but not symbols.
        longNameTrie = Currency.getParsingTrie(dfs.getULocale(), Currency.LONG_NAME);
        symbolTrie = Currency.getParsingTrie(dfs.getULocale(), Currency.SYMBOL_NAME);

        // Compute the full set of characters that could be the first in a currency to allow for
        // efficient smoke test.
        leadCodePoints = new UnicodeSet();
        addLeadCodePoint(leadCodePoints, currency1);
        addLeadCodePoint(leadCodePoints, currency2);
        // Only the before-suffix spacing can be the first thing this matcher consumes; the
        // after-prefix spacing is only tried once a currency string has been matched.
        addLeadCodePoint(leadCodePoints, beforeSuffixInsert);
        longNameTrie.putLeadCodePoints(leadCodePoints);
        symbolTrie.putLeadCodePoints(leadCodePoints);
        // Always apply case mapping closure for currencies
        leadCodePoints.closeOver(UnicodeSet.ADD_CASE_MAPPINGS);
        leadCodePoints.freeze();
    }

    /**
     * Adds the first code point of {@code text} to {@code set}. Empty strings (e.g. a custom
     * currency without a symbol, or a locale without a spacing insert) contribute nothing;
     * calling {@code codePointAt(0)} on them would throw.
     */
    private static void addLeadCodePoint(UnicodeSet set, String text) {
        if (text.length() > 0) {
            set.add(text.codePointAt(0));
        }
    }

    /**
     * Matches a currency string, together with the currency-spacing insert that may sit between
     * the currency and the number: before the currency when a number has already been seen
     * (suffix position), after the currency otherwise (prefix position).
     *
     * The segment offset is restored if no currency is found, so spacing alone is never consumed.
     *
     * @return true if a longer input might still produce a (longer) match.
     */
    @Override
    public boolean match(StringSegment segment, ParsedNumber result) {
        // Only one currency per parsed number.
        if (result.currencyCode != null) {
            return false;
        }

        // Try to match a currency spacing separator (suffix position).
        int initialOffset = segment.getOffset();
        boolean maybeMore = false;
        if (result.seenNumber()) {
            maybeMore = consumeSpacing(segment, beforeSuffixInsert);
        }

        // Match the currency string, and reset if we didn't find one.
        maybeMore = maybeMore || matchCurrency(segment, result);
        if (result.currencyCode == null) {
            segment.setOffset(initialOffset);
            return maybeMore;
        }

        // Try to match a currency spacing separator (prefix position).
        if (!result.seenNumber()) {
            boolean spacingMaybeMore = consumeSpacing(segment, afterPrefixInsert);
            maybeMore = maybeMore || spacingMaybeMore;
        }

        return maybeMore;
    }

    /**
     * Advances the segment past {@code spacing} if the segment starts with all of it. Spacing is
     * a weak match: chars consumed is deliberately not updated.
     *
     * @return true if the whole segment is a prefix of {@code spacing}. This is decided BEFORE
     *         the offset is advanced; comparing afterwards would measure the text following the
     *         spacing and report a bogus partial match whenever that text happens to be as long
     *         as the spacing itself (e.g. spacing + "$").
     */
    private static boolean consumeSpacing(StringSegment segment, String spacing) {
        int overlap = segment.getCommonPrefixLength(spacing);
        boolean segmentExhausted = overlap == segment.length();
        if (overlap == spacing.length()) {
            segment.adjustOffset(overlap);
        }
        return segmentExhausted;
    }

    /** Matches the currency string without concern for currency spacing. */
    private boolean matchCurrency(StringSegment segment, ParsedNumber result) {
        // An empty custom string must never count as found: -1 keeps both the full-match and
        // the partial-match comparisons below false.
        int overlap1 = currency1.length() == 0 ? -1 : segment.getCommonPrefixLength(currency1);
        if (overlap1 == currency1.length()) {
            result.currencyCode = isoCode;
            segment.adjustOffset(overlap1);
            result.setCharsConsumed(segment);
            return segment.length() == 0;
        }

        int overlap2 = currency2.length() == 0 ? -1 : segment.getCommonPrefixLength(currency2);
        if (overlap2 == currency2.length()) {
            result.currencyCode = isoCode;
            segment.adjustOffset(overlap2);
            result.setCharsConsumed(segment);
            return segment.length() == 0;
        }

        // Decide whether the segment is a proper prefix of a custom string now, while
        // segment.length() still describes the text the overlaps were computed on; the trie
        // match below may advance the offset.
        boolean customPartialMatch = overlap1 == segment.length() || overlap2 == segment.length();

        TextTrieMap.Output trieOutput = new TextTrieMap.Output();
        Iterator<CurrencyStringInfo> values = longNameTrie.get(segment, 0, trieOutput);
        if (values == null) {
            values = symbolTrie.get(segment, 0, trieOutput);
        }
        if (values != null) {
            result.currencyCode = values.next().getISOCode();
            segment.adjustOffset(trieOutput.matchLength);
            result.setCharsConsumed(segment);
        }

        return customPartialMatch || trieOutput.partialMatch;
    }

    @Override
    public boolean smokeTest(StringSegment segment) {
        return segment.startsWith(leadCodePoints);
    }

    @Override
    public void postProcess(ParsedNumber result) {
        // No-op
    }

    @Override
    public String toString() {
        return "<CombinedCurrencyMatcher " + isoCode + ">";
    }
}

View File

@ -1,67 +0,0 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.util.Currency;
import com.ibm.icu.util.ULocale;
/**
* A matcher for a single currency instance (not the full trie).
*/
public class CurrencyCustomMatcher implements NumberParseMatcher {

    /** ISO code reported when either currency string matches. */
    private final String isoCode;
    /** The two recognized strings: the locale symbol and the currency code. */
    private final String currency1;
    private final String currency2;

    public static CurrencyCustomMatcher getInstance(Currency currency, ULocale loc) {
        return new CurrencyCustomMatcher(currency.getSubtype(),
                currency.getSymbol(loc),
                currency.getCurrencyCode());
    }

    private CurrencyCustomMatcher(String isoCode, String currency1, String currency2) {
        this.isoCode = isoCode;
        this.currency1 = currency1;
        this.currency2 = currency2;
    }

    /**
     * Matches one of the two currency strings at the start of the segment.
     *
     * At most ONE currency string is consumed per call. Without the early returns, a match of
     * currency1 fell through to a second attempt with currency2 on the remaining text (so
     * "&lt;symbol&gt;&lt;iso&gt;" was swallowed as a single currency), and the final partial-match
     * comparison was made against the already-advanced segment.
     *
     * @return true if a longer input might still produce a match.
     */
    @Override
    public boolean match(StringSegment segment, ParsedNumber result) {
        // Only one currency per parsed number.
        if (result.currencyCode != null) {
            return false;
        }

        int overlap1 = segment.getCommonPrefixLength(currency1);
        if (overlap1 == currency1.length()) {
            result.currencyCode = isoCode;
            segment.adjustOffset(overlap1);
            result.setCharsConsumed(segment);
            // Input ran out right after the currency: more text could extend the match.
            return segment.length() == 0;
        }

        int overlap2 = segment.getCommonPrefixLength(currency2);
        if (overlap2 == currency2.length()) {
            result.currencyCode = isoCode;
            segment.adjustOffset(overlap2);
            result.setCharsConsumed(segment);
            return segment.length() == 0;
        }

        // No full match: report whether the whole segment is a proper prefix of either string.
        return overlap1 == segment.length() || overlap2 == segment.length();
    }

    @Override
    public boolean smokeTest(StringSegment segment) {
        return segment.startsWith(currency1) || segment.startsWith(currency2);
    }

    @Override
    public void postProcess(ParsedNumber result) {
        // No-op
    }

    @Override
    public String toString() {
        return "<CurrencyMatcher " + isoCode + ">";
    }
}

View File

@ -1,82 +0,0 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
import java.util.Iterator;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.impl.TextTrieMap;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.Currency;
import com.ibm.icu.util.Currency.CurrencyStringInfo;
import com.ibm.icu.util.ULocale;
/**
* Matches currencies according to all available strings in locale data.
*
* The implementation of this class is different between J and C. See #13584 for a follow-up.
*
* @author sffc
*/
public class CurrencyNamesMatcher implements NumberParseMatcher {

    /** Parsing tries from locale data: long names first, symbols as the fallback. */
    private final TextTrieMap<CurrencyStringInfo> longNameTrie;
    private final TextTrieMap<CurrencyStringInfo> symbolTrie;

    /** Every code point that can start a currency string in either trie. */
    private final UnicodeSet leadCodePoints;

    public static CurrencyNamesMatcher getInstance(ULocale locale) {
        // TODO: Pre-compute some of the more popular locales?
        return new CurrencyNamesMatcher(locale);
    }

    private CurrencyNamesMatcher(ULocale locale) {
        // TODO: Currency trie does not currently have an option for case folding. It defaults to use
        // case folding on long-names but not symbols.
        longNameTrie = Currency.getParsingTrie(locale, Currency.LONG_NAME);
        symbolTrie = Currency.getParsingTrie(locale, Currency.SYMBOL_NAME);

        // Collect the lead code points of both tries so smokeTest() can reject segments cheaply.
        UnicodeSet leads = new UnicodeSet();
        longNameTrie.putLeadCodePoints(leads);
        symbolTrie.putLeadCodePoints(leads);
        // Always apply case mapping closure for currencies
        leads.closeOver(UnicodeSet.ADD_CASE_MAPPINGS);
        leads.freeze();
        leadCodePoints = leads;
    }

    /**
     * Looks the segment up in the long-name trie and, if that finds nothing, in the symbol trie.
     * On a hit, records the ISO code of the first result and consumes the matched text.
     *
     * @return the trie's partial-match flag; false if a currency was already parsed.
     */
    @Override
    public boolean match(StringSegment segment, ParsedNumber result) {
        // Only one currency per parsed number.
        if (result.currencyCode != null) {
            return false;
        }

        TextTrieMap.Output lookup = new TextTrieMap.Output();
        Iterator<CurrencyStringInfo> hits = longNameTrie.get(segment, 0, lookup);
        if (hits == null) {
            hits = symbolTrie.get(segment, 0, lookup);
        }
        if (hits == null) {
            return lookup.partialMatch;
        }

        result.currencyCode = hits.next().getISOCode();
        segment.adjustOffset(lookup.matchLength);
        result.setCharsConsumed(segment);
        return lookup.partialMatch;
    }

    @Override
    public boolean smokeTest(StringSegment segment) {
        return segment.startsWith(leadCodePoints);
    }

    @Override
    public void postProcess(ParsedNumber result) {
        // No-op
    }

    @Override
    public String toString() {
        return "<CurrencyTrieMatcher>";
    }
}

View File

@ -37,11 +37,12 @@ public class NumberParserImpl {
public static NumberParserImpl createSimpleParser(ULocale locale, String pattern, int parseFlags) {
NumberParserImpl parser = new NumberParserImpl(parseFlags);
Currency currency = Currency.getInstance("USD");
DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(locale);
IgnorablesMatcher ignorables = IgnorablesMatcher.DEFAULT;
AffixTokenMatcherFactory factory = new AffixTokenMatcherFactory();
factory.currency = Currency.getInstance("USD");
factory.currency = currency;
factory.symbols = symbols;
factory.ignorables = ignorables;
factory.locale = locale;
@ -61,7 +62,7 @@ public class NumberParserImpl {
parser.addMatcher(InfinityMatcher.getInstance(symbols));
parser.addMatcher(PaddingMatcher.getInstance("@"));
parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper));
parser.addMatcher(CurrencyNamesMatcher.getInstance(locale));
parser.addMatcher(CombinedCurrencyMatcher.getInstance(currency, symbols));
parser.addMatcher(new RequireNumberValidator());
parser.freeze();
@ -185,8 +186,7 @@ public class NumberParserImpl {
////////////////////////
if (parseCurrency || patternInfo.hasCurrencySign()) {
parser.addMatcher(CurrencyCustomMatcher.getInstance(currency, locale));
parser.addMatcher(CurrencyNamesMatcher.getInstance(locale));
parser.addMatcher(CombinedCurrencyMatcher.getInstance(currency, symbols));
}
///////////////////////////////

View File

@ -5974,4 +5974,14 @@ public class NumberFormatTest extends TestFmwk {
df.setParseStrict(true);
expect2(df, 0.5, "50x%");
}
/**
 * Strict parsing must round-trip an ISO currency code that is separated from the number by
 * currency spacing, both as a prefix and as a suffix.
 */
@Test
public void testParseIsoStrict() {
    DecimalFormatSymbols englishSymbols = DecimalFormatSymbols.getInstance(ULocale.ENGLISH);
    // Double currency sign: ISO code before positive numbers, after negative ones.
    DecimalFormat isoFormat = new DecimalFormat("¤¤0;-0¤¤", englishSymbols);
    isoFormat.setCurrency(Currency.getInstance("USD"));
    isoFormat.setParseStrict(true);

    expect2(isoFormat, 45, "USD 45.00");
    expect2(isoFormat, -45, "-45.00 USD");
}
}

View File

@ -13,7 +13,7 @@ import com.ibm.icu.impl.number.CustomSymbolCurrency;
import com.ibm.icu.impl.number.DecimalFormatProperties;
import com.ibm.icu.impl.number.parse.AffixPatternMatcher;
import com.ibm.icu.impl.number.parse.AffixTokenMatcherFactory;
import com.ibm.icu.impl.number.parse.AnyMatcher;
import com.ibm.icu.impl.number.parse.CombinedCurrencyMatcher;
import com.ibm.icu.impl.number.parse.IgnorablesMatcher;
import com.ibm.icu.impl.number.parse.MinusSignMatcher;
import com.ibm.icu.impl.number.parse.NumberParserImpl;
@ -229,12 +229,13 @@ public class NumberParserTest {
}
@Test
public void testCurrencyAnyMatcher() {
public void testCombinedCurrencyMatcher() {
AffixTokenMatcherFactory factory = new AffixTokenMatcherFactory();
factory.locale = ULocale.ENGLISH;
CustomSymbolCurrency currency = new CustomSymbolCurrency("ICU", "IU$", "ICU");
factory.currency = currency;
AnyMatcher matcher = factory.currency();
factory.symbols = DecimalFormatSymbols.getInstance(ULocale.ENGLISH);
CombinedCurrencyMatcher matcher = factory.currency();
Object[][] cases = new Object[][] {
{ "", null },