2018-02-10 06:36:07 +00:00
|
|
|
// © 2018 and later: Unicode, Inc. and others.
|
|
|
|
// License & terms of use: http://www.unicode.org/copyright.html
|
|
|
|
|
|
|
|
#include "unicode/utypes.h"
|
|
|
|
|
|
|
|
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
|
|
|
|
|
|
|
|
#include "numparse_types.h"
|
|
|
|
#include "numparse_affixes.h"
|
2018-02-10 10:01:46 +00:00
|
|
|
#include "numparse_utils.h"
|
|
|
|
#include "number_utils.h"
|
2018-02-10 06:36:07 +00:00
|
|
|
|
|
|
|
using namespace icu;
|
|
|
|
using namespace icu::numparse;
|
|
|
|
using namespace icu::numparse::impl;
|
2018-02-10 10:01:46 +00:00
|
|
|
using namespace icu::number;
|
|
|
|
using namespace icu::number::impl;
|
2018-02-10 06:36:07 +00:00
|
|
|
|
|
|
|
|
2018-02-10 10:01:46 +00:00
|
|
|
// Sets up a builder for a single affix pattern.
//
// The builder starts with zero matchers and a "last token" value of 0. It records the pattern,
// the warehouse that hands out token matchers, and the ignorables matcher. The ignorables
// pointer may be nullptr; consumeToken() tests for that before using it.
AffixPatternMatcherBuilder::AffixPatternMatcherBuilder(
        const UnicodeString& pattern,
        AffixTokenMatcherWarehouse& warehouse,
        IgnorablesMatcher* ignorables)
        : fMatchersLen(0),
          fLastTypeOrCp(0),
          fPattern(pattern),
          fWarehouse(warehouse),
          fIgnorables(ignorables) {
}
|
2018-02-10 06:36:07 +00:00
|
|
|
|
2018-02-10 10:01:46 +00:00
|
|
|
// Token callback: invoked by AffixUtils::iterateWithConsumer() once per token of the affix pattern.
//
// type   - the token type; TYPE_CODEPOINT means the token is a literal code point.
// cp     - the literal code point (only consulted when type is TYPE_CODEPOINT).
// status - forwarded to the warehouse when a currency matcher is requested.
void AffixPatternMatcherBuilder::consumeToken(AffixPatternType type, UChar32 cp, UErrorCode& status) {
    // Separate tokens with the ignorables matcher. It is skipped for the very first token and
    // when the previous token was itself an ignorable literal, so that two ignorables matchers
    // never end up next to each other.
    if (fIgnorables != nullptr && fMatchersLen > 0) {
        const bool previousWasIgnorableLiteral =
                fLastTypeOrCp >= 0 && fIgnorables->getSet()->contains(fLastTypeOrCp);
        if (!previousWasIgnorableLiteral) {
            addMatcher(*fIgnorables);
        }
    }

    if (type == TYPE_CODEPOINT) {
        // Literal token. An ignorable literal needs no matcher of its own (the ignorables
        // matcher above already covers it); anything else gets a code point matcher.
        if (fIgnorables == nullptr || !fIgnorables->getSet()->contains(cp)) {
            addMatcher(fWarehouse.nextCodePointMatcher(cp));
        }
        fLastTypeOrCp = cp;
        return;
    }

    // Symbol token: pick the matcher that corresponds to the symbol type.
    switch (type) {
        case TYPE_MINUS_SIGN:
            addMatcher(fWarehouse.minusSign());
            break;

        case TYPE_PLUS_SIGN:
            addMatcher(fWarehouse.plusSign());
            break;

        case TYPE_PERCENT:
            addMatcher(fWarehouse.percent());
            break;

        case TYPE_PERMILLE:
            addMatcher(fWarehouse.permille());
            break;

        case TYPE_CURRENCY_SINGLE:
        case TYPE_CURRENCY_DOUBLE:
        case TYPE_CURRENCY_TRIPLE:
        case TYPE_CURRENCY_QUAD:
        case TYPE_CURRENCY_QUINT:
            // Every currency symbol width shares one matcher.
            addMatcher(fWarehouse.currency(status));
            break;

        default:
            // Any other type is not expected here.
            U_ASSERT(FALSE);
    }

    // Remember the symbol type for the separator decision on the next token.
    fLastTypeOrCp = type;
}
|
|
|
|
|
|
|
|
// Appends a pointer to `matcher` to the builder's matcher array, growing the array to twice
// the current length (keeping the existing entries) when it is full.
//
// NOTE(review): the result of resize() is not inspected here; confirm how the array type
// behaves if growing fails.
void AffixPatternMatcherBuilder::addMatcher(NumberParseMatcher& matcher) {
    const auto slot = fMatchersLen;
    if (slot >= fMatchers.getCapacity()) {
        fMatchers.resize(slot * 2, slot);
    }
    fMatchers[slot] = &matcher;
    fMatchersLen = slot + 1;
}
|
|
|
|
|
|
|
|
// Produces the finished AffixPatternMatcher from everything collected so far: the matcher
// array (handed over by non-const reference), the number of matchers in it, and the pattern.
AffixPatternMatcher AffixPatternMatcherBuilder::build() {
    return AffixPatternMatcher(
            fMatchers,
            fMatchersLen,
            fPattern);
}
|
|
|
|
|
|
|
|
|
2018-02-10 10:57:30 +00:00
|
|
|
// Captures the inputs needed to create token matchers on demand.
//
// currencyCode          - copied into this object's own currencyCode buffer.
// currency1, currency2  - currency strings, later passed to the currency matcher.
// dfs                   - symbols used by the sign, percent and permille matchers.
// ignorables            - ignorables matcher made available to affix pattern matchers.
// locale                - locale later passed to the currency matcher.
//
// No code point matchers exist initially: both the matcher count and the number of overflow
// batches start at zero.
AffixTokenMatcherWarehouse::AffixTokenMatcherWarehouse(
        const UChar* currencyCode,
        const UnicodeString& currency1,
        const UnicodeString& currency2,
        const DecimalFormatSymbols& dfs,
        IgnorablesMatcher* ignorables,
        const Locale& locale)
        : currency1(currency1),
          currency2(currency2),
          dfs(dfs),
          ignorables(ignorables),
          locale(locale),
          codePointCount(0),
          codePointNumBatches(0) {
    // The parameter shadows the member, hence the explicit this->.
    utils::copyCurrencyCode(this->currencyCode, currencyCode);
}
|
|
|
|
|
2018-02-10 10:57:30 +00:00
|
|
|
// Releases every overflow batch of code point matchers that nextCodePointMatcher() allocated
// with new[]. Batches are freed in the order they were created.
AffixTokenMatcherWarehouse::~AffixTokenMatcherWarehouse() {
    int32_t batchIndex = 0;
    while (batchIndex < codePointNumBatches) {
        delete[] codePointsOverflow[batchIndex];
        ++batchIndex;
    }
}
|
|
|
|
|
2018-02-10 11:32:18 +00:00
|
|
|
// Re-creates the minus sign matcher member from the stored symbols and returns a reference
// to it. Each call overwrites the same member.
// NOTE(review): the meaning of the `true` argument is defined by the matcher's constructor,
// which is not visible here — confirm against its declaration.
NumberParseMatcher& AffixTokenMatcherWarehouse::minusSign() {
    fMinusSign = {dfs, true};
    return fMinusSign;
}
|
|
|
|
|
|
|
|
// Re-creates the plus sign matcher member from the stored symbols and returns a reference
// to it. Each call overwrites the same member.
// NOTE(review): the meaning of the `true` argument is defined by the matcher's constructor,
// which is not visible here — confirm against its declaration.
NumberParseMatcher& AffixTokenMatcherWarehouse::plusSign() {
    fPlusSign = {dfs, true};
    return fPlusSign;
}
|
|
|
|
|
|
|
|
// Re-creates the percent matcher member from the stored symbols and returns a reference to it.
// Each call overwrites the same member.
NumberParseMatcher& AffixTokenMatcherWarehouse::percent() {
    fPercent = {dfs};
    return fPercent;
}
|
|
|
|
|
|
|
|
// Re-creates the permille matcher member from the stored symbols and returns a reference to it.
// Each call overwrites the same member.
NumberParseMatcher& AffixTokenMatcherWarehouse::permille() {
    fPermille = {dfs};
    return fPermille;
}
|
|
|
|
|
|
|
|
// Re-creates the currency matcher member and returns a reference to it. The matcher is built
// from two parts: one constructed from the locale (which may report an error through
// `status`), and one constructed from the currency code and the two currency strings.
// Each call overwrites the same member.
NumberParseMatcher& AffixTokenMatcherWarehouse::currency(UErrorCode& status) {
    fCurrency = {{locale, status}, {currencyCode, currency1, currency2}};
    return fCurrency;
}
|
|
|
|
|
|
|
|
// Hands out a fresh matcher for the literal code point `cp` and returns a reference to it.
//
// The first CODE_POINT_STACK_CAPACITY matchers live in the fixed `codePoints` array. Later ones
// are placed in heap batches of CODE_POINT_BATCH_SIZE matchers each; pointers to the batches
// are kept in `codePointsOverflow` and released by the destructor.
//
// NOTE(review): neither the new[] result nor the resize() result is checked below — confirm
// the failure behavior of both before relying on this under low memory.
NumberParseMatcher& AffixTokenMatcherWarehouse::nextCodePointMatcher(UChar32 cp) {
    // Fast path: the fixed array still has a free slot.
    if (codePointCount < CODE_POINT_STACK_CAPACITY) {
        auto& slot = codePoints[codePointCount];
        codePointCount++;
        slot = {cp};
        return slot;
    }

    // Position of the new matcher counted from the start of the overflow area.
    const int32_t overflowIndex = codePointCount - CODE_POINT_STACK_CAPACITY;

    // All batches allocated so far are full: allocate one more.
    if (overflowIndex >= codePointNumBatches * CODE_POINT_BATCH_SIZE) {
        auto* freshBatch = new CodePointMatcher[CODE_POINT_BATCH_SIZE];
        if (codePointNumBatches >= codePointsOverflow.getCapacity()) {
            // The table of batch pointers is full as well; double it, keeping its contents.
            codePointsOverflow.resize(codePointNumBatches * 2, codePointNumBatches);
        }
        codePointsOverflow[codePointNumBatches] = freshBatch;
        codePointNumBatches++;
    }

    // Matchers are handed out sequentially, so the next free slot is always in the newest batch.
    auto* newestBatch = codePointsOverflow[codePointNumBatches - 1];
    codePointCount++;
    auto& slot = newestBatch[overflowIndex % CODE_POINT_BATCH_SIZE];
    slot = {cp};
    return slot;
}
|
|
|
|
|
2018-02-10 10:01:46 +00:00
|
|
|
|
|
|
|
// Creates a matcher for the single code point `cp`.
CodePointMatcher::CodePointMatcher(UChar32 cp)
        : fCp(cp) {
}
|
|
|
|
|
|
|
|
// Tries to consume this matcher's code point at the current position of `segment`.
//
// On a match, the segment offset is advanced by one code point and the consumed position is
// recorded in `result`. The error code parameter is unused. The function returns false whether
// or not the code point matched.
bool CodePointMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode&) const {
    if (!segment.matches(fCp)) {
        // Not our code point: leave segment and result untouched.
        return false;
    }
    segment.adjustOffsetByCodePoint();
    result.setCharsConsumed(segment);
    return false;
}
|
|
|
|
|
|
|
|
// Returns a frozen set containing exactly this matcher's code point.
//
// The set is created on the first call and cached in fLocalLeadCodePoints, which takes
// ownership of it; later calls return the cached set.
const UnicodeSet& CodePointMatcher::getLeadCodePoints() {
    if (!fLocalLeadCodePoints.isNull()) {
        // Already built on an earlier call.
        return *fLocalLeadCodePoints;
    }

    auto* singleCodePointSet = new UnicodeSet();
    singleCodePointSet->add(fCp);
    singleCodePointSet->freeze();
    fLocalLeadCodePoints.adoptInstead(singleCodePointSet);
    return *fLocalLeadCodePoints;
}
|
|
|
|
|
|
|
|
|
2018-02-10 10:57:30 +00:00
|
|
|
// Builds an AffixPatternMatcher for `affixPattern`.
//
// affixPattern - the pattern to convert; an empty pattern yields no matcher.
// warehouse    - supplies the token matchers and the ignorables matcher.
// parseFlags   - when PARSE_FLAG_EXACT_AFFIX is set, no ignorables matcher is used.
// success      - out-parameter: set to false for an empty pattern, true otherwise.
// status       - passed through to the pattern iteration.
//
// Returns a default-constructed matcher for an empty pattern, otherwise the matcher assembled
// by the builder.
AffixPatternMatcher AffixPatternMatcher::fromAffixPattern(const UnicodeString& affixPattern,
                                                          AffixTokenMatcherWarehouse& warehouse,
                                                          parse_flags_t parseFlags, bool* success,
                                                          UErrorCode& status) {
    // Nothing to match for an empty pattern.
    if (affixPattern.isEmpty()) {
        *success = false;
        return {};
    }
    *success = true;

    // Exact-affix parsing must not skip ignorable characters, so it runs without the
    // ignorables matcher; every other mode borrows the warehouse's matcher.
    IgnorablesMatcher* ignorables = nullptr;
    if (0 == (parseFlags & PARSE_FLAG_EXACT_AFFIX)) {
        ignorables = warehouse.ignorables;
    }

    // The builder receives one consumeToken() call per token of the pattern.
    AffixPatternMatcherBuilder builder(affixPattern, warehouse, ignorables);
    AffixUtils::iterateWithConsumer(UnicodeStringCharSequence(affixPattern), builder, status);
    return builder.build();
}
|
|
|
|
|
|
|
|
// Constructs the matcher from an already-assembled series of token matchers.
//
// matchers    - the matcher array, forwarded to the ArraySeriesMatcher base.
// matchersLen - number of entries in `matchers`, forwarded to the base as well.
// pattern     - the affix pattern, recorded in fPattern.
AffixPatternMatcher::AffixPatternMatcher(
        MatcherArray& matchers,
        int32_t matchersLen,
        const UnicodeString& pattern)
        : ArraySeriesMatcher(matchers, matchersLen),
          fPattern(pattern) {
}
|
2018-02-10 06:36:07 +00:00
|
|
|
|
|
|
|
|
|
|
|
#endif /* #if !UCONFIG_NO_FORMATTING */
|