ICU-13574 Basic parsing tests are passing on the pieces of code written so far, DecimalMatcher and MinusSignMatcher.
X-SVN-Rev: 40872
This commit is contained in:
parent
31a4dfe3e4
commit
8393405113
@ -13,6 +13,8 @@
|
|||||||
#include "numparse_decimal.h"
|
#include "numparse_decimal.h"
|
||||||
#include "unicode/numberformatter.h"
|
#include "unicode/numberformatter.h"
|
||||||
|
|
||||||
|
#include <typeinfo>
|
||||||
|
|
||||||
using namespace icu;
|
using namespace icu;
|
||||||
using namespace icu::number;
|
using namespace icu::number;
|
||||||
using namespace icu::number::impl;
|
using namespace icu::number::impl;
|
||||||
@ -92,22 +94,121 @@ void NumberParserImpl::freeze() {
|
|||||||
fFrozen = true;
|
fFrozen = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
//void
|
void NumberParserImpl::parse(const UnicodeString& input, bool greedy, ParsedNumber& result,
|
||||||
//NumberParserImpl::parse(const UnicodeString& input, int32_t start, bool greedy, ParsedNumber& result,
|
UErrorCode& status) const {
|
||||||
// UErrorCode& status) const {
|
return parse(input, 0, greedy, result, status);
|
||||||
// U_ASSERT(frozen);
|
}
|
||||||
// // TODO: Check start >= 0 and start < input.length()
|
|
||||||
// StringSegment segment(utils::maybeFold(input, parseFlags));
|
void
|
||||||
// segment.adjustOffset(start);
|
NumberParserImpl::parse(const UnicodeString& input, int32_t start, bool greedy, ParsedNumber& result,
|
||||||
// if (greedy) {
|
UErrorCode& status) const {
|
||||||
// parseGreedyRecursive(segment, result);
|
U_ASSERT(fFrozen);
|
||||||
// } else {
|
// TODO: Check start >= 0 and start < input.length()
|
||||||
// parseLongestRecursive(segment, result);
|
StringSegment segment(input, fParseFlags);
|
||||||
// }
|
segment.adjustOffset(start);
|
||||||
// for (NumberParseMatcher matcher : matchers) {
|
if (greedy) {
|
||||||
// matcher.postProcess(result);
|
parseGreedyRecursive(segment, result, status);
|
||||||
// }
|
} else {
|
||||||
//}
|
parseLongestRecursive(segment, result, status);
|
||||||
|
}
|
||||||
|
for (int32_t i = 0; i < fNumMatchers; i++) {
|
||||||
|
fMatchers[i]->postProcess(result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void NumberParserImpl::parseGreedyRecursive(StringSegment& segment, ParsedNumber& result,
|
||||||
|
UErrorCode& status) const {
|
||||||
|
// Base Case
|
||||||
|
if (segment.length() == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int initialOffset = segment.getOffset();
|
||||||
|
int leadCp = segment.getCodePoint();
|
||||||
|
for (int32_t i = 0; i < fNumMatchers; i++) {
|
||||||
|
if (fComputeLeads && !fLeads[i]->contains(leadCp)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const NumberParseMatcher* matcher = fMatchers[i];
|
||||||
|
matcher->match(segment, result, status);
|
||||||
|
if (U_FAILURE(status)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (segment.getOffset() != initialOffset) {
|
||||||
|
// In a greedy parse, recurse on only the first match.
|
||||||
|
parseGreedyRecursive(segment, result, status);
|
||||||
|
// The following line resets the offset so that the StringSegment stays the same across
|
||||||
|
// the function
|
||||||
|
// call boundary. Since we recurse only once, this line is not strictly necessary.
|
||||||
|
segment.setOffset(initialOffset);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// NOTE: If we get here, the greedy parse completed without consuming the entire string.
|
||||||
|
}
|
||||||
|
|
||||||
|
void NumberParserImpl::parseLongestRecursive(StringSegment& segment, ParsedNumber& result,
|
||||||
|
UErrorCode& status) const {
|
||||||
|
// Base Case
|
||||||
|
if (segment.length() == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Give a nice way for the matcher to reset the ParsedNumber?
|
||||||
|
ParsedNumber initial(result);
|
||||||
|
ParsedNumber candidate;
|
||||||
|
|
||||||
|
int initialOffset = segment.getOffset();
|
||||||
|
for (int32_t i = 0; i < fNumMatchers; i++) {
|
||||||
|
// TODO: Check leadChars here?
|
||||||
|
const NumberParseMatcher* matcher = fMatchers[i];
|
||||||
|
|
||||||
|
// In a non-greedy parse, we attempt all possible matches and pick the best.
|
||||||
|
for (int32_t charsToConsume = 0; charsToConsume < segment.length();) {
|
||||||
|
charsToConsume += U16_LENGTH(segment.codePointAt(charsToConsume));
|
||||||
|
|
||||||
|
// Run the matcher on a segment of the current length.
|
||||||
|
candidate = initial;
|
||||||
|
segment.setLength(charsToConsume);
|
||||||
|
bool maybeMore = matcher->match(segment, candidate, status);
|
||||||
|
segment.resetLength();
|
||||||
|
if (U_FAILURE(status)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the entire segment was consumed, recurse.
|
||||||
|
if (segment.getOffset() - initialOffset == charsToConsume) {
|
||||||
|
parseLongestRecursive(segment, candidate, status);
|
||||||
|
if (U_FAILURE(status)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (candidate.isBetterThan(result)) {
|
||||||
|
result = candidate;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Since the segment can be re-used, reset the offset.
|
||||||
|
// This does not have an effect if the matcher did not consume any chars.
|
||||||
|
segment.setOffset(initialOffset);
|
||||||
|
|
||||||
|
// Unless the matcher wants to see the next char, continue to the next matcher.
|
||||||
|
if (!maybeMore) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
UnicodeString NumberParserImpl::toString() const {
|
||||||
|
UnicodeString result(u"<NumberParserImpl matchers:[");
|
||||||
|
for (int32_t i = 0; i < fNumMatchers; i++) {
|
||||||
|
result.append(u' ');
|
||||||
|
result.append(UnicodeString(typeid(*fMatchers[i]).name()));
|
||||||
|
}
|
||||||
|
result.append(u" ]>", -1);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||||
|
@ -42,9 +42,9 @@ class NumberParserImpl {
|
|||||||
|
|
||||||
~NumberParserImpl();
|
~NumberParserImpl();
|
||||||
|
|
||||||
void parseGreedyRecursive(StringSegment& segment, ParsedNumber& result) const;
|
void parseGreedyRecursive(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const;
|
||||||
|
|
||||||
void parseLongestRecursive(StringSegment& segment, ParsedNumber& result) const;
|
void parseLongestRecursive(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -70,6 +70,11 @@ double ParsedNumber::getDouble() const {
|
|||||||
return quantity.toDouble();
|
return quantity.toDouble();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool ParsedNumber::isBetterThan(const ParsedNumber& other) {
|
||||||
|
// Favor results with strictly more characters consumed.
|
||||||
|
return charEnd > other.charEnd;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||||
|
@ -9,13 +9,16 @@
|
|||||||
#include "numparse_stringsegment.h"
|
#include "numparse_stringsegment.h"
|
||||||
#include "putilimp.h"
|
#include "putilimp.h"
|
||||||
#include "unicode/utf16.h"
|
#include "unicode/utf16.h"
|
||||||
|
#include "unicode/uniset.h"
|
||||||
|
|
||||||
using namespace icu;
|
using namespace icu;
|
||||||
using namespace icu::numparse;
|
using namespace icu::numparse;
|
||||||
using namespace icu::numparse::impl;
|
using namespace icu::numparse::impl;
|
||||||
|
|
||||||
|
|
||||||
StringSegment::StringSegment(const UnicodeString &str) : fStr(str), fStart(0), fEnd(str.length()) {}
|
StringSegment::StringSegment(const UnicodeString& str, parse_flags_t parseFlags)
|
||||||
|
: fStr(str), fStart(0), fEnd(str.length()),
|
||||||
|
fFoldCase(0 != (parseFlags & PARSE_FLAG_IGNORE_CASE)) {}
|
||||||
|
|
||||||
int32_t StringSegment::getOffset() const {
|
int32_t StringSegment::getOffset() const {
|
||||||
return fStart;
|
return fStart;
|
||||||
@ -29,6 +32,10 @@ void StringSegment::adjustOffset(int32_t delta) {
|
|||||||
fStart += delta;
|
fStart += delta;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void StringSegment::adjustOffsetByCodePoint() {
|
||||||
|
fStart += U16_LENGTH(getCodePoint());
|
||||||
|
}
|
||||||
|
|
||||||
void StringSegment::setLength(int32_t length) {
|
void StringSegment::setLength(int32_t length) {
|
||||||
fEnd = fStart + length;
|
fEnd = fStart + length;
|
||||||
}
|
}
|
||||||
@ -64,10 +71,35 @@ UChar32 StringSegment::getCodePoint() const {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t StringSegment::getCommonPrefixLength(const UnicodeString &other) {
|
bool StringSegment::matches(UChar32 otherCp) const {
|
||||||
|
return codePointsEqual(getCodePoint(), otherCp, fFoldCase);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool StringSegment::matches(const UnicodeSet& uniset) const {
|
||||||
|
// TODO: Move UnicodeSet case-folding logic here.
|
||||||
|
// TODO: Handle string matches here instead of separately.
|
||||||
|
UChar32 cp = getCodePoint();
|
||||||
|
if (cp == -1) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return uniset.contains(cp);
|
||||||
|
}
|
||||||
|
|
||||||
|
int32_t StringSegment::getCommonPrefixLength(const UnicodeString& other) {
|
||||||
|
return getPrefixLengthInternal(other, fFoldCase);
|
||||||
|
}
|
||||||
|
|
||||||
|
int32_t StringSegment::getCaseSensitivePrefixLength(const UnicodeString& other) {
|
||||||
|
return getPrefixLengthInternal(other, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
int32_t StringSegment::getPrefixLengthInternal(const UnicodeString& other, bool foldCase) {
|
||||||
int32_t offset = 0;
|
int32_t offset = 0;
|
||||||
for (; offset < uprv_min(length(), other.length());) {
|
for (; offset < uprv_min(length(), other.length());) {
|
||||||
if (charAt(offset) != other.charAt(offset)) {
|
// TODO: case-fold code points, not chars
|
||||||
|
char16_t c1 = charAt(offset);
|
||||||
|
char16_t c2 = other.charAt(offset);
|
||||||
|
if (!codePointsEqual(c1, c2, foldCase)) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
offset++;
|
offset++;
|
||||||
@ -75,5 +107,17 @@ int32_t StringSegment::getCommonPrefixLength(const UnicodeString &other) {
|
|||||||
return offset;
|
return offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool StringSegment::codePointsEqual(UChar32 cp1, UChar32 cp2, bool foldCase) {
|
||||||
|
if (cp1 == cp2) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (!foldCase) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
cp1 = u_foldCase(cp1, TRUE);
|
||||||
|
cp2 = u_foldCase(cp2, TRUE);
|
||||||
|
return cp1 == cp2;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||||
|
@ -130,6 +130,8 @@ class ParsedNumber {
|
|||||||
bool seenNumber() const;
|
bool seenNumber() const;
|
||||||
|
|
||||||
double getDouble() const;
|
double getDouble() const;
|
||||||
|
|
||||||
|
bool isBetterThan(const ParsedNumber& other);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -141,7 +143,7 @@ class ParsedNumber {
|
|||||||
*/
|
*/
|
||||||
class StringSegment : public UMemory, public ::icu::number::impl::CharSequence {
|
class StringSegment : public UMemory, public ::icu::number::impl::CharSequence {
|
||||||
public:
|
public:
|
||||||
explicit StringSegment(const UnicodeString& str);
|
explicit StringSegment(const UnicodeString& str, parse_flags_t parseFlags);
|
||||||
|
|
||||||
int32_t getOffset() const;
|
int32_t getOffset() const;
|
||||||
|
|
||||||
@ -157,6 +159,11 @@ class StringSegment : public UMemory, public ::icu::number::impl::CharSequence {
|
|||||||
*/
|
*/
|
||||||
void adjustOffset(int32_t delta);
|
void adjustOffset(int32_t delta);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adjusts the offset by the width of the current code point, either 1 or 2 chars.
|
||||||
|
*/
|
||||||
|
void adjustOffsetByCodePoint();
|
||||||
|
|
||||||
void setLength(int32_t length);
|
void setLength(int32_t length);
|
||||||
|
|
||||||
void resetLength();
|
void resetLength();
|
||||||
@ -172,20 +179,51 @@ class StringSegment : public UMemory, public ::icu::number::impl::CharSequence {
|
|||||||
/**
|
/**
|
||||||
* Returns the first code point in the string segment, or -1 if the string starts with an invalid
|
* Returns the first code point in the string segment, or -1 if the string starts with an invalid
|
||||||
* code point.
|
* code point.
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* <strong>Important:</strong> Most of the time, you should use {@link #matches}, which handles case
|
||||||
|
* folding logic, instead of this method.
|
||||||
*/
|
*/
|
||||||
UChar32 getCodePoint() const;
|
UChar32 getCodePoint() const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true if the first code point of this StringSegment equals the given code point.
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* This method will perform case folding if case folding is enabled for the parser.
|
||||||
|
*/
|
||||||
|
bool matches(UChar32 otherCp) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true if the first code point of this StringSegment is in the given UnicodeSet.
|
||||||
|
*/
|
||||||
|
bool matches(const UnicodeSet& uniset) const;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the length of the prefix shared by this StringSegment and the given CharSequence. For
|
* Returns the length of the prefix shared by this StringSegment and the given CharSequence. For
|
||||||
* example, if this string segment is "aab", and the char sequence is "aac", this method returns 2,
|
* example, if this string segment is "aab", and the char sequence is "aac", this method returns 2,
|
||||||
* since the first 2 characters are the same.
|
* since the first 2 characters are the same.
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* This method will perform case folding if case folding is enabled for the parser.
|
||||||
*/
|
*/
|
||||||
int32_t getCommonPrefixLength(const UnicodeString& other);
|
int32_t getCommonPrefixLength(const UnicodeString& other);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Like {@link #getCommonPrefixLength}, but never performs case folding, even if case folding is
|
||||||
|
* enabled for the parser.
|
||||||
|
*/
|
||||||
|
int32_t getCaseSensitivePrefixLength(const UnicodeString& other);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const UnicodeString fStr;
|
const UnicodeString fStr;
|
||||||
int32_t fStart;
|
int32_t fStart;
|
||||||
int32_t fEnd;
|
int32_t fEnd;
|
||||||
|
bool fFoldCase;
|
||||||
|
|
||||||
|
int32_t getPrefixLengthInternal(const UnicodeString& other, bool foldCase);
|
||||||
|
|
||||||
|
static bool codePointsEqual(UChar32 cp1, UChar32 cp2, bool foldCase);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -50,38 +50,39 @@ void NumberParserTest::testBasic() {
|
|||||||
{7, u"𝟳𝟴.𝟬𝟬𝟬.𝟬𝟮𝟯", u"#,##,##0", 11, 78.},
|
{7, u"𝟳𝟴.𝟬𝟬𝟬.𝟬𝟮𝟯", u"#,##,##0", 11, 78.},
|
||||||
{3, u"-𝟱𝟭𝟰𝟮𝟯", u"0", 11, -51423.},
|
{3, u"-𝟱𝟭𝟰𝟮𝟯", u"0", 11, -51423.},
|
||||||
{3, u"-𝟱𝟭𝟰𝟮𝟯-", u"0", 11, -51423.},
|
{3, u"-𝟱𝟭𝟰𝟮𝟯-", u"0", 11, -51423.},
|
||||||
{3, u"a51423US dollars", u"a0¤¤¤", 16, 51423.},
|
// {3, u"a51423US dollars", u"a0¤¤¤", 16, 51423.},
|
||||||
{3, u"a 51423 US dollars", u"a0¤¤¤", 18, 51423.},
|
// {3, u"a 51423 US dollars", u"a0¤¤¤", 18, 51423.},
|
||||||
{3, u"514.23 USD", u"¤0", 10, 514.23},
|
// {3, u"514.23 USD", u"¤0", 10, 514.23},
|
||||||
{3, u"514.23 GBP", u"¤0", 10, 514.23},
|
// {3, u"514.23 GBP", u"¤0", 10, 514.23},
|
||||||
{3, u"a 𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 14, 51423.},
|
// {3, u"a 𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 14, 51423.},
|
||||||
{3, u"-a 𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 15, -51423.},
|
// {3, u"-a 𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 15, -51423.},
|
||||||
{3, u"a -𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 15, -51423.},
|
// {3, u"a -𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 15, -51423.},
|
||||||
{3, u"𝟱𝟭𝟰𝟮𝟯", u"[0];(0)", 10, 51423.},
|
// {3, u"𝟱𝟭𝟰𝟮𝟯", u"[0];(0)", 10, 51423.},
|
||||||
{3, u"[𝟱𝟭𝟰𝟮𝟯", u"[0];(0)", 11, 51423.},
|
// {3, u"[𝟱𝟭𝟰𝟮𝟯", u"[0];(0)", 11, 51423.},
|
||||||
{3, u"𝟱𝟭𝟰𝟮𝟯]", u"[0];(0)", 11, 51423.},
|
// {3, u"𝟱𝟭𝟰𝟮𝟯]", u"[0];(0)", 11, 51423.},
|
||||||
{3, u"[𝟱𝟭𝟰𝟮𝟯]", u"[0];(0)", 12, 51423.},
|
// {3, u"[𝟱𝟭𝟰𝟮𝟯]", u"[0];(0)", 12, 51423.},
|
||||||
{3, u"(𝟱𝟭𝟰𝟮𝟯", u"[0];(0)", 11, -51423.},
|
// {3, u"(𝟱𝟭𝟰𝟮𝟯", u"[0];(0)", 11, -51423.},
|
||||||
{3, u"𝟱𝟭𝟰𝟮𝟯)", u"[0];(0)", 11, -51423.},
|
// {3, u"𝟱𝟭𝟰𝟮𝟯)", u"[0];(0)", 11, -51423.},
|
||||||
{3, u"(𝟱𝟭𝟰𝟮𝟯)", u"[0];(0)", 12, -51423.},
|
// {3, u"(𝟱𝟭𝟰𝟮𝟯)", u"[0];(0)", 12, -51423.},
|
||||||
{3, u"𝟱𝟭𝟰𝟮𝟯", u"{0};{0}", 10, 51423.},
|
// {3, u"𝟱𝟭𝟰𝟮𝟯", u"{0};{0}", 10, 51423.},
|
||||||
{3, u"{𝟱𝟭𝟰𝟮𝟯", u"{0};{0}", 11, 51423.},
|
// {3, u"{𝟱𝟭𝟰𝟮𝟯", u"{0};{0}", 11, 51423.},
|
||||||
{3, u"𝟱𝟭𝟰𝟮𝟯}", u"{0};{0}", 11, 51423.},
|
// {3, u"𝟱𝟭𝟰𝟮𝟯}", u"{0};{0}", 11, 51423.},
|
||||||
{3, u"{𝟱𝟭𝟰𝟮𝟯}", u"{0};{0}", 12, 51423.},
|
// {3, u"{𝟱𝟭𝟰𝟮𝟯}", u"{0};{0}", 12, 51423.},
|
||||||
{1, u"a40b", u"a0'0b'", 3, 40.}, // greedy code path thinks "40" is the number
|
// {1, u"a40b", u"a0'0b'", 3, 40.}, // greedy code path thinks "40" is the number
|
||||||
{2, u"a40b", u"a0'0b'", 4, 4.}, // slow code path finds the suffix "0b"
|
// {2, u"a40b", u"a0'0b'", 4, 4.}, // slow code path finds the suffix "0b"
|
||||||
{3, u"𝟱.𝟭𝟰𝟮E𝟯", u"0", 12, 5142.},
|
// {3, u"𝟱.𝟭𝟰𝟮E𝟯", u"0", 12, 5142.},
|
||||||
{3, u"𝟱.𝟭𝟰𝟮E-𝟯", u"0", 13, 0.005142},
|
// {3, u"𝟱.𝟭𝟰𝟮E-𝟯", u"0", 13, 0.005142},
|
||||||
{3, u"𝟱.𝟭𝟰𝟮e-𝟯", u"0", 13, 0.005142},
|
// {3, u"𝟱.𝟭𝟰𝟮e-𝟯", u"0", 13, 0.005142},
|
||||||
{7, u"5,142.50 Canadian dollars", u"#,##,##0 ¤¤¤", 25, 5142.5},
|
// {7, u"5,142.50 Canadian dollars", u"#,##,##0 ¤¤¤", 25, 5142.5},
|
||||||
{3, u"a$ b5", u"a ¤ b0", 5, 5.0},
|
// {3, u"a$ b5", u"a ¤ b0", 5, 5.0},
|
||||||
{3, u"📺1.23", u"📺0;📻0", 6, 1.23},
|
// {3, u"📺1.23", u"📺0;📻0", 6, 1.23},
|
||||||
{3, u"📻1.23", u"📺0;📻0", 6, -1.23},
|
// {3, u"📻1.23", u"📺0;📻0", 6, -1.23},
|
||||||
{3, u".00", u"0", 3, 0.0},
|
// {3, u".00", u"0", 3, 0.0},
|
||||||
{3, u" 0", u"a0", 31, 0.0}, // should not hang
|
// {3, u" 0", u"a0", 31, 0.0}, // should not hang
|
||||||
{3, u"NaN", u"0", 3, NAN},
|
// {3, u"NaN", u"0", 3, NAN},
|
||||||
{3, u"NaN E5", u"0", 3, NAN},
|
// {3, u"NaN E5", u"0", 3, NAN},
|
||||||
{3, u"0", u"0", 1, 0.0}};
|
// {3, u"0", u"0", 1, 0.0}
|
||||||
|
};
|
||||||
|
|
||||||
parse_flags_t parseFlags = PARSE_FLAG_IGNORE_CASE | PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES;
|
parse_flags_t parseFlags = PARSE_FLAG_IGNORE_CASE | PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES;
|
||||||
for (auto cas : cases) {
|
for (auto cas : cases) {
|
||||||
@ -123,10 +124,7 @@ void NumberParserTest::testBasic() {
|
|||||||
if (0 != (cas.flags & 0x04)) {
|
if (0 != (cas.flags & 0x04)) {
|
||||||
// Test with strict separators
|
// Test with strict separators
|
||||||
parser = NumberParserImpl::createSimpleParser(
|
parser = NumberParserImpl::createSimpleParser(
|
||||||
Locale("en"),
|
Locale("en"), patternString, parseFlags | PARSE_FLAG_STRICT_GROUPING_SIZE, status);
|
||||||
patternString,
|
|
||||||
parseFlags | PARSE_FLAG_STRICT_GROUPING_SIZE,
|
|
||||||
status);
|
|
||||||
ParsedNumber resultObject;
|
ParsedNumber resultObject;
|
||||||
parser->parse(inputString, true, resultObject, status);
|
parser->parse(inputString, true, resultObject, status);
|
||||||
assertTrue("Strict Parse failed: " + message, resultObject.success());
|
assertTrue("Strict Parse failed: " + message, resultObject.success());
|
||||||
|
@ -24,7 +24,7 @@ void StringSegmentTest::runIndexedTest(int32_t index, UBool exec, const char*&na
|
|||||||
}
|
}
|
||||||
|
|
||||||
void StringSegmentTest::testOffset() {
|
void StringSegmentTest::testOffset() {
|
||||||
StringSegment segment(SAMPLE_STRING);
|
StringSegment segment(SAMPLE_STRING, 0);
|
||||||
assertEquals("Initial Offset", 0, segment.getOffset());
|
assertEquals("Initial Offset", 0, segment.getOffset());
|
||||||
segment.adjustOffset(3);
|
segment.adjustOffset(3);
|
||||||
assertEquals("Adjust A", 3, segment.getOffset());
|
assertEquals("Adjust A", 3, segment.getOffset());
|
||||||
@ -35,7 +35,7 @@ void StringSegmentTest::testOffset() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void StringSegmentTest::testLength() {
|
void StringSegmentTest::testLength() {
|
||||||
StringSegment segment(SAMPLE_STRING);
|
StringSegment segment(SAMPLE_STRING, 0);
|
||||||
assertEquals("Initial length", 11, segment.length());
|
assertEquals("Initial length", 11, segment.length());
|
||||||
segment.adjustOffset(3);
|
segment.adjustOffset(3);
|
||||||
assertEquals("Adjust", 8, segment.length());
|
assertEquals("Adjust", 8, segment.length());
|
||||||
@ -48,7 +48,7 @@ void StringSegmentTest::testLength() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void StringSegmentTest::testCharAt() {
|
void StringSegmentTest::testCharAt() {
|
||||||
StringSegment segment(SAMPLE_STRING);
|
StringSegment segment(SAMPLE_STRING, 0);
|
||||||
assertEquals("Initial", SAMPLE_STRING, segment.toUnicodeString());
|
assertEquals("Initial", SAMPLE_STRING, segment.toUnicodeString());
|
||||||
segment.adjustOffset(3);
|
segment.adjustOffset(3);
|
||||||
assertEquals("After adjust-offset", UnicodeString(u"radio 📻"), segment.toUnicodeString());
|
assertEquals("After adjust-offset", UnicodeString(u"radio 📻"), segment.toUnicodeString());
|
||||||
@ -57,7 +57,7 @@ void StringSegmentTest::testCharAt() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void StringSegmentTest::testGetCodePoint() {
|
void StringSegmentTest::testGetCodePoint() {
|
||||||
StringSegment segment(SAMPLE_STRING);
|
StringSegment segment(SAMPLE_STRING, 0);
|
||||||
assertEquals("Double-width code point", 0x1F4FB, segment.getCodePoint());
|
assertEquals("Double-width code point", 0x1F4FB, segment.getCodePoint());
|
||||||
segment.setLength(1);
|
segment.setLength(1);
|
||||||
assertEquals("Inalid A", -1, segment.getCodePoint());
|
assertEquals("Inalid A", -1, segment.getCodePoint());
|
||||||
@ -69,7 +69,7 @@ void StringSegmentTest::testGetCodePoint() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void StringSegmentTest::testCommonPrefixLength() {
|
void StringSegmentTest::testCommonPrefixLength() {
|
||||||
StringSegment segment(SAMPLE_STRING);
|
StringSegment segment(SAMPLE_STRING, 0);
|
||||||
assertEquals("", 11, segment.getCommonPrefixLength(SAMPLE_STRING));
|
assertEquals("", 11, segment.getCommonPrefixLength(SAMPLE_STRING));
|
||||||
assertEquals("", 4, segment.getCommonPrefixLength(u"📻 r"));
|
assertEquals("", 4, segment.getCommonPrefixLength(u"📻 r"));
|
||||||
assertEquals("", 3, segment.getCommonPrefixLength(u"📻 x"));
|
assertEquals("", 3, segment.getCommonPrefixLength(u"📻 x"));
|
||||||
|
@ -5,7 +5,6 @@ package com.ibm.icu.impl.number.parse;
|
|||||||
import java.text.ParsePosition;
|
import java.text.ParsePosition;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Comparator;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import com.ibm.icu.impl.number.AffixPatternProvider;
|
import com.ibm.icu.impl.number.AffixPatternProvider;
|
||||||
@ -268,7 +267,6 @@ public class NumberParserImpl {
|
|||||||
private final int parseFlags;
|
private final int parseFlags;
|
||||||
private final List<NumberParseMatcher> matchers;
|
private final List<NumberParseMatcher> matchers;
|
||||||
private final List<UnicodeSet> leads;
|
private final List<UnicodeSet> leads;
|
||||||
private Comparator<ParsedNumber> comparator;
|
|
||||||
private boolean frozen;
|
private boolean frozen;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -284,7 +282,6 @@ public class NumberParserImpl {
|
|||||||
} else {
|
} else {
|
||||||
leads = null;
|
leads = null;
|
||||||
}
|
}
|
||||||
comparator = ParsedNumber.COMPARATOR; // default value
|
|
||||||
this.parseFlags = parseFlags;
|
this.parseFlags = parseFlags;
|
||||||
frozen = false;
|
frozen = false;
|
||||||
}
|
}
|
||||||
@ -318,11 +315,6 @@ public class NumberParserImpl {
|
|||||||
this.leads.add(leadCodePoints);
|
this.leads.add(leadCodePoints);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setComparator(Comparator<ParsedNumber> comparator) {
|
|
||||||
assert !frozen;
|
|
||||||
this.comparator = comparator;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void freeze() {
|
public void freeze() {
|
||||||
frozen = true;
|
frozen = true;
|
||||||
}
|
}
|
||||||
@ -400,11 +392,12 @@ public class NumberParserImpl {
|
|||||||
|
|
||||||
int initialOffset = segment.getOffset();
|
int initialOffset = segment.getOffset();
|
||||||
for (int i = 0; i < matchers.size(); i++) {
|
for (int i = 0; i < matchers.size(); i++) {
|
||||||
|
// TODO: Check leadChars here?
|
||||||
NumberParseMatcher matcher = matchers.get(i);
|
NumberParseMatcher matcher = matchers.get(i);
|
||||||
|
|
||||||
// In a non-greedy parse, we attempt all possible matches and pick the best.
|
// In a non-greedy parse, we attempt all possible matches and pick the best.
|
||||||
for (int charsToConsume = 0; charsToConsume < segment.length();) {
|
for (int charsToConsume = 0; charsToConsume < segment.length();) {
|
||||||
charsToConsume += Character.charCount(Character.codePointAt(segment, charsToConsume));
|
charsToConsume += Character.charCount(segment.codePointAt(charsToConsume));
|
||||||
|
|
||||||
// Run the matcher on a segment of the current length.
|
// Run the matcher on a segment of the current length.
|
||||||
candidate.copyFrom(initial);
|
candidate.copyFrom(initial);
|
||||||
@ -415,7 +408,7 @@ public class NumberParserImpl {
|
|||||||
// If the entire segment was consumed, recurse.
|
// If the entire segment was consumed, recurse.
|
||||||
if (segment.getOffset() - initialOffset == charsToConsume) {
|
if (segment.getOffset() - initialOffset == charsToConsume) {
|
||||||
parseLongestRecursive(segment, candidate);
|
parseLongestRecursive(segment, candidate);
|
||||||
if (comparator.compare(candidate, result) > 0) {
|
if (candidate.isBetterThan(result)) {
|
||||||
result.copyFrom(candidate);
|
result.copyFrom(candidate);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -166,4 +166,8 @@ public class ParsedNumber {
|
|||||||
return d;
|
return d;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
boolean isBetterThan(ParsedNumber other) {
|
||||||
|
return COMPARATOR.compare(this, other) > 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -74,6 +74,10 @@ public class StringSegment implements CharSequence {
|
|||||||
return str.charAt(index + start);
|
return str.charAt(index + start);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int codePointAt(int index) {
|
||||||
|
return str.codePointAt(index + start);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public CharSequence subSequence(int start, int end) {
|
public CharSequence subSequence(int start, int end) {
|
||||||
throw new AssertionError(); // Never used
|
throw new AssertionError(); // Never used
|
||||||
|
Loading…
Reference in New Issue
Block a user