ICU-13574 Porting the parsing utility classes StringSegment and UnicodeSetStaticCache to C++.
X-SVN-Rev: 40841
This commit is contained in:
parent
e5cc630590
commit
237acf183a
@ -107,7 +107,8 @@ number_affixutils.o number_compact.o number_decimalquantity.o \
|
||||
number_decimfmtprops.o number_fluent.o number_formatimpl.o number_grouping.o \
|
||||
number_integerwidth.o number_longnames.o number_modifiers.o number_notation.o \
|
||||
number_padding.o number_patternmodifier.o number_patternstring.o \
|
||||
number_rounding.o number_scientific.o number_stringbuilder.o
|
||||
number_rounding.o number_scientific.o number_stringbuilder.o \
|
||||
numparse_stringsegment.o numparse_unisets.o
|
||||
|
||||
|
||||
## Header files to install
|
||||
|
79
icu4c/source/i18n/numparse_stringsegment.cpp
Normal file
79
icu4c/source/i18n/numparse_stringsegment.cpp
Normal file
@ -0,0 +1,79 @@
|
||||
// © 2018 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
|
||||
|
||||
#include "numparse_types.h"
|
||||
#include "numparse_stringsegment.h"
|
||||
#include "putilimp.h"
|
||||
#include "unicode/utf16.h"
|
||||
|
||||
using namespace icu;
|
||||
using namespace icu::numparse;
|
||||
using namespace icu::numparse::impl;
|
||||
|
||||
|
||||
// Stores str in fStr and makes the segment span it from index 0 to str.length().
StringSegment::StringSegment(const UnicodeString &str)
        : fStr(str),
          fStart(0),
          fEnd(str.length()) {
}
|
||||
|
||||
// Returns the index in the underlying string at which the segment currently starts.
int32_t StringSegment::getOffset() const {
    const int32_t currentStart = fStart;
    return currentStart;
}
|
||||
|
||||
// Moves the start of the segment to the given index; fEnd is not touched.
void StringSegment::setOffset(int32_t start) {
    this->fStart = start;
}
|
||||
|
||||
// Shifts the start of the segment by delta code units; fEnd is not touched.
void StringSegment::adjustOffset(int32_t delta) {
    fStart = fStart + delta;
}
|
||||
|
||||
// Places the end of the segment `length` code units after the current start.
void StringSegment::setLength(int32_t length) {
    const int32_t newEnd = fStart + length;
    fEnd = newEnd;
}
|
||||
|
||||
void StringSegment::resetLength() {
|
||||
fEnd = fStr.length();
|
||||
}
|
||||
|
||||
// Returns the number of code units between the segment's start and end.
int32_t StringSegment::length() const {
    const int32_t span = fEnd - fStart;
    return span;
}
|
||||
|
||||
// Returns the code unit found `index` positions after the segment's start.
char16_t StringSegment::charAt(int32_t index) const {
    const int32_t absoluteIndex = index + fStart;
    return fStr.charAt(absoluteIndex);
}
|
||||
|
||||
// Returns what UnicodeString::char32At reports `index` positions after the segment's start.
UChar32 StringSegment::codePointAt(int32_t index) const {
    const int32_t absoluteIndex = index + fStart;
    return fStr.char32At(absoluteIndex);
}
|
||||
|
||||
// Returns a new UnicodeString built from the code units in [fStart, fEnd).
UnicodeString StringSegment::toUnicodeString() const {
    const int32_t count = fEnd - fStart;
    return UnicodeString(fStr, fStart, count);
}
|
||||
|
||||
// Returns the code point that begins at the segment's current start, or -1 when the segment is
// empty or begins with an unpaired surrogate code unit.
//
// A supplementary code point is only reported when the lead surrogate is immediately followed by
// a trail surrogate and both code units lie inside the segment.
UChar32 StringSegment::getCodePoint() const {
    // An empty segment has no first code point; never read at or beyond fEnd.
    if (fStart >= fEnd) {
        return -1;
    }
    const char16_t lead = fStr.charAt(fStart);
    if (!U16_IS_SURROGATE(lead)) {
        return lead;
    }
    if (U16_IS_LEAD(lead) && fStart + 1 < fEnd) {
        const char16_t trail = fStr.charAt(fStart + 1);
        // char32At() would hand back the bare lead surrogate for an unpaired lead, so the pair
        // is validated explicitly here.
        if (U16_IS_TRAIL(trail)) {
            return U16_GET_SUPPLEMENTARY(lead, trail);
        }
    }
    // Lone trail surrogate, lead at the very end of the segment, or lead without a trail.
    return -1;
}
|
||||
|
||||
// Counts how many leading code units of this segment are equal to the leading code units of
// `other`, stopping at the first mismatch or at the end of the shorter of the two.
int32_t StringSegment::getCommonPrefixLength(const UnicodeString &other) {
    const int32_t limit = uprv_min(length(), other.length());
    int32_t matched = 0;
    while (matched < limit && charAt(matched) == other.charAt(matched)) {
        ++matched;
    }
    return matched;
}
|
||||
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
79
icu4c/source/i18n/numparse_stringsegment.h
Normal file
79
icu4c/source/i18n/numparse_stringsegment.h
Normal file
@ -0,0 +1,79 @@
|
||||
// © 2018 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
|
||||
#ifndef __NUMPARSE_STRINGSEGMENT_H__
|
||||
#define __NUMPARSE_STRINGSEGMENT_H__
|
||||
|
||||
#include "numparse_types.h"
|
||||
#include "number_types.h"
|
||||
#include "unicode/unistr.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
namespace numparse {
|
||||
namespace impl {
|
||||
|
||||
/**
|
||||
* A mutable class allowing for a String with a variable offset and length. The charAt, length, and
|
||||
* subSequence methods all operate relative to the fixed offset into the String.
|
||||
*
|
||||
* @author sffc
|
||||
*/
|
||||
class StringSegment : public UMemory, public ::icu::number::impl::CharSequence {
|
||||
public:
|
||||
explicit StringSegment(const UnicodeString &str);
|
||||
|
||||
int32_t getOffset() const;
|
||||
|
||||
void setOffset(int32_t start);
|
||||
|
||||
/**
|
||||
* Equivalent to <code>setOffset(getOffset()+delta)</code>.
|
||||
*
|
||||
* <p>
|
||||
* This method is usually called by a Matcher to register that a char was consumed. If the char is
|
||||
* strong (it usually is, except for things like whitespace), follow this with a call to
|
||||
* {@link ParsedNumber#setCharsConsumed}. For more information on strong chars, see that method.
|
||||
*/
|
||||
void adjustOffset(int32_t delta);
|
||||
|
||||
void setLength(int32_t length);
|
||||
|
||||
void resetLength();
|
||||
|
||||
int32_t length() const override;
|
||||
|
||||
char16_t charAt(int32_t index) const override;
|
||||
|
||||
UChar32 codePointAt(int32_t index) const override;
|
||||
|
||||
UnicodeString toUnicodeString() const override;
|
||||
|
||||
/**
|
||||
* Returns the first code point in the string segment, or -1 if the string starts with an invalid
|
||||
* code point.
|
||||
*/
|
||||
UChar32 getCodePoint() const;
|
||||
|
||||
/**
|
||||
* Returns the length of the prefix shared by this StringSegment and the given CharSequence. For
|
||||
* example, if this string segment is "aab", and the char sequence is "aac", this method returns 2,
|
||||
* since the first 2 characters are the same.
|
||||
*/
|
||||
int32_t getCommonPrefixLength(const UnicodeString &other);
|
||||
|
||||
private:
|
||||
const UnicodeString fStr;
|
||||
int32_t fStart;
|
||||
int32_t fEnd;
|
||||
};
|
||||
|
||||
|
||||
} // namespace impl
|
||||
} // namespace numparse
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif //__NUMPARSE_STRINGSEGMENT_H__
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
22
icu4c/source/i18n/numparse_types.h
Normal file
22
icu4c/source/i18n/numparse_types.h
Normal file
@ -0,0 +1,22 @@
|
||||
// © 2018 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
|
||||
#ifndef __NUMPARSE_TYPES_H__
|
||||
#define __NUMPARSE_TYPES_H__
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
namespace numparse {
|
||||
namespace impl {
|
||||
|
||||
|
||||
} // namespace impl
|
||||
} // namespace numparse
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif //__NUMPARSE_TYPES_H__
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
124
icu4c/source/i18n/numparse_unisets.cpp
Normal file
124
icu4c/source/i18n/numparse_unisets.cpp
Normal file
@ -0,0 +1,124 @@
|
||||
// © 2018 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
|
||||
|
||||
#include "numparse_unisets.h"
|
||||
#include "numparse_types.h"
|
||||
#include "umutex.h"
|
||||
#include "ucln_in.h"
|
||||
#include "unicode/uniset.h"
|
||||
|
||||
using namespace icu;
|
||||
using namespace icu::numparse;
|
||||
using namespace icu::numparse::impl;
|
||||
using namespace icu::numparse::impl::unisets;
|
||||
|
||||
|
||||
namespace {
|
||||
|
||||
// Table of sets indexed by Key; every entry starts out null.
UnicodeSet* gUnicodeSets[COUNT] = {};

// Returns a newly allocated, frozen set holding the union of the sets stored under k1 and k2.
// Returns nullptr if either input slot is still empty or if the allocation fails, instead of
// dereferencing a null table entry. The caller takes ownership of the result.
UnicodeSet* computeUnion(Key k1, Key k2) {
    const UnicodeSet* first = gUnicodeSets[k1];
    const UnicodeSet* second = gUnicodeSets[k2];
    if (first == nullptr || second == nullptr) {
        return nullptr;
    }
    UnicodeSet* result = new UnicodeSet();
    if (result == nullptr) {
        return nullptr;
    }
    result->addAll(*first);
    result->addAll(*second);
    result->freeze();
    return result;
}
|
||||
|
||||
// Three-input variant: returns a newly allocated, frozen set holding the union of the sets stored
// under k1, k2 and k3. Returns nullptr if any input slot is still empty or if the allocation
// fails, instead of dereferencing a null table entry. The caller takes ownership of the result.
UnicodeSet* computeUnion(Key k1, Key k2, Key k3) {
    const UnicodeSet* first = gUnicodeSets[k1];
    const UnicodeSet* second = gUnicodeSets[k2];
    const UnicodeSet* third = gUnicodeSets[k3];
    if (first == nullptr || second == nullptr || third == nullptr) {
        return nullptr;
    }
    UnicodeSet* result = new UnicodeSet();
    if (result == nullptr) {
        return nullptr;
    }
    result->addAll(*first);
    result->addAll(*second);
    result->addAll(*third);
    result->freeze();
    return result;
}
|
||||
|
||||
icu::UInitOnce gNumberParseUniSetsInitOnce = U_INITONCE_INITIALIZER;
|
||||
|
||||
UBool U_CALLCONV cleanupNumberParseUnitSets() {
|
||||
for (int32_t i = 0; i < COUNT; i++) {
|
||||
delete gUnicodeSets[i];
|
||||
gUnicodeSets[i] = nullptr;
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
// One-time initializer for gUnicodeSets. Builds the pattern-based sets first, then the sets that
// are derived from them, and finally freezes everything. On any failure (bad pattern or failed
// allocation) `status` is set and the function returns early; whatever was already allocated
// stays in the table and is released by the registered cleanup function.
void U_CALLCONV initNumberParseUniSets(UErrorCode &status) {
    ucln_i18n_registerCleanup(UCLN_I18N_NUMPARSE_UNISETS, cleanupNumberParseUnitSets);
#define NEW_UNISET(pattern, status) new UnicodeSet(UnicodeString(pattern), status)

    // ---- Sets built directly from a pattern ----

    // BiDi characters are skipped over and ignored at any point in the string, even in strict mode.
    gUnicodeSets[BIDI] = NEW_UNISET(u"[[\\u200E\\u200F\\u061C]]", status);

    // This set was decided after discussion with icu-design@. See ticket #13309.
    // Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property).
    gUnicodeSets[WHITESPACE] = NEW_UNISET(u"[[:Zs:][\\u0009]]", status);

    // TODO: Re-generate these sets from the UCD. They probably haven't been updated in a while.
    gUnicodeSets[COMMA] = NEW_UNISET(u"[,،٫、︐︑﹐﹑,、]", status);
    gUnicodeSets[STRICT_COMMA] = NEW_UNISET(u"[,٫︐﹐,]", status);
    gUnicodeSets[PERIOD] = NEW_UNISET(u"[.․。︒﹒.。]", status);
    gUnicodeSets[STRICT_PERIOD] = NEW_UNISET(u"[.․﹒.。]", status);
    gUnicodeSets[OTHER_GROUPING_SEPARATORS] = NEW_UNISET(
            u"['٬‘’'\\u0020\\u00A0\\u2000-\\u200A\\u202F\\u205F\\u3000]", status);

    gUnicodeSets[MINUS_SIGN] = NEW_UNISET(u"[-⁻₋−➖﹣-]", status);
    gUnicodeSets[PLUS_SIGN] = NEW_UNISET(u"[+⁺₊➕﬩﹢+]", status);

    gUnicodeSets[PERCENT_SIGN] = NEW_UNISET(u"[%٪]", status);
    gUnicodeSets[PERMILLE_SIGN] = NEW_UNISET(u"[‰؉]", status);
    gUnicodeSets[INFINITY] = NEW_UNISET(u"[∞]", status);

    gUnicodeSets[DIGITS] = NEW_UNISET(u"[:digit:]", status);
    gUnicodeSets[NAN_LEAD] = NEW_UNISET(
            u"[NnТтmeՈոс¤НнчTtsҳ\u975e\u1002\u0e9a\u10d0\u0f68\u0644\u0646]", status);
    gUnicodeSets[SCIENTIFIC_LEAD] = NEW_UNISET(u"[Ee×·е\u0627]", status);
    gUnicodeSets[CWCF] = NEW_UNISET(u"[:CWCF:]", status);

#undef NEW_UNISET

    // The derived sets below dereference the pattern-based ones, so stop here if any of them
    // could not be allocated or parsed.
    static const Key kPatternKeys[] = {
            BIDI, WHITESPACE, COMMA, STRICT_COMMA, PERIOD, STRICT_PERIOD, OTHER_GROUPING_SEPARATORS,
            MINUS_SIGN, PLUS_SIGN, PERCENT_SIGN, PERMILLE_SIGN, INFINITY, DIGITS, NAN_LEAD,
            SCIENTIFIC_LEAD, CWCF};
    for (Key key : kPatternKeys) {
        if (gUnicodeSets[key] == nullptr && U_SUCCESS(status)) {
            status = U_MEMORY_ALLOCATION_ERROR;
        }
    }
    if (U_FAILURE(status)) {
        return;
    }

    // ---- Sets derived from the ones above ----

    gUnicodeSets[DEFAULT_IGNORABLES] = computeUnion(BIDI, WHITESPACE);
    // Own a separate copy rather than sharing the BIDI pointer: every slot is deleted
    // individually during cleanup, so a shared pointer would be deleted twice.
    gUnicodeSets[STRICT_IGNORABLES] = new UnicodeSet(*gUnicodeSets[BIDI]);

    gUnicodeSets[ALL_SEPARATORS] = computeUnion(COMMA, PERIOD, OTHER_GROUPING_SEPARATORS);
    gUnicodeSets[STRICT_ALL_SEPARATORS] = computeUnion(
            STRICT_COMMA, STRICT_PERIOD, OTHER_GROUPING_SEPARATORS);
    if (gUnicodeSets[ALL_SEPARATORS] == nullptr || gUnicodeSets[STRICT_ALL_SEPARATORS] == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    gUnicodeSets[DIGITS_OR_ALL_SEPARATORS] = computeUnion(DIGITS, ALL_SEPARATORS);
    gUnicodeSets[DIGITS_OR_STRICT_ALL_SEPARATORS] = computeUnion(DIGITS, STRICT_ALL_SEPARATORS);

    // ---- Final check and freeze ----

    for (int32_t i = 0; i < COUNT; i++) {
        if (gUnicodeSets[i] == nullptr) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        gUnicodeSets[i]->freeze();
    }
}
|
||||
|
||||
}
|
||||
|
||||
// Returns the set registered under `key`, building all sets on first use.
// Returns nullptr if initialization reported an error, or if `key` is not a real set key
// (for example COUNT, which is used to mean "no key").
const UnicodeSet* unisets::get(Key key) {
    UErrorCode localStatus = U_ZERO_ERROR;
    umtx_initOnce(gNumberParseUniSetsInitOnce, &initNumberParseUniSets, localStatus);
    if (U_FAILURE(localStatus)) {
        // TODO: This returns non-null in Java, and callers assume that.
        return nullptr;
    }
    // COUNT and anything else outside the table must not be used as an array index.
    const int32_t index = static_cast<int32_t>(key);
    if (index < 0 || index >= COUNT) {
        return nullptr;
    }
    return gUnicodeSets[index];
}
|
||||
|
||||
// Returns key1 if the set for key1 contains str; otherwise returns COUNT ("no key").
// A set that could not be obtained is treated as not containing str.
Key unisets::chooseFrom(UnicodeString str, Key key1) {
    const UnicodeSet* candidate = get(key1);
    if (candidate != nullptr && candidate->contains(str)) {
        return key1;
    }
    return COUNT;
}
|
||||
|
||||
// Returns key1 if the set for key1 contains str; otherwise falls back to the single-key overload
// with key2 (which yields key2 or COUNT). A set that could not be obtained is treated as not
// containing str.
Key unisets::chooseFrom(UnicodeString str, Key key1, Key key2) {
    const UnicodeSet* first = get(key1);
    if (first != nullptr && first->contains(str)) {
        return key1;
    }
    return chooseFrom(str, key2);
}
|
||||
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
72
icu4c/source/i18n/numparse_unisets.h
Normal file
72
icu4c/source/i18n/numparse_unisets.h
Normal file
@ -0,0 +1,72 @@
|
||||
// © 2018 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
|
||||
#ifndef __NUMPARSE_UNISETS_H__
|
||||
#define __NUMPARSE_UNISETS_H__
|
||||
|
||||
#include "numparse_types.h"
|
||||
#include "unicode/uniset.h"
|
||||
|
||||
U_NAMESPACE_BEGIN namespace numparse {
|
||||
namespace impl {
|
||||
namespace unisets {
|
||||
|
||||
// Identifies one of the shared UnicodeSets. The enumerators are used as array indices, so their
// order matters and COUNT must stay last.
enum Key {
    // ---- Ignorables ----
    BIDI,
    WHITESPACE,
    DEFAULT_IGNORABLES,
    STRICT_IGNORABLES,

    // ---- Separators ----
    // Notes:
    // - COMMA is a superset of STRICT_COMMA
    // - PERIOD is a superset of STRICT_PERIOD
    // - ALL_SEPARATORS is the union of COMMA, PERIOD, and OTHER_GROUPING_SEPARATORS
    // - STRICT_ALL_SEPARATORS is the union of STRICT_COMMA, STRICT_PERIOD, and
    //   OTHER_GROUPING_SEPARATORS
    COMMA,
    PERIOD,
    STRICT_COMMA,
    STRICT_PERIOD,
    OTHER_GROUPING_SEPARATORS,
    ALL_SEPARATORS,
    STRICT_ALL_SEPARATORS,

    // ---- Symbols ----
    // TODO: NaN?
    MINUS_SIGN,
    PLUS_SIGN,
    PERCENT_SIGN,
    PERMILLE_SIGN,
    // NOTE(review): INFINITY is also the name of a standard <math.h>/<cmath> macro; confirm that
    // no translation unit including this header also pulls in that macro.
    INFINITY,

    // ---- Other ----
    DIGITS,
    NAN_LEAD,
    SCIENTIFIC_LEAD,
    CWCF,

    // ---- Combined Separators with Digits (for lead code points) ----
    DIGITS_OR_ALL_SEPARATORS,
    DIGITS_OR_STRICT_ALL_SEPARATORS,

    // The number of elements in the enum. Also used to indicate null.
    COUNT
};
|
||||
|
||||
// Returns the set for the given key, building the sets on first use; may return nullptr if
// building them reported an error.
const UnicodeSet* get(Key key);
|
||||
|
||||
// Returns key1 if the set for key1 contains str, otherwise COUNT.
Key chooseFrom(UnicodeString str, Key key1);
|
||||
|
||||
// Returns key1 if the set for key1 contains str; otherwise key2 if the set for key2 contains
// str; otherwise COUNT.
Key chooseFrom(UnicodeString str, Key key1, Key key2);
|
||||
|
||||
} // namespace unisets
|
||||
} // namespace impl
|
||||
} // namespace numparse
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif //__NUMPARSE_UNISETS_H__
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
@ -26,6 +26,7 @@ as the functions are suppose to be called.
|
||||
It's usually best to have child dependencies called first. */
|
||||
typedef enum ECleanupI18NType {
|
||||
UCLN_I18N_START = -1,
|
||||
UCLN_I18N_NUMPARSE_UNISETS,
|
||||
UCLN_I18N_CURRENCY_SPACING,
|
||||
UCLN_I18N_SPOOF,
|
||||
UCLN_I18N_SPOOFDATA,
|
||||
|
@ -64,7 +64,7 @@ scientificnumberformattertest.o datadrivennumberformattestsuite.o \
|
||||
numberformattesttuple.o numberformat2test.o pluralmaptest.o \
|
||||
numbertest_affixutils.o numbertest_api.o numbertest_decimalquantity.o \
|
||||
numbertest_modifiers.o numbertest_patternmodifier.o numbertest_patternstring.o \
|
||||
numbertest_stringbuilder.o
|
||||
numbertest_stringbuilder.o numbertest_stringsegment.o numbertest_unisets.o
|
||||
|
||||
DEPS = $(OBJECTS:.o=.d)
|
||||
|
||||
|
@ -9,9 +9,13 @@
|
||||
#include "number_stringbuilder.h"
|
||||
#include "intltest.h"
|
||||
#include "number_affixutils.h"
|
||||
#include "numparse_stringsegment.h"
|
||||
#include "unicode/locid.h"
|
||||
|
||||
using namespace icu::number;
|
||||
using namespace icu::number::impl;
|
||||
using namespace icu::numparse;
|
||||
using namespace icu::numparse::impl;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
// INSTRUCTIONS: //
|
||||
@ -178,6 +182,30 @@ class NumberStringBuilderTest : public IntlTest {
|
||||
void assertEqualsImpl(const UnicodeString &a, const NumberStringBuilder &b);
|
||||
};
|
||||
|
||||
class StringSegmentTest : public IntlTest {
|
||||
public:
|
||||
void testOffset();
|
||||
void testLength();
|
||||
void testCharAt();
|
||||
void testGetCodePoint();
|
||||
void testCommonPrefixLength();
|
||||
|
||||
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
|
||||
};
|
||||
|
||||
class UniSetsTest : public IntlTest {
|
||||
public:
|
||||
void testSetCoverage();
|
||||
|
||||
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
|
||||
|
||||
private:
|
||||
void assertInSet(const UnicodeString& localeName, const UnicodeString &setName,
|
||||
const UnicodeSet& set, const UnicodeString& str);
|
||||
void assertInSet(const UnicodeString& localeName, const UnicodeString &setName,
|
||||
const UnicodeSet& set, UChar32 cp);
|
||||
};
|
||||
|
||||
|
||||
// NOTE: This macro is identical to the one in itformat.cpp
|
||||
#define TESTCLASS(id, TestClass) \
|
||||
@ -206,6 +234,8 @@ class NumberTest : public IntlTest {
|
||||
TESTCLASS(4, PatternModifierTest);
|
||||
TESTCLASS(5, PatternStringTest);
|
||||
TESTCLASS(6, NumberStringBuilderTest);
|
||||
TESTCLASS(7, StringSegmentTest);
|
||||
TESTCLASS(8, UniSetsTest);
|
||||
default: name = ""; break; // needed to end loop
|
||||
}
|
||||
}
|
||||
|
94
icu4c/source/test/intltest/numbertest_stringsegment.cpp
Normal file
94
icu4c/source/test/intltest/numbertest_stringsegment.cpp
Normal file
@ -0,0 +1,94 @@
|
||||
// © 2018 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
|
||||
|
||||
#include "numbertest.h"
|
||||
#include "numparse_stringsegment.h"
|
||||
|
||||
// Shared fixture: U+1F4FB (two code units), space, "radio", space, U+1F4FB -- 11 code units in all.
static const char16_t* SAMPLE_STRING = u"📻 radio 📻";
|
||||
|
||||
// Framework entry point: registers every StringSegment test case with the TESTCASE_AUTO macros.
void StringSegmentTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char *) {
    if (exec) { logln("TestSuite StringSegmentTest: "); }

    TESTCASE_AUTO_BEGIN;
        TESTCASE_AUTO(testOffset);
        TESTCASE_AUTO(testLength);
        TESTCASE_AUTO(testCharAt);
        TESTCASE_AUTO(testGetCodePoint);
        TESTCASE_AUTO(testCommonPrefixLength);
    TESTCASE_AUTO_END;
}
|
||||
|
||||
void StringSegmentTest::testOffset() {
|
||||
StringSegment segment(SAMPLE_STRING);
|
||||
assertEquals("Initial Offset", 0, segment.getOffset());
|
||||
segment.adjustOffset(3);
|
||||
assertEquals("Adjust A", 3, segment.getOffset());
|
||||
segment.adjustOffset(2);
|
||||
assertEquals("Adjust B", 5, segment.getOffset());
|
||||
segment.setOffset(4);
|
||||
assertEquals("Set Offset", 4, segment.getOffset());
|
||||
}
|
||||
|
||||
void StringSegmentTest::testLength() {
|
||||
StringSegment segment(SAMPLE_STRING);
|
||||
assertEquals("Initial length", 11, segment.length());
|
||||
segment.adjustOffset(3);
|
||||
assertEquals("Adjust", 8, segment.length());
|
||||
segment.setLength(4);
|
||||
assertEquals("Set Length", 4, segment.length());
|
||||
segment.setOffset(5);
|
||||
assertEquals("After adjust offset", 2, segment.length());
|
||||
segment.resetLength();
|
||||
assertEquals("After reset length", 6, segment.length());
|
||||
}
|
||||
|
||||
void StringSegmentTest::testCharAt() {
|
||||
StringSegment segment(SAMPLE_STRING);
|
||||
assertEquals("Initial", SAMPLE_STRING, segment.toUnicodeString());
|
||||
segment.adjustOffset(3);
|
||||
assertEquals("After adjust-offset", UnicodeString(u"radio 📻"), segment.toUnicodeString());
|
||||
segment.setLength(5);
|
||||
assertEquals("After adjust-length", UnicodeString(u"radio"), segment.toUnicodeString());
|
||||
}
|
||||
|
||||
// Checks getCodePoint() on a full surrogate pair, on each half of a split pair, and on a BMP
// character.
void StringSegmentTest::testGetCodePoint() {
    StringSegment segment(SAMPLE_STRING);
    assertEquals("Double-width code point", 0x1F4FB, segment.getCodePoint());

    // Only the lead surrogate is inside the segment.
    segment.setLength(1);
    assertEquals("Invalid A", -1, segment.getCodePoint());

    // The segment starts on the trail surrogate.
    segment.resetLength();
    segment.adjustOffset(1);
    assertEquals("Invalid B", -1, segment.getCodePoint());

    // The segment starts on the space that follows the pair.
    segment.adjustOffset(1);
    assertEquals("Valid again", 0x20, segment.getCodePoint());
}
|
||||
|
||||
void StringSegmentTest::testCommonPrefixLength() {
|
||||
StringSegment segment(SAMPLE_STRING);
|
||||
assertEquals("", 11, segment.getCommonPrefixLength(SAMPLE_STRING));
|
||||
assertEquals("", 4, segment.getCommonPrefixLength(u"📻 r"));
|
||||
assertEquals("", 3, segment.getCommonPrefixLength(u"📻 x"));
|
||||
assertEquals("", 0, segment.getCommonPrefixLength(u"x"));
|
||||
assertEquals("", 0, segment.getCommonPrefixLength(u""));
|
||||
segment.adjustOffset(3);
|
||||
assertEquals("", 0, segment.getCommonPrefixLength(u"RADiO"));
|
||||
assertEquals("", 5, segment.getCommonPrefixLength(u"radio"));
|
||||
assertEquals("", 2, segment.getCommonPrefixLength(u"rafio"));
|
||||
assertEquals("", 0, segment.getCommonPrefixLength(u"fadio"));
|
||||
assertEquals("", 0, segment.getCommonPrefixLength(u""));
|
||||
segment.setLength(3);
|
||||
assertEquals("", 3, segment.getCommonPrefixLength(u"radio"));
|
||||
assertEquals("", 2, segment.getCommonPrefixLength(u"rafio"));
|
||||
assertEquals("", 0, segment.getCommonPrefixLength(u"fadio"));
|
||||
assertEquals("", 0, segment.getCommonPrefixLength(u""));
|
||||
segment.resetLength();
|
||||
segment.setOffset(11); // end of string
|
||||
assertEquals("", 0, segment.getCommonPrefixLength(u"foo"));
|
||||
}
|
||||
|
||||
#endif
|
99
icu4c/source/test/intltest/numbertest_unisets.cpp
Normal file
99
icu4c/source/test/intltest/numbertest_unisets.cpp
Normal file
@ -0,0 +1,99 @@
|
||||
// © 2018 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
|
||||
|
||||
#include "numbertest.h"
|
||||
#include "numparse_unisets.h"
|
||||
#include "unicode/dcfmtsym.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <cstr.h>
|
||||
|
||||
using icu::numparse::impl::unisets::get;
|
||||
|
||||
// Framework entry point: registers every UniSets test case with the TESTCASE_AUTO macros.
void UniSetsTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char *) {
    if (exec) { logln("TestSuite UniSetsTest: "); }

    TESTCASE_AUTO_BEGIN;
        TESTCASE_AUTO(testSetCoverage);
    TESTCASE_AUTO_END;
}
|
||||
|
||||
void UniSetsTest::testSetCoverage() {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
||||
// Lenient comma/period should be supersets of strict comma/period;
|
||||
// it also makes the coverage logic cheaper.
|
||||
assertTrue(
|
||||
"COMMA should be superset of STRICT_COMMA",
|
||||
get(unisets::COMMA)->containsAll(*get(unisets::STRICT_COMMA)));
|
||||
assertTrue(
|
||||
"PERIOD should be superset of STRICT_PERIOD",
|
||||
get(unisets::PERIOD)->containsAll(*get(unisets::STRICT_PERIOD)));
|
||||
|
||||
UnicodeSet decimals;
|
||||
decimals.addAll(*get(unisets::STRICT_COMMA));
|
||||
decimals.addAll(*get(unisets::STRICT_PERIOD));
|
||||
decimals.freeze();
|
||||
UnicodeSet grouping;
|
||||
grouping.addAll(decimals);
|
||||
grouping.addAll(*get(unisets::OTHER_GROUPING_SEPARATORS));
|
||||
decimals.freeze();
|
||||
|
||||
const UnicodeSet &plusSign = *get(unisets::PLUS_SIGN);
|
||||
const UnicodeSet &minusSign = *get(unisets::MINUS_SIGN);
|
||||
const UnicodeSet &percent = *get(unisets::PERCENT_SIGN);
|
||||
const UnicodeSet &permille = *get(unisets::PERMILLE_SIGN);
|
||||
const UnicodeSet &infinity = *get(unisets::INFINITY);
|
||||
const UnicodeSet &nanLead = *get(unisets::NAN_LEAD);
|
||||
const UnicodeSet &scientificLead = *get(unisets::SCIENTIFIC_LEAD);
|
||||
|
||||
int32_t localeCount;
|
||||
const Locale* allAvailableLocales = Locale::getAvailableLocales(localeCount);
|
||||
for (int32_t i = 0; i < localeCount; i++) {
|
||||
Locale locale = allAvailableLocales[i];
|
||||
DecimalFormatSymbols dfs(locale, status);
|
||||
UnicodeString localeName;
|
||||
locale.getDisplayName(localeName);
|
||||
assertSuccess(UnicodeString("Making DFS for ") + localeName, status);
|
||||
|
||||
#define ASSERT_IN_SET(name, foo) assertInSet(localeName, UnicodeString("" #name ""), name, foo)
|
||||
ASSERT_IN_SET(decimals, dfs.getConstSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol));
|
||||
ASSERT_IN_SET(grouping, dfs.getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol));
|
||||
ASSERT_IN_SET(plusSign, dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol));
|
||||
ASSERT_IN_SET(minusSign, dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol));
|
||||
ASSERT_IN_SET(percent, dfs.getConstSymbol(DecimalFormatSymbols::kPercentSymbol));
|
||||
ASSERT_IN_SET(permille, dfs.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol));
|
||||
ASSERT_IN_SET(infinity, dfs.getConstSymbol(DecimalFormatSymbols::kInfinitySymbol));
|
||||
ASSERT_IN_SET(nanLead, dfs.getConstSymbol(DecimalFormatSymbols::kNaNSymbol).char32At(0));
|
||||
ASSERT_IN_SET(nanLead,
|
||||
u_foldCase(dfs.getConstSymbol(DecimalFormatSymbols::kNaNSymbol).char32At(0), 0));
|
||||
ASSERT_IN_SET(scientificLead,
|
||||
u_foldCase(dfs.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol).char32At(0), 0));
|
||||
}
|
||||
}
|
||||
|
||||
void UniSetsTest::assertInSet(const UnicodeString &localeName, const UnicodeString &setName,
|
||||
const UnicodeSet &set, const UnicodeString &str) {
|
||||
if (str.countChar32(0, str.length()) != 1) {
|
||||
// Ignore locale strings with more than one code point (usually a bidi mark)
|
||||
return;
|
||||
}
|
||||
assertInSet(localeName, setName, set, str.char32At(0));
|
||||
}
|
||||
|
||||
void UniSetsTest::assertInSet(const UnicodeString &localeName, const UnicodeString &setName,
|
||||
const UnicodeSet &set, UChar32 cp) {
|
||||
// If this test case fails, add the specified code point to the corresponding set in
|
||||
// UnicodeSetStaticCache.java and numparse_unisets.cpp
|
||||
assertTrue(
|
||||
localeName + UnicodeString(u" ") + UnicodeString(cp) + UnicodeString(u" is missing in ") +
|
||||
setName, set.contains(cp));
|
||||
}
|
||||
|
||||
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user