ICU-13574 Porting the parsing utility classes StringSegment and UnicodeSetStaticCache to C++.

X-SVN-Rev: 40841
This commit is contained in:
Shane Carr 2018-02-06 07:52:58 +00:00
parent e5cc630590
commit 237acf183a
11 changed files with 603 additions and 2 deletions

View File

@ -107,7 +107,8 @@ number_affixutils.o number_compact.o number_decimalquantity.o \
number_decimfmtprops.o number_fluent.o number_formatimpl.o number_grouping.o \
number_integerwidth.o number_longnames.o number_modifiers.o number_notation.o \
number_padding.o number_patternmodifier.o number_patternstring.o \
number_rounding.o number_scientific.o number_stringbuilder.o
number_rounding.o number_scientific.o number_stringbuilder.o \
numparse_stringsegment.o numparse_unisets.o
## Header files to install

View File

@ -0,0 +1,79 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
#include "numparse_types.h"
#include "numparse_stringsegment.h"
#include "putilimp.h"
#include "unicode/utf16.h"
using namespace icu;
using namespace icu::numparse;
using namespace icu::numparse::impl;
// Creates a segment over str that initially spans the whole string: the offset starts at 0 and
// the end is set to the string's length.
StringSegment::StringSegment(const UnicodeString &str)
        : fStr(str),
          fStart(0),
          fEnd(str.length()) {
}
// Returns the index in the underlying string at which this segment currently starts.
int32_t StringSegment::getOffset() const {
    return this->fStart;
}
// Moves the start of the segment to the given absolute index. The end of the segment is not
// touched, so the segment's length changes accordingly.
void StringSegment::setOffset(int32_t start) {
    this->fStart = start;
}
// Shifts the start of the segment by delta code units; the end of the segment stays in place.
void StringSegment::adjustOffset(int32_t delta) {
    fStart = fStart + delta;
}
// Places the end of the segment `length` code units after the current start.
void StringSegment::setLength(int32_t length) {
    const int32_t newEnd = fStart + length;
    fEnd = newEnd;
}
void StringSegment::resetLength() {
fEnd = fStr.length();
}
// Returns the number of code units between the segment's start and its end.
int32_t StringSegment::length() const {
    const int32_t span = fEnd - fStart;
    return span;
}
// Returns the code unit at `index`, where index 0 is the segment's current start.
char16_t StringSegment::charAt(int32_t index) const {
    const int32_t absoluteIndex = index + fStart;
    return fStr.charAt(absoluteIndex);
}
// Returns the code point that UnicodeString::char32At reports for `index`, where index 0 is the
// segment's current start.
UChar32 StringSegment::codePointAt(int32_t index) const {
    const int32_t absoluteIndex = index + fStart;
    return fStr.char32At(absoluteIndex);
}
// Returns a new UnicodeString holding only the code units currently covered by the segment.
UnicodeString StringSegment::toUnicodeString() const {
    const int32_t count = fEnd - fStart;
    return UnicodeString(fStr, fStart, count);
}
/**
 * Returns the first code point of the segment, or -1 if the segment does not start with a
 * well-formed code point.
 *
 * -1 is returned when the segment is empty, when it starts with a trail surrogate, or when it
 * starts with a lead surrogate that is not followed, inside the segment, by a trail surrogate.
 */
UChar32 StringSegment::getCodePoint() const {
    if (fStart >= fEnd) {
        // Nothing inside the segment to read; do not look at code units beyond its end.
        return -1;
    }
    const char16_t lead = fStr.charAt(fStart);
    if (!U16_IS_SURROGATE(lead)) {
        // BMP code point: the code unit is the code point.
        return lead;
    }
    if (U16_IS_LEAD(lead) && fStart + 1 < fEnd) {
        // Only pair the lead with a real trail surrogate that is still inside the segment.
        const char16_t trail = fStr.charAt(fStart + 1);
        if (U16_IS_TRAIL(trail)) {
            return U16_GET_SUPPLEMENTARY(lead, trail);
        }
    }
    // Unpaired surrogate.
    return -1;
}
/**
 * Returns the number of leading code units that this segment and `other` have in common.
 * The comparison is exact (code unit by code unit) and stops at the shorter of the two lengths.
 */
int32_t StringSegment::getCommonPrefixLength(const UnicodeString &other) {
    // Neither length changes during the scan, so compute the bound once instead of on every
    // iteration.
    const int32_t limit = uprv_min(length(), other.length());
    int32_t offset = 0;
    while (offset < limit && charAt(offset) == other.charAt(offset)) {
        offset++;
    }
    return offset;
}
#endif /* #if !UCONFIG_NO_FORMATTING */

View File

@ -0,0 +1,79 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
#ifndef __NUMPARSE_STRINGSEGMENT_H__
#define __NUMPARSE_STRINGSEGMENT_H__
#include "numparse_types.h"
#include "number_types.h"
#include "unicode/unistr.h"
U_NAMESPACE_BEGIN
namespace numparse {
namespace impl {
/**
* A mutable class allowing for a UnicodeString with a variable offset and length. The charAt,
* codePointAt, length, and toUnicodeString methods all operate relative to the current offset
* into the string.
*
* The segment holds its own UnicodeString member, initialized from the string passed to the
* constructor, plus a start index and an end index into it.
*
* @author sffc
*/
class StringSegment : public UMemory, public ::icu::number::impl::CharSequence {
public:
/** Creates a segment that initially covers all of str (offset 0, full length). */
explicit StringSegment(const UnicodeString &str);
/** Returns the current start index of the segment within the underlying string. */
int32_t getOffset() const;
/** Sets the start index of the segment; the end index is left unchanged. */
void setOffset(int32_t start);
/**
* Equivalent to <code>setOffset(getOffset()+delta)</code>.
*
* <p>
* This method is usually called by a Matcher to register that a char was consumed. If the char is
* strong (it usually is, except for things like whitespace), follow this with a call to
* ParsedNumber's setCharsConsumed. For more information on strong chars, see that method.
*/
void adjustOffset(int32_t delta);
/** Sets the end of the segment to <code>getOffset()+length</code>. */
void setLength(int32_t length);
/** Moves the end of the segment back to the end of the underlying string. */
void resetLength();
/** Returns the number of code units between the segment's start and end. */
int32_t length() const override;
/** Returns the code unit at the given index, counted from the segment's start. */
char16_t charAt(int32_t index) const override;
/** Returns the code point at the given index, counted from the segment's start. */
UChar32 codePointAt(int32_t index) const override;
/** Returns a new UnicodeString containing only the code units covered by the segment. */
UnicodeString toUnicodeString() const override;
/**
* Returns the first code point in the string segment, or -1 if the string starts with an invalid
* code point.
*/
UChar32 getCodePoint() const;
/**
* Returns the length of the prefix shared by this StringSegment and the given UnicodeString. For
* example, if this string segment is "aab", and the other string is "aac", this method returns 2,
* since the first 2 characters are the same.
*/
int32_t getCommonPrefixLength(const UnicodeString &other);
private:
// The underlying string; never modified after construction.
const UnicodeString fStr;
// Index into fStr at which the segment starts.
int32_t fStart;
// Index into fStr at which the segment ends; length() is fEnd - fStart.
int32_t fEnd;
};
} // namespace impl
} // namespace numparse
U_NAMESPACE_END
#endif //__NUMPARSE_STRINGSEGMENT_H__
#endif /* #if !UCONFIG_NO_FORMATTING */

View File

@ -0,0 +1,22 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
#ifndef __NUMPARSE_TYPES_H__
#define __NUMPARSE_TYPES_H__
#include "unicode/uobject.h"
U_NAMESPACE_BEGIN
namespace numparse {
namespace impl {
} // namespace impl
} // namespace numparse
U_NAMESPACE_END
#endif //__NUMPARSE_TYPES_H__
#endif /* #if !UCONFIG_NO_FORMATTING */

View File

@ -0,0 +1,124 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
#include "numparse_unisets.h"
#include "numparse_types.h"
#include "umutex.h"
#include "ucln_in.h"
#include "unicode/uniset.h"
using namespace icu;
using namespace icu::numparse;
using namespace icu::numparse::impl;
using namespace icu::numparse::impl::unisets;
namespace {
// Table of the shared sets, indexed by unisets::Key. Every slot starts out null; the slots are
// filled by initNumberParseUniSets() and released by cleanupNumberParseUnitSets().
UnicodeSet* gUnicodeSets[COUNT] = {};
/**
 * Builds a new, frozen set containing everything in the sets stored under k1 and k2.
 *
 * @return The newly allocated union, which the caller takes over, or nullptr if either source
 *         slot is still empty or the allocation fails.
 */
UnicodeSet* computeUnion(Key k1, Key k2) {
    const UnicodeSet* first = gUnicodeSets[k1];
    const UnicodeSet* second = gUnicodeSets[k2];
    if (first == nullptr || second == nullptr) {
        // A source set was never created (e.g. its allocation failed); do not dereference it.
        return nullptr;
    }
    UnicodeSet* result = new UnicodeSet();
    if (result == nullptr) {
        return nullptr;
    }
    result->addAll(*first);
    result->addAll(*second);
    result->freeze();
    return result;
}
/**
 * Builds a new, frozen set containing everything in the sets stored under k1, k2 and k3.
 *
 * @return The newly allocated union, which the caller takes over, or nullptr if any source
 *         slot is still empty or the allocation fails.
 */
UnicodeSet* computeUnion(Key k1, Key k2, Key k3) {
    const UnicodeSet* first = gUnicodeSets[k1];
    const UnicodeSet* second = gUnicodeSets[k2];
    const UnicodeSet* third = gUnicodeSets[k3];
    if (first == nullptr || second == nullptr || third == nullptr) {
        // A source set was never created (e.g. its allocation failed); do not dereference it.
        return nullptr;
    }
    UnicodeSet* result = new UnicodeSet();
    if (result == nullptr) {
        return nullptr;
    }
    result->addAll(*first);
    result->addAll(*second);
    result->addAll(*third);
    result->freeze();
    return result;
}
// Init-once state handed to umtx_initOnce() by unisets::get() so that initNumberParseUniSets()
// is run through the init-once mechanism rather than called directly.
icu::UInitOnce gNumberParseUniSetsInitOnce = U_INITONCE_INITIALIZER;
/**
 * Library cleanup callback: frees every set in gUnicodeSets, nulls the table, and resets the
 * init-once state so that a later unisets::get() rebuilds the sets.
 *
 * @return Always TRUE.
 */
UBool U_CALLCONV cleanupNumberParseUnitSets() {
    for (int32_t i = 0; i < COUNT; i++) {
        UnicodeSet* owned = gUnicodeSets[i];
        if (owned == nullptr) {
            continue;
        }
        // The same set object may be registered under more than one key. Clear every slot that
        // holds this pointer (slot i included) before deleting it, so it is freed exactly once.
        for (int32_t j = i; j < COUNT; j++) {
            if (gUnicodeSets[j] == owned) {
                gUnicodeSets[j] = nullptr;
            }
        }
        delete owned;
    }
    // Without this, the init-once would still count as done and get() would hand out null slots.
    gNumberParseUniSetsInitOnce.reset();
    return TRUE;
}
/**
 * Init-once callback that registers the cleanup function and fills every slot of gUnicodeSets.
 *
 * @param status Receives any error from parsing the set patterns; set to
 *               U_MEMORY_ALLOCATION_ERROR if a slot is still empty at the end.
 */
void U_CALLCONV initNumberParseUniSets(UErrorCode &status) {
    ucln_i18n_registerCleanup(UCLN_I18N_NUMPARSE_UNISETS, cleanupNumberParseUnitSets);
#define NEW_UNISET(pattern, status) new UnicodeSet(UnicodeString(pattern), status)

    // BiDi characters are skipped over and ignored at any point in the string, even in strict mode.
    gUnicodeSets[BIDI] = NEW_UNISET(u"[[\\u200E\\u200F\\u061C]]", status);

    // This set was decided after discussion with icu-design@. See ticket #13309.
    // Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property).
    gUnicodeSets[WHITESPACE] = NEW_UNISET(u"[[:Zs:][\\u0009]]", status);

    gUnicodeSets[DEFAULT_IGNORABLES] = computeUnion(BIDI, WHITESPACE);
    // Same contents as BIDI, but a separate object: each slot then owns exactly one set, so
    // deleting every slot during cleanup never frees the same set twice.
    gUnicodeSets[STRICT_IGNORABLES] = NEW_UNISET(u"[[\\u200E\\u200F\\u061C]]", status);

    // TODO: Re-generate these sets from the UCD. They probably haven't been updated in a while.
    gUnicodeSets[COMMA] = NEW_UNISET(u"[,،٫、︐︑﹐﹑,、]", status);
    gUnicodeSets[STRICT_COMMA] = NEW_UNISET(u"[,٫︐﹐,]", status);
    gUnicodeSets[PERIOD] = NEW_UNISET(u"[.․。︒﹒.。]", status);
    gUnicodeSets[STRICT_PERIOD] = NEW_UNISET(u"[.․﹒.。]", status);
    gUnicodeSets[OTHER_GROUPING_SEPARATORS] = NEW_UNISET(
            u"['٬‘’'\\u0020\\u00A0\\u2000-\\u200A\\u202F\\u205F\\u3000]", status);
    gUnicodeSets[ALL_SEPARATORS] = computeUnion(COMMA, PERIOD, OTHER_GROUPING_SEPARATORS);
    gUnicodeSets[STRICT_ALL_SEPARATORS] = computeUnion(
            STRICT_COMMA, STRICT_PERIOD, OTHER_GROUPING_SEPARATORS);

    gUnicodeSets[MINUS_SIGN] = NEW_UNISET(u"[-⁻₋−➖﹣-]", status);
    gUnicodeSets[PLUS_SIGN] = NEW_UNISET(u"[+⁺₊➕﬩﹢+]", status);

    gUnicodeSets[PERCENT_SIGN] = NEW_UNISET(u"[%٪]", status);
    gUnicodeSets[PERMILLE_SIGN] = NEW_UNISET(u"[‰؉]", status);
    gUnicodeSets[INFINITY] = NEW_UNISET(u"[∞]", status);

    gUnicodeSets[DIGITS] = NEW_UNISET(u"[:digit:]", status);
    gUnicodeSets[NAN_LEAD] = NEW_UNISET(
            u"[NnТтmeՈոс¤НнчTtsҳ\u975e\u1002\u0e9a\u10d0\u0f68\u0644\u0646]", status);
    gUnicodeSets[SCIENTIFIC_LEAD] = NEW_UNISET(u"[Ee×·е\u0627]", status);
    gUnicodeSets[CWCF] = NEW_UNISET(u"[:CWCF:]", status);

    gUnicodeSets[DIGITS_OR_ALL_SEPARATORS] = computeUnion(DIGITS, ALL_SEPARATORS);
    gUnicodeSets[DIGITS_OR_STRICT_ALL_SEPARATORS] = computeUnion(DIGITS, STRICT_ALL_SEPARATORS);

    // The helper macro is only meaningful inside this function.
#undef NEW_UNISET

    for (int32_t i = 0; i < COUNT; i++) {
        if (gUnicodeSets[i] == nullptr) {
            // The set could not be created; report it instead of dereferencing a null pointer.
            if (U_SUCCESS(status)) {
                status = U_MEMORY_ALLOCATION_ERROR;
            }
            continue;
        }
        gUnicodeSets[i]->freeze();
    }
}
}
/**
 * Returns the shared set registered under `key`, initializing the table on first use.
 *
 * @return The set, or nullptr if initialization failed or `key` is not a real set key
 *         (for example COUNT, which is used to mean "no key").
 */
const UnicodeSet* unisets::get(Key key) {
    UErrorCode localStatus = U_ZERO_ERROR;
    umtx_initOnce(gNumberParseUniSetsInitOnce, &initNumberParseUniSets, localStatus);
    if (U_FAILURE(localStatus)) {
        // TODO: This returns non-null in Java, and callers assume that.
        return nullptr;
    }
    // Guard the table lookup: COUNT and anything else outside [0, COUNT) has no slot.
    const int32_t index = static_cast<int32_t>(key);
    if (index < 0 || index >= COUNT) {
        return nullptr;
    }
    return gUnicodeSets[index];
}
/**
 * Returns key1 if the set registered under key1 contains str; otherwise returns COUNT.
 * COUNT is also returned when the set is unavailable (get() returned nullptr).
 */
Key unisets::chooseFrom(UnicodeString str, Key key1) {
    const UnicodeSet* candidate = get(key1);
    if (candidate != nullptr && candidate->contains(str)) {
        return key1;
    }
    return COUNT;
}
/**
 * Returns the first of key1, key2 whose set contains str, or COUNT if neither does.
 * A set that is unavailable (get() returned nullptr) is treated as not containing str.
 */
Key unisets::chooseFrom(UnicodeString str, Key key1, Key key2) {
    const UnicodeSet* firstSet = get(key1);
    if (firstSet != nullptr && firstSet->contains(str)) {
        return key1;
    }
    const UnicodeSet* secondSet = get(key2);
    if (secondSet != nullptr && secondSet->contains(str)) {
        return key2;
    }
    return COUNT;
}
#endif /* #if !UCONFIG_NO_FORMATTING */

View File

@ -0,0 +1,72 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
#ifndef __NUMPARSE_UNISETS_H__
#define __NUMPARSE_UNISETS_H__
#include "numparse_types.h"
#include "unicode/uniset.h"
U_NAMESPACE_BEGIN namespace numparse {
namespace impl {
namespace unisets {
// Identifies one of the shared sets returned by get(). The enumerators are used as array
// indices, and COUNT (the number of real keys) doubles as the "no key" value.
// NOTE(review): INFINITY is also the name of a macro in the C math header; if that header is
// included before this one the enumerator may be macro-expanded -- confirm on all toolchains.
enum Key {
// Ignorables
BIDI,
WHITESPACE,
DEFAULT_IGNORABLES,
STRICT_IGNORABLES,
// Separators
// Notes:
// - COMMA is a superset of STRICT_COMMA
// - PERIOD is a superset of STRICT_PERIOD
// - ALL_SEPARATORS is the union of COMMA, PERIOD, and OTHER_GROUPING_SEPARATORS
// - STRICT_ALL_SEPARATORS is the union of STRICT_COMMA, STRICT_PERIOD, and
//   OTHER_GROUPING_SEPARATORS
COMMA,
PERIOD,
STRICT_COMMA,
STRICT_PERIOD,
OTHER_GROUPING_SEPARATORS,
ALL_SEPARATORS,
STRICT_ALL_SEPARATORS,
// Symbols
// TODO: NaN?
MINUS_SIGN,
PLUS_SIGN,
PERCENT_SIGN,
PERMILLE_SIGN,
INFINITY,
// Other
DIGITS,
NAN_LEAD,
SCIENTIFIC_LEAD,
CWCF,
// Combined Separators with Digits (for lead code points)
DIGITS_OR_ALL_SEPARATORS,
DIGITS_OR_STRICT_ALL_SEPARATORS,
// The number of elements in the enum. Also used to indicate null.
COUNT
};
// Returns the shared set for the given key, building all sets on first use. May return nullptr
// if the sets could not be initialized.
const UnicodeSet* get(Key key);
// Returns key1 if its set contains str; otherwise returns COUNT.
Key chooseFrom(UnicodeString str, Key key1);
// Returns key1 if its set contains str, else key2 if its set contains str, else COUNT.
Key chooseFrom(UnicodeString str, Key key1, Key key2);
} // namespace unisets
} // namespace impl
} // namespace numparse
U_NAMESPACE_END
#endif //__NUMPARSE_UNISETS_H__
#endif /* #if !UCONFIG_NO_FORMATTING */

View File

@ -26,6 +26,7 @@ as the functions are suppose to be called.
It's usually best to have child dependencies called first. */
typedef enum ECleanupI18NType {
UCLN_I18N_START = -1,
UCLN_I18N_NUMPARSE_UNISETS,
UCLN_I18N_CURRENCY_SPACING,
UCLN_I18N_SPOOF,
UCLN_I18N_SPOOFDATA,

View File

@ -64,7 +64,7 @@ scientificnumberformattertest.o datadrivennumberformattestsuite.o \
numberformattesttuple.o numberformat2test.o pluralmaptest.o \
numbertest_affixutils.o numbertest_api.o numbertest_decimalquantity.o \
numbertest_modifiers.o numbertest_patternmodifier.o numbertest_patternstring.o \
numbertest_stringbuilder.o
numbertest_stringbuilder.o numbertest_stringsegment.o numbertest_unisets.o
DEPS = $(OBJECTS:.o=.d)

View File

@ -9,9 +9,13 @@
#include "number_stringbuilder.h"
#include "intltest.h"
#include "number_affixutils.h"
#include "numparse_stringsegment.h"
#include "unicode/locid.h"
using namespace icu::number;
using namespace icu::number::impl;
using namespace icu::numparse;
using namespace icu::numparse::impl;
////////////////////////////////////////////////////////////////////////////////////////
// INSTRUCTIONS: //
@ -178,6 +182,30 @@ class NumberStringBuilderTest : public IntlTest {
void assertEqualsImpl(const UnicodeString &a, const NumberStringBuilder &b);
};
// Unit tests for numparse::impl::StringSegment: offset and length bookkeeping, substring
// extraction, first-code-point lookup and common-prefix computation.
class StringSegmentTest : public IntlTest {
public:
void testOffset();
void testLength();
void testCharAt();
void testGetCodePoint();
void testCommonPrefixLength();
// Test-framework entry point that maps an index to one of the test methods above.
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
};
// Unit tests for the shared parsing sets in numparse::impl::unisets.
class UniSetsTest : public IntlTest {
public:
void testSetCoverage();
// Test-framework entry point that maps an index to one of the test methods above.
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
private:
// Asserts that `set` contains `str`, provided `str` is exactly one code point long;
// other strings are ignored. localeName and setName are used in the failure message.
void assertInSet(const UnicodeString& localeName, const UnicodeString &setName,
const UnicodeSet& set, const UnicodeString& str);
// Asserts that `set` contains the code point `cp`. localeName and setName are used in the
// failure message.
void assertInSet(const UnicodeString& localeName, const UnicodeString &setName,
const UnicodeSet& set, UChar32 cp);
};
// NOTE: This macro is identical to the one in itformat.cpp
#define TESTCLASS(id, TestClass) \
@ -206,6 +234,8 @@ class NumberTest : public IntlTest {
TESTCLASS(4, PatternModifierTest);
TESTCLASS(5, PatternStringTest);
TESTCLASS(6, NumberStringBuilderTest);
TESTCLASS(7, StringSegmentTest);
TESTCLASS(8, UniSetsTest);
default: name = ""; break; // needed to end loop
}
}

View File

@ -0,0 +1,94 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
#include "numbertest.h"
#include "numparse_stringsegment.h"
static const char16_t* SAMPLE_STRING = u"📻 radio 📻";
// Test-framework dispatcher: logs the suite name when executing and registers each test method
// through the TESTCASE_AUTO macros.
void StringSegmentTest::runIndexedTest(int32_t index, UBool exec, const char*&name, char*) {
if (exec) {
logln("TestSuite StringSegmentTest: ");
}
TESTCASE_AUTO_BEGIN;
TESTCASE_AUTO(testOffset);
TESTCASE_AUTO(testLength);
TESTCASE_AUTO(testCharAt);
TESTCASE_AUTO(testGetCodePoint);
TESTCASE_AUTO(testCommonPrefixLength);
TESTCASE_AUTO_END;
}
// Checks that getOffset() starts at 0, that adjustOffset() is cumulative, and that setOffset()
// replaces the offset outright.
void StringSegmentTest::testOffset() {
StringSegment segment(SAMPLE_STRING);
assertEquals("Initial Offset", 0, segment.getOffset());
segment.adjustOffset(3);
assertEquals("Adjust A", 3, segment.getOffset());
// Relative to the previous offset: 3 + 2 = 5.
segment.adjustOffset(2);
assertEquals("Adjust B", 5, segment.getOffset());
// Absolute, not relative.
segment.setOffset(4);
assertEquals("Set Offset", 4, segment.getOffset());
}
// Checks how length() reacts to offset changes, setLength() and resetLength(). The sample
// string is 11 UTF-16 code units long.
void StringSegmentTest::testLength() {
StringSegment segment(SAMPLE_STRING);
assertEquals("Initial length", 11, segment.length());
// Moving the start forward by 3 leaves 11 - 3 = 8 code units.
segment.adjustOffset(3);
assertEquals("Adjust", 8, segment.length());
// End is now at 3 + 4 = 7.
segment.setLength(4);
assertEquals("Set Length", 4, segment.length());
// The end stays at 7, so moving the start to 5 leaves 2 code units.
segment.setOffset(5);
assertEquals("After adjust offset", 2, segment.length());
// End returns to 11, so the length is 11 - 5 = 6.
segment.resetLength();
assertEquals("After reset length", 6, segment.length());
}
// Checks that toUnicodeString() reflects the current offset and length of the segment.
// NOTE(review): despite its name, this test never calls charAt() directly.
void StringSegmentTest::testCharAt() {
StringSegment segment(SAMPLE_STRING);
assertEquals("Initial", SAMPLE_STRING, segment.toUnicodeString());
// Skip the leading emoji (2 code units) and the space.
segment.adjustOffset(3);
assertEquals("After adjust-offset", UnicodeString(u"radio 📻"), segment.toUnicodeString());
// Keep only the 5 code units of "radio".
segment.setLength(5);
assertEquals("After adjust-length", UnicodeString(u"radio"), segment.toUnicodeString());
}
// Checks getCodePoint() on a surrogate pair, on each half of a split pair, and on a plain BMP
// character.
void StringSegmentTest::testGetCodePoint() {
    StringSegment segment(SAMPLE_STRING);
    assertEquals("Double-width code point", 0x1F4FB, segment.getCodePoint());
    // With length 1 the segment holds only the lead surrogate.
    segment.setLength(1);
    assertEquals("Invalid A", -1, segment.getCodePoint());
    segment.resetLength();
    // Offset 1 starts the segment on the trail surrogate.
    segment.adjustOffset(1);
    assertEquals("Invalid B", -1, segment.getCodePoint());
    // Offset 2 is the space that follows the emoji.
    segment.adjustOffset(1);
    assertEquals("Valid again", 0x20, segment.getCodePoint());
}
// Checks getCommonPrefixLength() against full, partial, non-matching and empty strings, at
// several offsets and lengths. Results are counted in UTF-16 code units.
void StringSegmentTest::testCommonPrefixLength() {
StringSegment segment(SAMPLE_STRING);
assertEquals("", 11, segment.getCommonPrefixLength(SAMPLE_STRING));
// The emoji counts as 2 code units, so these prefixes are 4 and 3 long.
assertEquals("", 4, segment.getCommonPrefixLength(u"📻 r"));
assertEquals("", 3, segment.getCommonPrefixLength(u"📻 x"));
assertEquals("", 0, segment.getCommonPrefixLength(u"x"));
assertEquals("", 0, segment.getCommonPrefixLength(u""));
// Segment now starts at "radio".
segment.adjustOffset(3);
// The comparison is case-sensitive.
assertEquals("", 0, segment.getCommonPrefixLength(u"RADiO"));
assertEquals("", 5, segment.getCommonPrefixLength(u"radio"));
assertEquals("", 2, segment.getCommonPrefixLength(u"rafio"));
assertEquals("", 0, segment.getCommonPrefixLength(u"fadio"));
assertEquals("", 0, segment.getCommonPrefixLength(u""));
// Segment is now "rad"; the result is capped by the segment length.
segment.setLength(3);
assertEquals("", 3, segment.getCommonPrefixLength(u"radio"));
assertEquals("", 2, segment.getCommonPrefixLength(u"rafio"));
assertEquals("", 0, segment.getCommonPrefixLength(u"fadio"));
assertEquals("", 0, segment.getCommonPrefixLength(u""));
segment.resetLength();
segment.setOffset(11); // end of string
assertEquals("", 0, segment.getCommonPrefixLength(u"foo"));
}
#endif

View File

@ -0,0 +1,99 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
#include "numbertest.h"
#include "numparse_unisets.h"
#include "unicode/dcfmtsym.h"
#include <iostream>
#include <cstr.h>
using icu::numparse::impl::unisets::get;
// Test-framework dispatcher: logs the suite name when executing and registers each test method
// through the TESTCASE_AUTO macros.
void UniSetsTest::runIndexedTest(int32_t index, UBool exec, const char*&name, char*) {
if (exec) {
logln("TestSuite UniSetsTest: ");
}
TESTCASE_AUTO_BEGIN;
TESTCASE_AUTO(testSetCoverage);
TESTCASE_AUTO_END;
}
void UniSetsTest::testSetCoverage() {
UErrorCode status = U_ZERO_ERROR;
// Lenient comma/period should be supersets of strict comma/period;
// it also makes the coverage logic cheaper.
assertTrue(
"COMMA should be superset of STRICT_COMMA",
get(unisets::COMMA)->containsAll(*get(unisets::STRICT_COMMA)));
assertTrue(
"PERIOD should be superset of STRICT_PERIOD",
get(unisets::PERIOD)->containsAll(*get(unisets::STRICT_PERIOD)));
UnicodeSet decimals;
decimals.addAll(*get(unisets::STRICT_COMMA));
decimals.addAll(*get(unisets::STRICT_PERIOD));
decimals.freeze();
UnicodeSet grouping;
grouping.addAll(decimals);
grouping.addAll(*get(unisets::OTHER_GROUPING_SEPARATORS));
decimals.freeze();
const UnicodeSet &plusSign = *get(unisets::PLUS_SIGN);
const UnicodeSet &minusSign = *get(unisets::MINUS_SIGN);
const UnicodeSet &percent = *get(unisets::PERCENT_SIGN);
const UnicodeSet &permille = *get(unisets::PERMILLE_SIGN);
const UnicodeSet &infinity = *get(unisets::INFINITY);
const UnicodeSet &nanLead = *get(unisets::NAN_LEAD);
const UnicodeSet &scientificLead = *get(unisets::SCIENTIFIC_LEAD);
int32_t localeCount;
const Locale* allAvailableLocales = Locale::getAvailableLocales(localeCount);
for (int32_t i = 0; i < localeCount; i++) {
Locale locale = allAvailableLocales[i];
DecimalFormatSymbols dfs(locale, status);
UnicodeString localeName;
locale.getDisplayName(localeName);
assertSuccess(UnicodeString("Making DFS for ") + localeName, status);
#define ASSERT_IN_SET(name, foo) assertInSet(localeName, UnicodeString("" #name ""), name, foo)
ASSERT_IN_SET(decimals, dfs.getConstSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol));
ASSERT_IN_SET(grouping, dfs.getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol));
ASSERT_IN_SET(plusSign, dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol));
ASSERT_IN_SET(minusSign, dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol));
ASSERT_IN_SET(percent, dfs.getConstSymbol(DecimalFormatSymbols::kPercentSymbol));
ASSERT_IN_SET(permille, dfs.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol));
ASSERT_IN_SET(infinity, dfs.getConstSymbol(DecimalFormatSymbols::kInfinitySymbol));
ASSERT_IN_SET(nanLead, dfs.getConstSymbol(DecimalFormatSymbols::kNaNSymbol).char32At(0));
ASSERT_IN_SET(nanLead,
u_foldCase(dfs.getConstSymbol(DecimalFormatSymbols::kNaNSymbol).char32At(0), 0));
ASSERT_IN_SET(scientificLead,
u_foldCase(dfs.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol).char32At(0), 0));
}
}
void UniSetsTest::assertInSet(const UnicodeString &localeName, const UnicodeString &setName,
const UnicodeSet &set, const UnicodeString &str) {
if (str.countChar32(0, str.length()) != 1) {
// Ignore locale strings with more than one code point (usually a bidi mark)
return;
}
assertInSet(localeName, setName, set, str.char32At(0));
}
void UniSetsTest::assertInSet(const UnicodeString &localeName, const UnicodeString &setName,
const UnicodeSet &set, UChar32 cp) {
// If this test case fails, add the specified code point to the corresponding set in
// UnicodeSetStaticCache.java and numparse_unisets.cpp
assertTrue(
localeName + UnicodeString(u" ") + UnicodeString(cp) + UnicodeString(u" is missing in ") +
setName, set.contains(cp));
}
#endif