ICU-13657 Adding NumberFormatter skeleton factory method with UParseError.
- Includes small fixes to StringSegment and to tests.
This commit is contained in:
parent
85600f2639
commit
7507f5322f
@ -76,6 +76,20 @@ unumf_openForSkeletonAndLocale(const UChar* skeleton, int32_t skeletonLen, const
|
||||
return impl->exportForC();
|
||||
}
|
||||
|
||||
/**
 * C entry point: builds a UNumberFormatter from a number skeleton and a locale,
 * optionally reporting the location of a skeleton syntax error.
 *
 * @param skeleton    The skeleton string.
 * @param skeletonLen Length of the skeleton in UChars, or -1 if it is NUL-terminated.
 * @param locale      The locale ID passed on to the formatter.
 * @param perror      Receives parse-error details; may be NULL, in which case the
 *                    details are written to a local scratch struct and discarded.
 * @param ec          Set to U_MEMORY_ALLOCATION_ERROR if the formatter object cannot be
 *                    allocated; otherwise receives whatever status skeleton parsing produces.
 * @return nullptr on allocation failure; otherwise the exported formatter handle. The handle
 *         is returned even when *ec reports a parse failure, so the caller still has to
 *         close it.
 */
U_CAPI UNumberFormatter* U_EXPORT2
unumf_openForSkeletonAndLocaleWithError(const UChar* skeleton, int32_t skeletonLen, const char* locale,
                                        UParseError* perror, UErrorCode* ec) {
    auto* impl = new UNumberFormatterData();
    if (impl == nullptr) {
        *ec = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    // Readonly-alias constructor (first argument is whether we are NUL-terminated)
    UnicodeString skeletonString(skeletonLen == -1, skeleton, skeletonLen);
    // forSkeleton() takes the parse error by reference, but this C API allows perror to be
    // NULL. Dereferencing a null pointer to form that reference is undefined behaviour, so
    // bind a throwaway struct instead when the caller did not supply one.
    UParseError tempParseError;
    impl->fFormatter = NumberFormatter::forSkeleton(
            skeletonString, (perror == nullptr) ? tempParseError : *perror, *ec).locale(locale);
    return impl->exportForC();
}
|
||||
|
||||
U_CAPI UFormattedNumber* U_EXPORT2
|
||||
unumf_openResult(UErrorCode* ec) {
|
||||
auto* impl = new UFormattedNumberImpl();
|
||||
|
@ -347,7 +347,12 @@ LocalizedNumberFormatter NumberFormatter::withLocale(const Locale& locale) {
|
||||
|
||||
UnlocalizedNumberFormatter
|
||||
NumberFormatter::forSkeleton(const UnicodeString& skeleton, UErrorCode& status) {
|
||||
return skeleton::create(skeleton, status);
|
||||
return skeleton::create(skeleton, nullptr, status);
|
||||
}
|
||||
|
||||
// Skeleton factory overload that additionally reports parse-error details through perror.
UnlocalizedNumberFormatter
NumberFormatter::forSkeleton(const UnicodeString& skeleton, UParseError& perror, UErrorCode& status) {
    // skeleton::create() takes the parse-error struct as a pointer; hand it the caller's struct.
    UParseError* const perrorPtr = &perror;
    return skeleton::create(skeleton, perrorPtr, status);
}
|
||||
|
||||
|
||||
|
@ -406,10 +406,41 @@ enum_to_stem_string::decimalSeparatorDisplay(UNumberDecimalSeparatorDisplay valu
|
||||
}
|
||||
|
||||
|
||||
UnlocalizedNumberFormatter skeleton::create(const UnicodeString& skeletonString, UErrorCode& status) {
|
||||
UnlocalizedNumberFormatter skeleton::create(
|
||||
const UnicodeString& skeletonString, UParseError* perror, UErrorCode& status) {
|
||||
|
||||
// Initialize perror
|
||||
if (perror != nullptr) {
|
||||
perror->line = 0;
|
||||
perror->offset = -1;
|
||||
perror->preContext[0] = 0;
|
||||
perror->postContext[0] = 0;
|
||||
}
|
||||
|
||||
umtx_initOnce(gNumberSkeletonsInitOnce, &initNumberSkeletons, status);
|
||||
MacroProps macros = parseSkeleton(skeletonString, status);
|
||||
return NumberFormatter::with().macros(macros);
|
||||
if (U_FAILURE(status)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
int32_t errOffset;
|
||||
MacroProps macros = parseSkeleton(skeletonString, errOffset, status);
|
||||
if (U_SUCCESS(status)) {
|
||||
return NumberFormatter::with().macros(macros);
|
||||
}
|
||||
|
||||
if (perror == nullptr) {
|
||||
return {};
|
||||
}
|
||||
|
||||
// Populate the UParseError with the error location
|
||||
perror->offset = errOffset;
|
||||
int32_t contextStart = uprv_max(0, errOffset - U_PARSE_CONTEXT_LEN + 1);
|
||||
int32_t contextEnd = uprv_min(skeletonString.length(), errOffset + U_PARSE_CONTEXT_LEN - 1);
|
||||
skeletonString.extract(contextStart, errOffset - contextStart, perror->preContext, 0);
|
||||
perror->preContext[errOffset - contextStart] = 0;
|
||||
skeletonString.extract(errOffset, contextEnd - errOffset, perror->postContext, 0);
|
||||
perror->postContext[contextEnd - errOffset] = 0;
|
||||
return {};
|
||||
}
|
||||
|
||||
UnicodeString skeleton::generate(const MacroProps& macros, UErrorCode& status) {
|
||||
@ -419,8 +450,9 @@ UnicodeString skeleton::generate(const MacroProps& macros, UErrorCode& status) {
|
||||
return sb;
|
||||
}
|
||||
|
||||
MacroProps skeleton::parseSkeleton(const UnicodeString& skeletonString, UErrorCode& status) {
|
||||
if (U_FAILURE(status)) { return MacroProps(); }
|
||||
MacroProps skeleton::parseSkeleton(
|
||||
const UnicodeString& skeletonString, int32_t& errOffset, UErrorCode& status) {
|
||||
U_ASSERT(U_SUCCESS(status));
|
||||
|
||||
// Add a trailing whitespace to the end of the skeleton string to make code cleaner.
|
||||
UnicodeString tempSkeletonString(skeletonString);
|
||||
@ -464,7 +496,10 @@ MacroProps skeleton::parseSkeleton(const UnicodeString& skeletonString, UErrorCo
|
||||
stem = parseOption(stem, segment, macros, status);
|
||||
}
|
||||
segment.resetLength();
|
||||
if (U_FAILURE(status)) { return macros; }
|
||||
if (U_FAILURE(status)) {
|
||||
errOffset = segment.getOffset();
|
||||
return macros;
|
||||
}
|
||||
|
||||
// Consume the segment:
|
||||
segment.adjustOffset(offset);
|
||||
@ -475,6 +510,7 @@ MacroProps skeleton::parseSkeleton(const UnicodeString& skeletonString, UErrorCo
|
||||
// segment.setLength(U16_LENGTH(cp)); // for error message
|
||||
// throw new SkeletonSyntaxException("Unexpected separator character", segment);
|
||||
status = U_NUMBER_SKELETON_SYNTAX_ERROR;
|
||||
errOffset = segment.getOffset();
|
||||
return macros;
|
||||
|
||||
} else {
|
||||
@ -486,6 +522,7 @@ MacroProps skeleton::parseSkeleton(const UnicodeString& skeletonString, UErrorCo
|
||||
// segment.setLength(U16_LENGTH(cp)); // for error message
|
||||
// throw new SkeletonSyntaxException("Unexpected option separator", segment);
|
||||
status = U_NUMBER_SKELETON_SYNTAX_ERROR;
|
||||
errOffset = segment.getOffset();
|
||||
return macros;
|
||||
}
|
||||
|
||||
@ -502,6 +539,7 @@ MacroProps skeleton::parseSkeleton(const UnicodeString& skeletonString, UErrorCo
|
||||
// segment.setLength(U16_LENGTH(cp)); // for error message
|
||||
// throw new SkeletonSyntaxException("Stem requires an option", segment);
|
||||
status = U_NUMBER_SKELETON_SYNTAX_ERROR;
|
||||
errOffset = segment.getOffset();
|
||||
return macros;
|
||||
default:
|
||||
break;
|
||||
|
@ -122,7 +122,8 @@ enum StemEnum {
|
||||
* A number skeleton string, possibly not in its shortest form.
|
||||
* @return An UnlocalizedNumberFormatter with behavior defined by the given skeleton string.
|
||||
*/
|
||||
UnlocalizedNumberFormatter create(const UnicodeString& skeletonString, UErrorCode& status);
|
||||
UnlocalizedNumberFormatter create(
|
||||
const UnicodeString& skeletonString, UParseError* perror, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Create a skeleton string corresponding to the given NumberFormatter.
|
||||
@ -138,7 +139,7 @@ UnicodeString generate(const MacroProps& macros, UErrorCode& status);
|
||||
*
|
||||
* Internal: use the create() endpoint instead of this function.
|
||||
*/
|
||||
MacroProps parseSkeleton(const UnicodeString& skeletonString, UErrorCode& status);
|
||||
MacroProps parseSkeleton(const UnicodeString& skeletonString, int32_t& errOffset, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Given that the current segment represents a stem, parse it and save the result.
|
||||
|
@ -266,7 +266,7 @@ class U_I18N_API StringSegment : public UMemory {
|
||||
bool operator==(const UnicodeString& other) const;
|
||||
|
||||
private:
|
||||
const UnicodeString fStr;
|
||||
const UnicodeString& fStr;
|
||||
int32_t fStart;
|
||||
int32_t fEnd;
|
||||
bool fFoldCase;
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include "unicode/fpositer.h"
|
||||
#include "unicode/measunit.h"
|
||||
#include "unicode/nounit.h"
|
||||
#include "unicode/parseerr.h"
|
||||
#include "unicode/plurrule.h"
|
||||
#include "unicode/ucurr.h"
|
||||
#include "unicode/unum.h"
|
||||
@ -2549,6 +2550,9 @@ class U_I18N_API NumberFormatter final {
|
||||
* Call this method at the beginning of a NumberFormatter fluent chain to create an instance based
|
||||
* on a given number skeleton string.
|
||||
*
|
||||
* It is possible for an error to occur while parsing. See the overload of this method if you are
|
||||
* interested in the location of a possible parse error.
|
||||
*
|
||||
* @param skeleton
|
||||
* The skeleton string off of which to base this NumberFormatter.
|
||||
* @param status
|
||||
@ -2558,6 +2562,26 @@ class U_I18N_API NumberFormatter final {
|
||||
*/
|
||||
static UnlocalizedNumberFormatter forSkeleton(const UnicodeString& skeleton, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Call this method at the beginning of a NumberFormatter fluent chain to create an instance based
|
||||
* on a given number skeleton string.
|
||||
*
|
||||
* If an error occurs while parsing the skeleton string, the offset into the skeleton string at
|
||||
* which the error occurred will be saved into the UParseError, if provided.
|
||||
*
|
||||
* @param skeleton
|
||||
* The skeleton string off of which to base this NumberFormatter.
|
||||
* @param perror
|
||||
* A parse error struct populated if an error occurs when parsing.
|
||||
* If no error occurs, perror.offset will be set to -1.
|
||||
* @param status
|
||||
* Set to U_NUMBER_SKELETON_SYNTAX_ERROR if the skeleton was invalid.
|
||||
* @return An UnlocalizedNumberFormatter, to be used for chaining.
|
||||
* @draft ICU 64
|
||||
*/
|
||||
static UnlocalizedNumberFormatter forSkeleton(const UnicodeString& skeleton,
|
||||
UParseError& perror, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Use factory methods instead of the constructor to create a NumberFormatter.
|
||||
*/
|
||||
|
@ -7,6 +7,7 @@
|
||||
#ifndef __UNUMBERFORMATTER_H__
|
||||
#define __UNUMBERFORMATTER_H__
|
||||
|
||||
#include "unicode/parseerr.h"
|
||||
#include "unicode/ufieldpositer.h"
|
||||
#include "unicode/umisc.h"
|
||||
#include "unicode/uformattedvalue.h"
|
||||
@ -454,6 +455,23 @@ unumf_openForSkeletonAndLocale(const UChar* skeleton, int32_t skeletonLen, const
|
||||
UErrorCode* ec);
|
||||
|
||||
|
||||
/**
|
||||
* Like unumf_openForSkeletonAndLocale, but accepts a UParseError, which will be populated with the
|
||||
* location of a skeleton syntax error if such a syntax error exists.
|
||||
*
|
||||
* @param skeleton The skeleton string, like u"percent precision-integer"
|
||||
* @param skeletonLen The number of UChars in the skeleton string, or -1 if it is NUL-terminated.
|
||||
* @param locale The NUL-terminated locale ID.
|
||||
* @param perror A parse error struct populated if an error occurs when parsing. Can be NULL.
|
||||
* If no error occurs, perror->offset will be set to -1.
|
||||
* @param ec Set if an error occurs.
|
||||
* @draft ICU 64
|
||||
*/
|
||||
U_DRAFT UNumberFormatter* U_EXPORT2
|
||||
unumf_openForSkeletonAndLocaleWithError(
|
||||
const UChar* skeleton, int32_t skeletonLen, const char* locale, UParseError* perror, UErrorCode* ec);
|
||||
|
||||
|
||||
/**
|
||||
* Creates a new UFormattedNumber for holding the result of a number formatting operation.
|
||||
*
|
||||
|
@ -24,6 +24,8 @@ static void TestExampleCode(void);
|
||||
|
||||
static void TestFormattedValue(void);
|
||||
|
||||
static void TestSkeletonParseError(void);
|
||||
|
||||
void addUNumberFormatterTest(TestNode** root);
|
||||
|
||||
#define TESTCASE(x) addTest(root, &x, "tsformat/unumberformatter/" #x)
|
||||
@ -33,6 +35,7 @@ void addUNumberFormatterTest(TestNode** root) {
|
||||
TESTCASE(TestSkeletonFormatToFields);
|
||||
TESTCASE(TestExampleCode);
|
||||
TESTCASE(TestFormattedValue);
|
||||
TESTCASE(TestSkeletonParseError);
|
||||
}
|
||||
|
||||
|
||||
@ -229,4 +232,29 @@ static void TestFormattedValue() {
|
||||
}
|
||||
|
||||
|
||||
static void TestSkeletonParseError() {
|
||||
UErrorCode ec = U_ZERO_ERROR;
|
||||
UNumberFormatter* uformatter;
|
||||
UParseError perror;
|
||||
|
||||
// The UParseError can be null. The following should not segfault.
|
||||
uformatter = unumf_openForSkeletonAndLocaleWithError(
|
||||
u".00 measure-unit/typo", -1, "en", NULL, &ec);
|
||||
unumf_close(uformatter);
|
||||
|
||||
// Now test the behavior.
|
||||
ec = U_ZERO_ERROR;
|
||||
uformatter = unumf_openForSkeletonAndLocaleWithError(
|
||||
u".00 measure-unit/typo", -1, "en", &perror, &ec);
|
||||
|
||||
assertIntEquals("Should have set error code", U_NUMBER_SKELETON_SYNTAX_ERROR, ec);
|
||||
assertIntEquals("Should have correct skeleton error offset", 17, perror.offset);
|
||||
assertUEquals("Should have correct pre context", u"0 measure-unit/", perror.preContext);
|
||||
assertUEquals("Should have correct post context", u"typo", perror.postContext);
|
||||
|
||||
// cleanup:
|
||||
unumf_close(uformatter);
|
||||
}
|
||||
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
@ -110,8 +110,10 @@ void NumberSkeletonTest::validTokens() {
|
||||
for (auto& cas : cases) {
|
||||
UnicodeString skeletonString(cas);
|
||||
status.setScope(skeletonString);
|
||||
NumberFormatter::forSkeleton(skeletonString, status);
|
||||
UParseError perror;
|
||||
NumberFormatter::forSkeleton(skeletonString, perror, status);
|
||||
assertSuccess(CStr(skeletonString)(), status, true);
|
||||
assertEquals(skeletonString, -1, perror.offset);
|
||||
status.errIfFailureAndReset();
|
||||
}
|
||||
}
|
||||
@ -193,7 +195,7 @@ void NumberSkeletonTest::stemsRequiringOption() {
|
||||
static const char16_t* stems[] = {
|
||||
u"precision-increment",
|
||||
u"measure-unit",
|
||||
u"per-unit",
|
||||
u"per-measure-unit",
|
||||
u"currency",
|
||||
u"integer-width",
|
||||
u"numbering-system",
|
||||
@ -204,8 +206,23 @@ void NumberSkeletonTest::stemsRequiringOption() {
|
||||
for (auto& suffix : suffixes) {
|
||||
UnicodeString skeletonString = UnicodeString(stem) + suffix;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
NumberFormatter::forSkeleton(skeletonString, status);
|
||||
UParseError perror;
|
||||
NumberFormatter::forSkeleton(skeletonString, perror, status);
|
||||
assertEquals(skeletonString, U_NUMBER_SKELETON_SYNTAX_ERROR, status);
|
||||
|
||||
// Check the UParseError for integrity.
|
||||
// If an option is present, the option is wrong; error offset is at the start of the option
|
||||
// If an option is not present, the error offset is at the token separator (end of stem)
|
||||
int32_t expectedOffset = u_strlen(stem) + ((suffix[0] == u'/') ? 1 : 0);
|
||||
assertEquals(skeletonString, expectedOffset, perror.offset);
|
||||
UnicodeString expectedPreContext = skeletonString.tempSubString(0, expectedOffset);
|
||||
if (expectedPreContext.length() >= U_PARSE_CONTEXT_LEN - 1) {
|
||||
expectedPreContext = expectedPreContext.tempSubString(expectedOffset - U_PARSE_CONTEXT_LEN + 1);
|
||||
}
|
||||
assertEquals(skeletonString, expectedPreContext, perror.preContext);
|
||||
UnicodeString expectedPostContext = skeletonString.tempSubString(expectedOffset);
|
||||
// None of the postContext strings in this test exceed U_PARSE_CONTEXT_LEN
|
||||
assertEquals(skeletonString, expectedPostContext, perror.postContext);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -24,7 +24,9 @@ void StringSegmentTest::runIndexedTest(int32_t index, UBool exec, const char*&na
|
||||
}
|
||||
|
||||
void StringSegmentTest::testOffset() {
|
||||
StringSegment segment(SAMPLE_STRING, false);
|
||||
// Note: sampleString needs function scope so it is valid while the StringSegment is valid
|
||||
UnicodeString sampleString(SAMPLE_STRING);
|
||||
StringSegment segment(sampleString, false);
|
||||
assertEquals("Initial Offset", 0, segment.getOffset());
|
||||
segment.adjustOffset(3);
|
||||
assertEquals("Adjust A", 3, segment.getOffset());
|
||||
@ -35,7 +37,9 @@ void StringSegmentTest::testOffset() {
|
||||
}
|
||||
|
||||
void StringSegmentTest::testLength() {
|
||||
StringSegment segment(SAMPLE_STRING, false);
|
||||
// Note: sampleString needs function scope so it is valid while the StringSegment is valid
|
||||
UnicodeString sampleString(SAMPLE_STRING);
|
||||
StringSegment segment(sampleString, false);
|
||||
assertEquals("Initial length", 11, segment.length());
|
||||
segment.adjustOffset(3);
|
||||
assertEquals("Adjust", 8, segment.length());
|
||||
@ -48,7 +52,9 @@ void StringSegmentTest::testLength() {
|
||||
}
|
||||
|
||||
void StringSegmentTest::testCharAt() {
|
||||
StringSegment segment(SAMPLE_STRING, false);
|
||||
// Note: sampleString needs function scope so it is valid while the StringSegment is valid
|
||||
UnicodeString sampleString(SAMPLE_STRING);
|
||||
StringSegment segment(sampleString, false);
|
||||
assertEquals("Initial", SAMPLE_STRING, segment.toUnicodeString());
|
||||
assertEquals("Initial", SAMPLE_STRING, segment.toTempUnicodeString());
|
||||
segment.adjustOffset(3);
|
||||
@ -60,7 +66,9 @@ void StringSegmentTest::testCharAt() {
|
||||
}
|
||||
|
||||
void StringSegmentTest::testGetCodePoint() {
|
||||
StringSegment segment(SAMPLE_STRING, false);
|
||||
// Note: sampleString needs function scope so it is valid while the StringSegment is valid
|
||||
UnicodeString sampleString(SAMPLE_STRING);
|
||||
StringSegment segment(sampleString, false);
|
||||
assertEquals("Double-width code point", 0x1F4FB, segment.getCodePoint());
|
||||
segment.setLength(1);
|
||||
assertEquals("Inalid A", -1, segment.getCodePoint());
|
||||
@ -72,7 +80,9 @@ void StringSegmentTest::testGetCodePoint() {
|
||||
}
|
||||
|
||||
void StringSegmentTest::testCommonPrefixLength() {
|
||||
StringSegment segment(SAMPLE_STRING, false);
|
||||
// Note: sampleString needs function scope so it is valid while the StringSegment is valid
|
||||
UnicodeString sampleString(SAMPLE_STRING);
|
||||
StringSegment segment(sampleString, false);
|
||||
assertEquals("", 11, segment.getCommonPrefixLength(SAMPLE_STRING));
|
||||
assertEquals("", 4, segment.getCommonPrefixLength(u"📻 r"));
|
||||
assertEquals("", 3, segment.getCommonPrefixLength(u"📻 x"));
|
||||
|
@ -213,7 +213,7 @@ public class NumberSkeletonTest {
|
||||
String[] stems = {
|
||||
"precision-increment",
|
||||
"measure-unit",
|
||||
"per-unit",
|
||||
"per-measure-unit",
|
||||
"currency",
|
||||
"integer-width",
|
||||
"numbering-system",
|
||||
|
Loading…
Reference in New Issue
Block a user