ICU-13657 Adding NumberFormatter skeleton factory method with UParseError.

- Includes small fixes to StringSegment and to tests.
This commit is contained in:
Shane Carr 2019-01-18 22:30:12 -08:00 committed by Shane F. Carr
parent 85600f2639
commit 7507f5322f
11 changed files with 174 additions and 19 deletions

View File

@ -76,6 +76,20 @@ unumf_openForSkeletonAndLocale(const UChar* skeleton, int32_t skeletonLen, const
return impl->exportForC();
}
/**
 * C entry point: builds a UNumberFormatter from a skeleton string and a locale,
 * optionally reporting where a skeleton syntax error occurred.
 *
 * @param skeleton    The skeleton string.
 * @param skeletonLen Number of UChars in skeleton, or -1 if it is NUL-terminated.
 * @param locale      The locale ID applied to the parsed formatter.
 * @param perror      Receives parse error details. May be NULL, in which case the
 *                    details are collected in a local scratch struct and discarded.
 * @param ec          Set to U_MEMORY_ALLOCATION_ERROR if the formatter object cannot
 *                    be allocated; otherwise forwarded to the skeleton parser.
 * @return The exported formatter handle, or nullptr if allocation failed.
 */
U_CAPI UNumberFormatter* U_EXPORT2
unumf_openForSkeletonAndLocaleWithError(const UChar* skeleton, int32_t skeletonLen, const char* locale,
                                        UParseError* perror, UErrorCode* ec) {
    auto* impl = new UNumberFormatterData();
    if (impl == nullptr) {
        *ec = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    // Readonly-alias constructor (first argument is whether we are NUL-terminated)
    UnicodeString skeletonString(skeletonLen == -1, skeleton, skeletonLen);
    // The C++ overload takes UParseError by reference. Dereferencing a NULL perror
    // to form that reference would be undefined behavior, so fall back to a
    // throwaway struct when the caller does not want the error details.
    UParseError tempParseError;
    UParseError& parseErrorSink = (perror == nullptr) ? tempParseError : *perror;
    impl->fFormatter = NumberFormatter::forSkeleton(skeletonString, parseErrorSink, *ec).locale(locale);
    return impl->exportForC();
}
U_CAPI UFormattedNumber* U_EXPORT2
unumf_openResult(UErrorCode* ec) {
auto* impl = new UFormattedNumberImpl();

View File

@ -347,7 +347,12 @@ LocalizedNumberFormatter NumberFormatter::withLocale(const Locale& locale) {
UnlocalizedNumberFormatter
NumberFormatter::forSkeleton(const UnicodeString& skeleton, UErrorCode& status) {
return skeleton::create(skeleton, status);
return skeleton::create(skeleton, nullptr, status);
}
// Skeleton factory overload that also reports parse error details: passes the
// address of the caller's UParseError through to skeleton::create().
UnlocalizedNumberFormatter
NumberFormatter::forSkeleton(const UnicodeString& skeleton, UParseError& perror, UErrorCode& status) {
    UParseError* const errorDetails = &perror;
    return skeleton::create(skeleton, errorDetails, status);
}

View File

@ -406,10 +406,41 @@ enum_to_stem_string::decimalSeparatorDisplay(UNumberDecimalSeparatorDisplay valu
}
UnlocalizedNumberFormatter skeleton::create(const UnicodeString& skeletonString, UErrorCode& status) {
UnlocalizedNumberFormatter skeleton::create(
const UnicodeString& skeletonString, UParseError* perror, UErrorCode& status) {
// Initialize perror
if (perror != nullptr) {
perror->line = 0;
perror->offset = -1;
perror->preContext[0] = 0;
perror->postContext[0] = 0;
}
umtx_initOnce(gNumberSkeletonsInitOnce, &initNumberSkeletons, status);
MacroProps macros = parseSkeleton(skeletonString, status);
return NumberFormatter::with().macros(macros);
if (U_FAILURE(status)) {
return {};
}
int32_t errOffset;
MacroProps macros = parseSkeleton(skeletonString, errOffset, status);
if (U_SUCCESS(status)) {
return NumberFormatter::with().macros(macros);
}
if (perror == nullptr) {
return {};
}
// Populate the UParseError with the error location
perror->offset = errOffset;
int32_t contextStart = uprv_max(0, errOffset - U_PARSE_CONTEXT_LEN + 1);
int32_t contextEnd = uprv_min(skeletonString.length(), errOffset + U_PARSE_CONTEXT_LEN - 1);
skeletonString.extract(contextStart, errOffset - contextStart, perror->preContext, 0);
perror->preContext[errOffset - contextStart] = 0;
skeletonString.extract(errOffset, contextEnd - errOffset, perror->postContext, 0);
perror->postContext[contextEnd - errOffset] = 0;
return {};
}
UnicodeString skeleton::generate(const MacroProps& macros, UErrorCode& status) {
@ -419,8 +450,9 @@ UnicodeString skeleton::generate(const MacroProps& macros, UErrorCode& status) {
return sb;
}
MacroProps skeleton::parseSkeleton(const UnicodeString& skeletonString, UErrorCode& status) {
if (U_FAILURE(status)) { return MacroProps(); }
MacroProps skeleton::parseSkeleton(
const UnicodeString& skeletonString, int32_t& errOffset, UErrorCode& status) {
U_ASSERT(U_SUCCESS(status));
// Add a trailing whitespace to the end of the skeleton string to make code cleaner.
UnicodeString tempSkeletonString(skeletonString);
@ -464,7 +496,10 @@ MacroProps skeleton::parseSkeleton(const UnicodeString& skeletonString, UErrorCo
stem = parseOption(stem, segment, macros, status);
}
segment.resetLength();
if (U_FAILURE(status)) { return macros; }
if (U_FAILURE(status)) {
errOffset = segment.getOffset();
return macros;
}
// Consume the segment:
segment.adjustOffset(offset);
@ -475,6 +510,7 @@ MacroProps skeleton::parseSkeleton(const UnicodeString& skeletonString, UErrorCo
// segment.setLength(U16_LENGTH(cp)); // for error message
// throw new SkeletonSyntaxException("Unexpected separator character", segment);
status = U_NUMBER_SKELETON_SYNTAX_ERROR;
errOffset = segment.getOffset();
return macros;
} else {
@ -486,6 +522,7 @@ MacroProps skeleton::parseSkeleton(const UnicodeString& skeletonString, UErrorCo
// segment.setLength(U16_LENGTH(cp)); // for error message
// throw new SkeletonSyntaxException("Unexpected option separator", segment);
status = U_NUMBER_SKELETON_SYNTAX_ERROR;
errOffset = segment.getOffset();
return macros;
}
@ -502,6 +539,7 @@ MacroProps skeleton::parseSkeleton(const UnicodeString& skeletonString, UErrorCo
// segment.setLength(U16_LENGTH(cp)); // for error message
// throw new SkeletonSyntaxException("Stem requires an option", segment);
status = U_NUMBER_SKELETON_SYNTAX_ERROR;
errOffset = segment.getOffset();
return macros;
default:
break;

View File

@ -122,7 +122,8 @@ enum StemEnum {
* A number skeleton string, possibly not in its shortest form.
* @return An UnlocalizedNumberFormatter with behavior defined by the given skeleton string.
*/
UnlocalizedNumberFormatter create(const UnicodeString& skeletonString, UErrorCode& status);
UnlocalizedNumberFormatter create(
const UnicodeString& skeletonString, UParseError* perror, UErrorCode& status);
/**
* Create a skeleton string corresponding to the given NumberFormatter.
@ -138,7 +139,7 @@ UnicodeString generate(const MacroProps& macros, UErrorCode& status);
*
* Internal: use the create() endpoint instead of this function.
*/
MacroProps parseSkeleton(const UnicodeString& skeletonString, UErrorCode& status);
MacroProps parseSkeleton(const UnicodeString& skeletonString, int32_t& errOffset, UErrorCode& status);
/**
* Given that the current segment represents a stem, parse it and save the result.

View File

@ -266,7 +266,7 @@ class U_I18N_API StringSegment : public UMemory {
bool operator==(const UnicodeString& other) const;
private:
const UnicodeString fStr;
const UnicodeString& fStr;
int32_t fStart;
int32_t fEnd;
bool fFoldCase;

View File

@ -15,6 +15,7 @@
#include "unicode/fpositer.h"
#include "unicode/measunit.h"
#include "unicode/nounit.h"
#include "unicode/parseerr.h"
#include "unicode/plurrule.h"
#include "unicode/ucurr.h"
#include "unicode/unum.h"
@ -2549,6 +2550,9 @@ class U_I18N_API NumberFormatter final {
* Call this method at the beginning of a NumberFormatter fluent chain to create an instance based
* on a given number skeleton string.
*
* It is possible for an error to occur while parsing. See the overload of this method if you are
* interested in the location of a possible parse error.
*
* @param skeleton
* The skeleton string off of which to base this NumberFormatter.
* @param status
@ -2558,6 +2562,26 @@ class U_I18N_API NumberFormatter final {
*/
static UnlocalizedNumberFormatter forSkeleton(const UnicodeString& skeleton, UErrorCode& status);
/**
* Call this method at the beginning of a NumberFormatter fluent chain to create an instance based
* on a given number skeleton string.
*
* If an error occurs while parsing the skeleton string, the offset into the skeleton string at
* which the error occurred will be saved into the UParseError, together with the skeleton text
* immediately before and after that offset (preContext and postContext).
*
* @param skeleton
* The skeleton string off of which to base this NumberFormatter.
* @param perror
* A parse error struct populated if an error occurs when parsing.
* If no error occurs, perror.offset will be set to -1.
* @param status
* Set to U_NUMBER_SKELETON_SYNTAX_ERROR if the skeleton was invalid.
* @return An UnlocalizedNumberFormatter, to be used for chaining.
* @draft ICU 64
*/
static UnlocalizedNumberFormatter forSkeleton(const UnicodeString& skeleton,
UParseError& perror, UErrorCode& status);
/**
* Use factory methods instead of the constructor to create a NumberFormatter.
*/

View File

@ -7,6 +7,7 @@
#ifndef __UNUMBERFORMATTER_H__
#define __UNUMBERFORMATTER_H__
#include "unicode/parseerr.h"
#include "unicode/ufieldpositer.h"
#include "unicode/umisc.h"
#include "unicode/uformattedvalue.h"
@ -454,6 +455,23 @@ unumf_openForSkeletonAndLocale(const UChar* skeleton, int32_t skeletonLen, const
UErrorCode* ec);
/**
* Like unumf_openForSkeletonAndLocale, but accepts a UParseError, which will be populated with the
* location of a skeleton syntax error if such a syntax error exists.
*
* @param skeleton The skeleton string, like u"percent precision-integer"
* @param skeletonLen The number of UChars in the skeleton string, or -1 if it is NUL-terminated.
* @param locale The NUL-terminated locale ID.
* @param perror A parse error struct populated if an error occurs when parsing. Can be NULL.
* If no error occurs, perror->offset will be set to -1.
* @param ec Set if an error occurs.
* @return The newly opened UNumberFormatter, or NULL if it could not be allocated.
* @draft ICU 64
*/
U_DRAFT UNumberFormatter* U_EXPORT2
unumf_openForSkeletonAndLocaleWithError(
const UChar* skeleton, int32_t skeletonLen, const char* locale, UParseError* perror, UErrorCode* ec);
/**
* Creates a new UFormattedNumber for holding the result of a number formatting operation.
*

View File

@ -24,6 +24,8 @@ static void TestExampleCode(void);
static void TestFormattedValue(void);
static void TestSkeletonParseError(void);
void addUNumberFormatterTest(TestNode** root);
#define TESTCASE(x) addTest(root, &x, "tsformat/unumberformatter/" #x)
@ -33,6 +35,7 @@ void addUNumberFormatterTest(TestNode** root) {
TESTCASE(TestSkeletonFormatToFields);
TESTCASE(TestExampleCode);
TESTCASE(TestFormattedValue);
TESTCASE(TestSkeletonParseError);
}
@ -229,4 +232,29 @@ static void TestFormattedValue() {
}
static void TestSkeletonParseError() {
UErrorCode ec = U_ZERO_ERROR;
UNumberFormatter* uformatter;
UParseError perror;
// The UParseError can be null. The following should not segfault.
uformatter = unumf_openForSkeletonAndLocaleWithError(
u".00 measure-unit/typo", -1, "en", NULL, &ec);
unumf_close(uformatter);
// Now test the behavior.
ec = U_ZERO_ERROR;
uformatter = unumf_openForSkeletonAndLocaleWithError(
u".00 measure-unit/typo", -1, "en", &perror, &ec);
assertIntEquals("Should have set error code", U_NUMBER_SKELETON_SYNTAX_ERROR, ec);
assertIntEquals("Should have correct skeleton error offset", 17, perror.offset);
assertUEquals("Should have correct pre context", u"0 measure-unit/", perror.preContext);
assertUEquals("Should have correct post context", u"typo", perror.postContext);
// cleanup:
unumf_close(uformatter);
}
#endif /* #if !UCONFIG_NO_FORMATTING */

View File

@ -110,8 +110,10 @@ void NumberSkeletonTest::validTokens() {
for (auto& cas : cases) {
UnicodeString skeletonString(cas);
status.setScope(skeletonString);
NumberFormatter::forSkeleton(skeletonString, status);
UParseError perror;
NumberFormatter::forSkeleton(skeletonString, perror, status);
assertSuccess(CStr(skeletonString)(), status, true);
assertEquals(skeletonString, -1, perror.offset);
status.errIfFailureAndReset();
}
}
@ -193,7 +195,7 @@ void NumberSkeletonTest::stemsRequiringOption() {
static const char16_t* stems[] = {
u"precision-increment",
u"measure-unit",
u"per-unit",
u"per-measure-unit",
u"currency",
u"integer-width",
u"numbering-system",
@ -204,8 +206,23 @@ void NumberSkeletonTest::stemsRequiringOption() {
for (auto& suffix : suffixes) {
UnicodeString skeletonString = UnicodeString(stem) + suffix;
UErrorCode status = U_ZERO_ERROR;
NumberFormatter::forSkeleton(skeletonString, status);
UParseError perror;
NumberFormatter::forSkeleton(skeletonString, perror, status);
assertEquals(skeletonString, U_NUMBER_SKELETON_SYNTAX_ERROR, status);
// Check the UParseError for integrity.
// If an option is present, the option is wrong; error offset is at the start of the option
// If an option is not present, the error offset is at the token separator (end of stem)
int32_t expectedOffset = u_strlen(stem) + ((suffix[0] == u'/') ? 1 : 0);
assertEquals(skeletonString, expectedOffset, perror.offset);
UnicodeString expectedPreContext = skeletonString.tempSubString(0, expectedOffset);
if (expectedPreContext.length() >= U_PARSE_CONTEXT_LEN - 1) {
expectedPreContext = expectedPreContext.tempSubString(expectedOffset - U_PARSE_CONTEXT_LEN + 1);
}
assertEquals(skeletonString, expectedPreContext, perror.preContext);
UnicodeString expectedPostContext = skeletonString.tempSubString(expectedOffset);
// None of the postContext strings in this test exceed U_PARSE_CONTEXT_LEN
assertEquals(skeletonString, expectedPostContext, perror.postContext);
}
}
}

View File

@ -24,7 +24,9 @@ void StringSegmentTest::runIndexedTest(int32_t index, UBool exec, const char*&na
}
void StringSegmentTest::testOffset() {
StringSegment segment(SAMPLE_STRING, false);
// Note: sampleString needs function scope so it is valid while the StringSegment is valid
UnicodeString sampleString(SAMPLE_STRING);
StringSegment segment(sampleString, false);
assertEquals("Initial Offset", 0, segment.getOffset());
segment.adjustOffset(3);
assertEquals("Adjust A", 3, segment.getOffset());
@ -35,7 +37,9 @@ void StringSegmentTest::testOffset() {
}
void StringSegmentTest::testLength() {
StringSegment segment(SAMPLE_STRING, false);
// Note: sampleString needs function scope so it is valid while the StringSegment is valid
UnicodeString sampleString(SAMPLE_STRING);
StringSegment segment(sampleString, false);
assertEquals("Initial length", 11, segment.length());
segment.adjustOffset(3);
assertEquals("Adjust", 8, segment.length());
@ -48,7 +52,9 @@ void StringSegmentTest::testLength() {
}
void StringSegmentTest::testCharAt() {
StringSegment segment(SAMPLE_STRING, false);
// Note: sampleString needs function scope so it is valid while the StringSegment is valid
UnicodeString sampleString(SAMPLE_STRING);
StringSegment segment(sampleString, false);
assertEquals("Initial", SAMPLE_STRING, segment.toUnicodeString());
assertEquals("Initial", SAMPLE_STRING, segment.toTempUnicodeString());
segment.adjustOffset(3);
@ -60,7 +66,9 @@ void StringSegmentTest::testCharAt() {
}
void StringSegmentTest::testGetCodePoint() {
StringSegment segment(SAMPLE_STRING, false);
// Note: sampleString needs function scope so it is valid while the StringSegment is valid
UnicodeString sampleString(SAMPLE_STRING);
StringSegment segment(sampleString, false);
assertEquals("Double-width code point", 0x1F4FB, segment.getCodePoint());
segment.setLength(1);
assertEquals("Inalid A", -1, segment.getCodePoint());
@ -72,7 +80,9 @@ void StringSegmentTest::testGetCodePoint() {
}
void StringSegmentTest::testCommonPrefixLength() {
StringSegment segment(SAMPLE_STRING, false);
// Note: sampleString needs function scope so it is valid while the StringSegment is valid
UnicodeString sampleString(SAMPLE_STRING);
StringSegment segment(sampleString, false);
assertEquals("", 11, segment.getCommonPrefixLength(SAMPLE_STRING));
assertEquals("", 4, segment.getCommonPrefixLength(u"📻 r"));
assertEquals("", 3, segment.getCommonPrefixLength(u"📻 x"));

View File

@ -213,7 +213,7 @@ public class NumberSkeletonTest {
String[] stems = {
"precision-increment",
"measure-unit",
"per-unit",
"per-measure-unit",
"currency",
"integer-width",
"numbering-system",