ICU-1722 rbnf can't handle small numbers

X-SVN-Rev: 14738
This commit is contained in:
Doug Felt 2004-03-24 00:40:49 +00:00
parent f1414e0160
commit c37337d00c
5 changed files with 126 additions and 33 deletions

View File

@ -321,7 +321,7 @@ NFRule::parseRuleDescriptor(UnicodeString& description, UErrorCode& status)
// tempValue now contains the rule's radix. Set it
// accordingly, and recalculate the rule's exponent
radix = (int16_t)val;
radix = (int32_t)val;
if (radix == 0) {
// throw new IllegalArgumentException("Rule can't have radix of 0");
status = U_PARSE_ERROR;

View File

@ -91,7 +91,7 @@ private:
private:
int64_t baseValue;
int16_t radix;
int32_t radix;
int16_t exponent;
UnicodeString ruleText;
NFSubstitution* sub1;

View File

@ -14,6 +14,9 @@
*/
#include "nfsubs.h"
#include "digitlst.h"
#include <stdio.h>
#if U_HAVE_RBNF
@ -747,35 +750,63 @@ FractionalPartSubstitution::FractionalPartSubstitution(int32_t _pos,
void
FractionalPartSubstitution::doSubstitution(double number, UnicodeString& toInsertInto, int32_t _pos) const
{
// if we're not in "byDigits" mode, just use the inherited
// doSubstitution() routine
if (!byDigits) {
NFSubstitution::doSubstitution(number, toInsertInto, _pos);
// if we're not in "byDigits" mode, just use the inherited
// doSubstitution() routine
if (!byDigits) {
NFSubstitution::doSubstitution(number, toInsertInto, _pos);
// if we're in "byDigits" mode, transform the value into an integer
// by moving the decimal point eight places to the right and
// pulling digits off the right one at a time, formatting each digit
// as an integer using this substitution's owning rule set
// (this is slower, but more accurate, than doing it from the
// other end)
} else {
int32_t numberToFormat = (int32_t)uprv_round(transformNumber(number) * uprv_pow(10, kMaxDecimalDigits));
// this flag keeps us from formatting trailing zeros. It starts
// out false because we're pulling from the right, and switches
// to true the first time we encounter a non-zero digit
UBool doZeros = FALSE;
for (int32_t i = 0; i < kMaxDecimalDigits; i++) {
int64_t digit = numberToFormat % 10;
if (digit != 0 || doZeros) {
if (doZeros && useSpaces) {
toInsertInto.insert(_pos + getPos(), gSpace);
}
doZeros = TRUE;
getRuleSet()->format(digit, toInsertInto, _pos + getPos());
}
numberToFormat /= 10;
}
// if we're in "byDigits" mode, transform the value into an integer
// by moving the decimal point eight places to the right and
// pulling digits off the right one at a time, formatting each digit
// as an integer using this substitution's owning rule set
// (this is slower, but more accurate, than doing it from the
// other end)
} else {
// int32_t numberToFormat = (int32_t)uprv_round(transformNumber(number) * uprv_pow(10, kMaxDecimalDigits));
// // this flag keeps us from formatting trailing zeros. It starts
// // out false because we're pulling from the right, and switches
// // to true the first time we encounter a non-zero digit
// UBool doZeros = FALSE;
// for (int32_t i = 0; i < kMaxDecimalDigits; i++) {
// int64_t digit = numberToFormat % 10;
// if (digit != 0 || doZeros) {
// if (doZeros && useSpaces) {
// toInsertInto.insert(_pos + getPos(), gSpace);
// }
// doZeros = TRUE;
// getRuleSet()->format(digit, toInsertInto, _pos + getPos());
// }
// numberToFormat /= 10;
// }
DigitList dl;
dl.set(number, 20, TRUE);
UBool pad = false;
while (dl.fCount > (dl.fDecimalAt <= 0 ? 0 : dl.fDecimalAt)) {
if (pad && useSpaces) {
toInsertInto.insert(_pos + getPos(), gSpace);
} else {
pad = TRUE;
}
getRuleSet()->format((int64_t)(dl.fDigits[--dl.fCount] - gZero), toInsertInto, _pos + getPos());
}
while (dl.fDecimalAt < 0) {
if (pad && useSpaces) {
toInsertInto.insert(_pos + getPos(), gSpace);
} else {
pad = TRUE;
}
getRuleSet()->format((int64_t)0, toInsertInto, _pos + getPos());
++dl.fDecimalAt;
}
if (!pad) {
// hack around lack of precision in digitlist. if we would end up with
// "foo point" make sure we add a " zero" to the end.
getRuleSet()->format((int64_t)0, toInsertInto, _pos + getPos());
}
}
}
//-----------------------------------------------------------------------
@ -821,8 +852,9 @@ FractionalPartSubstitution::doParse(const UnicodeString& text,
ParsePosition workPos(1);
double result = 0;
int32_t digit;
double p10 = 0.1;
// double p10 = 0.1;
DigitList dl;
NumberFormat* fmt = NULL;
while (workText.length() > 0 && workPos.getIndex() != 0) {
workPos.setIndex(0);
@ -850,8 +882,9 @@ FractionalPartSubstitution::doParse(const UnicodeString& text,
}
if (workPos.getIndex() != 0) {
result += digit * p10;
p10 /= 10;
dl.append((char)('0' + digit));
// result += digit * p10;
// p10 /= 10;
parsePosition.setIndex(parsePosition.getIndex() + workPos.getIndex());
workText.removeBetween(0, workPos.getIndex());
while (workText.length() > 0 && workText.charAt(0) == gSpace) {
@ -861,6 +894,7 @@ FractionalPartSubstitution::doParse(const UnicodeString& text,
}
}
delete fmt;
result = dl.fCount == 0 ? 0 : dl.getDouble();
result = composeRuleValue(result, baseValue);
resVal.setDouble(result);

View File

@ -17,6 +17,7 @@
#include "unicode/coleitr.h"
#include "unicode/ures.h"
#include "unicode/ustring.h"
#include "unicode/decimfmt.h"
//#include "llong.h"
#include <string.h>
@ -56,6 +57,7 @@ void IntlTestRBNF::runIndexedTest(int32_t index, UBool exec, const char* &name,
TESTCASE(10, TestFractionalRuleSet);
TESTCASE(11, TestSwedishSpellout);
TESTCASE(12, TestBelgianFrenchSpellout);
TESTCASE(13, TestSmallValues);
#else
TESTCASE(0, TestRBNFDisabled);
#endif
@ -1422,6 +1424,56 @@ IntlTestRBNF::TestSwedishSpellout()
delete formatter;
}
/**
 * Spot check of the en_US spellout formatter on values with small
 * fractional parts (down to 1e-15), and on large integers combined with
 * small fractions.
 *
 * Each table row is { numeric text, expected spelled-out words }; the table
 * is terminated by a { NULL, NULL } row.  The rows are handed to doTest()
 * with its testParsing argument set to TRUE.
 */
void
IntlTestRBNF::TestSmallValues()
{
    UErrorCode status = U_ZERO_ERROR;
    RuleBasedNumberFormat* formatter
        = new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale("en_US"), status);

    if (U_FAILURE(status)) {
        errln("FAIL: could not construct formatter");
    } else {
        static const char* testDataDefault[][2] = {
            // Pure fractions: one leading-zero digit more per row.
            { "0.001", "zero point zero zero one" },
            { "0.0001", "zero point zero zero zero one" },
            { "0.00001", "zero point zero zero zero zero one" },
            { "0.000001", "zero point zero zero zero zero zero one" },
            { "0.0000001", "zero point zero zero zero zero zero zero one" },
            { "0.00000001", "zero point zero zero zero zero zero zero zero one" },
            { "0.000000001", "zero point zero zero zero zero zero zero zero zero one" },
            { "0.0000000001", "zero point zero zero zero zero zero zero zero zero zero one" },
            { "0.00000000001", "zero point zero zero zero zero zero zero zero zero zero zero one" },
            { "0.000000000001", "zero point zero zero zero zero zero zero zero zero zero zero zero one" },
            { "0.0000000000001", "zero point zero zero zero zero zero zero zero zero zero zero zero zero one" },
            { "0.00000000000001", "zero point zero zero zero zero zero zero zero zero zero zero zero zero zero one" },
            { "0.000000000000001", "zero point zero zero zero zero zero zero zero zero zero zero zero zero zero zero one" },

            // Large integer part combined with a small fraction.
            { "10,000,000.001", "ten million point zero zero one" },
            { "10,000,000.0001", "ten million point zero zero zero one" },
            { "10,000,000.00001", "ten million point zero zero zero zero one" },
            { "10,000,000.000001", "ten million point zero zero zero zero zero one" },
            { "10,000,000.0000001", "ten million point zero zero zero zero zero zero one" },
            // NOTE(review): the disabled rows here and below carry 16 or more
            // significant digits; presumably they exceed the precision
            // available for a double -- TODO confirm before re-enabling.
//          { "10,000,000.00000001", "ten million point zero zero zero zero zero zero zero one" },
//          { "10,000,000.000000002", "ten million point zero zero zero zero zero zero zero zero two" },
            { "10,000,000", "ten million" },

            // Mixed integer and fraction digits of decreasing magnitude.
//          { "1,234,567,890.0987654", "one billion, two hundred and thirty-four million, five hundred and sixty-seven thousand, eight hundred and ninety point zero nine eight seven six five four" },
//          { "123,456,789.9876543", "one hundred and twenty-three million, four hundred and fifty-six thousand, seven hundred and eighty-nine point nine eight seven six five four three" },
//          { "12,345,678.87654321", "twelve million, three hundred and forty-five thousand, six hundred and seventy-eight point eight seven six five four three two one" },
            { "1,234,567.7654321", "one million, two hundred and thirty-four thousand, five hundred and sixty-seven point seven six five four three two one" },
            { "123,456.654321", "one hundred and twenty-three thousand, four hundred and fifty-six point six five four three two one" },
            { "12,345.54321", "twelve thousand three hundred and forty-five point five four three two one" },
            { "1,234.4321", "one thousand two hundred and thirty-four point four three two one" },
            { "123.321", "one hundred and twenty-three point three two one" },

            // Small values with many significant digits.
            { "0.0000000011754944", "zero point zero zero zero zero zero zero zero zero one one seven five four nine four four" },
            { "0.000001175494351", "zero point zero zero zero zero zero one one seven five four nine four three five one" },
            { NULL, NULL }
        };

        doTest(formatter, testDataDefault, TRUE);
    }

    // Release the formatter on both paths: the object is allocated with
    // new even when the constructor reports a failure through status, so
    // deleting it only in the success branch leaked it on the error path.
    delete formatter;
}
void
IntlTestRBNF::doTest(RuleBasedNumberFormat* formatter, const char* testData[][2], UBool testParsing)
@ -1429,7 +1481,8 @@ IntlTestRBNF::doTest(RuleBasedNumberFormat* formatter, const char* testData[][2]
// man, error reporting would be easier with printf-style syntax for unicode string and formattable
UErrorCode status = U_ZERO_ERROR;
NumberFormat* decFmt = NumberFormat::createInstance(Locale::getUS(), status);
// NumberFormat* decFmt = NumberFormat::createInstance(Locale::getUS(), status);
NumberFormat* decFmt = new DecimalFormat("#,###.################", status);
if (U_FAILURE(status)) {
errln("FAIL: could not create NumberFormat");
} else {
@ -1437,6 +1490,7 @@ IntlTestRBNF::doTest(RuleBasedNumberFormat* formatter, const char* testData[][2]
const char* numString = testData[i][0];
const char* expectedWords = testData[i][1];
logln("%i: %s\n", i, numString);
Formattable expectedNumber;
decFmt->parse(numString, expectedNumber, status);
if (U_FAILURE(status)) {

View File

@ -97,6 +97,11 @@ class IntlTestRBNF : public IntlTest {
*/
virtual void TestSwedishSpellout();
/**
* Perform a simple spot check on small values
*/
virtual void TestSmallValues();
protected:
virtual void doTest(RuleBasedNumberFormat* formatter, const char* testData[][2], UBool testParsing);
virtual void doLenientParseTest(RuleBasedNumberFormat* formatter, const char* testData[][2]);