ICU-1722 rbnf can't handle small numbers

X-SVN-Rev: 14738
2004-03-24 00:40:49 +00:00 · 2004-03-24 00:40:49 +00:00 · c37337d00c
commit c37337d00c
parent f1414e0160
5 changed files with 126 additions and 33 deletions
--- a/icu4c/source/i18n/nfrule.cpp
+++ b/icu4c/source/i18n/nfrule.cpp
@ -321,7 +321,7 @@ NFRule::parseRuleDescriptor(UnicodeString& description, UErrorCode& status)

                // tempValue now contain's the rule's radix.  Set it
                // accordingly, and recalculate the rule's exponent
-                radix = (int16_t)val;
+                radix = (int32_t)val;
                if (radix == 0) {
                    // throw new IllegalArgumentException("Rule can't have radix of 0");
                    status = U_PARSE_ERROR;
--- a/icu4c/source/i18n/nfrule.h
+++ b/icu4c/source/i18n/nfrule.h
@ -91,7 +91,7 @@ private:

 private:
    int64_t baseValue;
-    int16_t radix;
+    int32_t radix;
    int16_t exponent;
    UnicodeString ruleText;
    NFSubstitution* sub1;
--- a/icu4c/source/i18n/nfsubs.cpp
+++ b/icu4c/source/i18n/nfsubs.cpp
@ -14,6 +14,9 @@
 */

 #include "nfsubs.h"
+#include "digitlst.h"
+
+#include <stdio.h>

 #if U_HAVE_RBNF

@ -747,35 +750,63 @@ FractionalPartSubstitution::FractionalPartSubstitution(int32_t _pos,
 void
 FractionalPartSubstitution::doSubstitution(double number, UnicodeString& toInsertInto, int32_t _pos) const
 {
-    // if we're not in "byDigits" mode, just use the inherited
-    // doSubstitution() routine
-    if (!byDigits) {
-        NFSubstitution::doSubstitution(number, toInsertInto, _pos);
+  // Formats the fractional part of `number` into toInsertInto at offset
+  // _pos + getPos().
+  //
+  // if we're not in "byDigits" mode, just use the inherited
+  // doSubstitution() routine
+  if (!byDigits) {
+    NFSubstitution::doSubstitution(number, toInsertInto, _pos);

-        // if we're in "byDigits" mode, transform the value into an integer
-        // by moving the decimal point eight places to the right and
-        // pulling digits off the right one at a time, formatting each digit
-        // as an integer using this substitution's owning rule set
-        // (this is slower, but more accurate, than doing it from the
-        // other end)
-    } else {
-        int32_t numberToFormat = (int32_t)uprv_round(transformNumber(number) * uprv_pow(10, kMaxDecimalDigits));
-        // this flag keeps us from formatting trailing zeros.  It starts
-        // out false because we're pulling from the right, and switches
-        // to true the first time we encounter a non-zero digit
-        UBool doZeros = FALSE;
-        for (int32_t i = 0; i < kMaxDecimalDigits; i++) {
-            int64_t digit = numberToFormat % 10;
-            if (digit != 0 || doZeros) {
-                if (doZeros && useSpaces) {
-                    toInsertInto.insert(_pos + getPos(), gSpace);
-                }
-                doZeros = TRUE;
-                getRuleSet()->format(digit, toInsertInto, _pos + getPos());
-            }
-            numberToFormat /= 10;
-        }
+    // if we're in "byDigits" mode, expand the number into its decimal
+    // digits with a DigitList and format each fractional digit as an
+    // integer using this substitution's owning rule set.  (The previous
+    // implementation scaled the fraction into an int32_t and so was
+    // limited to kMaxDecimalDigits fractional digits.)
+  } else {
+    DigitList dl;
+    dl.set(number, 20, TRUE);
+
+    // Digits are taken from the end of the list, and every piece is
+    // inserted at the same offset (_pos + getPos()).  `pad` records that
+    // at least one digit has been emitted, so that a separating space is
+    // inserted before each further digit when useSpaces is set.
+    UBool pad = FALSE;
+    while (dl.fCount > (dl.fDecimalAt <= 0 ? 0 : dl.fDecimalAt)) {
+      if (pad && useSpaces) {
+        toInsertInto.insert(_pos + getPos(), gSpace);
+      } else {
+        pad = TRUE;
+      }
+      // DigitList digits are plain chars (the parse path builds them with
+      // '0' + digit), so convert back with '0' as well.
+      getRuleSet()->format((int64_t)(dl.fDigits[--dl.fCount] - '0'), toInsertInto, _pos + getPos());
     }
+    // A negative fDecimalAt is handled as that many zeros between the
+    // decimal point and the first stored digit: emit one zero per step
+    // until fDecimalAt reaches 0.
+    while (dl.fDecimalAt < 0) {
+      if (pad && useSpaces) {
+        toInsertInto.insert(_pos + getPos(), gSpace);
+      } else {
+        pad = TRUE;
+      }
+      getRuleSet()->format((int64_t)0, toInsertInto, _pos + getPos());
+      ++dl.fDecimalAt;
+    }
+
+    if (!pad) {
+      // hack around lack of precision in DigitList: if no digit was emitted
+      // we would end up with "foo point", so make sure we add a " zero" to
+      // the end.
+      getRuleSet()->format((int64_t)0, toInsertInto, _pos + getPos());
+    }
+  }
 }

 //-----------------------------------------------------------------------
@ -821,8 +852,9 @@ FractionalPartSubstitution::doParse(const UnicodeString& text,
        ParsePosition workPos(1);
        double result = 0;
        int32_t digit;
-        double p10 = 0.1;
+//          double p10 = 0.1;

+	DigitList dl;
        NumberFormat* fmt = NULL;
        while (workText.length() > 0 && workPos.getIndex() != 0) {
            workPos.setIndex(0);
@ -850,8 +882,9 @@ FractionalPartSubstitution::doParse(const UnicodeString& text,
            }

            if (workPos.getIndex() != 0) {
-                result += digit * p10;
-                p10 /= 10;
+		dl.append((char)('0' + digit));
+//                  result += digit * p10;
+//                  p10 /= 10;
                parsePosition.setIndex(parsePosition.getIndex() + workPos.getIndex());
                workText.removeBetween(0, workPos.getIndex());
                while (workText.length() > 0 && workText.charAt(0) == gSpace) {
@ -861,6 +894,7 @@ FractionalPartSubstitution::doParse(const UnicodeString& text,
            }
        }
        delete fmt;
+	result = dl.fCount == 0 ? 0 : dl.getDouble();

        result = composeRuleValue(result, baseValue);
        resVal.setDouble(result);
--- a/icu4c/source/test/intltest/itrbnf.cpp
+++ b/icu4c/source/test/intltest/itrbnf.cpp
@ -17,6 +17,7 @@
 #include "unicode/coleitr.h"
 #include "unicode/ures.h"
 #include "unicode/ustring.h"
+#include "unicode/decimfmt.h"
 //#include "llong.h"

 #include <string.h>
@ -56,6 +57,7 @@ void IntlTestRBNF::runIndexedTest(int32_t index, UBool exec, const char* &name,
        TESTCASE(10, TestFractionalRuleSet);
        TESTCASE(11, TestSwedishSpellout);
        TESTCASE(12, TestBelgianFrenchSpellout);
+        TESTCASE(13, TestSmallValues);
 #else
        TESTCASE(0, TestRBNFDisabled);
 #endif
@ -1422,6 +1424,56 @@ IntlTestRBNF::TestSwedishSpellout()
    delete formatter;
 }

+void
+IntlTestRBNF::TestSmallValues()
+{
+    // Spot check of the en_US spellout formatter on values with many
+    // fractional digits, including long runs of zeros directly after the
+    // decimal point.  Each row is { number text, expected spellout }; the
+    // table is terminated by a { NULL, NULL } row.
+    UErrorCode status = U_ZERO_ERROR;
+    RuleBasedNumberFormat* formatter
+        = new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale("en_US"), status);
+
+    if (U_FAILURE(status)) {
+        errln("FAIL: could not construct formatter");
+    } else {
+        static const char* testDataDefault[][2] = {
+            { "0.001", "zero point zero zero one" },
+            { "0.0001", "zero point zero zero zero one" },
+            { "0.00001", "zero point zero zero zero zero one" },
+            { "0.000001", "zero point zero zero zero zero zero one" },
+            { "0.0000001", "zero point zero zero zero zero zero zero one" },
+            { "0.00000001", "zero point zero zero zero zero zero zero zero one" },
+            { "0.000000001", "zero point zero zero zero zero zero zero zero zero one" },
+            { "0.0000000001", "zero point zero zero zero zero zero zero zero zero zero one" },
+            { "0.00000000001", "zero point zero zero zero zero zero zero zero zero zero zero one" },
+            { "0.000000000001", "zero point zero zero zero zero zero zero zero zero zero zero zero one" },
+            { "0.0000000000001", "zero point zero zero zero zero zero zero zero zero zero zero zero zero one" },
+            { "0.00000000000001", "zero point zero zero zero zero zero zero zero zero zero zero zero zero zero one" },
+            { "0.000000000000001", "zero point zero zero zero zero zero zero zero zero zero zero zero zero zero zero one" },
+            { "10,000,000.001", "ten million point zero zero one" },
+            { "10,000,000.0001", "ten million point zero zero zero one" },
+            { "10,000,000.00001", "ten million point zero zero zero zero one" },
+            { "10,000,000.000001", "ten million point zero zero zero zero zero one" },
+            { "10,000,000.0000001", "ten million point zero zero zero zero zero zero one" },
+            // The commented-out rows below are left disabled, as in the
+            // original table.
+//        { "10,000,000.00000001", "ten million point zero zero zero zero zero zero zero one" },
+//        { "10,000,000.000000002", "ten million point zero zero zero zero zero zero zero zero two" },
+            { "10,000,000", "ten million" },
+//        { "1,234,567,890.0987654", "one billion, two hundred and thirty-four million, five hundred and sixty-seven thousand, eight hundred and ninety point zero nine eight seven six five four" },
+//        { "123,456,789.9876543", "one hundred and twenty-three million, four hundred and fifty-six thousand, seven hundred and eighty-nine point nine eight seven six five four three" },
+//        { "12,345,678.87654321", "twelve million, three hundred and forty-five thousand, six hundred and seventy-eight point eight seven six five four three two one" },
+            { "1,234,567.7654321", "one million, two hundred and thirty-four thousand, five hundred and sixty-seven point seven six five four three two one" },
+            { "123,456.654321", "one hundred and twenty-three thousand, four hundred and fifty-six point six five four three two one" },
+            { "12,345.54321", "twelve thousand three hundred and forty-five point five four three two one" },
+            { "1,234.4321", "one thousand two hundred and thirty-four point four three two one" },
+            { "123.321", "one hundred and twenty-three point three two one" },
+            { "0.0000000011754944", "zero point zero zero zero zero zero zero zero zero one one seven five four nine four four" },
+            { "0.000001175494351", "zero point zero zero zero zero zero one one seven five four nine four three five one" },
+            { NULL, NULL }
+        };
+
+        doTest(formatter, testDataDefault, TRUE);
+    }
+
+    // The formatter is heap-allocated regardless of the reported status, so
+    // release it on the failure path as well (delete on NULL is a no-op).
+    delete formatter;
+}

 void 
 IntlTestRBNF::doTest(RuleBasedNumberFormat* formatter, const char* testData[][2], UBool testParsing) 
@ -1429,7 +1481,8 @@ IntlTestRBNF::doTest(RuleBasedNumberFormat* formatter, const char* testData[][2]
  // man, error reporting would be easier with printf-style syntax for unicode string and formattable

    UErrorCode status = U_ZERO_ERROR;
-    NumberFormat* decFmt = NumberFormat::createInstance(Locale::getUS(), status);
+    // NumberFormat* decFmt = NumberFormat::createInstance(Locale::getUS(), status);
+    NumberFormat* decFmt = new DecimalFormat("#,###.################", status);
    if (U_FAILURE(status)) {
        errln("FAIL: could not create NumberFormat");
    } else {
@ -1437,6 +1490,7 @@ IntlTestRBNF::doTest(RuleBasedNumberFormat* formatter, const char* testData[][2]
            const char* numString = testData[i][0];
            const char* expectedWords = testData[i][1];

+            logln("%i: %s\n", i, numString);
            Formattable expectedNumber;
            decFmt->parse(numString, expectedNumber, status);
            if (U_FAILURE(status)) {
--- a/icu4c/source/test/intltest/itrbnf.h
+++ b/icu4c/source/test/intltest/itrbnf.h
@ -97,6 +97,11 @@ class IntlTestRBNF : public IntlTest {
   */
  virtual void TestSwedishSpellout();

+  /**
+   * Perform a simple spot check on small values
+   */
+  virtual void TestSmallValues();
+
 protected:
  virtual void doTest(RuleBasedNumberFormat* formatter, const char* testData[][2], UBool testParsing);
  virtual void doLenientParseTest(RuleBasedNumberFormat* formatter, const char* testData[][2]);