From a451009ae6b5d44e457e26aa5379e032d251ab29 Mon Sep 17 00:00:00 2001
From: Alan Liu <alansliu@gmail.com>
Date: Mon, 11 Jun 2001 17:21:28 +0000
Subject: [PATCH] ICU-986 reimplement ChoiceFormat without using nextDouble or
 previousDouble

X-SVN-Rev: 4932
---
 icu4c/source/i18n/choicfmt.cpp | 403 ++++++++++++++++++++++-----------
 1 file changed, 269 insertions(+), 134 deletions(-)

diff --git a/icu4c/source/i18n/choicfmt.cpp b/icu4c/source/i18n/choicfmt.cpp
index 9cb640c906..f03441f635 100644
--- a/icu4c/source/i18n/choicfmt.cpp
+++ b/icu4c/source/i18n/choicfmt.cpp
@@ -35,14 +35,39 @@
 
 char        ChoiceFormat::fgClassID = 0; // Value is irrelevant
 
+UMTX ChoiceFormat::fgMutex = NULL; // lock for fgNumberFormat
+
 NumberFormat* ChoiceFormat::fgNumberFormat = 0;
 
+inline double _getDouble(const Formattable& f) {
+    return (f.getType() == Formattable::kLong) ?
+        ((double) f.getLong()) : f.getDouble();
+}
+
+// Special characters used by ChoiceFormat.  There are two characters
+// used interchangeably to indicate <=.  Either is parsed, but only
+// LESS_EQUAL is generated by toPattern().
+#define SINGLE_QUOTE ((UChar)0x0027)   /*'*/
+#define LESS_THAN    ((UChar)0x003C)   /*<*/
+#define LESS_EQUAL   ((UChar)0x0023)   /*#*/
+#define LESS_EQUAL2  ((UChar32)0x2264)
+#define VERTICAL_BAR ((UChar)0x007C)   /*|*/
+#define MINUS        ((UChar)0x002D)   /*-*/
+#define INFINITY     ((UChar32)0x221E)
+
+static const UChar _posInf   = INFINITY;
+static const UChar _negInf[] = { MINUS, INFINITY };
+
+const UnicodeString ChoiceFormat::fgPositiveInfinity(FALSE, &_posInf, 1);
+const UnicodeString ChoiceFormat::fgNegativeInfinity(FALSE, _negInf, 2);
+
 // -------------------------------------
 // Creates a ChoiceFormat instance based on the pattern.
 
 ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
                            UErrorCode& status)
 : fChoiceLimits(0),
+  fClosures(0),
   fChoiceFormats(0),
   fCount(0)
 {
@@ -57,17 +82,33 @@ ChoiceFormat::ChoiceFormat(const double* limits,
                            const UnicodeString* formats, 
                            int32_t cnt )
 : fChoiceLimits(0),
+  fClosures(0),
   fChoiceFormats(0),
   fCount(0)
 {
     setChoices(limits, formats, cnt );
 }
 
+// -------------------------------------
+
+ChoiceFormat::ChoiceFormat(const double* limits, 
+                           const UBool* closures,
+                           const UnicodeString* formats, 
+                           int32_t cnt )
+: fChoiceLimits(0),
+  fClosures(0),
+  fChoiceFormats(0),
+  fCount(0)
+{
+    setChoices(limits, closures, formats, cnt );
+}
+
 // -------------------------------------
 // copy constructor
 
 ChoiceFormat::ChoiceFormat(const    ChoiceFormat&   that) 
     : fChoiceLimits(0),
+      fClosures(0),
       fChoiceFormats(0)
 {
     *this = that;
@@ -87,6 +128,7 @@ ChoiceFormat::operator==(const Format& that) const
     // LE means less than and equal to, LT means less than.
     for (int32_t i = 0; i < fCount; i++) {
         if ((fChoiceLimits[i] != thatAlias.fChoiceLimits[i]) ||
+            (fClosures[i] != thatAlias.fClosures[i]) ||
             (fChoiceFormats[i] != thatAlias.fChoiceFormats[i]))
             return FALSE;
     }
@@ -103,11 +145,14 @@ ChoiceFormat::operator=(const   ChoiceFormat& that)
         NumberFormat::operator=(that);
         fCount = that.fCount;
         delete [] fChoiceLimits; fChoiceLimits = 0;
+        delete [] fClosures; fClosures = 0;
         delete [] fChoiceFormats; fChoiceFormats = 0;
         fChoiceLimits = new double[fCount];
+        fClosures = new UBool[fCount];
         fChoiceFormats = new UnicodeString[fCount];
 
         uprv_arrayCopy(that.fChoiceLimits, fChoiceLimits, fCount);
+        uprv_arrayCopy(that.fClosures, fClosures, fCount);
         uprv_arrayCopy(that.fChoiceFormats, fChoiceFormats, fCount);
     }
     return *this;
@@ -119,6 +164,8 @@ ChoiceFormat::~ChoiceFormat()
 {
     delete [] fChoiceLimits;
     fChoiceLimits = 0;
+    delete [] fClosures;
+    fClosures = 0;
     delete [] fChoiceFormats;
     fChoiceFormats = 0;
     fCount = 0;
@@ -134,7 +181,7 @@ ChoiceFormat::getNumberFormat(UErrorCode &status)
 
     if (fgNumberFormat != 0) // if there's something in the cache
     {
-        Mutex lock;
+        Mutex lock(&fgMutex);
 
         if (fgNumberFormat != 0) // Someone might have grabbed it.
         {
@@ -159,7 +206,7 @@ ChoiceFormat::releaseNumberFormat(NumberFormat *adopt)
 {
     if(fgNumberFormat == 0) // If the cache is empty we must add it back.
     {
-        Mutex lock;
+        Mutex lock(&fgMutex);
 
         if(fgNumberFormat == 0)
         {
@@ -190,16 +237,10 @@ ChoiceFormat::stod(const UnicodeString& string,
     myFormat->parse(string, result, status);
     releaseNumberFormat(myFormat);
     double value = 0.0;
-    if (U_SUCCESS(status))
-    {
-        Formattable::Type type = result.getType();
-        if (type == Formattable::kLong)
-        {
-            value = result.getLong();
-        }
-        else if (type == Formattable::kDouble)
-        {
-            value = result.getDouble();
+    if (U_SUCCESS(status)) {
+        value = _getDouble(result);
+        if (uprv_isNaN(value)) {
+            status = U_ILLEGAL_ARGUMENT_ERROR;
         }
     }
     return value;
@@ -230,96 +271,142 @@ ChoiceFormat::dtos(double value,
 // Applies the pattern to this ChoiceFormat instance.
 
 void
-ChoiceFormat::applyPattern(const UnicodeString& newPattern,
+ChoiceFormat::applyPattern(const UnicodeString& pattern,
                            UErrorCode& status)
 {
-    if (U_FAILURE(status))
+    if (U_FAILURE(status)) {
         return;
+    }
 
-    UnicodeString segments[2];
-    double newChoiceLimits[30];  // current limit
-    UnicodeString newChoiceFormats[30];   // later, use Vectors
-    int32_t count = 0;
-    int32_t part = 0;
-    double startValue = 0;
-    double oldStartValue = uprv_getNaN();
-    UBool inQuote = FALSE;
-    for(int i = 0; i < newPattern.length(); ++i) {
-        UChar ch = newPattern[i];
-        if(ch == 0x0027 /*'\''*/) {
-            // Check for "''" indicating a literal quote
-            if((i+1) < newPattern.length() && newPattern[i+1] == ch) {
-                segments[part] += ch;
-                ++i;
+    // Perform 2 passes.  The first computes the number of limits in
+    // this pattern (fCount), which is 1 more than the number of
+    // literal VERTICAL_BAR characters.
+    int32_t count = 1;
+    int32_t i;
+    for (i=0; i<pattern.length(); ++i) {
+        UChar c = pattern[i];
+        if (c == SINGLE_QUOTE) {
+            // Skip over the entire quote, including embedded
+            // contiguous pairs of SINGLE_QUOTE.
+            for (;;) {
+                do {
+                    ++i;
+                } while (i<pattern.length() &&
+                         pattern[i] != SINGLE_QUOTE);
+                if ((i+1)<pattern.length() &&
+                    pattern[i+1] == SINGLE_QUOTE) {
+                    // SINGLE_QUOTE pair; skip over it
+                    ++i;
+                } else {
+                    break;
+                }
             }
-            else 
-                inQuote = !inQuote;
-        }
-        else if (inQuote) {
-            segments[part] += ch;
-        }
-        else if (ch == 0x003C /*'<'*/ || ch == 0x0023 /*'#'*/ || ch == 0x2264) {
-            if (segments[0] == "") {
-                status = U_ILLEGAL_ARGUMENT_ERROR;
-                return;
-            }
-
-            UnicodeString tempBuffer = segments[0];
-            tempBuffer.trim();
-            UChar posInf = 0x221E;
-            UChar negInf [] = {0x002D /*'-'*/, posInf };
-            if (tempBuffer == UnicodeString(&posInf, 1, 1)) {
-                startValue = uprv_getInfinity();
-            }
-            else if (tempBuffer == UnicodeString(negInf, 2, 2)) {
-                startValue = - uprv_getInfinity();
-            }
-            else {
-                //segments[0].trim();
-                startValue = stod(tempBuffer, status);
-                if(U_FAILURE(status))
-                    return;
-            }
-
-            if (ch == 0x003C /*'<'*/ && ! uprv_isInfinite(startValue)) {
-                startValue = nextDouble(startValue);
-            }
-            // {sfb} There is a bug in MSVC 5.0 sp3 -- 0.0 <= NaN ==> TRUE
-            //if (startValue <= oldStartValue) {
-            if (startValue <= oldStartValue && ! uprv_isNaN(oldStartValue)) {
-                status = U_ILLEGAL_ARGUMENT_ERROR;
-                return;
-            }
-            segments[0].remove();
-            part = 1;
-        } else if (ch == 0x007C /*'|'*/) {
-            newChoiceLimits[count] = startValue;
-            newChoiceFormats[count] = segments[1];
+        } else if (c == VERTICAL_BAR) {
             ++count;
-            oldStartValue = startValue;
-            segments[1].remove();
-            part = 0;
-        } else {
-            segments[part] += ch;
         }
     }
-    // clean up last one
-    if (part == 1) {
-        newChoiceLimits[count] = startValue;
-        newChoiceFormats[count] = segments[1];
-        ++count;
+
+    // Allocate the required storage.
+    double *newLimits = new double[count];
+    UBool *newClosures = new UBool[count];
+    UnicodeString *newFormats = new UnicodeString[count];
+
+    // Perform the second pass
+    int32_t k = 0; // index into newXxx[] arrays
+    UnicodeString buf; // scratch buffer
+    UBool inQuote = FALSE;
+    UBool inNumber = TRUE; // TRUE before < or #, FALSE after
+    for (i=0; i<pattern.length(); ++i) {
+        UChar c = pattern[i];
+        if (c == SINGLE_QUOTE) {
+            // Check for SINGLE_QUOTE pair indicating a literal quote
+            if ((i+1) < pattern.length() &&
+                pattern[i+1] == SINGLE_QUOTE) {
+                buf += SINGLE_QUOTE;
+                ++i;
+            } else {
+                inQuote = !inQuote;
+            }
+        } else if (inQuote) {
+            buf += c;
+        } else if (c == LESS_THAN || c == LESS_EQUAL || c == LESS_EQUAL2) {
+            if (!inNumber || buf.length() == 0) {
+                goto error;
+            }
+            inNumber = FALSE;
+
+            double limit;
+            buf.trim();
+            if (buf == fgPositiveInfinity) {
+                limit = uprv_getInfinity();
+            } else if (buf == fgNegativeInfinity) {
+                limit = -uprv_getInfinity();
+            } else {
+                limit = stod(buf, status);
+                if (U_FAILURE(status)) {
+                    goto error;
+                }
+            }
+
+            if (k == count) {
+                // This shouldn't happen.  If it does, it means that
+                // the count determined in the first pass did not
+                // match the number of elements found in the second
+                // pass.
+                goto error;                
+            }
+            newLimits[k] = limit;
+            newClosures[k] = (c == LESS_THAN);
+
+            if (k > 0 && limit <= newLimits[k-1]) {
+                // Each limit must be strictly greater than the previous
+                // limit.  One exception: Two subsequent limits may be
+                // == if the first closure is FALSE and the second
+                // closure is TRUE.  This places the limit value in
+                // the second interval.
+                if (!(limit == newLimits[k-1] &&
+                      !newClosures[k-1] &&
+                      newClosures[k])) {
+                    goto error;
+                }
+            }
+
+            buf.truncate(0);
+        } else if (c == VERTICAL_BAR) {
+            if (inNumber) {
+                goto error;                
+            }
+            inNumber = TRUE;
+
+            newFormats[k] = buf;
+            ++k;
+            buf.truncate(0);
+        } else {
+            buf += c;
+        }        
     }
 
+    if (k != (count-1) || inNumber || inQuote) {
+        goto error;                
+    }
+    newFormats[k] = buf;
 
-    delete [] fChoiceLimits; fChoiceLimits = 0;
-    delete [] fChoiceFormats; fChoiceFormats = 0;
-
+    // Don't modify this object until the parse succeeds
+    delete[] fChoiceLimits;
+    delete[] fClosures;
+    delete[] fChoiceFormats;
     fCount = count;
-    fChoiceLimits    = new double[fCount];
-    fChoiceFormats    = new UnicodeString[fCount];
+    fChoiceLimits  = newLimits;
+    fClosures      = newClosures;
+    fChoiceFormats = newFormats;
+    return;
 
-    uprv_arrayCopy(newChoiceLimits, fChoiceLimits, fCount);
-    uprv_arrayCopy(newChoiceFormats, fChoiceFormats, fCount);
+ error:
+    status = U_ILLEGAL_ARGUMENT_ERROR;
+    delete[] newLimits;
+    delete[] newClosures;
+    delete[] newFormats;
+    return;
 }
 
 // -------------------------------------
@@ -331,55 +418,45 @@ ChoiceFormat::toPattern(UnicodeString& result) const
     result.remove();
     for (int32_t i = 0; i < fCount; ++i) {
         if (i != 0) {
-            result += (UChar)0x007C /*'|'*/;
+            result += VERTICAL_BAR;
         }
-        // choose based upon which has less precision
-        // approximate that by choosing the closest one to an integer.
-        // could do better, but it's not worth it.
-        double less = previousDouble(fChoiceLimits[i]);
-        double tryLessOrEqual = uprv_fabs(uprv_IEEEremainder(fChoiceLimits[i], 1.0));
-        double tryLess = uprv_fabs(uprv_IEEEremainder(less, 1.0));
-
         UErrorCode status = U_ZERO_ERROR;
         UnicodeString buf;
-        // {sfb} hack to get this to work on MSVC - NaN doesn't behave as it should
-        if (tryLessOrEqual < tryLess && 
-            ! (uprv_isNaN(tryLessOrEqual) || uprv_isNaN(tryLess))) {
+        if (uprv_isPositiveInfinity(fChoiceLimits[i])) {
+            result += INFINITY;
+        } else if (uprv_isNegativeInfinity(fChoiceLimits[i])) {
+            result += MINUS;
+            result += INFINITY;
+        } else {
             result += dtos(fChoiceLimits[i], buf, status);
-            result += (UChar)0x0023 /*'#'*/;
         }
-        else {
-            if (uprv_isPositiveInfinity(fChoiceLimits[i])) {
-                result += (UChar32)0x221E;
-            } else if (uprv_isNegativeInfinity(fChoiceLimits[i])) {
-                result += (UChar)0x002D /*'-'*/;
-                result += (UChar32)0x221E;
-            } else {
-                result += dtos(less, buf, status);
-            }
-            result += (UChar)0x003C /*'<'*/;
+        result += fClosures[i] ? LESS_THAN : LESS_EQUAL;
+        // Append fChoiceFormats[i], using quotes if there are special
+        // characters.  Single quotes themselves must be escaped in
+        // either case.
+        const UnicodeString& text = fChoiceFormats[i];
+        UBool needQuote = text.indexOf(LESS_THAN) >= 0
+            || text.indexOf(LESS_EQUAL) >= 0
+            || text.indexOf(LESS_EQUAL2) >= 0
+            || text.indexOf(VERTICAL_BAR) >= 0;
+        if (needQuote) {
+            result += SINGLE_QUOTE;
         }
-        // Append fChoiceFormats[i], using quotes if there are special characters.
-        // Single quotes themselves must be escaped in either case.
-        UnicodeString text = fChoiceFormats[i];
-        UBool needQuote = text.indexOf((UChar)0x003C /*'<'*/) >= 0
-            || text.indexOf((UChar)0x0023 /*'#'*/) >= 0
-            || text.indexOf((UChar32)0x2264) >= 0
-            || text.indexOf((UChar)0x007C /*'|'*/) >= 0;
-        if (needQuote) 
-            result += (UChar)0x0027 /*'\''*/;
-        if (text.indexOf((UChar)0x0027 /*'\''*/) < 0) 
+        if (text.indexOf(SINGLE_QUOTE) < 0) {
             result += text;
+        }
         else {
-            for (int j = 0; j < text.length(); ++j) {
+            for (int32_t j = 0; j < text.length(); ++j) {
                 UChar c = text[j];
                 result += c;
-                if (c == 0x0027 /*'\''*/) 
+                if (c == SINGLE_QUOTE) {
                     result += c;
+                }
             }
         }
-        if (needQuote) 
-            result += (UChar)0x0027 /*'\''*/;
+        if (needQuote) {
+            result += SINGLE_QUOTE;
+        }
     }
 
     return result;
@@ -392,17 +469,37 @@ void
 ChoiceFormat::adoptChoices(double *limits, 
                            UnicodeString *formats, 
                            int32_t cnt )
+{
+    adoptChoices(limits, 0, formats, cnt);
+}
+
+// -------------------------------------
+// Adopts the limit, closure, and format arrays.
+
+void
+ChoiceFormat::adoptChoices(double *limits, 
+                           UBool *closures,
+                           UnicodeString *formats, 
+                           int32_t cnt )
 {
     if(limits == 0 || formats == 0)
         return;
 
     delete [] fChoiceLimits;
-    fChoiceLimits = 0;
+    delete [] fClosures;
     delete [] fChoiceFormats;
-    fChoiceFormats = 0;
     fChoiceLimits = limits;
+    fClosures = closures;
     fChoiceFormats = formats;
     fCount = cnt;
+
+    if (fClosures == 0) {
+        fClosures = new UBool[fCount];
+        int32_t i;
+        for (i=0; i<fCount; ++i) {
+            fClosures[i] = FALSE;
+        }
+    }
 }
 
 // -------------------------------------
@@ -411,21 +508,43 @@ void
 ChoiceFormat::setChoices(  const double* limits, 
                            const UnicodeString* formats, 
                            int32_t cnt )
+{
+    setChoices(limits, 0, formats, cnt);
+}
+
+// -------------------------------------
+// Sets the limit, closure, and format arrays.
+void
+ChoiceFormat::setChoices(  const double* limits, 
+                           const UBool* closures,
+                           const UnicodeString* formats, 
+                           int32_t cnt )
 {
     if(limits == 0 || formats == 0)
         return;
 
-    delete [] fChoiceLimits; fChoiceLimits = 0;
-    delete [] fChoiceFormats; fChoiceFormats = 0;
+    delete [] fChoiceLimits;
+    delete [] fClosures;
+    delete [] fChoiceFormats;
 
     // Note that the old arrays are deleted and this owns
     // the created array.
     fCount = cnt;
     fChoiceLimits = new double[fCount];
+    fClosures = new UBool[fCount];
     fChoiceFormats = new UnicodeString[fCount];
 
     uprv_arrayCopy(limits, fChoiceLimits, fCount);
     uprv_arrayCopy(formats, fChoiceFormats, fCount);
+
+    if (closures != 0) {
+        uprv_arrayCopy(closures, fClosures, fCount);
+    } else {
+        int32_t i;
+        for (i=0; i<fCount; ++i) {
+            fClosures[i] = FALSE;
+        }
+    }
 }
 
 // -------------------------------------
@@ -438,6 +557,16 @@ ChoiceFormat::getLimits(int32_t& cnt) const
     return fChoiceLimits;
 }
 
+// -------------------------------------
+// Gets the closures array.
+
+const UBool*
+ChoiceFormat::getClosures(int32_t& cnt) const 
+{
+    cnt = fCount;
+    return fClosures;
+}
+
 // -------------------------------------
 // Gets the format array.
 
@@ -472,16 +601,23 @@ ChoiceFormat::format(double number,
     // find the number
     int32_t i;
     for (i = 0; i < fCount; ++i) {
-        if (!(number >= fChoiceLimits[i])) {
+        if (fClosures[i]) {
+            if (!(number > fChoiceLimits[i])) {
+                // same as number <= fChoiceLimits, except catches NaN
+                break;
+            }
+        } else if (!(number >= fChoiceLimits[i])) {
             // same as number < fChoiceLimits, except catches NaN
             break;
         }
     }
     --i;
-    if (i < 0) 
+    if (i < 0) {
         i = 0;
+    }
     // return either a formatted number, or a string
-    return (toAppendTo += fChoiceFormats[i]);
+    toAppendTo += fChoiceFormats[i];
+    return toAppendTo;
 }
 
 // -------------------------------------
@@ -503,8 +639,7 @@ ChoiceFormat::format(const Formattable* objs,
     UnicodeString buffer;
     for (int32_t i = 0; i < cnt; i++) {
         buffer.remove();
-        toAppendTo += format((objs[i].getType() == Formattable::kLong) ? objs[i].getLong() : objs[i].getDouble(), 
-                             buffer, pos);
+        toAppendTo += format(_getDouble(objs[i]), buffer, pos);
     }
 
     return toAppendTo;