scuffed-code/icu4c/source/i18n/rbnf.cpp

/*
*******************************************************************************
* Copyright (C) 1997-2001, International Business Machines Corporation and others. All Rights Reserved.
*******************************************************************************
*/

#include "unicode/rbnf.h"

#include "nfrs.h"

#include "cmemory.h"
#include "cstring.h"
#include "unicode/normlzr.h"
#include "unicode/tblcoll.h"
#include "unicode/uchar.h"
#include "unicode/ucol.h"
#include "unicode/uloc.h"
#include "unicode/unum.h"
#include "unicode/ures.h"
#include "unicode/ustring.h"
#include "unicode/utf16.h"

#include <stdio.h>

/* "%%" -- rule set names beginning with this prefix are treated as internal
 * and are rejected by the by-name format() overloads in this file. */
static const UChar gPercentPercent[] = { 0x0025, 0x0025, 0x0000 };

// UChar string constants used while splitting a rule description into rule
// sets.  They are spelled out as code points so that the source file does not
// depend on the compiler's execution character set.

/* "%%lenient-parse:" -- header of the optional lenient-parse rule section */
static const UChar gLenientParse[] = {
    0x0025, 0x0025,                                         /* %%      */
    0x006C, 0x0065, 0x006E, 0x0069, 0x0065, 0x006E, 0x0074, /* lenient */
    0x002D,                                                 /* -       */
    0x0070, 0x0061, 0x0072, 0x0073, 0x0065,                 /* parse   */
    0x003A,                                                 /* :       */
    0x0000
};

/* ";" -- terminates every rule */
static const UChar gSemiColon = 0x3B;

/* ";%" -- end of one rule set immediately followed by the start of the next */
static const UChar gSemiPercent[] = { 0x003B, 0x0025, 0x0000 };

// Upper bound handed to NFRuleSet::parse() by RuleBasedNumberFormat::parse().
// It is built as (2^22)^2 == 2^44 so that the shift itself stays comfortably
// inside the range of an int.
//
// Each expansion is fully parenthesized so that the macros behave as a single
// primary expression wherever they are used (e.g. next to sizeof, member
// access or other high-precedence operators).
#define kSomeNumberOfBitsDiv2 22
#define kHalfMaxDouble ((double)(1 << kSomeNumberOfBitsDiv2))
#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)

// Definition (storage) of the static class-ID member.
// NOTE(review): presumably it is the address of this object, not its value,
// that identifies the class in getStaticClassID() -- confirm in the header.
const char RuleBasedNumberFormat::fgClassID = 0;

/**
 * Builds a formatter from a textual rule description.
 *
 * All owned pointers start out NULL and lenient parsing starts out disabled;
 * the actual work is delegated to init(), which returns immediately if
 * `status` already indicates a failure.
 *
 * @param description the rule description to parse
 * @param alocale     the locale stored in this formatter
 * @param perror      passed through to init() (init() does not fill it in yet)
 * @param status      in/out error code, set by init() on failure
 */
RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, const Locale& alocale, UParseError& perror, UErrorCode& status)
  : ruleSets(NULL)
  , defaultRuleSet(NULL)
  , locale(alocale)
  , collator(NULL)
  , decimalFormatSymbols(NULL)
  , lenient(FALSE)
  , lenientParseRules(NULL)
{
  init(description, perror, status);
}

/**
 * Builds a formatter from one of the predefined rule descriptions stored in
 * the locale's resource bundle.
 *
 * @param tag     selects which resource ("SpelloutRules", "OrdinalRules" or
 *                "DurationRules") supplies the description; any other value
 *                sets U_ILLEGAL_ARGUMENT_ERROR
 * @param alocale the locale whose resource bundle is opened
 * @param status  in/out error code; nothing is done if it already signals
 *                a failure
 */
RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
  : ruleSets(NULL)
  , defaultRuleSet(NULL)
  , locale(alocale)
  , collator(NULL)
  , decimalFormatSymbols(NULL)
  , lenient(FALSE)
  , lenientParseRules(NULL)
{
  if (U_FAILURE(status)) {
    return;
  }

  // translate the tag into the key of the resource holding the rules
  const char* resourceKey = "";
  if (tag == URBNF_SPELLOUT) {
    resourceKey = "SpelloutRules";
  } else if (tag == URBNF_ORDINAL) {
    resourceKey = "OrdinalRules";
  } else if (tag == URBNF_DURATION) {
    resourceKey = "DurationRules";
  } else {
    status = U_ILLEGAL_ARGUMENT_ERROR;
    return;
  }

  // fetch the description text; both calls report problems through status
  UResourceBundle* bundle = ures_open(NULL, locale.getName(), &status);
  int32_t textLength = 0;
  const UChar* text = ures_getStringByKey(bundle, resourceKey, &textLength, &status);
  if (!U_FAILURE(status)) {
    UParseError parseError;
    UnicodeString ruleText(text, textLength);
    init(ruleText, parseError, status);
  }

  // the bundle is closed on the failure path as well
  ures_close(bundle);
}

/**
 * Copy constructor.
 *
 * The members are first put into a well-defined empty state (all owned
 * pointers NULL, lenient parsing off) and the real copying is then delegated
 * to the assignment operator.
 *
 * @param rhs the formatter to copy
 */
RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
  : ruleSets(NULL)
  , defaultRuleSet(NULL)
  , locale(rhs.locale)
  , collator(NULL)
  , decimalFormatSymbols(NULL)
  , lenient(FALSE)
  , lenientParseRules(NULL)
{
  this->operator=(rhs);
}

/**
 * Assignment operator.
 *
 * The current contents are released and the formatter is rebuilt from the
 * textual rules of `rhs`; the locale and the lenient flag are copied.
 * Errors raised while re-parsing the rules cannot be reported through this
 * interface and are dropped.
 *
 * @param rhs the formatter to copy
 * @return *this
 */
RuleBasedNumberFormat&
RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
{
  // Self-assignment must be a no-op: disposing first would destroy the very
  // rule sets that getRules() is about to read.
  if (this == &rhs) {
    return *this;
  }

  UErrorCode status = U_ZERO_ERROR;
  dispose();

  // Make sure no pointer to a released object survives, whatever dispose()
  // left in these members; init() only overwrites some of them.
  defaultRuleSet = NULL;
  collator = NULL;
  decimalFormatSymbols = NULL;
  lenientParseRules = NULL;

  locale = rhs.locale;
  UnicodeString rules = rhs.getRules();
  UParseError perror;
  init(rules, perror, status);
  lenient = rhs.lenient;
  return *this;
}

/**
 * Destructor.  Releases everything this formatter owns via dispose().
 */
RuleBasedNumberFormat::~RuleBasedNumberFormat()
{
  dispose();
}

/**
 * Creates a copy of this formatter by re-parsing its textual rules.
 *
 * The rules are handed to the constructor as a UnicodeString.  Copying them
 * into a raw UChar buffer first is unnecessary and unsafe: extract() with an
 * explicit length does not NUL-terminate, while the UChar* -> UnicodeString
 * conversion expects a terminated string.
 *
 * @return a newly allocated formatter owned by the caller, or NULL if the
 *         allocation or the re-parsing of the rules failed
 */
Format*
RuleBasedNumberFormat::clone(void) const
{
  UnicodeString rules = getRules();
  UErrorCode status = U_ZERO_ERROR;
  UParseError perror;

  RuleBasedNumberFormat* result = new RuleBasedNumberFormat(rules, locale, perror, status);
  if (result == NULL) {
    return NULL;
  }
  if (U_FAILURE(status)) {
    delete result;
    return NULL;
  }

  // the lenient flag is not part of the rule text, so copy it explicitly
  result->lenient = lenient;
  return result;
}

/**
 * Equality test.
 *
 * Two formatters are equal when `other` is also a RuleBasedNumberFormat, the
 * locales and lenient flags match, and both hold equal rule sets in the same
 * order.  A formatter without rule sets (e.g. one whose construction failed)
 * only equals another formatter without rule sets.
 *
 * @param other the object to compare with
 * @return TRUE if the two objects are equal
 */
UBool
RuleBasedNumberFormat::operator==(const Format& other) const
{
  if (this == &other) {
    return TRUE;
  }

  if (other.getDynamicClassID() == getStaticClassID()) {
    const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
    if (locale == rhs.locale &&
        lenient == rhs.lenient) {
      NFRuleSet** p = ruleSets;
      NFRuleSet** q = rhs.ruleSets;

      // ruleSets stays NULL when init() bailed out early; never walk it then
      if (p == NULL || q == NULL) {
        return p == q;
      }

      // walk both NULL-terminated arrays in lock step
      while (*p && *q && (**p == **q)) {
        ++p;
        ++q;
      }
      // equal only if both arrays ended at the same time
      return *q == NULL && *p == NULL;
    }
  }

  return FALSE;
}

/**
 * Returns the textual description of this formatter: the concatenation of
 * the rules of every rule set, in order.
 *
 * @return the rule text; empty if the formatter holds no rule sets (for
 *         example because its construction failed)
 */
UnicodeString
RuleBasedNumberFormat::getRules() const
{
  UnicodeString result;
  // ruleSets stays NULL when init() bailed out early
  if (ruleSets != NULL) {
    for (NFRuleSet** p = ruleSets; *p; ++p) {
      (*p)->appendRules(result);
    }
  }
  return result;
}

/**
 * Returns the name of the index'th public rule set.
 *
 * @param index zero-based position among the public rule sets only
 * @return the rule set's name, or an empty string when `index` does not
 *         identify a public rule set (previously this dereferenced a null
 *         pointer)
 */
UnicodeString
RuleBasedNumberFormat::getRuleSetName(int32_t index) const
{
  UnicodeString result;
  // ruleSets stays NULL when init() bailed out early
  if (ruleSets != NULL) {
    for (NFRuleSet** p = ruleSets; *p; ++p) {
      NFRuleSet* rs = *p;
      if (rs->isPublic()) {
        // count down through the public sets until the requested one is hit
        if (--index == -1) {
          rs->getName(result);
          return result;
        }
      }
    }
  }
  // not found: result is still empty here
  return result;
}

/**
 * Counts the public rule sets of this formatter.
 *
 * @return the number of public rule sets; 0 if the formatter holds no rule
 *         sets at all (for example because its construction failed)
 */
int32_t
RuleBasedNumberFormat::getNumberOfRuleSetNames() const
{
  int32_t result = 0;
  // ruleSets stays NULL when init() bailed out early
  if (ruleSets != NULL) {
    for (NFRuleSet** p = ruleSets; *p; ++p) {
      if ((**p).isPublic()) {
        ++result;
      }
    }
  }
  return result;
}

/**
 * Looks up a rule set by name.
 *
 * @param name   the name to look for
 * @param status in/out error code; nothing is searched if it already signals
 *               a failure, and it is set to U_ILLEGAL_ARGUMENT_ERROR when no
 *               rule set carries the given name
 * @return the matching rule set (still owned by this formatter), or NULL
 */
NFRuleSet*
RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
{
  if (U_SUCCESS(status)) {
    // ruleSets stays NULL when init() bailed out early; treat that the same
    // as "no rule set of that name"
    if (ruleSets != NULL) {
      for (NFRuleSet** p = ruleSets; *p; ++p) {
        NFRuleSet* rs = *p;
        if (rs->isNamed(name)) {
          return rs;
        }
      }
    }
    status = U_ILLEGAL_ARGUMENT_ERROR;
  }
  return NULL;
}

/**
 * Formats an integer with the default rule set and appends the text.
 *
 * @param number     the value to format
 * @param toAppendTo receives the formatted text at its current end
 * @param pos        not used by this implementation
 * @return toAppendTo; left untouched if the formatter has no default rule
 *         set (for example because its construction failed)
 */
UnicodeString&
RuleBasedNumberFormat::format(int32_t number,
                              UnicodeString& toAppendTo,
                              FieldPosition& pos) const
{
  // defaultRuleSet stays NULL when init() bailed out early
  if (defaultRuleSet != NULL) {
    defaultRuleSet->format(llong(number), toAppendTo, toAppendTo.length());
  }
  return toAppendTo;
}

// Disabled (compiled out by "#if 0"): llong overload that formats with the
// default rule set.  The int32_t overload converts its argument to llong and
// performs the same call.
#if 0
UnicodeString&
RuleBasedNumberFormat::format(llong number,
                              UnicodeString& toAppendTo,
                              FieldPosition& pos) const
{
  defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
  return toAppendTo;
}
#endif

/**
 * Formats a double with the default rule set and appends the text.
 *
 * @param number     the value to format
 * @param toAppendTo receives the formatted text at its current end
 * @param pos        not used by this implementation
 * @return toAppendTo; left untouched if the formatter has no default rule
 *         set (for example because its construction failed)
 */
UnicodeString&
RuleBasedNumberFormat::format(double number,
                              UnicodeString& toAppendTo,
                              FieldPosition& pos) const
{
  // defaultRuleSet stays NULL when init() bailed out early
  if (defaultRuleSet != NULL) {
    defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
  }
  return toAppendTo;
}


/**
 * Formats an integer with a named rule set and appends the text.
 *
 * @param number      the value to format
 * @param ruleSetName name of the rule set to use; names starting with "%%"
 *                    denote internal rule sets and are rejected with
 *                    U_ILLEGAL_ARGUMENT_ERROR
 * @param toAppendTo  receives the formatted text at its current end
 * @param pos         not used by this implementation
 * @param status      in/out error code; nothing is done if it already
 *                    signals a failure
 * @return toAppendTo
 */
UnicodeString&
RuleBasedNumberFormat::format(int32_t number,
                              const UnicodeString& ruleSetName,
                              UnicodeString& toAppendTo,
                              FieldPosition& pos,
                              UErrorCode& status) const
{
  if (U_FAILURE(status)) {
    return toAppendTo;
  }

  // internal rule sets may not be selected by callers
  if (ruleSetName.indexOf(gPercentPercent) == 0) {
    status = U_ILLEGAL_ARGUMENT_ERROR;
    return toAppendTo;
  }

  // findRuleSet() reports an unknown name through status and returns NULL
  NFRuleSet* target = findRuleSet(ruleSetName, status);
  if (target != NULL) {
    target->format(llong(number), toAppendTo, toAppendTo.length());
  }
  return toAppendTo;
}

// Disabled (compiled out by "#if 0"): llong overload that formats with a
// named rule set.  It mirrors the int32_t and double overloads: names that
// start with "%%" are rejected with U_ILLEGAL_ARGUMENT_ERROR.
#if 0
UnicodeString&
RuleBasedNumberFormat::format(llong number,
                              const UnicodeString& ruleSetName,
                              UnicodeString& toAppendTo,
                              FieldPosition& pos,
                              UErrorCode& status) const
{
  if (U_SUCCESS(status)) {
	  if (ruleSetName.indexOf(gPercentPercent) == 0) {
        // throw new IllegalArgumentException("Can't use internal rule set");
		status = U_ILLEGAL_ARGUMENT_ERROR;
	  } else {
        NFRuleSet *rs = findRuleSet(ruleSetName, status);
        if (rs) {
          rs->format(number, toAppendTo, toAppendTo.length());
		}
	  }
  }
  return toAppendTo;
}
#endif

// make linker happy
/**
 * Formattable overload: forwards unchanged to NumberFormat::format().
 *
 * @param obj        the value to format
 * @param toAppendTo receives the formatted text
 * @param pos        passed through to the base class
 * @param status     passed through to the base class
 * @return whatever the base class implementation returns
 */
UnicodeString&
RuleBasedNumberFormat::format(const Formattable& obj,
                              UnicodeString& toAppendTo,
                              FieldPosition& pos,
                              UErrorCode& status) const
{
	return NumberFormat::format(obj, toAppendTo, pos, status);
}

/**
 * Formats a double with a named rule set and appends the text.
 *
 * @param number      the value to format
 * @param ruleSetName name of the rule set to use; names starting with "%%"
 *                    denote internal rule sets and are rejected with
 *                    U_ILLEGAL_ARGUMENT_ERROR
 * @param toAppendTo  receives the formatted text at its current end
 * @param pos         not used by this implementation
 * @param status      in/out error code; nothing is done if it already
 *                    signals a failure
 * @return toAppendTo
 */
UnicodeString&
RuleBasedNumberFormat::format(double number,
                              const UnicodeString& ruleSetName,
                              UnicodeString& toAppendTo,
                              FieldPosition& pos,
                              UErrorCode& status) const
{
  if (U_FAILURE(status)) {
    return toAppendTo;
  }

  // internal rule sets may not be selected by callers
  if (ruleSetName.indexOf(gPercentPercent) == 0) {
    status = U_ILLEGAL_ARGUMENT_ERROR;
    return toAppendTo;
  }

  // findRuleSet() reports an unknown name through status and returns NULL
  NFRuleSet* target = findRuleSet(ruleSetName, status);
  if (target != NULL) {
    target->format(number, toAppendTo, toAppendTo.length());
  }
  return toAppendTo;
}

void
RuleBasedNumberFormat::parse(const UnicodeString& text,
			     Formattable& result,
			     ParsePosition& parsePosition) const
{
  ParsePosition high_pp;
  Formattable high_result;

  for (NFRuleSet** p = ruleSets; *p; ++p) {
    NFRuleSet *rp = *p;
    if (rp->isPublic()) {
      ParsePosition working_pp = parsePosition;
      Formattable working_result;

      rp->parse(text, working_pp, kMaxDouble, working_result);
      if (working_pp.getIndex() > high_pp.getIndex()) {
        high_pp = working_pp;
        high_result = working_result;

        if (high_pp.getIndex() == text.length()) {
          break;
        }
      }
    }
  }

  parsePosition = high_pp;
  result = high_result;
  if (result.getType() == Formattable::kDouble) {
	int32_t r = (int32_t)result.getDouble();
	if ((double)r == result.getDouble()) {
		result.setLong(r);
	}
  }
}

/**
 * Switches lenient parsing on or off.
 *
 * Switching it off also discards the collator, if one exists.
 *
 * @param enabled TRUE to enable lenient parsing, FALSE to disable it
 */
void
RuleBasedNumberFormat::setLenient(UBool enabled)
{
  lenient = enabled;
  if (enabled) {
    return;
  }

  // lenient parsing is off: the collator is no longer needed
  // (deleting a NULL pointer is a no-op)
  delete collator;
  collator = NULL;
}

/**
 * Parses a rule description and builds the rule sets of this formatter.
 *
 * Steps: strip the whitespace that follows each semicolon, pull out an
 * optional "%%lenient-parse:" section, split the remaining text into rule
 * sets at every ";%", pick the default rule set (the last public one) and
 * finally let every rule set parse its own rules.
 *
 * Every allocation is checked; on an allocation failure `status` is set to
 * U_MEMORY_ALLOCATION_ERROR and the function returns.  Whatever has been
 * built up to that point stays in the members and is released by dispose().
 *
 * @param rules  the rule description
 * @param perror not filled in yet (see TODO below)
 * @param status in/out error code; nothing is done if it already signals a
 *               failure
 */
void
RuleBasedNumberFormat::init(const UnicodeString& rules, UParseError& perror, UErrorCode& status)
{
  // TODO: implement perror
  if (U_FAILURE(status)) {
    return;
  }

  UnicodeString description(rules);
  if (!description.length()) {
    // NOTE(review): an empty description is reported with the
    // memory-allocation error code; kept as is for compatibility.
    status = U_MEMORY_ALLOCATION_ERROR;
    return;
  }

  // start by stripping the trailing whitespace from all the rules
  // (this is all the whitespace following each semicolon in the
  // description).  This allows us to look for rule-set boundaries
  // by searching for ";%" without having to worry about whitespace
  // between the ; and the %
  stripWhitespace(description);

  // check to see if there's a set of lenient-parse rules.  If there
  // is, pull them out into our temporary holding place for them,
  // and delete them from the description before the real description-
  // parsing code sees them
  UTextOffset lp = description.indexOf(gLenientParse);
  if (lp != -1) {
    // we've got to make sure we're not in the middle of a rule
    // (where "%%lenient-parse" would actually get treated as
    // rule text)
    if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
      // locate the beginning and end of the actual collation
      // rules (there may be whitespace between the name and
      // the first token in the description)
      int lpEnd = description.indexOf(gSemiPercent, lp);

      if (lpEnd == -1) {
        lpEnd = description.length() - 1;
      }
      int lpStart = lp + u_strlen(gLenientParse);
      while (u_isWhitespace(description.charAt(lpStart))) {
        ++lpStart;
      }

      // copy out the lenient-parse rules and delete them
      // from the description
      lenientParseRules = new UnicodeString();
      if (lenientParseRules == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
      }
      lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);

      description.remove(lp, lpEnd + 1 - lp);
    }
  }

  // pre-flight parsing the description and count the number of
  // rule sets (";%" marks the end of one rule set and the beginning
  // of the next)
  int numRuleSets = 0;
  for (UTextOffset p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, p)) {
    ++numRuleSets;
    ++p;
  }
  ++numRuleSets;

  // our rule list is an array of the appropriate size, plus one slot for
  // the terminating NULL that every loop over ruleSets relies on
  ruleSets = new NFRuleSet*[numRuleSets + 1];
  if (ruleSets == NULL) {
    status = U_MEMORY_ALLOCATION_ERROR;
    return;
  }
  for (int i = 0; i <= numRuleSets; ++i) {
    ruleSets[i] = NULL;
  }

  // divide up the descriptions into individual rule-set descriptions
  // and store them in a temporary array.  At each step, we also
  // new up a rule set, but all this does is initialize its name
  // and remove it from its description.  We can't actually parse
  // the rest of the descriptions and finish initializing everything
  // because we have to know the names and locations of all the rule
  // sets before we can actually set everything up
  UnicodeString* ruleSetDescriptions = new UnicodeString[numRuleSets];
  if (ruleSetDescriptions == NULL) {
    status = U_MEMORY_ALLOCATION_ERROR;
    return;
  }

  {
    int curRuleSet = 0;
    UTextOffset start = 0;
    for (UTextOffset p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, start)) {
      ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
      ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
      if (ruleSets[curRuleSet] == NULL) {
        // the array is still NULL-terminated, so dispose() can clean it up
        status = U_MEMORY_ALLOCATION_ERROR;
        delete[] ruleSetDescriptions;
        return;
      }
      ++curRuleSet;
      start = p + 1;
    }
    ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
    ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
    if (ruleSets[curRuleSet] == NULL) {
      status = U_MEMORY_ALLOCATION_ERROR;
      delete[] ruleSetDescriptions;
      return;
    }
  }

  // now we can take note of the formatter's default rule set, which
  // is the last public rule set in the description (it's the last
  // rather than the first so that a user can create a new formatter
  // from an existing formatter and change its default behavior just
  // by appending more rule sets to the end).  If no rule set is public,
  // the last rule set remains the default.
  {
    defaultRuleSet = ruleSets[numRuleSets - 1];
    if (!defaultRuleSet->isPublic()) {
      for (int i = numRuleSets - 2; i >= 0; --i) {
        if (ruleSets[i]->isPublic()) {
          defaultRuleSet = ruleSets[i];
          break;
        }
      }
    }
  }

  // finally, we can go back through the temporary descriptions
  // list and finish setting up the substructure
  {
    for (int i = 0; i < numRuleSets; i++) {
      ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
    }
  }

  // the temporary descriptions are no longer needed
  delete[] ruleSetDescriptions;
}

/**
 * Removes the whitespace that precedes each rule, i.e. the whitespace at the
 * very start of the description and the whitespace following each semicolon.
 * Everything else, including the semicolons themselves, is kept.
 *
 * @param description the rule description; replaced by the stripped text
 */
void
RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
{
  UnicodeString stripped;
  const int32_t total = description.length();

  int cursor = 0;
  while (cursor < total) {
    // skip the whitespace in front of the next rule
    while (cursor < total && u_isWhitespace(description.charAt(cursor))) {
      ++cursor;
    }

    UTextOffset semi = description.indexOf(gSemiColon, cursor);
    if (semi == -1) {
      // last rule without a closing semicolon: keep the rest and stop
      stripped.append(description, cursor, total - cursor);
      break;
    }

    // keep the rule together with its semicolon and continue after it
    stripped.append(description, cursor, semi + 1 - cursor);
    cursor = semi + 1;
  }

  description.setTo(stripped);
}


void
RuleBasedNumberFormat::dispose()
{
  if (ruleSets) {
    for (NFRuleSet** p = ruleSets; *p; ++p) {
      delete *p;
    }
    delete[] ruleSets;
    ruleSets = NULL;
  }

  delete collator;

  delete decimalFormatSymbols;

  delete lenientParseRules;
}


//-----------------------------------------------------------------------
// package-internal API
//-----------------------------------------------------------------------

/**
 * Returns the collator to use for lenient parsing.  The collator is lazily created:
 * this function creates it the first time it's called while lenient parsing is on.
 * @return The collator to use for lenient parsing, or null if lenient parsing
 * is turned off or the collator could not be created.
*/
Collator*
RuleBasedNumberFormat::getCollator() const
{
    // lazy-evaluate the collator
    if (collator == NULL && lenient) {
        // create a default collator based on the formatter's locale,
        // then pull out that collator's rules, append any additional
        // rules specified in the description, and create a _new_
        // collator based on the combination of those rules

        UErrorCode status = U_ZERO_ERROR;

        Collator* temp = Collator::createInstance(locale, status);
        if (U_SUCCESS(status) && temp != NULL &&
            temp->getDynamicClassID() == RuleBasedCollator::getStaticClassID()) {

            RuleBasedCollator* newCollator = (RuleBasedCollator*)temp;
            if (lenientParseRules) {
                UnicodeString rules(newCollator->getRules());
                rules.append(*lenientParseRules);

                // temp stays owned here and is deleted below
                newCollator = new RuleBasedCollator(rules, status);
            } else {
                // the locale collator itself is used; ownership moves to
                // newCollator, so temp must not be deleted below
                temp = NULL;
            }
            // newCollator is NULL if the allocation above failed; it must
            // not be dereferenced in that case
            if (U_SUCCESS(status) && newCollator != NULL) {
                newCollator->setDecomposition(Normalizer::DECOMP);
                // cast away const
                ((RuleBasedNumberFormat*)this)->collator = newCollator;
            } else {
                delete newCollator;
            }
        }
        delete temp;
    }

    // if lenient-parse mode is off, this will be null
    // (see setLenient())
    return collator;
}


/**
 * Returns the DecimalFormatSymbols object of this formatter.  The object is
 * created for the formatter's locale on the first call and cached; later
 * calls return the cached object.
 * @return The cached DecimalFormatSymbols object, which remains owned by this
 * formatter, or null if it could not be created.
*/
DecimalFormatSymbols*
RuleBasedNumberFormat::getDecimalFormatSymbols() const
{
    // already created by an earlier call?
    if (decimalFormatSymbols != NULL) {
        return decimalFormatSymbols;
    }

    UErrorCode ec = U_ZERO_ERROR;
    DecimalFormatSymbols* fresh = new DecimalFormatSymbols(locale, ec);
    if (U_FAILURE(ec)) {
        // creation failed: drop the object, the cache stays empty
        delete fresh;
    } else {
        // cast away const to fill the cache from this const accessor
        ((RuleBasedNumberFormat*)this)->decimalFormatSymbols = fresh;
    }
    return decimalFormatSymbols;
}