scuffed-code/icu4c/source/i18n/tblcoll.cpp

/*
*******************************************************************************
* Copyright (C) 1996-1999, International Business Machines Corporation and    *
* others. All Rights Reserved.                                                *
*******************************************************************************
*
* File tblcoll.cpp
*
* Created by: Helena Shih
*
* Modification History:
*
*  Date        Name        Description
*  2/5/97      aliu        Added streamIn and streamOut methods.  Added
*                          constructor which reads RuleBasedCollator object from
*                          a binary file.  Added writeToFile method which streams
*                          RuleBasedCollator out to a binary file.  The streamIn
*                          and streamOut methods use istream and ostream objects
*                          in binary mode.
*  2/11/97     aliu        Moved declarations out of for loop initializer.
*                          Added Mac compatibility #ifdef for ios::nocreate.
*  2/12/97     aliu        Modified to use TableCollationData sub-object to
*                          hold invariant data.
*  2/13/97     aliu        Moved several methods into this class from Collation.
*                          Added a private RuleBasedCollator(Locale&) constructor,
*                          to be used by Collator::getInstance().  General
*                          clean up.  Made use of UErrorCode variables consistent.
*  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
*                          constructor and getDynamicClassID.
*  3/5/97      aliu        Changed compaction cycle to improve performance.  We
*                          use the maximum allowable value which is kBlockCount.
*                          Modified getRules() to load rules dynamically.  Changed
*                          constructFromFile() call to accomodate this (added
*                          parameter to specify whether binary loading is to
*                          take place).
* 05/06/97     helena      Added memory allocation error check.
*  6/20/97     helena      Java class name change.
*  6/23/97     helena      Adding comments to make code more readable.
* 09/03/97     helena      Added createCollationKeyValues().
* 06/26/98     erm         Changes for CollationKeys using byte arrays.
* 08/10/98     erm         Synched with 1.2 version of RuleBasedCollator.java
* 04/23/99     stephen     Removed EDecompositionMode, merged with
*                          Normalizer::EMode
* 06/14/99     stephen     Removed kResourceBundleSuffix
* 06/22/99     stephen     Fixed logic in constructFromFile() since .ctx
*                          files are no longer used.
* 11/02/99     helena      Collator performance enhancements.  Special case
*                          for NO_OP situations.
* 11/17/99     srl         More performance enhancements. Inlined some internal functions.
* 12/15/99     aliu        Update to support Thai collation.  Move NormalizerIterator
*                          to implementation file.
*******************************************************************************
*/

#include "ucmp32.h"
#include "tcoldata.h"

#include "ucolimp.h"

#include "unicode/tblcoll.h"

#include "unicode/coleitr.h"
#include "unicode/locid.h"
#include "unicode/unicode.h"
#include "tables.h"
#include "unicode/normlzr.h"
#include "mergecol.h"
#include "unicode/resbund.h"
#include "filestrm.h"
#include "umemstrm.h"
#include "umutex.h"
#include "cmemory.h"

#ifdef _DEBUG
#include "unistrm.h"
#endif

#include "compitr.h"

#include <string.h>

#include "unicode/ustring.h"

#include "cmemory.h"

// Length constant for stack-allocated scratch buffers.
// NOTE(review): the users of this constant are outside the visible part of
// the file; presumably it is an element count rather than a byte count — confirm.
const uint32_t tblcoll_StackBufferLen = 1024;

// Helper class grouping the static routines that read a RuleBasedCollator
// from, or write it to, a flattened binary form.  Two stream flavours are
// supported: a FileStream and an in-memory UMemoryStream.  Only the
// UMemoryStream reader reports problems through a UErrorCode; callers of the
// other overloads inspect the stream's error state afterwards.
// The definitions are not in this part of the file.
class RuleBasedCollatorStreamer
{
public:
   // Read the collator's data from the file stream 'is'.
   static void streamIn(RuleBasedCollator* collator, FileStream* is);
   // Write the collator's data to the file stream 'os'.
   static void streamOut(const RuleBasedCollator* collator, FileStream* os);
   // Read the collator's data from the memory stream 'is'; 'status' receives errors.
   static void streamIn(RuleBasedCollator* collator, UMemoryStream* is, UErrorCode& status);
   // Write the collator's data to the memory stream 'os'.
   static void streamOut(const RuleBasedCollator* collator, UMemoryStream* os);
};

//===========================================================================================
//  The following diagram shows the data structure of the RuleBasedCollator object.
//  Suppose we have the rule, where 'o-umlaut' is the unicode char 0x00F6.
//  "a, A < b, B < c, C, ch, cH, Ch, CH < d, D ... < o, O; 'o-umlaut'/E, 'O-umlaut'/E ...".
//  The rule says: sort the 'ch' ligatures and 'c' with only a tertiary difference,
//  and sort 'o-umlaut' as if it were always expanded with 'e'.
//
// mapping table                       contracting list                  expanding list
// (contains all unicode char
//  entries)                         ___     _____________         _________________________
//   ________                   |==>|_*_|-->|'c'  |v('c') |   |==>|v('o')|v('umlaut')|v('e')|
//  |_\u0001_|--> v('\u0001')   |   |_:_|   |-------------|   |   |-------------------------|
//  |_\u0002_|--> v('\u0002')   |   |_:_|   |'ch' |v('ch')|   |   |             :           |
//  |____:___|                  |   |_:_|   |-------------|   |   |-------------------------|
//  |____:___|                  |           |'cH' |v('cH')|   |   |             :           |
//  |__'a'___|--> v('a')        |           |-------------|   |   |-------------------------|
//  |__'b'___|--> v('b')        |           |'Ch' |v('Ch')|   |   |             :           |
//  |____:___|                  |           |-------------|   |   |-------------------------|
//  |____:___|                  |           |'CH' |v('CH')|   |   |             :           |
//  |___'c'__|-------------------            -------------    |   |-------------------------|
//  |____:___|                                                |   |             :           |
//  |o-umlaut|------------------------------------------------    |_________________________|
//  |____:___|
//
//
// Noted by Helena Shih on 6/23/97 with pending design changes (slimming collation).
//============================================================================================

// Definitions of RuleBasedCollator's static members.
//
// The first group holds flag values and bit masks for the 32-bit collation
// order values stored in the mapping table.  As the masks below show, the
// primary order occupies the upper 16 bits, the secondary order bits 8-15 and
// the tertiary order the low 8 bits.  Values at or above EXPANDCHARINDEX /
// CONTRACTCHARINDEX are not orders but indexes into the expanding /
// contracting tables.
const int32_t RuleBasedCollator::CHARINDEX = 0x70000000;             // need look up in .commit()
const int32_t RuleBasedCollator::EXPANDCHARINDEX = 0x7E000000;       // Expand index follows
const int32_t RuleBasedCollator::CONTRACTCHARINDEX = 0x7F000000;     // contract indexes follows
const int32_t RuleBasedCollator::UNMAPPED = 0xFFFFFFFF;              // unmapped character values
const int32_t RuleBasedCollator::PRIMARYORDERINCREMENT = 0x00010000; // primary strength increment
const int32_t RuleBasedCollator::SECONDARYORDERINCREMENT = 0x00000100; // secondary strength increment
const int32_t RuleBasedCollator::TERTIARYORDERINCREMENT = 0x00000001; // tertiary strength increment
const int32_t RuleBasedCollator::MAXIGNORABLE = 0x00010000;          // maximum ignorable char order value
const int32_t RuleBasedCollator::PRIMARYORDERMASK = 0xffff0000;      // mask off anything but primary order
const int32_t RuleBasedCollator::SECONDARYORDERMASK = 0x0000ff00;    // mask off anything but secondary order
const int32_t RuleBasedCollator::TERTIARYORDERMASK = 0x000000ff;     // mask off anything but tertiary order
const int32_t RuleBasedCollator::SECONDARYRESETMASK = 0x0000ffff;    // mask off secondary and tertiary order
const int32_t RuleBasedCollator::IGNORABLEMASK = 0x0000ffff;         // mask off ignorable char order
const int32_t RuleBasedCollator::PRIMARYDIFFERENCEONLY = 0xffff0000; // use only the primary difference
const int32_t RuleBasedCollator::SECONDARYDIFFERENCEONLY = 0xffffff00;  // use only the primary and secondary difference
const int32_t RuleBasedCollator::PRIMARYORDERSHIFT = 16;             // primary order shift
const int32_t RuleBasedCollator::SECONDARYORDERSHIFT = 8;            // secondary order shift
const int32_t RuleBasedCollator::SORTKEYOFFSET = 1;                  // minimum sort key offset
const int32_t RuleBasedCollator::CONTRACTCHAROVERFLOW = 0x7FFFFFFF;  // Indicates the char is a contract char

// Starting and reset values used while assigning collation elements.
const int32_t RuleBasedCollator::COLELEMENTSTART = 0x02020202;       // starting value for collation elements
const int32_t RuleBasedCollator::PRIMARYLOWZEROMASK = 0x00FF0000;    // testing mask for primary low element
const int32_t RuleBasedCollator::RESETSECONDARYTERTIARY = 0x00000202;// resetting value for secondaries and tertiaries
const int32_t RuleBasedCollator::RESETTERTIARY = 0x00000002;         // resetting value for tertiaries

// Values that mark an ignorable element at each strength level.
const int32_t RuleBasedCollator::IGNORABLE = 0x02020202;
const int32_t RuleBasedCollator::PRIMIGNORABLE = 0x0202;
const int32_t RuleBasedCollator::SECIGNORABLE = 0x02;
const int32_t RuleBasedCollator::TERIGNORABLE = 0x02;

const int16_t RuleBasedCollator::FILEID = 0x5443;                    // unique file id for parity check
const char* RuleBasedCollator::kFilenameSuffix = ".col";             // binary collation file extension
char  RuleBasedCollator::fgClassID = 0; // Value is irrelevant       // class id
// Counter used by constructFromRules() to generate a fresh cache key for each
// rule-built collator.
UChar RuleBasedCollator::cacheKey = 0;

// Class-wide mutex.  It is set up by initMutex(), which runs during static
// initialization because it is the initializer of isMutexInited.
UMTX RuleBasedCollator::collMutex = NULL;
UBool RuleBasedCollator::isMutexInited = RuleBasedCollator::initMutex();

////////////////////////////////////////////////////////////////////////
// NormalizerIterator
//
// This class is essentially a duplicate of CollationElementIterator,
// stripped down for speed.  It is declared here so we can incorporate
// internal classes as subobjects, as well as just to hide it from the
// public interface.
////////////////////////////////////////////////////////////////////////

/* Internal class for quick iteration over the text.
   100% pure inline code

   The iterator works in one of two ways:
     - With a Normalizer ('cursor' non-null): characters are pulled through
       the Normalizer.
     - Without one ('cursor' null, 'text' non-null): used for
       Normalizer::NO_OP, where the caller's UChar buffer is walked directly
       with 'currentOffset'.
*/
class NormalizerIterator {
public:
    Normalizer *cursor;          // normalizing cursor; deleted by the destructor; null in NO_OP mode
    VectorOfInt *bufferAlias;    // alias (never deleted here) to the collation elements still to be returned
    VectorOfInt *reorderBuffer;  // created on demand by RuleBasedCollator::makeReorderedBuffer(); deleted by the destructor
    VectorOfInt ownBuffer;       // buffer constructed with the argument 2; two slots (0 and 1) are used
    UChar*      text;            // caller's text, aliased (never deleted here); null when a Normalizer is used
    int32_t     expIndex;        // index of the next element to hand out from bufferAlias
    int32_t     textLen;         // number of UChars in 'text'
    UTextOffset  currentOffset;  // current position within 'text'

    NormalizerIterator(void);
    NormalizerIterator(const UChar* source, int32_t length, Normalizer::EMode mode);
    ~NormalizerIterator(void);
    // Point the iterator at new text, keeping the current mode.
    void setText(const UChar* source, int32_t length, UErrorCode& status);
    // Point the iterator at new text and switch the normalization mode.
    void setModeAndText(Normalizer::EMode mode, const UChar* source, int32_t length, UErrorCode& status);

    // Character at the current position (Normalizer::DONE at the end of raw text).
    UChar current(void) const;
    // Advance one position and return the character found there.
    UChar next(void);
    // Go back to the start of the text.
    void reset(void);
};

/* Default constructor: an iterator with neither text nor a Normalizer
   attached.  setText() or setModeAndText() must supply the text before
   the iterator is used. */
inline
NormalizerIterator::NormalizerIterator()
    : cursor(NULL),
      bufferAlias(NULL),
      reorderBuffer(NULL),
      ownBuffer(2),
      text(NULL),
      expIndex(0),
      textLen(0),
      currentOffset(0)
{
    // Everything is set up by the initializer list.
}

/* Construct an iterator over 'source' (length UChars).
   For Normalizer::NO_OP the caller's buffer is aliased and walked directly;
   for every other mode a Normalizer is allocated to do the iteration. */
inline
NormalizerIterator::NormalizerIterator(const UChar* source, int32_t length, Normalizer::EMode mode)
    : cursor(NULL),
      bufferAlias(NULL),
      reorderBuffer(NULL),
      ownBuffer(2),
      text(NULL),
      expIndex(0),
      textLen(0),
      currentOffset(0)
{
    if (mode != Normalizer::NO_OP) {
        // Normalization requested: let a Normalizer own the iteration.
        cursor = new Normalizer(source, length, mode);
        return;
    }

    // No normalization: iterate over the raw text in place.
    text = (UChar*)source;
    textLen = length;
    currentOffset = 0;
}

/* Destructor: releases the Normalizer and the reorder buffer.
   'text' and 'bufferAlias' are aliases and are left alone. */
inline
NormalizerIterator::~NormalizerIterator()
{
    // delete on a null pointer is a no-op, so no explicit checks are needed.
    delete cursor;
    cursor = NULL;

    delete reorderBuffer;
}

/* Replace the text being iterated without changing the mode.
   If a Normalizer exists the new text is handed to it (errors are reported
   through 'status'); otherwise the caller's buffer is aliased directly.
   Any pending expansion buffer is dropped and the position is rewound. */
inline
void
NormalizerIterator::setText(const UChar* source, int32_t length, UErrorCode& status)
{
    if (cursor != 0) {
        // Normalizing mode: the raw-text alias is not used.
        text = 0;
        cursor->setText(source, length, status);
    } else {
        // Raw mode: walk the caller's buffer in place.
        text = (UChar*)source;
        textLen = length;
    }

    currentOffset = 0;
    bufferAlias = 0;
}

/* Switch the normalization mode and the text in one step.
   You can only set the mode after the comparison of two strings is completed;
   setting the mode in the middle of a comparison is not allowed.

   NO_OP mode discards any existing Normalizer and aliases the caller's
   buffer; every other mode reuses the existing Normalizer or creates one.
   */
inline
void
NormalizerIterator::setModeAndText(Normalizer::EMode mode, const UChar* source, int32_t length, UErrorCode& status)
{
    if (mode == Normalizer::NO_OP) {
        /* No normalization: drop the old cursor (delete of null is harmless)
           and iterate over the raw text. */
        delete cursor;
        cursor = 0;

        text = (UChar*)source;
        textLen = length;
    } else {
        /* A real mode: a Normalizer object is required. */
        if (cursor == NULL) {
            cursor = new Normalizer(source, length, mode);
        } else {
            /* Re-target the existing cursor. */
            cursor->setMode(mode);
            cursor->setText(source, length, status);
        }

        /* Forget the raw-text state. */
        text = 0;
        textLen = 0;
    }

    /* Common to both paths: rewind and drop any pending expansion. */
    currentOffset = 0;
    bufferAlias = 0;
}

/* Return the character at the current position without advancing.
   In raw-text mode Normalizer::DONE is returned once the position has
   reached the end of the text; otherwise the Normalizer is asked. */
inline
UChar
NormalizerIterator::current(void) const
{
    if (text == 0) {
        // Normalizing mode.
        return (UChar)cursor->current();
    }

    // Raw-text mode.
    return (UChar)((currentOffset < textLen) ? text[currentOffset] : Normalizer::DONE);
}


/* Advance one position and return the character at the new position.
   In raw-text mode Normalizer::DONE is returned when the new position is at
   (or already was at) the end of the text; otherwise the Normalizer is asked.

   The bounds test must be applied to the position AFTER advancing: testing
   the old position and then reading text[++currentOffset] would read
   text[textLen], one element past the end of the caller's buffer, when
   stepping off the last character. */
inline
UChar
NormalizerIterator::next(void)
{
    if (text != 0) {
        // Advance, but never move beyond the end-of-text position.
        if (currentOffset < textLen) {
            ++currentOffset;
        }
        return (UChar)((currentOffset < textLen) ? text[currentOffset] : Normalizer::DONE);
    }
    return (UChar)cursor->next();
}

/* Rewind to the start of the text.  The raw-text offset is always cleared;
   a Normalizer, if present, is reset as well.  bufferAlias and expIndex are
   not touched here. */
inline
void
NormalizerIterator::reset(void)
{
    currentOffset = 0;

    if (cursor != 0) {
        cursor->reset();
    }
}

//================ Some inline definitions of implementation functions........ ========
/**
 * A clone of CollationElementIterator::makeReorderedBuffer, trimmed down
 * to only handle forward.
 *
 * Builds a buffer holding the collation elements of colFirst followed by
 * the collation element(s) that were already computed for the preceding
 * character.  getStrengthOrder() calls it for a Thai pre-vowel followed by
 * a base consonant, so that the consonant's elements come first.
 *
 * @param cursor         iterator positioned after colFirst; its ownBuffer or
 *                       reorderBuffer provides the storage for the result
 * @param colFirst       character whose elements go first in the result
 * @param lastValue      single order value of the other character; used
 *                       when lastExpansion is NULL
 * @param lastExpansion  expansion list of the other character, or NULL
 * @return pointer to cursor->ownBuffer (when neither side expands) or to
 *         cursor->reorderBuffer; the cursor retains ownership in both cases
 */
inline VectorOfInt*
RuleBasedCollator::makeReorderedBuffer(NormalizerIterator* cursor,
                                       UChar colFirst,
                                       int32_t lastValue,
                                       VectorOfInt* lastExpansion) const {
    VectorOfInt* result;

    // Look up colFirst; resolve a contraction first if the table says so.
    int32_t firstValue = ucmp32_get(data->mapping, colFirst);
    if (firstValue >= CONTRACTCHARINDEX) {
        // The status is local, so an error from nextContractChar is not
        // reported to the caller.
        UErrorCode status = U_ZERO_ERROR;
        firstValue = nextContractChar(cursor, colFirst, status);
    }

    VectorOfInt* firstExpansion = NULL;
    if (firstValue >= EXPANDCHARINDEX) {
        firstExpansion = getExpandValueList(firstValue);
    }

    if (firstExpansion == NULL && lastExpansion == NULL) {
        // Simple case: exactly two values, which fit in the cursor's
        // two-slot ownBuffer.
        cursor->ownBuffer.at(0) = firstValue;
        cursor->ownBuffer.at(1) = lastValue;
        result = &cursor->ownBuffer;
    }
    else {
        // A side without an expansion list contributes exactly one value.
        int32_t firstLength = firstExpansion==NULL? 1 : firstExpansion->size();
        int32_t lastLength = lastExpansion==NULL? 1 : lastExpansion->size();
        if (cursor->reorderBuffer == NULL) {
            cursor->reorderBuffer = new VectorOfInt(firstLength+lastLength);
        }
        // reorderBuffer gets reused for the life of this object.
        // Since its internal buffer only grows, there is a danger
        // that it will get really, really big, and never shrink.  If
        // this is actually happening, insert code here to check for
        // the condition.  Something along the lines of:
        //! else if (reorderBuffer->size() >= 256 &&
        //!          (firstLength+lastLength) < 16) {
        //!     delete reorderBuffer;
        //!     reorderBuffer = new VectorOfInt(firstLength+lastLength);
        //! }
        // The specific numeric values need to be determined
        // empirically. [aliu]
        result = cursor->reorderBuffer;

        // Elements of colFirst go at the front of the buffer...
        if (firstExpansion == NULL) {
            result->atPut(0, firstValue);
        }
        else {
            // System.arraycopy(firstExpansion, 0, result, 0, firstLength);
            *result = *firstExpansion;
        }

        // ...followed by the other character's element(s).
        if (lastExpansion == NULL) {
            result->atPut(firstLength, lastValue);
        }
        else {
            // System.arraycopy(lastExpansion, 0, result, firstLength, lastLength);
            for (int32_t i=0; i<lastLength; ++i) {
                result->atPut(firstLength + i, lastExpansion->at(i));
            }
        }
        result->setSize(firstLength+lastLength);
    }

    return result;
}


/* Reduce a collation order value to the part that is significant at this
   collator's strength: PRIMARY keeps only the primary bits, SECONDARY keeps
   primary and secondary bits, and any other strength keeps the whole value. */
inline int32_t
RuleBasedCollator::strengthOrder(int32_t value) const
{
    switch (getStrength())
    {
    case PRIMARY:
        return (value & PRIMARYDIFFERENCEONLY);

    case SECONDARY:
        return (value & SECONDARYDIFFERENCEONLY);

    default:
        return value;
    }
}


/* Return the next collation order from 'cursor', masked to this collator's
   strength by strengthOrder().

   Returns CollationElementIterator::NULLORDER when 'status' already indicates
   failure or when the text is exhausted.

   Note that 'status' is passed BY VALUE: an error raised by nextContractChar()
   inside this function is not visible to the caller.

   Expansions: when a character maps to more than one order, the list is
   remembered in cursor->bufferAlias and handed out one element per call. */
inline int32_t
RuleBasedCollator::getStrengthOrder(NormalizerIterator* cursor,
                                    UErrorCode status) const
{
    if (U_FAILURE(status))
    {
        return CollationElementIterator::NULLORDER;
    }

    if (cursor->bufferAlias != NULL)
    {
        // bufferAlias needs a bit of an explanation.
        // When we hit an expanding character in the text, we call the order's
        // getExpandValues method to retrieve an array of the orderings for all
        // of the characters in the expansion (see the end of this method).
        // The first ordering is returned, and an alias to the orderings array
        // is saved so that the remaining orderings can be returned on subsequent
        // calls to next.  So, if the expanding buffer is not exhausted,
        // all we have to do here is return the next ordering in the buffer.
        if (cursor->expIndex < cursor->bufferAlias->size())
        {
            //_L((stderr, "next from [%08X] from bufferAlias\n", this));
            return strengthOrder(cursor->bufferAlias->at(cursor->expIndex++));
        }
        else
        {
            // Buffer exhausted: fall through and read the next character.
            cursor->bufferAlias = NULL;
        }
    }

    // Fetch the current character, then step past it.
    UChar ch = cursor->current();
    cursor->next();

    //_L((stderr, "Next from [%08X] = [%04X], [%c]\n", cursor, (int)ch & 0xFFFF, (char)(ch & 0xFF)));

    if (ch == Normalizer::DONE) {
        return CollationElementIterator::NULLORDER;
    }
    // Ask the collator for this character's ordering.
    int32_t value = ucmp32_get(data->mapping, ch);

    if (value == UNMAPPED)
    {
        // Returned an "unmapped" flag and save the character so it can be
        // returned next time this method is called.
        if (ch == 0x0000) return ch; // \u0000 is not valid in C++'s UnicodeString
        // Two-element sequence: the unmapped marker, then the character
        // itself shifted into the upper 16 bits.
        cursor->ownBuffer.at(0) = CollationElementIterator::UNMAPPEDCHARVALUE;
        cursor->ownBuffer.at(1) = ch << 16;
        cursor->bufferAlias = &cursor->ownBuffer;

    } else {

        // Contractions are resolved first; the result may itself be an
        // expansion index, which is why the checks are sequential.
        if (value >= CONTRACTCHARINDEX)
        {
            value = nextContractChar(cursor, ch, status);
        }

        if (value >= EXPANDCHARINDEX) {
            cursor->bufferAlias = getExpandValueList(value);
        }

        // Thai: a pre-vowel followed by a base consonant is collated as if
        // the consonant came first, so consume the consonant and build a
        // reordered buffer.
        if (CollationElementIterator::isThaiPreVowel(ch)) {
            UChar consonant = cursor->current();
            if (CollationElementIterator::isThaiBaseConsonant(consonant)) {
                cursor->next();
                cursor->bufferAlias = makeReorderedBuffer(cursor, consonant, value,
                                                          cursor->bufferAlias);
            }
        }
    }

    // If a multi-element buffer was set up above, return its first element
    // now and leave expIndex pointing at the second.
    if (cursor->bufferAlias != NULL) {
        cursor->expIndex = 1;
        value = cursor->bufferAlias->at(0);
    }

    return strengthOrder(value);
}

// ==================== End inlines ============================================


//===============================================================================
// Initialize the class-wide collMutex once.  The flag is tested both before
// and after taking the global lock (umtx_lock(NULL)), so umtx_init runs only
// the first time through.  Returns the value of isMutexInited.
UBool RuleBasedCollator::initMutex() {
    if (isMutexInited != FALSE) {
        // Already set up; nothing to do.
        return isMutexInited;
    }

    umtx_lock(NULL);
    if (isMutexInited == FALSE) {
        umtx_init(&collMutex);
        isMutexInited = TRUE;
    }
    umtx_unlock(NULL);

    return isMutexInited;
}

// Default constructor: creates a collator with no collation data attached
// (data is null and not owned).
RuleBasedCollator::RuleBasedCollator()
    : Collator(),
      isOverIgnore(FALSE),
      mPattern(NULL),
      cursor1(NULL),
      cursor2(NULL),
      dataIsOwned(FALSE),
      data(NULL),
      fSomeMemory(NULL)
{
    // All state is established by the initializer list.
}

// Copy constructor.  The collation data is NOT copied: the new object
// aliases that.data and records that it does not own it.  The parsed
// pattern and the cursors are not copied either.
// NOTE(review): because the data is shared by pointer, the copy presumably
// must not outlive whatever owns the data — confirm against the cache logic.
RuleBasedCollator::RuleBasedCollator(const  RuleBasedCollator&  that)
    : Collator(that),
      isOverIgnore(that.isOverIgnore),
      mPattern(NULL),
      cursor1(NULL),
      cursor2(NULL),
      dataIsOwned(FALSE),
      data(that.data),
      fSomeMemory(NULL)
{
    // All state is established by the initializer list.
}

// Equality: two collators are equal when they are the same object, or when
// they have the same dynamic class, compare equal as Collators, agree on
// isOverIgnore and share the same data pointer.  The data is compared by
// pointer identity, not by content.
UBool
RuleBasedCollator::operator==(const Collator& that) const
{
    if (this == &that)
    {
        return TRUE;
    }

    // Must be the same class and equal at the base-class level.
    if (this->getDynamicClassID() != that.getDynamicClassID()
        || !Collator::operator==(that))
    {
        return FALSE;
    }

    // The class IDs match, so the downcast is valid.
    const RuleBasedCollator& other = (const RuleBasedCollator&)that;

    if (isOverIgnore == other.isOverIgnore && data == other.data)
    {
        return TRUE;
    }
    return FALSE;
}

// Assignment.  Releases this object's own data (if owned) and parsed
// pattern, then aliases that.data without taking ownership.  The cursors
// are left untouched.
RuleBasedCollator&
RuleBasedCollator::operator=(const  RuleBasedCollator& that)
{
    if (this == &that)
    {
        // Self-assignment: nothing to do.
        return *this;
    }

    Collator::operator=(that);
    isOverIgnore = that.isOverIgnore;

    // Drop whatever this object currently holds.
    if (dataIsOwned)
    {
        delete data;
    }
    delete mPattern;
    mPattern = 0;

    // Share the other collator's data; this object does not own it.
    dataIsOwned = FALSE;
    data = that.data;

    return *this;
}

// Construct a collator from a rule string, using the Collator base class
// defaults.  Nothing is built if 'status' already indicates failure.
RuleBasedCollator::RuleBasedCollator(const  UnicodeString&  rules,
                                        UErrorCode&      status)
    : Collator(),
      isOverIgnore(FALSE),
      mPattern(NULL),
      cursor1(NULL),
      cursor2(NULL),
      dataIsOwned(FALSE),
      data(NULL),
      fSomeMemory(NULL)
{
    if (U_SUCCESS(status))
    {
        constructFromRules(rules, status);
    }
}

// Construct a collator from a rule string with the given strength and
// Normalizer::NO_OP decomposition.  Nothing is built if 'status' already
// indicates failure.
// Note: constructFromRules() itself calls setStrength(Collator::TERTIARY),
// so the strength passed to the base class here is subsequently overwritten.
RuleBasedCollator::RuleBasedCollator(const  UnicodeString&  rules,
                     ECollationStrength collationStrength,
                     UErrorCode&      status)
    : Collator(collationStrength, Normalizer::NO_OP),
      isOverIgnore(FALSE),
      mPattern(NULL),
      cursor1(NULL),
      cursor2(NULL),
      dataIsOwned(FALSE),
      data(NULL),
      fSomeMemory(NULL)
{
    if (U_SUCCESS(status))
    {
        constructFromRules(rules, status);
    }
}

// Construct a collator from a rule string with TERTIARY strength and the
// given decomposition mode.  Nothing is built if 'status' already indicates
// failure.
RuleBasedCollator::RuleBasedCollator(const  UnicodeString&  rules,
                     Normalizer::EMode decompositionMode,
                     UErrorCode&      status)
    : Collator(TERTIARY, decompositionMode),
      isOverIgnore(FALSE),
      mPattern(NULL),
      cursor1(NULL),
      cursor2(NULL),
      dataIsOwned(FALSE),
      data(NULL),
      fSomeMemory(NULL)
{
    if (U_SUCCESS(status))
    {
        constructFromRules(rules, status);
    }
}

// Construct a collator from a rule string with the given strength and
// decomposition mode.  Nothing is built if 'status' already indicates
// failure.
// Note: constructFromRules() itself calls setStrength(Collator::TERTIARY),
// so the strength passed to the base class here is subsequently overwritten.
RuleBasedCollator::RuleBasedCollator(const  UnicodeString&  rules,
                     ECollationStrength collationStrength,
                     Normalizer::EMode decompositionMode,
                     UErrorCode&      status)
    : Collator(collationStrength, decompositionMode),
      isOverIgnore(FALSE),
      mPattern(NULL),
      cursor1(NULL),
      cursor2(NULL),
      dataIsOwned(FALSE),
      data(NULL),
      fSomeMemory(NULL)
{
    if (U_SUCCESS(status))
    {
        constructFromRules(rules, status);
    }
}

// Build this collator's tables from a rule string.
//
// On entry any data owned by this object is released.  A fresh
// TableCollationData is allocated (and owned by this object), the strength
// is reset to TERTIARY, the rules are compiled with build(), and the result
// is registered in the cache under a freshly generated key.
//
// @param rules   the collation rules; a bogus string yields
//                U_MEMORY_ALLOCATION_ERROR
// @param status  in/out error code; nothing is done if it already indicates
//                failure.  Set to U_MEMORY_ALLOCATION_ERROR if the data
//                object cannot be allocated or is bogus; otherwise it
//                carries whatever build() reports.
void RuleBasedCollator::constructFromRules(const UnicodeString& rules,
                                        UErrorCode& status)
{
    // Construct this collator's ruleset from its string representation
    if (U_FAILURE(status))
    {
        return;
    }

    if (rules.isBogus())
    {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    if (dataIsOwned)
    {
        delete data;
        data = 0;
    }

    status = U_ZERO_ERROR;
    isOverIgnore = FALSE;
    setStrength(Collator::TERTIARY);

    data = new TableCollationData;
    // Guard against an allocator that returns null instead of throwing, as
    // well as against a data object that failed to set itself up.
    if (data == 0 || data->isBogus())
    {
        status = U_MEMORY_ALLOCATION_ERROR;
        delete data;
        data = 0;
        return;
    }

    // We constructed the data using the build method, so we own it.
    dataIsOwned = TRUE;

    // Now that we've got all the buffers allocated, do the actual work.
    // This function can run more than once on the same object (for example
    // the retry with the default rules in constructFromFile()), so release a
    // pattern left over from an earlier run instead of just dropping the
    // pointer.  mPattern is owned by this object: the destructor and
    // operator= delete it as well.
    delete mPattern;
    mPattern = 0;
    build(rules, status);
    addToCache(UnicodeString(cacheKey++));
}

// Load this collator from a flattened binary file.
//
// @param fileName  full pathname of the binary collation file
// @param status    in/out error code; nothing is done if it already
//                  indicates failure.  On return:
//                    U_FILE_ACCESS_ERROR        the file could not be opened
//                    U_ZERO_ERROR               the stream reported no error
//                    U_MEMORY_ALLOCATION_ERROR  the stream reported an error
//                                               and the data object is bogus
//                    U_MISSING_RESOURCE_ERROR   any other stream error
//                  In both error cases after streaming, the data is deleted.
void
RuleBasedCollator::constructFromFile(const char* fileName,
                                  UErrorCode& status)
{
    // This method tries to read in a flattened RuleBasedCollator that
    // has been previously streamed out using the streamOut() method.
    // The 'fileName' parameter should contain a full pathname valid on
    // the local environment.

    if (U_FAILURE(status))
    {
        return;
    }

    // Release data from any earlier construction attempt.
    if (dataIsOwned)
    {
        delete data;
        data = 0;
    }

    // NOTE(review): mPattern is cleared without being deleted; if it can be
    // non-null here, the old pattern is leaked — confirm against callers.
    mPattern = 0;
    isOverIgnore = FALSE;
    setStrength(Collator::TERTIARY); // This is the default strength

    FileStream* ifs = T_FileStream_open(fileName, "rb");
    if (ifs == 0) {
        status = U_FILE_ACCESS_ERROR;
        return;
    }

    // The streamIn function does the actual work here...
    RuleBasedCollatorStreamer::streamIn(this, ifs);

    // Translate the stream's error state into a UErrorCode.
    if (!T_FileStream_error(ifs))
    {
        status = U_ZERO_ERROR;
    }
    else if (data && data->isBogus())
    {
        status = U_MEMORY_ALLOCATION_ERROR;
        delete data;
        data = 0;
    }
    else
    {
        status = U_MISSING_RESOURCE_ERROR;
        delete data;
        data = 0;
    }

#ifdef COLLDEBUG
    fprintf(stderr, "binary read %s size %d, %s\n", fileName, T_FileStream_size(ifs), u_errorName(status));
#endif

    // We constructed the data when streaming it in, so we own it
    // (this is set even when the data was deleted above and is now null).
    dataIsOwned = TRUE;

    T_FileStream_close(ifs);
}

// Load this collator from the binary "%%Collation" item of the resource
// bundle for locale 'name'.
//
// @param name    locale whose resource bundle is opened
// @param status  in/out error code; nothing is done if it already indicates
//                failure.  Set to U_MISSING_RESOURCE_ERROR when the bundle
//                has no "%%Collation" item or the stream reports an error,
//                U_MEMORY_ALLOCATION_ERROR when the streamed data is bogus,
//                and U_FILE_ACCESS_ERROR when the memory stream cannot be
//                opened.
// @return the name reported by the bundle that holds the binary data, or 0
//         when the bundle or the item could not be obtained.
const char *
RuleBasedCollator::constructFromBundle(const Locale & name,
                                  UErrorCode& status)
{
  // This method tries to locate binary collation data which has been
  // previously streamed to a binary object "%%Collation" in a
  // resource bundle. If the data is found, it is cached.
  // cache is checked before actually streaming in data
  // resource bundle fallback mechanism is used.

    if (U_FAILURE(status))
    {
        return 0;
    }

    // Release data from any earlier construction attempt.
    if (dataIsOwned)
    {
        delete data;
        data = 0;
    }
    const char* realName = 0;

    // NOTE(review): mPattern is cleared without being deleted; if it can be
    // non-null here, the old pattern is leaked — confirm against callers.
    mPattern = 0;
    isOverIgnore = FALSE;
    setStrength(Collator::TERTIARY); // This is the default strength

    ResourceBundle rb((char *)0, name, status);
    if(U_SUCCESS(status)) {
      ResourceBundle binary = rb.get("%%Collation", status); //This is the bundle that actually contains the collation data
      // NOTE(review): getName() is called before 'status' is checked, and the
      // returned pointer is handed back to the caller after 'binary' has gone
      // out of scope — confirm the lifetime of the name string.
      realName = binary.getName();
      if(U_SUCCESS(status)) {
        // Pick the decomposition mode: DECOMP if the bundle has a "Normalize"
        // entry under "CollationElements", NO_OP if that entry is missing.
        // Failures here are deliberately kept out of 'status'.
        UErrorCode intStatus = U_ZERO_ERROR;
        ResourceBundle colElem = rb.get("CollationElements", intStatus);
        if(U_SUCCESS(intStatus)) {
            UnicodeString norm = colElem.getStringEx("Normalize", intStatus);
            if(U_SUCCESS(intStatus)) {
                setDecomposition(Normalizer::DECOMP);
                fDefaultDecomp = Normalizer::DECOMP;
            } else {
                setDecomposition(Normalizer::NO_OP);
                fDefaultDecomp = Normalizer::NO_OP;
            }
        }
        intStatus = U_ZERO_ERROR;

        constructFromCache(realName, intStatus); // check whether we already have this data in cache
        if(U_SUCCESS(intStatus)) {
          return realName;
        }
        // Not cached: stream the data in from the bundle's binary item.
        // NOTE(review): 'status' is not checked after getBinary() before the
        // buffer is used.
        int32_t inDataLen = 0;
        const uint8_t *inData = binary.getBinary(inDataLen, status); //This got us the real binary data

        UMemoryStream *ifs = uprv_mstrm_openBuffer(inData, inDataLen);

        if (ifs == 0) {
          status = U_FILE_ACCESS_ERROR;
          return 0;
        }

        // The streamIn function does the actual work here...
        RuleBasedCollatorStreamer::streamIn(this, ifs, status);

        // Translate the stream's error state; the success branch is
        // intentionally empty and leaves 'status' as streamIn set it.
        if (!uprv_mstrm_error(ifs)) {
        }
        else if (data && data->isBogus()) {
          status = U_MEMORY_ALLOCATION_ERROR;
          delete data;
          data = 0;
        } else {
          status = U_MISSING_RESOURCE_ERROR;
          delete data;
          data = 0;
        }

        // We constructed the data when streaming it in, so we own it
        dataIsOwned = TRUE;

        uprv_mstrm_close(ifs);
        // NOTE(review): addToCache() runs, and realName is returned, even when
        // the stream reported an error and 'data' was deleted above — verify
        // that addToCache tolerates this.
        addToCache(realName); // add the newly constructed data to cache
        return realName;
      } else {
        status = U_MISSING_RESOURCE_ERROR;
        return 0;
      }
    } else {
        return 0;
    }
}

// Construct the collator for a locale.
//
// The binary collation data is looked up through the locale's resource
// bundle.  If that fails, the cache entry for the default file name is
// tried, and finally the hard-coded DEFAULTRULES are compiled.
//
// @param desiredLocale  the locale requested by the caller
// @param status         in/out error code; nothing is done if it already
//                       indicates failure.  When the hard-coded rules had to
//                       be used it is set to U_USING_DEFAULT_ERROR, or to
//                       the error from compiling them.
RuleBasedCollator::RuleBasedCollator(   const Locale& desiredLocale,
                                UErrorCode& status)
    : Collator(),
      isOverIgnore(FALSE),
      //      sourceCursor(0),
      //targetCursor(0),
      mPattern(0),
      cursor1(0),
      cursor2(0),
      dataIsOwned(FALSE),
      data(0),
      fSomeMemory(NULL)

{


  if (U_FAILURE(status)) {
    return;
  }

  // Try to load, in order:
  // 1. The desired locale's collation.
  // 2. A fallback of the desired locale.
  // 3. The default locale's collation.
  // 4. A fallback of the default locale.
  // 5. The default collation rules, which contains en_US collation rules.

  // To reiterate, we try:
  // Specific:
  //  language+country+variant
  //  language+country
  //  language
  // Default:
  //  language+country+variant
  //  language+country
  //  language
  // Root: (aka DEFAULTRULES)
  // Steps 1-5 are handled by the resource bundle fallback mechanism.
  // However, in the very improbable situation that no resource bundle
  // data exists, step 5 is repeated with hardcoded default rules.

  const char *locName = constructFromBundle(desiredLocale, status);  /*!*/

  if (U_SUCCESS(status)) {
    // Record which locale was asked for and which one supplied the data.
    data->desiredLocale = desiredLocale;
    data->realLocaleName = locName;
  } else {
    // Bundle lookup failed: try the cached default collation first.
    // NOTE(review): if the cache lookup succeeds, 'status' keeps the failure
    // code from constructFromBundle() — confirm this is intended.
    UErrorCode intStatus = U_ZERO_ERROR;
    constructFromCache(ResourceBundle::kDefaultFilename, intStatus);
    if(U_FAILURE(intStatus)) {
      // Last resort: compile the hard-coded default rules.
      intStatus = U_ZERO_ERROR;
      constructFromRules(RuleBasedCollator::DEFAULTRULES, intStatus);
      if (intStatus == U_ZERO_ERROR) {
        status = U_USING_DEFAULT_ERROR;
      } else {
        status = intStatus;     // bubble back
      }

      // On allocation failure constructFromRules() leaves 'data' null, so
      // stop before it is dereferenced below.
      if (status == U_MEMORY_ALLOCATION_ERROR) {
        return;
      }
    }
    data->realLocaleName = ResourceBundle::kDefaultFilename;
    addToCache(ResourceBundle::kDefaultFilename);
  }
  return;
}

// Load this collator from the files named 'localeFileName'.
//
// If tryBinaryFile is true, the binary file
// <data directory><localeFileName><kFilenameSuffix> is tried first.  If
// that is not requested or fails, the "Sequence" string of the
// "CollationElements" item in the text resource bundle of the same name is
// appended to DEFAULTRULES and compiled; if that compilation fails, the
// DEFAULTRULES alone are compiled.
//
// @param locale          not referenced in this function body
// @param localeFileName  base name of the file(s) to look for
// @param tryBinaryFile   whether to attempt the binary file first
// @param status          in/out error code; nothing is done if it already
//                        indicates failure.  Possible results include
//                        U_MISSING_RESOURCE_ERROR (no collation data in the
//                        bundle), U_MEMORY_ALLOCATION_ERROR, and
//                        U_USING_DEFAULT_ERROR (fell back to DEFAULTRULES).
void
RuleBasedCollator::constructFromFile(   const Locale&           locale,
                                    const UnicodeString&    localeFileName,
                                    UBool                  tryBinaryFile,
                                    UErrorCode&              status)
{
  // constructFromFile creates a collation object by reading from a
  // file.  It does not employ the usual FILE search mechanism with
  // locales, default locales, and base locales.  Instead, it tries to
  // look only in files with the given localFileName.  It does,
  // however, employ the LOCALE search mechanism.

  // This method maintains the binary collation files.  If a collation
  // is not present in binary form, but is present in text form (in a
  // resource bundle file), it will be loaded in text form, and then
  // written to disk.

  // If tryBinaryFile is true, then try to load from the binary file first.

  if(U_FAILURE(status)) {
    return;
  }

  // Release data from any earlier construction attempt.
  if(dataIsOwned) {
    delete data;
    data = 0;
  }

    if(tryBinaryFile) {
      // The path is released with delete [] on every exit from this branch.
      char *binaryFilePath = createPathName(UnicodeString(u_getDataDirectory(),""),
                                            localeFileName,
                                            UnicodeString(kFilenameSuffix,""));

        // Try to load up the collation from a binary file first
        constructFromFile(binaryFilePath, status);
        #ifdef COLLDEBUG
            cerr << localeFileName  << kFilenameSuffix << " binary load " << u_errorName(status) << endl;
        #endif
        // Done on success; also give up immediately when out of memory.
        if(U_SUCCESS(status) || status == U_MEMORY_ALLOCATION_ERROR) {
            delete [] binaryFilePath;
            return;
        }
        // A missing binary file is not an error: fall through to the text
        // source.
        if(status == U_FILE_ACCESS_ERROR) {
            status = U_ZERO_ERROR;
        }
        delete [] binaryFilePath;
    }

  // Now try to load it up from a resource bundle text source file
  UnicodeString dataDir = UnicodeString(u_getDataDirectory(),"");

    // Convert the file name to a NUL-terminated char string for the
    // ResourceBundle constructor.
    char *ch;
    ch = new char[localeFileName.size() + 1];
    ch[localeFileName.extract(0, 0x7fffffff, ch, "")] = 0;
    ResourceBundle bundle(dataDir, ch, status);

    delete [] ch;

  // if there is no resource bundle file for the given locale, break out
  if(U_FAILURE(status))
  {
      return;
  }

    #ifdef COLLDEBUG
        cerr << localeFileName << " ascii load " << u_errorName(status) << endl;
    #endif

    // check and see if this resource bundle contains collation data

    UnicodeString colString;
    UErrorCode intStatus = U_ZERO_ERROR;

    ResourceBundle colElems = bundle.get("CollationElements", intStatus);
    if (U_FAILURE(intStatus))
    {
        status = U_MISSING_RESOURCE_ERROR;
        return;
    }
    colString = colElems.getStringEx("Sequence", intStatus);

    if(U_FAILURE(intStatus)) {
        status = U_MISSING_RESOURCE_ERROR;
        return;
    }

    if(colString.isBogus()) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

  // Having loaded the collation from the resource bundle text file,
  // now retrieve the CollationElements tagged data, merged with the
  // default rules.  If that fails, use the default rules alone.

  // The default rules go in front of the locale's own sequence.
  colString.insert(0, DEFAULTRULES);
  if(colString.isBogus()) {
    status = U_MEMORY_ALLOCATION_ERROR;
    return;
  }

  constructFromRules(colString, intStatus);
  if(intStatus == U_MEMORY_ALLOCATION_ERROR) {
    status = U_MEMORY_ALLOCATION_ERROR;
    return;
  }

  // Any result other than U_ZERO_ERROR from the merged rules triggers the
  // fallback to the default rules alone.
  if(intStatus != U_ZERO_ERROR)  {
    status = U_USING_DEFAULT_ERROR;

    // predefined tables should contain correct grammar
    intStatus = U_ZERO_ERROR;
    constructFromRules(DEFAULTRULES, intStatus);
    if(intStatus != U_ZERO_ERROR) {
      status = intStatus;
    }
  }

#ifdef COLLDEBUG
  cerr << localeFileName << " ascii load " << (U_SUCCESS(status) ? "OK" : "Failed") << " - try= " << (tryBinaryFile?"true":"false") << endl;
#endif

}

RuleBasedCollator::~RuleBasedCollator()
{
    // Release every non-null entry of fSomeMemory, then the entry table
    // itself and the fSizes array that is freed alongside it.
    if (fSomeMemory != NULL)
    {
        int32_t slot;
        for (slot = 0; slot < fAvailableMemory; ++slot)
        {
            if (fSomeMemory[slot] != NULL)
            {
                uprv_free(fSomeMemory[slot]);
            }
        }

        uprv_free(fSomeMemory);
        delete[] fSizes;
    }

    // The table data is destroyed only when this collator owns it.
    if (dataIsOwned)
    {
        delete data;
    }
    data = 0;

    // Deleting a null pointer is a no-op, so the cursors need no guard.
    delete cursor1;
    cursor1 = 0;

    delete cursor2;
    cursor2 = 0;

    delete mPattern;
    mPattern = 0;
}

Collator*
RuleBasedCollator::clone() const
{
    // Copy-construct a heap-allocated duplicate of this collator and
    // return it through the base-class pointer.
    Collator* duplicate = new RuleBasedCollator(*this);
    return duplicate;
}

// Create a CollationElementIterator object that will iterate over the elements
// in a string, using the collation rules defined in this RuleBasedCollator.
// Returns the newly allocated iterator, or NULL when the iterator's
// construction reports a failure (or the allocation itself yields NULL).
CollationElementIterator*
RuleBasedCollator::createCollationElementIterator(const UnicodeString& source) const
{
    UErrorCode status = U_ZERO_ERROR;
    CollationElementIterator *newCursor = new CollationElementIterator(source, this, status);

    if (U_FAILURE(status))
    {
        // Construction failed: release the partially usable iterator instead
        // of leaking it.  delete on a null pointer is harmless.
        delete newCursor;
        return NULL;
    }

    return newCursor;
}

// Create a CollationElementIterator object that will iterate over the elements
// supplied by a CharacterIterator, using the collation rules defined in this
// RuleBasedCollator.
// Returns the newly allocated iterator, or NULL when the iterator's
// construction reports a failure (or the allocation itself yields NULL).
CollationElementIterator*
RuleBasedCollator::createCollationElementIterator(const CharacterIterator& source) const
{
    UErrorCode status = U_ZERO_ERROR;
    CollationElementIterator *newCursor = new CollationElementIterator(source, this, status);

    if (U_FAILURE(status))
    {
        // Construction failed: release the partially usable iterator instead
        // of leaking it.  delete on a null pointer is harmless.
        delete newCursor;
        return NULL;
    }

    return newCursor;
}

// Return a string representation of this collator's rules.
// The string can later be passed to the constructor that takes a
// UnicodeString argument, which will construct a collator that's
// functionally identical to this one.
// You can also allow users to edit the string in order to change
// the collation data, or you can print it out for inspection, or whatever.

const UnicodeString&
RuleBasedCollator::getRules() const
{
    if (mPattern == 0)
    {
        if (!data->isRuleTableLoaded)
        {
            // The caller wants the rules, but the rule table is not loaded
            // and there is no mPattern object to emit it from.  Build a
            // temporary collator from the on-disk data instead.  FALSE is
            // passed for tryBinaryFile because, by design, the binary file
            // has NO rules in it.
            RuleBasedCollator scratch;
            UErrorCode fileStatus = U_ZERO_ERROR;
            scratch.constructFromFile(data->desiredLocale, data->realLocaleName, FALSE, fileStatus);

            // scratch.mPattern must be nonzero before scratch.getRules() is
            // called, or we run the risk of an infinite loop.
            UBool rulesAvailable = (U_SUCCESS(fileStatus) && scratch.mPattern != 0);

            if (!rulesAvailable)
            {
                /* SRL have to add this because we now have the situation where
                   DEFAULT is loaded from a binary file w/ no rules. */
                UErrorCode defaultStatus = U_ZERO_ERROR;
                scratch.constructFromRules(RuleBasedCollator::DEFAULTRULES, defaultStatus);

                rulesAvailable = (U_SUCCESS(defaultStatus) && scratch.mPattern != 0);
            }

            if (rulesAvailable)
            {
                data->ruleTable = scratch.getRules();
                data->isRuleTableLoaded = TRUE;
            }
        }
    }
    else
    {
        // A parsed pattern is still around: emit it into the rule table and
        // then discard it.  This method is const, so the member pointer is
        // reached through a cast in order to delete and clear it.
        MergeCollation*& writablePattern = *(MergeCollation**)&mPattern;

        mPattern->emitPattern(data->ruleTable);
        data->isRuleTableLoaded = TRUE;

        delete writablePattern;
        writablePattern = 0;
    }

    return data->ruleTable;
}


Collator::EComparisonResult
RuleBasedCollator::compare( const UnicodeString& source,
                            const UnicodeString& target,
                            int32_t length) const
{
    // Compare only the leading part of each string: at most `length` code
    // units, or the whole string when it is shorter than that.
    UTextOffset start = 0;
    UnicodeString sourcePrefix;
    UnicodeString targetPrefix;

    source.extract(start, uprv_min(length, source.length()), sourcePrefix);
    target.extract(start, uprv_min(length, target.length()), targetPrefix);

    return RuleBasedCollator::compare(sourcePrefix, targetPrefix);
}

Collator::EComparisonResult
RuleBasedCollator::compare(const   UChar* source,
                      int32_t sourceLength,
                      const   UChar*  target,
                      int32_t targetLength) const
{
    // Delegate to ucol_strcoll and translate its result into the
    // Collator enum; anything other than LESS/GREATER is reported as EQUAL.
    switch (ucol_strcoll((UCollator *)this, source, sourceLength, target, targetLength))
    {
    case UCOL_LESS:
        return Collator::LESS;

    case UCOL_GREATER:
        return Collator::GREATER;

    default:
        return Collator::EQUAL;
    }
}

// Compare two UChar buffers by walking their collation elements in parallel.
//
// source/sourceLength and target/targetLength describe the two strings.
// Returns Collator::LESS, Collator::EQUAL or Collator::GREATER.
//
// Notes on behavior visible in this function:
//  - EQUAL is returned immediately when both pointers are null or both
//    lengths are zero.
//  - Any failure status raised while setting up or advancing the cursors
//    inside the main loop also yields EQUAL; there is no status out-parameter.
//  - cursor1 and cursor2 are created on first use and then reused; they are
//    stored on the object through a cast, although this method is const.
//  - A primary difference returns at once.  Secondary and tertiary
//    differences are only recorded in `result`, and the scan continues in
//    case a primary difference follows.
//  - With IDENTICAL strength, a comparison that is otherwise EQUAL is decided
//    by comparing the normalized strings.
Collator::EComparisonResult
RuleBasedCollator::compareEx(const   UChar* source,
                      int32_t sourceLength,
                      const   UChar*  target,
                      int32_t targetLength) const
{

    // check if source and target are valid strings
    if (((source == 0) && (target == 0)) ||
        ((sourceLength == 0) && (targetLength == 0)))
    {
        return Collator::EQUAL;
    }

    Collator::EComparisonResult result = Collator::EQUAL;
    UErrorCode status = U_ZERO_ERROR;

    // Create the source cursor on first use, otherwise re-target the cached one.
    if (cursor1 == NULL)
    {
        ((RuleBasedCollator *)this)->cursor1 = new NormalizerIterator(source, sourceLength, getDecomposition());
    }
    else
    {
        cursor1->setModeAndText(getDecomposition(), source, sourceLength, status);
    }

    if ( /*cursor1->cursor == NULL ||*/ U_FAILURE(status))
    {
        return Collator::EQUAL;
    }

    // Same lazy set-up for the target cursor.
    if (cursor2 == NULL)
    {
        ((RuleBasedCollator *)this)->cursor2 = new NormalizerIterator(target, targetLength, getDecomposition());
    }
    else
    {
        cursor2->setModeAndText(getDecomposition(), target, targetLength, status);
    }

    if (/*cursor2 == NULL ||*/ U_FAILURE(status))
    {
        return Collator::EQUAL;
    }

    int32_t sOrder, tOrder;
    //    int32_t sOrder = CollationElementIterator::NULLORDER, tOrder = CollationElementIterator::NULLORDER;
    // gets/gett say whether the next iteration fetches a new source/target
    // element; both start TRUE, so sOrder and tOrder are assigned on the
    // first pass through the loop.
    UBool gets = TRUE, gett = TRUE;
    UBool initialCheckSecTer = getStrength() >= Collator::SECONDARY;
    UBool checkSecTer = initialCheckSecTer;
    UBool checkTertiary = getStrength() >= Collator::TERTIARY;
    UBool isFrenchSec = data->isFrenchSec;
    uint32_t pSOrder, pTOrder;

    for(;;)
    {
        // Get the next collation element in each of the strings, unless
        // we've been requested to skip it.
        if (gets)
        {
            sOrder = getStrengthOrder((NormalizerIterator*)cursor1, status);

            if (U_FAILURE(status))
            {
                return Collator::EQUAL;
            }
        }

        gets = TRUE;

        if (gett)
        {
            tOrder = getStrengthOrder((NormalizerIterator*)cursor2, status);

            if (U_FAILURE(status))
            {
                return Collator::EQUAL;
            }
        }

        gett = TRUE;

        // If we've hit the end of one of the strings, jump out of the loop
        if ((sOrder == CollationElementIterator::NULLORDER)||
            (tOrder == CollationElementIterator::NULLORDER))
        {
            break;
        }

        // If there's no difference at this position, we can skip to the
        // next one.
        pSOrder = CollationElementIterator::primaryOrder(sOrder);
        pTOrder = CollationElementIterator::primaryOrder(tOrder);
        if (sOrder == tOrder)
        {
            if (isFrenchSec && pSOrder != SECIGNORABLE)
            {
                if (!checkSecTer)
                {
                    // in french, a secondary difference more to the right is stronger,
                    // so accents have to be checked with each base element
                    checkSecTer = initialCheckSecTer;

                    // but tertiary differences are less important than the first
                    // secondary difference, so checking tertiary remains disabled
                    checkTertiary = FALSE;
                }
            }

            continue;
        }

        // Compare primary differences first.
        if (pSOrder != pTOrder)
        {
            if (sOrder == IGNORABLE)
            {
                // The entire source element is ignorable.
                // Skip to the next source element, but don't fetch another target element.
                gett = FALSE;
                continue;
            }

            if (tOrder == IGNORABLE)
            {
                // The entire target element is ignorable.
                // Skip to the next target element, but don't fetch another source element.
                gets = FALSE;
                continue;
            }

            // The source and target elements aren't ignorable, but it's still possible
            // for the primary component of one of the elements to be ignorable....
            if (pSOrder == PRIMIGNORABLE)  // primary order in source is ignorable
            {
                // The source's primary is ignorable, but the target's isn't.  We treat ignorables
                // as a secondary difference, so remember that we found one.
                if (checkSecTer)
                {
                    result = Collator::GREATER;  // (strength is SECONDARY)
                    checkSecTer = FALSE;
                }

                // Skip to the next source element, but don't fetch another target element.
                gett = FALSE;
            }
            else if (pTOrder == PRIMIGNORABLE)
            {
                // record differences - see the comment above.
                if (checkSecTer)
                {
                    result = Collator::LESS;  // (strength is SECONDARY)
                    checkSecTer = FALSE;
                }

                // Skip to the next target element, but don't fetch another source element.
                gets = FALSE;
            }
            else
            {
                // Neither of the orders is ignorable, and we already know that the primary
                // orders are different because of the (pSOrder != pTOrder) test above.
                // Record the difference and stop the comparison.
                if (pSOrder < pTOrder)
                {
                    return Collator::LESS;  // (strength is PRIMARY)
                }

                return Collator::GREATER;  // (strength is PRIMARY)
            }
        }
        else
        { // else of if ( pSOrder != pTOrder )
            // primary order is the same, but complete order is different. So there
            // are no base elements at this point, only ignorables (Since the strings are
            // normalized)

            if (checkSecTer)
            {
                // a secondary or tertiary difference may still matter
                uint32_t secSOrder = CollationElementIterator::secondaryOrder(sOrder);
                uint32_t secTOrder = CollationElementIterator::secondaryOrder(tOrder);

                if (secSOrder != secTOrder)
                {
                    // there is a secondary difference
                    result = (secSOrder < secTOrder) ? Collator::LESS : Collator::GREATER;
                                            // (strength is SECONDARY)
                    checkSecTer = FALSE;
                    // (even in french, only the first secondary difference within
                    //  a base character matters)
                }
                else
                {
                    if (checkTertiary)
                    {
                        // a tertiary difference may still matter
                        uint32_t terSOrder = CollationElementIterator::tertiaryOrder(sOrder);
                        uint32_t terTOrder = CollationElementIterator::tertiaryOrder(tOrder);

                        if (terSOrder != terTOrder)
                        {
                            // there is a tertiary difference
                            result = (terSOrder < terTOrder) ? Collator::LESS : Collator::GREATER;
                                            // (strength is TERTIARY)
                            checkTertiary = FALSE;
                        }
                    }
                }
            } // if (checkSecTer)

        }  // if ( pSOrder != pTOrder )
    } // for(;;)

    if (sOrder != CollationElementIterator::NULLORDER)
    {
        // (tOrder must be CollationElementIterator::NULLORDER,
        //  since this point is only reached when sOrder or tOrder is NULLORDER.)
        // The source string has more elements, but the target string hasn't.
        // NOTE(review): status is not inspected while draining the remaining
        // source elements, unlike in the main loop above.
        do
        {
            if (CollationElementIterator::primaryOrder(sOrder) != PRIMIGNORABLE)
            {
                // We found an additional non-ignorable base character in the source string.
                // This is a primary difference, so the source is greater
                return Collator::GREATER; // (strength is PRIMARY)
            }

            if (CollationElementIterator::secondaryOrder(sOrder) != SECIGNORABLE)
            {
                // Additional secondary elements mean the source string is greater
                if (checkSecTer)
                {
                    result = Collator::GREATER;  // (strength is SECONDARY)
                    checkSecTer = FALSE;
                }
            }
        }
        while ((sOrder = getStrengthOrder(cursor1, status)) != CollationElementIterator::NULLORDER);
    }
    else if (tOrder != CollationElementIterator::NULLORDER)
    {
        // The target string has more elements, but the source string hasn't.
        // NOTE(review): status is not inspected while draining the remaining
        // target elements either.
        do
        {
            if (CollationElementIterator::primaryOrder(tOrder) != PRIMIGNORABLE)
            {
                // We found an additional non-ignorable base character in the target string.
                // This is a primary difference, so the source is less
                return Collator::LESS; // (strength is PRIMARY)
            }

            if (CollationElementIterator::secondaryOrder(tOrder) != SECIGNORABLE)
            {
                // Additional secondary elements in the target mean the source string is less
                if (checkSecTer)
                {
                    result = Collator::LESS;  // (strength is SECONDARY)
                    checkSecTer = FALSE;
                }
            }
        }
        while ((tOrder = getStrengthOrder(cursor2, status)) != CollationElementIterator::NULLORDER);
    }


    // For IDENTICAL comparisons, we use a bitwise character comparison
    // as a tiebreaker if all else is equal
    // NOTE: The java code compares result with 0, and
    // puts the result of the string comparison directly into result
    if (result == Collator::EQUAL && getStrength() == IDENTICAL)
    {
#if 0
      // ******** for the  UChar normalization interface.
      // It doesn't work much faster, and the code was broken
      // so it's commented out. --srl
//          UChar sourceDecomp[1024], targetDecomp[1024];
//          int32_t sourceDecompLength = 1024;
//          int32_t targetDecompLength = 1024;

//          int8_t comparison;
//          Normalizer::EMode decompMode = getDecomposition();

//          if (decompMode != Normalizer::NO_OP)
//            {
//              Normalizer::normalize(source, sourceLength, decompMode,
//                        0, sourceDecomp, sourceDecompLength, status);

//              Normalizer::normalize(target, targetLength, decompMode,
//                        0, targetDecomp, targetDecompLength, status);

//              comparison = u_strcmp(sourceDecomp,targetDecomp);
//            }
//          else
//            {
//              comparison = u_strcmp(source, target); /* ! */
//            }

#else

        // Normalize both inputs with the collator's decomposition mode and
        // compare the resulting strings directly.
        UnicodeString sourceDecomp, targetDecomp;

        int8_t comparison;

        Normalizer::normalize(UnicodeString(source, sourceLength), getDecomposition(),
                      0, sourceDecomp,  status);

        Normalizer::normalize(UnicodeString(target, targetLength), getDecomposition(),
                      0, targetDecomp,  status);

        comparison = sourceDecomp.compare(targetDecomp);
#endif

        if (comparison < 0)
        {
            result = Collator::LESS;
        }
        else if (comparison == 0)
        {
            result = Collator::EQUAL;
        }
        else
        {
            result = Collator::GREATER;
        }
    }

    return result;
}

int32_t
RuleBasedCollator::nextContractChar(NormalizerIterator *cursor,
                                    UChar ch,
                                    UErrorCode& status) const
{
    // Start with the ordering of the single character: it is the first
    // entry of the list returned by getContractValues(ch).
    VectorOfPToContractElement *candidates = getContractValues(ch);
    int32_t bestOrder = ((EntryPair *)candidates->at(0))->value;

    // The member `key` is used as the lookup buffer; this method is const,
    // so it is written through a cast.
    UnicodeString& lookupKey = (UnicodeString&)key;
    lookupKey.remove();
    lookupKey += ch;

    // Keep appending the characters that follow and remember the order of
    // the longest sequence that still has an entry in the list.
    for (;;)
    {
        ch = cursor->current();
        if (ch == Normalizer::DONE)
        {
            break;
        }

        lookupKey += ch;

        int32_t entryIndex = getEntry(candidates, key, TRUE);
        if (entryIndex == UNMAPPED)
        {
            // The longer sequence is unknown; the cursor is not advanced
            // past this character.
            break;
        }

        cursor->next();
        bestOrder = ((EntryPair *)candidates->at(entryIndex))->value;
    }

    return bestOrder;
}

// Compare two strings using this collator
Collator::EComparisonResult
RuleBasedCollator::compare(const UnicodeString& source,
                        const UnicodeString& target) const
{
    // Each string is copied into a NUL-terminated UChar buffer: a stack
    // buffer when it fits (terminator included), a heap buffer otherwise.
    UChar sourceStack[tblcoll_StackBufferLen];
    UChar targetStack[tblcoll_StackBufferLen];
    uint32_t sourceLen = source.length();
    uint32_t targetLen = target.length();

    UChar *sourceChars = (sourceLen >= tblcoll_StackBufferLen) ? new UChar[sourceLen+1] : sourceStack;
    UChar *targetChars = (targetLen >= tblcoll_StackBufferLen) ? new UChar[targetLen+1] : targetStack;

    source.extract(0, sourceLen, sourceChars);
    sourceChars[sourceLen] = 0;
    target.extract(0, targetLen, targetChars);
    targetChars[targetLen] = 0;

    // The real work is done by the UChar-buffer overload.
    Collator::EComparisonResult outcome = compare(sourceChars, sourceLen, targetChars, targetLen);

    // Release only the buffers that were heap-allocated above.
    if (sourceChars != sourceStack)
    {
        delete[] sourceChars;
    }
    if (targetChars != targetStack)
    {
        delete[] targetChars;
    }

    return outcome;
}

// Retrieve a collation key for the specified string
// The key can be compared with other collation keys using a bitwise comparison
// (e.g. memcmp) to find the ordering of their respective source strings.
// This is handy when doing a sort, where each sort key must be compared
// many times.
//
// The basic algorithm here is to find all of the collation elements for each
// character in the source string, convert them to an ASCII representation,
// and put them into the collation key.  But it's trickier than that.
// Each collation element in a string has three components: primary ('A' vs 'B'),
// secondary ('u' vs an accented 'u'), and tertiary ('A' vs 'a'), and a primary
// difference at the end of a string takes precedence over a secondary or tertiary
// difference earlier in the string.
//
// To account for this, we put all of the primary orders at the beginning of the
// string, followed by the secondary and tertiary orders. Each set of orders is
// terminated by nulls so that a key for a string which is an initial substring of
// another key will compare less without any special case.
//
// Here's a hypothetical example, with the collation element represented as
// a three-digit number, one digit for primary, one for secondary, etc.
//
// String:              A     a     B    \u00C9   (an accented capital letter)
// Collation Elements: 101   100   201  511
// Collation Key:      1125<null>0001<null>1011<null>
//
// To make things even trickier, secondary differences (accent marks) are compared
// starting at the *end* of the string in languages with French secondary ordering.
// But when comparing the accent marks on a single base character, they are compared
// from the beginning.  To handle this, we reverse all of the accents that belong
// to each base character, then we reverse the entire string of secondary orderings
// at the end.
//
CollationKey&
RuleBasedCollator::getCollationKey( const   UnicodeString&  source,
                                    CollationKey&   sortkey,
                                    UErrorCode&      status) const
{
    // Copy the string into a NUL-terminated UChar buffer: the stack buffer
    // when it fits (terminator included), a heap buffer otherwise.
    UChar stackChars[tblcoll_StackBufferLen];
    uint32_t sourceLen = source.length();
    UChar *sourceChars = (sourceLen >= tblcoll_StackBufferLen) ? new UChar[sourceLen+1] : stackChars;

    source.extract(0, sourceLen, sourceChars);
    sourceChars[sourceLen] = 0;

    // The UChar-buffer overload fills in sortkey and returns it.
    CollationKey& filledKey = RuleBasedCollator::getCollationKey(sourceChars, sourceLen, sortkey, status);

    // Release the buffer only if it was heap-allocated above.
    if (sourceChars != stackChars)
    {
        delete[] sourceChars;
    }

    return filledKey;
}

CollationKey&
RuleBasedCollator::getCollationKey( const   UChar*  source,
                                    int32_t sourceLen,
                                    CollationKey&   sortkey,
                                    UErrorCode&      status) const
{
    // An incoming failure is replaced by U_ILLEGAL_ARGUMENT_ERROR and the
    // key is marked bogus.
    if (U_FAILURE(status))
    {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return sortkey.setToBogus();
    }

    // A null or empty source resets the key.
    if (source == 0 || sourceLen == 0)
    {
        return sortkey.reset();
    }

    // ucol_getSortKeyWithAllocation returns the key bytes and writes their
    // count to keyLength; both are handed to the key via adopt().
    int32_t keyLength = 0;
    uint8_t *keyBytes = ucol_getSortKeyWithAllocation((UCollator *)this, source, sourceLen, &keyLength);

    sortkey.adopt(keyBytes, keyLength);

    return sortkey;
}

// Build a collation key for a UChar buffer by walking its collation elements.
//
// source/sourceLen  the text to build a key for
// sortkey           receives the key; it is also the return value
// status            an incoming failure is replaced by U_ILLEGAL_ARGUMENT_ERROR
//                   and the key is marked bogus; U_MEMORY_ALLOCATION_ERROR is
//                   set when the key cannot be grown to the required size
//
// The function makes two passes over the collation elements: the first one
// counts how many primary, secondary and tertiary entries are needed, the
// second one stores them.  The key is laid out as
//   primaries, 0, [secondaries, 0,] [tertiaries, 0,] [normalized text]
// where the bracketed sections depend on the collator's strength.
CollationKey&
RuleBasedCollator::getCollationKeyEx( const   UChar*  source,
                                    int32_t sourceLen,
                                    CollationKey&   sortkey,
                                    UErrorCode&      status) const
{
    if (U_FAILURE(status))
    {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return sortkey.setToBogus();
    }

    if ((!source) || (sourceLen == 0))
    {
        return sortkey.reset();
    }

    // Create the cursor on first use, otherwise re-target the cached one.
    // This method is const, so the member is assigned through a cast.
    if (cursor1 == NULL)
    {
      ((RuleBasedCollator *)this)->cursor1 = new NormalizerIterator(source, sourceLen, getDecomposition());
    }
    else
    {
      cursor1->setModeAndText(getDecomposition(), source,sourceLen, status);
    }

    if (U_FAILURE(status))
    {
        return sortkey.setToBogus();
    }

    UBool  compareSec   = (getStrength() >= Collator::SECONDARY);
    UBool  compareTer   = (getStrength() >= Collator::TERTIARY);
    UBool  compareIdent = (getStrength() == Collator::IDENTICAL);
    int32_t order        = 0;
    int32_t totalPrimary = 0;
    int32_t totalSec     = 0;
    int32_t totalTer     = 0;
    int32_t totalIdent     = 0;
    UnicodeString decomp;

    // iterate over the source, counting primary, secondary, and tertiary entries
    while((order = getStrengthOrder((NormalizerIterator*)cursor1, status)) !=
                                      CollationElementIterator::NULLORDER)
    {
        int32_t secOrder = CollationElementIterator::secondaryOrder(order);
        int32_t terOrder = CollationElementIterator::tertiaryOrder(order);

        if (U_FAILURE(status))
        {
            return sortkey.setToBogus();
        }

        if (! CollationElementIterator::isIgnorable(order))
        {
            // a non-ignorable element contributes to every enabled level
            totalPrimary += 1;

            if (compareSec)
            {
                totalSec += 1;
            }

            if (compareTer)
            {
                totalTer += 1;
            }
        }
        else
        {
            // an ignorable element contributes only where it carries weight
            if (compareSec && secOrder != SECIGNORABLE)
            {
                totalSec += 1;
            }

            if (compareTer && terOrder != TERIGNORABLE)
            {
                totalTer += 1;
            }
        }
    }

    // count the null bytes after the entries
    totalPrimary += 1;

    if (compareSec)
    {
        totalSec += 1;
    }

    if (compareTer)
    {
        totalTer += 1;
    }

    if (compareIdent)
    {
        // Normalize exactly the text described by (source, sourceLen), the
        // same way compareEx() does, instead of assuming that source is
        // NUL-terminated.  A negative length keeps the previous behavior of
        // reading up to the terminator.
        const UnicodeString identText = (sourceLen > 0) ? UnicodeString(source, sourceLen)
                                                        : UnicodeString(source);

        Normalizer::normalize(identText, getDecomposition(),
                0, decomp, status);

        if (U_SUCCESS(status))
        {
            totalIdent = decomp.length() + 1;
        }
    }

    // Compute total number of bytes to hold the entries
    // and make sure the key can hold them
    uint32_t size   = 2 * (totalPrimary + totalSec + totalTer + totalIdent);

    sortkey.ensureCapacity(size);

    if (sortkey.isBogus())
    {
        status = U_MEMORY_ALLOCATION_ERROR;
        return sortkey;
    }

    // Write positions for each section of the key; every entry counted above
    // is given two bytes.
    int32_t primaryCursor = 0;
    int32_t secCursor     = 2 * totalPrimary;
    int32_t secBase       = secCursor;
    int32_t preSecIgnore  = secBase;
    int32_t terCursor     = secCursor + (2 * totalSec);
    int32_t identCursor      = terCursor + (2 * totalTer);

    // reset source to the beginning
    cursor1->reset();

    // now iterate over the source computing the actual entries
    while((order = getStrengthOrder((NormalizerIterator*)cursor1, status)) != CollationElementIterator::NULLORDER)
    {
        if (U_FAILURE(status))
        {
            return sortkey.reset();
        }

        int32_t primaryOrder = CollationElementIterator::primaryOrder(order);
        int32_t secOrder     = CollationElementIterator::secondaryOrder(order);
        int32_t terOrder     = CollationElementIterator::tertiaryOrder(order);

        if (! CollationElementIterator::isIgnorable(order))
        {
            primaryCursor = sortkey.storeBytes(primaryCursor, primaryOrder + SORTKEYOFFSET);

            if (compareSec)
            {
                // French secondary: reverse the run of secondaries written
                // since the previous base character before starting a new run.
                if (data->isFrenchSec && (preSecIgnore < secCursor))
                {
                    sortkey.reverseBytes(preSecIgnore, secCursor);
                }

                secCursor = sortkey.storeBytes(secCursor, secOrder + SORTKEYOFFSET);

                preSecIgnore = secCursor;
            }

            if (compareTer)
            {
                terCursor = sortkey.storeBytes(terCursor, terOrder + SORTKEYOFFSET);
            }
        }
        else
        {
            // Weights of ignorable elements are offset by the maximum
            // secondary/tertiary order in addition to SORTKEYOFFSET.
            if (compareSec && secOrder != SECIGNORABLE)
            {
                secCursor = sortkey.storeBytes(secCursor, secOrder + data->maxSecOrder + SORTKEYOFFSET);
            }

            if (compareTer && terOrder != TERIGNORABLE)
            {
                terCursor = sortkey.storeBytes(terCursor, terOrder + data->maxTerOrder + SORTKEYOFFSET);
            }
        }
    }

    // append 0 at the end of each portion.
    sortkey.storeBytes(primaryCursor, 0);

    if (compareSec)
    {
        if (data->isFrenchSec)
        {
            // reverse the last run, then the whole secondary section
            if (preSecIgnore < secCursor)
            {
                sortkey.reverseBytes(preSecIgnore, secCursor);
            }

            sortkey.reverseBytes(secBase, secCursor);
        }

        sortkey.storeBytes(secCursor, 0);
    }

    if (compareTer)
    {
        sortkey.storeBytes(terCursor, 0);
    }

    if (compareIdent)
    {
        sortkey.storeUnicodeString(identCursor, decomp);
    }

    return sortkey;
}


// Build this collator's rule tables based on a string representation of the rules
// See the big diagram at the top of this file for an overview of how the tables
// are organized.
// Parses `pattern` into a MergeCollation (kept in mPattern) and fills
// data->mapping, and through the add*Order helpers the expansion and
// contraction tables, with one collation order per rule entry.
//
// pattern  the rule string; an empty string sets U_INVALID_FORMAT_ERROR
// status   nothing is done if it already holds a failure; set to
//          U_MEMORY_ALLOCATION_ERROR when the mapping array cannot be created
void
RuleBasedCollator::build(const UnicodeString&   pattern,
                            UErrorCode&      status)
{
    if (U_FAILURE(status))
    {
        return;
    }

    // This array maps Unicode characters to their collation ordering
    data->mapping = ucmp32_open(UNMAPPED);

    // Guard against a null result before looking at fBogus, so that a failed
    // allocation is reported as an error rather than dereferenced.
    if (data->mapping == 0 || data->mapping->fBogus)
    {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    int32_t i = 0;
    UnicodeString lastGroupChars;
    UnicodeString expChars;
    UnicodeString groupChars;

    if (pattern.length() == 0)
    {
        status = U_INVALID_FORMAT_ERROR;
        return;
    }

    // Build the merged collation entries
    // Since rules can be specified in any order in the string
    // (e.g. "c , C < d , D < e , E .... C < CH")
    // this splits all of the rules in the string out into separate
    // objects and then sorts them.  In the above example, it merges the
    // "C < CH" rule in just before the "C < D" rule.

    mPattern = new MergeCollation(pattern, getDecomposition(), status);
    if (U_FAILURE(status))
    {
        // Parsing failed: throw away both the mapping and the pattern.
        ucmp32_close(data->mapping);
        data->mapping = 0;
        delete mPattern;
        mPattern = 0;
        return;
    }

    int32_t order = COLELEMENTSTART;

    // Walk through each entry
    for (i = 0; i < mPattern->getCount(); ++i)
    {
        const PatternEntry* entry = mPattern->getItemAt(i);
        groupChars.remove();
        expChars.remove();

        // if entry is valid
        if (entry != NULL)
        {
            entry->getChars(groupChars);

            // check if french secondary needs to be turned on:
            // a trailing '@' (0x0040) on a multi-character group is the flag
            if ((groupChars.length() > 1) &&
                (groupChars[groupChars.length()-1] == 0x0040))
            {
                data->isFrenchSec = TRUE;
                groupChars.remove(groupChars.length()-1);
            }

            order = increment((Collator::ECollationStrength)entry->getStrength(), order);

            if (entry->getExtension(expChars).length() != 0)
            {
                // encountered an expanding character, where one character on input
                // expands to several sort elements (e.g. o-umlaut --> 'o' 'e')
                addExpandOrder(groupChars, expChars, order, status);
                if (U_FAILURE(status))
                {
                    return;
                }
            }
            else if (groupChars.length() > 1)
            {
                // encountered a contracting character, where several characters on input
                // contract into one sort order.  For example, "ch" is treated as a single
                // character in traditional Spanish sorting.
                addContractOrder(groupChars, order, status);
                if (U_FAILURE(status))
                {
                    return;
                }
            }
            else
            {
                // Nothing out of the ordinary -- one character maps to one sort order
                addOrder(groupChars[0], order, status);
                if (U_FAILURE(status))
                {
                    return;
                }
            }
        }
    }

    // add expanding entries for pre-composed characters
    addComposedChars();

    // Fill in all the expanding chars values
    commit();

    // Compact the data mapping table
    ucmp32_compact(data->mapping, 1);
}

/**
 * Add expanding entries for pre-composed unicode characters so that this
 * collator can be used reasonably well with decomposition turned off.
 */
 void RuleBasedCollator::addComposedChars()
 {
    UnicodeString buf;
    UErrorCode status = U_ZERO_ERROR;

    // Iterate through all of the pre-composed characters in Unicode
    ComposedCharIter iter;
    UnicodeString decomp;

    while (iter.hasNext())
    {
        UChar c = iter.next();

        if (getCharOrder(c) == UNMAPPED)
        {
            //
            // We don't already have an ordering for this pre-composed character.
            //
            // First, see if the decomposed string is already in our
            // tables as a single contracting-string ordering.
            // If so, just map the precomposed character to that order.
            //
            // TODO: What we should really be doing here is trying to find the
            // longest initial substring of the decomposition that is present
            // in the tables as a contracting character sequence, and find its
            // ordering.  Then do this recursively with the remaining chars
            // so that we build a list of orderings, and add that list to
            // the expansion table.
            // That would be more correct but also significantly slower, so
            // I'm not totally sure it's worth doing.
            //
            iter.getDecomposition(decomp);
            int contractOrder = getContractOrder(decomp);

            if (contractOrder != UNMAPPED)
            {
                addOrder(c, contractOrder, status);
            }
            else
            {
                //
                // We don't have a contracting ordering for the entire string
                // that results from the decomposition, but if we have orders
                // for each individual character, we can add an expanding
                // table entry for the pre-composed character
                //
                UBool allThere = TRUE;
                int32_t i;

                for (i = 0; i < decomp.length(); i += 1)
                {
                    if (getCharOrder(decomp[i]) == UNMAPPED)
                    {
                        allThere = FALSE;
                        break;
                    }
                }

                if (allThere)
                {
                    buf.remove();
                    buf += c;
                    addExpandOrder(buf, decomp, UNMAPPED, status);
                }
            }
        }
    }
}

// When the expanding character tables are built by addExpandOrder,
// it doesn't know what the final ordering of each character
// in the expansion will be.  Instead, it just puts the raw character
// code into the table, adding CHARINDEX as a flag.  Now that we've
// finished building the mapping table, we can go back and look up
// that character to see what its real collation order is and
// stick that into the expansion table.  That lets us avoid doing
// a two-stage lookup later.

void
RuleBasedCollator::commit()
{
    // if there are any expanding characters
    if (data->expandTable != NULL)
    {
        int32_t i;
        for (i = 0; i < data->expandTable->size(); i += 1)
        {
            VectorOfInt* valueList = data->expandTable->at(i);
            int32_t j;
            for (j = 0; j < valueList->size(); j++)
            {
                // found a expanding character
                // the expanding char value is not filled in yet
                if ((valueList->at(j) < EXPANDCHARINDEX) &&
                    (valueList->at(j) > CHARINDEX))
                {
                    // Get the real values for the non-filled entry
                    UChar ch = (UChar)(valueList->at(j) - CHARINDEX);
                    int32_t realValue = ucmp32_get(data->mapping, ch);

                    if (realValue == UNMAPPED)
                    {
                        // The real value is still unmapped, maybe it'signorable
                        valueList->atPut(j, IGNORABLEMASK & ch);
                    }
                    // fill in the value
                    else
                    {
                        valueList->atPut(j, realValue);
                    }
                }
            }
        }
    }
 }

/**
 *  Compute the collation order that follows lastValue at the given
 *  comparison level.
 *
 *  @param aStrength  the level (primary, secondary or tertiary) to advance
 *  @param lastValue  the most recently assigned order
 *  @return the next order; lastValue is returned unchanged for any other
 *          strength (e.g. IDENTICAL)
 */
int32_t
RuleBasedCollator::increment(Collator::ECollationStrength aStrength, int32_t lastValue)
{
    if (aStrength == Collator::PRIMARY)
    {
        // Advance the primary order.  When the bits selected by
        // PRIMARYLOWZEROMASK come out as zero, step forward two more times.
        lastValue += PRIMARYORDERINCREMENT;
        if ((lastValue & PRIMARYLOWZEROMASK) == 0)
        {
            lastValue += PRIMARYORDERINCREMENT;
            lastValue += PRIMARYORDERINCREMENT;
        }

        // Drop any secondary/tertiary difference, then restart those
        // fields (start all values from 02).
        lastValue &= PRIMARYORDERMASK;
        lastValue |= RESETSECONDARYTERTIARY;

        // From the first primary step on, the ignorable counters below
        // are no longer updated.
        isOverIgnore = TRUE;
    }
    else if (aStrength == Collator::SECONDARY)
    {
        // Advance the secondary order, drop the tertiary difference and
        // restart the tertiary field (start all values from 02).
        lastValue += SECONDARYORDERINCREMENT;
        lastValue &= SECONDARYDIFFERENCEONLY;
        lastValue |= RESETTERTIARY;

        // Record max # of ignorable chars with a secondary difference.
        if (!isOverIgnore)
        {
            data->maxSecOrder++;
        }
    }
    else if (aStrength == Collator::TERTIARY)
    {
        // Advance the tertiary order only.
        lastValue += TERTIARYORDERINCREMENT;

        // Record max # of ignorable chars with a tertiary difference.
        if (!isOverIgnore)
        {
            data->maxTerOrder++;
        }
    }
    // IDENTICAL (and anything else) is intentionally left untouched.

    return lastValue;
}

// Record the collation order for a single character.  This is the simple
// case: no expansion, and a contraction is only involved when the character
// already heads a contract table.
//
// ch       the character being mapped
// anOrder  the order to store for it
// status   in/out error code; nothing happens if it already signals failure,
//          and it is set to U_MEMORY_ALLOCATION_ERROR if the scratch key
//          string goes bogus
void
RuleBasedCollator::addOrder(UChar ch,
                            int32_t anOrder,
                            UErrorCode& status)
{
    if (U_FAILURE(status))
    {
        return;
    }

    // What does the mapping table currently hold for this character?
    int32_t existing = ucmp32_get(data->mapping, ch);

    if (existing < CONTRACTCHARINDEX)
    {
        // Plain entry: store the order directly.  A later entry for the
        // same character simply replaces the earlier one.
        ucmp32_set(data->mapping, ch, anOrder);
        return;
    }

    // The mapping already points at a contracting-character table, so the
    // order has to be recorded in that table instead of overwriting the
    // mapping itself.  Build a one-character key for it.
    key.remove();
    key += ch;
    if (key.isBogus())
    {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    addContractOrder(key, anOrder, status);
}

// Add an expanding-character entry to the table.
void
RuleBasedCollator::addExpandOrder(  const   UnicodeString& contractChars,
                                const   UnicodeString& expandChars,
                                int32_t anOrder,
                                UErrorCode& status)
{
    if (U_FAILURE(status))
    {
        return;
    }

    // Create an expansion table entry
    int32_t tableIndex = addExpansion(anOrder, expandChars);

    // And add its index into the main mapping table
    if (contractChars.length() > 1)
    {
        addContractOrder(contractChars, tableIndex, status);
    }
    else
    {
        addOrder(contractChars[0], tableIndex, status);
    }
}

// Append a new list of collation orders to the expansion table and return
// the index that identifies it (EXPANDCHARINDEX plus its position in the
// table).
//
// anOrder      an order to place at the front of the list, or UNMAPPED to
//              start the list directly with the expansion characters
// expandChars  the characters whose orders make up the expansion
//
// Returns 0 if either the expansion table or the new list cannot be
// allocated.
int32_t RuleBasedCollator::addExpansion(int32_t anOrder, const UnicodeString &expandChars)
{
    // The expansion table is created lazily on first use.
    if (data->expandTable == NULL)
    {
        data->expandTable = new VectorOfPToExpandTable();

        if (data->expandTable == NULL)
        {
            return 0;
        }
    }

    // If anOrder is valid, we want to add it at the beginning of the list
    int32_t offset = (anOrder == UNMAPPED) ? 0 : 1;

    VectorOfInt *valueList = new VectorOfInt(expandChars.length() + offset);

    // Treat a failed list allocation the same way as a failed table
    // allocation above instead of dereferencing a null pointer.
    if (valueList == NULL)
    {
        return 0;
    }

    if (offset == 1)
    {
        valueList->atPut(0, anOrder);
    }

    int32_t i;
    for (i = 0; i < expandChars.length(); i += 1)
    {
        UChar ch = expandChars[i];
        int32_t mapValue = getCharOrder(ch);

        if (mapValue != UNMAPPED)
        {
            valueList->atPut(i + offset, mapValue);
        }
        else
        {
            // can't find it in the table, will be filled in by commit().
            valueList->atPut(i + offset, CHARINDEX + (int32_t)ch);
        }
    }

    // Add the expanding char list into the expansion table.  The index is
    // computed before the append so that it refers to the new last element.
    int32_t tableIndex = EXPANDCHARINDEX + data->expandTable->size();
    data->expandTable->atPut(data->expandTable->size(), valueList);

    return tableIndex;
}

// Add a string of characters that contracts into a single ordering.
//
// groupChars  the contracting character sequence
// anOrder     the order the whole sequence maps to
// fwd         TRUE for a forward entry; a forward entry also triggers the
//             addition of a matching entry for the reversed string with
//             fwd == FALSE
// status      in/out error code; nothing happens if it already signals
//             failure, and it is set to U_MEMORY_ALLOCATION_ERROR when one
//             of the containers or strings involved reports isBogus()
//
// NOTE(review): the results of `new` below are dereferenced (isBogus(),
// atPut()) without a null check -- confirm whether allocation can return
// NULL in this build.
void
RuleBasedCollator::addContractOrder(const   UnicodeString& groupChars,
                                    int32_t anOrder,
                                    UBool fwd,
                                    UErrorCode& status)
{
    if (U_FAILURE(status))
    {
        return;
    }

    // The table of contract lists is created lazily on first use.
    if (data->contractTable == NULL)
    {
        data->contractTable = new VectorOfPToContractTable();
        if (data->contractTable->isBogus())
        {
            delete data->contractTable;
            data->contractTable = NULL;
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
    }

    // See if the initial character of the string already has a contract table.
    // e.g. for "ch", look for 'c'.
    // getContractValues returns NULL when the resulting index is negative,
    // i.e. when the current mapping is below CONTRACTCHARINDEX.
    int32_t entry = ucmp32_get(data->mapping, groupChars[0]);
    VectorOfPToContractElement *entryTable = getContractValues(entry - CONTRACTCHARINDEX);

    if (entryTable == NULL)
    {
        // We need to create a new table of contract entries for this base char
        // tableIndex is captured before the append below, so it refers to the
        // position the new list is about to occupy.
        int32_t tableIndex = CONTRACTCHARINDEX + data->contractTable->size();
        EntryPair *pair = NULL;
        UnicodeString substring;

        entryTable = new VectorOfPToContractElement();
        if (entryTable->isBogus())
        {
            delete entryTable;
            delete data->contractTable;
            data->contractTable = NULL;
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }

        data->contractTable->atPut(data->contractTable->size(), entryTable);
        // NOTE(review): from here on entryTable has been handed to
        // data->contractTable, yet the error paths delete both entryTable and
        // the table.  If the table deletes its elements this is a double
        // delete -- confirm the container's ownership rules.
        if (data->contractTable->isBogus())
        {
            delete entryTable;
            delete data->contractTable;
            data->contractTable = NULL;
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }


        // Add the initial character's current ordering first. then
        // update its mapping to point to this contract table
        groupChars.extract(0, 1, substring);
        if (substring.isBogus())
        {
            delete entryTable;
            delete data->contractTable;
            data->contractTable = NULL;
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }

        // Element 0 of the list keeps the order the first character had on
        // its own (entry), keyed by the one-character string.
        pair = new EntryPair(substring, entry);

        entryTable->atPut(0, pair);
        if (entryTable->isBogus())
        {
            delete entryTable;
            delete data->contractTable;
            data->contractTable = NULL;
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }

        // Redirect the first character's mapping to the new contract list.
        ucmp32_set(data->mapping, groupChars[0], tableIndex);
    }

    // Now add (or replace) this string in the table
    int32_t index = getEntry(entryTable, groupChars, fwd);

    if (index != UNMAPPED)
    {
        // An entry with the same string and direction exists: overwrite its order.
        EntryPair *pair = (EntryPair *) entryTable->at(index);
        pair->value = anOrder;
    }
    else
    {
        // No such entry yet: append a new one at the end of the list.
        EntryPair *pair = new EntryPair(groupChars, anOrder, fwd);

        entryTable->atPut(entryTable->size(), pair);
    }

    // If this was a forward mapping for a contracting string, also add a
    // reverse mapping for it, so that CollationElementIterator::previous
    // can work right
    if (fwd)
    {
        UnicodeString reverse(groupChars);

        // NOTE(review): on this error path entryTable may be a list that
        // already lived in data->contractTable before this call; deleting it
        // and then the table has the same ownership question as above.
        if (reverse.isBogus())
        {
            delete entryTable;
            delete data->contractTable;
            data->contractTable = NULL;
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }

        // The recursive call passes fwd == FALSE, so it does not recurse again.
        addContractOrder(reverse.reverse(), anOrder, FALSE, status);
    }
}

/**
 * Look up the order assigned to groupChars as a forward contracting string.
 *
 * @param groupChars the candidate contracting string
 * @return the stored order, or UNMAPPED when there is no contract table,
 *         the first character has no contract list, or the list has no
 *         forward entry for this string
 */
int32_t RuleBasedCollator::getContractOrder(const UnicodeString &groupChars) const
{
    if (data->contractTable == NULL)
    {
        return UNMAPPED;
    }

    // The contract list is found through the string's first character.
    VectorOfPToContractElement *candidates = getContractValues(groupChars[0]);
    if (candidates == NULL)
    {
        return UNMAPPED;
    }

    // Only forward entries are of interest here.
    const int32_t found = getEntry(candidates, groupChars, TRUE);
    if (found == UNMAPPED)
    {
        return UNMAPPED;
    }

    EntryPair *match = candidates->at(found);
    return match->value;
}

// Return the order currently stored for a single character.  If the mapping
// table entry refers to a contract list (value >= CONTRACTCHARINDEX), the
// value of the first entry in that list is returned instead.
int32_t RuleBasedCollator::getCharOrder(UChar ch) const
{
    const int32_t mapped = ucmp32_get(data->mapping, ch);

    if (mapped < CONTRACTCHARINDEX)
    {
        // Ordinary entry: the mapping value is the answer.
        return mapped;
    }

    // The mapping points into the contract table; use element 0 of the list.
    VectorOfPToContractElement *contractList = getContractValues(mapped - CONTRACTCHARINDEX);
    EntryPair *first = contractList->at(0);

    return first->value;
}

// Produce a hash code for this collator from its rule string.  Only the
// trailing characters of the rules are mixed in (at most 64, walking
// backwards from the last one) -- that should be good enough for almost
// any use.  The result is never 0.
int32_t
RuleBasedCollator::hashCode() const
{
    const int32_t   ruleLength = getRules().length();
    UTextOffset     pos = ruleLength - 1;

    // only hash up to the limit
    const int32_t   limit = (ruleLength > 64) ? 64 : ruleLength;

    int32_t         hash = 0;
    int16_t         taken;

    for (taken = 0; taken < limit; taken++, pos--)
    {
        const int32_t c = data->ruleTable[pos];
        hash = ((hash << (c & 0x0f)) ^ (c << 8)) + (c ^ hash);
    }

    return (hash == 0) ? 1 : hash;
}

// Search a contract list for the entry whose name equals `name` and whose
// direction flag equals `fwd`.  Returns the position of the first match, or
// UNMAPPED if the list is NULL or holds no such entry.
int32_t
RuleBasedCollator::getEntry(VectorOfPToContractElement* list,
                            const UnicodeString& name,
                            UBool fwd)
{
    if (list == NULL)
    {
        return RuleBasedCollator::UNMAPPED;
    }

    int32_t pos;
    for (pos = 0; pos < list->size(); pos++)
    {
        EntryPair *candidate = list->at(pos);

        // Skip empty slots and entries recorded for the other direction.
        if ((candidate == NULL) || (candidate->fwd != fwd))
        {
            continue;
        }

        if (candidate->getEntryName() == name)
        {
            return pos;
        }
    }

    return RuleBasedCollator::UNMAPPED;
}

// Find the contract list that belongs to the given leading character, by
// translating the character's mapping value into a contract table index.
VectorOfPToContractElement*
RuleBasedCollator::getContractValues(UChar ch) const
{
    const int32_t tableSlot = ucmp32_get(data->mapping, ch) - CONTRACTCHARINDEX;

    return getContractValues(tableSlot);
}

// Fetch the contract list stored at the given contract table index.
// Returns NULL when there is no contract table or the index is negative.
VectorOfPToContractElement*
RuleBasedCollator::getContractValues(int32_t    index) const
{
    if ((data->contractTable == NULL) || (index < 0))
    {
        return NULL;
    }

    return data->contractTable->at(index);
}

/**
  * Return the maximum length of any expansion sequence that ends
  * with the specified comparison order.
  *
  * @param order a collation order returned by previous or next.
  * @return the length of the longest expansion sequence whose last element
  *         equals order, or 1 if there is none.
  *
  * @see CollationElementIterator#getMaxExpansion
  */
int32_t RuleBasedCollator::getMaxExpansion(int32_t order) const
{
    int32_t longest = 1;

    if (data->expandTable == NULL)
    {
        return longest;
    }

    // This is a linear search through the entire expansion table.  A
    // collator with a large number of expansions could suffer from that,
    // but in practice that rarely happens.
    int32_t n;
    for (n = 0; n < data->expandTable->size(); n++)
    {
        VectorOfInt *orders = data->expandTable->at(n);
        const int32_t count = orders->size();

        // Lists that cannot beat the current maximum need no inspection.
        if (count <= longest)
        {
            continue;
        }

        if (orders->at(count - 1) == order)
        {
            longest = count;
        }
    }

    return longest;
}

/**
 *  Get the list of orders stored in the expansion table for an expanding
 *  entry.
 *  @param order the expansion order; EXPANDCHARINDEX is subtracted from it
 *               to obtain the position in the expansion table
 */
VectorOfInt *RuleBasedCollator::getExpandValueList(int32_t order) const
{
    const int32_t tableSlot = order - EXPANDCHARINDEX;

    return data->expandTable->at(tableSlot);
}


// Read a collator's table data from a memory stream.  The expected layout
// is: a 16-bit FILEID marker, a one-byte "data is absent" flag, the table
// data (when present), and a closing FILEID marker.
//
// collator  the object to fill in
// is        the stream to read from; its error flag is raised on a bad
//           marker or bogus data
// status    in/out error code; nothing is read if it already signals
//           failure (or if the stream is already in error)
void RuleBasedCollatorStreamer::streamIn(RuleBasedCollator* collator, UMemoryStream* is, UErrorCode& status)
{
    if (uprv_mstrm_error(is) || U_FAILURE(status)) {
        return;
    }

    // The stream must start with the file id.
    int16_t marker;

    uprv_mstrm_read(is, &marker, sizeof(marker));
    if (marker != collator->FILEID)
    {
        // Not the right type of data: flag the stream and give up.
        uprv_mstrm_setError(is);
        return;
    }

    // Next comes the flag telling whether table data follows.
    char dataAbsent;

    uprv_mstrm_read(is, &dataAbsent, sizeof(dataAbsent));
    if (!dataAbsent)
    {
        if (collator->data == NULL)
        {
            collator->data = new TableCollationData;
        }

        collator->data->streamIn(is, status);
        if (collator->data->isBogus()) {
            uprv_mstrm_setError(is);
            status = U_MISSING_RESOURCE_ERROR;
            return;
        }
    }
    else
    {
        // The stream holds no table data: discard whatever we had.
        delete collator->data;
        collator->data = NULL;
        status = U_MISSING_RESOURCE_ERROR;
    }

    // The same id must close the record.
    uprv_mstrm_read(is, &marker, sizeof(marker));
    if (marker != collator->FILEID)
    {
        uprv_mstrm_setError(is);
        status = U_MISSING_RESOURCE_ERROR;
        return;
    }

    // Bring the remaining members back to their initial state.
    collator->isOverIgnore = FALSE;
    collator->lastChar = 0;
    delete collator->mPattern;
    collator->mPattern = 0;
    collator->key.remove();
    collator->dataIsOwned = TRUE;
}

void RuleBasedCollatorStreamer::streamOut(const RuleBasedCollator* collator, UMemoryStream* os)
{
    if (!uprv_mstrm_error(os))
    {
        // We use a 16-bit ID code to identify this file.
        int16_t id = collator->FILEID;
        uprv_mstrm_write(os, (uint8_t *)&id, sizeof(id));

        // Stream out the data
        char isNull;
        isNull = (collator->data == 0);
        uprv_mstrm_write(os, (uint8_t*)&isNull, sizeof(isNull));

        if (!isNull)
        {
            collator->data->streamOut(os);
        }

        // Write out the ID to indicate the end
        uprv_mstrm_write(os, (uint8_t *)&id, sizeof(id));
    }
}

// Read a collator's table data from a file stream.  The expected layout is:
// a 16-bit FILEID marker, a one-byte "data is absent" flag, the table data
// (when present), and a closing FILEID marker.  Failures are reported only
// through the stream's error flag.
void RuleBasedCollatorStreamer::streamIn(RuleBasedCollator* collator, FileStream* is)
{
    if (T_FileStream_error(is))
    {
        return;
    }

    // The file must start with the file id.
    int16_t marker;

    T_FileStream_read(is, &marker, sizeof(marker));
    if (marker != collator->FILEID)
    {
        // Not the right type of file: flag the stream and give up.
        T_FileStream_setError(is);
        return;
    }

    // Next comes the flag telling whether table data follows.
    char dataAbsent;

    T_FileStream_read(is, &dataAbsent, sizeof(dataAbsent));
    if (!dataAbsent)
    {
        if (collator->data == NULL)
        {
            collator->data = new TableCollationData;
        }

        collator->data->streamIn(is);
        if (collator->data->isBogus()) {
            T_FileStream_setError(is);
            return;
        }
    }
    else
    {
        // The file holds no table data: discard whatever we had.
        delete collator->data;
        collator->data = NULL;
    }

    // The same id must close the record.
    T_FileStream_read(is, &marker, sizeof(marker));
    if (marker != collator->FILEID)
    {
        T_FileStream_setError(is);
        return;
    }

    // Bring the remaining members back to their initial state.
    collator->isOverIgnore = FALSE;
    collator->lastChar = 0;
    delete collator->mPattern;
    collator->mPattern = 0;
    collator->key.remove();
    collator->dataIsOwned = TRUE;
}

void RuleBasedCollatorStreamer::streamOut(const RuleBasedCollator* collator, FileStream* os)
{
    if (!T_FileStream_error(os))
    {
        // We use a 16-bit ID code to identify this file.
        int16_t id = collator->FILEID;
        T_FileStream_write(os, &id, sizeof(id));

        // Stream out the data
        char isNull;
        isNull = (collator->data == 0);
        T_FileStream_write(os, &isNull, sizeof(isNull));

        if (!isNull)
        {
            collator->data->streamOut(os);
        }

        // Write out the ID to indicate the end
        T_FileStream_write(os, &id, sizeof(id));
    }
}

// Stream this collator out to the named file in binary form.
// Returns a true value when the stream reports no error afterwards.
//
// NOTE(review): the error query and the close are issued even when the file
// could not be opened; presumably the T_FileStream_* functions tolerate a
// null stream -- confirm.
UBool RuleBasedCollator::writeToFile(const char* fileName) const
{
    FileStream* out = T_FileStream_open(fileName, "wb");

    if (out != 0)
    {
        RuleBasedCollatorStreamer::streamOut(this, out);
    }

#ifdef COLLDEBUG
    fprintf(stderr, "binary write %s size %d %s\n", fileName, T_FileStream_size(out),
        (!T_FileStream_error(out) ? ", OK" : ", FAIL"));
#endif

    // Sample the error state before the stream is closed.
    const UBool succeeded = (T_FileStream_error(out) == 0);

    T_FileStream_close(out);
    return succeeded;
}
/*
UBool RuleBasedCollator::prepareForBundle() const
{
    UMemoryStream* ofs = uprv_mstrm_openNew(0);
    if (ofs != 0)
    {
        RuleBasedCollatorStreamer::streamOut(this, ofs);
    }

#ifdef COLLDEBUG
    fprintf(stderr, "binary write %s size %d %s\n", fileName, T_FileStream_size(ofs),
        (!T_FileStream_error(ofs) ? ", OK" : ", FAIL"));
#endif

    UBool err = uprv_mstrm_error(ofs) == 0;

    uprv_mstrm_close(ofs);

    return err;
}
*/

void RuleBasedCollator::addToCache(const UnicodeString& key)
{
    // This method doesn't add the RuleBasedCollator itself to the cache.  Instead,
    // it adds the given RuleBasedCollator's data object to the TableCollationData
    // cache, and marks it as non-owned in the given RuleBasedCollator object.
    TableCollationData::addToCache(key, data);
    dataIsOwned = FALSE;
}

// Try to set this collator up from TableCollationData held in the cache
// under the given key.
//
// key     the cache key to look up
// status  in/out error code; nothing happens if it already signals failure,
//         and it is set to U_MISSING_RESOURCE_ERROR when the cache has no
//         data for the key
void
RuleBasedCollator::constructFromCache(const UnicodeString& key,
                                      UErrorCode& status)
{
    if (U_FAILURE(status))
    {
        return;
    }

    // Release data this object owns before adopting the cached data.
    if (dataIsOwned)
    {
        delete data;
        data = NULL;
    }

    // Reset the per-object state.
    isOverIgnore = FALSE;
    lastChar = 0;
    mPattern = 0;
    setStrength(Collator::TERTIARY);

    // Whatever the lookup returns is not owned by this object.
    dataIsOwned = FALSE;
    data = TableCollationData::findInCache(key);

    if (data == NULL)
    {
        status = U_MISSING_RESOURCE_ERROR;
    }
}

char*
RuleBasedCollator::createPathName(  const UnicodeString&    prefix,
                                const UnicodeString&    name,
                                const UnicodeString&    suffix)
{
    // Concatenate three elements to form a file name, and return it.

    UnicodeString   workingName(prefix);
    int32_t         size;
    char*           returnVal;

    workingName += name;
    workingName += suffix;

    size = workingName.length();
    returnVal = new char[size + 1];
    workingName.extract(0, size, returnVal, "");
    returnVal[size] = 0;

    return returnVal;
}

// Strip the final element from a locale string, in place.
// "de_CH" becomes "de", "de" becomes "", and "" stays "".
void
RuleBasedCollator::chopLocale(UnicodeString& localeName)
{
    const int32_t size = localeName.length();

    // Walk backwards to the last underscore (0x005F); index 0 is never
    // tested, so without an underscore the cut point ends up at 0.
    int32_t cut = size - 1;
    while ((cut > 0) && (localeName[cut] != 0x005F))
    {
        cut--;
    }

    // An empty string starts the walk at -1.
    if (cut < 0)
    {
        cut = 0;
    }

    // Drop everything from the cut point to the end.
    localeName.remove(cut, size - cut);
}


// Serialize this collator into a freshly allocated byte buffer.
//
// length  receives the number of bytes in the returned buffer, or 0 when
//         the stream or the buffer could not be produced
// status  in/out error code; nothing happens (and length is left untouched)
//         if it already signals failure; set to U_MEMORY_ALLOCATION_ERROR
//         when the memory stream or the result buffer cannot be obtained or
//         the stream reports an error after writing
//
// Returns a buffer obtained from uprv_malloc holding the streamed-out
// collator, or NULL on failure.
uint8_t *
RuleBasedCollator::cloneRuleData(int32_t &length, UErrorCode &status)
{
    if(U_FAILURE(status)) {
        return NULL;
    }

    UMemoryStream *memdata = uprv_mstrm_openNew(0);

    // Without a stream there is nothing to copy from; bail out instead of
    // dereferencing a null pointer further down.
    if(memdata == 0) {
        status = U_MEMORY_ALLOCATION_ERROR;
        length = 0;
        return NULL;
    }

    RuleBasedCollatorStreamer::streamOut(this, memdata);

    // A stream in error state does not hold a complete image of the
    // collator, so do not hand its contents out as valid data.  The exact
    // cause cannot be told apart here; a failed write is reported as an
    // allocation failure.
    if(uprv_mstrm_error(memdata)) {
        status = U_MEMORY_ALLOCATION_ERROR;
        uprv_mstrm_close(memdata);
        length = 0;
        return NULL;
    }

    // Copy the stream contents out before the stream is closed.
    uint8_t *copy = (uint8_t *)uprv_malloc(memdata->fPos);
    if(copy == 0) {
        status = U_MEMORY_ALLOCATION_ERROR;
        uprv_mstrm_close(memdata);
        length = 0;
        return NULL;
    }

    uprv_memcpy(copy, memdata->fStart, memdata->fPos);
    length = memdata->fPos;
    uprv_mstrm_close(memdata);
    return copy;
}

// Set one collation attribute.
//
// attr    the attribute to change
// value   the new value
// status  set to U_UNSUPPORTED_ERROR for attributes that cannot be set
//         here, and to U_ILLEGAL_ARGUMENT_ERROR for an unknown attribute or
//         a value the attribute does not accept
void RuleBasedCollator::setAttribute(UColAttribute attr, UColAttributeValue value, UErrorCode &status) {
    switch(attr) {
    case UCOL_FRENCH_COLLATION:
        /* direction of secondary weights */
        if(value == UCOL_ON) {
            data->isFrenchSec = TRUE;
        } else if(value == UCOL_OFF) {
            data->isFrenchSec = FALSE;
        } else if(value != UCOL_DEFAULT) {
            /* UCOL_DEFAULT leaves the current setting alone */
            status = U_ILLEGAL_ARGUMENT_ERROR;
        }
        break;

    case UCOL_NORMALIZATION_MODE:
        /* normalization */
        if(value == UCOL_ON) {
            setDecomposition(Normalizer::DECOMP);
        } else if(value == UCOL_OFF) {
            setDecomposition(Normalizer::NO_OP);
        } else if(value == UCOL_DEFAULT) {
            setDecomposition(fDefaultDecomp);
        } else {
            status = U_ILLEGAL_ARGUMENT_ERROR;
        }
        break;

    case UCOL_ALTERNATE_HANDLING:   /* handling of variable elements */
    case UCOL_CASE_FIRST:           /* who goes first, lower case or uppercase */
    case UCOL_CASE_LEVEL:           /* do we have an extra case level */
    case UCOL_STRENGTH:             /* strength */
        status = U_UNSUPPORTED_ERROR;
        break;

    case UCOL_ATTRIBUTE_COUNT:
    default:
        status = U_ILLEGAL_ARGUMENT_ERROR;
        break;
    }
}

// Query one collation attribute.
//
// attr    the attribute to read
// status  set to U_UNSUPPORTED_ERROR for attributes that cannot be queried
//         here, U_ILLEGAL_ARGUMENT_ERROR for an unknown attribute, and
//         U_INTERNAL_PROGRAM_ERROR if the strength is not one of the four
//         known levels
//
// Returns the attribute's value, or UCOL_DEFAULT whenever status is set.
UColAttributeValue RuleBasedCollator::getAttribute(UColAttribute attr, UErrorCode &status) {
    switch(attr) {
    case UCOL_FRENCH_COLLATION:
        /* direction of secondary weights */
        return (data->isFrenchSec == TRUE) ? UCOL_ON : UCOL_OFF;

    case UCOL_NORMALIZATION_MODE:
        /* normalization */
        return (getDecomposition() == Normalizer::DECOMP) ? UCOL_ON : UCOL_OFF;

    case UCOL_STRENGTH:
        /* strength */
        switch(getStrength()) {
        case PRIMARY:
            return UCOL_PRIMARY;
        case SECONDARY:
            return UCOL_SECONDARY;
        case TERTIARY:
            return UCOL_TERTIARY;
        case IDENTICAL:
            return UCOL_IDENTICAL;
        default:
            status = U_INTERNAL_PROGRAM_ERROR;
            break;
        }
        break;

    case UCOL_ALTERNATE_HANDLING:   /* handling of variable elements */
    case UCOL_CASE_FIRST:           /* who goes first, lower case or uppercase */
    case UCOL_CASE_LEVEL:           /* do we have an extra case level */
        status = U_UNSUPPORTED_ERROR;
        break;

    case UCOL_ATTRIBUTE_COUNT:
    default:
        status = U_ILLEGAL_ARGUMENT_ERROR;
        break;
    }

    /* every path that reaches this point has reported an error */
    return UCOL_DEFAULT;
}

// Create a copy of this collator through the copy constructor and return it
// as a heap-allocated object.
Collator* RuleBasedCollator::safeClone(void) {
    RuleBasedCollator *copy = new RuleBasedCollator(*this);

    return copy;
}

// Callback adapter: treats the opaque pointer as a ForwardCharacterIterator,
// pulls the next code unit from it, and returns 0xFFFF once the iterator
// reports DONE.
UChar forwardCharIteratorGlue(void *iterator) {
    const UChar next = ((ForwardCharacterIterator *)iterator)->nextPostInc();

    return (next == ForwardCharacterIterator::DONE) ? (UChar)0xFFFF : next;
}


// Compare the text supplied by two forward iterators by handing both to
// ucol_strcollinc, then translate its result into the Collator enum.
Collator::EComparisonResult RuleBasedCollator::compare(ForwardCharacterIterator &source,
                                                       ForwardCharacterIterator &target) {
    switch(ucol_strcollinc((UCollator *)this, forwardCharIteratorGlue, &source, forwardCharIteratorGlue, &target)) {
    case UCOL_LESS:
        return Collator::LESS;
    case UCOL_GREATER:
        return Collator::GREATER;
    default:
        return Collator::EQUAL;
    }
}

// Compute the sort key for a UnicodeString.  The string's contents are
// copied into a NUL-terminated UChar buffer (on the stack when it fits,
// otherwise on the heap) and passed to ucol_getSortKey.
//
// source        the text to build a key for
// result        destination buffer for the key
// resultLength  capacity of result
//
// Returns whatever ucol_getSortKey returns.
int32_t RuleBasedCollator::getSortKey(const   UnicodeString&  source,
                                      uint8_t *result,
                                      int32_t resultLength) const {
    UChar stackBuffer[tblcoll_StackBufferLen];
    const uint32_t sourceLen = source.length();

    // The stack buffer must also hold the terminator, so a string of exactly
    // tblcoll_StackBufferLen characters already needs the heap.
    const UBool onHeap = (sourceLen >= tblcoll_StackBufferLen);
    UChar *chars = onHeap ? new UChar[sourceLen+1] : stackBuffer;

    source.extract(0, sourceLen, chars);
    chars[sourceLen] = 0;

    const int32_t keyLength = ucol_getSortKey((UCollator *)this, chars, sourceLen, result, resultLength);

    if(onHeap) {
        delete[] chars;
    }
    return keyLength;
}

// Compute the sort key for a UChar array by forwarding straight to
// ucol_getSortKey; its return value is passed back unchanged.
int32_t RuleBasedCollator::getSortKey(const   UChar *source,
                                      int32_t sourceLength,
                                      uint8_t *result,
                                      int32_t resultLength) const {
    return ucol_getSortKey((UCollator *)this, source, sourceLength, result, resultLength);
}

// Hand out a scratch block of at least `size` bytes from a small ring of
// five slots kept by this collator.  Each call uses the next slot; after the
// fifth the cursor wraps around, so a block handed out five calls earlier is
// returned again (grown first if it is too small for the new request).
//
// size  number of bytes required
//
// Returns the block, or NULL if memory could not be obtained.  On failure
// the slot and its recorded size are left as they were and the cursor does
// not advance.
void * RuleBasedCollator::getSomeMemory(int32_t size) {
    const int32_t slotCount = 5;

    if(fSomeMemory == NULL) {
        // First use: set up the slot array and the parallel size array.
        fSomeMemory = (void **)uprv_malloc(slotCount*sizeof(void*));
        if(fSomeMemory == NULL) {
            return NULL;
        }

        fSizes = new int32_t[slotCount];
        if(fSizes == NULL) {
            // Undo the partial setup so that a later call starts over.
            uprv_free(fSomeMemory);
            fSomeMemory = NULL;
            return NULL;
        }

        uprv_memset(fSomeMemory, 0, slotCount*sizeof(void*));
        fUsedMemory = 0;
        fAvailableMemory = slotCount;
    } else if(fUsedMemory == slotCount) {
        // All slots have been handed out: start again with the first one.
        fUsedMemory = 0;
    }

    void *current = *(fSomeMemory+fUsedMemory);
    void *result = NULL;
    UBool resized = FALSE;

    if(current == NULL) {
        // Empty slot: allocate a fresh block.
        result = uprv_malloc(size);
        resized = TRUE;
    } else if(fSizes[fUsedMemory] < size) {
        // Existing block is too small: grow it.
        result = uprv_realloc(current, size);
        resized = TRUE;
    } else {
        // Existing block is big enough: reuse it as is.
        result = current;
    }

    if(result == NULL) {
        // Allocation failed.  When a realloc fails the old block is still
        // valid, so keep it (and its size) in the slot rather than
        // overwriting the pointer and leaking the block.
        return NULL;
    }

    if(resized) {
        // Only record the new size once the allocation is known to be good.
        fSizes[fUsedMemory] = size;
    }

    *(fSomeMemory+fUsedMemory) = result;
    fUsedMemory++;

    return result;
}

//eof