scuffed-code/icu4c/source/i18n/bmsearch.cpp

/*
 ******************************************************************************
 *   Copyright (C) 1996-2009, International Business Machines                 *
 *   Corporation and others.  All Rights Reserved.                            *
 ******************************************************************************
 */

#include "unicode/utypes.h"

#if !UCONFIG_NO_COLLATION

#include "unicode/unistr.h"
#include "unicode/putil.h"
#include "unicode/usearch.h"

#include "cmemory.h"
#include "unicode/coll.h"
#include "unicode/tblcoll.h"
#include "unicode/coleitr.h"
#include "unicode/ucoleitr.h"

#include "unicode/regex.h"        // TODO: make conditional on regexp being built.

#include "unicode/uniset.h"
#include "unicode/uset.h"
#include "unicode/ustring.h"
#include "hash.h"
#include "uhash.h"
#include "ucol_imp.h"
#include "unormimp.h"

#include "unicode/colldata.h"
#include "unicode/bmsearch.h"

U_NAMESPACE_BEGIN

// Number of elements in a statically sized array (meaningless for a pointer).
#define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0]))
// Raw storage for `count` objects of `type`, obtained from uprv_malloc().
// The result may be NULL, nothing is constructed, and the
// count * sizeof(type) product is not checked for overflow.
#define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type))
// Returns storage obtained with NEW_ARRAY to uprv_free().
#define DELETE_ARRAY(array) uprv_free((void *) (array))


// One collation element read from the target text, plus the collation
// element iterator offsets recorded around the read (see Target::nextCE()
// and Target::prevCE(), which fill these in).
struct CEI
{
    uint32_t order;       // CE after strength masking / shifting, or UCOL_NULLORDER at the end of iteration
    int32_t  lowOffset;   // iterator offset before ucol_next(), or after ucol_previous()
    int32_t  highOffset;  // iterator offset after ucol_next(), or before ucol_previous()
};

// Wraps the text being searched: a collation element iterator and a
// character break iterator over the target string, plus a small buffer
// (ceb) of collation elements read since the last setOffset()/setLast().
class Target : public UMemory
{
public:
    // patternLength is used only to size the CE buffer. target may be NULL,
    // in which case no iterators are opened until setTargetString() is called.
    Target(UCollator *theCollator, const UnicodeString *target, int32_t patternLength, UErrorCode &status);
    ~Target();

    // Replaces the text being searched; the string is not copied or owned.
    void setTargetString(const UnicodeString *target);

    // Return the buffered CE at index `offset`, reading one more CE from the
    // iterator (forwards / backwards) when offset == bufferMax.
    // Return NULL if the buffer is full or offset is not the next slot.
    const CEI *nextCE(int32_t offset);
    const CEI *prevCE(int32_t offset);

    int32_t stringLength();
    UChar charAt(int32_t offset);   // no bounds check on offset

    UBool isBreakBoundary(int32_t offset);
    int32_t nextBreakBoundary(int32_t offset);
    int32_t nextSafeBoundary(int32_t offset);

    // At identical strength, compares the NFD forms of pattern and of
    // target[start, end); at any lower strength always returns TRUE.
    UBool isIdentical(UnicodeString &pattern, int32_t start, int32_t end);

    // Both reposition the CE iterator and reset the CE buffer; setLast()
    // additionally seeds slot 0 with a UCOL_NULLORDER entry at `last`.
    void setOffset(int32_t offset);
    void setLast(int32_t last);
    int32_t getOffset();

private:
    CEI *ceb;                 // CE buffer, bufferSize entries, allocated in the constructor
    int32_t bufferSize;
    int32_t bufferMin;        // valid buffered entries are [bufferMin, bufferMax)
    int32_t bufferMax;

    uint32_t strengthMask;    // mask applied to every CE, derived from strength
    UCollationStrength strength;
    uint32_t variableTop;
    UBool toShift;            // TRUE when UCOL_ALTERNATE_HANDLING is UCOL_SHIFTED
    UCollator *coll;          // not closed by this class

    const UnicodeString *targetString;  // not owned
    const UChar *targetBuffer;          // targetString->getBuffer()
    int32_t targetLength;

    UCollationElements *elements;       // closed in the destructor / setTargetString()
    UBreakIterator *charBreakIterator;  // closed in the destructor / setTargetString()
};

// Captures the collator's strength / alternate-handling settings, allocates
// the CE buffer (sized from patternLength and the collator's largest
// expansion) and, if a target string is supplied, opens the iterators on it.
// Sets status to U_MEMORY_ALLOCATION_ERROR if the buffer cannot be allocated.
Target::Target(UCollator *theCollator, const UnicodeString *target, int32_t patternLength, UErrorCode &status)
    : bufferSize(0), bufferMin(0), bufferMax(0),
      strengthMask(0), strength(UCOL_PRIMARY), variableTop(0), toShift(FALSE), coll(theCollator),
      targetString(NULL), targetBuffer(NULL), targetLength(0), elements(NULL), charBreakIterator(NULL)
{
    strength    = ucol_getStrength(coll);
    toShift     = (UCOL_SHIFTED == ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status));
    variableTop = ucol_getVariableTop(coll, &status);

    // Scan the zero-terminated expansion size list for its largest entry.
    uint8_t widest = 0;
    const uint8_t *sizes = coll->expansionCESize;

    while (*sizes != 0) {
        if (widest < *sizes) {
            widest = *sizes;
        }

        ++sizes;
    }

    // Pattern CEs, one maximal expansion on each side, and 4 slots of slack.
    bufferSize = patternLength + (2 * widest) + 4;
    ceb = NEW_ARRAY(CEI, bufferSize);

    if (ceb == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    if (target != NULL) {
        setTargetString(target);
    }

    // Primary weights always take part; secondary weights from secondary
    // strength upwards; tertiary weights for every strength above that.
    strengthMask |= UCOL_PRIMARYORDERMASK;

    if (strength != UCOL_PRIMARY) {
        strengthMask |= UCOL_SECONDARYORDERMASK;

        if (strength != UCOL_SECONDARY) {
            strengthMask |= UCOL_TERTIARYORDERMASK;
        }
    }
}

// Frees the CE buffer and closes both iterators. The collator and the
// target string are not owned and are left untouched.
Target::~Target()
{
    DELETE_ARRAY(ceb);

    ucol_closeElements(elements);
    ubrk_close(charBreakIterator);
}

void Target::setTargetString(const UnicodeString *target)
{
    if (charBreakIterator != NULL) {
        ubrk_close(charBreakIterator);
        ucol_closeElements(elements);
    }

    targetString = target;

    if (targetString != NULL) {
        UErrorCode status = U_ZERO_ERROR;

        targetBuffer = targetString->getBuffer();
        targetLength = targetString->length();

        elements = ucol_openElements(coll, target->getBuffer(), target->length(), &status);
        ucol_forceHanImplicit(elements, &status);

        charBreakIterator = ubrk_open(UBRK_CHARACTER, ucol_getLocale(coll, ULOC_VALID_LOCALE, &status),
                                      targetBuffer, targetLength, &status);
    } else {
        targetBuffer = NULL;
        targetLength = 0;
    }
}

// Returns buffer entry `offset`, reading forward from the CE iterator when
// `offset` is the next unfilled slot. Ignorable CEs (after strength masking
// and, with shifted alternate handling, variable CEs below quaternary
// strength) are skipped. Returns NULL when the buffer is full or `offset`
// is neither buffered nor the next slot.
const CEI *Target::nextCE(int32_t offset)
{
    // Already buffered?
    if (offset >= bufferMin && offset < bufferMax) {
        return &ceb[offset];
    }

    // Only the slot directly after the buffered range can be filled.
    if (bufferMax >= bufferSize || offset != bufferMax) {
        return NULL;
    }

    UErrorCode status = U_ZERO_ERROR;
    int32_t before = -1, after = -1;
    uint32_t ce;
    UBool isCont = FALSE;

    for (;;) {
        before = ucol_getOffset(elements);
        ce     = ucol_next(elements, &status);
        after  = ucol_getOffset(elements);

        if (ce == UCOL_NULLORDER) {
            break;
        }

        isCont = isContinuation(ce);
        ce &= strengthMask;

        if (toShift && variableTop > ce && (ce & UCOL_PRIMARYORDERMASK) != 0) {
            // Variable CE under shifted handling: keep only the primary at
            // quaternary strength or above, otherwise treat as ignorable.
            if (strength >= UCOL_QUATERNARY) {
                ce &= UCOL_PRIMARYORDERMASK;
            } else {
                ce = UCOL_IGNORABLE;
            }
        }

        if (ce != UCOL_IGNORABLE) {
            break;
        }
    }

    if (isCont) {
        ce |= UCOL_CONTINUATION_MARKER;
    }

    CEI &slot = ceb[offset];

    slot.order      = ce;
    slot.lowOffset  = before;
    slot.highOffset = after;

    bufferMax += 1;

    return &slot;
}

// Backward counterpart of nextCE(): returns buffer entry `offset`, reading
// one more CE with ucol_previous() when `offset` is the next unfilled slot.
// The offset taken before the read becomes highOffset and the offset after
// it becomes lowOffset. Returns NULL when the buffer is full or `offset` is
// neither buffered nor the next slot.
const CEI *Target::prevCE(int32_t offset)
{
    // Already buffered?
    if (offset >= bufferMin && offset < bufferMax) {
        return &ceb[offset];
    }

    // Only the slot directly after the buffered range can be filled.
    if (bufferMax >= bufferSize || offset != bufferMax) {
        return NULL;
    }

    UErrorCode status = U_ZERO_ERROR;
    int32_t lowSide = -1, highSide = -1;
    uint32_t ce;
    UBool isCont = FALSE;

    for (;;) {
        highSide = ucol_getOffset(elements);
        ce       = ucol_previous(elements, &status);
        lowSide  = ucol_getOffset(elements);

        if (ce == UCOL_NULLORDER) {
            break;
        }

        isCont = isContinuation(ce);
        ce &= strengthMask;

        if (toShift && variableTop > ce && (ce & UCOL_PRIMARYORDERMASK) != 0) {
            // Variable CE under shifted handling: keep only the primary at
            // quaternary strength or above, otherwise treat as ignorable.
            if (strength >= UCOL_QUATERNARY) {
                ce &= UCOL_PRIMARYORDERMASK;
            } else {
                ce = UCOL_IGNORABLE;
            }
        }

        if (ce != UCOL_IGNORABLE) {
            break;
        }
    }

    if (isCont) {
        ce |= UCOL_CONTINUATION_MARKER;
    }

    CEI &slot = ceb[offset];

    slot.order      = ce;
    slot.lowOffset  = lowSide;
    slot.highOffset = highSide;

    bufferMax += 1;

    return &slot;
}

// Length of the current target string in UChars, or 0 when none is set.
int32_t Target::stringLength()
{
    if (targetString == NULL) {
        return 0;
    }

    return targetLength;
}

// Code unit at `offset` in the target buffer, or 0x0000 when no target
// string is set. `offset` is not range-checked.
UChar Target::charAt(int32_t offset)
{
    if (targetString == NULL) {
        return 0x0000;
    }

    return targetBuffer[offset];
}

// Empties the CE buffer and moves the CE iterator to `offset` in the target
// text. Any error from ucol_setOffset() is discarded.
void Target::setOffset(int32_t offset)
{
    bufferMax = 0;
    bufferMin = 0;

    UErrorCode status = U_ZERO_ERROR;

    ucol_setOffset(elements, offset, &status);
}

// Resets the CE buffer to a single UCOL_NULLORDER entry positioned at
// `last` (both offsets), then moves the CE iterator to `last`. Any error
// from ucol_setOffset() is discarded.
void Target::setLast(int32_t last)
{
    CEI &sentinel = ceb[0];

    sentinel.order      = UCOL_NULLORDER;
    sentinel.lowOffset  = last;
    sentinel.highOffset = last;

    bufferMin = 0;
    bufferMax = 1;

    UErrorCode status = U_ZERO_ERROR;

    ucol_setOffset(elements, last, &status);
}

// Current offset of the collation element iterator, as reported by
// ucol_getOffset().
int32_t Target::getOffset()
{
    const int32_t position = ucol_getOffset(elements);

    return position;
}

// Asks the character break iterator whether `offset` is a boundary in the
// target text.
UBool Target::isBreakBoundary(int32_t offset)
{
    const UBool onBoundary = ubrk_isBoundary(charBreakIterator, offset);

    return onBoundary;
}

// Result of ubrk_following() on the character break iterator for `offset`.
int32_t Target::nextBreakBoundary(int32_t offset)
{
    const int32_t boundary = ubrk_following(charBreakIterator, offset);

    return boundary;
}

// Scans forward from `offset` and returns the index of the first code unit
// that is either a lead surrogate or not flagged by ucol_unsafeCP() for
// this collator. Returns targetLength if no such unit is found (including
// when `offset` is already at or past the end).
int32_t Target::nextSafeBoundary(int32_t offset)
{
    for (int32_t index = offset; index < targetLength; index += 1) {
        UChar unit = targetBuffer[index];

        if (U_IS_LEAD(unit) || ! ucol_unsafeCP(unit, coll)) {
            return index;
        }
    }

    return targetLength;
}

// Identical-strength check for a candidate match.
//
// Below UCOL_IDENTICAL strength this always returns TRUE. Otherwise the
// whole pattern and target[start, end) are both decomposed (NFD) and the
// results compared code unit by code unit.
//
// Returns FALSE when the decomposed lengths or contents differ, when
// either decomposition fails, or when the fallback buffer cannot be
// allocated.
UBool Target::isIdentical(UnicodeString &pattern, int32_t start, int32_t end)
{
    if (strength < UCOL_IDENTICAL) {
        return TRUE;
    }

    // Stack buffers cover the common short case; longer text falls back to
    // a heap buffer below.
    UChar t2[32], p2[32];
    const UChar *pBuffer = pattern.getBuffer();
    int32_t pLength = pattern.length();
    int32_t length = end - start;

    UErrorCode status = U_ZERO_ERROR, status2 = U_ZERO_ERROR;

    int32_t decomplength = unorm_decompose(t2, ARRAY_SIZE(t2),
                                       targetBuffer + start, length,
                                       FALSE, 0, &status);

    // use separate status2 in case of buffer overflow
    if (decomplength != unorm_decompose(p2, ARRAY_SIZE(p2),
                                        pBuffer, pLength,
                                        FALSE, 0, &status2)) {
        return FALSE; // lengths are different
    }

    // Both buffers have the same capacity and the lengths are equal, so an
    // overflow of p2 is mirrored in `status` and handled below. Any other
    // failure decomposing the pattern leaves p2 unusable: report no match
    // instead of comparing it.
    if (U_FAILURE(status2) && status2 != U_BUFFER_OVERFLOW_ERROR) {
        return FALSE;
    }

    // compare contents
    UChar *text, *pat;

    if(U_SUCCESS(status)) {
        text = t2;
        pat = p2;
    } else if(status == U_BUFFER_OVERFLOW_ERROR) {
        status = U_ZERO_ERROR;

        // allocate one buffer for both decompositions
        text = NEW_ARRAY(UChar, decomplength * 2);

        // Check for allocation failure.
        if (text == NULL) {
            return FALSE;
        }

        pat = text + decomplength;

        // Redo both decompositions into the exactly sized halves; a failure
        // in either one ends up in `status` and yields FALSE at the end.
        unorm_decompose(text, decomplength, targetBuffer + start,
                        length, FALSE, 0, &status);

        unorm_decompose(pat, decomplength, pBuffer,
                        pLength, FALSE, 0, &status);
    } else {
        // NFD failed, make sure that u_memcmp() does not overrun t2 & p2
        // and that we don't uprv_free() an undefined text pointer
        text = pat = t2;
        decomplength = 0;
    }

    UBool result = (UBool)(u_memcmp(pat, text, decomplength) == 0);

    // Only the heap fallback needs releasing.
    if(text != t2) {
        DELETE_ARRAY(text);
    }

    // return FALSE if NFD failed
    return U_SUCCESS(status) && result;
}

// Number of buckets in the bad-character skip table; CEs are mapped to a
// bucket by BadCharacterTable::hash().
#define HASH_TABLE_SIZE 257

// Boyer-Moore "bad character" skip table keyed by a hash of the CE, plus a
// per-position cache of minimum lengths (in chars) computed through
// CollData::minLengthInChars() for the pattern's CEs.
class BadCharacterTable : public UMemory
{
public:
    // Builds the tables from the pattern's CEs. Sets status to
    // U_MEMORY_ALLOCATION_ERROR if a work buffer cannot be allocated.
    BadCharacterTable(CEList &patternCEs, CollData *data, UErrorCode &status);
    ~BadCharacterTable();

    // Skip distance stored in the bucket that `ce` hashes to.
    int32_t operator[](uint32_t ce) const;
    // Value computed for pattern position 0; also the table's default entry.
    int32_t getMaxSkip() const;
    // Cached value for pattern position `index`; the entry at index ==
    // pattern size is 0. No bounds or NULL check.
    int32_t minLengthInChars(int32_t index);

private:
    static int32_t hash(uint32_t ce);

    int32_t maxSkip;
    int32_t badCharacterTable[HASH_TABLE_SIZE];

    int32_t *minLengthCache;  // pattern size + 1 entries; NULL if construction bailed out early
};

// Builds the bad-character skip table for the pattern's collation elements.
//
// patternCEs  CEs of the pattern.
// data        supplies minLengthInChars() for suffixes of the CE list.
// status      set to U_MEMORY_ALLOCATION_ERROR if a work buffer cannot be
//             allocated; if already a failure on entry nothing is built.
//
// If nothing is built (incoming failure, empty pattern, allocation failure)
// the object is still fully defined: maxSkip is 0, every table entry is 0
// and minLengthCache is NULL. BoyerMooreSearch::search() reads
// getMaxSkip() before it checks for an empty pattern, so maxSkip must never
// be left uninitialized.
BadCharacterTable::BadCharacterTable(CEList &patternCEs, CollData *data, UErrorCode &status)
    : maxSkip(0), minLengthCache(NULL)
{
    // Defined contents for every early-return path below.
    for (int32_t j = 0; j < HASH_TABLE_SIZE; j += 1) {
        badCharacterTable[j] = 0;
    }

    int32_t plen = patternCEs.size();

    // **** need a better way to deal with this ****
    if (U_FAILURE(status) || plen == 0) {
        return;
    }

    // Scratch array handed to CollData::minLengthInChars(); every entry
    // starts at -1.
    int32_t *history = NEW_ARRAY(int32_t, plen);

    if (history == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    for (int32_t i = 0; i < plen; i += 1) {
        history[i] = -1;
    }

    // One entry per pattern position plus a trailing 0 at index plen.
    minLengthCache = NEW_ARRAY(int32_t, plen + 1);

    if (minLengthCache == NULL) {
        DELETE_ARRAY(history);
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    // The value for the whole pattern is the default skip for every bucket.
    maxSkip = minLengthCache[0] = data->minLengthInChars(&patternCEs, 0, history);

    for(int32_t j = 0; j < HASH_TABLE_SIZE; j += 1) {
        badCharacterTable[j] = maxSkip;
    }

    for(int32_t p = 1; p < plen; p += 1) {
        minLengthCache[p] = data->minLengthInChars(&patternCEs, p, history);

        // Make sure this entry is not bigger than the previous one.
        // Otherwise, we might skip too far in some cases.
        if (minLengthCache[p] < 0 || minLengthCache[p] > minLengthCache[p - 1]) {
            minLengthCache[p] = minLengthCache[p - 1];
        }
    }

    minLengthCache[plen] = 0;

    // Every pattern CE except the last gets the value cached for the
    // position after it; later positions overwrite earlier ones that hash
    // to the same bucket.
    for(int32_t p = 0; p < plen - 1; p += 1) {
        badCharacterTable[hash(patternCEs[p])] = minLengthCache[p + 1];
    }

    DELETE_ARRAY(history);
}

// Releases the per-position minimum-length cache.
BadCharacterTable::~BadCharacterTable()
{
    DELETE_ARRAY(this->minLengthCache);
}

// Skip distance stored in the bucket that `ce` hashes to.
int32_t BadCharacterTable::operator[](uint32_t ce) const
{
    const int32_t bucket = hash(ce);

    return badCharacterTable[bucket];
}

// The skip value computed for the whole pattern (position 0) during
// construction.
int32_t BadCharacterTable::getMaxSkip() const
{
    return this->maxSkip;
}

// Cached minimum length for pattern position `index`. No range check, and
// the cache is NULL if construction returned early.
int32_t BadCharacterTable::minLengthInChars(int32_t index)
{
    const int32_t cached = minLengthCache[index];

    return cached;
}

// Bucket index for a CE: its primary order modulo the table size.
int32_t BadCharacterTable::hash(uint32_t ce)
{
    return (int32_t) (UCOL_PRIMARYORDER(ce) % HASH_TABLE_SIZE);
}

// Boyer-Moore "good suffix" skip table: one skip distance per pattern CE
// position, expressed with BadCharacterTable::minLengthInChars() values.
class GoodSuffixTable : public UMemory
{
public:
    // Builds the table. Sets status to U_MEMORY_ALLOCATION_ERROR if a
    // buffer cannot be allocated; builds nothing for an empty pattern or an
    // incoming failure.
    GoodSuffixTable(CEList &patternCEs, BadCharacterTable &badCharacterTable, UErrorCode &status);
    ~GoodSuffixTable();

    // Skip distance for a mismatch at pattern position `offset`. No bounds
    // or NULL check.
    int32_t operator[](int32_t offset) const;

private:
    int32_t *goodSuffixTable;  // one entry per pattern CE; NULL if nothing was built
};

// Builds the good-suffix table in two stages:
//   1. suff[i] = length of the longest run of CEs ending at position i that
//      also matches the end of the pattern;
//   2. goodSuffixTable[] is derived from suff[], with distances taken from
//      badCharacterTable.minLengthInChars() instead of raw CE counts.
// Leaves goodSuffixTable NULL when status is already a failure, the pattern
// is empty, or an allocation fails (status = U_MEMORY_ALLOCATION_ERROR).
GoodSuffixTable::GoodSuffixTable(CEList &patternCEs, BadCharacterTable &badCharacterTable, UErrorCode &status)
    : goodSuffixTable(NULL)
{
    int32_t patlen = patternCEs.size();

    // **** need a better way to deal with this ****
    if (U_FAILURE(status) || patlen <= 0) {
        return;
    }

    int32_t *suff  = NEW_ARRAY(int32_t, patlen);
    // [start + 1, end] is the most recent suffix match found by the scan below
    int32_t start = patlen - 1, end = - 1;
    int32_t maxSkip = badCharacterTable.getMaxSkip();

    if (suff == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    // initialize suff: the whole pattern is trivially a suffix of itself
    suff[patlen - 1] = patlen;

    for (int32_t i = patlen - 2; i >= 0; i -= 1) {
        // (i > start) means we're inside the last suffix match we found
        // ((patlen - 1) - end) is how far the end of that match is from end of pattern
        // (i - start) is how far we are from start of that match
        // (i + (patlen - 1) - end) is index of same character at end of pattern
        // so if any suffix match at that character doesn't extend beyond the last match,
        // it's the suffix for this character as well
        if (i > start && suff[i + patlen - 1 - end] < i - start) {
            suff[i] = suff[i + patlen - 1 - end];
        } else {
            // otherwise compare explicitly, walking backwards from i and
            // from the end of the pattern in lock step
            start = end = i;

            int32_t s = patlen;

            while (start >= 0 && patternCEs[start] == patternCEs[--s]) {
                start -= 1;
            }

            suff[i] = end - start;
        }
    }

    // now build goodSuffixTable
    goodSuffixTable  = NEW_ARRAY(int32_t, patlen);

    if (goodSuffixTable == NULL) {
        DELETE_ARRAY(suff);
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }


    // initialize entries to minLengthInChars of the pattern
    for (int32_t i = 0; i < patlen; i += 1) {
        goodSuffixTable[i] = maxSkip;
    }

    // next table index not yet visited by the prefix pass below
    int32_t prefix = 0;

    for (int32_t i = patlen - /*1*/ 2; i >= 0; i -= 1) {
        if (suff[i] == i + 1) {
            // this matching suffix is a prefix of the pattern
            int32_t prefixSkip = badCharacterTable.minLengthInChars(i + 1);

            // for any mis-match before this suffix, we should skip
            // so that the front of the pattern (i.e. the prefix)
            // lines up with the front of the suffix.
            // (patlen - 1 - i) is the start of the suffix
            while (prefix < patlen - 1 - i) {
                // value of maxSkip means never set...
                if (goodSuffixTable[prefix] == maxSkip) {
                    goodSuffixTable[prefix] = prefixSkip;
                }

                prefix += 1;
            }
        }
    }

    // final pass: the position just before each matched suffix gets the
    // value cached for i + 1; later i overwrite earlier ones
    for (int32_t i = 0; i < patlen - 1; i += 1) {
        goodSuffixTable[patlen - 1 - suff[i]] = badCharacterTable.minLengthInChars(i + 1);
    }

    DELETE_ARRAY(suff);
}

// Releases the skip table.
GoodSuffixTable::~GoodSuffixTable()
{
    DELETE_ARRAY(this->goodSuffixTable);
}

// Skip distance for a mismatch at pattern position `offset`. No range
// check, and the table is NULL if construction returned early.
int32_t GoodSuffixTable::operator[](int32_t offset) const
{
    const int32_t skip = goodSuffixTable[offset];

    return skip;
}

UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BoyerMooreSearch)


// TRUE when there is nothing to search for: the pattern produced no
// collation elements, or the pattern CE list was never created because
// construction failed (patCEs stays NULL in that case).
UBool BoyerMooreSearch::empty()
{
    return patCEs == NULL || patCEs->size() <= 0;
}

// The collation data object supplied at construction. It is not owned: the
// destructor does not delete it.
CollData *BoyerMooreSearch::getData()
{
    return this->data;
}

// The pattern's CE list; owned by this object and NULL if construction
// failed before it was created.
CEList *BoyerMooreSearch::getPatternCEs()
{
    return this->patCEs;
}

// The bad-character skip table; owned by this object and NULL if
// construction failed before it was created.
BadCharacterTable *BoyerMooreSearch::getBadCharacterTable()
{
    return this->badCharacterTable;
}

// The good-suffix skip table; owned by this object and NULL if
// construction failed before it was created.
GoodSuffixTable *BoyerMooreSearch::getGoodSuffixTable()
{
    return this->goodSuffixTable;
}

// Prepares a Boyer-Moore search for patternString using the collator held
// by theData.
//
// theData        collation data; must not be NULL and is not owned.
// patternString  the pattern; a copy is kept in `pattern`.
// targetString   text to search, or NULL to supply it later through
//                setTargetString(). The string is not copied.
// status         unchanged on success. Set to U_ILLEGAL_ARGUMENT_ERROR if
//                theData is NULL, to U_MEMORY_ALLOCATION_ERROR if any
//                helper object cannot be allocated, or to whatever failure
//                a helper's constructor reports.
//
// Construction stops at the first failure; members not yet created stay
// NULL, and the destructor copes with that.
BoyerMooreSearch::BoyerMooreSearch(CollData *theData, const UnicodeString &patternString, const UnicodeString *targetString,
                                   UErrorCode &status)
    : data(theData), patCEs(NULL), badCharacterTable(NULL), goodSuffixTable(NULL), pattern(patternString), target(NULL)
{

    if (U_FAILURE(status)) {
        return;
    }

    if (data == NULL) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    UCollator *collator = data->getCollator();

    // A NULL result from `new` is treated as allocation failure (ICU's
    // convention) and reported through status so the caller can see it.
    patCEs = new CEList(collator, patternString, status);

    if (patCEs == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    if (U_FAILURE(status)) {
        return;
    }

    badCharacterTable = new BadCharacterTable(*patCEs, data, status);

    if (badCharacterTable == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    if (U_FAILURE(status)) {
        return;
    }

    goodSuffixTable = new GoodSuffixTable(*patCEs, *badCharacterTable, status);

    if (goodSuffixTable == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    if (U_FAILURE(status)) {
        return;
    }

    if (targetString != NULL) {
        target = new Target(collator, targetString, patCEs->size(), status);

        if (target == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
        }
    }
}

// Destroys the helper objects created by this searcher. The CollData
// passed to the constructor is not deleted.
BoyerMooreSearch::~BoyerMooreSearch()
{
    delete this->target;
    delete this->goodSuffixTable;
    delete this->badCharacterTable;
    delete this->patCEs;
}

void BoyerMooreSearch::setTargetString(const UnicodeString *targetString, UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return;
    }

    if (target == NULL) {
        target = new Target(data->getCollator(), targetString, patCEs->size(), status);
    } else {
        target->setTargetString(targetString);
    }
}

// **** main flow of this code from Laura Werner's "Unicode Text Searching in Java" paper. ****
/*
 * Searches the target text for the pattern, starting from `offset`.
 *
 * Returns TRUE and stores the match range in `start` / `end` (offsets into
 * the target string) when a match is found. Returns FALSE and sets both to
 * -1 when there is no match, when the pattern has no collation elements,
 * or when this object is not ready to search (no target string set, or
 * construction failed and left a helper table missing).
 *
 * Each round positions the CE iterator just past the candidate end
 * (tOffset), reads CEs backwards into the Target's buffer, and compares
 * them with the pattern CEs from last to first. On a mismatch tOffset is
 * advanced by the larger of the bad-character and good-suffix skips.
 *
 * NOTE(review): Target::prevCE() / nextCE() return NULL once the CE buffer
 * is full; the results are dereferenced here without a check. Confirm that
 * the buffer sizing in Target::Target() makes that unreachable.
 *
 * TODO:
 *  * deal with trailing (and leading?) ignorables.
 *  * Adding BoyerMooreSearch object slowed it down. How can we speed it up?
 */
UBool BoyerMooreSearch::search(int32_t offset, int32_t &start, int32_t &end)
{
    // Nothing can be searched without a target and the pattern tables;
    // report "not found" rather than dereferencing a NULL member.
    if (target == NULL || patCEs == NULL || badCharacterTable == NULL || goodSuffixTable == NULL) {
        start = end = -1;
        return FALSE;
    }

    int32_t plen = patCEs->size();

    if (plen <= 0) {
        // Searching for a zero length pattern always fails.
        start = end = -1;
        return FALSE;
    }

    int32_t tlen = target->stringLength();
    int32_t maxSkip = badCharacterTable->getMaxSkip();
    // A match cannot end before offset + maxSkip, so start looking there.
    int32_t tOffset = offset + maxSkip;

    while (tOffset <= tlen) {
        int32_t pIndex = plen - 1;
        int32_t tIndex = 0;
        int32_t lIndex = 0;

        if (tOffset < tlen) {
            // **** we really want to skip ahead enough to  ****
            // **** be sure we get at least 1 non-ignorable ****
            // **** CE after the end of the pattern.        ****
            int32_t next = target->nextSafeBoundary(tOffset + 1);

            target->setOffset(next);

            // Read backwards from `next` until reaching the CE that ends at
            // or covers tOffset; lIndex ends up as the buffer index of the
            // entry just after the candidate's last CE.
            for (lIndex = 0; ; lIndex += 1) {
                const CEI *cei = target->prevCE(lIndex);
                int32_t low = cei->lowOffset;
                int32_t high = cei->highOffset;

                if (high == 0 || (low < high && low <= tOffset)) {
                    if (low < tOffset) {
                        // This CE straddles tOffset: step back over every
                        // buffered CE with the same high offset and move
                        // tOffset up to that boundary.
                        while (lIndex >= 0 && target->prevCE(lIndex)->highOffset == high) {
                            lIndex -= 1;
                        }

                        if (high > tOffset) {
                            tOffset = high;
                        }
                    }

                    break;
                }
            }
        } else {
            // Candidate ends at the end of the text: seed the buffer with
            // an end marker instead of reading past the end.
            target->setLast(tOffset);
            lIndex = 0;
        }

        tIndex = ++lIndex;

        // Iterate backward until we hit the beginning of the pattern
        while (pIndex >= 0) {
            uint32_t pce = (*patCEs)[pIndex];
            const CEI *tcei = target->prevCE(tIndex++);


            if (tcei->order != pce) {
                // There is a mismatch at this position.  Decide how far
                // over to shift the pattern, then try again.

                int32_t gsOffset = tOffset + (*goodSuffixTable)[pIndex];
#ifdef EXTRA_CAUTIOUS
                int32_t old = tOffset;
#endif

                tOffset += (*badCharacterTable)[tcei->order] - badCharacterTable->minLengthInChars(pIndex + 1);

                if (gsOffset > tOffset) {
                    tOffset = gsOffset;
                }

#ifdef EXTRA_CAUTIOUS
                // Make sure we don't skip backwards...
                if (tOffset <= old) {
                    tOffset = old + 1;
                }
#endif

                break;
            }

            pIndex -= 1;
        }

        if (pIndex < 0) {
            // We made it back to the beginning of the pattern,
            // which means we matched it all.  Return the location.
            const CEI firstCEI = *target->prevCE(tIndex - 1);
            const CEI lastCEI  = *target->prevCE(lIndex);
            int32_t mStart   = firstCEI.lowOffset;
            int32_t minLimit = lastCEI.lowOffset;
            int32_t maxLimit = lastCEI.highOffset;
            int32_t mLimit;
            UBool found = TRUE;

            // Look at the first CE after the match (this resets the buffer,
            // which is why firstCEI / lastCEI were copied above).
            target->setOffset(/*tOffset*/maxLimit);

            const CEI nextCEI = *target->nextCE(0);

            if (nextCEI.lowOffset > maxLimit) {
                maxLimit = nextCEI.lowOffset;
            }

            // A following CE with equal offsets that is not the end marker
            // rejects the match.
            if (nextCEI.lowOffset == nextCEI.highOffset && nextCEI.order != UCOL_NULLORDER) {
                found = FALSE;
            }

            if (! target->isBreakBoundary(mStart)) {
                found = FALSE;
            }

            // Likewise reject when the first matched CE has equal offsets.
            if (firstCEI.lowOffset == firstCEI.highOffset) {
                found = FALSE;
            }

            // Prefer the character boundary following minLimit as the match
            // limit, provided it is not before the end of the last CE.
            mLimit = maxLimit;
            if (minLimit < maxLimit) {
                int32_t nbb = target->nextBreakBoundary(minLimit);

                if (nbb >= lastCEI.highOffset) {
                    mLimit = nbb;
                }
            }

            if (mLimit > maxLimit) {
                found = FALSE;
            }

            if (! target->isBreakBoundary(mLimit)) {
                found = FALSE;
            }

            if (! target->isIdentical(pattern, mStart, mLimit)) {
                found = FALSE;
            }

            if (found) {
                start = mStart;
                end   = mLimit;

                return TRUE;
            }

            tOffset += (*goodSuffixTable)[0]; // really? Maybe += 1 or += maxSkip?
        }
        // Otherwise, we're here because of a mismatch, so keep going....
    }

    // no match
    start = -1;
    end = -1;
    return FALSE;
}

U_NAMESPACE_END

#endif // #if !UCONFIG_NO_COLLATION
ICU-6659 Merge changes from branches/eric/boyer-moore X-SVN-Rev: 25282 2009-01-22 00:24:48 +00:00			`/*`
			`******************************************************************************`
			`* Copyright (C) 1996-2009, International Business Machines *`
			`* Corporation and others. All Rights Reserved. *`
			`******************************************************************************`
			`*/`

			`#include "unicode/utypes.h"`

			`#if !UCONFIG_NO_COLLATION`

			`#include "unicode/unistr.h"`
			`#include "unicode/putil.h"`
			`#include "unicode/usearch.h"`

			`#include "cmemory.h"`
			`#include "unicode/coll.h"`
			`#include "unicode/tblcoll.h"`
			`#include "unicode/coleitr.h"`
			`#include "unicode/ucoleitr.h"`

			`#include "unicode/regex.h" // TODO: make conditional on regexp being built.`

			`#include "unicode/uniset.h"`
			`#include "unicode/uset.h"`
			`#include "unicode/ustring.h"`
			`#include "hash.h"`
			`#include "uhash.h"`
			`#include "ucol_imp.h"`
			`#include "unormimp.h"`

			`#include "unicode/colldata.h"`
			`#include "unicode/bmsearch.h"`

			`U_NAMESPACE_BEGIN`

			`#define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0]))`
			`#define NEW_ARRAY(type, count) (type ) uprv_malloc((count) sizeof(type))`
			`#define DELETE_ARRAY(array) uprv_free((void *) (array))`


			`struct CEI`
			`{`
			`uint32_t order;`
			`int32_t lowOffset;`
			`int32_t highOffset;`
			`};`

			`class Target : public UMemory`
			`{`
			`public:`
			`Target(UCollator theCollator, const UnicodeString target, int32_t patternLength, UErrorCode &status);`
			`~Target();`

			`void setTargetString(const UnicodeString *target);`

			`const CEI *nextCE(int32_t offset);`
			`const CEI *prevCE(int32_t offset);`

			`int32_t stringLength();`
			`UChar charAt(int32_t offset);`

			`UBool isBreakBoundary(int32_t offset);`
			`int32_t nextBreakBoundary(int32_t offset);`
			`int32_t nextSafeBoundary(int32_t offset);`

			`UBool isIdentical(UnicodeString &pattern, int32_t start, int32_t end);`

			`void setOffset(int32_t offset);`
			`void setLast(int32_t last);`
			`int32_t getOffset();`

			`private:`
			`CEI *ceb;`
			`int32_t bufferSize;`
			`int32_t bufferMin;`
			`int32_t bufferMax;`

			`uint32_t strengthMask;`
			`UCollationStrength strength;`
			`uint32_t variableTop;`
			`UBool toShift;`
			`UCollator *coll;`

			`const UnicodeString *targetString;`
			`const UChar *targetBuffer;`
			`int32_t targetLength;`

			`UCollationElements *elements;`
			`UBreakIterator *charBreakIterator;`
			`};`

			`Target::Target(UCollator theCollator, const UnicodeString target, int32_t patternLength, UErrorCode &status)`
			`: bufferSize(0), bufferMin(0), bufferMax(0),`
			`strengthMask(0), strength(UCOL_PRIMARY), variableTop(0), toShift(FALSE), coll(theCollator),`
			`targetString(NULL), targetBuffer(NULL), targetLength(0), elements(NULL), charBreakIterator(NULL)`
			`{`
			`strength = ucol_getStrength(coll);`
			`toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) == UCOL_SHIFTED;`
			`variableTop = ucol_getVariableTop(coll, &status);`

			`// find the largest expansion`
			`uint8_t maxExpansion = 0;`
			`for (const uint8_t expansion = coll->expansionCESize; expansion != 0; expansion += 1) {`
			`if (*expansion > maxExpansion) {`
			`maxExpansion = *expansion;`
			`}`
			`}`

			`// room for an extra character on each end, plus 4 for safety`
			`bufferSize = patternLength + (2 * maxExpansion) + 4;`

			`ceb = NEW_ARRAY(CEI, bufferSize);`

			`if (ceb == NULL) {`
			`status = U_MEMORY_ALLOCATION_ERROR;`
			`return;`
			`}`

			`if (target != NULL) {`
			`setTargetString(target);`
			`}`

			`switch (strength)`
			`{`
			`default:`
			`strengthMask \|= UCOL_TERTIARYORDERMASK;`
			`/* fall through */`

			`case UCOL_SECONDARY:`
			`strengthMask \|= UCOL_SECONDARYORDERMASK;`
			`/* fall through */`

			`case UCOL_PRIMARY:`
			`strengthMask \|= UCOL_PRIMARYORDERMASK;`
			`}`
			`}`

			`Target::~Target()`
			`{`
			`ubrk_close(charBreakIterator);`
			`ucol_closeElements(elements);`

			`DELETE_ARRAY(ceb);`
			`}`

			`void Target::setTargetString(const UnicodeString *target)`
			`{`
			`if (charBreakIterator != NULL) {`
			`ubrk_close(charBreakIterator);`
			`ucol_closeElements(elements);`
			`}`

			`targetString = target;`

			`if (targetString != NULL) {`
			`UErrorCode status = U_ZERO_ERROR;`

			`targetBuffer = targetString->getBuffer();`
			`targetLength = targetString->length();`

			`elements = ucol_openElements(coll, target->getBuffer(), target->length(), &status);`
			`ucol_forceHanImplicit(elements, &status);`

			`charBreakIterator = ubrk_open(UBRK_CHARACTER, ucol_getLocale(coll, ULOC_VALID_LOCALE, &status),`
			`targetBuffer, targetLength, &status);`
			`} else {`
			`targetBuffer = NULL;`
			`targetLength = 0;`
			`}`
			`}`

			`const CEI *Target::nextCE(int32_t offset)`
			`{`
			`UErrorCode status = U_ZERO_ERROR;`
			`int32_t low = -1, high = -1;`
			`uint32_t order;`
			`UBool cont = FALSE;`

			`if (offset >= bufferMin && offset < bufferMax) {`
			`return &ceb[offset];`
			`}`

			`if (bufferMax >= bufferSize \|\| offset != bufferMax) {`
			`return NULL;`
			`}`

			`do {`
			`low = ucol_getOffset(elements);`
			`order = ucol_next(elements, &status);`
			`high = ucol_getOffset(elements);`

			`if (order == UCOL_NULLORDER) {`
			`//high = low = -1;`
			`break;`
			`}`

			`cont = isContinuation(order);`
			`order &= strengthMask;`

			`if (toShift && variableTop > order && (order & UCOL_PRIMARYORDERMASK) != 0) {`
			`if (strength >= UCOL_QUATERNARY) {`
			`order &= UCOL_PRIMARYORDERMASK;`
			`} else {`
			`order = UCOL_IGNORABLE;`
			`}`
			`}`
			`} while (order == UCOL_IGNORABLE);`

			`if (cont) {`
			`order \|= UCOL_CONTINUATION_MARKER;`
			`}`

			`ceb[offset].order = order;`
			`ceb[offset].lowOffset = low;`
			`ceb[offset].highOffset = high;`

			`bufferMax += 1;`

			`return &ceb[offset];`
			`}`

			`const CEI *Target::prevCE(int32_t offset)`
			`{`
			`UErrorCode status = U_ZERO_ERROR;`
			`int32_t low = -1, high = -1;`
			`uint32_t order;`
			`UBool cont = FALSE;`

			`if (offset >= bufferMin && offset < bufferMax) {`
			`return &ceb[offset];`
			`}`

			`if (bufferMax >= bufferSize \|\| offset != bufferMax) {`
			`return NULL;`
			`}`

			`do {`
			`high = ucol_getOffset(elements);`
			`order = ucol_previous(elements, &status);`
			`low = ucol_getOffset(elements);`

			`if (order == UCOL_NULLORDER) {`
			`break;`
			`}`

			`cont = isContinuation(order);`
			`order &= strengthMask;`

			`if (toShift && variableTop > order && (order & UCOL_PRIMARYORDERMASK) != 0) {`
			`if (strength >= UCOL_QUATERNARY) {`
			`order &= UCOL_PRIMARYORDERMASK;`
			`} else {`
			`order = UCOL_IGNORABLE;`
			`}`
			`}`
			`} while (order == UCOL_IGNORABLE);`

			`bufferMax += 1;`

			`if (cont) {`
			`order \|= UCOL_CONTINUATION_MARKER;`
			`}`

			`ceb[offset].order = order;`
			`ceb[offset].lowOffset = low;`
			`ceb[offset].highOffset = high;`

			`return &ceb[offset];`
			`}`

			`int32_t Target::stringLength()`
			`{`
			`if (targetString != NULL) {`
			`return targetLength;`
			`}`

			`return 0;`
			`}`

			`UChar Target::charAt(int32_t offset)`
			`{`
			`if (targetString != NULL) {`
			`return targetBuffer[offset];`
			`}`

			`return 0x0000;`
			`}`

			`void Target::setOffset(int32_t offset)`
			`{`
			`UErrorCode status = U_ZERO_ERROR;`

			`bufferMin = 0;`
			`bufferMax = 0;`

			`ucol_setOffset(elements, offset, &status);`
			`}`

			`void Target::setLast(int32_t last)`
			`{`
			`UErrorCode status = U_ZERO_ERROR;`

			`bufferMin = 0;`
			`bufferMax = 1;`

			`ceb[0].order = UCOL_NULLORDER;`
			`ceb[0].lowOffset = last;`
			`ceb[0].highOffset = last;`

			`ucol_setOffset(elements, last, &status);`
			`}`

			`int32_t Target::getOffset()`
			`{`
			`return ucol_getOffset(elements);`
			`}`

			`UBool Target::isBreakBoundary(int32_t offset)`
			`{`
			`return ubrk_isBoundary(charBreakIterator, offset);`
			`}`

			`int32_t Target::nextBreakBoundary(int32_t offset)`
			`{`
			`return ubrk_following(charBreakIterator, offset);`
			`}`

			`int32_t Target::nextSafeBoundary(int32_t offset)`
			`{`
			`while (offset < targetLength) {`
			`//UChar ch = charAt(offset);`
			`UChar ch = targetBuffer[offset];`

			`if (U_IS_LEAD(ch) \|\| ! ucol_unsafeCP(ch, coll)) {`
			`return offset;`
			`}`

			`offset += 1;`
			`}`

			`return targetLength;`
			`}`

			`UBool Target::isIdentical(UnicodeString &pattern, int32_t start, int32_t end)`
			`{`
			`if (strength < UCOL_IDENTICAL) {`
			`return TRUE;`
			`}`

			`UChar t2[32], p2[32];`
			`const UChar *pBuffer = pattern.getBuffer();`
			`int32_t pLength = pattern.length();`
			`int32_t length = end - start;`

			`UErrorCode status = U_ZERO_ERROR, status2 = U_ZERO_ERROR;`

			`int32_t decomplength = unorm_decompose(t2, ARRAY_SIZE(t2),`
			`targetBuffer + start, length,`
			`FALSE, 0, &status);`

			`// use separate status2 in case of buffer overflow`
			`if (decomplength != unorm_decompose(p2, ARRAY_SIZE(p2),`
			`pBuffer, pLength,`
			`FALSE, 0, &status2)) {`
			`return FALSE; // lengths are different`
			`}`

			`// compare contents`
			`UChar text, pat;`

			`if(U_SUCCESS(status)) {`
			`text = t2;`
			`pat = p2;`
			`} else if(status == U_BUFFER_OVERFLOW_ERROR) {`
			`status = U_ZERO_ERROR;`

			`// allocate one buffer for both decompositions`
			`text = NEW_ARRAY(UChar, decomplength * 2);`

			`// Check for allocation failure.`
			`if (text == NULL) {`
			`return FALSE;`
			`}`

			`pat = text + decomplength;`

			`unorm_decompose(text, decomplength, targetBuffer + start,`
			`length, FALSE, 0, &status);`

			`unorm_decompose(pat, decomplength, pBuffer,`
			`pLength, FALSE, 0, &status);`
			`} else {`
			`// NFD failed, make sure that u_memcmp() does not overrun t2 & p2`
			`// and that we don't uprv_free() an undefined text pointer`
			`text = pat = t2;`
			`decomplength = 0;`
			`}`

			`UBool result = (UBool)(u_memcmp(pat, text, decomplength) == 0);`

			`if(text != t2) {`
			`DELETE_ARRAY(text);`
			`}`

			`// return FALSE if NFD failed`
			`return U_SUCCESS(status) && result;`
			`}`

			`#define HASH_TABLE_SIZE 257`

			`class BadCharacterTable : public UMemory`
			`{`
			`public:`
			`BadCharacterTable(CEList &patternCEs, CollData *data, UErrorCode &status);`
			`~BadCharacterTable();`

			`int32_t operator[](uint32_t ce) const;`
			`int32_t getMaxSkip() const;`
			`int32_t minLengthInChars(int32_t index);`

			`private:`
			`static int32_t hash(uint32_t ce);`

			`int32_t maxSkip;`
			`int32_t badCharacterTable[HASH_TABLE_SIZE];`

			`int32_t *minLengthCache;`
			`};`

			`BadCharacterTable::BadCharacterTable(CEList &patternCEs, CollData *data, UErrorCode &status)`
			`: minLengthCache(NULL)`
			`{`
			`int32_t plen = patternCEs.size();`

			`// ** need a better way to deal with this **`
			`if (U_FAILURE(status) \|\| plen == 0) {`
			`return;`
			`}`

			`int32_t *history = NEW_ARRAY(int32_t, plen);`

			`if (history == NULL) {`
			`status = U_MEMORY_ALLOCATION_ERROR;`
			`return;`
			`}`

			`for (int32_t i = 0; i < plen; i += 1) {`
			`history[i] = -1;`
			`}`

			`minLengthCache = NEW_ARRAY(int32_t, plen + 1);`

			`if (minLengthCache == NULL) {`
			`DELETE_ARRAY(history);`
			`status = U_MEMORY_ALLOCATION_ERROR;`
			`return;`
			`}`

			`maxSkip = minLengthCache[0] = data->minLengthInChars(&patternCEs, 0, history);`

			`for(int32_t j = 0; j < HASH_TABLE_SIZE; j += 1) {`
			`badCharacterTable[j] = maxSkip;`
			`}`

			`for(int32_t p = 1; p < plen; p += 1) {`
			`minLengthCache[p] = data->minLengthInChars(&patternCEs, p, history);`

			`// Make sure this entry is not bigger than the previous one.`
			`// Otherwise, we might skip too far in some cases.`
			`if (minLengthCache[p] < 0 \|\| minLengthCache[p] > minLengthCache[p - 1]) {`
			`minLengthCache[p] = minLengthCache[p - 1];`
			`}`
			`}`

			`minLengthCache[plen] = 0;`

			`for(int32_t p = 0; p < plen - 1; p += 1) {`
			`badCharacterTable[hash(patternCEs[p])] = minLengthCache[p + 1];`
			`}`

			`DELETE_ARRAY(history);`
			`}`

			// Frees the cache array allocated by the constructor.
			BadCharacterTable::~BadCharacterTable()
			{
			    DELETE_ARRAY(this->minLengthCache);
			}

			// Looks up the skip value stored in the slot that `ce` hashes to.
			int32_t BadCharacterTable::operator[](uint32_t ce) const
			{
			    const int32_t slot = hash(ce);

			    return badCharacterTable[slot];
			}

			// Accessor for the stored maxSkip value.
			int32_t BadCharacterTable::getMaxSkip() const
			{
			    return this->maxSkip;
			}

			// Returns the cached entry at `index`; no bounds checking is performed.
			int32_t BadCharacterTable::minLengthInChars(int32_t index)
			{
			    const int32_t cached = minLengthCache[index];

			    return cached;
			}

			// Reduces the primary order of `ce` modulo the table size.
			int32_t BadCharacterTable::hash(uint32_t ce)
			{
			    return static_cast<int32_t>(UCOL_PRIMARYORDER(ce) % HASH_TABLE_SIZE);
			}

			// Table of skip values indexed by pattern CE position, built from the
			// pattern's collation elements and a BadCharacterTable.
			class GoodSuffixTable : public UMemory
			{
			public:
			    GoodSuffixTable(CEList &patternCEs, BadCharacterTable &badCharacterTable, UErrorCode &status);
			    ~GoodSuffixTable();

			    // Skip value stored at `offset`.
			    int32_t operator[](int32_t offset) const;

			private:
			    // Heap array with one entry per pattern CE; NULL until built.
			    int32_t *goodSuffixTable;
			};

			`GoodSuffixTable::GoodSuffixTable(CEList &patternCEs, BadCharacterTable &badCharacterTable, UErrorCode &status)`
			`: goodSuffixTable(NULL)`
			`{`
			`int32_t patlen = patternCEs.size();`

			`// ** need a better way to deal with this **`
			`if (U_FAILURE(status) \|\| patlen <= 0) {`
			`return;`
			`}`

			`int32_t *suff = NEW_ARRAY(int32_t, patlen);`
			`int32_t start = patlen - 1, end = - 1;`
			`int32_t maxSkip = badCharacterTable.getMaxSkip();`

			`if (suff == NULL) {`
			`status = U_MEMORY_ALLOCATION_ERROR;`
			`return;`
			`}`

			`// initialze suff`
			`suff[patlen - 1] = patlen;`

			`for (int32_t i = patlen - 2; i >= 0; i -= 1) {`
			`// (i > start) means we're inside the last suffix match we found`
			`// ((patlen - 1) - end) is how far the end of that match is from end of pattern`
			`// (i - start) is how far we are from start of that match`
			`// (i + (patlen - 1) - end) is index of same character at end of pattern`
			`// so if any suffix match at that character doesn't extend beyond the last match,`
			`// it's the suffix for this character as well`
			`if (i > start && suff[i + patlen - 1 - end] < i - start) {`
			`suff[i] = suff[i + patlen - 1 - end];`
			`} else {`
			`start = end = i;`

			`int32_t s = patlen;`

			`while (start >= 0 && patternCEs[start] == patternCEs[--s]) {`
			`start -= 1;`
			`}`

			`suff[i] = end - start;`
			`}`
			`}`

			`// now build goodSuffixTable`
			`goodSuffixTable = NEW_ARRAY(int32_t, patlen);`

			`if (goodSuffixTable == NULL) {`
			`DELETE_ARRAY(suff);`
			`status = U_MEMORY_ALLOCATION_ERROR;`
			`return;`
			`}`


			`// initialize entries to minLengthInChars of the pattern`
			`for (int32_t i = 0; i < patlen; i += 1) {`
			`goodSuffixTable[i] = maxSkip;`
			`}`

			`int32_t prefix = 0;`

			`for (int32_t i = patlen - /1/ 2; i >= 0; i -= 1) {`
			`if (suff[i] == i + 1) {`
			`// this matching suffix is a prefix of the pattern`
			`int32_t prefixSkip = badCharacterTable.minLengthInChars(i + 1);`

			`// for any mis-match before this suffix, we should skip`
			`// so that the front of the pattern (i.e. the prefix)`
			`// lines up with the front of the suffix.`
			`// (patlen - 1 - i) is the start of the suffix`
			`while (prefix < patlen - 1 - i) {`
			`// value of maxSkip means never set...`
			`if (goodSuffixTable[prefix] == maxSkip) {`
			`goodSuffixTable[prefix] = prefixSkip;`
			`}`

			`prefix += 1;`
			`}`
			`}`
			`}`

			`for (int32_t i = 0; i < patlen - 1; i += 1) {`
			`goodSuffixTable[patlen - 1 - suff[i]] = badCharacterTable.minLengthInChars(i + 1);`
			`}`

			`DELETE_ARRAY(suff);`
			`}`

			// Frees the table array allocated by the constructor.
			GoodSuffixTable::~GoodSuffixTable()
			{
			    DELETE_ARRAY(this->goodSuffixTable);
			}

			// Returns the table entry at `offset`; no bounds checking is performed.
			int32_t GoodSuffixTable::operator[](int32_t offset) const
			{
			    const int32_t skip = goodSuffixTable[offset];

			    return skip;
			}

			UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BoyerMooreSearch)


			// TRUE when the pattern's CE list reports a size of zero or less.
			UBool BoyerMooreSearch::empty()
			{
			    const int32_t ceCount = patCEs->size();

			    return ceCount <= 0;
			}

			// Accessor for the CollData pointer held by this object.
			CollData *BoyerMooreSearch::getData()
			{
			    return this->data;
			}

			// Accessor for the pattern's CE list; may be NULL if construction failed.
			CEList *BoyerMooreSearch::getPatternCEs()
			{
			    return this->patCEs;
			}

			// Accessor for the bad-character table; may be NULL if construction failed.
			BadCharacterTable *BoyerMooreSearch::getBadCharacterTable()
			{
			    return this->badCharacterTable;
			}

			// Accessor for the good-suffix table; may be NULL if construction failed.
			GoodSuffixTable *BoyerMooreSearch::getGoodSuffixTable()
			{
			    return this->goodSuffixTable;
			}

			// Prepares a Boyer-Moore search for `patternString`.
			//
			// theData       - collation data; its collator is used to build the
			//                 pattern's collation elements.
			// patternString - the pattern to search for (copied into `pattern`).
			// targetString  - optional text to search; may be NULL, in which case a
			//                 target must be supplied later via setTargetString().
			// status        - nothing is built if it already holds a failure. It is
			//                 set to U_MEMORY_ALLOCATION_ERROR if one of the helper
			//                 objects cannot be allocated.
			BoyerMooreSearch::BoyerMooreSearch(CollData *theData, const UnicodeString &patternString, const UnicodeString *targetString,
			                                   UErrorCode &status)
			    : data(theData), patCEs(NULL), badCharacterTable(NULL), goodSuffixTable(NULL), pattern(patternString), target(NULL)
			{

			    if (U_FAILURE(status)) {
			        return;
			    }

			    UCollator *collator = data->getCollator();

			    patCEs = new CEList(collator, patternString, status);

			    if (patCEs == NULL || U_FAILURE(status)) {
			        // Report an allocation failure instead of leaving status untouched.
			        if (patCEs == NULL && U_SUCCESS(status)) {
			            status = U_MEMORY_ALLOCATION_ERROR;
			        }

			        return;
			    }

			    badCharacterTable = new BadCharacterTable(*patCEs, data, status);

			    if (badCharacterTable == NULL || U_FAILURE(status)) {
			        if (badCharacterTable == NULL && U_SUCCESS(status)) {
			            status = U_MEMORY_ALLOCATION_ERROR;
			        }

			        return;
			    }

			    // GoodSuffixTable takes references, so dereference both pointers.
			    goodSuffixTable = new GoodSuffixTable(*patCEs, *badCharacterTable, status);

			    if (goodSuffixTable == NULL) {
			        if (U_SUCCESS(status)) {
			            status = U_MEMORY_ALLOCATION_ERROR;
			        }

			        return;
			    }

			    if (targetString != NULL) {
			        target = new Target(collator, targetString, patCEs->size(), status);

			        if (target == NULL && U_SUCCESS(status)) {
			            status = U_MEMORY_ALLOCATION_ERROR;
			        }
			    }
			}

			// Releases the helper objects created by the constructor or by
			// setTargetString(); members that were never created are NULL.
			BoyerMooreSearch::~BoyerMooreSearch()
			{
			    delete this->target;
			    delete this->goodSuffixTable;
			    delete this->badCharacterTable;
			    delete this->patCEs;
			}

			// Points the search at a new target string. Does nothing if `status`
			// already holds a failure. An existing Target object is reused; otherwise
			// a new one is created.
			void BoyerMooreSearch::setTargetString(const UnicodeString *targetString, UErrorCode &status)
			{
			    if (U_FAILURE(status)) {
			        return;
			    }

			    if (target != NULL) {
			        target->setTargetString(targetString);
			        return;
			    }

			    target = new Target(data->getCollator(), targetString, patCEs->size(), status);
			}

			`// ** main flow of this code from Laura Werner's "Unicode Text Searching in Java" paper. **`
			`/*`
			`* TODO:`
			`* * deal with trailing (and leading?) ignorables.`
			`* * Adding BoyerMooreSearch object slowed it down. How can we speed it up?`
			`*/`
			`UBool BoyerMooreSearch::search(int32_t offset, int32_t &start, int32_t &end)`
			`{`
			`UCollator *coll = data->getCollator();`
			`int32_t plen = patCEs->size();`
			`int32_t tlen = target->stringLength();`
			`int32_t maxSkip = badCharacterTable->getMaxSkip();`
			`int32_t tOffset = offset + maxSkip;`

			`if (plen <= 0) {`
			`// Searching for a zero length pattern always fails.`
			`start = end = -1;`
			`return FALSE;`
			`}`

			`while (tOffset <= tlen) {`
			`int32_t pIndex = plen - 1;`
			`int32_t tIndex = 0;`
			`int32_t lIndex = 0;`

			`if (tOffset < tlen) {`
			`// ** we really want to skip ahead enough to **`
			`// ** be sure we get at least 1 non-ignorable **`
			`// ** CE after the end of the pattern. **`
			`int32_t next = target->nextSafeBoundary(tOffset + 1);`

			`target->setOffset(next);`

			`for (lIndex = 0; ; lIndex += 1) {`
			`const CEI *cei = target->prevCE(lIndex);`
			`int32_t low = cei->lowOffset;`
			`int32_t high = cei->highOffset;`

			`if (high == 0 \|\| (low < high && low <= tOffset)) {`
			`if (low < tOffset) {`
			`while (lIndex >= 0 && target->prevCE(lIndex)->highOffset == high) {`
			`lIndex -= 1;`
			`}`

			`if (high > tOffset) {`
			`tOffset = high;`
			`}`
			`}`

			`break;`
			`}`
			`}`
			`} else {`
			`target->setLast(tOffset);`
			`lIndex = 0;`
			`}`

			`tIndex = ++lIndex;`

			`// Iterate backward until we hit the beginning of the pattern`
			`while (pIndex >= 0) {`
			`uint32_t pce = (*patCEs)[pIndex];`
			`const CEI *tcei = target->prevCE(tIndex++);`


			`if (tcei->order != pce) {`
			`// There is a mismatch at this position. Decide how far`
			`// over to shift the pattern, then try again.`

			`int32_t gsOffset = tOffset + (*goodSuffixTable)[pIndex];`
			`#ifdef EXTRA_CAUTIOUS`
			`int32_t old = tOffset;`
			`#endif`

			`tOffset += (*badCharacterTable)[tcei->order] - badCharacterTable->minLengthInChars(pIndex + 1);`

			`if (gsOffset > tOffset) {`
			`tOffset = gsOffset;`
			`}`

			`#ifdef EXTRA_CAUTIOUS`
			`// Make sure we don't skip backwards...`
			`if (tOffset <= old) {`
			`tOffset = old + 1;`
			`}`
			`#endif`

			`break;`
			`}`

			`pIndex -= 1;`
			`}`

			`if (pIndex < 0) {`
			`// We made it back to the beginning of the pattern,`
			`// which means we matched it all. Return the location.`
			`const CEI firstCEI = *target->prevCE(tIndex - 1);`
			`const CEI lastCEI = *target->prevCE(lIndex);`
			`int32_t mStart = firstCEI.lowOffset;`
			`int32_t minLimit = lastCEI.lowOffset;`
			`int32_t maxLimit = lastCEI.highOffset;`
			`int32_t mLimit;`
			`UBool found = TRUE;`

			`target->setOffset(/tOffset/maxLimit);`

			`const CEI nextCEI = *target->nextCE(0);`

			`if (nextCEI.lowOffset > maxLimit) {`
			`maxLimit = nextCEI.lowOffset;`
			`}`

			`if (nextCEI.lowOffset == nextCEI.highOffset && nextCEI.order != UCOL_NULLORDER) {`
			`found = FALSE;`
			`}`

			`if (! target->isBreakBoundary(mStart)) {`
			`found = FALSE;`
			`}`

			`if (firstCEI.lowOffset == firstCEI.highOffset) {`
			`found = FALSE;`
			`}`

			`mLimit = maxLimit;`
			`if (minLimit < maxLimit) {`
			`int32_t nbb = target->nextBreakBoundary(minLimit);`

			`if (nbb >= lastCEI.highOffset) {`
			`mLimit = nbb;`
			`}`
			`}`

			`if (mLimit > maxLimit) {`
			`found = FALSE;`
			`}`

			`if (! target->isBreakBoundary(mLimit)) {`
			`found = FALSE;`
			`}`

			`if (! target->isIdentical(pattern, mStart, mLimit)) {`
			`found = FALSE;`
			`}`

			`if (found) {`
			`start = mStart;`
			`end = mLimit;`

			`return TRUE;`
			`}`

			`tOffset += (*goodSuffixTable)[0]; // really? Maybe += 1 or += maxSkip?`
			`}`
			`// Otherwise, we're here because of a mismatch, so keep going....`
			`}`

			`// no match`
			`start = -1;`
			`end = -1;`
			`return FALSE;`
			`}`

			`U_NAMESPACE_END`

			`#endif // #if !UCONFIG_NO_COLLATION`