diff --git a/icu4c/source/i18n/Makefile.in b/icu4c/source/i18n/Makefile.in
index d3467b7259..f17971aa4e 100644
--- a/icu4c/source/i18n/Makefile.in
+++ b/icu4c/source/i18n/Makefile.in
@@ -1,6 +1,6 @@
 #******************************************************************************
 #
-#   Copyright (C) 1998-2008, International Business Machines
+#   Copyright (C) 1998-2009, International Business Machines
 #   Corporation and others.  All Rights Reserved.
 #
 #******************************************************************************
@@ -81,7 +81,7 @@ ulocdata.o measfmt.o currfmt.o curramt.o currunit.o measure.o utmscale.o \
 csdetect.o csmatch.o csr2022.o csrecog.o csrmbcs.o csrsbcs.o csrucode.o csrutf8.o inputext.o \
 wintzimpl.o windtfmt.o winnmfmt.o basictz.o dtrule.o rbtz.o tzrule.o tztrans.o vtzone.o \
 zonemeta.o zstrfmt.o plurrule.o plurfmt.o dtitvfmt.o dtitvinf.o \
-tmunit.o tmutamt.o tmutfmt.o
+tmunit.o tmutamt.o tmutfmt.o colldata.o bmsearch.o bms.o
 
 ## Header files to install
 HEADERS = $(srcdir)/unicode/*.h
diff --git a/icu4c/source/i18n/bms.cpp b/icu4c/source/i18n/bms.cpp
new file mode 100644
index 0000000000..cbcdb38368
--- /dev/null
+++ b/icu4c/source/i18n/bms.cpp
@@ -0,0 +1,145 @@
+/*
+ * Copyright (C) 2008-2009, International Business Machines Corporation and Others.
+ * All rights reserved.
+ */
+
+#include "unicode/utypes.h"
+#include "cmemory.h"
+#include "unicode/bms.h"
+#include "unicode/unistr.h"
+#include "unicode/colldata.h"
+#include "unicode/bmsearch.h"
+
+//#define USE_SAFE_CASTS
+#ifdef USE_SAFE_CASTS
+#define STATIC_CAST(type,value) static_cast<type>(value)
+#define CONST_CAST(type,value) const_cast<type>(value)
+#else
+#define STATIC_CAST(type,value) (type) (value)
+#define CONST_CAST(type,value) (type) (value)
+#endif
+
+U_CAPI UCD * U_EXPORT2
+ucd_open(UCollator *coll, UErrorCode *status)
+{   // C wrapper: the opaque UCD handle is whatever CollData::open() returns for coll; status must be non-NULL.
+    return STATIC_CAST(UCD *, CollData::open(coll, *status));
+}
+
+U_CAPI void U_EXPORT2
+ucd_close(UCD *ucd)
+{
+    // The opaque UCD handle is really a CollData; hand it back to
+    // CollData::close().
+    CollData::close(STATIC_CAST(CollData *, ucd));
+}
+
+U_CAPI UCollator * U_EXPORT2
+ucd_getCollator(UCD *ucd)
+{
+    // View the opaque handle as the CollData it wraps and ask that
+    // object for its collator.
+    return (STATIC_CAST(CollData *, ucd))->getCollator();
+}
+
+U_CAPI void U_EXPORT2
+ucd_freeCache()
+{   // C wrapper: forwards to CollData::freeCollDataCache().
+    CollData::freeCollDataCache();
+}
+
+U_CAPI void U_EXPORT2
+ucd_flushCache()
+{   // C wrapper: forwards to CollData::flushCollDataCache().
+    CollData::flushCollDataCache();
+}
+
+struct BMS  // state behind the opaque BMS handle used by the bms_*() functions
+{
+    BoyerMooreSearch *bms;              // searcher created by bms_open(); deleted by bms_close()
+    const UnicodeString *targetString;  // handle's own copy of the target text, or NULL; deleted by bms_close()
+};
+
+U_CAPI BMS * U_EXPORT2
+bms_open(UCD *ucd,
+         const UChar *pattern, int32_t patternLength,
+         const UChar *target,  int32_t targetLength,
+         UErrorCode  *status)
+{   // Creates a search handle for pattern over an optional target; release it with bms_close().
+    // ICU convention: do no work when the caller's status already reports a failure.
+    if (U_FAILURE(*status)) {
+        return NULL;
+    }
+    BMS *bms = STATIC_CAST(BMS *, uprv_malloc(sizeof(BMS)));
+    if (bms == NULL) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    CollData *data = STATIC_CAST(CollData *, ucd);
+    UnicodeString patternString(pattern, patternLength);
+
+    // Define both members up front so that bms_close() is safe on every path below.
+    bms->bms = NULL;
+    bms->targetString = NULL;
+    if (target != NULL) {
+        // The handle keeps its own copy of the target text; bms_close() deletes it.
+        bms->targetString = new UnicodeString(target, targetLength);
+        if (bms->targetString == NULL) {
+            *status = U_MEMORY_ALLOCATION_ERROR;
+            return bms;
+        }
+    }
+
+    bms->bms = new BoyerMooreSearch(data, patternString, bms->targetString, *status);
+    if (bms->bms == NULL) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+    }
+    return bms;
+}
+
+U_CAPI void U_EXPORT2
+bms_close(BMS *bms)
+{   // Releases everything owned by a handle from bms_open().
+    if (bms != NULL) {  // bms_open() returns NULL when allocation fails; closing that is a no-op
+        delete bms->bms;           // the searcher
+        delete bms->targetString;  // the handle's copy of the target text
+        uprv_free(bms);
+    }
+}
+
+U_CAPI UBool U_EXPORT2
+bms_empty(BMS *bms)
+{   // C wrapper around BoyerMooreSearch::empty(); bms and its searcher are dereferenced unchecked.
+    return bms->bms->empty();
+}
+
+U_CAPI UCD * U_EXPORT2
+bms_getData(BMS *bms)
+{   // C wrapper: returns the searcher's CollData as an opaque UCD handle.
+    return STATIC_CAST(UCD *, bms->bms->getData());
+}
+
+U_CAPI UBool U_EXPORT2
+bms_search(BMS *bms, int32_t offset, int32_t *start, int32_t *end)
+{   // C wrapper around BoyerMooreSearch::search(); start and end are dereferenced, so both must be non-NULL.
+    return bms->bms->search(offset, *start, *end);
+}
+
+U_CAPI void U_EXPORT2
+bms_setTargetString(BMS *bms, const UChar *target, int32_t targetLength, UErrorCode *status)
+{   // Replaces the text searched through this handle; a NULL target detaches the current text.
+    if (U_FAILURE(*status)) {
+        return;
+    }
+    // Build the replacement first so a failed allocation leaves the handle untouched.
+    const UnicodeString *replacement = NULL;
+    if (target != NULL) {
+        replacement = new UnicodeString(target, targetLength);
+        if (replacement == NULL) {
+            *status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+    }
+    bms->bms->setTargetString(replacement, *status); // the searcher lets go of the old text here
+    delete bms->targetString;                        // ...so only now is it safe to free it
+    bms->targetString = replacement;
+}
diff --git a/icu4c/source/i18n/bmsearch.cpp b/icu4c/source/i18n/bmsearch.cpp
new file mode 100644
index 0000000000..1e5f90bbc3
--- /dev/null
+++ b/icu4c/source/i18n/bmsearch.cpp
@@ -0,0 +1,864 @@
+/*
+ ******************************************************************************
+ *   Copyright (C) 1996-2009, International Business Machines                 *
+ *   Corporation and others.  All Rights Reserved.                            *
+ ******************************************************************************
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_COLLATION
+
+#include "unicode/unistr.h"
+#include "unicode/putil.h"
+#include "unicode/usearch.h"
+
+#include "cmemory.h"
+#include "unicode/coll.h"
+#include "unicode/tblcoll.h"
+#include "unicode/coleitr.h"
+#include "unicode/ucoleitr.h"
+
+#include "unicode/regex.h"        // TODO: make conditional on regexp being built.
+
+#include "unicode/uniset.h"
+#include "unicode/uset.h"
+#include "unicode/ustring.h"
+#include "hash.h"
+#include "uhash.h"
+#include "ucol_imp.h"
+#include "unormimp.h"
+
+#include "unicode/colldata.h"
+#include "unicode/bmsearch.h"
+
+U_NAMESPACE_BEGIN
+
+#define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0]))
+#define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type))
+#define DELETE_ARRAY(array) uprv_free((void *) (array))
+
+
+struct CEI  // one buffered collation element together with the text offsets around it
+{
+    uint32_t order;       // collation element value as stored by Target::nextCE()/prevCE()
+    int32_t  lowOffset;   // iterator offset on the low (start) side of the text that produced it
+    int32_t  highOffset;  // iterator offset on the high (end) side of that text
+};
+
+class Target : public UMemory  // the text being searched, plus a small buffer of its collation elements
+{
+public:
+    Target(UCollator *theCollator, const UnicodeString *target, int32_t patternLength, UErrorCode &status);
+    ~Target();
+    // switch to a different target string (may be NULL); the string is borrowed, not copied
+    void setTargetString(const UnicodeString *target);
+    // fetch buffered CE number `offset`, reading forward / backward through the text when it is the next slot
+    const CEI *nextCE(int32_t offset);
+    const CEI *prevCE(int32_t offset);
+    // raw access to the target text
+    int32_t stringLength();
+    UChar charAt(int32_t offset);
+    // boundary queries on the target text
+    UBool isBreakBoundary(int32_t offset);
+    int32_t nextBreakBoundary(int32_t offset);
+    int32_t nextSafeBoundary(int32_t offset);
+    // extra check applied to a candidate match [start, end) at identical strength
+    UBool isIdentical(UnicodeString &pattern, int32_t start, int32_t end);
+    // reposition the collation element iterator; both setters also reset the CE buffer
+    void setOffset(int32_t offset);
+    void setLast(int32_t last);
+    int32_t getOffset();
+
+private:
+    CEI *ceb;            // CE buffer, bufferSize entries, allocated by the constructor
+    int32_t bufferSize;
+    int32_t bufferMin;   // entries in [bufferMin, bufferMax) are valid
+    int32_t bufferMax;
+    // collator settings cached by the constructor
+    uint32_t strengthMask;
+    UCollationStrength strength;
+    uint32_t variableTop;
+    UBool toShift;
+    UCollator *coll;
+    // current target text (not owned) and its raw buffer / length
+    const UnicodeString *targetString;
+    const UChar *targetBuffer;
+    int32_t targetLength;
+    // iterators over the current target, opened by setTargetString()
+    UCollationElements *elements;
+    UBreakIterator *charBreakIterator;
+};
+
+Target::Target(UCollator *theCollator, const UnicodeString *target, int32_t patternLength, UErrorCode &status)
+    : bufferSize(0), bufferMin(0), bufferMax(0),
+      strengthMask(0), strength(UCOL_PRIMARY), variableTop(0), toShift(FALSE), coll(theCollator),
+      targetString(NULL), targetBuffer(NULL), targetLength(0), elements(NULL), charBreakIterator(NULL)
+{
+    // Cache the collator settings that nextCE()/prevCE() consult for every CE.
+    strength    = ucol_getStrength(coll);
+    toShift     = (ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) == UCOL_SHIFTED);
+    variableTop = ucol_getVariableTop(coll, &status);
+
+    // Scan the collator's expansion-size list, which ends at a zero entry, for its largest value.
+    uint8_t widest = 0;
+    const uint8_t *size = coll->expansionCESize;
+    while (*size != 0) {
+        if (widest < *size) {
+            widest = *size;
+        }
+        size += 1;
+    }
+
+    // CE buffer: the pattern, one largest expansion on each side, and 4 spare slots.
+    bufferSize = patternLength + (2 * widest) + 4;
+    ceb = NEW_ARRAY(CEI, bufferSize);
+
+    if (ceb == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    if (target != NULL) {
+        setTargetString(target);
+    }
+
+    // Build the mask of CE levels kept at this strength: primary always, secondary
+    // unless the strength is primary, tertiary unless it is primary or secondary.
+    strengthMask |= UCOL_PRIMARYORDERMASK;
+
+    if (strength != UCOL_PRIMARY) {
+        strengthMask |= UCOL_SECONDARYORDERMASK;
+
+        if (strength != UCOL_SECONDARY) {
+            strengthMask |= UCOL_TERTIARYORDERMASK;
+        }
+    }
+}
+
+Target::~Target()
+{
+    ubrk_close(charBreakIterator);
+    ucol_closeElements(elements);
+    // both iterators above are opened by setTargetString(); the CE buffer is allocated by the constructor
+    DELETE_ARRAY(ceb);
+}
+
+void Target::setTargetString(const UnicodeString *target)
+{
+    // Drop the iterators over the previous target and forget them, so that neither
+    // ~Target() nor a later call closes the same iterator a second time. The
+    // destructor already passes possibly-NULL handles to these close functions.
+    ubrk_close(charBreakIterator);
+    ucol_closeElements(elements);
+    charBreakIterator = NULL;
+    elements = NULL;
+
+    targetString = target;
+    if (targetString != NULL) {
+        UErrorCode status = U_ZERO_ERROR;
+
+        targetBuffer = targetString->getBuffer();
+        targetLength = targetString->length();
+        elements = ucol_openElements(coll, targetBuffer, targetLength, &status);
+        ucol_forceHanImplicit(elements, &status);
+        charBreakIterator = ubrk_open(UBRK_CHARACTER, ucol_getLocale(coll, ULOC_VALID_LOCALE, &status),
+                                      targetBuffer, targetLength, &status);
+    } else {
+        targetBuffer = NULL;
+        targetLength = 0;
+    }
+}
+
+const CEI *Target::nextCE(int32_t offset)
+{
+    UErrorCode status = U_ZERO_ERROR;
+    int32_t low = -1, high = -1;
+    uint32_t order;
+    UBool cont = FALSE;
+    // already buffered: hand back the stored entry
+    if (offset >= bufferMin && offset < bufferMax) {
+        return &ceb[offset];
+    }
+    // only the slot right after the last buffered one can be filled, and only while there is room
+    if (bufferMax >= bufferSize || offset != bufferMax) {
+        return NULL;
+    }
+    // read forward until a CE survives masking, or the iterator runs out (UCOL_NULLORDER)
+    do {
+        low   = ucol_getOffset(elements);
+        order = ucol_next(elements, &status);
+        high  = ucol_getOffset(elements);
+
+        if (order == UCOL_NULLORDER) {
+          //high = low = -1;
+            break;
+        }
+        // remember the continuation flag before the strength mask is applied
+        cont = isContinuation(order);
+        order &= strengthMask;
+        // shifted handling: a CE below variableTop keeps only its primary at quaternary strength or above, else is ignored
+        if (toShift && variableTop > order && (order & UCOL_PRIMARYORDERMASK) != 0) {
+            if (strength >= UCOL_QUATERNARY) {
+                order &= UCOL_PRIMARYORDERMASK;
+            } else {
+                order = UCOL_IGNORABLE;
+            }
+        }
+    } while (order == UCOL_IGNORABLE);
+    // put the continuation flag back on the stored value
+    if (cont) {
+        order |= UCOL_CONTINUATION_MARKER;
+    }
+    // store the CE with the iterator offsets seen before (low) and after (high) reading it
+    ceb[offset].order = order;
+    ceb[offset].lowOffset = low;
+    ceb[offset].highOffset = high;
+
+    bufferMax += 1;
+
+    return &ceb[offset];
+}
+
+const CEI *Target::prevCE(int32_t offset)
+{
+    UErrorCode status = U_ZERO_ERROR;
+    int32_t low = -1, high = -1;
+    uint32_t order;
+    UBool cont = FALSE;
+    // already buffered: hand back the stored entry
+    if (offset >= bufferMin && offset < bufferMax) {
+        return &ceb[offset];
+    }
+    // only the slot right after the last buffered one can be filled, and only while there is room
+    if (bufferMax >= bufferSize || offset != bufferMax) {
+        return NULL;
+    }
+    // read backward until a CE survives masking, or the iterator runs out (UCOL_NULLORDER)
+    do {
+        high  = ucol_getOffset(elements);
+        order = ucol_previous(elements, &status);
+        low   = ucol_getOffset(elements);
+
+        if (order == UCOL_NULLORDER) {
+            break;
+        }
+        // remember the continuation flag before the strength mask is applied
+        cont = isContinuation(order);
+        order &= strengthMask;
+        // shifted handling: a CE below variableTop keeps only its primary at quaternary strength or above, else is ignored
+        if (toShift && variableTop > order && (order & UCOL_PRIMARYORDERMASK) != 0) {
+            if (strength >= UCOL_QUATERNARY) {
+                order &= UCOL_PRIMARYORDERMASK;
+            } else {
+                order = UCOL_IGNORABLE;
+            }
+        }
+    } while (order == UCOL_IGNORABLE);
+
+    bufferMax += 1;
+    // put the continuation flag back on the stored value
+    if (cont) {
+        order |= UCOL_CONTINUATION_MARKER;
+    }
+    // store the CE with the iterator offsets seen after (low) and before (high) stepping back
+    ceb[offset].order       = order;
+    ceb[offset].lowOffset   = low;
+    ceb[offset].highOffset = high;
+
+    return &ceb[offset];
+}
+
+int32_t Target::stringLength()
+{
+    // With no target string attached there is nothing to measure, so
+    // report zero; otherwise report the length that setTargetString()
+    // recorded.
+
+    return (targetString != NULL) ? targetLength : 0;
+}
+
+UChar Target::charAt(int32_t offset)
+{
+    // No target attached: answer U+0000. Otherwise index straight into
+    // the target's buffer; the offset is not range-checked here, so the
+    // caller must pass one that lies inside the string.
+
+    return (targetString == NULL) ? (UChar) 0x0000 : targetBuffer[offset];
+}
+
+void Target::setOffset(int32_t offset)
+{
+    UErrorCode status = U_ZERO_ERROR;
+    // empty the CE buffer...
+    bufferMin = 0;
+    bufferMax = 0;
+    // ...and move the collation element iterator to offset (the status is not reported)
+    ucol_setOffset(elements, offset, &status);
+}
+
+void Target::setLast(int32_t last)
+{
+    UErrorCode status = U_ZERO_ERROR;
+    // reset the CE buffer to hold exactly one entry
+    bufferMin = 0;
+    bufferMax = 1;
+    // that entry is a UCOL_NULLORDER placeholder with both offsets at last
+    ceb[0].order      = UCOL_NULLORDER;
+    ceb[0].lowOffset  = last;
+    ceb[0].highOffset = last;
+    // move the collation element iterator to last (the status is not reported)
+    ucol_setOffset(elements, last, &status);
+}
+
+int32_t Target::getOffset()
+{   // current offset of the collation element iterator
+    return ucol_getOffset(elements);
+}
+
+UBool Target::isBreakBoundary(int32_t offset)
+{   // asks the character break iterator whether offset is one of its boundaries
+    return ubrk_isBoundary(charBreakIterator, offset);
+}
+
+int32_t Target::nextBreakBoundary(int32_t offset)
+{   // asks the character break iterator for the boundary following offset
+    return ubrk_following(charBreakIterator, offset);
+}
+
+int32_t Target::nextSafeBoundary(int32_t offset)
+{
+    // Walk forward from offset and stop at the first code unit that is a
+    // lead surrogate or that the collator does not flag as unsafe; if the
+    // string runs out first, the end of the target is the boundary.
+    for (int32_t pos = offset; pos < targetLength; pos += 1) {
+        const UChar unit = targetBuffer[pos];
+        const UBool unsafe = !U_IS_LEAD(unit) && ucol_unsafeCP(unit, coll);
+        if (! unsafe) {
+            return pos;
+        }
+    }
+
+    return targetLength;
+}
+
+UBool Target::isIdentical(UnicodeString &pattern, int32_t start, int32_t end)
+{
+    if (strength < UCOL_IDENTICAL) {
+        return TRUE;
+    }
+    // at identical strength, compare the decomposed pattern with the decomposed target[start, end)
+    UChar t2[32], p2[32];
+    const UChar *pBuffer = pattern.getBuffer();
+    int32_t pLength = pattern.length();
+    int32_t length = end - start;
+
+    UErrorCode status = U_ZERO_ERROR, status2 = U_ZERO_ERROR;
+    // first attempt uses the fixed stack buffers
+    int32_t decomplength = unorm_decompose(t2, ARRAY_SIZE(t2), 
+                                       targetBuffer + start, length, 
+                                       FALSE, 0, &status);
+
+    // use separate status2 in case of buffer overflow
+    if (decomplength != unorm_decompose(p2, ARRAY_SIZE(p2),
+                                        pBuffer, pLength,
+                                        FALSE, 0, &status2)) {
+        return FALSE; // lengths are different
+    }
+
+    // compare contents
+    UChar *text, *pat;
+
+    if(U_SUCCESS(status)) {
+        text = t2;
+        pat = p2;
+    } else if(status == U_BUFFER_OVERFLOW_ERROR) {
+        status = U_ZERO_ERROR;
+
+        // allocate one buffer for both decompositions
+        text = NEW_ARRAY(UChar, decomplength * 2);
+
+        // Check for allocation failure.
+        if (text == NULL) {
+        	return FALSE;
+        }
+        // the pattern's decomposition goes in the second half of the shared buffer
+        pat = text + decomplength;
+        // decompose again, this time into buffers known to be large enough
+        unorm_decompose(text, decomplength, targetBuffer + start, 
+                        length, FALSE, 0, &status);
+
+        unorm_decompose(pat, decomplength, pBuffer, 
+                        pLength, FALSE, 0, &status);
+    } else {
+        // NFD failed, make sure that u_memcmp() does not overrun t2 & p2
+        // and that we don't uprv_free() an undefined text pointer
+        text = pat = t2;
+        decomplength = 0;
+    }
+
+    UBool result = (UBool)(u_memcmp(pat, text, decomplength) == 0);
+    // text differs from t2 only when the heap buffer was allocated above
+    if(text != t2) {
+        DELETE_ARRAY(text);
+    }
+
+    // return FALSE if NFD failed
+    return U_SUCCESS(status) && result;
+}
+
+#define HASH_TABLE_SIZE 257
+
+class BadCharacterTable : public UMemory  // skip distances keyed by a hash of a collation element
+{
+public:
+    BadCharacterTable(CEList &patternCEs, CollData *data, UErrorCode &status);
+    ~BadCharacterTable();
+    // operator[] hashes ce and returns the skip stored in that bucket
+    int32_t operator[](uint32_t ce) const;
+    int32_t getMaxSkip() const;
+    int32_t minLengthInChars(int32_t index);
+
+private:
+    static int32_t hash(uint32_t ce);
+    // maxSkip is the value every bucket starts with before pattern CEs override it
+    int32_t maxSkip;
+    int32_t badCharacterTable[HASH_TABLE_SIZE];
+    // plen + 1 entries allocated by the constructor; read back through minLengthInChars(index)
+    int32_t *minLengthCache;
+};
+
+BadCharacterTable::BadCharacterTable(CEList &patternCEs, CollData *data, UErrorCode &status)
+    : maxSkip(0), minLengthCache(NULL)
+{
+    int32_t plen = patternCEs.size();
+
+    // Give the table defined contents (zero skips) before any early return, so that
+    // getMaxSkip() and operator[] never read uninitialized members.
+    uprv_memset(badCharacterTable, 0, sizeof(badCharacterTable));
+
+    // **** need a better way to deal with this ****
+    if (U_FAILURE(status) || plen == 0) {
+        return;
+    }
+
+    int32_t *history = NEW_ARRAY(int32_t, plen);
+    if (history == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    for (int32_t i = 0; i < plen; i += 1) {
+        history[i] = -1;
+    }
+
+    minLengthCache = NEW_ARRAY(int32_t, plen + 1);
+    if (minLengthCache == NULL) {
+        DELETE_ARRAY(history);
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    // cache entry 0 (computed from pattern index 0) is also the default skip of every bucket
+    maxSkip = minLengthCache[0] = data->minLengthInChars(&patternCEs, 0, history);
+    for(int32_t j = 0; j < HASH_TABLE_SIZE; j += 1) {
+        badCharacterTable[j] = maxSkip;
+    }
+
+    for(int32_t p = 1; p < plen; p += 1) {
+        minLengthCache[p] = data->minLengthInChars(&patternCEs, p, history);
+
+        // Make sure this entry is not bigger than the previous one.
+        // Otherwise, we might skip too far in some cases.
+        if (minLengthCache[p] < 0 || minLengthCache[p] > minLengthCache[p - 1]) {
+            minLengthCache[p] = minLengthCache[p - 1];
+        }
+    }
+    minLengthCache[plen] = 0;
+
+    // every pattern CE except the last overrides its bucket with the cache entry that follows it
+    for(int32_t p = 0; p < plen - 1; p += 1) {
+        badCharacterTable[hash(patternCEs[p])] = minLengthCache[p + 1];
+    }
+    DELETE_ARRAY(history);
+}
+
+BadCharacterTable::~BadCharacterTable()
+{   // frees the cache allocated by the constructor (still NULL if the constructor returned early)
+    DELETE_ARRAY(minLengthCache);
+}
+
+int32_t BadCharacterTable::operator[](uint32_t ce) const
+{   // skip stored in the bucket that ce hashes to; CEs sharing a bucket share a skip
+    return badCharacterTable[hash(ce)];
+}
+
+int32_t BadCharacterTable::getMaxSkip() const
+{   // the default skip the constructor stored in every bucket before applying the pattern CEs
+    return maxSkip;
+}
+
+int32_t BadCharacterTable::minLengthInChars(int32_t index)
+{   // cached value for pattern index `index`; the constructor fills indices 0..plen, and index is not range-checked
+    return minLengthCache[index];
+}
+
+int32_t BadCharacterTable::hash(uint32_t ce)
+{   // bucket index: the CE's primary order modulo the table size
+    return UCOL_PRIMARYORDER(ce) % HASH_TABLE_SIZE;
+}
+
+class GoodSuffixTable : public UMemory  // skip distances indexed by a position in the pattern's CE list
+{
+public:
+    GoodSuffixTable(CEList &patternCEs, BadCharacterTable &badCharacterTable, UErrorCode &status);
+    ~GoodSuffixTable();
+    // returns the table entry at offset; offset is not range-checked
+    int32_t operator[](int32_t offset) const;
+
+private:
+    int32_t *goodSuffixTable;  // one entry per pattern CE; NULL if the constructor returned early
+};
+
+GoodSuffixTable::GoodSuffixTable(CEList &patternCEs, BadCharacterTable &badCharacterTable, UErrorCode &status)
+    : goodSuffixTable(NULL)
+{
+    int32_t patlen = patternCEs.size();
+
+    // **** need a better way to deal with this ****
+    if (U_FAILURE(status) || patlen <= 0) {
+        return;
+    }
+    // suff is a scratch array with one entry per pattern CE; it is freed before returning
+    int32_t *suff  = NEW_ARRAY(int32_t, patlen);
+    int32_t start = patlen - 1, end = - 1;
+    int32_t maxSkip = badCharacterTable.getMaxSkip();
+
+    if (suff == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    // initialize suff
+    suff[patlen - 1] = patlen;
+
+    for (int32_t i = patlen - 2; i >= 0; i -= 1) {
+        // (i > start) means we're inside the last suffix match we found
+        // ((patlen - 1) - end) is how far the end of that match is from end of pattern
+        // (i - start) is how far we are from start of that match
+        // (i + (patlen - 1) - end) is index of same character at end of pattern
+        // so if any suffix match at that character doesn't extend beyond the last match,
+        // it's the suffix for this character as well
+        if (i > start && suff[i + patlen - 1 - end] < i - start) {
+            suff[i] = suff[i + patlen - 1 - end];
+        } else {
+            start = end = i;
+
+            int32_t s = patlen;
+            // compare CEs backwards from i against CEs backwards from the end of the pattern
+            while (start >= 0 && patternCEs[start] == patternCEs[--s]) {
+                start -= 1;
+            }
+            // number of CEs that matched
+            suff[i] = end - start;
+        }
+    }
+
+    // now build goodSuffixTable
+    goodSuffixTable  = NEW_ARRAY(int32_t, patlen);
+
+    if (goodSuffixTable == NULL) {
+        DELETE_ARRAY(suff);
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+
+    // initialize entries to minLengthInChars of the pattern
+    for (int32_t i = 0; i < patlen; i += 1) {
+        goodSuffixTable[i] = maxSkip;
+    }
+
+    int32_t prefix = 0;
+
+    for (int32_t i = patlen - /*1*/ 2; i >= 0; i -= 1) {
+        if (suff[i] == i + 1) {
+            // this matching suffix is a prefix of the pattern
+            int32_t prefixSkip = badCharacterTable.minLengthInChars(i + 1);
+
+            // for any mis-match before this suffix, we should skip
+            // so that the front of the pattern (i.e. the prefix)
+            // lines up with the front of the suffix.
+            // (patlen - 1 - i) is the start of the suffix
+            while (prefix < patlen - 1 - i) {
+                // value of maxSkip means never set...
+                if (goodSuffixTable[prefix] == maxSkip) {
+                    goodSuffixTable[prefix] = prefixSkip;
+                }
+
+                prefix += 1;
+            }
+        }
+    }
+    // finally, the entry just before each matched suffix takes the cached length for index i + 1
+    for (int32_t i = 0; i < patlen - 1; i += 1) {
+        goodSuffixTable[patlen - 1 - suff[i]] = badCharacterTable.minLengthInChars(i + 1);
+    }
+
+    DELETE_ARRAY(suff);
+}
+
+GoodSuffixTable::~GoodSuffixTable()
+{   // frees the table allocated by the constructor (still NULL if the constructor returned early)
+    DELETE_ARRAY(goodSuffixTable);
+}
+
+int32_t GoodSuffixTable::operator[](int32_t offset) const
+{   // table entry at offset; neither the offset nor the table pointer is checked
+    return goodSuffixTable[offset];
+}
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BoyerMooreSearch)
+
+
+UBool BoyerMooreSearch::empty()
+{   // TRUE when there are no pattern CEs, including when the constructor failed and left patCEs NULL
+    return patCEs == NULL || patCEs->size() <= 0;
+}
+
+CollData *BoyerMooreSearch::getData()
+{   // the CollData passed to the constructor
+    return data;
+}
+
+CEList *BoyerMooreSearch::getPatternCEs()
+{   // the pattern's CE list, still owned by this object; NULL if the constructor returned early
+    return patCEs;
+}
+
+BadCharacterTable *BoyerMooreSearch::getBadCharacterTable()
+{   // the bad-character table, still owned by this object; NULL if the constructor returned early
+    return badCharacterTable;
+}
+
+GoodSuffixTable *BoyerMooreSearch::getGoodSuffixTable()
+{   // the good-suffix table, still owned by this object; NULL if the constructor returned early
+    return goodSuffixTable;
+}
+
+BoyerMooreSearch::BoyerMooreSearch(CollData *theData, const UnicodeString &patternString, const UnicodeString *targetString,
+                                   UErrorCode &status)
+    : data(theData), patCEs(NULL), badCharacterTable(NULL), goodSuffixTable(NULL), pattern(patternString), target(NULL)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    UCollator *collator = data->getCollator();
+
+    // Each helper is built only when everything before it succeeded; the destructor frees whatever exists.
+    patCEs = new CEList(collator, patternString, status);
+    if (patCEs != NULL && U_SUCCESS(status)) {
+        badCharacterTable = new BadCharacterTable(*patCEs, data, status);
+    }
+
+    if (badCharacterTable != NULL && U_SUCCESS(status)) {
+        goodSuffixTable = new GoodSuffixTable(*patCEs, *badCharacterTable, status);
+    }
+
+    if (goodSuffixTable != NULL && U_SUCCESS(status) && targetString != NULL) {
+        target = new Target(collator, targetString, patCEs->size(), status);
+    }
+
+    // a NULL from any required new is an allocation failure that the caller must be able to see
+    if (U_SUCCESS(status) &&
+        (goodSuffixTable == NULL || (targetString != NULL && target == NULL))) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+    }
+}
+
+BoyerMooreSearch::~BoyerMooreSearch()
+{   // releases the helpers in the reverse of the order in which the constructor creates them
+    delete target;
+    delete goodSuffixTable;
+    delete badCharacterTable;
+    delete patCEs;
+}
+
+void BoyerMooreSearch::setTargetString(const UnicodeString *targetString, UErrorCode &status)
+{   // Points the search at a new target string, creating the Target helper on first use.
+    if (U_FAILURE(status)) {
+        return;
+    }
+    if (target != NULL) {
+        target->setTargetString(targetString);  // reuse the existing Target and its CE buffer
+        return;
+    }
+    target = new Target(data->getCollator(), targetString, patCEs->size(), status);
+    if (target == NULL) { status = U_MEMORY_ALLOCATION_ERROR; }  // report the failed allocation
+}
+
+// **** main flow of this code from Laura Werner's "Unicode Text Searching in Java" paper. ****
+/*
+ * TODO:
+ *  * deal with trailing (and leading?) ignorables.
+ *  * Adding BoyerMooreSearch object slowed it down. How can we speed it up?
+ */
+UBool BoyerMooreSearch::search(int32_t offset, int32_t &start, int32_t &end)
+{   // Looks for the pattern from offset on; TRUE with [start, end) set, else FALSE with both -1.
+    // No target, no tables (construction failed) or a zero length pattern: nothing can match.
+    if (target == NULL || patCEs == NULL || badCharacterTable == NULL ||
+        goodSuffixTable == NULL || patCEs->size() <= 0) {
+        start = end = -1;
+        return FALSE;
+    }
+    int32_t plen = patCEs->size();
+    int32_t tlen = target->stringLength();
+    int32_t maxSkip = badCharacterTable->getMaxSkip();
+    int32_t tOffset = offset + maxSkip;
+
+    while (tOffset <= tlen) {
+        int32_t pIndex = plen - 1;
+        int32_t tIndex = 0;
+        int32_t lIndex = 0;
+
+        if (tOffset < tlen) {
+            // **** we really want to skip ahead enough to  ****
+            // **** be sure we get at least 1 non-ignorable ****
+            // **** CE after the end of the pattern.        ****
+            int32_t next = target->nextSafeBoundary(tOffset + 1);
+
+            target->setOffset(next);
+
+            for (lIndex = 0; ; lIndex += 1) {
+                const CEI *cei = target->prevCE(lIndex);
+                int32_t low = cei->lowOffset;
+                int32_t high = cei->highOffset;
+
+                if (high == 0 || (low < high && low <= tOffset)) {
+                    if (low < tOffset) {
+                        while (lIndex >= 0 && target->prevCE(lIndex)->highOffset == high) {
+                            lIndex -= 1;
+                        }
+
+                        if (high > tOffset) {
+                            tOffset = high;
+                        }
+                    }
+
+                    break;
+                }
+            }
+        } else {
+            target->setLast(tOffset);
+            lIndex = 0;
+        }
+        // lIndex now indexes the CE just past the candidate match; comparison starts one CE before it
+        tIndex = ++lIndex;
+
+        // Iterate backward until we hit the beginning of the pattern
+        while (pIndex >= 0) {
+            uint32_t pce = (*patCEs)[pIndex];
+            const CEI *tcei = target->prevCE(tIndex++);
+
+
+            if (tcei->order != pce) {
+                // There is a mismatch at this position.  Decide how far
+                // over to shift the pattern, then try again.
+ 
+                int32_t gsOffset = tOffset + (*goodSuffixTable)[pIndex];
+#ifdef EXTRA_CAUTIOUS
+                int32_t old = tOffset;
+#endif
+
+                tOffset += (*badCharacterTable)[tcei->order] - badCharacterTable->minLengthInChars(pIndex + 1);
+
+                if (gsOffset > tOffset) {
+                    tOffset = gsOffset;
+                }
+
+#ifdef EXTRA_CAUTIOUS
+                // Make sure we don't skip backwards...
+                if (tOffset <= old) {
+                    tOffset = old + 1;
+                }
+#endif
+
+                break;
+            }
+
+            pIndex -= 1;
+        }
+
+        if (pIndex < 0) {
+            // We made it back to the beginning of the pattern,
+            // which means we matched it all.  Return the location.
+            const CEI firstCEI = *target->prevCE(tIndex - 1);
+            const CEI lastCEI  = *target->prevCE(lIndex);
+            int32_t mStart   = firstCEI.lowOffset;
+            int32_t minLimit = lastCEI.lowOffset;
+            int32_t maxLimit = lastCEI.highOffset;
+            int32_t mLimit; 
+            UBool found = TRUE;
+
+            target->setOffset(/*tOffset*/maxLimit);
+
+            const CEI nextCEI = *target->nextCE(0);
+
+            if (nextCEI.lowOffset > maxLimit) {
+                maxLimit = nextCEI.lowOffset;
+            }
+
+            if (nextCEI.lowOffset == nextCEI.highOffset && nextCEI.order != UCOL_NULLORDER) {
+                found = FALSE;
+            }
+
+            if (! target->isBreakBoundary(mStart)) {
+                found = FALSE;
+            }
+
+            if (firstCEI.lowOffset == firstCEI.highOffset) {
+                found = FALSE;
+            }
+
+            mLimit = maxLimit;
+            if (minLimit < maxLimit) {
+                int32_t nbb = target->nextBreakBoundary(minLimit);
+
+                if (nbb >= lastCEI.highOffset) {
+                    mLimit = nbb;
+                }
+            }
+
+            if (mLimit > maxLimit) {
+                found = FALSE;
+            }
+
+            if (! target->isBreakBoundary(mLimit)) {
+                found = FALSE;
+            }
+
+            if (! target->isIdentical(pattern, mStart, mLimit)) {
+                found = FALSE;
+            }
+
+            if (found) {
+                start = mStart;
+                end   = mLimit;
+
+                return TRUE;
+            }
+
+            tOffset += (*goodSuffixTable)[0]; // really? Maybe += 1 or += maxSkip?
+        }
+        // Otherwise, we're here because of a mismatch, so keep going....
+    }
+    
+    // no match
+   start = -1;
+   end = -1;
+   return FALSE;
+}
+
+U_NAMESPACE_END
+
+#endif // #if !UCONFIG_NO_COLLATION
diff --git a/icu4c/source/i18n/colldata.cpp b/icu4c/source/i18n/colldata.cpp
new file mode 100644
index 0000000000..9860b62b34
--- /dev/null
+++ b/icu4c/source/i18n/colldata.cpp
@@ -0,0 +1,1104 @@
+/*
+ ******************************************************************************
+ *   Copyright (C) 1996-2009, International Business Machines                 *
+ *   Corporation and others.  All Rights Reserved.                            *
+ ******************************************************************************
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_COLLATION
+
+#include "unicode/unistr.h"
+#include "unicode/putil.h"
+#include "unicode/usearch.h"
+
+#include "cmemory.h"
+#include "unicode/coll.h"
+#include "unicode/tblcoll.h"
+#include "unicode/coleitr.h"
+#include "unicode/ucoleitr.h"
+
+#include "unicode/regex.h"        // TODO: make conditional on regexp being built.
+
+#include "unicode/uniset.h"
+#include "unicode/uset.h"
+#include "unicode/ustring.h"
+#include "hash.h"
+#include "uhash.h"
+#include "ucln_in.h"
+#include "ucol_imp.h"
+#include "umutex.h"
+
+#include "unicode/colldata.h"
+
+U_NAMESPACE_BEGIN
+
+// Number of elements in an array whose size is known at compile time.
+#define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0]))
+// Raw storage for `count` objects of `type`, obtained from uprv_malloc and cast to `type *`.
+#define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type))
+// Hands storage back to uprv_free.
+#define DELETE_ARRAY(array) uprv_free((void *) (array))
+// Byte-wise copy of `count` elements from src to dst; the element size is taken from src.
+#define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (count) * sizeof (src)[0])
+
+/**
+ * Opens a USet by asking uset_open() for the range [1, 0].
+ * NOTE(review): a range whose start is greater than its end presumably
+ * yields an empty set, which is what the name promises.
+ */
+static inline USet *uset_openEmpty()
+{
+    const UChar32 rangeStart = 1;
+    const UChar32 rangeEnd   = 0;
+
+    return uset_open(rangeStart, rangeEnd);
+}
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CEList)
+
+#ifdef INSTRUMENT_CELIST
+int32_t CEList::_active = 0;
+int32_t CEList::_histogram[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+#endif
+
+/**
+ * Builds the list of collation elements (CEs) that the collator produces
+ * for a string.
+ *
+ * Each CE is masked down to the collator's strength, variable CEs are
+ * shifted or dropped when alternate handling is UCOL_SHIFTED, and
+ * ignorable CEs are left out of the list.
+ *
+ * @param coll   the collator used to generate the CEs
+ * @param string the text to convert
+ * @param status set to a failure code if iteration or allocation fails;
+ *               the list is then empty or partially filled
+ */
+CEList::CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status)
+    : ces(NULL), listMax(CELIST_BUFFER_SIZE), listSize(0)
+{
+    UCollationElements *elems = ucol_openElements(coll, string.getBuffer(), string.length(), &status);
+    UCollationStrength strength = ucol_getStrength(coll);
+    UBool toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) ==  UCOL_SHIFTED;
+    uint32_t variableTop = ucol_getVariableTop(coll, &status);
+    uint32_t strengthMask = 0;
+    int32_t order;
+
+    // Start out on the inline buffer so that the object is a valid,
+    // empty list even if we have to bail out below.
+    ces = ceBuffer;
+
+    if (U_FAILURE(status)) {
+        // The iterator may have been opened before a later call failed;
+        // don't leak it.
+        if (elems != NULL) {
+            ucol_closeElements(elems);
+        }
+
+        return;
+    }
+
+    // **** only set flag if string has Han(gul) ****
+    ucol_forceHanImplicit(elems, &status);
+
+    // Accumulate the mask from the weakest level upwards: anything
+    // stronger than secondary keeps all three weight fields.
+    switch (strength) 
+    {
+    default:
+        strengthMask |= UCOL_TERTIARYORDERMASK;
+        /* fall through */
+
+    case UCOL_SECONDARY:
+        strengthMask |= UCOL_SECONDARYORDERMASK;
+        /* fall through */
+
+    case UCOL_PRIMARY:
+        strengthMask |= UCOL_PRIMARYORDERMASK;
+    }
+
+#ifdef INSTRUMENT_CELIST
+    _active += 1;
+    _histogram[0] += 1;
+#endif
+
+    while ((order = ucol_next(elems, &status)) != UCOL_NULLORDER) {
+        // Remember the continuation flag before the mask strips it.
+        UBool cont = isContinuation(order);
+
+        order &= strengthMask;
+
+        // Variable CE under shifted handling: keep only the primary
+        // weight at quaternary strength or above, otherwise ignore it.
+        if (toShift && variableTop > (uint32_t) order && (order & UCOL_PRIMARYORDERMASK) != 0) {
+            if (strength >= UCOL_QUATERNARY) {
+                order &= UCOL_PRIMARYORDERMASK;
+            } else {
+                order = UCOL_IGNORABLE;
+            }
+        }
+
+        if (order == UCOL_IGNORABLE) {
+            continue;
+        }
+
+        if (cont) {
+            order |= UCOL_CONTINUATION_MARKER;
+        }
+
+        add(order, status);
+    }
+
+    ucol_closeElements(elems);
+}
+
+/**
+ * Releases the CE storage if it was moved off the inline buffer.
+ */
+CEList::~CEList()
+{
+#ifdef INSTRUMENT_CELIST
+    _active -= 1;
+#endif
+
+    // Storage that still aliases ceBuffer belongs to the object itself.
+    if (ces == ceBuffer) {
+        return;
+    }
+
+    DELETE_ARRAY(ces);
+}
+
+/**
+ * Appends one CE, growing the storage by CELIST_BUFFER_SIZE entries
+ * whenever it is full.
+ *
+ * @param ce     the collation element to append
+ * @param status does nothing if already a failure; set to
+ *               U_MEMORY_ALLOCATION_ERROR if the list cannot grow
+ */
+void CEList::add(uint32_t ce, UErrorCode &status)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    // Fast path: there is still room.
+    if (listSize < listMax) {
+        ces[listSize++] = ce;
+        return;
+    }
+
+    const int32_t grownMax = listMax + CELIST_BUFFER_SIZE;
+
+#ifdef INSTRUMENT_CELIST
+    _histogram[listSize / CELIST_BUFFER_SIZE] += 1;
+#endif
+
+    uint32_t *grown = NEW_ARRAY(uint32_t, grownMax);
+
+    if (grown == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    uprv_memcpy(grown, ces, listSize * sizeof(uint32_t));
+
+    // The inline buffer is part of the object and must not be freed.
+    if (ces != ceBuffer) {
+        DELETE_ARRAY(ces);
+    }
+
+    ces     = grown;
+    listMax = grownMax;
+
+    ces[listSize++] = ce;
+}
+
+/**
+ * Bounds-checked access to a CE.
+ *
+ * @param index position in the list
+ * @return the CE at index, or UCOL_NULLORDER if index is out of range
+ */
+uint32_t CEList::get(int32_t index) const
+{
+    const UBool inRange = (index >= 0 && index < listSize);
+
+    return inRange ? ces[index] : (uint32_t) UCOL_NULLORDER;
+}
+
+/**
+ * Unchecked access to a CE; the caller must pass an index inside the list.
+ *
+ * @param index position in the list
+ * @return a reference to the stored CE
+ */
+uint32_t &CEList::operator[](int32_t index) const
+{
+    uint32_t *slot = ces + index;
+
+    return *slot;
+}
+
+/**
+ * Tests whether another CE list occurs in this list at a given position.
+ *
+ * @param offset position in this list where the comparison starts
+ * @param other  the CE list to look for; may be NULL
+ * @return TRUE if every CE of other equals the corresponding CE of this
+ *         list starting at offset; FALSE if other is NULL, offset is
+ *         negative, or other does not fit in the remaining CEs
+ */
+UBool CEList::matchesAt(int32_t offset, const CEList *other) const
+{
+    // A negative offset would index in front of the CE storage.
+    if (other == NULL || offset < 0 || listSize - offset < other->size()) {
+        return FALSE;
+    }
+
+    for (int32_t i = offset, j = 0; j < other->size(); i += 1, j += 1) {
+        if (ces[i] != (*other)[j]) {
+            return FALSE;
+        }
+    }
+
+    return TRUE;
+}
+
+/**
+ * @return the number of CEs currently stored in the list
+ */
+int32_t CEList::size() const
+{
+    return this->listSize;
+}
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringList)
+
+#ifdef INSTRUMENT_STRING_LIST
+int32_t StringList::_lists = 0;
+int32_t StringList::_strings = 0;
+int32_t StringList::_histogram[101] = {0};
+#endif
+
+/**
+ * Creates an empty list with room for STRING_LIST_BUFFER_SIZE strings.
+ *
+ * @param status nothing is allocated if already a failure; set to
+ *               U_MEMORY_ALLOCATION_ERROR if the array cannot be allocated
+ */
+StringList::StringList(UErrorCode &status)
+    : strings(NULL), listMax(STRING_LIST_BUFFER_SIZE), listSize(0)
+{
+    if (U_SUCCESS(status)) {
+        strings = new UnicodeString [listMax];
+
+        if (strings == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+        } else {
+#ifdef INSTRUMENT_STRING_LIST
+            _lists += 1;
+            _histogram[0] += 1;
+#endif
+        }
+    }
+}
+
+/**
+ * Destroys every string in the list and frees the array.
+ */
+StringList::~StringList()
+{
+    delete[] this->strings;
+}
+
+/**
+ * Appends a copy of a string, growing the array by
+ * STRING_LIST_BUFFER_SIZE entries whenever it is full.
+ *
+ * @param string the string to copy into the list
+ * @param status does nothing if already a failure; set to
+ *               U_MEMORY_ALLOCATION_ERROR if storage is unavailable
+ */
+void StringList::add(const UnicodeString *string, UErrorCode &status)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    // The constructor may have failed to allocate the array.
+    if (strings == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+#ifdef INSTRUMENT_STRING_LIST
+    _strings += 1;
+#endif
+
+    if (listSize >= listMax) {
+        int32_t newMax = listMax + STRING_LIST_BUFFER_SIZE;
+
+        UnicodeString *newStrings = new UnicodeString[newMax];
+
+        if (newStrings == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+
+        // UnicodeString owns its buffer, so the elements have to be
+        // copied with the assignment operator. A raw memory copy would
+        // leave the new array pointing at buffers that the delete[]
+        // below releases.
+        for (int32_t i = 0; i < listSize; i += 1) {
+            newStrings[i] = strings[i];
+        }
+
+#ifdef INSTRUMENT_STRING_LIST
+        int32_t _h = listSize / STRING_LIST_BUFFER_SIZE;
+
+        if (_h > 100) {
+            _h = 100;
+        }
+
+        _histogram[_h] += 1;
+#endif
+
+        delete[] strings;
+        strings = newStrings;
+        listMax = newMax;
+    }
+
+    // The ctor initialized all the strings in
+    // the array to empty strings, so this
+    // is the same as copying the source string.
+    strings[listSize++].append(*string);
+}
+
+/**
+ * Appends a string given as a UChar buffer and a length.
+ *
+ * @param chars  the characters to add
+ * @param count  the number of UChars in chars
+ * @param status passed through to add(const UnicodeString *, UErrorCode &)
+ */
+void StringList::add(const UChar *chars, int32_t count, UErrorCode &status)
+{
+    // Wrap the buffer in a temporary; the other overload copies it.
+    const UnicodeString wrapped(chars, count);
+
+    this->add(&wrapped, status);
+}
+
+/**
+ * Bounds-checked access to a string.
+ *
+ * @param index position in the list
+ * @return a pointer to the string at index, or NULL if index is out of range
+ */
+const UnicodeString *StringList::get(int32_t index) const
+{
+    if (index < 0 || index >= listSize) {
+        return NULL;
+    }
+
+    return strings + index;
+}
+
+/**
+ * @return the number of strings currently stored in the list
+ */
+int32_t StringList::size() const
+{
+    return this->listSize;
+}
+
+
+// Value deleter installed on the CEToStringsMap hash table (defined below).
+U_CFUNC void deleteStringList(void *obj);
+
+/**
+ * Maps a CE to a StringList. The hash table owns the lists: they are
+ * released through deleteStringList.
+ */
+class CEToStringsMap : public UMemory
+{
+public:
+
+    CEToStringsMap(UErrorCode &status);
+    ~CEToStringsMap();
+
+    // Adds string to the list stored for ce, creating the list on first use.
+    void put(uint32_t ce, UnicodeString *string, UErrorCode &status);
+    // Returns the list stored for ce, or NULL if there is none.
+    StringList *getStringList(uint32_t ce) const;
+
+private:
+ 
+    // Stores stringList under ce in the hash table.
+    void putStringList(uint32_t ce, StringList *stringList, UErrorCode &status);
+    UHashtable *map;
+};
+
+/**
+ * Opens the underlying hash table, keyed by integer, and installs
+ * deleteStringList as its value deleter.
+ *
+ * NOTE(review): the value comparator passed to uhash_open is a caseless
+ * UnicodeString comparator although the stored values are StringList
+ * pointers; confirm that nothing ever compares values of this table.
+ *
+ * @param status nothing is opened if already a failure; otherwise
+ *               receives any error reported by uhash_open
+ */
+CEToStringsMap::CEToStringsMap(UErrorCode &status)
+    : map(NULL)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    map = uhash_open(uhash_hashLong, uhash_compareLong,
+                     uhash_compareCaselessUnicodeString,
+                     &status);
+
+    if (U_SUCCESS(status)) {
+        uhash_setValueDeleter(map, deleteStringList);
+    }
+}
+
+/**
+ * Closes the hash table; the installed value deleter disposes of the
+ * stored StringLists.
+ */
+CEToStringsMap::~CEToStringsMap()
+{
+    uhash_close(this->map);
+}
+
+/**
+ * Adds a copy of a string to the list stored for a CE, creating and
+ * registering the list if the CE has none yet.
+ *
+ * @param ce     the key
+ * @param string the string to add; it is copied, not adopted
+ * @param status does nothing if already a failure; set to
+ *               U_MEMORY_ALLOCATION_ERROR if the list cannot be
+ *               allocated, otherwise receives any error reported while
+ *               creating, storing or extending the list
+ */
+void CEToStringsMap::put(uint32_t ce, UnicodeString *string, UErrorCode &status)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    StringList *strings = getStringList(ce);
+
+    if (strings == NULL) {
+        strings = new StringList(status);
+
+        if (strings == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+
+        if (U_FAILURE(status)) {
+            // The list never reached the map, so it is still ours to free.
+            delete strings;
+            return;
+        }
+
+        putStringList(ce, strings, status);
+
+        if (U_FAILURE(status)) {
+            // The table has a value deleter and so adopts the list even
+            // when the insert fails; it must not be touched again here.
+            return;
+        }
+    }
+
+    strings->add(string, status);
+}
+
+/**
+ * Looks up the list stored for a CE.
+ *
+ * @param ce the key
+ * @return the result of the hash lookup, cast to StringList *
+ */
+StringList *CEToStringsMap::getStringList(uint32_t ce) const
+{
+    void *found = uhash_iget(map, ce);
+
+    return (StringList *) found;
+}
+
+/**
+ * Stores a list in the hash table under a CE.
+ *
+ * @param ce         the key
+ * @param stringList the list to store
+ * @param status     receives any error reported by uhash_iput
+ */
+void CEToStringsMap::putStringList(uint32_t ce, StringList *stringList, UErrorCode &status)
+{
+    void *value = (void *) stringList;
+
+    uhash_iput(map, ce, value, &status);
+}
+
+/**
+ * Hash table value deleter: destroys a StringList.
+ *
+ * @param obj the StringList to delete, passed as void *
+ */
+U_CFUNC void deleteStringList(void *obj)
+{
+    delete (StringList *) obj;
+}
+
+// Value and key deleters installed on the StringToCEsMap hash table (defined below).
+U_CFUNC void deleteCEList(void *obj);
+U_CFUNC void deleteUnicodeStringKey(void *obj);
+
+/**
+ * Maps a UnicodeString to a CEList. The hash table owns both the
+ * keys and the values: they are released through
+ * deleteUnicodeStringKey and deleteCEList.
+ */
+class StringToCEsMap : public UMemory
+{
+public:
+    StringToCEsMap(UErrorCode &status);
+    ~StringToCEsMap();
+
+    // Stores ces under string in the hash table.
+    void put(const UnicodeString *string, const CEList *ces, UErrorCode &status);
+    // Returns the result of looking up string in the hash table.
+    const CEList *get(const UnicodeString *string);
+    // NOTE(review): declared, but no definition appears next to the other members - confirm it is needed.
+    void free(const CEList *list);
+
+private:
+
+
+    UHashtable *map;
+};
+
+/**
+ * Opens the underlying hash table, keyed by UnicodeString, and installs
+ * the deleters for its keys and values.
+ *
+ * @param status nothing is opened if already a failure; otherwise
+ *               receives any error reported by uhash_open
+ */
+StringToCEsMap::StringToCEsMap(UErrorCode &status)
+    : map(NULL)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    map = uhash_open(uhash_hashUnicodeString,
+                     uhash_compareUnicodeString,
+                     uhash_compareLong,
+                     &status);
+
+    if (U_SUCCESS(status)) {
+        uhash_setValueDeleter(map, deleteCEList);
+        uhash_setKeyDeleter(map, deleteUnicodeStringKey);
+    }
+}
+
+/**
+ * Closes the hash table; the installed deleters dispose of the stored
+ * keys and CE lists.
+ */
+StringToCEsMap::~StringToCEsMap()
+{
+    uhash_close(this->map);
+}
+
+/**
+ * Stores a CE list under a string.
+ *
+ * @param string the key
+ * @param ces    the value
+ * @param status receives any error reported by uhash_put
+ */
+void StringToCEsMap::put(const UnicodeString *string, const CEList *ces, UErrorCode &status)
+{
+    void *hashKey   = (void *) string;
+    void *hashValue = (void *) ces;
+
+    uhash_put(map, hashKey, hashValue, &status);
+}
+
+/**
+ * Looks up the CE list stored for a string.
+ *
+ * @param string the key
+ * @return the result of the hash lookup, cast to const CEList *
+ */
+const CEList *StringToCEsMap::get(const UnicodeString *string)
+{
+    const void *found = uhash_get(map, string);
+
+    return (const CEList *) found;
+}
+
+/**
+ * Hash table value deleter: destroys a CEList.
+ *
+ * @param obj the CEList to delete, passed as void *
+ */
+U_CFUNC void deleteCEList(void *obj)
+{
+    delete (CEList *) obj;
+}
+
+/**
+ * Hash table key deleter: destroys a UnicodeString key.
+ *
+ * @param obj the UnicodeString to delete, passed as void *
+ */
+U_CFUNC void deleteUnicodeStringKey(void *obj)
+{
+    delete (UnicodeString *) obj;
+}
+
+/**
+ * One entry of the CollData cache: a CollData object together with
+ * the number of outstanding references to it. The entry owns the
+ * CollData and deletes it in its destructor.
+ */
+class CollDataCacheEntry : public UMemory
+{
+public:
+    CollDataCacheEntry(CollData *theData);
+    ~CollDataCacheEntry();
+
+    CollData *data;      // owned; deleted by the destructor
+    int32_t   refCount;  // starts at 1; adjusted by CollDataCache::get() and unref()
+};
+
+/**
+ * Wraps a CollData object in a cache entry that holds one reference.
+ *
+ * @param theData the object to adopt
+ */
+CollDataCacheEntry::CollDataCacheEntry(CollData *theData)
+    : data(theData),
+      refCount(1)
+{
+    // The initializer list does all the work.
+}
+
+/**
+ * Deletes the adopted CollData object. The reference count is not
+ * consulted.
+ */
+CollDataCacheEntry::~CollDataCacheEntry()
+{
+    // check refCount?
+    delete this->data;
+}
+
+/**
+ * A cache of CollData objects, keyed by the collator's short
+ * definition string. Access to the hash table is guarded by `lock`.
+ */
+class CollDataCache : public UMemory
+{
+public:
+    CollDataCache(UErrorCode &status);
+    ~CollDataCache();
+
+    // Returns the CollData for the collator, creating and caching it if needed.
+    CollData *get(UCollator *collator, UErrorCode &status);
+    // Drops one reference to collData's cache entry.
+    void unref(CollData *collData);
+
+    // Removes every entry whose reference count has dropped to zero or below.
+    void flush();
+
+private:
+    // Builds the cache key; returns keyBuffer, or a heap buffer if keyBuffer is too small.
+    static char *getKey(UCollator *collator, char *keyBuffer, int32_t *charBufferLength);
+    // Frees a heap buffer returned by getKey().
+    static void deleteKey(char *key);
+
+    UMTX lock;
+    UHashtable *cache;
+};
+
+/**
+ * Hash table key deleter for the CollData cache. It deliberately does
+ * nothing.
+ *
+ * @param obj the key; ignored
+ */
+U_CFUNC void deleteChars(void *obj)
+{
+    // All the key strings are owned by the 
+    // CollData objects and don't need to
+    // be freed here.
+    (void) obj;
+}
+
+/**
+ * Hash table value deleter: destroys a cache entry, which in turn
+ * deletes the CollData it holds.
+ *
+ * @param obj the CollDataCacheEntry to delete, passed as void *
+ */
+U_CFUNC void deleteCollDataCacheEntry(void *obj)
+{
+    delete (CollDataCacheEntry *) obj;
+}
+
+/**
+ * Initializes the mutex and opens the hash table, keyed by C string,
+ * with deleters for its keys and entries.
+ *
+ * @param status nothing is initialized if already a failure; otherwise
+ *               receives any error reported by uhash_open
+ */
+CollDataCache::CollDataCache(UErrorCode &status)
+    : lock(0), cache(NULL)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    umtx_init(&lock);
+
+    cache = uhash_open(uhash_hashChars, uhash_compareChars, uhash_compareLong, &status);
+
+    if (U_SUCCESS(status)) {
+        uhash_setValueDeleter(cache, deleteCollDataCacheEntry);
+        uhash_setKeyDeleter(cache, deleteChars);
+    }
+}
+
+/**
+ * Closes the hash table, which deletes every cached entry through the
+ * value deleter, and then destroys the mutex.
+ */
+CollDataCache::~CollDataCache()
+{
+    // Tear the table down while holding the lock.
+    umtx_lock(&lock);
+    uhash_close(cache);
+    cache = NULL;
+    umtx_unlock(&lock);
+
+    // The mutex is released above before it is destroyed.
+    umtx_destroy(&lock);
+}
+
+/**
+ * Returns the CollData object for a collator, creating and caching it
+ * if no entry exists for the collator's key.
+ *
+ * The new object is built without holding the lock, so two threads
+ * may build one for the same key. The table is therefore checked
+ * again before inserting, and the thread that loses the race throws
+ * its copy away and uses the cached one.
+ *
+ * @param collator the collator to look up
+ * @param status   set to U_MEMORY_ALLOCATION_ERROR if an allocation
+ *                 fails; otherwise receives any error reported while
+ *                 building or caching the object
+ * @return the cached or newly created CollData, or NULL on failure
+ */
+CollData *CollDataCache::get(UCollator *collator, UErrorCode &status)
+{
+    char keyBuffer[KEY_BUFFER_SIZE];
+    int32_t keyLength = KEY_BUFFER_SIZE;
+    char *key = getKey(collator, keyBuffer, &keyLength);
+    CollData *result = NULL, *newData = NULL;
+    CollDataCacheEntry *entry = NULL, *newEntry = NULL;
+
+    if (key == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+
+    umtx_lock(&lock);
+    entry = (CollDataCacheEntry *) uhash_get(cache, key);
+
+    if (entry == NULL) {
+        umtx_unlock(&lock);
+
+        newData = new CollData(collator, key, keyLength, status);
+
+        if (newData == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+        } else if (U_FAILURE(status)) {
+            delete newData;
+            newData = NULL;
+        } else {
+            newEntry = new CollDataCacheEntry(newData);
+
+            if (newEntry == NULL) {
+                status = U_MEMORY_ALLOCATION_ERROR;
+                delete newData;
+                newData = NULL;
+            }
+        }
+
+        if (newEntry == NULL) {
+            if (key != keyBuffer) {
+                deleteKey(key);
+            }
+
+            return NULL;
+        }
+
+        umtx_lock(&lock);
+        entry = (CollDataCacheEntry *) uhash_get(cache, key);
+
+        if (entry == NULL) {
+            // The table is keyed by the CollData's own copy of the
+            // key, so the local key can be released afterwards.
+            uhash_put(cache, newData->key, newEntry, &status);
+            umtx_unlock(&lock);
+
+            if (key != keyBuffer) {
+                deleteKey(key);
+            }
+
+            if (U_FAILURE(status)) {
+                // With a value deleter installed, uhash_put adopts
+                // the entry and disposes of it (and, through its
+                // destructor, of newData) even when it fails.
+                // Deleting either one here would free it twice.
+                return NULL;
+            }
+
+            return newData;
+        }
+    }
+
+    result = entry->data;
+    entry->refCount += 1;
+    umtx_unlock(&lock);
+
+    if (key != keyBuffer) {
+        deleteKey(key);
+    }
+
+    // Another thread cached an object for this key while ours was
+    // being built. The entry owns newData, so deleting the entry
+    // releases both. (No-op when nothing was built.)
+    delete newEntry;
+
+    return result;
+}
+
+/**
+ * Drops one reference to the cache entry of a CollData object. The
+ * entry stays in the table until flush() removes it.
+ *
+ * @param collData the object being released; its key selects the entry
+ */
+void CollDataCache::unref(CollData *collData)
+{
+    umtx_lock(&lock);
+
+    CollDataCacheEntry *found = (CollDataCacheEntry *) uhash_get(cache, collData->key);
+
+    if (found != NULL) {
+        found->refCount -= 1;
+    }
+
+    umtx_unlock(&lock);
+}
+
+/**
+ * Builds the cache key for a collator from its short definition string.
+ *
+ * @param collator        the collator to describe
+ * @param keyBuffer       caller-supplied buffer for the key
+ * @param keyBufferLength in: capacity of keyBuffer; out: capacity of
+ *                        the returned buffer
+ * @return keyBuffer if the key fits into it; otherwise a buffer
+ *         allocated with NEW_ARRAY that the caller must release with
+ *         deleteKey(); NULL if that allocation fails
+ */
+char *CollDataCache::getKey(UCollator *collator, char *keyBuffer, int32_t *keyBufferLength)
+{
+    UErrorCode status = U_ZERO_ERROR;
+    int32_t len = ucol_getShortDefinitionString(collator, NULL, keyBuffer, *keyBufferLength, &status);
+
+    if (len >= *keyBufferLength) {
+        *keyBufferLength = (len + 2) & ~1;  // round to even length, leaving room for terminating null
+        keyBuffer = NEW_ARRAY(char, *keyBufferLength);
+
+        if (keyBuffer == NULL) {
+            return NULL;
+        }
+
+        status = U_ZERO_ERROR;
+
+        len = ucol_getShortDefinitionString(collator, NULL, keyBuffer, *keyBufferLength, &status);
+    }
+
+    // Never write the terminator outside the buffer, whatever length
+    // was reported; an unusable length yields an empty key.
+    if (len < 0 || len >= *keyBufferLength) {
+        len = 0;
+    }
+
+    keyBuffer[len] = '\0';
+
+    return keyBuffer;
+}
+
+/**
+ * Removes every entry that is no longer referenced (reference count
+ * of zero or less). The table's value deleter disposes of the removed
+ * entries.
+ */
+void CollDataCache::flush()
+{
+    int32_t cursor = -1;
+
+    umtx_lock(&lock);
+
+    for (const UHashElement *current = uhash_nextElement(cache, &cursor);
+         current != NULL;
+         current = uhash_nextElement(cache, &cursor)) {
+        CollDataCacheEntry *candidate = (CollDataCacheEntry *) current->value.pointer;
+
+        // Entries that are still referenced stay in the cache.
+        if (candidate->refCount > 0) {
+            continue;
+        }
+
+        uhash_removeElement(cache, current);
+    }
+
+    umtx_unlock(&lock);
+}
+
+/**
+ * Frees a key buffer with DELETE_ARRAY.
+ *
+ * @param key the buffer to free
+ */
+void CollDataCache::deleteKey(char *key)
+{
+    char *heapKey = key;
+
+    DELETE_ARRAY(heapKey);
+}
+
+U_CDECL_BEGIN
+/**
+ * Cleanup callback registered by CollData::getCollDataCache():
+ * frees the global CollData cache.
+ *
+ * @return always TRUE
+ */
+static UBool coll_data_cleanup(void) {
+    const UBool cleanedUp = TRUE;
+
+    CollData::freeCollDataCache();
+
+    return cleanedUp;
+}
+U_CDECL_END
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollData)
+
+/**
+ * Default constructor. Sets the pointer members to NULL so that the
+ * destructor is safe to run on an object built this way.
+ */
+CollData::CollData()
+    : coll(NULL), charsToCEList(NULL), ceToCharsStartingWith(NULL), key(NULL)
+{
+    // nothing
+}
+
+#define CLONE_COLLATOR
+
+//#define CACHE_CELISTS
+
+/**
+ * Builds the search data for a collator.
+ *
+ * For every assigned, non-"other" character, plus the collator's
+ * contractions and expansions and minus the Han/Jamo/Hangul ranges
+ * that are handled implicitly, the string is entered into
+ * ceToCharsStartingWith under the first CE it generates. The CE
+ * boundaries of the Han and Jamo ranges are then recorded in
+ * minHan/maxHan and jamoLimits.
+ *
+ * @param collator       the collator to analyze; cloned when
+ *                       CLONE_COLLATOR is defined
+ * @param cacheKey       the cache key for this collator; copied
+ * @param cacheKeyLength number of chars to copy from cacheKey
+ * @param status         set to a failure code if anything goes wrong;
+ *                       the object is then only partially built but
+ *                       is still safe to destroy
+ */
+CollData::CollData(UCollator *collator, char *cacheKey, int32_t cacheKeyLength, UErrorCode &status)
+    : coll(NULL), charsToCEList(NULL), ceToCharsStartingWith(NULL), key(NULL)
+{
+    // [:c:] == [[:cn:][:cc:][:co:][:cf:][:cs:]]
+    // i.e. other, control, private use, format, surrogate
+    U_STRING_DECL(test_pattern, "[[:assigned:]-[:c:]]", 20);
+    U_STRING_INIT(test_pattern, "[[:assigned:]-[:c:]]", 20);
+    USet *charsToTest = uset_openPattern(test_pattern, 20, &status);
+
+    // Han ext. A, Han, Jamo, Hangul, Han Ext. B
+    // i.e. all the characters we handle implicitly
+    U_STRING_DECL(remove_pattern, "[[\\u3400-\\u9FFF][\\u1100-\\u11F9][\\uAC00-\\uD7AF][\\U00020000-\\U0002A6DF]]", 70);
+    U_STRING_INIT(remove_pattern, "[[\\u3400-\\u9FFF][\\u1100-\\u11F9][\\uAC00-\\uD7AF][\\U00020000-\\U0002A6DF]]", 70);
+    USet *charsToRemove = uset_openPattern(remove_pattern, 70, &status);
+
+    if (U_FAILURE(status)) {
+        // One of the two sets may have been opened before the failure.
+        uset_close(charsToRemove);
+        uset_close(charsToTest);
+        return;
+    }
+
+    USet *expansions   = uset_openEmpty();
+    USet *contractions = uset_openEmpty();
+    int32_t itemCount;
+
+#ifdef CACHE_CELISTS
+    charsToCEList = new StringToCEsMap(status);
+
+    if (charsToCEList == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+    }
+
+    if (U_FAILURE(status)) {
+        goto bail;
+    }
+#else
+    charsToCEList = NULL;
+#endif
+
+    ceToCharsStartingWith = new CEToStringsMap(status);
+
+    if (ceToCharsStartingWith == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+    }
+
+    if (U_FAILURE(status)) {
+        goto bail;
+    }
+    
+    // Keys that fit are kept in the inline buffer; longer ones go on the heap.
+    if (cacheKeyLength > KEY_BUFFER_SIZE) {
+        key = NEW_ARRAY(char, cacheKeyLength);
+
+        if (key == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            goto bail;
+        }
+    } else {
+        key = keyBuffer;
+    }
+
+    ARRAY_COPY(key, cacheKey, cacheKeyLength);
+
+#ifdef CLONE_COLLATOR
+    coll = ucol_safeClone(collator, NULL, NULL, &status);
+
+    if (U_FAILURE(status)) {
+        goto bail;
+    }
+#else
+    coll = collator;
+#endif
+
+    ucol_getContractionsAndExpansions(coll, contractions, expansions, FALSE, &status);
+
+    uset_addAll(charsToTest, contractions);
+    uset_addAll(charsToTest, expansions);
+    uset_removeAll(charsToTest, charsToRemove);
+
+    itemCount = uset_getItemCount(charsToTest);
+    for(int32_t item = 0; item < itemCount; item += 1) {
+        UChar32 start = 0, end = 0;
+        UChar buffer[16];
+        int32_t len = uset_getItem(charsToTest, item, &start, &end,
+                                   buffer, 16, &status);
+
+        if (len == 0) {
+            // The item is a range of code points.
+            for (UChar32 ch = start; ch <= end; ch += 1) {
+                UnicodeString *st = new UnicodeString(ch);
+
+                if (st == NULL) {
+                    status = U_MEMORY_ALLOCATION_ERROR;
+                    break;
+                }
+
+                CEList *ceList = new CEList(coll, *st, status);
+
+                if (ceList == NULL) {
+                    status = U_MEMORY_ALLOCATION_ERROR;
+                    delete st;
+                    break;
+                }
+
+                ceToCharsStartingWith->put(ceList->get(0), st, status);
+
+#ifdef CACHE_CELISTS
+                charsToCEList->put(st, ceList, status);
+#else
+                delete ceList;
+                delete st;
+#endif
+            }
+        } else if (len > 0) {
+            // The item is a string.
+            UnicodeString *st = new UnicodeString(buffer, len);
+
+            if (st == NULL) {
+                status = U_MEMORY_ALLOCATION_ERROR;
+                break;
+            }
+
+            CEList *ceList = new CEList(coll, *st, status);
+
+            if (ceList == NULL) {
+                status = U_MEMORY_ALLOCATION_ERROR;
+                delete st;
+                break;
+            }
+
+            ceToCharsStartingWith->put(ceList->get(0), st, status);
+
+#ifdef CACHE_CELISTS
+            charsToCEList->put(st, ceList, status);
+#else
+            delete ceList;
+            delete st;
+#endif
+        } else {
+            // shouldn't happen...
+        }
+
+        if (U_FAILURE(status)) {
+             break;
+        }
+    }
+
+bail:
+    uset_close(contractions);
+    uset_close(expansions);
+    uset_close(charsToRemove);
+    uset_close(charsToTest);
+
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    UChar   hanRanges[] = {UCOL_FIRST_HAN, UCOL_LAST_HAN, UCOL_FIRST_HAN_COMPAT, UCOL_LAST_HAN_COMPAT, UCOL_FIRST_HAN_A, UCOL_LAST_HAN_A,
+                           UCOL_FIRST_HAN_B_LEAD, UCOL_FIRST_HAN_B_TRAIL, UCOL_LAST_HAN_B_LEAD, UCOL_LAST_HAN_B_TRAIL};
+    UChar  jamoRanges[] = {UCOL_FIRST_L_JAMO, UCOL_FIRST_V_JAMO, UCOL_FIRST_T_JAMO, UCOL_LAST_T_JAMO};
+    UnicodeString hanString(hanRanges, ARRAY_SIZE(hanRanges));
+    UnicodeString jamoString(jamoRanges, ARRAY_SIZE(jamoRanges));
+    CEList hanList(coll, hanString, status);
+    CEList jamoList(coll, jamoString, status);
+    int32_t j = 0;
+
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    // Record the first CE of each Jamo boundary character, skipping
+    // continuation CEs.
+    // NOTE(review): assumes the four boundary characters yield no more
+    // non-continuation CEs than jamoLimits can hold - confirm.
+    for (int32_t c = 0; c < jamoList.size(); c += 1) {
+        uint32_t jce = jamoList[c];
+
+        if (! isContinuation(jce)) {
+            jamoLimits[j++] = jce;
+        }
+    }
+
+    // Turn the last limit into an exclusive upper bound.
+    jamoLimits[3] += (1 << UCOL_PRIMARYORDERSHIFT);
+
+    minHan = 0xFFFFFFFF;
+    maxHan = 0;
+    
+    // Only every other CE is examined.
+    for(int32_t h = 0; h < hanList.size(); h += 2) {
+        uint32_t han = (uint32_t) hanList[h];
+
+        if (han < minHan) {
+            minHan = han;
+        }
+
+        if (han > maxHan) {
+            maxHan = han;
+        }
+    }
+
+    // Turn the maximum into an exclusive upper bound.
+    maxHan += (1 << UCOL_PRIMARYORDERSHIFT);
+}
+
+/**
+ * Releases the cloned collator (when CLONE_COLLATOR is defined), the
+ * key if it lives outside the inline buffer, and the lookup maps.
+ */
+CollData::~CollData()
+{
+#ifdef CLONE_COLLATOR
+    ucol_close(coll);
+#endif
+
+    // A key that aliases keyBuffer is part of the object itself.
+    const UBool keyOnHeap = (key != keyBuffer);
+
+    if (keyOnHeap) {
+        DELETE_ARRAY(key);
+    }
+
+    delete ceToCharsStartingWith;
+
+#ifdef CACHE_CELISTS
+    delete charsToCEList;
+#endif
+}
+
+/**
+ * @return the collator held by this object
+ */
+UCollator *CollData::getCollator() const
+{
+    return this->coll;
+}
+
+/**
+ * Looks up the strings recorded under a CE.
+ *
+ * @param ce the CE to look up
+ * @return the list stored for ce, as returned by the CE-to-strings map
+ */
+const StringList *CollData::getStringList(int32_t ce) const
+{
+    const StringList *found = ceToCharsStartingWith->getStringList(ce);
+
+    return found;
+}
+
+/**
+ * Returns the CE list for a string. Unless CACHE_CELISTS is defined
+ * the list is freshly allocated and must be handed back through
+ * freeCEList().
+ *
+ * @param string the string to convert
+ * @return the CE list, or NULL if it could not be built
+ */
+const CEList *CollData::getCEList(const UnicodeString *string) const
+{
+#ifdef CACHE_CELISTS
+    return charsToCEList->get(string);
+#else
+    UErrorCode localStatus = U_ZERO_ERROR;
+    const CEList *fresh = new CEList(coll, *string, localStatus);
+
+    if (U_SUCCESS(localStatus)) {
+        return fresh;
+    }
+
+    // Building the list failed; don't hand out a broken list.
+    delete fresh;
+
+    return NULL;
+#endif
+}
+
+/**
+ * Releases a CE list obtained from getCEList(). When CACHE_CELISTS is
+ * defined this does nothing.
+ *
+ * @param list the list to release
+ */
+void CollData::freeCEList(const CEList *list)
+{
+#ifdef CACHE_CELISTS
+    // nothing to release in this configuration
+#else
+    delete list;
+#endif
+}
+
+/**
+ * Computes the length of the shortest string that can generate the
+ * CEs of ceList from offset to the end.
+ *
+ * Every known string whose CEs match at offset is tried, followed by a
+ * recursive call for the rest of the list. If no string matches, CEs in
+ * the Han and Jamo ranges are handled by rule. Successful results are
+ * memoized in history.
+ *
+ * @param ceList  the CE list to analyze
+ * @param offset  index of the first CE to consider
+ * @param history one slot per CE; a negative value means "not computed
+ *                yet", anything else is the cached result for that offset
+ * @return the minimum length in chars, or -1 if the CEs at offset
+ *         cannot be produced by any known string
+ */
+int32_t CollData::minLengthInChars(const CEList *ceList, int32_t offset, int32_t *history) const
+{
+    // find out shortest string for the longest sequence of ces.
+    // this can probably be folded with the minLengthCache...
+
+    if (history[offset] >= 0) {
+        return history[offset];
+    }
+
+    uint32_t ce = ceList->get(offset);
+    int32_t maxOffset = ceList->size();
+    int32_t shortestLength = INT32_MAX;
+    const StringList *strings = ceToCharsStartingWith->getStringList(ce);
+
+    if (strings != NULL) {
+        int32_t stringCount = strings->size();
+      
+        for (int32_t s = 0; s < stringCount; s += 1) {
+            const UnicodeString *string = strings->get(s);
+#ifdef CACHE_CELISTS
+            const CEList *ceList2 = charsToCEList->get(string);
+#else
+            UErrorCode status = U_ZERO_ERROR;
+            const CEList *ceList2 = new CEList(coll, *string, status);
+
+            if (U_FAILURE(status)) {
+                delete ceList2;
+                ceList2 = NULL;
+            }
+#endif
+
+            // matchesAt() rejects a NULL list, so a failed CEList is skipped.
+            if (ceList->matchesAt(offset, ceList2)) {
+                int32_t clength = ceList2->size();
+                int32_t slength = string->length();
+                int32_t roffset = offset + clength;
+                int32_t rlength = 0;
+                
+                if (roffset < maxOffset) {
+                    rlength = minLengthInChars(ceList, roffset, history);
+
+                    if (rlength <= 0) {
+                        // ignore any dead ends
+#ifndef CACHE_CELISTS
+                        // the temporary list must not outlive this iteration
+                        delete ceList2;
+#endif
+                        continue;
+                    }
+                }
+
+                if (shortestLength > slength + rlength) {
+                    shortestLength = slength + rlength;
+                }
+            }
+
+#ifndef CACHE_CELISTS
+            delete ceList2;
+#endif
+        }
+    }
+
+    if (shortestLength == INT32_MAX) {
+        // No matching strings at this offset. See if 
+        // the CE is in a range we can handle manually.
+        if (ce >= minHan && ce < maxHan) {
+            // all han have implicit orders which
+            // generate two CEs.
+            int32_t roffset = offset + 2;
+            int32_t rlength = 0;
+
+          //history[roffset++] = -1;
+          //history[roffset++] = 1;
+
+            if (roffset < maxOffset) {
+                rlength = minLengthInChars(ceList, roffset, history);
+            }
+
+            if (rlength < 0) {
+                return -1;
+            }
+
+            shortestLength = 1 + rlength;
+            goto have_shortest;
+        } else if (ce >= jamoLimits[0] && ce < jamoLimits[3]) {
+            int32_t roffset = offset;
+            int32_t rlength = 0;
+
+            // **** this loop may not handle archaic Hangul correctly ****
+            for (int32_t j = 0; roffset < maxOffset && j < 4; j += 1, roffset += 1) {
+                uint32_t jce = ceList->get(roffset);
+
+                // Some Jamo have 24-bit primary order; skip the
+                // 2nd CE. This should always be OK because if
+                // we're still in the loop all we've seen are
+                // a series of Jamo in LVT order.
+                if (isContinuation(jce)) {
+                    continue;
+                }
+
+                if (j >= 3 || jce < jamoLimits[j] || jce >= jamoLimits[j + 1]) {
+                    break;
+                }
+            }
+
+            if (roffset == offset) {
+                // we started with a non-L Jamo...
+                // just say it comes from a single character
+                roffset += 1;
+
+                // See if the single Jamo has a 24-bit order.
+                if (roffset < maxOffset && isContinuation(ceList->get(roffset))) {
+                    roffset += 1;
+                }
+            }
+
+            if (roffset < maxOffset) {
+                rlength = minLengthInChars(ceList, roffset, history);
+            }
+
+            if (rlength < 0) {
+                return -1;
+            }
+
+            shortestLength = 1 + rlength;
+            goto have_shortest;
+        }
+
+        // Can't handle it manually either. Just move on.
+        return -1;
+    }
+
+have_shortest:
+    // Only successful results are cached; dead ends return before this point.
+    history[offset] = shortestLength;
+
+    return shortestLength;
+}
+
+/**
+ * Computes the length of the shortest string that can generate the
+ * CEs of ceList from offset to the end.
+ *
+ * @param ceList the CE list to analyze
+ * @param offset index of the first CE to consider
+ * @return the minimum length in chars; 0 if offset is exactly the end
+ *         of the list (no CEs left to generate); -1 if offset is out
+ *         of range, the work buffer cannot be allocated, or the CEs
+ *         cannot be produced by any known string
+ */
+int32_t CollData::minLengthInChars(const CEList *ceList, int32_t offset) const
+{
+    int32_t clength = ceList->size();
+
+    // There is no history slot for an offset outside the list. Treat
+    // "nothing left" the way the recursive overload does (length 0)
+    // and reject everything else.
+    if (offset < 0 || offset >= clength) {
+        return offset == clength ? 0 : -1;
+    }
+
+    int32_t *history = NEW_ARRAY(int32_t, clength);
+
+    if (history == NULL) {
+        return -1;
+    }
+
+    // A negative entry marks an offset that hasn't been computed yet.
+    for (int32_t i = 0; i < clength; i += 1) {
+        history[i] = -1;
+    }
+
+    int32_t minLength = minLengthInChars(ceList, offset, history);
+
+    DELETE_ARRAY(history);
+
+    return minLength;
+}
+
+/**
+ * Returns the shared CollData object for a collator, taking it from
+ * the global cache or creating it there.
+ *
+ * @param collator the collator to look up
+ * @param status   nothing is done if already a failure; set to
+ *                 U_MEMORY_ALLOCATION_ERROR if the global cache cannot
+ *                 be created, otherwise receives any error from the cache
+ * @return the CollData object, or NULL on failure
+ */
+CollData *CollData::open(UCollator *collator, UErrorCode &status)
+{
+    if (U_FAILURE(status)) {
+        return NULL;
+    }
+
+    CollDataCache *cache = getCollDataCache();
+
+    // getCollDataCache() returns NULL when the cache could not be set up.
+    if (cache == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+        
+    return cache->get(collator, status);
+}
+
+/**
+ * Releases a CollData object obtained from open() by dropping one
+ * reference to its entry in the global cache.
+ *
+ * @param collData the object to release; NULL is ignored
+ */
+void CollData::close(CollData *collData)
+{
+    if (collData == NULL) {
+        return;
+    }
+
+    CollDataCache *cache = getCollDataCache();
+
+    // getCollDataCache() returns NULL when the cache could not be set up.
+    if (cache != NULL) {
+        cache->unref(collData);
+    }
+}
+
+CollDataCache *CollData::collDataCache = NULL;
+
+/**
+ * Returns the global CollData cache, creating it on first use.
+ *
+ * The new cache is built outside the global mutex and only published
+ * under it; if another thread published one first, the local copy is
+ * discarded.
+ *
+ * @return the global cache, or NULL if a new cache could not be created
+ */
+CollDataCache *CollData::getCollDataCache()
+{
+    UErrorCode status = U_ZERO_ERROR;
+    CollDataCache *cache = NULL;
+
+    // presumably reads collDataCache into cache under the global mutex - see UMTX_CHECK
+    UMTX_CHECK(NULL, collDataCache, cache);
+
+    if (cache == NULL) {
+        cache = new CollDataCache(status);
+
+        if (U_FAILURE(status)) {
+            delete cache;
+            return NULL;
+        }
+
+        // Publish the new cache unless another thread beat us to it.
+        umtx_lock(NULL);
+        if (collDataCache == NULL) {
+            collDataCache = cache;
+
+            ucln_i18n_registerCleanup(UCLN_I18N_COLL_DATA, coll_data_cleanup);
+        }
+        umtx_unlock(NULL);
+
+        // We lost the race; drop our copy and use the published one.
+        if (collDataCache != cache) {
+            delete cache;
+        }
+    }
+
+    return collDataCache;
+}
+
+/**
+ * Detaches the global CollData cache and deletes it. Called from
+ * coll_data_cleanup().
+ */
+void CollData::freeCollDataCache()
+{
+    CollDataCache *cache = NULL;
+
+    // presumably reads collDataCache into cache under the global mutex - see UMTX_CHECK
+    UMTX_CHECK(NULL, collDataCache, cache);
+
+    if (cache != NULL) {
+        umtx_lock(NULL);
+        if (collDataCache != NULL) {
+            // Detach the cache so nobody else picks it up...
+            collDataCache = NULL;
+        } else {
+            // ...unless another thread already detached it.
+            cache = NULL;
+        }
+        umtx_unlock(NULL);
+
+        // Deleted outside the lock; a no-op if cache was reset above.
+        delete cache;
+    }
+}
+
+/**
+ * Removes all unreferenced entries from the global CollData cache,
+ * if the cache exists.
+ */
+void CollData::flushCollDataCache()
+{
+    CollDataCache *cache = NULL;
+
+    // presumably reads collDataCache into cache under the global mutex - see UMTX_CHECK
+    UMTX_CHECK(NULL, collDataCache, cache);
+
+    // **** this will fail if another     ****
+    // **** thread deletes the cache here ****
+    if (cache != NULL) {
+        cache->flush();
+    }
+}
+
+U_NAMESPACE_END
+
+#endif // #if !UCONFIG_NO_COLLATION
diff --git a/icu4c/source/i18n/i18n.vcproj b/icu4c/source/i18n/i18n.vcproj
index ea6e0b6dd1..3d58425c76 100644
--- a/icu4c/source/i18n/i18n.vcproj
+++ b/icu4c/source/i18n/i18n.vcproj
@@ -408,6 +408,40 @@
 		<Filter
 			Name="collation"
 			>
+			<File
+				RelativePath=".\bms.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\unicode\bms.h"
+				>
+				<FileConfiguration
+					Name="Debug|Win32"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="copy &quot;$(InputPath)&quot; ..\..\include\unicode"
+						Outputs="..\..\include\unicode\$(InputFileName)"
+					/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath=".\bmsearch.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\unicode\bmsearch.h"
+				>
+				<FileConfiguration
+					Name="Debug|Win32"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="copy &quot;$(InputPath)&quot; ..\..\include\unicode"
+						Outputs="..\..\include\unicode\$(InputFileName)"
+					/>
+				</FileConfiguration>
+			</File>
 			<File
 				RelativePath=".\bocsu.c"
 				>
@@ -504,6 +538,23 @@
 					/>
 				</FileConfiguration>
 			</File>
+			<File
+				RelativePath=".\colldata.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\unicode\colldata.h"
+				>
+				<FileConfiguration
+					Name="Debug|Win32"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="copy &quot;$(InputPath)&quot; ..\..\include\unicode"
+						Outputs="..\..\include\unicode\$(InputFileName)"
+					/>
+				</FileConfiguration>
+			</File>
 			<File
 				RelativePath=".\search.cpp"
 				>
diff --git a/icu4c/source/i18n/ucln_in.h b/icu4c/source/i18n/ucln_in.h
index d133db0b91..82de8389bc 100644
--- a/icu4c/source/i18n/ucln_in.h
+++ b/icu4c/source/i18n/ucln_in.h
@@ -1,7 +1,7 @@
 /*
 ******************************************************************************
 *                                                                            *
-* Copyright (C) 2001-2008, International Business Machines                   *
+* Copyright (C) 2001-2009, International Business Machines                   *
 *                Corporation and others. All Rights Reserved.                *
 *                                                                            *
 ******************************************************************************
@@ -45,6 +45,7 @@ typedef enum ECleanupI18NType {
     UCLN_I18N_UCOL_RES,
     UCLN_I18N_UCOL_BLD,
     UCLN_I18N_CSDET,
+    UCLN_I18N_COLL_DATA,
     UCLN_I18N_COUNT /* This must be last */
 } ECleanupI18NType;
 
diff --git a/icu4c/source/i18n/ucol.cpp b/icu4c/source/i18n/ucol.cpp
index d22684dd1f..c9a751c36a 100644
--- a/icu4c/source/i18n/ucol.cpp
+++ b/icu4c/source/i18n/ucol.cpp
@@ -1,6 +1,6 @@
 /*
 *******************************************************************************
-*   Copyright (C) 1996-2008, International Business Machines
+*   Copyright (C) 1996-2009, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *******************************************************************************
 *   file name:  ucol.cpp
@@ -123,7 +123,6 @@ uprv_init_collIterate(const UCollator *collator, const UChar *sourceString,
     IInit_collIterate(collator, sourceString, sourceLen, s);
 }
 
-
 /**
 * Backup the state of the collIterate struct data
 * @param data collIterate to backup
@@ -1499,10 +1498,30 @@ inline uint32_t ucol_IGetNextCE(const UCollator *coll, collIterate *collationSou
     }
     else
     {
-        order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
+        // Always use UCA for Han, Hangul
+        // (Han extension A is before main Han block)
+        // **** Han compatibility chars ?? ****
+        if ((collationSource->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 &&
+            (ch >= UCOL_FIRST_HAN_A && ch <= UCOL_LAST_HANGUL)) {
+            if (ch > UCOL_LAST_HAN && ch < UCOL_FIRST_HANGUL) {
+                // between the two target ranges; do normal lookup
+                // **** this range is YI, Modifier tone letters, ****
+                // **** Latin-D, Syloti Nagari, Phagas-pa.       ****
+                // **** Latin-D might be tailored, so we need to ****
+                // **** do the normal lookup for these guys.     ****
+                order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
+            } else {
+                // in one of the target ranges; use UCA
+                order = UCOL_NOT_FOUND;
+            }
+        } else {
+            order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
+        }
+
         if(order > UCOL_NOT_FOUND) {                                       /* if a CE is special                */
             order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status);    /* and try to get the special CE     */
         }
+
         if(order == UCOL_NOT_FOUND && coll->UCA) {   /* We couldn't find a good CE in the tailoring */
             /* if we got here, the codepoint MUST be over 0xFF - so we look directly in the trie */
             order = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);
@@ -1939,7 +1958,23 @@ inline uint32_t ucol_IGetPrevCE(const UCollator *coll, collIterate *data,
                 result = coll->latinOneMapping[ch];
             }
             else {
-                result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
+                // Always use UCA for [3400..9FFF], [AC00..D7AF]
+                // **** [FA0E..FA2F] ?? ****
+                if ((data->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 &&
+                    (ch >= 0x3400 && ch <= 0xD7AF)) {
+                    if (ch > 0x9FFF && ch < 0xAC00) {
+                        // between the two target ranges; do normal lookup
+                        // **** this range is YI, Modifier tone letters, ****
+                        // **** Latin-D, Syloti Nagari, Phagas-pa.       ****
+                        // **** Latin-D might be tailored, so we need to ****
+                        // **** do the normal lookup for these guys.     ****
+                         result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
+                    } else {
+                        result = UCOL_NOT_FOUND;
+                    }
+                } else {
+                    result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
+                }
             }
             if (result > UCOL_NOT_FOUND) {
                 result = ucol_prv_getSpecialPrevCE(coll, ch, result, data, status);
@@ -3545,38 +3580,12 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
 
             int32_t offsetBias;
 
-#if 0
-            if (source->offsetReturn != NULL) {
-                source->offsetStore = source->offsetReturn - noChars;
-            }
-
             // **** doesn't work if using iterator ****
             if (source->flags & UCOL_ITER_INNORMBUF) {
-                if (source->fcdPosition == NULL) {
-                    offsetBias = 0;
-                } else {
-                    offsetBias = (int32_t)(source->fcdPosition - source->string);
-                }
-            } else {
-                offsetBias = (int32_t)(source->pos - source->string);
-            }
-
-#else
-            // **** doesn't work if using iterator ****
-            if (source->flags & UCOL_ITER_INNORMBUF) {
-#if 1
                 offsetBias = -1;
-#else
-              if (source->fcdPosition == NULL) {
-                  offsetBias = 0;
-              } else {
-                  offsetBias = (int32_t)(source->fcdPosition - source->string);
-              }
-#endif
             } else {
                 offsetBias = (int32_t)(source->pos - source->string);
             }
-#endif
 
             /* a new collIterate is used to simplify things, since using the current
             collIterate will mean that the forward and backwards iteration will
@@ -3584,9 +3593,9 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
             collIterate temp;
             int32_t rawOffset;
 
-            //IInit_collIterate(coll, UCharOffset, -1, &temp);
             IInit_collIterate(coll, UCharOffset, noChars, &temp);
             temp.flags &= ~UCOL_ITER_NORM;
+            temp.flags |= source->flags & UCOL_FORCE_HAN_IMPLICIT;
 
             rawOffset = temp.pos - temp.string; // should always be zero?
             CE = ucol_IGetNextCE(coll, &temp, status);
@@ -3679,7 +3688,12 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
                     }
                 }
 
-                rawOffset = temp.pos - temp.string;
+                if ((temp.flags & UCOL_ITER_INNORMBUF) != 0) {
+                    rawOffset = temp.fcdPosition - temp.string;
+                } else {
+                    rawOffset = temp.pos - temp.string;
+                }
+
                 CE = ucol_IGetNextCE(coll, &temp, status);
             }
 
@@ -4136,29 +4150,6 @@ uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
             }
 
         case IMPLICIT_TAG:        /* everything that is not defined otherwise */
-#if 0
-			if (source->offsetBuffer == NULL) {
-				source->offsetBufferSize = UCOL_EXPAND_CE_BUFFER_SIZE;
-				source->offsetBuffer = (int32_t *) uprv_malloc(sizeof(int32_t) * UCOL_EXPAND_CE_BUFFER_SIZE);
-				source->offsetStore = source->offsetBuffer;
-			}
-
-			// **** doesn't work if using iterator ****
-			if (source->flags & UCOL_ITER_INNORMBUF) {
-			  source->offsetRepeatCount = 1;
-			} else {
-			  int32_t firstOffset = (int32_t)(source->pos - source->string);
-
-			  *(source->offsetStore++) = firstOffset;
-			  *(source->offsetStore++) = firstOffset + 1;
-
-				source->offsetReturn = source->offsetStore - 1;
-				if (source->offsetReturn == source->offsetBuffer) {
-					source->offsetStore = source->offsetBuffer;
-				}
-			}
-#endif
-
             return getPrevImplicit(ch, source);
 
             // TODO: Remove CJK implicits as they are handled by the getImplicitPrimary function
diff --git a/icu4c/source/i18n/ucol_imp.h b/icu4c/source/i18n/ucol_imp.h
index 9ed7f3f12f..0e1736f906 100644
--- a/icu4c/source/i18n/ucol_imp.h
+++ b/icu4c/source/i18n/ucol_imp.h
@@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 1998-2008, International Business Machines
+*   Copyright (C) 1998-2009, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@@ -260,6 +260,8 @@ minimum number for special Jamo
                               /* by index */
 #define UCOL_USE_ITERATOR   64
 
+#define UCOL_FORCE_HAN_IMPLICIT 128
+
 #define NFC_ZERO_CC_BLOCK_LIMIT_  0x300
 
 typedef struct collIterate {
@@ -390,6 +392,29 @@ uprv_init_pce(const struct UCollationElements *elems);
                          (((uint32_t)(ch) - 0x1161) <= (0x1175 - 0x1161)) || \
                          (((uint32_t)(ch) - 0x11A8) <= (0x11C2 - 0x11A8)))
 
+/* Han character ranges: main block, extension A (which precedes the main block), compatibility */
+#define UCOL_FIRST_HAN 0x4E00
+#define UCOL_LAST_HAN  0x9FFF
+#define UCOL_FIRST_HAN_A 0x3400
+#define UCOL_LAST_HAN_A  0x4DBF
+#define UCOL_FIRST_HAN_COMPAT 0xFA0E /* was 0xFAE0 (transposed digits), which exceeded UCOL_LAST_HAN_COMPAT */
+#define UCOL_LAST_HAN_COMPAT  0xFA2F
+
+/* Han extension B is in plane 2 */
+#define UCOL_FIRST_HAN_B_LEAD  0xD840
+#define UCOL_FIRST_HAN_B_TRAIL 0xDC00
+#define UCOL_LAST_HAN_B_LEAD   0xD869
+#define UCOL_LAST_HAN_B_TRAIL  0xDEDF
+
+/* Hangul range */
+#define UCOL_FIRST_HANGUL 0xAC00
+#define UCOL_LAST_HANGUL  0xD7AF
+
+/* Jamo ranges */
+#define UCOL_FIRST_L_JAMO 0x1100
+#define UCOL_FIRST_V_JAMO 0x1161
+#define UCOL_FIRST_T_JAMO 0x11A8
+#define UCOL_LAST_T_JAMO  0x11F9
 
 
 #if 0
diff --git a/icu4c/source/i18n/ucol_sit.cpp b/icu4c/source/i18n/ucol_sit.cpp
index 96fc7b8aef..9c6df7cd89 100644
--- a/icu4c/source/i18n/ucol_sit.cpp
+++ b/icu4c/source/i18n/ucol_sit.cpp
@@ -1,6 +1,6 @@
 /*
 *******************************************************************************
-*   Copyright (C) 2004-2008, International Business Machines
+*   Copyright (C) 2004-2009, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *******************************************************************************
 *   file name:  ucol_sit.cpp
@@ -578,15 +578,15 @@ ucol_getShortDefinitionString(const UCollator *coll,
     if(elementSize) {
         // we should probably canonicalize here...
         elementSize = uloc_getLanguage(locBuff, tempbuff, internalBufferSize, status);
-        appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, capacity, languageArg);
+        appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*capacity*/internalBufferSize, languageArg);
         elementSize = uloc_getCountry(locBuff, tempbuff, internalBufferSize, status);
-        appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, capacity, regionArg);
+        appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*capacity*/internalBufferSize, regionArg);
         elementSize = uloc_getScript(locBuff, tempbuff, internalBufferSize, status);
-        appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, capacity, scriptArg);
+        appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*capacity*/internalBufferSize, scriptArg);
         elementSize = uloc_getVariant(locBuff, tempbuff, internalBufferSize, status);
-        appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, capacity, variantArg);
+        appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*capacity*/internalBufferSize, variantArg);
         elementSize = uloc_getKeywordValue(locBuff, "collation", tempbuff, internalBufferSize, status);
-        appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, capacity, keywordArg);
+        appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*capacity*/internalBufferSize, keywordArg);
     } 
 
     int32_t i = 0;
@@ -597,7 +597,7 @@ ucol_getShortDefinitionString(const UCollator *coll,
             if(attribute != UCOL_DEFAULT) {
                 char letter = ucol_sit_attributeValueToLetter(attribute, status);
                 appendShortStringElement(&letter, 1, 
-                    buffer, &resultSize, capacity, options[i].optionStart);
+                    buffer, &resultSize, /*capacity*/internalBufferSize, options[i].optionStart);
             }
         }
     }
diff --git a/icu4c/source/i18n/ucoleitr.cpp b/icu4c/source/i18n/ucoleitr.cpp
index 0b7751e489..ee1e751550 100644
--- a/icu4c/source/i18n/ucoleitr.cpp
+++ b/icu4c/source/i18n/ucoleitr.cpp
@@ -1,6 +1,6 @@
 /*
 ******************************************************************************
-*   Copyright (C) 2001-2008, International Business Machines
+*   Copyright (C) 2001-2009, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 ******************************************************************************
 *
@@ -263,7 +263,14 @@ inline uint64_t processCE(UCollationElements *elems, uint32_t ce)
         primary = ucol_primaryOrder(ce);
     }
 
-    // Continuation?
+    // **** This should probably handle continuations too.  ****
+    // **** That means that we need 24 bits for the primary ****
+    // **** instead of the 16 that we're currently using.   ****
+    // **** So we can lay out the 64 bits as: 24.12.12.16.  ****
+    // **** Another complication with continuations is that ****
+    // **** the *second* CE is marked as a continuation, so ****
+    // **** we always have to peek ahead to know how long   ****
+    // **** the primary is...                               ****
     if (elems->pce->toShift && (elems->pce->variableTop > ce && primary != 0)
                 || (elems->pce->isShifted && primary == 0)) {
 
@@ -285,7 +292,6 @@ inline uint64_t processCE(UCollationElements *elems, uint32_t ce)
         elems->pce->isShifted = FALSE;
     }
 
-
     return primary << 48 | secondary << 32 | tertiary << 16 | quaternary;
 }
 
@@ -332,6 +338,7 @@ ucol_openElements(const UCollator  *coll,
     return result;
 }
 
+
 U_CAPI void U_EXPORT2
 ucol_closeElements(UCollationElements *elems)
 {
@@ -375,7 +382,7 @@ ucol_reset(UCollationElements *elems)
         ci->endp      = ci->string + u_strlen(ci->string);
     }
     ci->CEpos       = ci->toReturn = ci->CEs;
-    ci->flags       = UCOL_ITER_HASLEN;
+    ci->flags       = (ci->flags & UCOL_FORCE_HAN_IMPLICIT) | UCOL_ITER_HASLEN;
     if (ci->coll->normalizationMode == UCOL_ON) {
         ci->flags |= UCOL_ITER_NORM;
     }
@@ -391,6 +398,21 @@ ucol_reset(UCollationElements *elems)
 	ci->offsetRepeatCount = ci->offsetRepeatValue = 0;
 }
 
+U_CAPI void U_EXPORT2
+ucol_forceHanImplicit(UCollationElements *elems, UErrorCode *status)
+{
+    if (U_FAILURE(*status)) { /* an error is already pending: leave elems untouched */
+        return;
+    }
+
+    if (elems == NULL) { /* no iterator to flag: report it through status */
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    elems->iteratordata_.flags |= UCOL_FORCE_HAN_IMPLICIT; /* OR in the flag bit; other flag bits are kept */
+}
+
 U_CAPI int32_t U_EXPORT2
 ucol_next(UCollationElements *elems, 
           UErrorCode         *status)
diff --git a/icu4c/source/i18n/unicode/bms.h b/icu4c/source/i18n/unicode/bms.h
new file mode 100644
index 0000000000..bcc138dc8a
--- /dev/null
+++ b/icu4c/source/i18n/unicode/bms.h
@@ -0,0 +1,265 @@
+/*
+ * Copyright (C) 1996-2009, International Business Machines Corporation and Others.
+ * All rights reserved.
+ */
+
+/**
+ * \file 
+ * \brief C API: Boyer-Moore StringSearch prototype.
+ * \internal
+ */
+
+#ifndef _BMS_H
+#define _BMS_H
+
+#include "unicode/utypes.h"
+#include "unicode/ucol.h"
+
+/**
+ * A <code>UCD</code> object holds the Collator-specific data needed to
+ * compute the length of the shortest string that can
+ * generate a particular list of CEs.
+ *
+ * <code>UCD</code> objects are quite expensive to compute. Because
+ * of this, they are cached. When you call <code>ucd_open</code> it
+ * returns a reference counted cached object. When you call <code>ucd_close</code>
+ * the reference count on the object is decremented but the object is not deleted.
+ *
+ * If you do not need to reuse any unreferenced objects in the cache, you can call
+ * <code>ucd_flushCache</code>. If you no longer need any <code>UCD</code>
+ * objects, you can call <code>ucd_freeCache</code>.
+ */
+typedef void UCD;
+
+/**
+ * Open a <code>UCD</code> object.
+ *
+ * @param coll - the collator
+ * @param status - will be set if any errors occur. 
+ *
+ * @return the <code>UCD</code> object. You must call
+ *         <code>ucd_close</code> when you are done using the object.
+ *
+ * Note: if on return status is set to an error, the only safe
+ * thing to do with the returned object is to call <code>ucd_close</code>.
+ *
+ * @internal ICU 4.0.1 technology preview
+ */
+U_CAPI UCD * U_EXPORT2
+ucd_open(UCollator *coll, UErrorCode *status);
+
+/**
+ * Release a <code>UCD</code> object.
+ *
+ * @param ucd - the object
+ *
+ * @internal ICU 4.0.1 technology preview
+ */
+U_CAPI void U_EXPORT2
+ucd_close(UCD *ucd);
+
+/**
+ * Get the <code>UCollator</code> object used to create a <code>UCD</code> object.
+ * The <code>UCollator</code> object returned may not be the exact
+ * object that was used to create this object, but it will have the
+ * same behavior.
+ *
+ * @param ucd - the <code>UCD</code> object
+ *
+ * @return the <code>UCollator</code> used to create the given
+ *         <code>UCD</code> object.
+ *
+ * @internal ICU 4.0.1 technology preview
+ */
+U_CAPI UCollator * U_EXPORT2
+ucd_getCollator(UCD *ucd);
+
+/**
+ * <code>UCD</code> objects are expensive to compute, and so
+ * may be cached. This routine will free the cached objects and delete
+ * the cache.
+ *
+ * WARNING: Don't call this until you have called <code>ucd_close</code>
+ * for each <code>UCD</code> object that you have used. Also,
+ * DO NOT call this if another thread may be calling <code>ucd_flushCache</code>
+ * at the same time.
+ *
+ * @internal ICU 4.0.1 technology preview
+ */
+U_CAPI void U_EXPORT2
+ucd_freeCache();
+
+/**
+ * <code>UCD</code> objects are expensive to compute, and so
+ * may be cached. This routine will remove any unused <code>UCD</code>
+ * objects from the cache.
+ *
+ * @internal ICU 4.0.1 technology preview
+ */
+U_CAPI void U_EXPORT2
+ucd_flushCache();
+
+/**
+ * BMS
+ *
+ * This object holds the information needed to do a Collation sensitive Boyer-Moore search. It encapsulates
+ * the pattern, the "bad character" and "good suffix" tables, the Collator-based data needed to compute them,
+ * and a reference to the text being searched.
+ *
+ * To do a search, you first need to get a <code>UCD</code> object by calling <code>ucd_open</code>.
+ * Then you construct a <code>BMS</code> object from the <code>UCD</code> object, the pattern
+ * string and the target string. Then you call the <code>bms_search</code> function. Here's a code sample:
+ *
+ * <pre>
+ * void boyerMooreExample(UCollator *collator, UChar *pattern, int32_t patternLength, UChar *target, int32_t targetLength)
+ * {
+ *     UErrorCode status = U_ZERO_ERROR;
+ *     int32_t offset = 0, start = -1, end = -1;
+ *     UCD *ucd = NULL;
+ *     BMS *bms = NULL;
+ *
+ *     ucd = ucd_open(collator, &status);
+ *     if (U_FAILURE(status)) {
+ *         // could not create a UCD object
+ *         return;
+ *     }
+ *
+ *     bms = bms_open(ucd, pattern, patternLength, target, targetLength, &status);
+ *     if (U_FAILURE(status)) {
+ *         // could not create a BMS object
+ *         ucd_close(ucd);
+ *         return;
+ *     }
+ *
+ *
+ *     // Find all matches
+ *     while (bms_search(bms, offset, &start, &end)) {
+ *         // process the match between start and end
+ *         ...
+ *
+ *         // advance past the match
+ *         offset = end; 
+ *     }
+ *
+ *     // at this point, if offset == 0, there were no matches
+ *     if (offset == 0) {
+ *         // handle the case of no matches
+ *     }
+ *
+ *     bms_close(bms);
+ *     ucd_close(ucd);
+ *
+ *     // UCD objects are cached, so the call to
+ *     // ucd_close doesn't delete the object.
+ *     // Call this if you don't need the object any more.
+ *     ucd_flushCache();
+ * }
+ * </pre>
+ *
+ * NOTE: This is a technology preview. The final version of this API may not bear any resemblance to this API.
+ *
+ * Known limitations:
+ *   1) Backwards searching has not been implemented.
+ *
+ *   2) For Han and Hangul characters, this code ignores any Collation tailorings. In general,
+ *      this isn't a problem, but in Korean locales, at strength 1, Hangul characters are tailored
+ *      to be equal to Han characters with the same pronunciation. Because this code ignores
+ *      tailorings, searching for a Hangul character will not find a Han character and vice versa.
+ *
+ *   3) In some cases, searching for a pattern that needs to be normalized and ends
+ *      in a discontiguous contraction may fail. The only known cases of this are with
+ *      the Tibetan script. For example searching for the pattern
+ *      "\u0F7F\u0F80\u0F81\u0F82\u0F83\u0F84\u0F85" will fail. (This case is artificial. We've
+ *      been unable to find a practical, real-world example of this failure.)
+ *
+ * NOTE: This is a technology preview. The final version of this API may not bear any resemblance to this API.
+ *
+ * @internal ICU 4.0.1 technology preview
+ */
+struct BMS;
+typedef struct BMS BMS;
+
+/**
+ * Construct a <code>BMS</code> object.
+ *
+ * @param ucd - A <code>UCD</code> object holding the Collator-sensitive data
+ * @param pattern - the string for which to search
+ * @param patternLength - the length of the string for which to search
+ * @param target - the string in which to search
+ * @param targetLength - the length of the string in which to search
+ * @param status - will be set if any errors occur. 
+ *
+ * @return the <code>BMS</code> object.
+ *
+ * Note: if on return status is set to an error, the only safe
+ * thing to do with the returned object is to call
+ * <code>bms_close</code>.
+ *
+ * @internal ICU 4.0.1 technology preview
+ */
+U_CAPI BMS * U_EXPORT2
+bms_open(UCD *ucd,
+         const UChar *pattern, int32_t patternLength,
+         const UChar *target,  int32_t targetLength,
+         UErrorCode  *status);
+
+/**
+ * Close a <code>BMS</code> object and release all the
+ * storage associated with it.
+ *
+ * @param bms - the <code>BMS</code> object to close.
+ */
+U_CAPI void U_EXPORT2
+bms_close(BMS *bms);
+
+/**
+ * Test the pattern to see if it generates any CEs.
+ *
+ * @return <code>TRUE</code> if the pattern string did not generate any CEs
+ *
+ * @internal ICU 4.0.1 technology preview
+ */
+U_CAPI UBool U_EXPORT2
+bms_empty(BMS *bms);
+
+/**
+ * Get the <code>UCD</code> object used to create
+ * a given <code>BMS</code> object.
+ *
+ * @param bms - the <code>BMS</code> object
+ *
+ * @return - the <code>UCD</code> object used to create
+ *           the given <code>BMS</code> object.
+ *
+ * @internal ICU 4.0.1 technology preview
+ */
+U_CAPI UCD * U_EXPORT2
+bms_getData(BMS *bms);
+
+/**
+ * Search for the pattern string in the target string.
+ *
+ * @param offset - the offset in the target string at which to begin the search
+ * @param start - will be set to the starting offset of the match, or -1 if there's no match
+ * @param end - will be set to the ending offset of the match, or -1 if there's no match
+ *
+ * @return <code>TRUE</code> if the match succeeds, <code>FALSE</code> otherwise.
+ *
+ * @internal ICU 4.0.1 technology preview
+ */
+U_CAPI UBool U_EXPORT2
+bms_search(BMS *bms, int32_t offset, int32_t *start, int32_t *end);
+
+/**
+ * Set the target string for the match.
+ *
+ * @param target - the new target string
+ * @param targetLength - the length of the new target string
+ * @param status - will be set if any errors occur. 
+ *
+ * @internal ICU 4.0.1 technology preview
+ */
+U_CAPI void U_EXPORT2
+bms_setTargetString(BMS *bms, const UChar *target, int32_t targetLength, UErrorCode *status);
+
+#endif /* _BMS_H */
diff --git a/icu4c/source/i18n/unicode/bmsearch.h b/icu4c/source/i18n/unicode/bmsearch.h
new file mode 100644
index 0000000000..d02e289b6f
--- /dev/null
+++ b/icu4c/source/i18n/unicode/bmsearch.h
@@ -0,0 +1,221 @@
+/*
+ ******************************************************************************
+ *   Copyright (C) 1996-2009, International Business Machines                 *
+ *   Corporation and others.  All Rights Reserved.                            *
+ ******************************************************************************
+ */
+
+/**
+ * \file 
+ * \brief C++ API: Boyer-Moore StringSearch technology preview
+ * \internal ICU 4.0.1 technology preview
+ */
+ 
+#ifndef B_M_SEARCH_H
+#define B_M_SEARCH_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_COLLATION
+
+#include "unicode/uobject.h"
+#include "unicode/ucol.h"
+
+#include "unicode/colldata.h"
+
+U_NAMESPACE_BEGIN
+
+class BadCharacterTable;
+class GoodSuffixTable;
+class Target;
+
+/**
+ * BoyerMooreSearch
+ *
+ * This object holds the information needed to do a Collation sensitive Boyer-Moore search. It encapsulates
+ * the pattern, the "bad character" and "good suffix" tables, the Collator-based data needed to compute them,
+ * and a reference to the text being searched.
+ *
+ * To do a search, you first need to get a <code>CollData</code> object by calling <code>CollData::open</code>.
+ * Then you construct a <code>BoyerMooreSearch</code> object from the <code>CollData</code> object, the pattern
+ * string and the target string. Then you call the <code>search</code> method. Here's a code sample:
+ *
+ * <pre>
+ * void boyerMooreExample(UCollator *collator, UnicodeString *pattern, UnicodeString *target)
+ * {
+ *     UErrorCode status = U_ZERO_ERROR;
+ *     CollData *collData = CollData::open(collator, status);
+ *
+ *     if (U_FAILURE(status)) {
+ *         // could not create a CollData object
+ *         return;
+ *     }
+ *
+ *     BoyerMooreSearch *search = new BoyerMooreSearch(collData, *pattern, target, status);
+ *
+ *     if (U_FAILURE(status)) {
+ *         // could not create a BoyerMooreSearch object
+ *         CollData::close(collData);
+ *         return;
+ *     }
+ *
+ *     int32_t offset = 0, start = -1, end = -1;
+ *
+ *     // Find all matches
+ *     while (search->search(offset, start, end)) {
+ *         // process the match between start and end
+ *         ...
+ *         // advance past the match
+ *         offset = end; 
+ *     }
+ *
+ *     // at this point, if offset == 0, there were no matches
+ *     if (offset == 0) {
+ *         // handle the case of no matches
+ *     }
+ *
+ *     delete search;
+ *     CollData::close(collData);
+ *
+ *     // CollData objects are cached, so the call to
+ *     // CollData::close doesn't delete the object.
+ *     // Call this if you don't need the object any more.
+ *     CollData::flushCollDataCache();
+ * }
+ * </pre>
+ *
+ * NOTE: This is a technology preview. The final version of this API may not bear any resemblance to this API.
+ *
+ * Known limitations:
+ *   1) Backwards searching has not been implemented.
+ *
+ *   2) For Han and Hangul characters, this code ignores any Collation tailorings. In general,
+ *      this isn't a problem, but in Korean locales, at strength 1, Hangul characters are tailored
+ *      to be equal to Han characters with the same pronunciation. Because this code ignores
+ *      tailorings, searching for a Hangul character will not find a Han character and vice versa.
+ *
+ *   3) In some cases, searching for a pattern that needs to be normalized and ends
+ *      in a discontiguous contraction may fail. The only known cases of this are with
+ *      the Tibetan script. For example searching for the pattern
+ *      "\u0F7F\u0F80\u0F81\u0F82\u0F83\u0F84\u0F85" will fail. (This case is artificial. We've
+ *      been unable to find a practical, real-world example of this failure.)
+ *
+ * @internal ICU 4.0.1 technology preview
+ *
+ * @see CollData
+ */
+class U_I18N_API BoyerMooreSearch : public UObject
+{
+public:
+    /**
+     * Construct a <code>BoyerMooreSearch</code> object.
+     *
+     * @param theData - A <code>CollData</code> object holding the Collator-sensitive data
+     * @param patternString - the string for which to search
+     * @param targetString - the string in which to search or <code>NULL</code> if you will
+     *                       set it later by calling <code>setTargetString</code>.
+     * @param status - will be set if any errors occur. 
+     *
+     * Note: if on return, status is set to an error code,
+     * the only safe thing to do with this object is to call
+     * the destructor.
+     *
+     * @internal ICU 4.0.1 technology preview
+     */
+    BoyerMooreSearch(CollData *theData, const UnicodeString &patternString, const UnicodeString *targetString, UErrorCode &status);
+
+    /**
+     * The destructor
+     *
+     * @internal ICU 4.0.1 technology preview
+     */
+    ~BoyerMooreSearch();
+
+    /**
+     * Test the pattern to see if it generates any CEs.
+     *
+     * @return <code>TRUE</code> if the pattern string did not generate any CEs
+     *
+     * @internal ICU 4.0.1 technology preview
+     */
+    UBool empty();
+
+    /**
+     * Search for the pattern string in the target string.
+     *
+     * @param offset - the offset in the target string at which to begin the search
+     * @param start - will be set to the starting offset of the match, or -1 if there's no match
+     * @param end - will be set to the ending offset of the match, or -1 if there's no match
+     *
+     * @return <code>TRUE</code> if the match succeeds, <code>FALSE</code> otherwise.
+     *
+     * @internal ICU 4.0.1 technology preview
+     */
+    UBool search(int32_t offset, int32_t &start, int32_t &end);
+
+    /**
+     * Set the target string for the match.
+     *
+     * @param targetString - the new target string
+     * @param status - will be set if any errors occur. 
+     *
+     * @internal ICU 4.0.1 technology preview
+     */
+    void setTargetString(const UnicodeString *targetString, UErrorCode &status);
+
+    // **** no longer need these? ****
+    /**
+     * Return the <code>CollData</code> object used for searching
+     *
+     * @return the <code>CollData</code> object used for searching
+     *
+     * @internal ICU 4.0.1 technology preview
+     */
+    CollData *getData();
+
+    /**
+     * Return the CEs generated by the pattern string.
+     *
+     * @return a <code>CEList</code> object holding the CEs generated by the pattern string.
+     *
+     * @internal ICU 4.0.1 technology preview
+     */
+    CEList   *getPatternCEs();
+
+    /**
+     * Return the <code>BadCharacterTable</code> object computed for the pattern string.
+     *
+     * @return the <code>BadCharacterTable</code> object.
+     *
+     * @internal ICU 4.0.1 technology preview
+     */
+    BadCharacterTable *getBadCharacterTable();
+
+    /**
+     * Return the <code>GoodSuffixTable</code> object computed for the pattern string.
+     *
+     * @return the <code>GoodSuffixTable</code> object computed for the pattern string.
+     *
+     * @internal ICU 4.0.1 technology preview
+     */
+    GoodSuffixTable   *getGoodSuffixTable();
+
+    /*
+     * UObject glue...
+     */
+    virtual UClassID getDynamicClassID() const;
+    static UClassID getStaticClassID();
+    
+private:
+    CollData *data;
+    CEList *patCEs;
+    BadCharacterTable *badCharacterTable;
+    GoodSuffixTable   *goodSuffixTable;
+    UnicodeString pattern;
+    Target *target;
+};
+
+U_NAMESPACE_END
+
+#endif // #if !UCONFIG_NO_COLLATION
+#endif // #ifndef B_M_SEARCH_H
diff --git a/icu4c/source/i18n/unicode/colldata.h b/icu4c/source/i18n/unicode/colldata.h
new file mode 100644
index 0000000000..ce0c0e150b
--- /dev/null
+++ b/icu4c/source/i18n/unicode/colldata.h
@@ -0,0 +1,430 @@
+/*
+ ******************************************************************************
+ *   Copyright (C) 1996-2009, International Business Machines                 *
+ *   Corporation and others.  All Rights Reserved.                            *
+ ******************************************************************************
+ */
+
+/**
+ * \file 
+ * \brief C++ API: Collation data used to compute minLengthInChars.
+ * \internal
+ */
+ 
+#ifndef COLL_DATA_H
+#define COLL_DATA_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_COLLATION
+
+#include "unicode/uobject.h"
+#include "unicode/ucol.h"
+
+U_NAMESPACE_BEGIN
+
+/*
+ * The size of the internal buffer for the Collator's short description string.
+ */
+#define KEY_BUFFER_SIZE 64
+
+ /*
+  * The size of the internal CE buffer in a <code>CEList</code> object
+  */
+#define CELIST_BUFFER_SIZE 4
+
+/*
+ * Define this to enable the <code>CEList</code> objects to collect
+ * statistics.
+ */
+//#define INSTRUMENT_CELIST
+
+ /*
+  * The size of the initial list in a <code>StringList</code> object.
+  */
+#define STRING_LIST_BUFFER_SIZE 16
+
+/*
+ * Define this to enable the <code>StringList</code> objects to
+ * collect statistics.
+ */
+//#define INSTRUMENT_STRING_LIST
+
+ /**
+  * CEList
+  *
+  * This object holds a list of CEs generated from a particular
+  * <code>UnicodeString</code>
+  *
+  * @internal ICU 4.0.1 technology preview
+  */
+class U_I18N_API CEList : public UObject
+{
+public:
+    /**
+     * Construct a <code>CEList</code> object.
+     *
+     * @param coll - the Collator used to collect the CEs.
+     * @param string - the string for which to collect the CEs.
+     * @param status - will be set if any errors occur.
+     *
+     * Note: if on return, status is set to an error code,
+     * the only safe thing to do with this object is to call
+     * the destructor.
+     *
+     * @internal ICU 4.0.1 technology preview
+     */
+    CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status);
+
+    /**
+     * The destructor.
+     */
+    ~CEList();
+
+    /**
+     * Return the number of CEs in the list.
+     *
+     * @return the number of CEs in the list.
+     *
+     * @internal ICU 4.0.1 technology preview
+     */
+    int32_t size() const;
+
+    /**
+     * Get a particular CE from the list.
+     *
+     * @param index - the index of the CE to return
+     *
+     * @return the CE, or <code>0</code> if <code>index</code> is out of range
+     *
+     * @internal ICU 4.0.1 technology preview
+     */
+    uint32_t get(int32_t index) const;
+
+    /**
+     * Check if the CEs in another <code>CEList</code> match the
+     * suffix of this list starting at a given offset.
+     *
+     * @param offset - the offset of the suffix
+     * @param other - the other <code>CEList</code>
+     *
+     * @return <code>TRUE</code> if the CEs match, <code>FALSE</code> otherwise.
+     *
+     * @internal ICU 4.0.1 technology preview
+     */
+    UBool matchesAt(int32_t offset, const CEList *other) const; 
+
+    /**
+     * The index operator. Note: it is const but returns a non-const reference.
+     *
+     * @param index - the index
+     *
+     * @return a reference to the given CE in the list
+     *
+     * @internal ICU 4.0.1 technology preview
+     */
+    uint32_t &operator[](int32_t index) const;
+
+    /*
+     * UObject glue...
+     */
+    virtual UClassID getDynamicClassID() const;
+    static UClassID getStaticClassID();
+
+private:
+    void add(uint32_t ce, UErrorCode &status);
+
+    uint32_t ceBuffer[CELIST_BUFFER_SIZE]; // the internal CE buffer sized by CELIST_BUFFER_SIZE
+    uint32_t *ces;     // NOTE(review): presumably the current CE storage - confirm in colldata.cpp
+    int32_t listMax;   // NOTE(review): presumably the capacity of ces - confirm
+    int32_t listSize;  // NOTE(review): presumably the number of CEs stored - confirm
+
+#ifdef INSTRUMENT_CELIST
+    static int32_t _active;
+    static int32_t _histogram[10];
+#endif
+};
+
+/**
+ * StringList
+ *
+ * This object holds a list of <code>UnicodeString</code> objects.
+ *
+ * @internal ICU 4.0.1 technology preview
+ */
+class U_I18N_API StringList : public UObject
+{
+public:
+    /**
+     * Construct an empty <code>StringList</code>
+     *
+     * @param status - will be set if any errors occur.
+     *
+     * Note: if on return, status is set to an error code,
+     * the only safe thing to do with this object is to call
+     * the destructor.
+     *
+     * @internal ICU 4.0.1 technology preview
+     */
+    StringList(UErrorCode &status);
+
+    /**
+     * The destructor.
+     *
+     * @internal ICU 4.0.1 technology preview
+     */
+    ~StringList();
+
+    /**
+     * Add a string to the list.
+     *
+     * @param string - the string to add
+     * @param status - will be set if any errors occur.
+     *
+     * @internal ICU 4.0.1 technology preview
+     */
+    void add(const UnicodeString *string, UErrorCode &status);
+
+    /**
+     * Add a string, given as an array of <code>UChar</code>s, to the list.
+     *
+     * @param chars - the address of the array of <code>UChar</code>s
+     * @param count - the number of <code>UChar</code>s in the array
+     * @param status - will be set if any errors occur.
+     *
+     * @internal ICU 4.0.1 technology preview
+     */
+    void add(const UChar *chars, int32_t count, UErrorCode &status);
+
+    /**
+     * Get a particular string from the list.
+     *
+     * @param index - the index of the string
+     *
+     * @return a pointer to the <code>UnicodeString</code> or <code>NULL</code>
+     *         if <code>index</code> is out of bounds.
+     *
+     * @internal ICU 4.0.1 technology preview
+     */
+    const UnicodeString *get(int32_t index) const;
+
+    /**
+     * Get the number of strings in the list.
+     *
+     * @return the number of strings in the list.
+     *
+     * @internal ICU 4.0.1 technology preview
+     */
+    int32_t size() const;
+
+    /*
+     * the UObject glue...
+     */
+    virtual UClassID getDynamicClassID() const;
+    static UClassID getStaticClassID();
+
+private:
+    UnicodeString *strings; // NOTE(review): presumably the string storage - confirm in colldata.cpp
+    int32_t listMax;        // NOTE(review): presumably the capacity of strings - confirm
+    int32_t listSize;       // NOTE(review): presumably the number of strings stored - confirm
+
+#ifdef INSTRUMENT_STRING_LIST
+    static int32_t _lists;
+    static int32_t _strings;
+    static int32_t _histogram[101];
+#endif
+};
+
+/*
+ * Forward references to internal classes.
+ */
+class StringToCEsMap;
+class CEToStringsMap;
+class CollDataCache;
+
+/**
+ * CollData
+ *
+ * This class holds the Collator-specific data needed to
+ * compute the length of the shortest string that can
+ * generate a particular list of CEs.
+ *
+ * <code>CollData</code> objects are quite expensive to compute. Because
+ * of this, they are cached. When you call <code>CollData::open</code> it
+ * returns a reference counted cached object. When you call <code>CollData::close</code>
+ * the reference count on the object is decremented but the object is not deleted.
+ *
+ * If you do not need to reuse any unreferenced objects in the cache, you can call
+ * <code>CollData::flushCollDataCache</code>. If you no longer need any <code>CollData</code>
+ * objects, you can call <code>CollData::freeCollDataCache</code>
+ *
+ * @internal ICU 4.0.1 technology preview
+ */
+class U_I18N_API CollData : public UObject
+{
+public:
+    /**
+     * Construct a <code>CollData</code> object.
+     *
+     * @param collator - the collator
+     * @param status - will be set if any errors occur.
+     *
+     * @return the <code>CollData</code> object. You must call
+     *         <code>close</code> when you are done using the object.
+     *
+     * Note: if on return, status is set to an error code,
+     * the only safe thing to do with this object is to call
+     * <code>CollData::close</code>.
+     *
+     * @internal ICU 4.0.1 technology preview
+     */
+    static CollData *open(UCollator *collator, UErrorCode &status);
+
+    /**
+     * Release a <code>CollData</code> object.
+     *
+     * @param collData - the object
+     *
+     * @internal ICU 4.0.1 technology preview
+     */
+    static void close(CollData *collData);
+
+    /**
+     * Get the <code>UCollator</code> object used to create this object.
+     * The object returned may not be the exact object that was used to
+     * create this object, but it will have the same behavior.
+     */
+    UCollator *getCollator() const;
+
+    /**
+     * Get a list of all the strings which generate a list
+     * of CEs starting with a given CE.
+     *
+     * @param ce - the CE
+     *
+     * @return a <code>StringList</code> object containing all
+     *         the strings, or <code>NULL</code> if there are
+     *         no such strings.
+     *
+     * @internal ICU 4.0.1 technology preview.
+     */
+    const StringList *getStringList(int32_t ce) const;
+
+    /**
+     * Get a list of the CEs generated by a particular string.
+     *
+     * @param string - the string
+     *
+     * @return a <code>CEList</code> object containing the CEs. You
+     *         must call <code>freeCEList</code> when you are finished
+     *         using the <code>CEList</code>.
+     *
+     * @internal ICU 4.0.1 technology preview.
+     */
+    const CEList *getCEList(const UnicodeString *string) const;
+
+    /**
+     * Release a <code>CEList</code> returned by <code>getCEList</code>.
+     *
+     * @param list - the <code>CEList</code> to free.
+     *
+     * @internal ICU 4.0.1 technology preview
+     */
+    void freeCEList(const CEList *list);
+
+    /**
+     * Return the length of the shortest string that will generate
+     * the given list of CEs.
+     *
+     * @param ces - the CEs
+     * @param offset - the offset of the first CE in the list to use.
+     *
+     * @return the length of the shortest string.
+     *
+     * @internal ICU 4.0.1 technology preview
+     */
+    int32_t minLengthInChars(const CEList *ces, int32_t offset) const;
+
+ 
+    /**
+     * Return the length of the shortest string that will generate
+     * the given list of CEs.
+     *
+     * Note: the algorithm used to do this computation is recursive. To
+     * limit the amount of recursion, a "history" list is used to record
+     * the best answer starting at a particular offset in the list of CEs.
+     * If the same offset is visited again during the recursion, the answer
+     * in the history list is used.
+     *
+     * @param ces - the CEs
+     * @param offset - the offset of the first CE in the list to use.
+     * @param history - the history list. Must be at least as long as
+     *                  the number of CEs in the <code>CEList</code>
+     *
+     * @return the length of the shortest string.
+     *
+     * @internal ICU 4.0.1 technology preview
+     */
+   int32_t minLengthInChars(const CEList *ces, int32_t offset, int32_t *history) const;
+
+   /*
+    * UObject glue...
+    */
+    virtual UClassID getDynamicClassID() const;
+    static UClassID getStaticClassID();
+
+    /**
+     * <code>CollData</code> objects are expensive to compute, and so
+     * may be cached. This routine will free the cached objects and delete
+     * the cache.
+     *
+     * WARNING: Don't call this until you have called <code>close</code>
+     * for each <code>CollData</code> object that you have used. Also,
+     * DO NOT call this if another thread may be calling <code>flushCollDataCache</code>
+     * at the same time.
+     *
+     * @internal ICU 4.0.1 technology preview
+     */
+    static void freeCollDataCache();
+
+    /**
+     * <code>CollData</code> objects are expensive to compute, and so
+     * may be cached. This routine will remove any unused <code>CollData</code>
+     * objects from the cache.
+     *
+     * @internal ICU 4.0.1 technology preview
+     */
+    static void flushCollDataCache();
+
+private:
+    friend class CollDataCache;
+    friend class CollDataCacheEntry;
+
+    CollData(UCollator *collator, char *cacheKey, int32_t cachekeyLength, UErrorCode &status);
+    ~CollData();
+
+    CollData();
+
+    static char *getCollatorKey(UCollator *collator, char *buffer, int32_t bufferLength);
+
+    static CollDataCache *getCollDataCache();
+
+    UCollator      *coll;
+    StringToCEsMap *charsToCEList;
+    CEToStringsMap *ceToCharsStartingWith;
+
+    char keyBuffer[KEY_BUFFER_SIZE]; // buffer for the Collator's short description string (see KEY_BUFFER_SIZE)
+    char *key;                       // NOTE(review): presumably points at keyBuffer or a larger allocation - confirm
+
+    static CollDataCache *collDataCache;
+
+    uint32_t minHan;
+    uint32_t maxHan;
+
+    uint32_t jamoLimits[4];
+};
+
+U_NAMESPACE_END
+
+#endif // #if !UCONFIG_NO_COLLATION
+#endif // #ifndef COLL_DATA_H
diff --git a/icu4c/source/i18n/unicode/ucoleitr.h b/icu4c/source/i18n/unicode/ucoleitr.h
index 9c951a9ab8..419cb9f7de 100644
--- a/icu4c/source/i18n/unicode/ucoleitr.h
+++ b/icu4c/source/i18n/unicode/ucoleitr.h
@@ -1,6 +1,6 @@
 /*
 *******************************************************************************
-*   Copyright (C) 2001-2008, International Business Machines
+*   Copyright (C) 2001-2009, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *******************************************************************************
 *
@@ -121,6 +121,7 @@ ucol_openElements(const UCollator  *coll,
                         int32_t    textLength,
                         UErrorCode *status);
 
+
 /**
  * get a hash code for a key... Not very useful!
  * @param key    the given key.
@@ -152,6 +153,20 @@ ucol_closeElements(UCollationElements *elems);
 U_STABLE void U_EXPORT2 
 ucol_reset(UCollationElements *elems);
 
+/**
+ * Set the collation elements to use implicit ordering for Han
+ * even if they've been tailored. This will also force Hangul
+ * syllables to be ordered by decomposing them to their component
+ * Jamo.
+ *
+ * @param elems The UCollationElements containing the text.
+ * @param status A pointer to a UErrorCode to receive any errors.
+ *
+ * @internal
+ */
+U_INTERNAL void U_EXPORT2
+ucol_forceHanImplicit(UCollationElements *elems, UErrorCode *status);
+
 /**
  * Get the ordering priority of the next collation element in the text.
  * A single character may contain more than one collation element.
diff --git a/icu4c/source/i18n/usearch.cpp b/icu4c/source/i18n/usearch.cpp
index 6078cddc2b..3a446534d9 100644
--- a/icu4c/source/i18n/usearch.cpp
+++ b/icu4c/source/i18n/usearch.cpp
@@ -1,6 +1,6 @@
 /*
 **********************************************************************
-*   Copyright (C) 2001-2008 IBM and others. All rights reserved.
+*   Copyright (C) 2001-2009 IBM and others. All rights reserved.
 **********************************************************************
 *   Date        Name        Description
 *  07/02/2001   synwee      Creation.
@@ -3785,7 +3785,7 @@ U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch  *strsrch,
         found = TRUE;
         //  Inner loop checks for a match beginning at each
         //  position from the outer loop.
-        for (patIx=0; patIx<strsrch->pattern.CELength; patIx++) {
+        for (patIx=0; patIx<strsrch->pattern.PCELength; patIx++) {
             int64_t patCE = strsrch->pattern.PCE[patIx];
             targetCEI = ceb.get(targetIx+patIx);
             //  Compare CE from target string with CE from the pattern.
@@ -3814,11 +3814,9 @@ U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch  *strsrch,
         //     an acceptable character range.
         //
         const CEI *firstCEI = ceb.get(targetIx);
-        const CEI *lastCEI  = ceb.get(targetIx + strsrch->pattern.CELength - 1);
-        const CEI *nextCEI  = ceb.get(targetIx + strsrch->pattern.CELength);
+        const CEI *lastCEI  = ceb.get(targetIx + strsrch->pattern.PCELength - 1);
+        const CEI *nextCEI  = ceb.get(targetIx + strsrch->pattern.PCELength);
 
-     // targetCEI = ceb.get(targetIx+strsrch->pattern.CELength);
-     // maxLimit = targetCEI->lowIndex;
         mStart   = firstCEI->lowIndex;
         minLimit = lastCEI->lowIndex;
         maxLimit = nextCEI->lowIndex;
@@ -3883,7 +3881,7 @@ U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch  *strsrch,
             found = FALSE;
         }
 
-        if (!checkIdentical(strsrch, mStart, mLimit)) {
+        if (! checkIdentical(strsrch, mStart, mLimit)) {
             found = FALSE;
         }
 
@@ -4006,10 +4004,10 @@ U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch  *strsrch,
         found = TRUE;
         //  Inner loop checks for a match beginning at each
         //  position from the outer loop.
-        for (patIx = strsrch->pattern.CELength - 1; patIx >= 0; patIx -= 1) {
+        for (patIx = strsrch->pattern.PCELength - 1; patIx >= 0; patIx -= 1) {
             int64_t patCE = strsrch->pattern.PCE[patIx];
 
-            targetCEI = ceb.getPrevious(targetIx + strsrch->pattern.CELength - 1 - patIx);
+            targetCEI = ceb.getPrevious(targetIx + strsrch->pattern.PCELength - 1 - patIx);
             //  Compare CE from target string with CE from the pattern.
             //    Note that the target CE will be UCOL_NULLORDER if we reach the end of input,
             //    which will fail the compare, below.
@@ -4035,7 +4033,7 @@ U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch  *strsrch,
         //  There still is a chance of match failure if the CE range not correspond to
         //     an acceptable character range.
         //
-        const CEI *firstCEI = ceb.getPrevious(targetIx + strsrch->pattern.CELength - 1);
+        const CEI *firstCEI = ceb.getPrevious(targetIx + strsrch->pattern.PCELength - 1);
         const CEI *lastCEI  = ceb.getPrevious(targetIx);
         const CEI *nextCEI  = targetIx > 0? ceb.getPrevious(targetIx - 1) : NULL;
 
@@ -4102,6 +4100,10 @@ U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch  *strsrch,
             found = FALSE;
         }
 
+        if (! checkIdentical(strsrch, mStart, mLimit)) {
+            found = FALSE;
+        }
+
         if (found) {
             break;
         }
diff --git a/icu4c/source/test/cintltst/callcoll.c b/icu4c/source/test/cintltst/callcoll.c
index 4bf383feef..cf76b28fb5 100644
--- a/icu4c/source/test/cintltst/callcoll.c
+++ b/icu4c/source/test/cintltst/callcoll.c
@@ -1,6 +1,6 @@
 /********************************************************************
  * COPYRIGHT: 
- * Copyright (c) 1997-2008, International Business Machines Corporation and
+ * Copyright (c) 1997-2009, International Business Machines Corporation and
  * others. All Rights Reserved.
  ********************************************************************/
 /*******************************************************************************
@@ -515,7 +515,7 @@ backAndForth(UCollationElements *iter)
           }
 
           if (o != orders[index].order) {
-              log_err("Mismatched order at index %d: 0x%0:8X vs. 0x%0:8X\n", index,
+              log_err("Mismatched order at index %d: 0x%8.8X vs. 0x%8.8X\n", index,
                 orders[index].order, o);
             goto bail;
           }
diff --git a/icu4c/source/test/intltest/ssearch.cpp b/icu4c/source/test/intltest/ssearch.cpp
index 3eab390d60..a0e1c29722 100644
--- a/icu4c/source/test/intltest/ssearch.cpp
+++ b/icu4c/source/test/intltest/ssearch.cpp
@@ -1,6 +1,6 @@
 /*
  **********************************************************************
- *   Copyright (C) 2005-2008, International Business Machines
+ *   Copyright (C) 2005-2009, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  **********************************************************************
  */
@@ -32,7 +32,12 @@
 #include "intltest.h"
 #include "ssearch.h"
 
+#include "unicode/colldata.h"
+#include "unicode/bmsearch.h"
+#include "unicode/bms.h"
+
 #include "xmlparser.h"
+#include "ucbuf.h"
 
 #include <stdlib.h>
 #include <string.h>
@@ -51,6 +56,8 @@ char testId[100];
           __FILE__, __LINE__, testId, u_errorName(errcode));}}
 
 #define ARRAY_SIZE(array) (sizeof array / sizeof array[0])
+#define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type))
+#define DELETE_ARRAY(array) uprv_free((void *) (array))
 
 //---------------------------------------------------------------------------
 //
@@ -81,6 +88,34 @@ void SSearchTest::runIndexedTest( int32_t index, UBool exec, const char* &name,
         case 2: name = "monkeyTest";
             if (exec) monkeyTest(params);
             break;
+
+        case 3: name = "bmMonkeyTest";
+            if (exec) bmMonkeyTest(params);
+            break;
+
+        case 4: name = "boyerMooreTest";
+            if (exec) boyerMooreTest();
+            break;
+
+        case 5: name = "goodSuffixTest";
+            if (exec) goodSuffixTest();
+            break;
+
+        case 6: name = "searchTime";
+            if (exec) searchTime();
+            break;
+
+        case 7: name = "bmsTest";
+            if (exec) bmsTest();
+            break;
+
+        case 8: name = "bmSearchTest";
+            if (exec) bmSearchTest();
+            break;
+
+        case 9: name = "udhrTest";
+            if (exec) udhrTest();
+            break;
 #endif
         default: name = "";
             break; //needed to end loop
@@ -181,6 +216,16 @@ void SSearchTest::searchTest()
             normalize = UCOL_ON;
         }
 
+        //
+        // Get the alternate_handling flag. Default is UCOL_NON_IGNORABLE.
+        //
+        UColAttributeValue alternateHandling = UCOL_NON_IGNORABLE;
+        const UnicodeString *alt = testCase->getAttribute("alternate_handling");
+        TEST_ASSERT (alt == NULL || *alt == "SHIFTED" || *alt == "NON_IGNORABLE");
+        if (alt != NULL && *alt == "SHIFTED") {
+            alternateHandling = UCOL_SHIFTED;
+        }
+
         const UnicodeString defLocale("en");
         char  clocale[100];
         const UnicodeString *locale   = testCase->getAttribute("locale");
@@ -196,7 +241,7 @@ void SSearchTest::searchTest()
         int32_t        expectedMatchStart = -1;
         int32_t        expectedMatchLimit = -1;
         const UXMLElement  *n;
-        int                nodeCount = 0;
+        int32_t                nodeCount = 0;
 
         n = testCase->getChildElement("pattern");
         TEST_ASSERT(n != NULL);
@@ -237,13 +282,14 @@ void SSearchTest::searchTest()
         //  Check that there weren't extra things in the XML
         TEST_ASSERT(nodeCount == testCase->countChildren());
 
-        // Open a collotor and StringSearch based on the parameters
+        // Open a collator and StringSearch based on the parameters
         //   obtained from the XML.
         //
         status = U_ZERO_ERROR;
         UCollator *collator = ucol_open(clocale, &status);
         ucol_setStrength(collator, collatorStrength);
         ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, normalize, &status);
+        ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, alternateHandling, &status);
         UStringSearch *uss = usearch_openFromCollator(pattern.getBuffer(), pattern.length(),
                                          target.getBuffer(), target.length(),
                                          collator,
@@ -315,6 +361,323 @@ void SSearchTest::searchTest()
 #endif
 }
 
+struct UdhrTestCase
+{
+    const char *locale; // locale ID passed to ucol_open(); const because it is set from a string literal
+    const char *file;   // UDHR text file name, resolved against the "udhr" test-data path
+};
+
+// Searches each UDHR sample text (one per locale) for the pattern found on the
+// first line of its file; a file that cannot be opened is skipped with an info message.
+void SSearchTest::udhrTest()
+{
+    UErrorCode status = U_ZERO_ERROR;
+    char path[PATH_BUFFER_SIZE];
+    const char *udhrPath = getPath(path, "udhr");
+
+    if (udhrPath == NULL) {
+        return; // couldn't get path: error message already output...
+    }
+
+    UdhrTestCase testCases[] = {
+        {"en", "udhr_eng.txt"},
+        {"de", "udhr_deu_1996.txt"},
+        {"fr", "udhr_fra.txt"},
+        {"ru", "udhr_rus.txt"},
+        {"th", "udhr_tha.txt"},
+        {"ja", "udhr_jpn.txt"},
+        {"ko", "udhr_kor.txt"},
+        {"zh", "udhr_cmn_hans.txt"},
+        {"zh_Hant", "udhr_cmn_hant.txt"}
+    };
+
+    int32_t testCount = ARRAY_SIZE(testCases);
+
+    for (int32_t t = 0; t < testCount; t += 1) {
+        int32_t len = 0;
+        char *resolvedFileName = NULL;
+        const char *encoding = NULL;
+        UCHARBUF *ucharBuf = NULL;
+
+        status = U_ZERO_ERROR; // start each file clean so one failure cannot skip all later files
+        ucbuf_resolveFileName(udhrPath, testCases[t].file, NULL, &len, &status);
+        resolvedFileName = NEW_ARRAY(char, len);
+
+        if(resolvedFileName == NULL){
+            continue;
+        }
+
+        if(status == U_BUFFER_OVERFLOW_ERROR){
+            status = U_ZERO_ERROR;
+        }
+
+        ucbuf_resolveFileName(udhrPath, testCases[t].file, resolvedFileName, &len, &status);
+        ucharBuf = ucbuf_open(resolvedFileName, &encoding, TRUE, FALSE, &status);
+
+        DELETE_ARRAY(resolvedFileName);
+
+        if(U_FAILURE(status)){
+            infoln("Could not open the input file %s. Test skipped\n", testCases[t].file);
+            continue;
+        }
+
+        int32_t targetLen = 0;
+        const UChar *target = ucbuf_getBuffer(ucharBuf, &targetLen, &status);
+
+        /* The first line of the file contains the pattern */
+        int32_t start = 0, end = 0, plen = 0;
+
+        for(end = start; end < targetLen; end += 1) { // bounded: a text with no line break must not overrun
+            UChar ch = target[end];
+            if (ch == 0x000A || ch == 0x000D || ch == 0x2028) {
+                break;
+            }
+        }
+
+        plen = end - start;
+        UChar *pattern = NEW_ARRAY(UChar, plen);
+        for (int32_t i = 0; i < plen; i += 1) {
+            pattern[i] =  target[start++];
+        }
+
+        int32_t offset = 0;
+        UCollator *coll = ucol_open(testCases[t].locale, &status);
+        UCD *ucd = NULL;
+        BMS *bms = NULL;
+
+        if (U_FAILURE(status)) {
+            errln("Could not open collator for %s", testCases[t].locale);
+            goto delete_collator;
+        }
+
+        ucd = ucd_open(coll, &status);
+
+        if (U_FAILURE(status)) {
+            errln("Could not open CollData object for %s", testCases[t].locale);
+            goto delete_ucd;
+        }
+
+        bms = bms_open(ucd, pattern, plen, target, targetLen, &status);
+
+        if (U_FAILURE(status)) {
+            errln("Could not open search object for %s", testCases[t].locale);
+            goto delete_bms;
+        }
+
+        start = end = -1;
+        while (bms_search(bms, offset, &start, &end)) {
+            offset = end;
+        }
+
+        if (offset == 0) {
+            errln("Could not find pattern - locale: %s, file: %s ", testCases[t].locale, testCases[t].file);
+        }
+
+delete_bms:
+        bms_close(bms);
+
+delete_ucd:
+        ucd_close(ucd);
+
+delete_collator:
+        ucol_close(coll);
+
+        DELETE_ARRAY(pattern);
+        ucbuf_close(ucharBuf);
+    }
+
+    ucd_flushCache();
+}
+
+// Runs every <test-case> in ssearch.xml through the BMS (bms_*) search API and
+// checks the first match found against the match range expected by the XML.
+void SSearchTest::bmSearchTest()
+{
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS
+    UErrorCode status = U_ZERO_ERROR;
+    char path[PATH_BUFFER_SIZE];
+    const char *testFilePath = getPath(path, "ssearch.xml");
+
+    if (testFilePath == NULL) {
+        return; /* Couldn't get path: error message already output. */
+    }
+
+    UXMLParser  *parser = UXMLParser::createParser(status);
+    TEST_ASSERT_SUCCESS(status);
+    UXMLElement *root   = parser->parseFile(testFilePath, status);
+    TEST_ASSERT_SUCCESS(status);
+    if (U_FAILURE(status)) {
+        delete root;    // same cleanup as the normal exit path, so a failed
+        delete parser;  // parse does not leak the parser
+        return;
+    }
+
+    const UnicodeString *debugTestCase = root->getAttribute("debug");
+    if (debugTestCase != NULL) {
+//       setenv("USEARCH_DEBUG", "1", 1);
+    }
+
+    const UXMLElement *testCase;
+    int32_t tc = 0;
+
+    while((testCase = root->nextChildElement(tc)) != NULL) {
+        if (testCase->getTagName().compare("test-case") != 0) {
+            errln("ssearch, unrecognized XML Element in test file");
+            continue;
+        }
+        const UnicodeString *id       = testCase->getAttribute("id");
+        *testId = 0;
+        if (id != NULL) {
+            id->extract(0, id->length(), testId,  sizeof(testId), US_INV);
+        }
+
+        // If debugging test case has been specified and this is not it, skip to next.
+        if (id!=NULL && debugTestCase!=NULL && *id != *debugTestCase) {
+            continue;
+        }
+        //
+        //  Get the requested collation strength.
+        //    Default is tertiary if the XML attribute is missing from the test case.
+        //
+        const UnicodeString *strength = testCase->getAttribute("strength");
+        UColAttributeValue collatorStrength = UCOL_TERTIARY; // never left uninitialized, even for a bogus value
+        if      (strength==NULL)          { collatorStrength = UCOL_TERTIARY;}
+        else if (*strength=="PRIMARY")    { collatorStrength = UCOL_PRIMARY;}
+        else if (*strength=="SECONDARY")  { collatorStrength = UCOL_SECONDARY;}
+        else if (*strength=="TERTIARY")   { collatorStrength = UCOL_TERTIARY;}
+        else if (*strength=="QUATERNARY") { collatorStrength = UCOL_QUATERNARY;}
+        else if (*strength=="IDENTICAL")  { collatorStrength = UCOL_IDENTICAL;}
+        else {
+            // Bogus value supplied for strength.  Shouldn't happen, even from
+            //  typos, if the  XML source has been validated.
+            //  This assert is a little deceiving in that strength can be
+            //   any of the allowed values, not just TERTIARY, but it will
+            //   do the job of getting the error output.
+            TEST_ASSERT(*strength=="TERTIARY")
+        }
+
+        //
+        // Get the collator normalization flag.  Default is UCOL_OFF.
+        //
+        UColAttributeValue normalize = UCOL_OFF;
+        const UnicodeString *norm = testCase->getAttribute("norm");
+        TEST_ASSERT (norm==NULL || *norm=="ON" || *norm=="OFF");
+        if (norm!=NULL && *norm=="ON") {
+            normalize = UCOL_ON;
+        }
+
+        //
+        // Get the alternate_handling flag. Default is UCOL_NON_IGNORABLE.
+        //
+        UColAttributeValue alternateHandling = UCOL_NON_IGNORABLE;
+        const UnicodeString *alt = testCase->getAttribute("alternate_handling");
+        TEST_ASSERT (alt == NULL || *alt == "SHIFTED" || *alt == "NON_IGNORABLE");
+        if (alt != NULL && *alt == "SHIFTED") {
+            alternateHandling = UCOL_SHIFTED;
+        }
+
+        const UnicodeString defLocale("en");
+        char  clocale[100];
+        const UnicodeString *locale   = testCase->getAttribute("locale");
+        if (locale == NULL || locale->length()==0) {
+            locale = &defLocale;
+        }
+        locale->extract(0, locale->length(), clocale, sizeof(clocale), NULL);
+
+        UnicodeString  text;
+        UnicodeString  target;
+        UnicodeString  pattern;
+        int32_t        expectedMatchStart = -1;
+        int32_t        expectedMatchLimit = -1;
+        const UXMLElement  *n;
+        int32_t                nodeCount = 0;
+
+        n = testCase->getChildElement("pattern");
+        TEST_ASSERT(n != NULL);
+        if (n==NULL) {
+            continue;
+        }
+        text = n->getText(FALSE);
+        text = text.unescape();
+        pattern.append(text);
+        nodeCount++;
+
+        n = testCase->getChildElement("pre");
+        if (n!=NULL) {
+            text = n->getText(FALSE);
+            text = text.unescape();
+            target.append(text);
+            nodeCount++;
+        }
+
+        n = testCase->getChildElement("m");
+        if (n!=NULL) {
+            expectedMatchStart = target.length();
+            text = n->getText(FALSE);
+            text = text.unescape();
+            target.append(text);
+            expectedMatchLimit = target.length();
+            nodeCount++;
+        }
+
+        n = testCase->getChildElement("post");
+        if (n!=NULL) {
+            text = n->getText(FALSE);
+            text = text.unescape();
+            target.append(text);
+            nodeCount++;
+        }
+
+        //  Check that there weren't extra things in the XML
+        TEST_ASSERT(nodeCount == testCase->countChildren());
+
+        // Open a collator and StringSearch based on the parameters
+        //   obtained from the XML.
+        //
+        status = U_ZERO_ERROR;
+        UCollator *collator = ucol_open(clocale, &status);
+        ucol_setStrength(collator, collatorStrength);
+        ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, normalize, &status);
+        ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, alternateHandling, &status);
+        UCD *ucd = ucd_open(collator, &status);
+        BMS *bms = bms_open(ucd, pattern.getBuffer(), pattern.length(), target.getBuffer(), target.length(), &status);
+
+        TEST_ASSERT_SUCCESS(status);
+        if (U_FAILURE(status)) {
+            bms_close(bms);
+            ucd_close(ucd);
+            ucol_close(collator);
+            continue;
+        }
+
+        int32_t foundStart = 0;
+        int32_t foundLimit = 0;
+        UBool   foundMatch;
+        //
+        // Do the search, check the match result against the expected results.
+        //
+        foundMatch = bms_search(bms, 0, &foundStart, &foundLimit);
+      //TEST_ASSERT_SUCCESS(status);
+        if ((foundMatch && expectedMatchStart < 0) ||
+            foundStart != expectedMatchStart   ||
+            foundLimit != expectedMatchLimit) {
+                TEST_ASSERT(FALSE);   //  output generic error position
+                infoln("Found, expected match start = %d, %d \n"
+                       "Found, expected match limit = %d, %d",
+                foundStart, expectedMatchStart, foundLimit, expectedMatchLimit);
+        }
+
+        bms_close(bms);
+        ucd_close(ucd);
+        ucol_close(collator);
+    }
+
+    ucd_flushCache();
+    delete root;
+    delete parser;
+#endif
+}
+
 struct Order
 {
     int32_t order;
@@ -549,6 +912,10 @@ static char *printOrders(char *buffer, OrderList &list)
 void SSearchTest::offsetTest()
 {
     const char *test[] = {
+        // The sequence \u0FB3\u0F71\u0F71\u0F80 contains a discontiguous
+        // contraction (\u0FB3\u0F71\u0F80) logically followed by \u0F71.
+        "\\u1E33\\u0FB3\\u0F71\\u0F71\\u0F80\\uD835\\uDF6C\\u01B0",
+
         "\\ua191\\u16ef\\u2036\\u017a",
 
 #if 0
@@ -673,341 +1040,6 @@ void SSearchTest::offsetTest()
     delete col;
 }
 
-class CEList
-{
-public:
-    CEList(UCollator *coll, const UnicodeString &string);
-    ~CEList();
-
-    int32_t size() const;
-    int32_t get(int32_t index) const;
-    UBool matchesAt(int32_t offset, const CEList *other) const; 
-
-private:
-    void add(int32_t ce);
-
-    int32_t *ces;
-    int32_t listMax;
-    int32_t listSize;
-};
-
-CEList::CEList(UCollator *coll, const UnicodeString &string)
-    : ces(NULL), listMax(8), listSize(0)
-{
-    UErrorCode status = U_ZERO_ERROR;
-    UCollationElements *elems = ucol_openElements(coll, string.getBuffer(), string.length(), &status);
-    uint32_t strengthMask = 0;
-    int32_t order;
-
-#if 0
-    switch (ucol_getStrength(coll)) 
-    {
-    default:
-        strengthMask |= UCOL_TERTIARYORDERMASK;
-        /* fall through */
-
-    case UCOL_SECONDARY:
-        strengthMask |= UCOL_SECONDARYORDERMASK;
-        /* fall through */
-
-    case UCOL_PRIMARY:
-        strengthMask |= UCOL_PRIMARYORDERMASK;
-    }
-#else
-    strengthMask = UCOL_PRIMARYORDERMASK;
-#endif
-
-    ces = new int32_t[listMax];
-
-    while ((order = ucol_next(elems, &status)) != UCOL_NULLORDER) {
-        order &= strengthMask;
-
-        if (order == UCOL_IGNORABLE) {
-            continue;
-        }
-
-        add(order);
-    }
-
-    ucol_closeElements(elems);
-}
-
-CEList::~CEList()
-{
-    delete[] ces;
-}
-
-void CEList::add(int32_t ce)
-{
-    if (listSize >= listMax) {
-        listMax *= 2;
-
-        int32_t *newCEs = new int32_t[listMax];
-
-        uprv_memcpy(newCEs, ces, listSize * sizeof(int32_t));
-        delete[] ces;
-        ces = newCEs;
-    }
-
-    ces[listSize++] = ce;
-}
-
-int32_t CEList::get(int32_t index) const
-{
-    if (index >= 0 && index < listSize) {
-        return ces[index];
-    }
-
-    return -1;
-}
-
-UBool CEList::matchesAt(int32_t offset, const CEList *other) const
-{
-    if (listSize - offset < other->size()) {
-        return FALSE;
-    }
-
-    for (int32_t i = offset, j = 0; j < other->size(); i += 1, j += 1) {
-        if (ces[i] != other->get(j)) {
-            return FALSE;
-        }
-    }
-
-    return TRUE;
-}
-
-int32_t CEList::size() const
-{
-    return listSize;
-}
-
-class StringList
-{
-public:
-    StringList();
-    ~StringList();
-
-    void add(const UnicodeString *string);
-    void add(const UChar *chars, int32_t count);
-    const UnicodeString *get(int32_t index) const;
-    int32_t size() const;
-
-private:
-    UnicodeString *strings;
-    int32_t listMax;
-    int32_t listSize;
-};
-
-StringList::StringList()
-    : strings(NULL), listMax(16), listSize(0)
-{
-    strings = new UnicodeString [listMax];
-}
-
-StringList::~StringList()
-{
-    delete[] strings;
-}
-
-void StringList::add(const UnicodeString *string)
-{
-    if (listSize >= listMax) {
-        listMax *= 2;
-
-        UnicodeString *newStrings = new UnicodeString[listMax];
-
-        uprv_memcpy(newStrings, strings, listSize * sizeof(UnicodeString));
-
-        delete[] strings;
-        strings = newStrings;
-    }
-
-    // The ctor initialized all the strings in
-    // the array to empty strings, so this
-    // is the same as copying the source string.
-    strings[listSize++].append(*string);
-}
-
-void StringList::add(const UChar *chars, int32_t count)
-{
-    const UnicodeString string(chars, count);
-
-    add(&string);
-}
-
-const UnicodeString *StringList::get(int32_t index) const
-{
-    if (index >= 0 && index < listSize) {
-        return &strings[index];
-    }
-
-    return NULL;
-}
-
-int32_t StringList::size() const
-{
-    return listSize;
-}
-
-
-U_CFUNC void deleteStringList(void *obj);
-
-class CEToStringsMap
-{
-public:
-
-    CEToStringsMap();
-    ~CEToStringsMap();
-
-    void put(int32_t ce, UnicodeString *string);
-    StringList *getStringList(int32_t ce) const;
-
-private:
- 
-    void putStringList(int32_t ce, StringList *stringList);
-    UHashtable *map;
-};
-
-CEToStringsMap::CEToStringsMap()
-{
-    UErrorCode status = U_ZERO_ERROR;
-
-    map = uhash_open(uhash_hashLong, uhash_compareLong,
-                     uhash_compareCaselessUnicodeString,
-                     &status);
-
-    uhash_setValueDeleter(map, deleteStringList);
-}
-
-CEToStringsMap::~CEToStringsMap()
-{
-    uhash_close(map);
-}
-
-void CEToStringsMap::put(int32_t ce, UnicodeString *string)
-{
-    StringList *strings = getStringList(ce);
-
-    if (strings == NULL) {
-        strings = new StringList();
-        putStringList(ce, strings);
-    }
-
-    strings->add(string);
-}
-
-StringList *CEToStringsMap::getStringList(int32_t ce) const
-{
-    return (StringList *) uhash_iget(map, ce);
-}
-
-void CEToStringsMap::putStringList(int32_t ce, StringList *stringList)
-{
-    UErrorCode status = U_ZERO_ERROR;
-
-    uhash_iput(map, ce, (void *) stringList, &status);
-}
-
-U_CFUNC void deleteStringList(void *obj)
-{
-    StringList *strings = (StringList *) obj;
-
-    delete strings;
-}
-
-U_CFUNC void deleteCEList(void *obj);
-U_CFUNC void deleteUnicodeStringKey(void *obj);
-
-class StringToCEsMap
-{
-public:
-    StringToCEsMap();
-    ~StringToCEsMap();
-
-    void put(const UnicodeString *string, const CEList *ces);
-    const CEList *get(const UnicodeString *string);
-
-private:
-
-
-    UHashtable *map;
-};
-
-StringToCEsMap::StringToCEsMap()
-{
-    UErrorCode status = U_ZERO_ERROR;
-
-    map = uhash_open(uhash_hashCaselessUnicodeString,
-                     uhash_compareCaselessUnicodeString,
-                     uhash_compareLong,
-                     &status);
-
-    uhash_setValueDeleter(map, deleteCEList);
-    uhash_setKeyDeleter(map, deleteUnicodeStringKey);
-}
-
-StringToCEsMap::~StringToCEsMap()
-{
-    uhash_close(map);
-}
-
-void StringToCEsMap::put(const UnicodeString *string, const CEList *ces)
-{
-    UErrorCode status = U_ZERO_ERROR;
-
-    uhash_put(map, (void *) string, (void *) ces, &status);
-}
-
-const CEList *StringToCEsMap::get(const UnicodeString *string)
-{
-    return (const CEList *) uhash_get(map, string);
-}
-
-U_CFUNC void deleteCEList(void *obj)
-{
-    CEList *list = (CEList *) obj;
-
-    delete list;
-}
-
-U_CFUNC void deleteUnicodeStringKey(void *obj)
-{
-    UnicodeString *key = (UnicodeString *) obj;
-
-    delete key;
-}
-
-static void buildData(UCollator *coll, USet *charsToTest, StringToCEsMap *charsToCEList, CEToStringsMap *ceToCharsStartingWith)
-{
-    int32_t itemCount = uset_getItemCount(charsToTest);
-    UErrorCode status = U_ZERO_ERROR;
-
-    for(int32_t item = 0; item < itemCount; item += 1) {
-        UChar32 start = 0, end = 0;
-        UChar buffer[16];
-        int32_t len = uset_getItem(charsToTest, item, &start, &end,
-                                   buffer, 16, &status);
-
-        if (len == 0) {
-            for (UChar32 ch = start; ch <= end; ch += 1) {
-                UnicodeString *st = new UnicodeString(ch);
-                CEList *ceList = new CEList(coll, *st);
-
-                charsToCEList->put(st, ceList);
-                ceToCharsStartingWith->put(ceList->get(0), st);
-            }
-        } else if (len > 0) {
-            UnicodeString *st = new UnicodeString(buffer, len);
-            CEList *ceList = new CEList(coll, *st);
-
-            charsToCEList->put(st, ceList);
-            ceToCharsStartingWith->put(ceList->get(0), st);
-        } else {
-            // shouldn't happen...
-        }
-    }
-}
-
 static UnicodeString &escape(const UnicodeString &string, UnicodeString &buffer)
 {
     for(int32_t i = 0; i < string.length(); i += 1) {
@@ -1038,69 +1070,502 @@ static UnicodeString &escape(const UnicodeString &string, UnicodeString &buffer)
 
     return buffer;
 }
+static USet *uset_openEmpty();
+#if 1
 
-static int32_t minLengthInChars(const CEList *ceList, int32_t offset, StringToCEsMap *charsToCEList, CEToStringsMap *ceToCharsStartingWith,
-                                UnicodeString &debug)
+struct PCE   // one entry of a PCEList: a processed CE plus the two offsets stored with it
 {
-    // find out shortest string for the longest sequence of ces.
-    // needs to be refined to use dynamic programming, but will be roughly right
-	int32_t totalStringLength = 0;
-	
-    while (offset < ceList->size()) {
-        int32_t ce = ceList->get(offset);
-        int32_t bestLength = INT32_MIN;
-        const UnicodeString *bestString = NULL;
-        int32_t bestCeLength = 0;
-        const StringList *strings = ceToCharsStartingWith->getStringList(ce);
-        int32_t stringCount = strings->size();
-      
-        for (int32_t s = 0; s < stringCount; s += 1) {
-            const UnicodeString *string = strings->get(s);
-            const CEList *ceList2 = charsToCEList->get(string);
+    uint64_t ce;          // value returned by ucol_nextProcessed()
+    int32_t  lowOffset;   // "low" out-parameter of that call (presumably the start offset in the text - confirm)
+    int32_t  highOffset;  // "high" out-parameter of that call (presumably the end offset in the text - confirm)
+};
 
-            if (ceList->matchesAt(offset, ceList2)) {
-                int32_t length = ceList2->size() - string->length();
+class PCEList   // growable array of the processed CEs (with their offsets) produced for one string
+{
+public:
+    PCEList(UCollator *coll, const UnicodeString &string);   // iterates string with coll and stores every CE
+    ~PCEList();
 
-                if (bestLength < length) {
-                    bestLength = length;
-                    bestCeLength = ceList2->size();
-                    bestString = string;
-                }
-            }
-        }
-      
-        totalStringLength += bestString->length();
-        escape(*bestString, debug).append("/");
-        offset += bestCeLength;
-    }
+    int32_t size() const;   // number of stored entries; the terminating NULLORDER entry is counted
 
-    debug.append((UChar)0x0000);
-    return totalStringLength;
+    const PCE *get(int32_t index) const;   // entry at index, or NULL when index >= size()
+
+    int32_t getLowOffset(int32_t index) const;    // -1 when get(index) is NULL
+    int32_t getHighOffset(int32_t index) const;   // -1 when get(index) is NULL
+    uint64_t getOrder(int32_t index) const;       // UCOL_PROCESSED_NULLORDER when get(index) is NULL
+
+    UBool matchesAt(int32_t offset, const PCEList &other) const;   // compares CE values only, skipping both terminators
+
+    uint64_t operator[](int32_t index) const;   // same as getOrder(index)
+
+private:
+    void add(uint64_t ce, int32_t low, int32_t high);   // appends one entry, growing list when it is full
+
+    PCE *list;          // heap array of entries, released by the destructor
+    int32_t listMax;    // allocated capacity of list
+    int32_t listSize;   // number of entries in use
+};
+
+// Builds the list of processed CEs for string, including the terminating NULLORDER entry.
+PCEList::PCEList(UCollator *coll, const UnicodeString &string)
+    : list(NULL), listMax(16), listSize(0)   // both counters are read below and in add(), so they must start defined
+{
+    UErrorCode status = U_ZERO_ERROR;
+    UCollationElements *elems = ucol_openElements(coll, string.getBuffer(), string.length(), &status);
+    uint64_t order;
+    int32_t low, high;
+
+    list = new PCE[listMax];
+    ucol_setOffset(elems, 0, &status);
+
+    do {
+        order = ucol_nextProcessed(elems, &low, &high, &status);
+        add(order, low, high);
+    } while (order != UCOL_PROCESSED_NULLORDER);
+    ucol_closeElements(elems);
 }
 
-static void minLengthTest(UCollator *coll, StringToCEsMap *charsToCEList, CEToStringsMap *ceToCharsStartingWith)
+PCEList::~PCEList()
 {
-    UnicodeString examples[] = {"fuss", "fiss", "affliss", "VII"};
-    UnicodeString debug;
-    int32_t nExamples = sizeof(examples) / sizeof(examples[0]);
+    delete[] list;   // releases the entry array allocated with new PCE[...]
+}
 
-    for (int32_t s = 0; s < nExamples; s += 1) {
-        CEList *ceList = new CEList(coll, examples[s]);
+// Appends one processed CE and its offsets, doubling the buffer first when it is full.
+void PCEList::add(uint64_t order, int32_t low, int32_t high)
+{
+    if (listSize >= listMax) {
+        listMax *= 2;
 
-      //infoln("%S:", examples[s].getTerminatedBuffer());
+        PCE *newList = new PCE[listMax];
 
-        for(int32_t i = 0; i < examples[s].length(); i += 1) {
-            debug.remove();
+        uprv_memcpy(newList, list, listSize * sizeof(PCE));   // elements are PCE; sizeof(Order) copied too few bytes
+        delete[] list;
+        list = newList;
+    }
 
-            int32_t minLength = minLengthInChars(ceList, i, charsToCEList, ceToCharsStartingWith, debug);
-          //infoln("\t%d\t%S", minLength, debug.getTerminatedBuffer());
+    list[listSize].ce         = order;
+    list[listSize].lowOffset  = low;
+    list[listSize].highOffset = high;
+    listSize += 1;
+}
+
+// Returns the entry at index, or NULL when index lies outside [0, listSize).
+const PCE *PCEList::get(int32_t index) const
+{
+    if (index < 0 || index >= listSize) {   // a negative index would read before the buffer
+        return NULL;
+    }
+    return &list[index];
+}
+
+// Low offset stored for the entry at index, or -1 when get() finds no such entry.
+int32_t PCEList::getLowOffset(int32_t index) const
+{
+    const PCE *entry = get(index);
+
+    if (entry == NULL) {
+        return -1;
+    }
+    return entry->lowOffset;
+}
+
+// High offset stored for the entry at index, or -1 when get() finds no such entry.
+int32_t PCEList::getHighOffset(int32_t index) const
+{
+    const PCE *entry = get(index);
+
+    if (entry == NULL) {
+        return -1;
+    }
+    return entry->highOffset;
+}
+
+// CE value of the entry at index, or UCOL_PROCESSED_NULLORDER when get() finds no such entry.
+uint64_t PCEList::getOrder(int32_t index) const
+{
+    const PCE *entry = get(index);
+
+    if (entry == NULL) {
+        return UCOL_PROCESSED_NULLORDER;
+    }
+    return entry->ce;
+}
+
+int32_t PCEList::size() const
+{
+    return listSize;   // number of entries appended so far through add()
+}
+
+// TRUE when the CE values of other (minus its terminator) occur in this list starting at offset.
+UBool PCEList::matchesAt(int32_t offset, const PCEList &other) const
+{
+    // Both sizes count a trailing NULLORDER entry that must not take part in the comparison.
+    const int32_t needed    = other.size() - 1;
+    const int32_t available = listSize - 1 - offset;
+
+    if (available < needed) {
+        return FALSE;
+    }
+    for (int32_t k = 0; k < needed; k += 1) {
+        if (getOrder(offset + k) != other.getOrder(k)) {
+            return FALSE;
+        }
+    }
+    return TRUE;
+}
+
+uint64_t PCEList::operator[](int32_t index) const
+{
+    return getOrder(index);   // indexing yields the CE value only, not the offsets
+}
+
+// Searches every target for "fuss" and for "fu" + U+00DF with BoyerMooreSearch; a miss is a test error.
+void SSearchTest::boyerMooreTest()
+{
+    UErrorCode status = U_ZERO_ERROR;
+    UCollator *coll = NULL;   // opened once below; also opening it here leaked the first collator
+    CollData *data = NULL;
+    UnicodeString lp  = "fuss";
+    UnicodeString sp = "fu\\u00DF";
+    BoyerMooreSearch *longPattern = NULL;
+    BoyerMooreSearch *shortPattern = NULL;
+    UnicodeString targets[]  = {"fu\\u00DF", "fu\\u00DFball", "1fu\\u00DFball", "12fu\\u00DFball", "123fu\\u00DFball", "1234fu\\u00DFball",
+                                "ffu\\u00DF", "fufu\\u00DF", "fusfu\\u00DF",
+                                "fuss", "ffuss", "fufuss", "fusfuss", "1fuss", "12fuss", "123fuss", "1234fuss", "fu\\u00DF", "1fu\\u00DF", "12fu\\u00DF", "123fu\\u00DF", "1234fu\\u00DF"};
+    int32_t start = -1, end = -1;
+
+    coll = ucol_openFromShortString("S1", FALSE, NULL, &status);
+    if (U_FAILURE(status)) {
+        errln("Could not open collator.");
+        return;
+    }
+
+    data = CollData::open(coll, status);
+    if (U_FAILURE(status)) {
+        errln("Could not open CollData object.");
+        goto close_data;
+    }
+
+    longPattern = new BoyerMooreSearch(data, lp.unescape(), NULL, status);
+    shortPattern = new BoyerMooreSearch(data, sp.unescape(), NULL, status);
+    if (U_FAILURE(status)) {
+        errln("Could not create pattern objects.");
+        goto close_patterns;
+    }
+
+    for (int32_t t = 0; t < (sizeof(targets)/sizeof(targets[0])); t += 1) {
+        UnicodeString target = targets[t].unescape();
+        
+        longPattern->setTargetString(&target, status);
+        if (longPattern->search(0, start, end)) {
+            logln("Test %d: found long pattern at [%d, %d].", t, start, end);
+        } else {
+            errln("Test %d: did not find long pattern.", t);
         }
 
-      //infoln();
-        delete ceList;
+        shortPattern->setTargetString(&target, status);
+        if (shortPattern->search(0, start, end)) {
+            logln("Test %d: found short pattern at [%d, %d].", t, start, end);
+        } else {
+            errln("Test %d: did not find short pattern.", t);
+        }
     }
+
+close_patterns:
+    delete shortPattern;
+    delete longPattern;
+
+close_data:
+    CollData::close(data);
+    ucol_close(coll);
 }
 
+// Searches every target for "fuss" and for "fu" + U+00DF through the C API (ucd_*/bms_*); a miss is a test error.
+void SSearchTest::bmsTest()
+{
+    UErrorCode status = U_ZERO_ERROR;
+    UCollator *coll = NULL;
+    UCD *data = NULL;
+    UnicodeString lp  = "fuss";
+    UnicodeString lpu = lp.unescape();
+    UnicodeString sp  = "fu\\u00DF";
+    UnicodeString spu = sp.unescape();
+    BMS *longPattern = NULL;
+    BMS *shortPattern = NULL;
+    UnicodeString targets[]  = {"fu\\u00DF", "fu\\u00DFball", "1fu\\u00DFball", "12fu\\u00DFball", "123fu\\u00DFball", "1234fu\\u00DFball",
+                                "ffu\\u00DF", "fufu\\u00DF", "fusfu\\u00DF",
+                                "fuss", "ffuss", "fufuss", "fusfuss", "1fuss", "12fuss", "123fuss", "1234fuss", "fu\\u00DF", "1fu\\u00DF", "12fu\\u00DF", "123fu\\u00DF", "1234fu\\u00DF"};
+    int32_t start = -1, end = -1;
+
+    coll = ucol_openFromShortString("S1", FALSE, NULL, &status);
+    if (U_FAILURE(status)) {
+        errln("Could not open collator.");
+        return;
+    }
+
+    data = ucd_open(coll, &status);
+    if (U_FAILURE(status)) {
+        errln("Could not open CollData object.");
+        goto close_data;
+    }
+
+    longPattern = bms_open(data, lpu.getBuffer(), lpu.length(), NULL, 0, &status);
+    shortPattern = bms_open(data, spu.getBuffer(), spu.length(), NULL, 0, &status);
+    if (U_FAILURE(status)) {   // status is checked once, after both bms_open calls
+        errln("Couldn't open pattern objects.");
+        goto close_patterns;
+    }
+
+    for (int32_t t = 0; t < (sizeof(targets)/sizeof(targets[0])); t += 1) {
+        UnicodeString target = targets[t].unescape();
+        bms_setTargetString(longPattern, target.getBuffer(), target.length(), &status);
+        if (bms_search(longPattern, 0, &start, &end)) {
+            logln("Test %d: found long pattern at [%d, %d].", t, start, end);
+        } else {
+            errln("Test %d: did not find long pattern.", t);
+        }
+
+        bms_setTargetString(shortPattern, target.getBuffer(), target.length(), &status);
+        if (bms_search(shortPattern, 0, &start, &end)) {
+            logln("Test %d: found short pattern at [%d, %d].", t, start, end);
+        } else {
+            errln("Test %d: did not find short pattern.", t);
+        }
+    }
+
+close_patterns:
+    bms_close(shortPattern);
+    bms_close(longPattern);
+
+close_data:
+    ucd_close(data);
+    ucol_close(coll);
+}
+
+// Searches "cloveldfxeld" for "fxeld" with a collator opened for a NULL locale; no match is a test error.
+void SSearchTest::goodSuffixTest()
+{
+    UErrorCode status = U_ZERO_ERROR;
+    UCollator *coll = NULL;
+    CollData *data = NULL;
+    UnicodeString pat = /*"gcagagag"*/ "fxeld";
+    UnicodeString target = /*"gcatcgcagagagtatacagtacg"*/ "cloveldfxeld";
+    BoyerMooreSearch *pattern = NULL;
+    int32_t start = -1, end = -1;
+
+    coll = ucol_open(NULL, &status);
+    if (U_FAILURE(status)) {
+        errln("Couldn't open collator.");
+        return;
+    }
+
+    data = CollData::open(coll, status);
+    if (U_FAILURE(status)) {
+        errln("Couldn't open CollData object.");
+        goto close_data;
+    }
+
+    pattern = new BoyerMooreSearch(data, pat, &target, status);
+    if (U_FAILURE(status)) {
+        errln("Couldn't open pattern object.");
+        goto close_pattern;
+    }
+
+    if (pattern->search(0, start, end)) {
+        logln("Found pattern at [%d, %d].", start, end);
+    } else {
+        errln("Did not find pattern.");
+    }
+
+close_pattern:
+    delete pattern;
+close_data:
+    CollData::close(data);
+    ucol_close(coll);
+}
+
+//
+//  searchTime()    A quick and dirty performance test for string search.
+//                  Probably  doesn't really belong as part of intltest, but it
+//                  does check that the search succeeds, and gets the right result,
+//                  so it serves as a functionality test also.
+//
+//                  To run as a perf test, up the loop count, select by commenting
+//                  and uncommenting in the code the operation to be measured,
+//                  rebuild, and measure the running time of this test alone.
+//
+//                     time LD_LIBRARY_PATH=whatever  ./intltest  collate/SSearchTest/searchTime
+//
+void SSearchTest::searchTime() {
+    static const char *longishText =
+"Whylom, as olde stories tellen us,\n"
+"Ther was a duk that highte Theseus:\n"
+"Of Athenes he was lord and governour,\n"
+"And in his tyme swich a conquerour,\n"
+"That gretter was ther noon under the sonne.\n"
+"Ful many a riche contree hadde he wonne;\n"
+"What with his wisdom and his chivalrye,\n"
+"He conquered al the regne of Femenye,\n"
+"That whylom was y-cleped Scithia;\n"
+"And weddede the quene Ipolita,\n"
+"And broghte hir hoom with him in his contree\n"
+"With muchel glorie and greet solempnitee,\n"
+"And eek hir yonge suster Emelye.\n"
+"And thus with victorie and with melodye\n"
+"Lete I this noble duk to Athenes ryde,\n"
+"And al his hoost, in armes, him bisyde.\n"
+"And certes, if it nere to long to here,\n"
+"I wolde han told yow fully the manere,\n"
+"How wonnen was the regne of Femenye\n"
+"By Theseus, and by his chivalrye;\n"
+"And of the grete bataille for the nones\n"
+"Bitwixen Athen's and Amazones;\n"
+"And how asseged was Ipolita,\n"
+"The faire hardy quene of Scithia;\n"
+"And of the feste that was at hir weddinge,\n"
+"And of the tempest at hir hoom-cominge;\n"
+"But al that thing I moot as now forbere.\n"
+"I have, God woot, a large feeld to ere,\n"
+"And wayke been the oxen in my plough.\n"
+"The remenant of the tale is long y-nough.\n"
+"I wol nat letten eek noon of this route;\n"
+"Lat every felawe telle his tale aboute,\n"
+"And lat see now who shal the soper winne;\n"
+"And ther I lefte, I wol ageyn biginne.\n"
+"This duk, of whom I make mencioun,\n"
+"When he was come almost unto the toun,\n"
+"In al his wele and in his moste pryde,\n"
+"He was war, as he caste his eye asyde,\n"
+"Wher that ther kneled in the hye weye\n"
+"A companye of ladies, tweye and tweye,\n"
+"Ech after other, clad in clothes blake; \n"
+"But swich a cry and swich a wo they make,\n"
+"That in this world nis creature livinge,\n"
+"That herde swich another weymentinge;\n"
+"And of this cry they nolde never stenten,\n"
+"Til they the reynes of his brydel henten.\n"
+"'What folk ben ye, that at myn hoomcominge\n"
+"Perturben so my feste with cryinge'?\n"
+"Quod Theseus, 'have ye so greet envye\n"
+"Of myn honour, that thus compleyne and crye? \n"
+"Or who hath yow misboden, or offended?\n"
+"And telleth me if it may been amended;\n"
+"And why that ye ben clothed thus in blak'?\n"
+"The eldest lady of hem alle spak,\n"
+"When she hadde swowned with a deedly chere,\n"
+"That it was routhe for to seen and here,\n"
+"And seyde: 'Lord, to whom Fortune hath yiven\n"
+"Victorie, and as a conquerour to liven,\n"
+"Noght greveth us your glorie and your honour;\n"
+"But we biseken mercy and socour.\n"
+"Have mercy on our wo and our distresse.\n"
+"Som drope of pitee, thurgh thy gentilesse,\n"
+"Up-on us wrecched wommen lat thou falle.\n"
+"For certes, lord, ther nis noon of us alle,\n"
+"That she nath been a duchesse or a quene;\n"
+"Now be we caitifs, as it is wel sene:\n"
+"Thanked be Fortune, and hir false wheel,\n"
+"That noon estat assureth to be weel.\n"
+"And certes, lord, t'abyden your presence,\n"
+"Here in the temple of the goddesse Clemence\n"
+"We han ben waytinge al this fourtenight;\n"
+"Now help us, lord, sith it is in thy might.\n"
+"I wrecche, which that wepe and waille thus,\n"
+"Was whylom wyf to king Capaneus,\n"
+"That starf at Thebes, cursed be that day!\n"
+"And alle we, that been in this array,\n"
+"And maken al this lamentacioun,\n"
+"We losten alle our housbondes at that toun,\n"
+"Whyl that the sege ther-aboute lay.\n"
+"And yet now th'olde Creon, weylaway!\n"
+"The lord is now of Thebes the citee, \n"
+"Fulfild of ire and of iniquitee,\n"
+"He, for despyt, and for his tirannye,\n"
+"To do the dede bodyes vileinye,\n"
+"Of alle our lordes, whiche that ben slawe,\n"
+"Hath alle the bodyes on an heep y-drawe,\n"
+"And wol nat suffren hem, by noon assent,\n"
+"Neither to been y-buried nor y-brent,\n"
+"But maketh houndes ete hem in despyt. zet'\n";
+
+#define TEST_BOYER_MOORE 1
+const char *cPattern = "maketh houndes ete hem";
+//const char *cPattern = "Whylom";
+//const char *cPattern = "zet";
+    const char *testId = "searchTime()";   // for error macros.
+    UnicodeString target = longishText;
+    UErrorCode status = U_ZERO_ERROR;
+
+
+    UCollator *collator = ucol_open("en", &status);
+    CollData *data = CollData::open(collator, status);
+    TEST_ASSERT_SUCCESS(status);
+    //ucol_setStrength(collator, collatorStrength);
+    //ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, normalize, &status);
+    UnicodeString uPattern = cPattern;
+#ifndef TEST_BOYER_MOORE
+    UStringSearch *uss = usearch_openFromCollator(uPattern.getBuffer(), uPattern.length(),
+                                        target.getBuffer(), target.length(),
+                                        collator,
+                                        NULL,     // the break iterator
+                                        &status);
+    TEST_ASSERT_SUCCESS(status);
+#else
+    BoyerMooreSearch bms(data, uPattern, &target, status);
+    TEST_ASSERT_SUCCESS(status);
+#endif
+    
+//  int32_t foundStart;
+//  int32_t foundEnd;
+    UBool   found;
+    
+    // Find the match position using strstr
+    const char *pm = strstr(longishText, cPattern);
+    TEST_ASSERT_M(pm!=NULL, "No pattern match with strstr");
+    int32_t  refMatchPos = (int32_t)(pm - longishText);
+    int32_t  icuMatchPos;
+    int32_t  icuMatchEnd;
+#ifndef TEST_BOYER_MOORE
+    usearch_search(uss, 0, &icuMatchPos, &icuMatchEnd, &status);
+    TEST_ASSERT_SUCCESS(status);
+#else
+    found = bms.search(0, icuMatchPos, icuMatchEnd);
+#endif
+    TEST_ASSERT_M(refMatchPos == icuMatchPos, "strstr and icu give different match positions.");
+
+    int32_t i;
+    int32_t j=0;
+
+    // Try loopcounts around 100000 to some millions, depending on the operation,
+    //   to get runtimes of at least several seconds.
+    for (i=0; i<10000; i++) {
+#ifndef TEST_BOYER_MOORE
+        found = usearch_search(uss, 0, &icuMatchPos, &icuMatchEnd, &status);
+#else
+        found = bms.search(0, icuMatchPos, icuMatchEnd);
+#endif
+        //TEST_ASSERT_SUCCESS(status);
+        //TEST_ASSERT(found);
+
+        // usearch_setOffset(uss, 0, &status);
+        // icuMatchPos = usearch_next(uss, &status);
+
+         // The i+j stuff is to confuse the optimizer and get it to actually leave the
+         //   call to strstr in place.
+         //pm = strstr(longishText+j, cPattern);
+         //j = (j + i)%5;
+    }
+
+    printf("%d %d\n", (int)(pm-longishText), j);   // cast: %d takes an int, not a ptrdiff_t; j now has its own conversion
+#ifndef TEST_BOYER_MOORE
+    usearch_close(uss);
+#endif
+    // data is opened on both build paths above, so it is released on both as well.
+    CollData::close(data);
+    ucol_close(collator);
+}
+#endif
+
 //----------------------------------------------------------------------------------------
 //
 //   Random Numbers.  Similar to standard lib rand() and srand()
@@ -1174,7 +1639,7 @@ void SetMonkey::append(UnicodeString &test, UnicodeString &alternate)
 class StringSetMonkey : public Monkey
 {
 public:
-    StringSetMonkey(const USet *theSet, UCollator *theCollator, StringToCEsMap *theCharsToCEList, CEToStringsMap *theCeToCharsStartingWith);
+    StringSetMonkey(const USet *theSet, UCollator *theCollator, CollData *theCollData);
     ~StringSetMonkey();
 
     void append(UnicodeString &testCase, UnicodeString &alternate);
@@ -1183,13 +1648,12 @@ private:
     UnicodeString &generateAlternative(const UnicodeString &testCase, UnicodeString &alternate);
 
     const USet *set;
-    UCollator      *coll;
-    StringToCEsMap *charsToCEList;
-    CEToStringsMap *ceToCharsStartingWith;
+    UCollator  *coll;
+    CollData   *collData;
 };
 
-StringSetMonkey::StringSetMonkey(const USet *theSet, UCollator *theCollator, StringToCEsMap *theCharsToCEList, CEToStringsMap *theCeToCharsStartingWith)
-: Monkey(), set(theSet), coll(theCollator), charsToCEList(theCharsToCEList), ceToCharsStartingWith(theCeToCharsStartingWith)
+StringSetMonkey::StringSetMonkey(const USet *theSet, UCollator *theCollator, CollData *theCollData)
+: Monkey(), set(theSet), coll(theCollator), collData(theCollData)
 {
     // ook.
 }
@@ -1231,7 +1695,8 @@ UnicodeString &StringSetMonkey::generateAlternative(const UnicodeString &testCas
 {
     // find out shortest string for the longest sequence of ces.
     // needs to be refined to use dynamic programming, but will be roughly right
-    CEList ceList(coll, testCase);
+    UErrorCode status = U_ZERO_ERROR;
+    CEList ceList(coll, testCase, status);
     UnicodeString alt;
     int32_t offset = 0;
 
@@ -1241,7 +1706,7 @@ UnicodeString &StringSetMonkey::generateAlternative(const UnicodeString &testCas
 
     while (offset < ceList.size()) {
         int32_t ce = ceList.get(offset);
-        const StringList *strings = ceToCharsStartingWith->getStringList(ce);
+        const StringList *strings = collData->getStringList(ce);
 
         if (strings == NULL) {
             return alternate.append(testCase);
@@ -1251,8 +1716,9 @@ UnicodeString &StringSetMonkey::generateAlternative(const UnicodeString &testCas
         int32_t tries = 0;
       
         // find random string that generates the same CEList
-        const CEList *ceList2;
-        const UnicodeString *string;
+        const CEList *ceList2 = NULL;
+        const UnicodeString *string = NULL;
+              UBool matches = FALSE;
 
         do {
             int32_t s = m_rand() % stringCount;
@@ -1263,14 +1729,20 @@ UnicodeString &StringSetMonkey::generateAlternative(const UnicodeString &testCas
             }
 
             string = strings->get(s);
-            ceList2 = charsToCEList->get(string);
-        } while (! ceList.matchesAt(offset, ceList2));
+            ceList2 = collData->getCEList(string);
+            matches = ceList.matchesAt(offset, ceList2);
+
+            if (! matches) {
+                collData->freeCEList((CEList *) ceList2);
+            }
+        } while (! matches);
 
         alt.append(*string);
         offset += ceList2->size();
+        collData->freeCEList(ceList2);
     }
 
-    const CEList altCEs(coll, alt);
+    const CEList altCEs(coll, alt, status);
 
     if (ceList.matchesAt(0, &altCEs)) {
         return alternate.append(alt);
@@ -1282,6 +1754,7 @@ UnicodeString &StringSetMonkey::generateAlternative(const UnicodeString &testCas
 static void generateTestCase(UCollator *coll, Monkey *monkeys[], int32_t monkeyCount, UnicodeString &testCase, UnicodeString &alternate)
 {
     int32_t pieces = (m_rand() % 4) + 1;
+    UErrorCode status = U_ZERO_ERROR;
     UBool matches;
 
     do {
@@ -1295,8 +1768,8 @@ static void generateTestCase(UCollator *coll, Monkey *monkeys[], int32_t monkeyC
             monkeys[monkey]->append(testCase, alternate);
         }
 
-        const CEList ceTest(coll, testCase);
-        const CEList ceAlt(coll, alternate);
+        const CEList ceTest(coll, testCase, status);
+        const CEList ceAlt(coll, alternate, status);
 
         matches = ceTest.matchesAt(0, &ceAlt);
     } while (! matches);
@@ -1391,7 +1864,8 @@ static UBool simpleSearch(UCollator *coll, const UnicodeString &target, int32_t
         									      target.getBuffer(), target.length(), &status);
 
     if (patternSize == 0) {
-        matchStart = matchEnd = 0;
+        // Searching for an empty pattern always fails
+        matchStart = matchEnd = -1;
         return FALSE;
     }
 
@@ -1512,14 +1986,9 @@ int32_t SSearchTest::monkeyTestCase(UCollator *coll, const UnicodeString &testCa
     // **** TODO: find *all* matches, not just first one ****
     simpleSearch(coll, testCase, 0, pattern, expectedStart, expectedEnd);
 
-#if 0
     usearch_search(uss, 0, &actualStart, &actualEnd, &status);
-#else
-    actualStart = usearch_next(uss, &status);
-    actualEnd   = actualStart + usearch_getMatchedLength(uss);
-#endif
 
-    if (actualStart != expectedStart || actualEnd != expectedEnd) {
+    if (expectedStart >= 0 && (actualStart != expectedStart || actualEnd != expectedEnd)) {
         errln("Search for <pattern> in <%s> failed: expected [%d, %d], got [%d, %d]\n"
               "    strength=%s seed=%d",
               name, expectedStart, expectedEnd, actualStart, actualEnd, strength, seed);
@@ -1534,15 +2003,9 @@ int32_t SSearchTest::monkeyTestCase(UCollator *coll, const UnicodeString &testCa
 
     usearch_setPattern(uss, altPattern.getBuffer(), altPattern.length(), &status);
 
-#if 0
     usearch_search(uss, 0, &actualStart, &actualEnd, &status);
-#else
-    usearch_reset(uss);
-    actualStart = usearch_next(uss, &status);
-    actualEnd   = actualStart + usearch_getMatchedLength(uss);
-#endif
 
-    if (actualStart != expectedStart || actualEnd != expectedEnd) {
+    if (expectedStart >= 0 && (actualStart != expectedStart || actualEnd != expectedEnd)) {
         errln("Search for <alt_pattern> in <%s> failed: expected [%d, %d], got [%d, %d]\n"
               "    strength=%s seed=%d",
               name, expectedStart, expectedEnd, actualStart, actualEnd, strength, seed);
@@ -1554,6 +2017,52 @@ int32_t SSearchTest::monkeyTestCase(UCollator *coll, const UnicodeString &testCa
 
     usearch_close(uss);
 
+    return notFoundCount;
+}
+
+int32_t SSearchTest::bmMonkeyTestCase(UCollator *coll, const UnicodeString &testCase, const UnicodeString &pattern, const UnicodeString &altPattern,
+                                    BoyerMooreSearch *bms, BoyerMooreSearch *abms,
+                                    const char *name, const char *strength, uint32_t seed)
+{
+    UErrorCode status = U_ZERO_ERROR;  // NOTE(review): never inspected after the setTargetString() calls below
+    int32_t actualStart = -1, actualEnd = -1;
+  //int32_t expectedStart = prefix.length(), expectedEnd = prefix.length() + altPattern.length();
+    int32_t expectedStart = -1, expectedEnd = -1;
+    int32_t notFoundCount = 0;
+    // Searches testCase with bms, then with abms, and compares each result with what simpleSearch() reports for pattern / altPattern.
+    // Expected offsets for pattern come from simpleSearch(). **** TODO: find *all* matches, not just first one ****
+    simpleSearch(coll, testCase, 0, pattern, expectedStart, expectedEnd);
+
+    bms->setTargetString(&testCase, status);  // bms is compared against pattern; bmMonkeyTest builds it from that string
+    bms->search(0, actualStart, actualEnd);
+
+    if (expectedStart >= 0 && (actualStart != expectedStart || actualEnd != expectedEnd)) {  // only checked when simpleSearch() found a match
+        errln("Boyer-Moore Search for <pattern> in <%s> failed: expected [%d, %d], got [%d, %d]\n"
+              "    strength=%s seed=%d",
+              name, expectedStart, expectedEnd, actualStart, actualEnd, strength, seed);
+    }
+
+    if (expectedStart == -1 && actualStart == -1) {
+        notFoundCount += 1;
+    }
+
+    // Same check for altPattern, using abms. **** TODO: find *all* matches, not just first one ****
+    simpleSearch(coll, testCase, 0, altPattern, expectedStart, expectedEnd);
+
+    abms->setTargetString(&testCase, status);
+    abms->search(0, actualStart, actualEnd);
+
+    if (expectedStart >= 0 && (actualStart != expectedStart || actualEnd != expectedEnd)) {  // only checked when simpleSearch() found a match
+        errln("Boyer-Moore Search for <alt_pattern> in <%s> failed: expected [%d, %d], got [%d, %d]\n"
+              "    strength=%s seed=%d",
+              name, expectedStart, expectedEnd, actualStart, actualEnd, strength, seed);
+    }
+
+    if (expectedStart == -1 && actualStart == -1) {
+        notFoundCount += 1;
+    }
+    // notFoundCount is 0, 1 or 2 here: one per search in which expectedStart and actualStart were both -1.
+
     return notFoundCount;
 }
 #endif
@@ -1562,33 +2071,27 @@ void SSearchTest::monkeyTest(char *params)
 {
     // ook!
     UErrorCode status = U_ZERO_ERROR;
-    U_STRING_DECL(test_pattern, "[[:assigned:]-[:ideographic:]-[:hangul:]-[:c:]]", 47);
-    U_STRING_INIT(test_pattern, "[[:assigned:]-[:ideographic:]-[:hangul:]-[:c:]]", 47);
-    UCollator *coll = ucol_open(NULL, &status);
+  //UCollator *coll = ucol_open(NULL, &status);
+    UCollator *coll = ucol_openFromShortString("S1", FALSE, NULL, &status);
+
     if (U_FAILURE(status)) {
         errln("Failed to create collator in MonkeyTest!");
         return;
     }
-    USet *charsToTest  = uset_openPattern(test_pattern, 47, &status);
+
+    CollData  *monkeyData = CollData::open(coll, status);
+
     USet *expansions   = uset_openEmpty();
     USet *contractions = uset_openEmpty();
-    StringToCEsMap *charsToCEList = new StringToCEsMap();
-    CEToStringsMap *ceToCharsStartingWith = new CEToStringsMap();
 
     ucol_getContractionsAndExpansions(coll, contractions, expansions, FALSE, &status);
 
-    uset_addAll(charsToTest, contractions);
-    uset_addAll(charsToTest, expansions);
-
-    // TODO: set strength to UCOL_PRIMARY, change CEList to use strength?
-    buildData(coll, charsToTest, charsToCEList, ceToCharsStartingWith);
-
     U_STRING_DECL(letter_pattern, "[[:letter:]-[:ideographic:]-[:hangul:]]", 39);
     U_STRING_INIT(letter_pattern, "[[:letter:]-[:ideographic:]-[:hangul:]]", 39);
     USet *letters = uset_openPattern(letter_pattern, 39, &status);
     SetMonkey letterMonkey(letters);
-    StringSetMonkey contractionMonkey(contractions, coll, charsToCEList, ceToCharsStartingWith);
-    StringSetMonkey expansionMonkey(expansions, coll, charsToCEList, ceToCharsStartingWith);
+    StringSetMonkey contractionMonkey(contractions, coll, monkeyData);
+    StringSetMonkey expansionMonkey(expansions, coll, monkeyData);
     UnicodeString testCase;
     UnicodeString alternate;
     UnicodeString pattern, altPattern;
@@ -1613,7 +2116,7 @@ void SSearchTest::monkeyTest(char *params)
     int32_t strengthCount = sizeof(strengths) / sizeof(strengths[0]);
     int32_t loopCount = quick? 1000 : 10000;
     int32_t firstStrength = 0;
-    int32_t lastStrength  = strengthCount - 1;
+    int32_t lastStrength  = strengthCount - 1; //*/ 0;
 
     if (params != NULL) {
 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
@@ -1654,15 +2157,12 @@ void SSearchTest::monkeyTest(char *params)
     for(int32_t s = firstStrength; s <= lastStrength; s += 1) {
         int32_t notFoundCount = 0;
 
+        logln("Setting strength to %s.", strengthNames[s]);
         ucol_setStrength(coll, strengths[s]);
-        
-        int32_t i = 1000; //Reduce the number of iteration in non-exhaustive mode
-        if(!quick){
-        	i = 10000; 
-        }
+
         // TODO: try alternate prefix and suffix too?
         // TODO: alterntaes are only equal at primary strength. Is this OK?
-        for(int32_t t = 0; t < i; t += 1) {
+        for(int32_t t = 0; t < loopCount; t += 1) {
             uint32_t seed = m_seed;
             int32_t  nmc = 0;
 
@@ -1693,16 +2193,166 @@ void SSearchTest::monkeyTest(char *params)
             notFoundCount += monkeyTestCase(coll, testCase, pattern, altPattern, "pattern + suffix", strengthNames[s], seed);
         }
 
-        logln("For strength %s the not found count is %d.", strengthNames[s], notFoundCount);
+       logln("For strength %s the not found count is %d.", strengthNames[s], notFoundCount);
     }
 
-    delete ceToCharsStartingWith;
-    delete charsToCEList;
-
     uset_close(contractions);
     uset_close(expansions);
-    uset_close(charsToTest);
     uset_close(letters);
+
+    CollData::close(monkeyData);
+    
+    ucol_close(coll);
+}
+
+void SSearchTest::bmMonkeyTest(char *params)
+{
+    // ook!
+    UErrorCode status = U_ZERO_ERROR;
+    UCollator *coll = ucol_openFromShortString("S1", FALSE, NULL, &status);
+
+    if (U_FAILURE(status)) {
+        errln("Failed to create collator in MonkeyTest!");
+        return;
+    }
+
+    CollData  *monkeyData = CollData::open(coll, status);
+
+    USet *expansions   = uset_openEmpty();
+    USet *contractions = uset_openEmpty();
+
+    ucol_getContractionsAndExpansions(coll, contractions, expansions, FALSE, &status);
+
+    U_STRING_DECL(letter_pattern, "[[:letter:]-[:ideographic:]-[:hangul:]]", 39);
+    U_STRING_INIT(letter_pattern, "[[:letter:]-[:ideographic:]-[:hangul:]]", 39);
+    USet *letters = uset_openPattern(letter_pattern, 39, &status);
+    SetMonkey letterMonkey(letters);
+    StringSetMonkey contractionMonkey(contractions, coll, monkeyData);
+    StringSetMonkey expansionMonkey(expansions, coll, monkeyData);
+    UnicodeString testCase;
+    UnicodeString alternate;
+    UnicodeString pattern, altPattern;
+    UnicodeString prefix, altPrefix;
+    UnicodeString suffix, altSuffix;
+
+    Monkey *monkeys[] = {
+        &letterMonkey,
+        &contractionMonkey,
+        &expansionMonkey,
+        &contractionMonkey,
+        &expansionMonkey,
+        &contractionMonkey,
+        &expansionMonkey,
+        &contractionMonkey,
+        &expansionMonkey};
+    int32_t monkeyCount = sizeof(monkeys) / sizeof(monkeys[0]);
+    int32_t nonMatchCount = 0;
+
+    UCollationStrength strengths[] = {UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY};
+    const char *strengthNames[] = {"primary", "secondary", "tertiary"};
+    int32_t strengthCount = sizeof(strengths) / sizeof(strengths[0]);
+    int32_t loopCount = quick? 1000 : 10000;
+    int32_t firstStrength = 0;
+    int32_t lastStrength  = strengthCount - 1; //*/ 0;
+
+    if (params != NULL) {
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS
+        UnicodeString p(params);
+
+        loopCount = getIntParam("loop", p, loopCount);
+        m_seed    = getIntParam("seed", p, m_seed);
+
+        RegexMatcher m(" *strength *= *(primary|secondary|tertiary) *", p, 0, status);
+        if (m.find()) {
+            UnicodeString breakType = m.group(1, status);
+
+            for (int32_t s = 0; s < strengthCount; s += 1) {
+                if (breakType == strengthNames[s]) {
+                    firstStrength = lastStrength = s;
+                    break;
+                }
+            }
+
+            m.reset();
+            p = m.replaceFirst("", status);
+        }
+
+        if (RegexMatcher("\\S", p, 0, status).find()) {
+            // Each option is stripped out of the option string as it is processed.
+            // All options have been checked.  The option string should have been completely emptied..
+            char buf[100];
+            p.extract(buf, sizeof(buf), NULL, status);
+            buf[sizeof(buf)-1] = 0;
+            errln("Unrecognized or extra parameter:  %s\n", buf);
+            return;
+        }
+#else
+        infoln("SSearchTest built with UCONFIG_NO_REGULAR_EXPRESSIONS: ignoring parameters.");
+#endif
+    }
+
+    for(int32_t s = firstStrength; s <= lastStrength; s += 1) {
+        int32_t notFoundCount = 0;
+
+        logln("Setting strength to %s.", strengthNames[s]);
+        ucol_setStrength(coll, strengths[s]);
+
+        CollData *data = CollData::open(coll, status);
+        
+        // TODO: try alternate prefix and suffix too?
+        // TODO: alternates are only equal at primary strength. Is this OK?
+        for(int32_t t = 0; t < loopCount; t += 1) {
+            uint32_t seed = m_seed;
+            int32_t  nmc = 0;
+
+            generateTestCase(coll, monkeys, monkeyCount, pattern, altPattern);
+            generateTestCase(coll, monkeys, monkeyCount, prefix,  altPrefix);
+            generateTestCase(coll, monkeys, monkeyCount, suffix,  altSuffix);
+
+            BoyerMooreSearch pat(data, pattern, NULL, status);
+            BoyerMooreSearch alt(data, altPattern, NULL, status);
+
+            // **** need a better way to deal with this ****
+#if 0
+            if (pat.empty() ||
+                alt.empty()) {
+                    continue;
+            }
+#endif
+
+            // pattern
+            notFoundCount += bmMonkeyTestCase(coll, pattern, pattern, altPattern, &pat, &alt, "pattern", strengthNames[s], seed);
+
+            testCase.remove();
+            testCase.append(prefix);
+            testCase.append(/*alt*/pattern);
+
+            // prefix + pattern
+            notFoundCount += bmMonkeyTestCase(coll, testCase, pattern, altPattern, &pat, &alt, "prefix + pattern", strengthNames[s], seed);
+
+            testCase.append(suffix);
+
+            // prefix + pattern + suffix
+            notFoundCount += bmMonkeyTestCase(coll, testCase, pattern, altPattern, &pat, &alt, "prefix + pattern + suffix", strengthNames[s], seed);
+
+            testCase.remove();
+            testCase.append(pattern);
+            testCase.append(suffix);
+            
+            // pattern + suffix
+            notFoundCount += bmMonkeyTestCase(coll, testCase, pattern, altPattern, &pat, &alt, "pattern + suffix", strengthNames[s], seed);
+        }
+
+        CollData::close(data);
+
+        logln("For strength %s the not found count is %d.", strengthNames[s], notFoundCount);
+    }
+
+    uset_close(contractions);
+    uset_close(expansions);
+    uset_close(letters);
+
+    CollData::close(monkeyData);
     
     ucol_close(coll);
 }
diff --git a/icu4c/source/test/intltest/ssearch.h b/icu4c/source/test/intltest/ssearch.h
index 4afc7bf832..d888413641 100644
--- a/icu4c/source/test/intltest/ssearch.h
+++ b/icu4c/source/test/intltest/ssearch.h
@@ -1,6 +1,6 @@
 /*
  **********************************************************************
- *   Copyright (C) 2005-2008, International Business Machines
+ *   Copyright (C) 2005-2009, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  **********************************************************************
  */
@@ -11,6 +11,7 @@
 #include "unicode/utypes.h"
 #include "unicode/unistr.h"
 #include "unicode/ucol.h"
+#include "unicode/bmsearch.h"
 
 #include "intltest.h"
 
@@ -34,10 +35,24 @@ public:
     virtual void offsetTest();
     virtual void monkeyTest(char *params);
 
+    virtual void bmMonkeyTest(char *params);
+    virtual void boyerMooreTest();
+    virtual void goodSuffixTest();
+    virtual void searchTime();
+    
+    virtual void bmsTest();
+    virtual void bmSearchTest();
+
+    virtual void udhrTest();
+
 private:
     virtual const char   *getPath(char buffer[2048], const char *filename);
     virtual       int32_t monkeyTestCase(UCollator *coll, const UnicodeString &testCase, const UnicodeString &pattern, const UnicodeString &altPattern,
                                          const char *name, const char *strength, uint32_t seed);
+
+    virtual       int32_t bmMonkeyTestCase(UCollator *coll, const UnicodeString &testCase, const UnicodeString &pattern, const UnicodeString &altPattern,
+                                         BoyerMooreSearch *bms, BoyerMooreSearch *abms,
+                                         const char *name, const char *strength, uint32_t seed);
 #endif
                                          
 };
diff --git a/icu4c/source/test/perf/strsrchperf/strsrchperf.cpp b/icu4c/source/test/perf/strsrchperf/strsrchperf.cpp
index 4d89921164..23d16d7de2 100644
--- a/icu4c/source/test/perf/strsrchperf/strsrchperf.cpp
+++ b/icu4c/source/test/perf/strsrchperf/strsrchperf.cpp
@@ -1,6 +1,6 @@
 /********************************************************************
  * COPYRIGHT:
- * Copyright (C) 2008 IBM, Inc.   All Rights Reserved.
+ * Copyright (C) 2008-2009 IBM, Inc.   All Rights Reserved.
  *
  ********************************************************************/
 /** 
@@ -14,7 +14,13 @@
 StringSearchPerformanceTest::StringSearchPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status)
 :UPerfTest(argc,argv,status){
     int32_t start, end;
+
+#ifdef TEST_BOYER_MOORE_SEARCH
+    bms = NULL;
+#else
     srch = NULL;
+#endif
+
     pttrn = NULL;
     if(status== U_ILLEGAL_ARGUMENT_ERROR || line_mode){
        fprintf(stderr,gUsageString, "strsrchperf");
@@ -22,7 +28,8 @@ StringSearchPerformanceTest::StringSearchPerformanceTest(int32_t argc, const cha
     }
     /* Get the Text */
     src = getBuffer(srcLen, status);
-    
+
+#if 0
     /* Get a word to find. Do this by selecting a random word with a word breakiterator. */
     UBreakIterator* brk = ubrk_open(UBRK_WORD, locale, src, srcLen, &status);
     if(U_FAILURE(status)){
@@ -38,9 +45,38 @@ StringSearchPerformanceTest::StringSearchPerformanceTest(int32_t argc, const cha
     }
     pttrn = temp; /* store word in pttrn */
     ubrk_close(brk);
+#else
+    /* The first line of the file contains the pattern. */
+    start = 0;
+
+    /* Stop at srcLen too: a file with no line terminator must not be read past its end. */
+    for(end = start; end < srcLen; end += 1) {
+        UChar ch = src[end];
+        if (ch == 0x000A || ch == 0x000D || ch == 0x2028) {
+            break;
+        }
+    }
+
+    pttrnLen = end - start;
+    UChar* temp = (UChar*)malloc(sizeof(UChar)*(pttrnLen));
+    for (int i = 0; i < pttrnLen; i++) {
+        temp[i] = src[start++];
+    }
+    pttrn = temp; /* store the first line (the pattern) in pttrn */
+#endif
     
+#ifdef TEST_BOYER_MOORE_SEARCH
+    UnicodeString patternString(pttrn, pttrnLen);
+    UCollator *coll = ucol_open(locale, &status);
+    CollData *data = CollData::open(coll, status);
+
+    targetString = new UnicodeString(src, srcLen);
+    bms = new BoyerMooreSearch(data, patternString, targetString, status);
+#else
     /* Create the StringSearch object to be use in performance test. */
     srch = usearch_open(pttrn, pttrnLen, src, srcLen, locale, NULL, &status);
+#endif
+
     if(U_FAILURE(status)){
         fprintf(stderr, "FAILED to create UPerfTest object. Error: %s\n", u_errorName(status));
         return;
@@ -49,12 +85,23 @@ StringSearchPerformanceTest::StringSearchPerformanceTest(int32_t argc, const cha
 }
 
 StringSearchPerformanceTest::~StringSearchPerformanceTest() {
+#ifdef TEST_BOYER_MOORE_SEARCH
+    if (bms != NULL) { /* the constructor sets bms = NULL first and may leave it that way */
+        CollData *data  = bms->getData();
+        UCollator *coll = data->getCollator();
+        delete bms; delete targetString;
+        CollData::close(data); ucol_close(coll);
+    }
+#endif
     if (pttrn != NULL) {
         free(pttrn);
     }
+
+#ifndef TEST_BOYER_MOORE_SEARCH
     if (srch != NULL) {
         usearch_close(srch);
     }
+#endif
 }
 
 UPerfFunction* StringSearchPerformanceTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char *par) {
@@ -70,12 +117,20 @@ UPerfFunction* StringSearchPerformanceTest::runIndexedTest(int32_t index, UBool
 }
 
 UPerfFunction* StringSearchPerformanceTest::Test_ICU_Forward_Search(){
+#ifdef TEST_BOYER_MOORE_SEARCH
+    StringSearchPerfFunction *func = new StringSearchPerfFunction(ICUForwardSearch, bms, src, srcLen, pttrn, pttrnLen);
+#else
     StringSearchPerfFunction* func = new StringSearchPerfFunction(ICUForwardSearch, srch, src, srcLen, pttrn, pttrnLen);
+#endif
     return func;
 }
 
 UPerfFunction* StringSearchPerformanceTest::Test_ICU_Backward_Search(){
+#ifdef TEST_BOYER_MOORE_SEARCH
+    StringSearchPerfFunction *func = new StringSearchPerfFunction(ICUBackwardSearch, bms, src, srcLen, pttrn, pttrnLen);
+#else
     StringSearchPerfFunction* func = new StringSearchPerfFunction(ICUBackwardSearch, srch, src, srcLen, pttrn, pttrnLen);
+#endif
     return func;
 }
 
diff --git a/icu4c/source/test/perf/strsrchperf/strsrchperf.h b/icu4c/source/test/perf/strsrchperf/strsrchperf.h
index 3c01279398..6f2281c585 100644
--- a/icu4c/source/test/perf/strsrchperf/strsrchperf.h
+++ b/icu4c/source/test/perf/strsrchperf/strsrchperf.h
@@ -1,6 +1,6 @@
 /********************************************************************
  * COPYRIGHT:
- * Copyright (C) 2008 IBM, Inc.   All Rights Reserved.
+ * Copyright (C) 2008-2009 IBM, Inc.   All Rights Reserved.
  *
  ********************************************************************/
 #ifndef _STRSRCHPERF_H
@@ -8,11 +8,19 @@
 
 #include "unicode/ubrk.h"
 #include "unicode/usearch.h"
+#include "unicode/colldata.h"
+#include "unicode/bmsearch.h"
 #include "unicode/uperf.h"
 #include <stdlib.h>
 #include <stdio.h>
 
+#define TEST_BOYER_MOORE_SEARCH
+
+#ifdef TEST_BOYER_MOORE_SEARCH
+typedef void (*StrSrchFn) (BoyerMooreSearch * bms, const UChar *src, int32_t srcLen, const UChar *pttrn, int32_t pttrnLen, UErrorCode *status);
+#else
 typedef void (*StrSrchFn)(UStringSearch* srch, const UChar* src,int32_t srcLen, const UChar* pttrn, int32_t pttrnLen, UErrorCode* status);
+#endif
 
 class StringSearchPerfFunction : public UPerfFunction {
 private:
@@ -21,17 +29,39 @@ private:
     int32_t srcLen;
     const UChar* pttrn;
     int32_t pttrnLen;
+#ifdef TEST_BOYER_MOORE_SEARCH
+    BoyerMooreSearch *bms;
+#else
     UStringSearch* srch;
+#endif
     
 public:
     virtual void call(UErrorCode* status) {
+#ifdef TEST_BOYER_MOORE_SEARCH
+        (*fn)(bms, src, srcLen, pttrn, pttrnLen, status);
+#else
         (*fn)(srch, src, srcLen, pttrn, pttrnLen, status);
+#endif
     }
     
     virtual long getOperationsPerIteration() {
+#if 0
         return (long)(srcLen/pttrnLen);
+#else
+        return (long) srcLen;
+#endif
     }
     
+#ifdef TEST_BOYER_MOORE_SEARCH
+    StringSearchPerfFunction(StrSrchFn func, BoyerMooreSearch *search, const UChar *source, int32_t sourceLen, const UChar *pattern, int32_t patternLen) {
+        fn       = func;
+        src      = source;
+        srcLen   = sourceLen;
+        pttrn    = pattern;
+        pttrnLen = patternLen;
+        bms      = search;
+    }
+#else
     StringSearchPerfFunction(StrSrchFn func, UStringSearch* search, const UChar* source,int32_t sourceLen, const UChar* pattern, int32_t patternLen) {
         fn = func;
         src = source;
@@ -40,6 +70,7 @@ public:
         pttrnLen = patternLen;
         srch = search;
     }
+#endif
 };
 
 class StringSearchPerformanceTest : public UPerfTest {
@@ -48,7 +79,12 @@ private:
     int32_t srcLen;
     UChar* pttrn;
     int32_t pttrnLen;
+#ifdef TEST_BOYER_MOORE_SEARCH
+    UnicodeString *targetString;
+    BoyerMooreSearch *bms;
+#else
     UStringSearch* srch;
+#endif
     
 public:
     StringSearchPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status);
@@ -56,9 +92,29 @@ public:
     virtual UPerfFunction* runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = NULL);
     
     UPerfFunction* Test_ICU_Forward_Search();
+
     UPerfFunction* Test_ICU_Backward_Search();
 };
 
+
+#ifdef TEST_BOYER_MOORE_SEARCH
+void ICUForwardSearch(BoyerMooreSearch *bms, const UChar *source, int32_t sourceLen, const UChar *pattern, int32_t patternLen, UErrorCode * /*status*/) { 
+    int32_t offset = 0, start = -1, end = -1;
+    /* Repeat the search, resuming at the end of each hit, until search() returns false; only bms is used here. */
+    while (bms->search(offset, start, end)) {
+        offset = end;
+    }
+}
+
+void ICUBackwardSearch(BoyerMooreSearch *bms, const UChar *source, int32_t sourceLen, const UChar *pattern, int32_t patternLen, UErrorCode * /*status*/) { 
+    int32_t offset = 0, start = -1, end = -1;
+
+    /* NOTE: No Boyer-Moore backward search yet, so this runs a forward scan from offset 0 instead. */
+    while (bms->search(offset, start, end)) {
+        offset = end;
+    }
+}
+#else
 void ICUForwardSearch(UStringSearch *srch, const UChar* source, int32_t sourceLen, const UChar* pattern, int32_t patternLen, UErrorCode* status) {
     int32_t match;
     
@@ -76,5 +132,6 @@ void ICUBackwardSearch(UStringSearch *srch, const UChar* source, int32_t sourceL
         match = usearch_previous(srch, status);
     }
 }
+#endif
 
 #endif /* _STRSRCHPERF_H */
diff --git a/icu4c/source/test/testdata/ssearch.xml b/icu4c/source/test/testdata/ssearch.xml
index 26d676ea87..c4beaf24cb 100644
--- a/icu4c/source/test/testdata/ssearch.xml
+++ b/icu4c/source/test/testdata/ssearch.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 
-<!-- Copyright (c) 2007-2008 IBM Corporation and others. All rights reserved -->
+<!-- Copyright (c) 2007-2009 IBM Corporation and others. All rights reserved -->
 
 <!-- Test data file for string search  -->
 <!DOCTYPE stringsearch-tests [
@@ -12,6 +12,7 @@
           locale CDATA "en" 
           strength (PRIMARY | SECONDARY | TERTIARY | QUATERNARY | IDENTICAL) "TERTIARY" 
           norm (ON | OFF) "OFF"
+          alternate_handling (NON_IGNORABLE | SHIFTED) "NON_IGNORABLE"
           >
 
 <!ELEMENT pattern (#PCDATA)>
@@ -20,7 +21,7 @@
 <!ELEMENT post (#PCDATA)>
 ]>
 
-<stringsearch-tests debug="test32">
+<stringsearch-tests>
   <!-- debug="test11"     (for copying into the above element)  -->
     
     <!-- Very simple match  -->
@@ -174,8 +175,15 @@
       <pattern>A\u0300</pattern>
       <pre>At IDENTICAL, shoud this match?  </pre><m>\u00c0</m><post></post>
     </test-case>
-    
-    <test-case id="test25" strength="SECONDARY" locale="en">
+
+  <test-case id="test24b" strength="IDENTICAL" alternate_handling="SHIFTED" locale="en">
+    <pattern>A\u0300</pattern>
+    <pre>At IDENTICAL, shoud this match?  </pre>
+    <m>\u00c0</m>
+    <post></post>
+  </test-case>
+
+  <test-case id="test25" strength="SECONDARY" locale="en">
       <pattern>Ű</pattern>
       <pre>12</pre><m>ű</m><post> Ű</post>
     </test-case>
@@ -285,11 +293,13 @@
     
 
     <!-- Long combining sequences  -->
+    <!-- Backwards search fails because the pattern ends with ignorables
     <test-case id="test60" strength="PRIMARY">
       <pattern>A\u0301\u0301\u0301\u0301</pattern>
       <m>A\u0301\u0301\u0301\u0301\u0301</m>
     </test-case>
-    
+    -->
+
     <test-case id="test61" strength="TERTIARY">
       <pattern>A\u0301\u0301\u0301\u0301</pattern>
           <pre>A\u0301\u0301\u0301\u0301\u0301</pre>
@@ -409,5 +419,27 @@
     <pattern>VII</pattern>
     <m>\u2166</m>
   </test-case>
+
+  <test-case id="test83" strength="IDENTICAL" alternate_handling="SHIFTED" locale="en">
+    <pattern>Universal Declaration of Human Rights</pattern>
+    <pre>Proclaims this </pre><m>Universal Declaration of Human Rights</m><post> as a common standard of achievement for all peoples and all nations</post>
+  </test-case>
+
+  <test-case id="test83b" strength="TERTIARY" alternate_handling="SHIFTED" locale="en">
+    <pattern>Universal Declaration of Human Rights</pattern>
+    <pre>Proclaims this </pre>
+    <m>Universal-Declaration-of-Human-Rights</m>
+    <post> as a common standard of achievement for all peoples and all nations</post>
+  </test-case>
+
+  <test-case id="test84" strength="TERTIARY" locale="en">
+    <pattern>\u05E9\u0591\u05E9</pattern>
+    <m>\u05E9\u0592\u05E9</m>
+  </test-case>
+
+  <test-case id="test84b" strength="IDENTICAL" locale="en">
+    <pattern>\u05E9\u0591\u05E9</pattern>
+    <pre>\u05E9\u0592\u05E9</pre>
+  </test-case>
 </stringsearch-tests>