scuffed-code/icu4c/source/i18n/collationsets.cpp

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2013-2014, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* collationsets.cpp
*
* created on: 2013feb09
* created by: Markus W. Scherer
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_COLLATION

#include "unicode/ucharstrie.h"
#include "unicode/uniset.h"
#include "unicode/unistr.h"
#include "unicode/ustringtrie.h"
#include "collation.h"
#include "collationdata.h"
#include "collationsets.h"
#include "normalizer2impl.h"
#include "uassert.h"
#include "utf16collationiterator.h"
#include "utrie2.h"

U_NAMESPACE_BEGIN

U_CDECL_BEGIN

// Range callback passed to utrie2_enum() by TailoredSet::forData().
// context is the TailoredSet that started the enumeration.
// Ranges that map to FALLBACK_CE32 are skipped with TRUE;
// every other range is forwarded to TailoredSet::handleCE32(),
// whose result is returned to the enumerator.
static UBool U_CALLCONV
enumTailoredRange(const void *context, UChar32 start, UChar32 end, uint32_t ce32) {
    if(ce32 != Collation::FALLBACK_CE32) {
        TailoredSet *tailoredSet = (TailoredSet *)context;
        return tailoredSet->handleCE32(start, end, ce32);
    }
    // The range falls back to the base data: it is not tailored.
    return TRUE;
}

U_CDECL_END

void
TailoredSet::forData(const CollationData *d, UErrorCode &ec) {
    if(U_FAILURE(ec)) { return; }
    errorCode = ec;  // Preserve info & warning codes.
    data = d;
    baseData = d->base;
    U_ASSERT(baseData != NULL);
    utrie2_enum(data->trie, NULL, enumTailoredRange, this);
    ec = errorCode;
}

// Processes one enumerated range [start, end] of the tailoring trie.
// ce32 is the tailoring mapping shared by the whole range; it must not be
// FALLBACK_CE32. Each code point is compared with the base mapping and
// added to the tailored set if the mappings differ.
// Returns whether errorCode still indicates success.
UBool
TailoredSet::handleCE32(UChar32 start, UChar32 end, uint32_t ce32) {
    U_ASSERT(ce32 != Collation::FALLBACK_CE32);
    if(Collation::isSpecialCE32(ce32)) {
        ce32 = data->getIndirectCE32(ce32);
        if(ce32 == Collation::FALLBACK_CE32) {
            // The indirection resolved to a fallback: nothing to compare.
            return U_SUCCESS(errorCode);
        }
    }
    // The first code point is always processed, like in a do-while loop.
    for(UChar32 c = start;;) {
        const uint32_t baseCE32 = baseData->getFinalCE32(baseData->getCE32(c));
        // Equal CE32 values alone do not prove equal mappings:
        // contractions and expansions in different data objects
        // normally differ even if they have the same data offsets.
        if(!Collation::isSelfContainedCE32(ce32) || !Collation::isSelfContainedCE32(baseCE32)) {
            compare(c, ce32, baseCE32);
        } else if(ce32 != baseCE32) {
            // Fastpath: both values are self-contained and they differ.
            tailored->add(c);
        }
        if(++c > end) { break; }
    }
    return U_SUCCESS(errorCode);
}

// Compares the tailoring mapping ce32 with the base mapping baseCE32
// for the code point c, and adds c (together with the currently set
// prefix and/or suffix, see add()) to the tailored set if they differ.
//
// The work is done in three stages:
// 1. Prefix (pre-context) mappings are peeled off. If both sides have prefixes,
//    they are compared pairwise; if only one side has them, all of its
//    prefix strings are added. Each side continues with its default mapping.
// 2. Contraction mappings are handled the same way for suffixes.
// 3. The remaining non-contextual mappings are compared by tag and contents.
void
TailoredSet::compare(UChar32 c, uint32_t ce32, uint32_t baseCE32) {
    // Stage 1: prefixes.
    // readCE32() yields the default mapping for "no prefix match";
    // the prefix trie is iterated starting at offset 2.
    if(Collation::isPrefixCE32(ce32)) {
        const UChar *p = data->contexts + Collation::indexFromCE32(ce32);
        ce32 = data->getFinalCE32(CollationData::readCE32(p));
        if(Collation::isPrefixCE32(baseCE32)) {
            const UChar *q = baseData->contexts + Collation::indexFromCE32(baseCE32);
            baseCE32 = baseData->getFinalCE32(CollationData::readCE32(q));
            comparePrefixes(c, p + 2, q + 2);
        } else {
            // Only the tailoring has prefixes: they are all tailored.
            addPrefixes(data, c, p + 2);
        }
    } else if(Collation::isPrefixCE32(baseCE32)) {
        // Only the base has prefixes: add all of them.
        const UChar *q = baseData->contexts + Collation::indexFromCE32(baseCE32);
        baseCE32 = baseData->getFinalCE32(CollationData::readCE32(q));
        addPrefixes(baseData, c, q + 2);
    }

    // Stage 2: contractions.
    // When CONTRACT_SINGLE_CP_NO_MATCH is set, the default mapping is
    // replaced with NO_CE32 instead of being read from the context data.
    if(Collation::isContractionCE32(ce32)) {
        const UChar *p = data->contexts + Collation::indexFromCE32(ce32);
        if((ce32 & Collation::CONTRACT_SINGLE_CP_NO_MATCH) != 0) {
            ce32 = Collation::NO_CE32;
        } else {
            ce32 = data->getFinalCE32(CollationData::readCE32(p));
        }
        if(Collation::isContractionCE32(baseCE32)) {
            const UChar *q = baseData->contexts + Collation::indexFromCE32(baseCE32);
            if((baseCE32 & Collation::CONTRACT_SINGLE_CP_NO_MATCH) != 0) {
                baseCE32 = Collation::NO_CE32;
            } else {
                baseCE32 = baseData->getFinalCE32(CollationData::readCE32(q));
            }
            compareContractions(c, p + 2, q + 2);
        } else {
            // Only the tailoring has contractions: they are all tailored.
            addContractions(c, p + 2);
        }
    } else if(Collation::isContractionCE32(baseCE32)) {
        // Only the base has contractions: add all of them.
        // NOTE(review): unlike the two-sided case above, this branch does not
        // check CONTRACT_SINGLE_CP_NO_MATCH on baseCE32 -- confirm this is intended.
        const UChar *q = baseData->contexts + Collation::indexFromCE32(baseCE32);
        baseCE32 = baseData->getFinalCE32(CollationData::readCE32(q));
        addContractions(c, q + 2);
    }

    // Stage 3: determine the tags of the remaining mappings.
    // A tag of -1 stands for a non-special CE32.
    int32_t tag;
    if(Collation::isSpecialCE32(ce32)) {
        tag = Collation::tagFromCE32(ce32);
        U_ASSERT(tag != Collation::PREFIX_TAG);
        U_ASSERT(tag != Collation::CONTRACTION_TAG);
        // Currently, the tailoring data builder does not write offset tags.
        // They might be useful for saving space,
        // but they would complicate the builder,
        // and in tailorings we assume that performance of tailored characters is more important.
        U_ASSERT(tag != Collation::OFFSET_TAG);
    } else {
        tag = -1;
    }
    int32_t baseTag;
    if(Collation::isSpecialCE32(baseCE32)) {
        baseTag = Collation::tagFromCE32(baseCE32);
        U_ASSERT(baseTag != Collation::PREFIX_TAG);
        U_ASSERT(baseTag != Collation::CONTRACTION_TAG);
    } else {
        baseTag = -1;
    }

    // Non-contextual mappings, expansions, etc.
    if(baseTag == Collation::OFFSET_TAG) {
        // We might be comparing a tailoring CE which is a copy of
        // a base offset-tag CE, via the [optimize [set]] syntax
        // or when a single-character mapping was copied for tailored contractions.
        // Offset tags always result in long-primary CEs,
        // with common secondary/tertiary weights.
        if(!Collation::isLongPrimaryCE32(ce32)) {
            add(c);
            return;
        }
        // Compute the primary that the base offset data yields for c
        // and compare it with the tailoring's long primary.
        int64_t dataCE = baseData->ces[Collation::indexFromCE32(baseCE32)];
        uint32_t p = Collation::getThreeBytePrimaryForOffsetData(c, dataCE);
        if(Collation::primaryFromLongPrimaryCE32(ce32) != p) {
            add(c);
            return;
        }
        // Equal primaries: execution continues with the tag comparison below.
    }

    // Different kinds of mappings are always treated as different.
    if(tag != baseTag) {
        add(c);
        return;
    }

    if(tag == Collation::EXPANSION32_TAG) {
        // Compare the two CE32 arrays element by element.
        const uint32_t *ce32s = data->ce32s + Collation::indexFromCE32(ce32);
        int32_t length = Collation::lengthFromCE32(ce32);

        const uint32_t *baseCE32s = baseData->ce32s + Collation::indexFromCE32(baseCE32);
        int32_t baseLength = Collation::lengthFromCE32(baseCE32);

        if(length != baseLength) {
            add(c);
            return;
        }
        for(int32_t i = 0; i < length; ++i) {
            if(ce32s[i] != baseCE32s[i]) {
                add(c);
                break;
            }
        }
    } else if(tag == Collation::EXPANSION_TAG) {
        // Compare the two 64-bit CE arrays element by element.
        const int64_t *ces = data->ces + Collation::indexFromCE32(ce32);
        int32_t length = Collation::lengthFromCE32(ce32);

        const int64_t *baseCEs = baseData->ces + Collation::indexFromCE32(baseCE32);
        int32_t baseLength = Collation::lengthFromCE32(baseCE32);

        if(length != baseLength) {
            add(c);
            return;
        }
        for(int32_t i = 0; i < length; ++i) {
            if(ces[i] != baseCEs[i]) {
                add(c);
                break;
            }
        }
    } else if(tag == Collation::HANGUL_TAG) {
        // A Hangul syllable is added if any of its two or three Jamo
        // is already in the tailored set.
        // NOTE(review): presumably relies on the Jamo having been processed
        // before the syllables -- confirm against the enumeration order.
        UChar jamos[3];
        int32_t length = Hangul::decompose(c, jamos);
        if(tailored->contains(jamos[0]) || tailored->contains(jamos[1]) ||
                (length == 3 && tailored->contains(jamos[2]))) {
            add(c);
        }
    } else if(ce32 != baseCE32) {
        // Same tag (or both non-special): the CE32 values themselves decide.
        add(c);
    }
}

// Walks the prefix tries of the tailoring (p) and of the base (q) in parallel.
// A prefix that occurs on only one side is added as tailored.
// For a prefix that occurs on both sides, the two mappings are compared
// with that prefix set as the current context.
void
TailoredSet::comparePrefixes(UChar32 c, const UChar *p, const UChar *q) {
    UCharsTrie::Iterator tailoringIter(p, 0, errorCode);
    UCharsTrie::Iterator baseIter(q, 0, errorCode);
    const UnicodeString *tailoringPrefix = NULL;  // NULL: fetch the next one.
    const UnicodeString *basePrefix = NULL;  // NULL: fetch the next one.
    // Limit sentinel: a string with a single U+FFFF.
    // U+FFFF is untailorable and will not occur in prefixes.
    UnicodeString sentinel((UChar)0xffff);
    for(;;) {
        if(tailoringPrefix == NULL) {
            tailoringPrefix =
                tailoringIter.next(errorCode) ? &tailoringIter.getString() : &sentinel;
        }
        if(basePrefix == NULL) {
            basePrefix = baseIter.next(errorCode) ? &baseIter.getString() : &sentinel;
        }
        // Both tries are exhausted.
        if(tailoringPrefix == &sentinel && basePrefix == &sentinel) { break; }
        const int32_t order = tailoringPrefix->compare(*basePrefix);
        if(order == 0) {
            // The prefix occurs on both sides: compare its mappings in context.
            setPrefix(*tailoringPrefix);
            compare(c, (uint32_t)tailoringIter.getValue(), (uint32_t)baseIter.getValue());
            resetPrefix();
            tailoringPrefix = NULL;
            basePrefix = NULL;
        } else if(order < 0) {
            // The prefix occurs in the tailoring but not in the base.
            addPrefix(data, *tailoringPrefix, c, (uint32_t)tailoringIter.getValue());
            tailoringPrefix = NULL;
        } else {
            // The prefix occurs in the base but not in the tailoring.
            addPrefix(baseData, *basePrefix, c, (uint32_t)baseIter.getValue());
            basePrefix = NULL;
        }
    }
}

// Walks the contraction suffix tries of the tailoring (p) and of the base (q)
// in parallel. A suffix that occurs on only one side is added as tailored.
// For a suffix that occurs on both sides, the two mappings are compared
// with that suffix set as the current context.
void
TailoredSet::compareContractions(UChar32 c, const UChar *p, const UChar *q) {
    UCharsTrie::Iterator tailoringIter(p, 0, errorCode);
    UCharsTrie::Iterator baseIter(q, 0, errorCode);
    const UnicodeString *tailoringSuffix = NULL;  // NULL: fetch the next one.
    const UnicodeString *baseSuffix = NULL;  // NULL: fetch the next one.
    // Limit sentinel: a string with two U+FFFF.
    // U+FFFF is untailorable and will not occur in contractions except maybe
    // as a single suffix character for a root-collator boundary contraction.
    UnicodeString sentinel((UChar)0xffff);
    sentinel.append((UChar)0xffff);
    for(;;) {
        if(tailoringSuffix == NULL) {
            tailoringSuffix =
                tailoringIter.next(errorCode) ? &tailoringIter.getString() : &sentinel;
        }
        if(baseSuffix == NULL) {
            baseSuffix = baseIter.next(errorCode) ? &baseIter.getString() : &sentinel;
        }
        // Both tries are exhausted.
        if(tailoringSuffix == &sentinel && baseSuffix == &sentinel) { break; }
        const int32_t order = tailoringSuffix->compare(*baseSuffix);
        if(order == 0) {
            // The suffix occurs on both sides: compare its mappings in context.
            suffix = tailoringSuffix;
            compare(c, (uint32_t)tailoringIter.getValue(), (uint32_t)baseIter.getValue());
            suffix = NULL;
            tailoringSuffix = NULL;
            baseSuffix = NULL;
        } else if(order < 0) {
            // The suffix occurs in the tailoring but not in the base.
            addSuffix(c, *tailoringSuffix);
            tailoringSuffix = NULL;
        } else {
            // The suffix occurs in the base but not in the tailoring.
            addSuffix(c, *baseSuffix);
            baseSuffix = NULL;
        }
    }
}

void
TailoredSet::addPrefixes(const CollationData *d, UChar32 c, const UChar *p) {
    UCharsTrie::Iterator prefixes(p, 0, errorCode);
    while(prefixes.next(errorCode)) {
        addPrefix(d, prefixes.getString(), c, (uint32_t)prefixes.getValue());
    }
}

void
TailoredSet::addPrefix(const CollationData *d, const UnicodeString &pfx, UChar32 c, uint32_t ce32) {
    setPrefix(pfx);
    ce32 = d->getFinalCE32(ce32);
    if(Collation::isContractionCE32(ce32)) {
        const UChar *p = d->contexts + Collation::indexFromCE32(ce32);
        addContractions(c, p + 2);
    }
    tailored->add(UnicodeString(unreversedPrefix).append(c));
    resetPrefix();
}

// Adds every contraction suffix stored in the trie at p,
// each combined with c (and the current prefix), via addSuffix().
void
TailoredSet::addContractions(UChar32 c, const UChar *p) {
    for(UCharsTrie::Iterator iter(p, 0, errorCode); iter.next(errorCode);) {
        addSuffix(c, iter.getString());
    }
}

// Adds the string "current prefix + c + sfx" to the tailored set.
void
TailoredSet::addSuffix(UChar32 c, const UnicodeString &sfx) {
    UnicodeString contraction(unreversedPrefix);
    contraction.append(c);
    contraction.append(sfx);
    tailored->add(contraction);
}

// Adds c to the tailored set.
// Without a current prefix or suffix, the code point itself is added;
// otherwise the string "prefix + c + suffix" is added.
void
TailoredSet::add(UChar32 c) {
    if(unreversedPrefix.isEmpty() && suffix == NULL) {
        // No context: plain code point.
        tailored->add(c);
        return;
    }
    UnicodeString contextual(unreversedPrefix);
    contextual.append(c);
    if(suffix != NULL) {
        contextual.append(*suffix);
    }
    tailored->add(contextual);
}

// Out-of-line definition of the CESink destructor; it has nothing to release.
ContractionsAndExpansions::CESink::~CESink() {}

U_CDECL_BEGIN

// Range callback passed to utrie2_enum() by ContractionsAndExpansions::forData().
// context is the ContractionsAndExpansions object that started the enumeration.
// Its checkTailored field selects the mode:
//   0: There is no tailoring; every range is handled as is.
//  <0: Tailoring pass; non-fallback ranges are recorded in cne->tailored
//      and then handled.
//  >0: Base pass; only the parts of the range that are not in cne->tailored
//      are handled.
// Returns TRUE for skipped ranges, otherwise whether cne->errorCode
// still indicates success.
static UBool U_CALLCONV
enumCnERange(const void *context, UChar32 start, UChar32 end, uint32_t ce32) {
    ContractionsAndExpansions *cne = (ContractionsAndExpansions *)context;
    if(cne->checkTailored < 0) {
        // Collect the set of code points with mappings in the tailoring data.
        if(ce32 == Collation::FALLBACK_CE32) {
            return TRUE;  // fallback to base, not tailored
        }
        cne->tailored.add(start, end);
    } else if(cne->checkTailored > 0) {
        // Exclude tailored ranges from the base data enumeration.
        if(start == end) {
            if(cne->tailored.contains(start)) {
                return TRUE;
            }
        } else if(cne->tailored.containsSome(start, end)) {
            // Partially tailored: handle only the untailored sub-ranges.
            cne->ranges.set(start, end).removeAll(cne->tailored);
            const int32_t rangeCount = cne->ranges.getRangeCount();
            for(int32_t r = 0; r < rangeCount; ++r) {
                cne->handleCE32(cne->ranges.getRangeStart(r), cne->ranges.getRangeEnd(r), ce32);
            }
            return U_SUCCESS(cne->errorCode);
        }
    }
    // checkTailored == 0 needs neither collecting nor checking.
    cne->handleCE32(start, end, ce32);
    return U_SUCCESS(cne->errorCode);
}

U_CDECL_END

void
ContractionsAndExpansions::forData(const CollationData *d, UErrorCode &ec) {
    if(U_FAILURE(ec)) { return; }
    errorCode = ec;  // Preserve info & warning codes.
    // Add all from the data, can be tailoring or base.
    if(d->base != NULL) {
        checkTailored = -1;
    }
    data = d;
    utrie2_enum(data->trie, NULL, enumCnERange, this);
    if(d->base == NULL || U_FAILURE(errorCode)) {
        ec = errorCode;
        return;
    }
    // Add all from the base data but only for un-tailored code points.
    tailored.freeze();
    checkTailored = 1;
    data = d->base;
    utrie2_enum(data->trie, NULL, enumCnERange, this);
    ec = errorCode;
}

void
ContractionsAndExpansions::forCodePoint(const CollationData *d, UChar32 c, UErrorCode &ec) {
    if(U_FAILURE(ec)) { return; }
    errorCode = ec;  // Preserve info & warning codes.
    uint32_t ce32 = d->getCE32(c);
    if(ce32 == Collation::FALLBACK_CE32) {
        d = d->base;
        ce32 = d->getCE32(c);
    }
    data = d;
    handleCE32(c, c, ce32);
    ec = errorCode;
}

// Processes the mapping ce32 which applies to the code points start..end.
// - If a sink is set, the CEs of simple mappings and expansions are sent to it.
// - Expansions are recorded via addExpansions(), unless a prefix is set.
// - Prefix and contraction mappings are delegated to handlePrefixes()
//   and handleContractions().
// - DIGIT_TAG and U0000_TAG are indirections: the real CE32 is fetched
//   and the loop runs again with it.
// - Tags that must not occur here set errorCode to U_INTERNAL_PROGRAM_ERROR
//   (unless it already indicates a failure).
void
ContractionsAndExpansions::handleCE32(UChar32 start, UChar32 end, uint32_t ce32) {
    // Loops only for the indirection tags; every other case returns.
    for(;;) {
        if((ce32 & 0xff) < Collation::SPECIAL_CE32_LOW_BYTE) {
            // !isSpecialCE32()
            if(sink != NULL) {
                sink->handleCE(Collation::ceFromSimpleCE32(ce32));
            }
            return;
        }
        switch(Collation::tagFromCE32(ce32)) {
        case Collation::FALLBACK_TAG:
            // Nothing to do for a fallback mapping.
            return;
        case Collation::RESERVED_TAG_3:
        case Collation::BUILDER_DATA_TAG:
        case Collation::LEAD_SURROGATE_TAG:
            // These tags are treated as an internal error here.
            // An earlier failure code is not overwritten.
            if(U_SUCCESS(errorCode)) { errorCode = U_INTERNAL_PROGRAM_ERROR; }
            return;
        case Collation::LONG_PRIMARY_TAG:
            if(sink != NULL) {
                sink->handleCE(Collation::ceFromLongPrimaryCE32(ce32));
            }
            return;
        case Collation::LONG_SECONDARY_TAG:
            if(sink != NULL) {
                sink->handleCE(Collation::ceFromLongSecondaryCE32(ce32));
            }
            return;
        case Collation::LATIN_EXPANSION_TAG:
            // Two CEs are unpacked directly from the CE32.
            if(sink != NULL) {
                ces[0] = Collation::latinCE0FromCE32(ce32);
                ces[1] = Collation::latinCE1FromCE32(ce32);
                sink->handleExpansion(ces, 2);
            }
            // Optimization: If we have a prefix,
            // then the relevant strings have been added already.
            if(unreversedPrefix.isEmpty()) {
                addExpansions(start, end);
            }
            return;
        case Collation::EXPANSION32_TAG:
            // The expansion is stored as CE32s; convert each one to a CE for the sink.
            if(sink != NULL) {
                const uint32_t *ce32s = data->ce32s + Collation::indexFromCE32(ce32);
                int32_t length = Collation::lengthFromCE32(ce32);
                for(int32_t i = 0; i < length; ++i) {
                    ces[i] = Collation::ceFromCE32(*ce32s++);
                }
                sink->handleExpansion(ces, length);
            }
            // Optimization: If we have a prefix,
            // then the relevant strings have been added already.
            if(unreversedPrefix.isEmpty()) {
                addExpansions(start, end);
            }
            return;
        case Collation::EXPANSION_TAG:
            // The expansion is stored as CEs; pass them to the sink without copying.
            if(sink != NULL) {
                int32_t length = Collation::lengthFromCE32(ce32);
                sink->handleExpansion(data->ces + Collation::indexFromCE32(ce32), length);
            }
            // Optimization: If we have a prefix,
            // then the relevant strings have been added already.
            if(unreversedPrefix.isEmpty()) {
                addExpansions(start, end);
            }
            return;
        case Collation::PREFIX_TAG:
            handlePrefixes(start, end, ce32);
            return;
        case Collation::CONTRACTION_TAG:
            handleContractions(start, end, ce32);
            return;
        case Collation::DIGIT_TAG:
            // Fetch the non-numeric-collation CE32 and continue.
            ce32 = data->ce32s[Collation::indexFromCE32(ce32)];
            break;
        case Collation::U0000_TAG:
            U_ASSERT(start == 0 && end == 0);
            // Fetch the normal ce32 for U+0000 and continue.
            ce32 = data->ce32s[0];
            break;
        case Collation::HANGUL_TAG:
            if(sink != NULL) {
                // TODO: This should be optimized,
                // especially if [start..end] is the complete Hangul range. (assert that)
                // Run a collation iterator over each syllable as a one-unit string
                // and send the resulting CEs to the sink.
                UTF16CollationIterator iter(data, FALSE, NULL, NULL, NULL);
                UChar hangul[1] = { 0 };
                for(UChar32 c = start; c <= end; ++c) {
                    hangul[0] = (UChar)c;
                    iter.setText(hangul, hangul + 1);
                    int32_t length = iter.fetchCEs(errorCode);
                    if(U_FAILURE(errorCode)) { return; }
                    // Ignore the terminating non-CE.
                    U_ASSERT(length >= 2 && iter.getCE(length - 1) == Collation::NO_CE);
                    sink->handleExpansion(iter.getCEs(), length - 1);
                }
            }
            // Optimization: If we have a prefix,
            // then the relevant strings have been added already.
            if(unreversedPrefix.isEmpty()) {
                addExpansions(start, end);
            }
            return;
        case Collation::OFFSET_TAG:
            // Currently no need to send offset CEs to the sink.
            return;
        case Collation::IMPLICIT_TAG:
            // Currently no need to send implicit CEs to the sink.
            return;
        }
    }
}

// Handles a prefix (pre-context) mapping ce32 for the code points start..end.
// The default mapping (used if no prefix matches) is always handled.
// If addPrefixes is set, then each prefix is also recorded in both
// the contractions and the expansions sets, and its mapping is handled
// with that prefix as the current context.
void
ContractionsAndExpansions::handlePrefixes(
        UChar32 start, UChar32 end, uint32_t ce32) {
    const UChar *p = data->contexts + Collation::indexFromCE32(ce32);
    // Default if no prefix match.
    handleCE32(start, end, CollationData::readCE32(p));
    if(addPrefixes) {
        // The prefix trie is iterated starting at offset 2 of the context data.
        UCharsTrie::Iterator iter(p + 2, 0, errorCode);
        while(iter.next(errorCode)) {
            setPrefix(iter.getString());
            // Prefix/pre-context mappings are special kinds of contractions
            // that always yield expansions.
            addStrings(start, end, contractions);
            addStrings(start, end, expansions);
            const uint32_t prefixCE32 = (uint32_t)iter.getValue();
            handleCE32(start, end, prefixCE32);
        }
        resetPrefix();
    }
}

// Handles a contraction mapping ce32 for the code points start..end.
// The default mapping (used if no suffix matches) is handled first, unless
// the CE32 is marked as having no match for the single code point.
// Then each suffix is recorded in the contractions set (and, underneath
// a prefix, in the expansions set), and its mapping is handled
// with that suffix as the current context.
void
ContractionsAndExpansions::handleContractions(
        UChar32 start, UChar32 end, uint32_t ce32) {
    const UChar *p = data->contexts + Collation::indexFromCE32(ce32);
    if((ce32 & Collation::CONTRACT_SINGLE_CP_NO_MATCH) == 0) {
        // Default if no suffix match.
        const uint32_t defaultCE32 = CollationData::readCE32(p);
        U_ASSERT(!Collation::isContractionCE32(defaultCE32));
        handleCE32(start, end, defaultCE32);
    } else {
        // No match on the single code point.
        // We are underneath a prefix, and the default mapping is just
        // a fallback to the mappings for a shorter prefix.
        U_ASSERT(!unreversedPrefix.isEmpty());
    }
    // The suffix trie is iterated starting at offset 2 of the context data.
    UCharsTrie::Iterator iter(p + 2, 0, errorCode);
    while(iter.next(errorCode)) {
        suffix = &iter.getString();
        addStrings(start, end, contractions);
        if(!unreversedPrefix.isEmpty()) {
            addStrings(start, end, expansions);
        }
        const uint32_t suffixCE32 = (uint32_t)iter.getValue();
        handleCE32(start, end, suffixCE32);
    }
    suffix = NULL;
}

// Records the code points start..end as having expansions.
// With a current prefix or suffix, the contextual strings are added;
// otherwise the plain code point range is added.
// Nothing is recorded if there is no expansions set.
void
ContractionsAndExpansions::addExpansions(UChar32 start, UChar32 end) {
    if(!unreversedPrefix.isEmpty() || suffix != NULL) {
        // addStrings() itself ignores a NULL set.
        addStrings(start, end, expansions);
        return;
    }
    if(expansions != NULL) {
        expansions->add(start, end);
    }
}

// For each code point in start..end, adds the string
// "current prefix + code point + current suffix" to the set.
// Does nothing if set is NULL.
void
ContractionsAndExpansions::addStrings(UChar32 start, UChar32 end, UnicodeSet *set) {
    if(set != NULL) {
        UnicodeString s(unreversedPrefix);
        // The first code point is always processed, like in a do-while loop.
        for(UChar32 c = start;;) {
            s.append(c);
            if(suffix != NULL) {
                s.append(*suffix);
            }
            set->add(s);
            // Cut the string back to just the prefix for the next code point.
            s.truncate(unreversedPrefix.length());
            if(++c > end) { break; }
        }
    }
}

U_NAMESPACE_END

#endif  // !UCONFIG_NO_COLLATION
ICU-12764 icu4c utf-8 source files, update Copyright notices. X-SVN-Rev: 39583 2017-01-20 00:20:31 +00:00			`// © 2016 and later: Unicode, Inc. and others.`
ICU-12564 Update copyright notice in trunk X-SVN-Rev: 38848 2016-06-15 18:58:17 +00:00			`// License & terms of use: http://www.unicode.org/copyright.html`
ICU-9101 merge branches/markus/collv2@35225 into the trunk X-SVN-Rev: 35227 2014-02-25 21:21:49 +00:00			`/*`
			`*******************************************************************************`
ICU-12564 Reverted r38761 and r38762, because we want to prepend the Unicode copyright for existing source files, instead of replacing copyright comments. X-SVN-Rev: 38776 2016-05-31 21:45:07 +00:00			`* Copyright (C) 2013-2014, International Business Machines`
			`* Corporation and others. All Rights Reserved.`
ICU-9101 merge branches/markus/collv2@35225 into the trunk X-SVN-Rev: 35227 2014-02-25 21:21:49 +00:00			`*******************************************************************************`
			`* collationsets.cpp`
			`*`
			`* created on: 2013feb09`
			`* created by: Markus W. Scherer`
			`*/`

			`#include "unicode/utypes.h"`

			`#if !UCONFIG_NO_COLLATION`

			`#include "unicode/ucharstrie.h"`
			`#include "unicode/uniset.h"`
			`#include "unicode/unistr.h"`
			`#include "unicode/ustringtrie.h"`
			`#include "collation.h"`
			`#include "collationdata.h"`
			`#include "collationsets.h"`
			`#include "normalizer2impl.h"`
			`#include "uassert.h"`
			`#include "utf16collationiterator.h"`
			`#include "utrie2.h"`

			`U_NAMESPACE_BEGIN`

			`U_CDECL_BEGIN`

			`static UBool U_CALLCONV`
			`enumTailoredRange(const void *context, UChar32 start, UChar32 end, uint32_t ce32) {`
			`if(ce32 == Collation::FALLBACK_CE32) {`
			`return TRUE; // fallback to base, not tailored`
			`}`
			`TailoredSet *ts = (TailoredSet *)context;`
			`return ts->handleCE32(start, end, ce32);`
			`}`

			`U_CDECL_END`

			`void`
			`TailoredSet::forData(const CollationData *d, UErrorCode &ec) {`
			`if(U_FAILURE(ec)) { return; }`
			`errorCode = ec; // Preserve info & warning codes.`
			`data = d;`
			`baseData = d->base;`
			`U_ASSERT(baseData != NULL);`
			`utrie2_enum(data->trie, NULL, enumTailoredRange, this);`
			`ec = errorCode;`
			`}`

			`UBool`
			`TailoredSet::handleCE32(UChar32 start, UChar32 end, uint32_t ce32) {`
			`U_ASSERT(ce32 != Collation::FALLBACK_CE32);`
			`if(Collation::isSpecialCE32(ce32)) {`
			`ce32 = data->getIndirectCE32(ce32);`
			`if(ce32 == Collation::FALLBACK_CE32) {`
			`return U_SUCCESS(errorCode);`
			`}`
			`}`
			`do {`
			`uint32_t baseCE32 = baseData->getFinalCE32(baseData->getCE32(start));`
			`// Do not just continue if ce32 == baseCE32 because`
			`// contractions and expansions in different data objects`
			`// normally differ even if they have the same data offsets.`
			`if(Collation::isSelfContainedCE32(ce32) && Collation::isSelfContainedCE32(baseCE32)) {`
			`// fastpath`
			`if(ce32 != baseCE32) {`
			`tailored->add(start);`
			`}`
			`} else {`
			`compare(start, ce32, baseCE32);`
			`}`
			`} while(++start <= end);`
			`return U_SUCCESS(errorCode);`
			`}`

			`void`
			`TailoredSet::compare(UChar32 c, uint32_t ce32, uint32_t baseCE32) {`
			`if(Collation::isPrefixCE32(ce32)) {`
			`const UChar *p = data->contexts + Collation::indexFromCE32(ce32);`
			`ce32 = data->getFinalCE32(CollationData::readCE32(p));`
			`if(Collation::isPrefixCE32(baseCE32)) {`
			`const UChar *q = baseData->contexts + Collation::indexFromCE32(baseCE32);`
			`baseCE32 = baseData->getFinalCE32(CollationData::readCE32(q));`
			`comparePrefixes(c, p + 2, q + 2);`
			`} else {`
			`addPrefixes(data, c, p + 2);`
			`}`
			`} else if(Collation::isPrefixCE32(baseCE32)) {`
			`const UChar *q = baseData->contexts + Collation::indexFromCE32(baseCE32);`
			`baseCE32 = baseData->getFinalCE32(CollationData::readCE32(q));`
			`addPrefixes(baseData, c, q + 2);`
			`}`

			`if(Collation::isContractionCE32(ce32)) {`
			`const UChar *p = data->contexts + Collation::indexFromCE32(ce32);`
			`if((ce32 & Collation::CONTRACT_SINGLE_CP_NO_MATCH) != 0) {`
			`ce32 = Collation::NO_CE32;`
			`} else {`
			`ce32 = data->getFinalCE32(CollationData::readCE32(p));`
			`}`
			`if(Collation::isContractionCE32(baseCE32)) {`
			`const UChar *q = baseData->contexts + Collation::indexFromCE32(baseCE32);`
			`if((baseCE32 & Collation::CONTRACT_SINGLE_CP_NO_MATCH) != 0) {`
			`baseCE32 = Collation::NO_CE32;`
			`} else {`
			`baseCE32 = baseData->getFinalCE32(CollationData::readCE32(q));`
			`}`
			`compareContractions(c, p + 2, q + 2);`
			`} else {`
			`addContractions(c, p + 2);`
			`}`
			`} else if(Collation::isContractionCE32(baseCE32)) {`
			`const UChar *q = baseData->contexts + Collation::indexFromCE32(baseCE32);`
			`baseCE32 = baseData->getFinalCE32(CollationData::readCE32(q));`
			`addContractions(c, q + 2);`
			`}`

			`int32_t tag;`
			`if(Collation::isSpecialCE32(ce32)) {`
			`tag = Collation::tagFromCE32(ce32);`
			`U_ASSERT(tag != Collation::PREFIX_TAG);`
			`U_ASSERT(tag != Collation::CONTRACTION_TAG);`
			`// Currently, the tailoring data builder does not write offset tags.`
			`// They might be useful for saving space,`
			`// but they would complicate the builder,`
			`// and in tailorings we assume that performance of tailored characters is more important.`
			`U_ASSERT(tag != Collation::OFFSET_TAG);`
			`} else {`
			`tag = -1;`
			`}`
			`int32_t baseTag;`
			`if(Collation::isSpecialCE32(baseCE32)) {`
			`baseTag = Collation::tagFromCE32(baseCE32);`
			`U_ASSERT(baseTag != Collation::PREFIX_TAG);`
			`U_ASSERT(baseTag != Collation::CONTRACTION_TAG);`
			`} else {`
			`baseTag = -1;`
			`}`

			`// Non-contextual mappings, expansions, etc.`
			`if(baseTag == Collation::OFFSET_TAG) {`
			`// We might be comparing a tailoring CE which is a copy of`
			`// a base offset-tag CE, via the [optimize [set]] syntax`
			`// or when a single-character mapping was copied for tailored contractions.`
			`// Offset tags always result in long-primary CEs,`
			`// with common secondary/tertiary weights.`
			`if(!Collation::isLongPrimaryCE32(ce32)) {`
			`add(c);`
			`return;`
			`}`
			`int64_t dataCE = baseData->ces[Collation::indexFromCE32(baseCE32)];`
			`uint32_t p = Collation::getThreeBytePrimaryForOffsetData(c, dataCE);`
			`if(Collation::primaryFromLongPrimaryCE32(ce32) != p) {`
			`add(c);`
			`return;`
			`}`
			`}`

			`if(tag != baseTag) {`
			`add(c);`
			`return;`
			`}`

			`if(tag == Collation::EXPANSION32_TAG) {`
			`const uint32_t *ce32s = data->ce32s + Collation::indexFromCE32(ce32);`
			`int32_t length = Collation::lengthFromCE32(ce32);`

			`const uint32_t *baseCE32s = baseData->ce32s + Collation::indexFromCE32(baseCE32);`
			`int32_t baseLength = Collation::lengthFromCE32(baseCE32);`

			`if(length != baseLength) {`
			`add(c);`
			`return;`
			`}`
			`for(int32_t i = 0; i < length; ++i) {`
			`if(ce32s[i] != baseCE32s[i]) {`
			`add(c);`
			`break;`
			`}`
			`}`
			`} else if(tag == Collation::EXPANSION_TAG) {`
			`const int64_t *ces = data->ces + Collation::indexFromCE32(ce32);`
			`int32_t length = Collation::lengthFromCE32(ce32);`

			`const int64_t *baseCEs = baseData->ces + Collation::indexFromCE32(baseCE32);`
			`int32_t baseLength = Collation::lengthFromCE32(baseCE32);`

			`if(length != baseLength) {`
			`add(c);`
			`return;`
			`}`
			`for(int32_t i = 0; i < length; ++i) {`
			`if(ces[i] != baseCEs[i]) {`
			`add(c);`
			`break;`
			`}`
			`}`
			`} else if(tag == Collation::HANGUL_TAG) {`
			`UChar jamos[3];`
			`int32_t length = Hangul::decompose(c, jamos);`
			`if(tailored->contains(jamos[0]) || tailored->contains(jamos[1]) ||`
			`(length == 3 && tailored->contains(jamos[2]))) {`
			`add(c);`
			`}`
			`} else if(ce32 != baseCE32) {`
			`add(c);`
			`}`
			`}`

			`void`
			`TailoredSet::comparePrefixes(UChar32 c, const UChar *p, const UChar *q) {`
			`// Parallel iteration over prefixes of both tables.`
			`UCharsTrie::Iterator prefixes(p, 0, errorCode);`
			`UCharsTrie::Iterator basePrefixes(q, 0, errorCode);`
			`const UnicodeString *tp = NULL; // Tailoring prefix.`
			`const UnicodeString *bp = NULL; // Base prefix.`
			`// Use a string with a U+FFFF as the limit sentinel.`
			`// U+FFFF is untailorable and will not occur in prefixes.`
			`UnicodeString none((UChar)0xffff);`
			`for(;;) {`
			`if(tp == NULL) {`
			`if(prefixes.next(errorCode)) {`
			`tp = &prefixes.getString();`
			`} else {`
			`tp = &none;`
			`}`
			`}`
			`if(bp == NULL) {`
			`if(basePrefixes.next(errorCode)) {`
			`bp = &basePrefixes.getString();`
			`} else {`
			`bp = &none;`
			`}`
			`}`
			`if(tp == &none && bp == &none) { break; }`
			`int32_t cmp = tp->compare(*bp);`
			`if(cmp < 0) {`
			`// tp occurs in the tailoring but not in the base.`
			`addPrefix(data, *tp, c, (uint32_t)prefixes.getValue());`
			`tp = NULL;`
			`} else if(cmp > 0) {`
			`// bp occurs in the base but not in the tailoring.`
			`addPrefix(baseData, *bp, c, (uint32_t)basePrefixes.getValue());`
			`bp = NULL;`
			`} else {`
			`setPrefix(*tp);`
			`compare(c, (uint32_t)prefixes.getValue(), (uint32_t)basePrefixes.getValue());`
			`resetPrefix();`
			`tp = NULL;`
			`bp = NULL;`
			`}`
			`}`
			`}`

			`void`
			`TailoredSet::compareContractions(UChar32 c, const UChar *p, const UChar *q) {`
			`// Parallel iteration over suffixes of both tables.`
			`UCharsTrie::Iterator suffixes(p, 0, errorCode);`
			`UCharsTrie::Iterator baseSuffixes(q, 0, errorCode);`
			`const UnicodeString *ts = NULL; // Tailoring suffix.`
			`const UnicodeString *bs = NULL; // Base suffix.`
			`// Use a string with two U+FFFF as the limit sentinel.`
			`// U+FFFF is untailorable and will not occur in contractions except maybe`
			`// as a single suffix character for a root-collator boundary contraction.`
			`UnicodeString none((UChar)0xffff);`
			`none.append((UChar)0xffff);`
			`for(;;) {`
			`if(ts == NULL) {`
			`if(suffixes.next(errorCode)) {`
			`ts = &suffixes.getString();`
			`} else {`
			`ts = &none;`
			`}`
			`}`
			`if(bs == NULL) {`
			`if(baseSuffixes.next(errorCode)) {`
			`bs = &baseSuffixes.getString();`
			`} else {`
			`bs = &none;`
			`}`
			`}`
			`if(ts == &none && bs == &none) { break; }`
			`int32_t cmp = ts->compare(*bs);`
			`if(cmp < 0) {`
			`// ts occurs in the tailoring but not in the base.`
			`addSuffix(c, *ts);`
			`ts = NULL;`
			`} else if(cmp > 0) {`
			`// bs occurs in the base but not in the tailoring.`
			`addSuffix(c, *bs);`
			`bs = NULL;`
			`} else {`
			`suffix = ts;`
			`compare(c, (uint32_t)suffixes.getValue(), (uint32_t)baseSuffixes.getValue());`
			`suffix = NULL;`
			`ts = NULL;`
			`bs = NULL;`
			`}`
			`}`
			`}`

			// Adds every prefix stored in the prefix trie for code point c.
			//
			// d: the collation data that the prefix values belong to; it is forwarded
			//    to addPrefix(), which takes a "const CollationData *".
			// c: the code point that the prefixes apply to.
			// p: start of the UCharsTrie data holding the prefixes.
			//
			// Each (prefix string, value) pair returned by the iterator is passed
			// to addPrefix().
			void
			TailoredSet::addPrefixes(const CollationData *d, UChar32 c, const UChar *p) {
			    UCharsTrie::Iterator prefixes(p, 0, errorCode);
			    while(prefixes.next(errorCode)) {
			        addPrefix(d, prefixes.getString(), c, (uint32_t)prefixes.getValue());
			    }
			}

			`void`
			`TailoredSet::addPrefix(const CollationData *d, const UnicodeString &pfx, UChar32 c, uint32_t ce32) {`
			`setPrefix(pfx);`
			`ce32 = d->getFinalCE32(ce32);`
			`if(Collation::isContractionCE32(ce32)) {`
			`const UChar *p = d->contexts + Collation::indexFromCE32(ce32);`
			`addContractions(c, p + 2);`
			`}`
			`tailored->add(UnicodeString(unreversedPrefix).append(c));`
			`resetPrefix();`
			`}`

			// Adds every contraction suffix found in the trie at p for code point c.
			// Each suffix string returned by the iterator is passed to addSuffix().
			void
			TailoredSet::addContractions(UChar32 c, const UChar *p) {
			    for(UCharsTrie::Iterator iter(p, 0, errorCode); iter.next(errorCode);) {
			        addSuffix(c, iter.getString());
			    }
			}

			// Adds the string (current prefix) + c + sfx to the tailored set.
			void
			TailoredSet::addSuffix(UChar32 c, const UnicodeString &sfx) {
			    UnicodeString contraction(unreversedPrefix);
			    contraction.append(c).append(sfx);
			    tailored->add(contraction);
			}

			// Adds code point c to the tailored set, together with whatever context
			// is currently active: without a prefix and suffix the bare code point is
			// added, otherwise the string prefix + c (+ suffix) is added.
			void
			TailoredSet::add(UChar32 c) {
			    const UBool hasContext = !unreversedPrefix.isEmpty() || suffix != NULL;
			    if(!hasContext) {
			        tailored->add(c);
			        return;
			    }
			    UnicodeString withContext(unreversedPrefix);
			    withContext.append(c);
			    if(suffix != NULL) {
			        withContext.append(*suffix);
			    }
			    tailored->add(withContext);
			}

			// Out-of-line definition of the CESink destructor; it has nothing to release.
			ContractionsAndExpansions::CESink::~CESink() {
			}

			U_CDECL_BEGIN

			// Range callback handed to utrie2_enum() by ContractionsAndExpansions::forData().
			//
			// context:    the ContractionsAndExpansions object doing the enumeration.
			// start, end: the inclusive code point range sharing the value ce32.
			// ce32:       the trie value for that range.
			//
			// Behavior depends on cne->checkTailored:
			//   == 0: no tailoring; every range is processed.
			//   <  0: tailoring pass; fallback ranges are skipped, all other ranges
			//         are remembered in cne->tailored and then processed.
			//   >  0: base pass; code points already in cne->tailored are excluded.
			//
			// Returns TRUE for skipped ranges, otherwise whether cne->errorCode
			// still indicates success.
			static UBool U_CALLCONV
			enumCnERange(const void *context, UChar32 start, UChar32 end, uint32_t ce32) {
			    // The callback signature only offers a const void *; the object is modified,
			    // so the cast also drops the const qualifier.
			    ContractionsAndExpansions *cne = (ContractionsAndExpansions *)context;
			    if(cne->checkTailored == 0) {
			        // There is no tailoring.
			        // No need to collect nor check the tailored set.
			    } else if(cne->checkTailored < 0) {
			        // Collect the set of code points with mappings in the tailoring data.
			        if(ce32 == Collation::FALLBACK_CE32) {
			            return TRUE;  // fallback to base, not tailored
			        } else {
			            cne->tailored.add(start, end);
			        }
			        // checkTailored > 0: Exclude tailored ranges from the base data enumeration.
			    } else if(start == end) {
			        if(cne->tailored.contains(start)) {
			            return TRUE;
			        }
			    } else if(cne->tailored.containsSome(start, end)) {
			        // Partially tailored range: process only the untailored sub-ranges.
			        cne->ranges.set(start, end).removeAll(cne->tailored);
			        int32_t count = cne->ranges.getRangeCount();
			        for(int32_t i = 0; i < count; ++i) {
			            cne->handleCE32(cne->ranges.getRangeStart(i), cne->ranges.getRangeEnd(i), ce32);
			        }
			        return U_SUCCESS(cne->errorCode);
			    }
			    cne->handleCE32(start, end, ce32);
			    return U_SUCCESS(cne->errorCode);
			}

			U_CDECL_END

			// Enumerates all mappings in the collation data d and, if d is a tailoring
			// (d->base != NULL), also the base mappings for code points that the
			// tailoring does not cover.
			//
			// d:  the collation data to enumerate.
			// ec: in/out error code. Nothing is done if it already indicates failure;
			//     on return it receives the internal errorCode.
			void
			ContractionsAndExpansions::forData(const CollationData *d, UErrorCode &ec) {
			    if(U_FAILURE(ec)) { return; }
			    errorCode = ec;  // Preserve info & warning codes.
			    // Add all from the data, can be tailoring or base.
			    if(d->base != NULL) {
			        // Negative: enumCnERange() collects the tailored code points.
			        checkTailored = -1;
			    }
			    data = d;
			    utrie2_enum(data->trie, NULL, enumCnERange, this);
			    if(d->base == NULL || U_FAILURE(errorCode)) {
			        // No base to merge in, or the first pass failed.
			        ec = errorCode;
			        return;
			    }
			    // Add all from the base data but only for un-tailored code points.
			    tailored.freeze();
			    checkTailored = 1;
			    data = d->base;
			    utrie2_enum(data->trie, NULL, enumCnERange, this);
			    ec = errorCode;
			}

			`void`
			`ContractionsAndExpansions::forCodePoint(const CollationData *d, UChar32 c, UErrorCode &ec) {`
			`if(U_FAILURE(ec)) { return; }`
			`errorCode = ec; // Preserve info & warning codes.`
			`uint32_t ce32 = d->getCE32(c);`
			`if(ce32 == Collation::FALLBACK_CE32) {`
			`d = d->base;`
			`ce32 = d->getCE32(c);`
			`}`
			`data = d;`
			`handleCE32(c, c, ce32);`
			`ec = errorCode;`
			`}`

			`void`
			`ContractionsAndExpansions::handleCE32(UChar32 start, UChar32 end, uint32_t ce32) {`
			`for(;;) {`
			`if((ce32 & 0xff) < Collation::SPECIAL_CE32_LOW_BYTE) {`
			`// !isSpecialCE32()`
			`if(sink != NULL) {`
			`sink->handleCE(Collation::ceFromSimpleCE32(ce32));`
			`}`
			`return;`
			`}`
			`switch(Collation::tagFromCE32(ce32)) {`
			`case Collation::FALLBACK_TAG:`
			`return;`
			`case Collation::RESERVED_TAG_3:`
			`case Collation::BUILDER_DATA_TAG:`
			`case Collation::LEAD_SURROGATE_TAG:`
			`if(U_SUCCESS(errorCode)) { errorCode = U_INTERNAL_PROGRAM_ERROR; }`
			`return;`
			`case Collation::LONG_PRIMARY_TAG:`
			`if(sink != NULL) {`
			`sink->handleCE(Collation::ceFromLongPrimaryCE32(ce32));`
			`}`
			`return;`
			`case Collation::LONG_SECONDARY_TAG:`
			`if(sink != NULL) {`
			`sink->handleCE(Collation::ceFromLongSecondaryCE32(ce32));`
			`}`
			`return;`
			`case Collation::LATIN_EXPANSION_TAG:`
			`if(sink != NULL) {`
			`ces[0] = Collation::latinCE0FromCE32(ce32);`
			`ces[1] = Collation::latinCE1FromCE32(ce32);`
			`sink->handleExpansion(ces, 2);`
			`}`
			`// Optimization: If we have a prefix,`
			`// then the relevant strings have been added already.`
			`if(unreversedPrefix.isEmpty()) {`
			`addExpansions(start, end);`
			`}`
			`return;`
			`case Collation::EXPANSION32_TAG:`
			`if(sink != NULL) {`
			`const uint32_t *ce32s = data->ce32s + Collation::indexFromCE32(ce32);`
			`int32_t length = Collation::lengthFromCE32(ce32);`
			`for(int32_t i = 0; i < length; ++i) {`
			`ces[i] = Collation::ceFromCE32(*ce32s++);`
			`}`
			`sink->handleExpansion(ces, length);`
			`}`
			`// Optimization: If we have a prefix,`
			`// then the relevant strings have been added already.`
			`if(unreversedPrefix.isEmpty()) {`
			`addExpansions(start, end);`
			`}`
			`return;`
			`case Collation::EXPANSION_TAG:`
			`if(sink != NULL) {`
			`int32_t length = Collation::lengthFromCE32(ce32);`
			`sink->handleExpansion(data->ces + Collation::indexFromCE32(ce32), length);`
			`}`
			`// Optimization: If we have a prefix,`
			`// then the relevant strings have been added already.`
			`if(unreversedPrefix.isEmpty()) {`
			`addExpansions(start, end);`
			`}`
			`return;`
			`case Collation::PREFIX_TAG:`
			`handlePrefixes(start, end, ce32);`
			`return;`
			`case Collation::CONTRACTION_TAG:`
			`handleContractions(start, end, ce32);`
			`return;`
			`case Collation::DIGIT_TAG:`
			`// Fetch the non-numeric-collation CE32 and continue.`
			`ce32 = data->ce32s[Collation::indexFromCE32(ce32)];`
			`break;`
			`case Collation::U0000_TAG:`
			`U_ASSERT(start == 0 && end == 0);`
			`// Fetch the normal ce32 for U+0000 and continue.`
			`ce32 = data->ce32s[0];`
			`break;`
			`case Collation::HANGUL_TAG:`
			`if(sink != NULL) {`
			`// TODO: This should be optimized,`
			`// especially if [start..end] is the complete Hangul range. (assert that)`
			`UTF16CollationIterator iter(data, FALSE, NULL, NULL, NULL);`
			`UChar hangul[1] = { 0 };`
			`for(UChar32 c = start; c <= end; ++c) {`
			`hangul[0] = (UChar)c;`
			`iter.setText(hangul, hangul + 1);`
			`int32_t length = iter.fetchCEs(errorCode);`
			`if(U_FAILURE(errorCode)) { return; }`
			`// Ignore the terminating non-CE.`
			`U_ASSERT(length >= 2 && iter.getCE(length - 1) == Collation::NO_CE);`
			`sink->handleExpansion(iter.getCEs(), length - 1);`
			`}`
			`}`
			`// Optimization: If we have a prefix,`
			`// then the relevant strings have been added already.`
			`if(unreversedPrefix.isEmpty()) {`
			`addExpansions(start, end);`
			`}`
			`return;`
			`case Collation::OFFSET_TAG:`
			`// Currently no need to send offset CEs to the sink.`
			`return;`
			`case Collation::IMPLICIT_TAG:`
			`// Currently no need to send implicit CEs to the sink.`
			`return;`
			`}`
			`}`
			`}`

			// Handles a prefix-tagged ce32 for the range [start, end].
			//
			// The default value (used when no prefix matches) is processed first.
			// If addPrefixes is set, every prefix in the trie that follows the default
			// value is then installed in turn, the resulting strings are added to both
			// the contractions and the expansions sets, and the value for that prefix
			// is processed.
			void
			ContractionsAndExpansions::handlePrefixes(
			        UChar32 start, UChar32 end, uint32_t ce32) {
			    const UChar *contextData = data->contexts + Collation::indexFromCE32(ce32);
			    // Default if no prefix match.
			    const uint32_t defaultCE32 = CollationData::readCE32(contextData);
			    handleCE32(start, end, defaultCE32);
			    if(addPrefixes) {
			        // The prefix trie starts two units in, after the default CE32.
			        UCharsTrie::Iterator iter(contextData + 2, 0, errorCode);
			        while(iter.next(errorCode)) {
			            setPrefix(iter.getString());
			            // Prefix/pre-context mappings are special kinds of contractions
			            // that always yield expansions.
			            addStrings(start, end, contractions);
			            addStrings(start, end, expansions);
			            handleCE32(start, end, (uint32_t)iter.getValue());
			        }
			        resetPrefix();
			    }
			}

			// Handles a contraction-tagged ce32 for the range [start, end].
			//
			// Unless the CONTRACT_SINGLE_CP_NO_MATCH bit is set, the default value
			// (used when no suffix matches) is processed first. Then every suffix in
			// the trie that follows the default value is made current in turn, the
			// resulting strings are added to the contractions set (and, under a prefix,
			// to the expansions set), and the value for that suffix is processed.
			void
			ContractionsAndExpansions::handleContractions(
			        UChar32 start, UChar32 end, uint32_t ce32) {
			    const UChar *contextData = data->contexts + Collation::indexFromCE32(ce32);
			    if((ce32 & Collation::CONTRACT_SINGLE_CP_NO_MATCH) == 0) {
			        // Default if no suffix match.
			        const uint32_t defaultCE32 = CollationData::readCE32(contextData);
			        U_ASSERT(!Collation::isContractionCE32(defaultCE32));
			        handleCE32(start, end, defaultCE32);
			    } else {
			        // No match on the single code point.
			        // We are underneath a prefix, and the default mapping is just
			        // a fallback to the mappings for a shorter prefix.
			        U_ASSERT(!unreversedPrefix.isEmpty());
			    }
			    // The suffix trie starts two units in, after the default CE32.
			    UCharsTrie::Iterator iter(contextData + 2, 0, errorCode);
			    while(iter.next(errorCode)) {
			        suffix = &iter.getString();
			        addStrings(start, end, contractions);
			        if(!unreversedPrefix.isEmpty()) {
			            addStrings(start, end, expansions);
			        }
			        handleCE32(start, end, (uint32_t)iter.getValue());
			    }
			    suffix = NULL;
			}

			// Records [start, end] as expanding. With an active prefix or suffix the
			// code points are added as strings including that context; otherwise the
			// plain range is added to the expansions set, if there is one.
			void
			ContractionsAndExpansions::addExpansions(UChar32 start, UChar32 end) {
			    if(!unreversedPrefix.isEmpty() || suffix != NULL) {
			        addStrings(start, end, expansions);
			        return;
			    }
			    if(expansions != NULL) {
			        expansions->add(start, end);
			    }
			}

			// For each code point in [start, end], adds the string
			// prefix + code point (+ suffix, if one is current) to set.
			// Does nothing if set is NULL. At least one string is added even if
			// start > end, because the end check happens after each addition.
			void
			ContractionsAndExpansions::addStrings(UChar32 start, UChar32 end, UnicodeSet *set) {
			    if(set == NULL) {
			        return;
			    }
			    const int32_t prefixLength = unreversedPrefix.length();
			    UnicodeString buffer(unreversedPrefix);
			    for(UChar32 cp = start;;) {
			        buffer.append(cp);
			        if(suffix != NULL) {
			            buffer.append(*suffix);
			        }
			        set->add(buffer);
			        // Cut the buffer back to just the prefix for the next code point.
			        buffer.truncate(prefixLength);
			        if(++cp > end) {
			            break;
			        }
			    }
			}

			`U_NAMESPACE_END`

			`#endif // !UCONFIG_NO_COLLATION`