scuffed-code/icu4c/source/i18n/collationweights.cpp

/*  
*******************************************************************************
*
*   Copyright (C) 1999-2015, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  collationweights.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2001mar08 as ucol_wgt.cpp
*   created by: Markus W. Scherer
*
*   This file contains code for allocating n collation element weights
*   between two exclusive limits.
*   It is used only internally by the collation tailoring builder.
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_COLLATION

#include "cmemory.h"
#include "collation.h"
#include "collationweights.h"
#include "uarrsort.h"
#include "uassert.h"

#ifdef UCOL_DEBUG
#   include <stdio.h>
#endif

U_NAMESPACE_BEGIN

/* collation element weight allocation -------------------------------------- */

/* helper functions for CE weights */

/**
 * Extracts one byte from a 32-bit weight.
 * Bytes are counted from the most significant one:
 * length==1 selects bits 31..24, length==4 selects bits 7..0.
 */
static inline uint32_t
getWeightTrail(uint32_t weight, int32_t length) {
    const int32_t shift=8*(4-length);  // distance of the selected byte from bit 0
    const uint32_t shifted=(uint32_t)(weight>>shift);
    return shifted&0xff;
}

/**
 * Returns the weight with its length-th byte (1=most significant) replaced by trail.
 * All bytes after the length-th one are cleared to 0;
 * the bytes before it are kept.
 */
static inline uint32_t
setWeightTrail(uint32_t weight, int32_t length, uint32_t trail) {
    const int32_t shift=8*(4-length);
    // Keeps only the bytes in front of the one being written.
    const uint32_t leadMask=0xffffff00<<shift;
    const uint32_t lead=weight&leadMask;
    return (uint32_t)(lead|(trail<<shift));
}

/**
 * Returns the idx-th byte (1=most significant) of the weight.
 * Reading an arbitrary byte is the same computation as reading the trail byte
 * of a weight of that length, so this delegates to getWeightTrail().
 */
static inline uint32_t
getWeightByte(uint32_t weight, int32_t idx) {
    const uint32_t byteAtIdx=getWeightTrail(weight, idx);
    return byteAtIdx;
}

/**
 * Returns the weight with its idx-th byte (1=most significant) replaced by byte.
 * Unlike setWeightTrail(), all other bytes of the weight are preserved.
 */
static inline uint32_t
setWeightByte(uint32_t weight, int32_t idx, uint32_t byte) {
    const int32_t leadBits=8*idx;      // number of bits in bytes 1..idx
    const int32_t shift=32-leadBits;   // bit position of the idx-th byte

    // Build 0xffffffff except for a 00 "hole" at the idx-th byte.
    //
    // The part of the mask that keeps the bytes after the hole must become 0
    // when idx selects the last byte. Do not compute that as uint32_t>>32:
    // C99 6.5.7 makes a shift by the full operand width undefined behavior
    // (PowerPC yields 0, x86 yields 0xffffffff), so that case is handled explicitly.
    uint32_t keep=0;
    if(leadBits<32) {
        keep=((uint32_t)0xffffffff)>>leadBits;
    }
    // Add the part that keeps the bytes in front of the hole.
    keep|=0xffffff00<<shift;

    const uint32_t preserved=weight&keep;
    return (uint32_t)(preserved|(byte<<shift));
}

/**
 * Returns the first length bytes (counted from the most significant one)
 * of the weight, with all following bytes set to 0.
 */
static inline uint32_t
truncateWeight(uint32_t weight, int32_t length) {
    const int32_t droppedBits=8*(4-length);
    const uint32_t prefixMask=0xffffffff<<droppedBits;
    return (uint32_t)(weight&prefixMask);
}

/**
 * Adds 1 to the length-th byte (1=most significant) of the weight
 * by plain 32-bit arithmetic: no per-byte limits are applied and
 * a carry propagates into the leading bytes (wrapping around at 2^32).
 */
static inline uint32_t
incWeightTrail(uint32_t weight, int32_t length) {
    const int32_t shift=8*(4-length);
    const uint32_t unit=(uint32_t)(1UL<<shift);  // value of 1 in the length-th byte
    return (uint32_t)(weight+unit);
}

/**
 * Subtracts 1 from the length-th byte (1=most significant) of the weight
 * by plain 32-bit arithmetic: no per-byte limits are applied and
 * a borrow propagates into the leading bytes (wrapping around at 0).
 */
static inline uint32_t
decWeightTrail(uint32_t weight, int32_t length) {
    const int32_t shift=8*(4-length);
    const uint32_t unit=(uint32_t)(1UL<<shift);  // value of 1 in the length-th byte
    return (uint32_t)(weight-unit);
}

/**
 * Creates an allocator without any configured byte limits:
 * middleLength, rangeIndex and rangeCount start at 0,
 * and slots 0..4 of minBytes[] and maxBytes[] are cleared.
 */
CollationWeights::CollationWeights()
        : middleLength(0), rangeIndex(0), rangeCount(0) {
    int32_t slot = 5;
    while(slot-- > 0) {
        maxBytes[slot] = 0;
        minBytes[slot] = 0;
    }
}

/**
 * Sets the byte limits for allocating primary weights.
 * The shortest ("middle") weights are one byte long.
 *
 * @param compressible if TRUE, the second byte is restricted to the values
 *        strictly between Collation::PRIMARY_COMPRESSION_LOW_BYTE and
 *        Collation::PRIMARY_COMPRESSION_HIGH_BYTE; otherwise it may be 2..0xff
 */
void
CollationWeights::initForPrimary(UBool compressible) {
    middleLength=1;

    // Lead byte.
    minBytes[1] = Collation::MERGE_SEPARATOR_BYTE + 1;
    maxBytes[1] = Collation::TRAIL_WEIGHT_BYTE;

    // Second byte: depends on whether the lead byte is compressible.
    if(!compressible) {
        minBytes[2] = 2;
        maxBytes[2] = 0xff;
    } else {
        minBytes[2] = Collation::PRIMARY_COMPRESSION_LOW_BYTE + 1;
        maxBytes[2] = Collation::PRIMARY_COMPRESSION_HIGH_BYTE - 1;
    }

    // Third and fourth bytes: 2..0xff.
    for(int32_t i = 3; i <= 4; ++i) {
        minBytes[i] = 2;
        maxBytes[i] = 0xff;
    }
}

/**
 * Sets the byte limits for allocating secondary weights.
 * Secondary weights occupy only the lower 16 bits, so bytes 1 and 2 are fixed at 0
 * and the shortest ("middle") weights have length 3.
 */
void
CollationWeights::initForSecondary() {
    middleLength=3;

    // Unused upper 16 bits.
    for(int32_t i = 1; i <= 2; ++i) {
        minBytes[i] = 0;
        maxBytes[i] = 0;
    }

    // The two bytes that carry the weight.
    minBytes[3] = Collation::LEVEL_SEPARATOR_BYTE + 1;
    minBytes[4] = 2;
    maxBytes[3] = 0xff;
    maxBytes[4] = 0xff;
}

/**
 * Sets the byte limits for allocating tertiary weights.
 * Tertiary weights occupy only the lower 16 bits, so bytes 1 and 2 are fixed at 0
 * and the shortest ("middle") weights have length 3.
 * Only 6 bits per byte are available (maximum 0x3f);
 * the other bits are used for case & quaternary weights.
 */
void
CollationWeights::initForTertiary() {
    middleLength=3;

    // Unused upper 16 bits.
    for(int32_t i = 1; i <= 2; ++i) {
        minBytes[i] = 0;
        maxBytes[i] = 0;
    }

    // The two bytes that carry the weight, 6 bits each.
    minBytes[3] = Collation::LEVEL_SEPARATOR_BYTE + 1;
    minBytes[4] = 2;
    maxBytes[3] = 0x3f;
    maxBytes[4] = 0x3f;
}

/**
 * Returns the next weight of the given length after the input weight,
 * respecting the per-byte limits in minBytes[]/maxBytes[].
 *
 * Works like an odometer: a byte that has reached its maximum is reset to its
 * minimum and the increment carries into the preceding byte.
 *
 * @param weight the weight to increment
 * @param length the number of bytes in the weight; the length-th byte is incremented first
 * @return the incremented weight
 */
uint32_t
CollationWeights::incWeight(uint32_t weight, int32_t length) const {
    uint32_t current=getWeightByte(weight, length);
    while(!(current<maxBytes[length])) {
        // Roll over: this byte restarts at its minimum, the carry goes one byte to the left.
        weight=setWeightByte(weight, length, minBytes[length]);
        --length;
        U_ASSERT(length > 0);
        current=getWeightByte(weight, length);
    }
    return setWeightByte(weight, length, current+1);
}

/**
 * Returns the weight that is offset steps after the input weight,
 * respecting the per-byte limits in minBytes[]/maxBytes[].
 *
 * The offset is added to the length-th byte. When the sum exceeds that byte's
 * maximum, the sum is split into a remainder that stays in this byte
 * and a quotient that is carried into the preceding byte.
 *
 * @param weight the starting weight
 * @param length the number of bytes in the weight
 * @param offset the number of steps to advance
 * @return the advanced weight
 */
uint32_t
CollationWeights::incWeightByOffset(uint32_t weight, int32_t length, int32_t offset) const {
    offset += getWeightByte(weight, length);
    while((uint32_t)offset > maxBytes[length]) {
        // Split the offset between this byte and the previous one.
        offset -= minBytes[length];
        weight = setWeightByte(weight, length, minBytes[length] + offset % countBytes(length));
        offset /= countBytes(length);
        --length;
        U_ASSERT(length > 0);
        offset += getWeightByte(weight, length);
    }
    // The sum fits into this byte.
    return setWeightByte(weight, length, offset);
}

/**
 * Makes every weight in the range one byte longer.
 * The start gets the minimum and the end gets the maximum value of the new byte,
 * and the count is multiplied by the number of values of that byte.
 */
void
CollationWeights::lengthenRange(WeightRange &range) const {
    const int32_t longer=range.length+1;
    range.length=longer;
    range.count*=countBytes(longer);
    range.end=setWeightTrail(range.end, longer, maxBytes[longer]);
    range.start=setWeightTrail(range.start, longer, minBytes[longer]);
}

/*
 * Comparison callback for uprv_sortArray: orders WeightRange items
 * by ascending start weight. The context argument is not used.
 * Returns -1, 0 or 1.
 */
static int32_t U_CALLCONV
compareRanges(const void * /*context*/, const void *left, const void *right) {
    const uint32_t leftStart=((const CollationWeights::WeightRange *)left)->start;
    const uint32_t rightStart=((const CollationWeights::WeightRange *)right)->start;
    if(leftStart==rightStart) {
        return 0;
    }
    return leftStart<rightStart ? -1 : 1;
}

/**
 * Determines the ranges of weights that are available strictly between
 * lowerLimit and upperLimit, and stores them in ranges[] (shortest first),
 * setting rangeCount.
 *
 * Up to three kinds of ranges are computed:
 * - lower[length]: weights that share the first length-1 bytes with lowerLimit
 *   and have a greater length-th byte,
 * - upper[length]: weights that share the first length-1 bytes with upperLimit
 *   and have a lesser length-th byte,
 * - middle: weights of middleLength between the truncated limits.
 * If there is no middle range, colliding or adjacent lower/upper ranges of the
 * same length are intersected or merged.
 *
 * @param lowerLimit exclusive lower limit, must not be 0
 * @param upperLimit exclusive upper limit, must not be 0
 * @return TRUE if at least one range was found;
 *         FALSE if lowerLimit>=upperLimit, if lowerLimit is a prefix of upperLimit,
 *         or if no range has room
 */
UBool
CollationWeights::getWeightRanges(uint32_t lowerLimit, uint32_t upperLimit) {
    U_ASSERT(lowerLimit != 0);
    U_ASSERT(upperLimit != 0);

    /* get the lengths of the limits */
    int32_t lowerLength=lengthOfWeight(lowerLimit);
    int32_t upperLength=lengthOfWeight(upperLimit);

#ifdef UCOL_DEBUG
    // Cast explicitly: %lx/%ld require (unsigned) long arguments,
    // which are not the same types as uint32_t/int32_t on all platforms.
    printf("length of lower limit 0x%08lx is %ld\n", (unsigned long)lowerLimit, (long)lowerLength);
    printf("length of upper limit 0x%08lx is %ld\n", (unsigned long)upperLimit, (long)upperLength);
#endif
    U_ASSERT(lowerLength>=middleLength);
    // Permit upperLength<middleLength: The upper limit for secondaries is 0x10000.

    if(lowerLimit>=upperLimit) {
#ifdef UCOL_DEBUG
        printf("error: no space between lower & upper limits\n");
#endif
        return FALSE;
    }

    /* check that neither is a prefix of the other */
    if(lowerLength<upperLength) {
        if(lowerLimit==truncateWeight(upperLimit, lowerLength)) {
#ifdef UCOL_DEBUG
            printf("error: lower limit 0x%08lx is a prefix of upper limit 0x%08lx\n",
                   (unsigned long)lowerLimit, (unsigned long)upperLimit);
#endif
            return FALSE;
        }
    }
    /* if the upper limit is a prefix of the lower limit then the earlier test lowerLimit>=upperLimit has caught it */

    WeightRange lower[5], middle, upper[5]; /* [0] and [1] are not used - this simplifies indexing */
    uprv_memset(lower, 0, sizeof(lower));
    uprv_memset(&middle, 0, sizeof(middle));
    uprv_memset(upper, 0, sizeof(upper));

    /*
     * With the limit lengths of 1..4, there are up to 7 ranges for allocation:
     * range     minimum length
     * lower[4]  4
     * lower[3]  3
     * lower[2]  2
     * middle    1
     * upper[2]  2
     * upper[3]  3
     * upper[4]  4
     *
     * We are now going to calculate up to 7 ranges.
     * Some of them will typically overlap, so we will then have to merge and eliminate ranges.
     */
    // Lower ranges: walk from the full lower limit towards its middleLength prefix.
    uint32_t weight=lowerLimit;
    for(int32_t length=lowerLength; length>middleLength; --length) {
        uint32_t trail=getWeightTrail(weight, length);
        if(trail<maxBytes[length]) {
            lower[length].start=incWeightTrail(weight, length);
            lower[length].end=setWeightTrail(weight, length, maxBytes[length]);
            lower[length].length=length;
            lower[length].count=maxBytes[length]-trail;
        }
        weight=truncateWeight(weight, length-1);
    }
    if(weight<0xff000000) {
        middle.start=incWeightTrail(weight, middleLength);
    } else {
        // Prevent overflow for primary lead byte FF
        // which would yield a middle range starting at 0.
        middle.start=0xffffffff;  // no middle range
    }

    // Upper ranges: walk from the full upper limit towards its middleLength prefix.
    weight=upperLimit;
    for(int32_t length=upperLength; length>middleLength; --length) {
        uint32_t trail=getWeightTrail(weight, length);
        if(trail>minBytes[length]) {
            upper[length].start=setWeightTrail(weight, length, minBytes[length]);
            upper[length].end=decWeightTrail(weight, length);
            upper[length].length=length;
            upper[length].count=trail-minBytes[length];
        }
        weight=truncateWeight(weight, length-1);
    }
    middle.end=decWeightTrail(weight, middleLength);

    /* set the middle range */
    middle.length=middleLength;
    if(middle.end>=middle.start) {
        middle.count=(int32_t)((middle.end-middle.start)>>(8*(4-middleLength)))+1;
    } else {
        /* no middle range, eliminate overlaps */
        for(int32_t length=4; length>middleLength; --length) {
            if(lower[length].count>0 && upper[length].count>0) {
                // Note: The lowerEnd and upperStart weights are versions of
                // lowerLimit and upperLimit (which are lowerLimit<upperLimit),
                // truncated (still less-or-equal)
                // and then with their last bytes changed to the
                // maxByte (for lowerEnd) or minByte (for upperStart).
                const uint32_t lowerEnd=lower[length].end;
                const uint32_t upperStart=upper[length].start;
                UBool merged=FALSE;

                if(lowerEnd>upperStart) {
                    // These two lower and upper ranges collide.
                    // Since lowerLimit<upperLimit and lowerEnd and upperStart
                    // are versions with only their last bytes modified
                    // (and following ones removed/reset to 0),
                    // lowerEnd>upperStart is only possible
                    // if the leading bytes are equal
                    // and lastByte(lowerEnd)>lastByte(upperStart).
                    U_ASSERT(truncateWeight(lowerEnd, length-1)==
                            truncateWeight(upperStart, length-1));
                    // Intersect these two ranges.
                    lower[length].end=upper[length].end;
                    lower[length].count=
                            (int32_t)getWeightTrail(lower[length].end, length)-
                            (int32_t)getWeightTrail(lower[length].start, length)+1;
                    // count might be <=0 in which case there is no room,
                    // and the range-collecting code below will ignore this range.
                    merged=TRUE;
                } else if(lowerEnd==upperStart) {
                    // Not possible, unless minByte==maxByte which is not allowed.
                    U_ASSERT(minBytes[length]<maxBytes[length]);
                } else /* lowerEnd<upperStart */ {
                    if(incWeight(lowerEnd, length)==upperStart) {
                        // Merge adjacent ranges.
                        lower[length].end=upper[length].end;
                        lower[length].count+=upper[length].count;  // might be >countBytes
                        merged=TRUE;
                    }
                }
                if(merged) {
                    // Remove all shorter ranges.
                    // There was no room available for them between the ranges we just merged.
                    upper[length].count=0;
                    while(--length>middleLength) {
                        lower[length].count=upper[length].count=0;
                    }
                    break;
                }
            }
        }
    }

#ifdef UCOL_DEBUG
    /* print ranges */
    for(int32_t length=4; length>=2; --length) {
        if(lower[length].count>0) {
            printf("lower[%ld] .start=0x%08lx .end=0x%08lx .count=%ld\n",
                   (long)length, (unsigned long)lower[length].start,
                   (unsigned long)lower[length].end, (long)lower[length].count);
        }
    }
    if(middle.count>0) {
        printf("middle   .start=0x%08lx .end=0x%08lx .count=%ld\n",
               (unsigned long)middle.start, (unsigned long)middle.end, (long)middle.count);
    }
    for(int32_t length=2; length<=4; ++length) {
        if(upper[length].count>0) {
            printf("upper[%ld] .start=0x%08lx .end=0x%08lx .count=%ld\n",
                   (long)length, (unsigned long)upper[length].start,
                   (unsigned long)upper[length].end, (long)upper[length].count);
        }
    }
#endif

    /* copy the ranges, shortest first, into the result array */
    rangeCount=0;
    if(middle.count>0) {
        uprv_memcpy(ranges, &middle, sizeof(WeightRange));
        rangeCount=1;
    }
    for(int32_t length=middleLength+1; length<=4; ++length) {
        /* copy upper first so that later the middle range is more likely the first one to use */
        if(upper[length].count>0) {
            uprv_memcpy(ranges+rangeCount, upper+length, sizeof(WeightRange));
            ++rangeCount;
        }
        if(lower[length].count>0) {
            uprv_memcpy(ranges+rangeCount, lower+length, sizeof(WeightRange));
            ++rangeCount;
        }
    }
    return rangeCount>0;
}

/**
 * Tries to satisfy the request for n weights from the leading ranges
 * whose lengths are minLength or minLength+1, without lengthening any range.
 *
 * On success, rangeCount is reduced to the ranges that are used,
 * the count of the last used range is trimmed if it is a minLength+1 range,
 * and the used ranges are sorted by their start weights.
 * On failure, ranges[] and rangeCount are left unchanged.
 *
 * @param n the number of weights needed
 * @param minLength the length of the shortest range
 * @return TRUE if the leading short ranges hold at least n weights
 */
UBool
CollationWeights::allocWeightsInShortRanges(int32_t n, int32_t minLength) {
    // See if the first few minLength and minLength+1 ranges have enough weights.
    for(int32_t i = 0; i < rangeCount && ranges[i].length <= (minLength + 1); ++i) {
        if(n <= ranges[i].count) {
            // Use the first few minLength and minLength+1 ranges.
            if(ranges[i].length > minLength) {
                // Reduce the number of weights from the last minLength+1 range
                // which might sort before some minLength ranges,
                // so that we use all weights in the minLength ranges.
                ranges[i].count = n;
            }
            rangeCount = i + 1;
#ifdef UCOL_DEBUG
            // Cast explicitly: %ld requires a long argument, rangeCount is an int32_t.
            printf("take first %ld ranges\n", (long)rangeCount);
#endif

            if(rangeCount>1) {
                /* sort the ranges by weight values */
                UErrorCode errorCode=U_ZERO_ERROR;
                uprv_sortArray(ranges, rangeCount, sizeof(WeightRange),
                               compareRanges, NULL, FALSE, &errorCode);
                /* ignore error code: we know that the internal sort function will not fail here */
            }
            return TRUE;
        }
        n -= ranges[i].count;  // still >0
    }
    return FALSE;
}

/**
 * Tries to satisfy the request for n weights from the minLength ranges alone,
 * by merging them into one span and lengthening as small a tail of it as possible.
 *
 * On success, ranges[] holds either one lengthened range (rangeCount==1)
 * or a minLength range followed by a lengthened range (rangeCount==2).
 * On failure, ranges[] and rangeCount are left unchanged.
 *
 * @param n the number of weights needed
 * @param minLength the length of the shortest ranges, which are at the front of ranges[]
 * @return TRUE if the minLength ranges can hold n weights when they are all lengthened by one byte
 */
UBool
CollationWeights::allocWeightsInMinLengthRanges(int32_t n, int32_t minLength) {
    // See if the minLength ranges have enough weights
    // when we split one and lengthen the following ones.
    int32_t count = 0;
    int32_t minLengthRangeCount;
    for(minLengthRangeCount = 0;
            minLengthRangeCount < rangeCount &&
                ranges[minLengthRangeCount].length == minLength;
            ++minLengthRangeCount) {
        count += ranges[minLengthRangeCount].count;
    }

    int32_t nextCountBytes = countBytes(minLength + 1);
    if(n > count * nextCountBytes) { return FALSE; }

    // Use the minLength ranges. Merge them, and then split again as necessary.
    uint32_t start = ranges[0].start;
    uint32_t end = ranges[0].end;
    for(int32_t i = 1; i < minLengthRangeCount; ++i) {
        if(ranges[i].start < start) { start = ranges[i].start; }
        if(ranges[i].end > end) { end = ranges[i].end; }
    }

    // Calculate how to split the range between minLength (count1) and minLength+1 (count2).
    // Goal:
    //   count1 + count2 * nextCountBytes = n
    //   count1 + count2 = count
    // These turn into
    //   (count - count2) + count2 * nextCountBytes = n
    // and then into the following count1 & count2 computations.
    int32_t count2 = (n - count) / (nextCountBytes - 1);  // number of weights to be lengthened
    int32_t count1 = count - count2;  // number of minLength weights
    if(count2 == 0 || (count1 + count2 * nextCountBytes) < n) {
        // round up
        ++count2;
        --count1;
        U_ASSERT((count1 + count2 * nextCountBytes) >= n);
    }

    ranges[0].start = start;

    if(count1 == 0) {
        // Make one long range.
        ranges[0].end = end;
        ranges[0].count = count;
        lengthenRange(ranges[0]);
        rangeCount = 1;
    } else {
        // Split the range, lengthen the second part.
#ifdef UCOL_DEBUG
        // Report only variables that exist in this function, and cast explicitly:
        // %ld requires long arguments, the counts are int32_t.
        printf("split the merged range (out of %ld minLength ranges) by %ld:%ld\n",
               (long)minLengthRangeCount, (long)count1, (long)count2);
#endif

        // Next start = start + count1. First end = 1 before that.
        ranges[0].end = incWeightByOffset(start, minLength, count1 - 1);
        ranges[0].count = count1;

        ranges[1].start = incWeight(ranges[0].end, minLength);
        ranges[1].end = end;
        ranges[1].length = minLength;  // +1 when lengthened
        ranges[1].count = count2;  // *countBytes when lengthened
        lengthenRange(ranges[1]);
        rangeCount = 2;
    }
    return TRUE;
}

/*
 * call getWeightRanges and then determine heuristically
 * which ranges to use for a given number of weights between (excluding)
 * two limits
 *
 * Strategy, repeated until one step succeeds:
 * 1. try to take the weights from the existing minLength and minLength+1 ranges;
 * 2. give up if the shortest ranges already have the maximum length of 4;
 * 3. try to take the weights from the minLength ranges alone, splitting and lengthening them;
 * 4. otherwise lengthen all minLength ranges by one byte and start over.
 *
 * On success, rangeIndex is reset to 0 so that nextWeight() starts with the first range.
 * Returns TRUE if n weights could be allocated, otherwise FALSE.
 */
UBool
CollationWeights::allocWeights(uint32_t lowerLimit, uint32_t upperLimit, int32_t n) {
#ifdef UCOL_DEBUG
    puts("");
#endif

    if(!getWeightRanges(lowerLimit, upperLimit)) {
#ifdef UCOL_DEBUG
        printf("error: unable to get Weight ranges\n");
#endif
        return FALSE;
    }

    /* try until we find suitably large ranges */
    for(;;) {
        /* get the smallest number of bytes in a range */
        int32_t minLength=ranges[0].length;

        if(allocWeightsInShortRanges(n, minLength)) { break; }

        if(minLength == 4) {
#ifdef UCOL_DEBUG
            // All remaining ranges have the maximum length here,
            // so the sum of their counts is all that is available.
            int32_t available=0;
            for(int32_t i=0; i<rangeCount; ++i) {
                available+=ranges[i].count;
            }
            printf("error: the maximum number of %ld weights is insufficient for n=%ld\n",
                   (long)available, (long)n);
#endif
            return FALSE;
        }

        if(allocWeightsInMinLengthRanges(n, minLength)) { break; }

        /* no good match, lengthen all minLength ranges and iterate */
#ifdef UCOL_DEBUG
        printf("lengthen the short ranges from %ld bytes to %ld and iterate\n",
               (long)minLength, (long)(minLength+1));
#endif
        // Stop at rangeCount: entries beyond it are stale leftovers that must not be
        // modified, and without the bound the loop could read past the end of ranges[].
        for(int32_t i=0; i<rangeCount && ranges[i].length==minLength; ++i) {
            lengthenRange(ranges[i]);
        }
    }

#ifdef UCOL_DEBUG
    puts("final ranges:");
    for(int32_t i=0; i<rangeCount; ++i) {
        printf("ranges[%ld] .start=0x%08lx .end=0x%08lx .length=%ld .count=%ld\n",
               (long)i, (unsigned long)ranges[i].start, (unsigned long)ranges[i].end,
               (long)ranges[i].length, (long)ranges[i].count);
    }
#endif

    rangeIndex = 0;
    return TRUE;
}

uint32_t
CollationWeights::nextWeight() {
    if(rangeIndex >= rangeCount) {
        return 0xffffffff;
    } else {
        /* get the next weight */
        WeightRange &range = ranges[rangeIndex];
        uint32_t weight = range.start;
        if(--range.count == 0) {
            /* this range is finished */
            ++rangeIndex;
        } else {
            /* increment the weight for the next value */
            range.start = incWeight(weight, range.length);
            U_ASSERT(range.start <= range.end);
        }

        return weight;
    }
}

U_NAMESPACE_END

#endif /* #if !UCONFIG_NO_COLLATION */
ICU-9101 merge branches/markus/collv2@35225 into the trunk X-SVN-Rev: 35227 2014-02-25 21:21:49 +00:00			`/*`
			`*******************************************************************************`
			`*`
ICU-11574 near-final Unicode 8 data with new block API constants, code adjustments & fixes; svn merge --reintegrate branches/markus/uni80 X-SVN-Rev: 37473 2015-05-28 23:16:00 +00:00			`* Copyright (C) 1999-2015, International Business Machines`
ICU-9101 merge branches/markus/collv2@35225 into the trunk X-SVN-Rev: 35227 2014-02-25 21:21:49 +00:00			`* Corporation and others. All Rights Reserved.`
			`*`
			`*******************************************************************************`
			`* file name: collationweights.cpp`
			`* encoding: US-ASCII`
			`* tab size: 8 (not used)`
			`* indentation:4`
			`*`
			`* created on: 2001mar08 as ucol_wgt.cpp`
			`* created by: Markus W. Scherer`
			`*`
			`* This file contains code for allocating n collation element weights`
			`* between two exclusive limits.`
			`* It is used only internally by the collation tailoring builder.`
			`*/`

			`#include "unicode/utypes.h"`

			`#if !UCONFIG_NO_COLLATION`

			`#include "cmemory.h"`
			`#include "collation.h"`
			`#include "collationweights.h"`
			`#include "uarrsort.h"`
			`#include "uassert.h"`

			`#ifdef UCOL_DEBUG`
			`# include <stdio.h>`
			`#endif`

			`U_NAMESPACE_BEGIN`

			`/* collation element weight allocation -------------------------------------- */`

			`/* helper functions for CE weights */`

			`static inline uint32_t`
			`getWeightTrail(uint32_t weight, int32_t length) {`
			`return (uint32_t)(weight>>(8*(4-length)))&0xff;`
			`}`

			`static inline uint32_t`
			`setWeightTrail(uint32_t weight, int32_t length, uint32_t trail) {`
			`length=8*(4-length);`
			`return (uint32_t)((weight&(0xffffff00<<length))\|(trail<<length));`
			`}`

			`static inline uint32_t`
			`getWeightByte(uint32_t weight, int32_t idx) {`
			`return getWeightTrail(weight, idx); /* same calculation */`
			`}`

			`static inline uint32_t`
			`setWeightByte(uint32_t weight, int32_t idx, uint32_t byte) {`
			`uint32_t mask; /* 0xffffffff except a 00 "hole" for the index-th byte */`

			`idx*=8;`
			`if(idx<32) {`
			`mask=((uint32_t)0xffffffff)>>idx;`
			`} else {`
			`// Do not use uint32_t>>32 because on some platforms that does not shift at all`
			`// while we need it to become 0.`
			`// PowerPC: 0xffffffff>>32 = 0 (wanted)`
			`// x86: 0xffffffff>>32 = 0xffffffff (not wanted)`
			`//`
			`// ANSI C99 6.5.7 Bitwise shift operators:`
			`// "If the value of the right operand is negative`
			`// or is greater than or equal to the width of the promoted left operand,`
			`// the behavior is undefined."`
			`mask=0;`
			`}`
			`idx=32-idx;`
			`mask\|=0xffffff00<<idx;`
			`return (uint32_t)((weight&mask)\|(byte<<idx));`
			`}`

			`static inline uint32_t`
			`truncateWeight(uint32_t weight, int32_t length) {`
			`return (uint32_t)(weight&(0xffffffff<<(8*(4-length))));`
			`}`

			`static inline uint32_t`
			`incWeightTrail(uint32_t weight, int32_t length) {`
			`return (uint32_t)(weight+(1UL<<(8*(4-length))));`
			`}`

			`static inline uint32_t`
			`decWeightTrail(uint32_t weight, int32_t length) {`
			`return (uint32_t)(weight-(1UL<<(8*(4-length))));`
			`}`

			`CollationWeights::CollationWeights()`
			`: middleLength(0), rangeIndex(0), rangeCount(0) {`
			`for(int32_t i = 0; i < 5; ++i) {`
			`minBytes[i] = maxBytes[i] = 0;`
			`}`
			`}`

			`void`
			`CollationWeights::initForPrimary(UBool compressible) {`
			`middleLength=1;`
			`minBytes[1] = Collation::MERGE_SEPARATOR_BYTE + 1;`
			`maxBytes[1] = Collation::TRAIL_WEIGHT_BYTE;`
			`if(compressible) {`
			`minBytes[2] = Collation::PRIMARY_COMPRESSION_LOW_BYTE + 1;`
			`maxBytes[2] = Collation::PRIMARY_COMPRESSION_HIGH_BYTE - 1;`
			`} else {`
			`minBytes[2] = 2;`
			`maxBytes[2] = 0xff;`
			`}`
			`minBytes[3] = 2;`
			`maxBytes[3] = 0xff;`
			`minBytes[4] = 2;`
			`maxBytes[4] = 0xff;`
			`}`

			`void`
			`CollationWeights::initForSecondary() {`
			`// We use only the lower 16 bits for secondary weights.`
			`middleLength=3;`
			`minBytes[1] = 0;`
			`maxBytes[1] = 0;`
			`minBytes[2] = 0;`
			`maxBytes[2] = 0;`
ICU-10829 simplify U+FFFE collation: U+FFFE use common non-primary weights, adjust code for that, test order of U+FFFE equivalent to ucol_mergeSortkeys() but not necessarily same sort keys, omit case level if lowerFirst and only common weights X-SVN-Rev: 36856 2014-12-11 17:04:32 +00:00			`minBytes[3] = Collation::LEVEL_SEPARATOR_BYTE + 1;`
ICU-9101 merge branches/markus/collv2@35225 into the trunk X-SVN-Rev: 35227 2014-02-25 21:21:49 +00:00			`maxBytes[3] = 0xff;`
			`minBytes[4] = 2;`
			`maxBytes[4] = 0xff;`
			`}`

			`void`
			`CollationWeights::initForTertiary() {`
			`// We use only the lower 16 bits for tertiary weights.`
			`middleLength=3;`
			`minBytes[1] = 0;`
			`maxBytes[1] = 0;`
			`minBytes[2] = 0;`
			`maxBytes[2] = 0;`
			`// We use only 6 bits per byte.`
			`// The other bits are used for case & quaternary weights.`
ICU-10829 simplify U+FFFE collation: U+FFFE use common non-primary weights, adjust code for that, test order of U+FFFE equivalent to ucol_mergeSortkeys() but not necessarily same sort keys, omit case level if lowerFirst and only common weights X-SVN-Rev: 36856 2014-12-11 17:04:32 +00:00			`minBytes[3] = Collation::LEVEL_SEPARATOR_BYTE + 1;`
ICU-9101 merge branches/markus/collv2@35225 into the trunk X-SVN-Rev: 35227 2014-02-25 21:21:49 +00:00			`maxBytes[3] = 0x3f;`
			`minBytes[4] = 2;`
			`maxBytes[4] = 0x3f;`
			`}`

			`uint32_t`
			`CollationWeights::incWeight(uint32_t weight, int32_t length) const {`
			`for(;;) {`
			`uint32_t byte=getWeightByte(weight, length);`
			`if(byte<maxBytes[length]) {`
			`return setWeightByte(weight, length, byte+1);`
			`} else {`
			`// Roll over, set this byte to the minimum and increment the previous one.`
			`weight=setWeightByte(weight, length, minBytes[length]);`
			`--length;`
			`U_ASSERT(length > 0);`
			`}`
			`}`
			`}`

			`uint32_t`
			`CollationWeights::incWeightByOffset(uint32_t weight, int32_t length, int32_t offset) const {`
			`for(;;) {`
			`offset += getWeightByte(weight, length);`
			`if((uint32_t)offset <= maxBytes[length]) {`
			`return setWeightByte(weight, length, offset);`
			`} else {`
			`// Split the offset between this byte and the previous one.`
			`offset -= minBytes[length];`
			`weight = setWeightByte(weight, length, minBytes[length] + offset % countBytes(length));`
			`offset /= countBytes(length);`
			`--length;`
			`U_ASSERT(length > 0);`
			`}`
			`}`
			`}`

			`void`
			`CollationWeights::lengthenRange(WeightRange &range) const {`
			`int32_t length=range.length+1;`
			`range.start=setWeightTrail(range.start, length, minBytes[length]);`
			`range.end=setWeightTrail(range.end, length, maxBytes[length]);`
			`range.count*=countBytes(length);`
			`range.length=length;`
			`}`

			`/* for uprv_sortArray: sort ranges in weight order */`
			`static int32_t U_CALLCONV`
			`compareRanges(const void * /context/, const void left, const void right) {`
			`uint32_t l, r;`

			`l=((const CollationWeights::WeightRange *)left)->start;`
			`r=((const CollationWeights::WeightRange *)right)->start;`
			`if(l<r) {`
			`return -1;`
			`} else if(l>r) {`
			`return 1;`
			`} else {`
			`return 0;`
			`}`
			`}`

			`UBool`
			`CollationWeights::getWeightRanges(uint32_t lowerLimit, uint32_t upperLimit) {`
			`U_ASSERT(lowerLimit != 0);`
			`U_ASSERT(upperLimit != 0);`

			`/* get the lengths of the limits */`
			`int32_t lowerLength=lengthOfWeight(lowerLimit);`
			`int32_t upperLength=lengthOfWeight(upperLimit);`

			`#ifdef UCOL_DEBUG`
			`printf("length of lower limit 0x%08lx is %ld\n", lowerLimit, lowerLength);`
			`printf("length of upper limit 0x%08lx is %ld\n", upperLimit, upperLength);`
			`#endif`
			`U_ASSERT(lowerLength>=middleLength);`
			`// Permit upperLength<middleLength: The upper limit for secondaries is 0x10000.`

			`if(lowerLimit>=upperLimit) {`
			`#ifdef UCOL_DEBUG`
			`printf("error: no space between lower & upper limits\n");`
			`#endif`
			`return FALSE;`
			`}`

			`/* check that neither is a prefix of the other */`
			`if(lowerLength<upperLength) {`
			`if(lowerLimit==truncateWeight(upperLimit, lowerLength)) {`
			`#ifdef UCOL_DEBUG`
			`printf("error: lower limit 0x%08lx is a prefix of upper limit 0x%08lx\n", lowerLimit, upperLimit);`
			`#endif`
			`return FALSE;`
			`}`
			`}`
			`/* if the upper limit is a prefix of the lower limit then the earlier test lowerLimit>=upperLimit has caught it */`

			`WeightRange lower[5], middle, upper[5]; /* [0] and [1] are not used - this simplifies indexing */`
			`uprv_memset(lower, 0, sizeof(lower));`
			`uprv_memset(&middle, 0, sizeof(middle));`
			`uprv_memset(upper, 0, sizeof(upper));`

			`/*`
			`* With the limit lengths of 1..4, there are up to 7 ranges for allocation:`
			`* range minimum length`
			`* lower[4] 4`
			`* lower[3] 3`
			`* lower[2] 2`
			`* middle 1`
			`* upper[2] 2`
			`* upper[3] 3`
			`* upper[4] 4`
			`*`
			`* We are now going to calculate up to 7 ranges.`
			`* Some of them will typically overlap, so we will then have to merge and eliminate ranges.`
			`*/`
			`uint32_t weight=lowerLimit;`
			`for(int32_t length=lowerLength; length>middleLength; --length) {`
			`uint32_t trail=getWeightTrail(weight, length);`
			`if(trail<maxBytes[length]) {`
			`lower[length].start=incWeightTrail(weight, length);`
			`lower[length].end=setWeightTrail(weight, length, maxBytes[length]);`
			`lower[length].length=length;`
			`lower[length].count=maxBytes[length]-trail;`
			`}`
			`weight=truncateWeight(weight, length-1);`
			`}`
			`if(weight<0xff000000) {`
			`middle.start=incWeightTrail(weight, middleLength);`
			`} else {`
			`// Prevent overflow for primary lead byte FF`
			`// which would yield a middle range starting at 0.`
			`middle.start=0xffffffff; // no middle range`
			`}`

			`weight=upperLimit;`
			`for(int32_t length=upperLength; length>middleLength; --length) {`
			`uint32_t trail=getWeightTrail(weight, length);`
			`if(trail>minBytes[length]) {`
			`upper[length].start=setWeightTrail(weight, length, minBytes[length]);`
			`upper[length].end=decWeightTrail(weight, length);`
			`upper[length].length=length;`
			`upper[length].count=trail-minBytes[length];`
			`}`
			`weight=truncateWeight(weight, length-1);`
			`}`
			`middle.end=decWeightTrail(weight, middleLength);`

			`/* set the middle range */`
			`middle.length=middleLength;`
			`if(middle.end>=middle.start) {`
			`middle.count=(int32_t)((middle.end-middle.start)>>(8*(4-middleLength)))+1;`
			`} else {`
			`/* no middle range, eliminate overlaps */`
			`for(int32_t length=4; length>middleLength; --length) {`
			`if(lower[length].count>0 && upper[length].count>0) {`
ICU-11574 near-final Unicode 8 data with new block API constants, code adjustments & fixes; svn merge --reintegrate branches/markus/uni80 X-SVN-Rev: 37473 2015-05-28 23:16:00 +00:00			`// Note: The lowerEnd and upperStart weights are versions of`
			`// lowerLimit and upperLimit (which are lowerLimit<upperLimit),`
			`// truncated (still less-or-equal)`
			`// and then with their last bytes changed to the`
			`// maxByte (for lowerEnd) or minByte (for upperStart).`
			`const uint32_t lowerEnd=lower[length].end;`
			`const uint32_t upperStart=upper[length].start;`
			`UBool merged=FALSE;`

			`if(lowerEnd>upperStart) {`
			`// These two lower and upper ranges collide.`
			`// Since lowerLimit<upperLimit and lowerEnd and upperStart`
			`// are versions with only their last bytes modified`
			`// (and following ones removed/reset to 0),`
			`// lowerEnd>upperStart is only possible`
			`// if the leading bytes are equal`
			`// and lastByte(lowerEnd)>lastByte(upperStart).`
			`U_ASSERT(truncateWeight(lowerEnd, length-1)==`
			`truncateWeight(upperStart, length-1));`
			`// Intersect these two ranges.`
			`lower[length].end=upper[length].end;`
ICU-9101 merge branches/markus/collv2@35225 into the trunk X-SVN-Rev: 35227 2014-02-25 21:21:49 +00:00			`lower[length].count=`
ICU-11574 near-final Unicode 8 data with new block API constants, code adjustments & fixes; svn merge --reintegrate branches/markus/uni80 X-SVN-Rev: 37473 2015-05-28 23:16:00 +00:00			`(int32_t)getWeightTrail(lower[length].end, length)-`
			`(int32_t)getWeightTrail(lower[length].start, length)+1;`
			`// count might be <=0 in which case there is no room,`
			`// and the range-collecting code below will ignore this range.`
			`merged=TRUE;`
			`} else if(lowerEnd==upperStart) {`
			`// Not possible, unless minByte==maxByte which is not allowed.`
			`U_ASSERT(minBytes[length]<maxBytes[length]);`
			`} else /* lowerEnd<upperStart */ {`
			`if(incWeight(lowerEnd, length)==upperStart) {`
			`// Merge adjacent ranges.`
			`lower[length].end=upper[length].end;`
			`lower[length].count+=upper[length].count; // might be >countBytes`
			`merged=TRUE;`
			`}`
			`}`
			`if(merged) {`
			`// Remove all shorter ranges.`
			`// There was no room available for them between the ranges we just merged.`
ICU-9101 merge branches/markus/collv2@35225 into the trunk X-SVN-Rev: 35227 2014-02-25 21:21:49 +00:00			`upper[length].count=0;`
			`while(--length>middleLength) {`
			`lower[length].count=upper[length].count=0;`
			`}`
			`break;`
			`}`
			`}`
			`}`
			`}`

			`#ifdef UCOL_DEBUG`
			`/* print ranges */`
			`for(int32_t length=4; length>=2; --length) {`
			`if(lower[length].count>0) {`
			`printf("lower[%ld] .start=0x%08lx .end=0x%08lx .count=%ld\n", length, lower[length].start, lower[length].end, lower[length].count);`
			`}`
			`}`
			`if(middle.count>0) {`
			`printf("middle .start=0x%08lx .end=0x%08lx .count=%ld\n", middle.start, middle.end, middle.count);`
			`}`
			`for(int32_t length=2; length<=4; ++length) {`
			`if(upper[length].count>0) {`
			`printf("upper[%ld] .start=0x%08lx .end=0x%08lx .count=%ld\n", length, upper[length].start, upper[length].end, upper[length].count);`
			`}`
			`}`
			`#endif`

			`/* copy the ranges, shortest first, into the result array */`
			`rangeCount=0;`
			`if(middle.count>0) {`
			`uprv_memcpy(ranges, &middle, sizeof(WeightRange));`
			`rangeCount=1;`
			`}`
			`for(int32_t length=middleLength+1; length<=4; ++length) {`
			`/* copy upper first so that later the middle range is more likely the first one to use */`
			`if(upper[length].count>0) {`
			`uprv_memcpy(ranges+rangeCount, upper+length, sizeof(WeightRange));`
			`++rangeCount;`
			`}`
			`if(lower[length].count>0) {`
			`uprv_memcpy(ranges+rangeCount, lower+length, sizeof(WeightRange));`
			`++rangeCount;`
			`}`
			`}`
			`return rangeCount>0;`
			`}`

			/*
			 * Tries to satisfy a request for n weights from the leading ranges
			 * whose length is minLength or minLength+1, accumulating their counts
			 * in array order.
			 *
			 * On success, rangeCount is cut down to the ranges actually used,
			 * those ranges are sorted with compareRanges, and TRUE is returned.
			 * Returns FALSE if the eligible ranges together hold fewer than n weights;
			 * in that case ranges[] and rangeCount are left unmodified.
			 */
			UBool
			CollationWeights::allocWeightsInShortRanges(int32_t n, int32_t minLength) {
			    const int32_t longestUsable = minLength + 1;
			    for(int32_t idx = 0; idx < rangeCount; ++idx) {
			        WeightRange &candidate = ranges[idx];
			        if(candidate.length > longestUsable) {
			            // Only the leading minLength and minLength+1 ranges are eligible.
			            break;
			        }
			        if(candidate.count < n) {
			            // Not enough yet: consume this whole range and keep looking.
			            n -= candidate.count;  // still >0
			            continue;
			        }

			        // This range completes the request.
			        if(candidate.length > minLength) {
			            // Trim the last minLength+1 range to exactly what is still needed.
			            // It might sort before some minLength ranges, and we want to
			            // use all of the weights in the minLength ranges.
			            candidate.count = n;
			        }
			        rangeCount = idx + 1;
			#ifdef UCOL_DEBUG
			        printf("take first %ld ranges\n", rangeCount);
			#endif

			        if(rangeCount > 1) {
			            /* put the selected ranges into weight order */
			            UErrorCode errorCode = U_ZERO_ERROR;
			            uprv_sortArray(ranges, rangeCount, sizeof(WeightRange),
			                           compareRanges, NULL, FALSE, &errorCode);
			            /* ignore error code: we know that the internal sort function will not fail here */
			        }
			        return TRUE;
			    }
			    return FALSE;
			}

			`UBool`
			`CollationWeights::allocWeightsInMinLengthRanges(int32_t n, int32_t minLength) {`
			`// See if the minLength ranges have enough weights`
			`// when we split one and lengthen the following ones.`
			`int32_t count = 0;`
			`int32_t minLengthRangeCount;`
			`for(minLengthRangeCount = 0;`
			`minLengthRangeCount < rangeCount &&`
			`ranges[minLengthRangeCount].length == minLength;`
			`++minLengthRangeCount) {`
			`count += ranges[minLengthRangeCount].count;`
			`}`

			`int32_t nextCountBytes = countBytes(minLength + 1);`
			`if(n > count * nextCountBytes) { return FALSE; }`

			`// Use the minLength ranges. Merge them, and then split again as necessary.`
			`uint32_t start = ranges[0].start;`
			`uint32_t end = ranges[0].end;`
			`for(int32_t i = 1; i < minLengthRangeCount; ++i) {`
			`if(ranges[i].start < start) { start = ranges[i].start; }`
			`if(ranges[i].end > end) { end = ranges[i].end; }`
			`}`

			`// Calculate how to split the range between minLength (count1) and minLength+1 (count2).`
			`// Goal:`
			`// count1 + count2 * nextCountBytes = n`
			`// count1 + count2 = count`
			`// These turn into`
			`// (count - count2) + count2 * nextCountBytes = n`
			`// and then into the following count1 & count2 computations.`
			`int32_t count2 = (n - count) / (nextCountBytes - 1); // number of weights to be lengthened`
			`int32_t count1 = count - count2; // number of minLength weights`
			`if(count2 == 0 \|\| (count1 + count2 * nextCountBytes) < n) {`
			`// round up`
			`++count2;`
			`--count1;`
			`U_ASSERT((count1 + count2 * nextCountBytes) >= n);`
			`}`

			`ranges[0].start = start;`

			`if(count1 == 0) {`
			`// Make one long range.`
			`ranges[0].end = end;`
			`ranges[0].count = count;`
			`lengthenRange(ranges[0]);`
			`rangeCount = 1;`
			`} else {`
			`// Split the range, lengthen the second part.`
			`#ifdef UCOL_DEBUG`
			`printf("split the range number %ld (out of %ld minLength ranges) by %ld:%ld\n",`
			`splitRange, rangeCount, count1, count2);`
			`#endif`

			`// Next start = start + count1. First end = 1 before that.`
			`ranges[0].end = incWeightByOffset(start, minLength, count1 - 1);`
			`ranges[0].count = count1;`

			`ranges[1].start = incWeight(ranges[0].end, minLength);`
			`ranges[1].end = end;`
			`ranges[1].length = minLength; // +1 when lengthened`
			`ranges[1].count = count2; // *countBytes when lengthened`
			`lengthenRange(ranges[1]);`
			`rangeCount = 2;`
			`}`
			`return TRUE;`
			`}`

			`/*`
			`* call getWeightRanges and then determine heuristically`
			`* which ranges to use for a given number of weights between (excluding)`
			`* two limits`
			`*/`
			`UBool`
			`CollationWeights::allocWeights(uint32_t lowerLimit, uint32_t upperLimit, int32_t n) {`
			`#ifdef UCOL_DEBUG`
			`puts("");`
			`#endif`

			`if(!getWeightRanges(lowerLimit, upperLimit)) {`
			`#ifdef UCOL_DEBUG`
			`printf("error: unable to get Weight ranges\n");`
			`#endif`
			`return FALSE;`
			`}`

			`/* try until we find suitably large ranges */`
			`for(;;) {`
			`/* get the smallest number of bytes in a range */`
			`int32_t minLength=ranges[0].length;`

			`if(allocWeightsInShortRanges(n, minLength)) { break; }`

			`if(minLength == 4) {`
			`#ifdef UCOL_DEBUG`
			`printf("error: the maximum number of %ld weights is insufficient for n=%ld\n",`
			`minLengthCount, n);`
			`#endif`
			`return FALSE;`
			`}`

			`if(allocWeightsInMinLengthRanges(n, minLength)) { break; }`

			`/* no good match, lengthen all minLength ranges and iterate */`
			`#ifdef UCOL_DEBUG`
			`printf("lengthen the short ranges from %ld bytes to %ld and iterate\n", minLength, minLength+1);`
			`#endif`
			`for(int32_t i=0; ranges[i].length==minLength; ++i) {`
			`lengthenRange(ranges[i]);`
			`}`
			`}`

			`#ifdef UCOL_DEBUG`
			`puts("final ranges:");`
			`for(int32_t i=0; i<rangeCount; ++i) {`
			`printf("ranges[%ld] .start=0x%08lx .end=0x%08lx .length=%ld .count=%ld\n",`
			`i, ranges[i].start, ranges[i].end, ranges[i].length, ranges[i].count);`
			`}`
			`#endif`

			`rangeIndex = 0;`
			`return TRUE;`
			`}`

			/*
			 * Returns the next weight from the current range and advances the
			 * iteration state: either the range's start moves to the following weight,
			 * or, when the range's count reaches 0, rangeIndex moves to the next range.
			 * Returns 0xffffffff once rangeIndex has reached rangeCount.
			 */
			uint32_t
			CollationWeights::nextWeight() {
			    if(rangeIndex >= rangeCount) {
			        /* no ranges left */
			        return 0xffffffff;
			    }

			    WeightRange &current = ranges[rangeIndex];
			    const uint32_t result = current.start;
			    current.count -= 1;
			    if(current.count != 0) {
			        /* more weights remain here: step start to the following weight */
			        current.start = incWeight(result, current.length);
			        U_ASSERT(current.start <= current.end);
			    } else {
			        /* this range is finished */
			        ++rangeIndex;
			    }
			    return result;
			}

			`U_NAMESPACE_END`

			`#endif /* #if !UCONFIG_NO_COLLATION */`