a9d7c3e4bd
X-SVN-Rev: 36924
376 lines
13 KiB
C++
376 lines
13 KiB
C++
/*
*******************************************************************************
* Copyright (C) 2013-2015, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* collationsettings.cpp
*
* created on: 2013feb07
* created by: Markus W. Scherer
*/
|
|
|
|
#include "unicode/utypes.h"
|
|
|
|
#if !UCONFIG_NO_COLLATION
|
|
|
|
#include "unicode/ucol.h"
|
|
#include "cmemory.h"
|
|
#include "collation.h"
|
|
#include "collationdata.h"
|
|
#include "collationsettings.h"
|
|
#include "sharedobject.h"
|
|
#include "uassert.h"
|
|
#include "umutex.h"
|
|
#include "uvectr32.h"
|
|
|
|
U_NAMESPACE_BEGIN
|
|
|
|
/**
 * Copy constructor.
 *
 * Scalar settings are copied in the initializer list. The reordering fields
 * start out empty and are then filled in by copyReorderingFrom(), which either
 * aliases the other object's arrays or copies them into memory owned by this
 * object.
 *
 * @param other the settings to copy
 */
CollationSettings::CollationSettings(const CollationSettings &other)
        : SharedObject(other),
          options(other.options),
          variableTop(other.variableTop),
          reorderTable(NULL),
          minHighNoReorder(other.minHighNoReorder),
          reorderRanges(NULL),
          reorderRangesLength(0),
          reorderCodes(NULL),
          reorderCodesLength(0),
          reorderCodesCapacity(0),
          fastLatinOptions(other.fastLatinOptions) {
    // The status is local and is not inspected afterwards: a failure inside
    // copyReorderingFrom() is not reported to the caller of this constructor.
    UErrorCode status = U_ZERO_ERROR;
    copyReorderingFrom(other, status);

    // The fast-Latin primaries are only copied when fastLatinOptions is non-negative.
    if(fastLatinOptions < 0) { return; }
    uprv_memcpy(fastLatinPrimaries, other.fastLatinPrimaries, sizeof(fastLatinPrimaries));
}
|
|
|
|
/**
 * Destructor. Frees the reorder-codes block, but only when this object
 * allocated it, which is signalled by a non-zero reorderCodesCapacity.
 */
CollationSettings::~CollationSettings() {
    // Zero capacity: reorderCodes is NULL or an alias, so there is nothing to free.
    if(reorderCodesCapacity == 0) { return; }
    uprv_free(const_cast<int32_t *>(reorderCodes));
}
|
|
|
|
/**
 * Compares two settings objects.
 *
 * Two objects are equal when their options match, their variableTop values
 * match (checked only if any ALTERNATE_MASK bit is set in the options), and
 * their reorder code lists have the same length and the same elements.
 *
 * @param other the settings to compare with
 * @return TRUE if the compared fields are all equal, otherwise FALSE
 */
UBool
CollationSettings::operator==(const CollationSettings &other) const {
    if(options != other.options) { return FALSE; }

    // variableTop only takes part in the comparison when alternate handling bits are set.
    const bool variableTopMatters = (options & ALTERNATE_MASK) != 0;
    if(variableTopMatters && variableTop != other.variableTop) { return FALSE; }

    const int32_t count = reorderCodesLength;
    if(count != other.reorderCodesLength) { return FALSE; }

    int32_t index = 0;
    while(index < count) {
        if(reorderCodes[index] != other.reorderCodes[index]) { return FALSE; }
        ++index;
    }
    return TRUE;
}
|
|
|
|
int32_t
|
|
CollationSettings::hashCode() const {
|
|
int32_t h = options << 8;
|
|
if((options & ALTERNATE_MASK) != 0) { h ^= variableTop; }
|
|
h ^= reorderCodesLength;
|
|
for(int32_t i = 0; i < reorderCodesLength; ++i) {
|
|
h ^= (reorderCodes[i] << i);
|
|
}
|
|
return h;
|
|
}
|
|
|
|
void
|
|
CollationSettings::resetReordering() {
|
|
// When we turn off reordering, we want to set a NULL permutation
|
|
// rather than a no-op permutation.
|
|
// Keep the memory via reorderCodes and its capacity.
|
|
reorderTable = NULL;
|
|
minHighNoReorder = 0;
|
|
reorderRangesLength = 0;
|
|
reorderCodesLength = 0;
|
|
}
|
|
|
|
void
|
|
CollationSettings::aliasReordering(const CollationData &data, const int32_t *codes, int32_t length,
|
|
const uint32_t *ranges, int32_t rangesLength,
|
|
const uint8_t *table, UErrorCode &errorCode) {
|
|
if(U_FAILURE(errorCode)) { return; }
|
|
if(table != NULL &&
|
|
(rangesLength == 0 ?
|
|
!reorderTableHasSplitBytes(table) :
|
|
rangesLength >= 2 &&
|
|
// The first offset must be 0. The last offset must not be 0.
|
|
(ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0)) {
|
|
// We need to release the memory before setting the alias pointer.
|
|
if(reorderCodesCapacity != 0) {
|
|
uprv_free(const_cast<int32_t *>(reorderCodes));
|
|
reorderCodesCapacity = 0;
|
|
}
|
|
reorderTable = table;
|
|
reorderCodes = codes;
|
|
reorderCodesLength = length;
|
|
// Drop ranges before the first split byte. They are reordered by the table.
|
|
// This then speeds up reordering of the remaining ranges.
|
|
int32_t firstSplitByteRangeIndex = 0;
|
|
while(firstSplitByteRangeIndex < rangesLength &&
|
|
(ranges[firstSplitByteRangeIndex] & 0xff0000) == 0) {
|
|
// The second byte of the primary limit is 0.
|
|
++firstSplitByteRangeIndex;
|
|
}
|
|
if(firstSplitByteRangeIndex == rangesLength) {
|
|
U_ASSERT(!reorderTableHasSplitBytes(table));
|
|
minHighNoReorder = 0;
|
|
reorderRanges = NULL;
|
|
reorderRangesLength = 0;
|
|
} else {
|
|
U_ASSERT(table[ranges[firstSplitByteRangeIndex] >> 24] == 0);
|
|
minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000;
|
|
reorderRanges = ranges + firstSplitByteRangeIndex;
|
|
reorderRangesLength = rangesLength - firstSplitByteRangeIndex;
|
|
}
|
|
return;
|
|
}
|
|
// Regenerate missing data.
|
|
setReordering(data, codes, length, errorCode);
|
|
}
|
|
|
|
void
|
|
CollationSettings::setReordering(const CollationData &data,
|
|
const int32_t *codes, int32_t codesLength,
|
|
UErrorCode &errorCode) {
|
|
if(U_FAILURE(errorCode)) { return; }
|
|
if(codesLength == 0 || (codesLength == 1 && codes[0] == UCOL_REORDER_CODE_NONE)) {
|
|
resetReordering();
|
|
return;
|
|
}
|
|
UVector32 rangesList(errorCode);
|
|
data.makeReorderRanges(codes, codesLength, rangesList, errorCode);
|
|
if(U_FAILURE(errorCode)) { return; }
|
|
int32_t rangesLength = rangesList.size();
|
|
if(rangesLength == 0) {
|
|
resetReordering();
|
|
return;
|
|
}
|
|
const uint32_t *ranges = reinterpret_cast<uint32_t *>(rangesList.getBuffer());
|
|
// ranges[] contains at least two (limit, offset) pairs.
|
|
// The first offset must be 0. The last offset must not be 0.
|
|
// Separators (at the low end) and trailing weights (at the high end)
|
|
// are never reordered.
|
|
U_ASSERT(rangesLength >= 2);
|
|
U_ASSERT((ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0);
|
|
minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000;
|
|
|
|
// Write the lead byte permutation table.
|
|
// Set a 0 for each lead byte that has a range boundary in the middle.
|
|
uint8_t table[256];
|
|
int32_t b = 0;
|
|
int32_t firstSplitByteRangeIndex = -1;
|
|
for(int32_t i = 0; i < rangesLength; ++i) {
|
|
uint32_t pair = ranges[i];
|
|
int32_t limit1 = (int32_t)(pair >> 24);
|
|
while(b < limit1) {
|
|
table[b] = (uint8_t)(b + pair);
|
|
++b;
|
|
}
|
|
// Check the second byte of the limit.
|
|
if((pair & 0xff0000) != 0) {
|
|
table[limit1] = 0;
|
|
b = limit1 + 1;
|
|
if(firstSplitByteRangeIndex < 0) {
|
|
firstSplitByteRangeIndex = i;
|
|
}
|
|
}
|
|
}
|
|
while(b <= 0xff) {
|
|
table[b] = (uint8_t)b;
|
|
++b;
|
|
}
|
|
if(firstSplitByteRangeIndex < 0) {
|
|
// The lead byte permutation table alone suffices for reordering.
|
|
rangesLength = 0;
|
|
} else {
|
|
// Remove the ranges below the first split byte.
|
|
ranges += firstSplitByteRangeIndex;
|
|
rangesLength -= firstSplitByteRangeIndex;
|
|
}
|
|
setReorderArrays(codes, codesLength, ranges, rangesLength, table, errorCode);
|
|
}
|
|
|
|
void
|
|
CollationSettings::setReorderArrays(const int32_t *codes, int32_t codesLength,
|
|
const uint32_t *ranges, int32_t rangesLength,
|
|
const uint8_t *table, UErrorCode &errorCode) {
|
|
if(U_FAILURE(errorCode)) { return; }
|
|
int32_t *ownedCodes;
|
|
int32_t totalLength = codesLength + rangesLength;
|
|
U_ASSERT(totalLength > 0);
|
|
if(totalLength <= reorderCodesCapacity) {
|
|
ownedCodes = const_cast<int32_t *>(reorderCodes);
|
|
} else {
|
|
// Allocate one memory block for the codes, the ranges, and the 16-aligned table.
|
|
int32_t capacity = (totalLength + 3) & ~3; // round up to a multiple of 4 ints
|
|
ownedCodes = (int32_t *)uprv_malloc(capacity * 4 + 256);
|
|
if(ownedCodes == NULL) {
|
|
resetReordering();
|
|
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
|
return;
|
|
}
|
|
if(reorderCodesCapacity != 0) {
|
|
uprv_free(const_cast<int32_t *>(reorderCodes));
|
|
}
|
|
reorderCodes = ownedCodes;
|
|
reorderCodesCapacity = capacity;
|
|
}
|
|
uprv_memcpy(ownedCodes + reorderCodesCapacity, table, 256);
|
|
uprv_memcpy(ownedCodes, codes, codesLength * 4);
|
|
uprv_memcpy(ownedCodes + codesLength, ranges, rangesLength * 4);
|
|
reorderTable = reinterpret_cast<const uint8_t *>(reorderCodes + reorderCodesCapacity);
|
|
reorderCodesLength = codesLength;
|
|
reorderRanges = reinterpret_cast<uint32_t *>(ownedCodes) + codesLength;
|
|
reorderRangesLength = rangesLength;
|
|
}
|
|
|
|
void
|
|
CollationSettings::copyReorderingFrom(const CollationSettings &other, UErrorCode &errorCode) {
|
|
if(U_FAILURE(errorCode)) { return; }
|
|
if(!other.hasReordering()) {
|
|
resetReordering();
|
|
return;
|
|
}
|
|
minHighNoReorder = other.minHighNoReorder;
|
|
if(other.reorderCodesCapacity == 0) {
|
|
// The reorder arrays are aliased to memory-mapped data.
|
|
reorderTable = other.reorderTable;
|
|
reorderRanges = other.reorderRanges;
|
|
reorderRangesLength = other.reorderRangesLength;
|
|
reorderCodes = other.reorderCodes;
|
|
reorderCodesLength = other.reorderCodesLength;
|
|
} else {
|
|
setReorderArrays(other.reorderCodes, other.reorderCodesLength,
|
|
other.reorderRanges, other.reorderRangesLength,
|
|
other.reorderTable, errorCode);
|
|
}
|
|
}
|
|
|
|
/**
 * Checks whether a lead byte permutation table marks any split byte,
 * that is, whether any entry other than the first one is 0.
 *
 * @param table  permutation table with 256 entries; entry 0 is asserted to be 0
 * @return TRUE if some entry at index 1..255 is 0, otherwise FALSE
 */
UBool
CollationSettings::reorderTableHasSplitBytes(const uint8_t table[256]) {
    U_ASSERT(table[0] == 0);
    // Advance past all non-zero entries; stopping early means a 0 was found.
    int32_t index = 1;
    while(index < 256 && table[index] != 0) {
        ++index;
    }
    return index < 256 ? TRUE : FALSE;
}
|
|
|
|
/**
 * Reorders a primary weight using the (limit, offset) reorder ranges.
 *
 * Primaries at or above minHighNoReorder are returned unchanged. Otherwise
 * the first range pair that is greater than the primary (with its lower
 * 16 bits filled with ones) is located, and that pair, shifted left by
 * 24 bits, is added to the primary.
 *
 * @param p  the primary weight
 * @return the reordered primary weight
 */
uint32_t
CollationSettings::reorderEx(uint32_t p) const {
    if(p >= minHighNoReorder) { return p; }

    // Round up p so that its lower 16 bits are >= any offset bits.
    // Then the rounded value can be compared directly with (limit, offset) pairs.
    const uint32_t rounded = p | 0xffff;

    // The scan has no explicit length check; it stops at the first pair above the rounded value.
    const uint32_t *pair = reorderRanges;
    while(rounded >= *pair) {
        ++pair;
    }
    return p + (*pair << 24);
}
|
|
|
|
void
|
|
CollationSettings::setStrength(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) {
|
|
if(U_FAILURE(errorCode)) { return; }
|
|
int32_t noStrength = options & ~STRENGTH_MASK;
|
|
switch(value) {
|
|
case UCOL_PRIMARY:
|
|
case UCOL_SECONDARY:
|
|
case UCOL_TERTIARY:
|
|
case UCOL_QUATERNARY:
|
|
case UCOL_IDENTICAL:
|
|
options = noStrength | (value << STRENGTH_SHIFT);
|
|
break;
|
|
case UCOL_DEFAULT:
|
|
options = noStrength | (defaultOptions & STRENGTH_MASK);
|
|
break;
|
|
default:
|
|
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
|
break;
|
|
}
|
|
}
|
|
|
|
void
|
|
CollationSettings::setFlag(int32_t bit, UColAttributeValue value,
|
|
int32_t defaultOptions, UErrorCode &errorCode) {
|
|
if(U_FAILURE(errorCode)) { return; }
|
|
switch(value) {
|
|
case UCOL_ON:
|
|
options |= bit;
|
|
break;
|
|
case UCOL_OFF:
|
|
options &= ~bit;
|
|
break;
|
|
case UCOL_DEFAULT:
|
|
options = (options & ~bit) | (defaultOptions & bit);
|
|
break;
|
|
default:
|
|
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
|
break;
|
|
}
|
|
}
|
|
|
|
void
|
|
CollationSettings::setCaseFirst(UColAttributeValue value,
|
|
int32_t defaultOptions, UErrorCode &errorCode) {
|
|
if(U_FAILURE(errorCode)) { return; }
|
|
int32_t noCaseFirst = options & ~CASE_FIRST_AND_UPPER_MASK;
|
|
switch(value) {
|
|
case UCOL_OFF:
|
|
options = noCaseFirst;
|
|
break;
|
|
case UCOL_LOWER_FIRST:
|
|
options = noCaseFirst | CASE_FIRST;
|
|
break;
|
|
case UCOL_UPPER_FIRST:
|
|
options = noCaseFirst | CASE_FIRST_AND_UPPER_MASK;
|
|
break;
|
|
case UCOL_DEFAULT:
|
|
options = noCaseFirst | (defaultOptions & CASE_FIRST_AND_UPPER_MASK);
|
|
break;
|
|
default:
|
|
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
|
break;
|
|
}
|
|
}
|
|
|
|
void
|
|
CollationSettings::setAlternateHandling(UColAttributeValue value,
|
|
int32_t defaultOptions, UErrorCode &errorCode) {
|
|
if(U_FAILURE(errorCode)) { return; }
|
|
int32_t noAlternate = options & ~ALTERNATE_MASK;
|
|
switch(value) {
|
|
case UCOL_NON_IGNORABLE:
|
|
options = noAlternate;
|
|
break;
|
|
case UCOL_SHIFTED:
|
|
options = noAlternate | SHIFTED;
|
|
break;
|
|
case UCOL_DEFAULT:
|
|
options = noAlternate | (defaultOptions & ALTERNATE_MASK);
|
|
break;
|
|
default:
|
|
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
|
break;
|
|
}
|
|
}
|
|
|
|
void
|
|
CollationSettings::setMaxVariable(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) {
|
|
if(U_FAILURE(errorCode)) { return; }
|
|
int32_t noMax = options & ~MAX_VARIABLE_MASK;
|
|
switch(value) {
|
|
case MAX_VAR_SPACE:
|
|
case MAX_VAR_PUNCT:
|
|
case MAX_VAR_SYMBOL:
|
|
case MAX_VAR_CURRENCY:
|
|
options = noMax | (value << MAX_VARIABLE_SHIFT);
|
|
break;
|
|
case UCOL_DEFAULT:
|
|
options = noMax | (defaultOptions & MAX_VARIABLE_MASK);
|
|
break;
|
|
default:
|
|
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
|
break;
|
|
}
|
|
}
|
|
|
|
U_NAMESPACE_END
|
|
|
|
#endif // !UCONFIG_NO_COLLATION
|