345 lines
11 KiB
C++
345 lines
11 KiB
C++
|
// © 2018 and later: Unicode, Inc. and others.
|
||
|
// License & terms of use: http://www.unicode.org/copyright.html
|
||
|
|
||
|
// utrie_swap.cpp
|
||
|
// created: 2018aug08 Markus W. Scherer
|
||
|
|
||
|
#include "unicode/utypes.h"
|
||
|
#include "cmemory.h"
|
||
|
#include "ucptrie_impl.h"
|
||
|
#include "udataswp.h"
|
||
|
#include "utrie.h"
|
||
|
#include "utrie2_impl.h"
|
||
|
|
||
|
// These functions for swapping different generations of ICU code point tries are here
// so that their implementation files need not depend on swapper code,
// need not depend on each other, and so that other swapper code
// need not depend on other trie code.
|
||
|
|
||
|
namespace {

/**
 * Number of ASCII code points (U+0000..U+007F).
 * ucptrie_swap() rejects a serialized UCPTrie whose data length is below this value.
 */
constexpr int32_t ASCII_LIMIT = 0x80;

}  // namespace
|
||
|
|
||
|
/**
 * Swaps a serialized UTrie (signature 0x54726965, "Trie" in ASCII).
 *
 * The header fields are read through the swapper, validated, and used to compute
 * the total serialized size. If length>=0, the header, the 16-bit index and the
 * 16- or 32-bit data are then swapped from inData to outData.
 *
 * @param ds         the data swapper supplying the read and array-swap functions
 * @param inData     pointer to the serialized trie, starting with its UTrieHeader
 * @param length     number of bytes available at inData;
 *                   if negative, nothing is written and only the size is computed
 * @param outData    destination for the swapped trie; must not be NULL if length>=0
 * @param pErrorCode in/out error code; the function returns 0 immediately if it is
 *                   NULL or already indicates a failure
 * @return the size in bytes of the serialized trie (header + index + data),
 *         or 0 if an error was set
 */
U_CAPI int32_t U_EXPORT2
utrie_swap(const UDataSwapper *ds,
           const void *inData, int32_t length, void *outData,
           UErrorCode *pErrorCode) {
    const UTrieHeader *inTrie;
    UTrieHeader trie;
    int64_t size64;
    int32_t size;
    UBool dataIs32;

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* setup and swapping */
    if(length>=0 && (uint32_t)length<sizeof(UTrieHeader)) {
        /* not even a complete header is available */
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /* read the header fields via the swapper so that they can be validated */
    inTrie=(const UTrieHeader *)inData;
    trie.signature=ds->readUInt32(inTrie->signature);
    trie.options=ds->readUInt32(inTrie->options);
    trie.indexLength=udata_readInt32(ds, inTrie->indexLength);
    trie.dataLength=udata_readInt32(ds, inTrie->dataLength);

    if( trie.signature!=0x54726965 || /* "Trie" in ASCII */
        (trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT ||
        ((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT ||
        trie.indexLength<UTRIE_BMP_INDEX_LENGTH ||
        (trie.indexLength&(UTRIE_SURROGATE_BLOCK_COUNT-1))!=0 ||
        trie.dataLength<UTRIE_DATA_BLOCK_LENGTH ||
        (trie.dataLength&(UTRIE_DATA_GRANULARITY-1))!=0 ||
        ((trie.options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0 && trie.dataLength<(UTRIE_DATA_BLOCK_LENGTH+0x100))
    ) {
        *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
        return 0;
    }

    dataIs32=(UBool)((trie.options&UTRIE_OPTIONS_DATA_IS_32_BIT)!=0);

    /*
     * indexLength and dataLength are 32-bit values taken from the input.
     * Compute the size in 64 bits so that large values cannot overflow int32_t
     * (signed overflow is undefined behavior), and reject sizes that the
     * int32_t return value and length parameter cannot represent.
     */
    size64=(int64_t)sizeof(UTrieHeader)+
           (int64_t)trie.indexLength*2+
           (int64_t)trie.dataLength*(dataIs32?4:2);
    if(size64>INT32_MAX) {
        *pErrorCode=U_INVALID_FORMAT_ERROR;
        return 0;
    }
    size=(int32_t)size64;

    if(length>=0) {
        UTrieHeader *outTrie;

        if(length<size) {
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }

        outTrie=(UTrieHeader *)outData;

        /* swap the header */
        ds->swapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode);

        /* swap the index and the data */
        if(dataIs32) {
            /* 16-bit index followed by 32-bit data: swap them separately */
            ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
            ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4,
                                     (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
        } else {
            /* index and data are both 16-bit: swap them as one array */
            ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode);
        }
    }

    return size;
}
|
||
|
|
||
|
/**
 * Swaps a serialized UTrie2 (signature UTRIE2_SIG).
 *
 * The header fields are read through the swapper, validated, and used to compute
 * the total serialized size. If length>=0, the header, the 16-bit index and the
 * 16- or 32-bit data are then swapped from inData to outData.
 *
 * @param ds         the data swapper supplying the read and array-swap functions
 * @param inData     pointer to the serialized trie, starting with its UTrie2Header
 * @param length     number of bytes available at inData;
 *                   if negative, nothing is written and only the size is computed
 * @param outData    destination for the swapped trie; must not be NULL if length>=0
 * @param pErrorCode in/out error code; the function returns 0 immediately if it is
 *                   NULL or already indicates a failure
 * @return the size in bytes of the serialized trie (header + index + data),
 *         or 0 if an error was set
 */
U_CAPI int32_t U_EXPORT2
utrie2_swap(const UDataSwapper *ds,
            const void *inData, int32_t length, void *outData,
            UErrorCode *pErrorCode) {
    const UTrie2Header *inTrie;
    UTrie2Header trie;
    int32_t dataLength, size;
    UTrie2ValueBits valueBits;

    /* guard against a NULL error code pointer, consistent with utrie_swap() */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* setup and swapping */
    if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) {
        /* not even a complete header is available */
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /* read the header fields via the swapper so that they can be validated */
    inTrie=(const UTrie2Header *)inData;
    trie.signature=ds->readUInt32(inTrie->signature);
    trie.options=ds->readUInt16(inTrie->options);
    trie.indexLength=ds->readUInt16(inTrie->indexLength);
    trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength);

    valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK);
    /* the header stores the data length right-shifted by UTRIE2_INDEX_SHIFT */
    dataLength=(int32_t)trie.shiftedDataLength<<UTRIE2_INDEX_SHIFT;

    if( trie.signature!=UTRIE2_SIG ||
        valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits ||
        trie.indexLength<UTRIE2_INDEX_1_OFFSET ||
        dataLength<UTRIE2_DATA_START_OFFSET
    ) {
        *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie2 */
        return 0;
    }

    /* header + 16-bit index + data of the width selected by valueBits */
    size=sizeof(UTrie2Header)+trie.indexLength*2;
    switch(valueBits) {
    case UTRIE2_16_VALUE_BITS:
        size+=dataLength*2;
        break;
    case UTRIE2_32_VALUE_BITS:
        size+=dataLength*4;
        break;
    default:
        *pErrorCode=U_INVALID_FORMAT_ERROR;
        return 0;
    }

    if(length>=0) {
        UTrie2Header *outTrie;

        if(length<size) {
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }

        outTrie=(UTrie2Header *)outData;

        /* swap the header: one 32-bit signature, then 12 bytes of 16-bit fields */
        ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode);
        ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode);

        /* swap the index and the data */
        switch(valueBits) {
        case UTRIE2_16_VALUE_BITS:
            /* index and data are both 16-bit: swap them as one array */
            ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode);
            break;
        case UTRIE2_32_VALUE_BITS:
            /* 16-bit index followed by 32-bit data: swap them separately */
            ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
            ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4,
                                     (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
            break;
        default:
            *pErrorCode=U_INVALID_FORMAT_ERROR;
            return 0;
        }
    }

    return size;
}
|
||
|
|
||
|
/**
 * Swaps a serialized UCPTrie (signature UCPTRIE_SIG).
 *
 * The header fields are read through the swapper, validated, and used to compute
 * the total serialized size. If length>=0, the header and the 16-bit index are
 * swapped from inData to outData; 16- and 32-bit data is swapped as well, while
 * 8-bit data is copied unchanged.
 *
 * @param ds         the data swapper supplying the read and array-swap functions
 * @param inData     pointer to the serialized trie, starting with its UCPTrieHeader
 * @param length     number of bytes available at inData;
 *                   if negative, nothing is written and only the size is computed
 * @param outData    destination for the swapped trie; must not be nullptr if length>=0
 * @param pErrorCode in/out error code; the function returns 0 immediately if it is
 *                   nullptr or already indicates a failure
 * @return the size in bytes of the serialized trie (header + index + data),
 *         or 0 if an error was set
 */
U_CAPI int32_t U_EXPORT2
ucptrie_swap(const UDataSwapper *ds,
             const void *inData, int32_t length, void *outData,
             UErrorCode *pErrorCode) {
    const UCPTrieHeader *inTrie;
    UCPTrieHeader trie;
    int32_t dataLength, size;
    UCPTrieValueWidth valueWidth;

    /* guard against a null error code pointer, consistent with utrie_swap() */
    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(ds==nullptr || inData==nullptr || (length>=0 && outData==nullptr)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* setup and swapping */
    if(length>=0 && length<(int32_t)sizeof(UCPTrieHeader)) {
        /* not even a complete header is available */
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /* read the header fields via the swapper so that they can be validated */
    inTrie=(const UCPTrieHeader *)inData;
    trie.signature=ds->readUInt32(inTrie->signature);
    trie.options=ds->readUInt16(inTrie->options);
    trie.indexLength=ds->readUInt16(inTrie->indexLength);
    trie.dataLength = ds->readUInt16(inTrie->dataLength);

    /* the trie type is stored in bits 7..6 of the options */
    UCPTrieType type = (UCPTrieType)((trie.options >> 6) & 3);
    valueWidth = (UCPTrieValueWidth)(trie.options & UCPTRIE_OPTIONS_VALUE_BITS_MASK);
    /* the 16-bit dataLength field is extended with high bits kept in the options */
    dataLength = ((int32_t)(trie.options & UCPTRIE_OPTIONS_DATA_LENGTH_MASK) << 4) | trie.dataLength;

    int32_t minIndexLength = type == UCPTRIE_TYPE_FAST ?
        UCPTRIE_BMP_INDEX_LENGTH : UCPTRIE_SMALL_INDEX_LENGTH;
    if( trie.signature!=UCPTRIE_SIG ||
        type > UCPTRIE_TYPE_SMALL ||
        (trie.options & UCPTRIE_OPTIONS_RESERVED_MASK) != 0 ||
        valueWidth > UCPTRIE_VALUE_BITS_8 ||
        trie.indexLength < minIndexLength ||
        dataLength < ASCII_LIMIT
    ) {
        *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UCPTrie */
        return 0;
    }

    /* header + 16-bit index + data of the width selected by valueWidth */
    size=sizeof(UCPTrieHeader)+trie.indexLength*2;
    switch(valueWidth) {
    case UCPTRIE_VALUE_BITS_16:
        size+=dataLength*2;
        break;
    case UCPTRIE_VALUE_BITS_32:
        size+=dataLength*4;
        break;
    case UCPTRIE_VALUE_BITS_8:
        size+=dataLength;
        break;
    default:
        *pErrorCode=U_INVALID_FORMAT_ERROR;
        return 0;
    }

    if(length>=0) {
        UCPTrieHeader *outTrie;

        if(length<size) {
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }

        outTrie=(UCPTrieHeader *)outData;

        /* swap the header: one 32-bit signature, then 12 bytes of 16-bit fields */
        ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode);
        ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode);

        /* swap the index and the data */
        switch(valueWidth) {
        case UCPTRIE_VALUE_BITS_16:
            /* index and data are both 16-bit: swap them as one array */
            ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode);
            break;
        case UCPTRIE_VALUE_BITS_32:
            /* 16-bit index followed by 32-bit data: swap them separately */
            ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
            ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4,
                                     (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
            break;
        case UCPTRIE_VALUE_BITS_8:
            ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
            /*
             * 8-bit data needs no byte swapping; copy it unless input and output
             * are the same buffer. The data follows the 16-bit index, so the
             * offset past the header must be counted in uint16_t units:
             * adding indexLength to a UCPTrieHeader pointer would skip
             * indexLength whole headers and address the wrong memory.
             */
            if(inTrie!=outTrie) {
                uprv_memmove((uint16_t *)(outTrie+1)+trie.indexLength,
                             (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength);
            }
            break;
        default:
            *pErrorCode=U_INVALID_FORMAT_ERROR;
            return 0;
        }
    }

    return size;
}
|
||
|
|
||
|
namespace {
|
||
|
|
||
|
/**
|
||
|
* Gets the trie version from 32-bit-aligned memory containing the serialized form
|
||
|
* of a UTrie (version 1), a UTrie2 (version 2), or a UCPTrie (version 3).
|
||
|
*
|
||
|
* @param data a pointer to 32-bit-aligned memory containing the serialized form of a trie
|
||
|
* @param length the number of bytes available at data;
|
||
|
* can be more than necessary (see return value)
|
||
|
* @param anyEndianOk If FALSE, only platform-endian serialized forms are recognized.
|
||
|
* If TRUE, opposite-endian serialized forms are recognized as well.
|
||
|
* @return the trie version of the serialized form, or 0 if it is not
|
||
|
* recognized as a serialized trie
|
||
|
*/
|
||
|
int32_t
|
||
|
getVersion(const void *data, int32_t length, UBool anyEndianOk) {
|
||
|
uint32_t signature;
|
||
|
if(length<16 || data==nullptr || (U_POINTER_MASK_LSB(data, 3)!=0)) {
|
||
|
return 0;
|
||
|
}
|
||
|
signature=*(const uint32_t *)data;
|
||
|
if(signature==UCPTRIE_SIG) {
|
||
|
return 3;
|
||
|
}
|
||
|
if(anyEndianOk && signature==UCPTRIE_OE_SIG) {
|
||
|
return 3;
|
||
|
}
|
||
|
if(signature==UTRIE2_SIG) {
|
||
|
return 2;
|
||
|
}
|
||
|
if(anyEndianOk && signature==UTRIE2_OE_SIG) {
|
||
|
return 2;
|
||
|
}
|
||
|
if(signature==UTRIE_SIG) {
|
||
|
return 1;
|
||
|
}
|
||
|
if(anyEndianOk && signature==UTRIE_OE_SIG) {
|
||
|
return 1;
|
||
|
}
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
} // namespace
|
||
|
|
||
|
/**
 * Swaps a serialized code point trie of any supported generation by detecting
 * its version with getVersion() (opposite-endian signatures allowed) and
 * delegating to utrie_swap(), utrie2_swap() or ucptrie_swap().
 *
 * Because getVersion() requires length>=16, a negative (size-only) length is
 * not recognized here and results in U_INVALID_FORMAT_ERROR.
 *
 * @param ds         the data swapper, passed through to the version-specific function
 * @param inData     pointer to the serialized trie
 * @param length     number of bytes available at inData
 * @param outData    destination for the swapped trie
 * @param pErrorCode in/out error code; the function returns 0 immediately if it is
 *                   nullptr or already indicates a failure
 * @return the value returned by the version-specific swap function,
 *         or 0 with U_INVALID_FORMAT_ERROR if the trie version is not recognized
 */
U_CAPI int32_t U_EXPORT2
utrie_swapAnyVersion(const UDataSwapper *ds,
                     const void *inData, int32_t length, void *outData,
                     UErrorCode *pErrorCode) {
    /* guard against a null error code pointer, consistent with utrie_swap() */
    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) { return 0; }
    switch(getVersion(inData, length, TRUE)) {
    case 1:
        return utrie_swap(ds, inData, length, outData, pErrorCode);
    case 2:
        return utrie2_swap(ds, inData, length, outData, pErrorCode);
    case 3:
        return ucptrie_swap(ds, inData, length, outData, pErrorCode);
    default:
        *pErrorCode=U_INVALID_FORMAT_ERROR;
        return 0;
    }
}
|