ICU-12410 low-level UTF-16 case mapping functions optionally write metadata to new class Edits
X-SVN-Rev: 39546
This commit is contained in:
parent
fb0439839b
commit
6fc7fe3b65
@ -25,6 +25,12 @@
|
|||||||
#include "unicode/ustring.h"
|
#include "unicode/ustring.h"
|
||||||
#include "unicode/localpointer.h"
|
#include "unicode/localpointer.h"
|
||||||
|
|
||||||
|
#if U_SHOW_CPLUSPLUS_API
|
||||||
|
|
||||||
|
#include "unicode/uobject.h"
|
||||||
|
|
||||||
|
#endif // U_SHOW_CPLUSPLUS_API
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \file
|
* \file
|
||||||
* \brief C API: Unicode case mapping functions using a UCaseMap service object.
|
* \brief C API: Unicode case mapping functions using a UCaseMap service object.
|
||||||
@ -94,6 +100,98 @@ U_NAMESPACE_BEGIN
|
|||||||
*/
|
*/
|
||||||
U_DEFINE_LOCAL_OPEN_POINTER(LocalUCaseMapPointer, UCaseMap, ucasemap_close);
|
U_DEFINE_LOCAL_OPEN_POINTER(LocalUCaseMapPointer, UCaseMap, ucasemap_close);
|
||||||
|
|
||||||
|
#ifndef U_HIDE_INTERNAL_API
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Records lengths of string edits but not replacement text.
|
||||||
|
* Supports replacements, insertions, deletions in linear progression.
|
||||||
|
* Does not support moving/reordering of text.
|
||||||
|
*
|
||||||
|
* @internal ICU 59 technology preview
|
||||||
|
*/
|
||||||
|
class Edits final : public UMemory {
|
||||||
|
public:
|
||||||
|
/**
|
||||||
|
* Constructs an empty object.
|
||||||
|
* @internal ICU 59 technology preview
|
||||||
|
*/
|
||||||
|
Edits() :
|
||||||
|
array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0),
|
||||||
|
errorCode(U_ZERO_ERROR) {}
|
||||||
|
~Edits();
|
||||||
|
/**
|
||||||
|
* Resets the data but may not release memory.
|
||||||
|
* @internal ICU 59 technology preview
|
||||||
|
*/
|
||||||
|
void reset();
|
||||||
|
/**
|
||||||
|
* Adds a record for an unchanged segment of text.
|
||||||
|
* @internal ICU 59 technology preview
|
||||||
|
*/
|
||||||
|
void addUnchanged(int32_t unchangedLength);
|
||||||
|
/**
|
||||||
|
* Adds a record for a text replacement/insertion/deletion.
|
||||||
|
* @internal ICU 59 technology preview
|
||||||
|
*/
|
||||||
|
void addReplace(int32_t oldLength, int32_t newLength);
|
||||||
|
/**
|
||||||
|
* Sets the UErrorCode if an error occurred while recording edits.
|
||||||
|
* Preserves older error codes in the outErrorCode.
|
||||||
|
* @return TRUE if U_FAILURE(outErrorCode)
|
||||||
|
* @internal ICU 59 technology preview
|
||||||
|
*/
|
||||||
|
UBool setErrorCode(UErrorCode &outErrorCode);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* How much longer is the new text compared with the old text?
|
||||||
|
* @return new length minus old length
|
||||||
|
* @internal ICU 59 technology preview
|
||||||
|
*/
|
||||||
|
int32_t lengthDelta() const { return delta; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
Edits(const Edits &) = delete;
|
||||||
|
Edits &operator=(const Edits &) = delete;
|
||||||
|
|
||||||
|
void setLastUnit(int32_t last) { array[length - 1] = (uint16_t)last; }
|
||||||
|
int32_t lastUnit() const { return length > 0 ? array[length - 1] : 0xffff; }
|
||||||
|
|
||||||
|
void append(int32_t r);
|
||||||
|
void append(const uint16_t *buffer, int32_t bLength);
|
||||||
|
UBool growArray();
|
||||||
|
|
||||||
|
static const int32_t STACK_CAPACITY = 100;
|
||||||
|
uint16_t *array;
|
||||||
|
int32_t capacity;
|
||||||
|
int32_t length;
|
||||||
|
int32_t delta;
|
||||||
|
UErrorCode errorCode;
|
||||||
|
uint16_t stackArray[STACK_CAPACITY];
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Omit unchanged text when case-mapping with Edits.
|
||||||
|
*
|
||||||
|
* TODO: revisit which bit to use; currently:
|
||||||
|
* - 31..20: old normalization options (only deprecated Unicode 3.2)
|
||||||
|
* shifted up for unorm_compare()
|
||||||
|
* - 19..16: more options specific to unorm_compare() (currently bits 19, 17, 16)
|
||||||
|
* - 15..12: more string compare options (currently bits 15 & 12)
|
||||||
|
* - 11.. 8: titlecase mapping options (currently bits 9..8)
|
||||||
|
* - 7.. 0: case folding options, but only bit 0 currently used
|
||||||
|
*
|
||||||
|
* could overlay any normalization and string *comparison* option bits
|
||||||
|
* with case *mapping* option bits
|
||||||
|
* *unless* we start using UCaseMap for string comparison functions
|
||||||
|
*
|
||||||
|
* future: German sharp s may need locale variant or option bit
|
||||||
|
*
|
||||||
|
* @internal ICU 59 technology preview
|
||||||
|
*/
|
||||||
|
#define UCASEMAP_OMIT_UNCHANGED 0x4000
|
||||||
|
|
||||||
|
#endif // U_HIDE_INTERNAL_API
|
||||||
|
|
||||||
U_NAMESPACE_END
|
U_NAMESPACE_END
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
@ -422,4 +520,19 @@ ucasemap_utf8FoldCase(const UCaseMap *csm,
|
|||||||
const char *src, int32_t srcLength,
|
const char *src, int32_t srcLength,
|
||||||
UErrorCode *pErrorCode);
|
UErrorCode *pErrorCode);
|
||||||
|
|
||||||
|
// Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper.
|
||||||
|
/**
|
||||||
|
* Internal string case mapping function type.
|
||||||
|
* All error checking must be done.
|
||||||
|
* The UCaseMap must be fully initialized, with locale and/or iter set as needed.
|
||||||
|
* src and dest must not overlap.
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
|
typedef int32_t U_CALLCONV
|
||||||
|
UStringCaseMapper(const UCaseMap *csm,
|
||||||
|
UChar *dest, int32_t destCapacity,
|
||||||
|
const UChar *src, int32_t srcLength,
|
||||||
|
icu::Edits *edits,
|
||||||
|
UErrorCode *pErrorCode);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -55,25 +55,6 @@ U_STABLE int32_t U_EXPORT2
|
|||||||
u_strlen(const UChar *s);
|
u_strlen(const UChar *s);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/**
|
|
||||||
* \def U_STRING_CASE_MAPPER_DEFINED
|
|
||||||
* @internal
|
|
||||||
*/
|
|
||||||
#ifndef U_STRING_CASE_MAPPER_DEFINED
|
|
||||||
#define U_STRING_CASE_MAPPER_DEFINED
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Internal string case mapping function type.
|
|
||||||
* @internal
|
|
||||||
*/
|
|
||||||
typedef int32_t U_CALLCONV
|
|
||||||
UStringCaseMapper(const UCaseMap *csm,
|
|
||||||
UChar *dest, int32_t destCapacity,
|
|
||||||
const UChar *src, int32_t srcLength,
|
|
||||||
UErrorCode *pErrorCode);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
U_NAMESPACE_BEGIN
|
U_NAMESPACE_BEGIN
|
||||||
|
|
||||||
#if !UCONFIG_NO_BREAK_ITERATION
|
#if !UCONFIG_NO_BREAK_ITERATION
|
||||||
|
@ -18,6 +18,7 @@
|
|||||||
#define __USTR_IMP_H__
|
#define __USTR_IMP_H__
|
||||||
|
|
||||||
#include "unicode/utypes.h"
|
#include "unicode/utypes.h"
|
||||||
|
#include "unicode/ucasemap.h"
|
||||||
#include "unicode/uiter.h"
|
#include "unicode/uiter.h"
|
||||||
#include "ucase.h"
|
#include "ucase.h"
|
||||||
|
|
||||||
@ -129,28 +130,12 @@ typedef struct UCaseMap UCaseMap;
|
|||||||
U_CFUNC void
|
U_CFUNC void
|
||||||
ustrcase_setTempCaseMapLocale(UCaseMap *csm, const char *locale);
|
ustrcase_setTempCaseMapLocale(UCaseMap *csm, const char *locale);
|
||||||
|
|
||||||
#ifndef U_STRING_CASE_MAPPER_DEFINED
|
|
||||||
#define U_STRING_CASE_MAPPER_DEFINED
|
|
||||||
|
|
||||||
/**
|
|
||||||
* String case mapping function type, used by ustrcase_map().
|
|
||||||
* All error checking must be done.
|
|
||||||
* The UCaseMap must be fully initialized, with locale and/or iter set as needed.
|
|
||||||
* src and dest must not overlap.
|
|
||||||
*/
|
|
||||||
typedef int32_t U_CALLCONV
|
|
||||||
UStringCaseMapper(const UCaseMap *csm,
|
|
||||||
UChar *dest, int32_t destCapacity,
|
|
||||||
const UChar *src, int32_t srcLength,
|
|
||||||
UErrorCode *pErrorCode);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/** Implements UStringCaseMapper. */
|
/** Implements UStringCaseMapper. */
|
||||||
U_CFUNC int32_t U_CALLCONV
|
U_CFUNC int32_t U_CALLCONV
|
||||||
ustrcase_internalToLower(const UCaseMap *csm,
|
ustrcase_internalToLower(const UCaseMap *csm,
|
||||||
UChar *dest, int32_t destCapacity,
|
UChar *dest, int32_t destCapacity,
|
||||||
const UChar *src, int32_t srcLength,
|
const UChar *src, int32_t srcLength,
|
||||||
|
icu::Edits *edits,
|
||||||
UErrorCode *pErrorCode);
|
UErrorCode *pErrorCode);
|
||||||
|
|
||||||
/** Implements UStringCaseMapper. */
|
/** Implements UStringCaseMapper. */
|
||||||
@ -158,6 +143,7 @@ U_CFUNC int32_t U_CALLCONV
|
|||||||
ustrcase_internalToUpper(const UCaseMap *csm,
|
ustrcase_internalToUpper(const UCaseMap *csm,
|
||||||
UChar *dest, int32_t destCapacity,
|
UChar *dest, int32_t destCapacity,
|
||||||
const UChar *src, int32_t srcLength,
|
const UChar *src, int32_t srcLength,
|
||||||
|
icu::Edits *edits,
|
||||||
UErrorCode *pErrorCode);
|
UErrorCode *pErrorCode);
|
||||||
|
|
||||||
#if !UCONFIG_NO_BREAK_ITERATION
|
#if !UCONFIG_NO_BREAK_ITERATION
|
||||||
@ -167,6 +153,7 @@ U_CFUNC int32_t U_CALLCONV
|
|||||||
ustrcase_internalToTitle(const UCaseMap *csm,
|
ustrcase_internalToTitle(const UCaseMap *csm,
|
||||||
UChar *dest, int32_t destCapacity,
|
UChar *dest, int32_t destCapacity,
|
||||||
const UChar *src, int32_t srcLength,
|
const UChar *src, int32_t srcLength,
|
||||||
|
icu::Edits *edits,
|
||||||
UErrorCode *pErrorCode);
|
UErrorCode *pErrorCode);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
@ -176,6 +163,7 @@ U_CFUNC int32_t U_CALLCONV
|
|||||||
ustrcase_internalFold(const UCaseMap *csm,
|
ustrcase_internalFold(const UCaseMap *csm,
|
||||||
UChar *dest, int32_t destCapacity,
|
UChar *dest, int32_t destCapacity,
|
||||||
const UChar *src, int32_t srcLength,
|
const UChar *src, int32_t srcLength,
|
||||||
|
icu::Edits *edits,
|
||||||
UErrorCode *pErrorCode);
|
UErrorCode *pErrorCode);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -187,6 +175,7 @@ ustrcase_map(const UCaseMap *csm,
|
|||||||
UChar *dest, int32_t destCapacity,
|
UChar *dest, int32_t destCapacity,
|
||||||
const UChar *src, int32_t srcLength,
|
const UChar *src, int32_t srcLength,
|
||||||
UStringCaseMapper *stringCaseMapper,
|
UStringCaseMapper *stringCaseMapper,
|
||||||
|
icu::Edits *edits,
|
||||||
UErrorCode *pErrorCode);
|
UErrorCode *pErrorCode);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -87,7 +87,7 @@ ucasemap_toTitle(UCaseMap *csm,
|
|||||||
csm,
|
csm,
|
||||||
dest, destCapacity,
|
dest, destCapacity,
|
||||||
src, srcLength,
|
src, srcLength,
|
||||||
ustrcase_internalToTitle, pErrorCode);
|
ustrcase_internalToTitle, NULL, pErrorCode);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // !UCONFIG_NO_BREAK_ITERATION
|
#endif // !UCONFIG_NO_BREAK_ITERATION
|
||||||
|
@ -32,6 +32,190 @@
|
|||||||
#include "ustr_imp.h"
|
#include "ustr_imp.h"
|
||||||
#include "uassert.h"
|
#include "uassert.h"
|
||||||
|
|
||||||
|
U_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
// 0000uuuuuuuuuuuu records u+1 unchanged text units.
|
||||||
|
const int32_t MAX_UNCHANGED_LENGTH = 0x1000;
|
||||||
|
const int32_t MAX_UNCHANGED = MAX_UNCHANGED_LENGTH - 1;
|
||||||
|
|
||||||
|
// 0wwwcccccccccccc with w=1..6 records ccc+1 replacements of w:w text units.
|
||||||
|
// No length change.
|
||||||
|
const int32_t MAX_SHORT_WIDTH = 6;
|
||||||
|
const int32_t MAX_SHORT_CHANGE_LENGTH = 0xfff;
|
||||||
|
const int32_t MAX_SHORT_CHANGE = 0x6fff;
|
||||||
|
|
||||||
|
// 0111mmmmmmnnnnnn records a replacement of m text units with n.
|
||||||
|
// m or n = 61: actual length follows in the next edits array unit.
|
||||||
|
// m or n = 62..63: actual length follows in the next two edits array units.
|
||||||
|
// Bit 30 of the actual length is in the head unit.
|
||||||
|
// Trailing units have bit 15 set.
|
||||||
|
const int32_t LENGTH_IN_1TRAIL = 61;
|
||||||
|
const int32_t LENGTH_IN_2TRAIL = 62;
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// Releases the storage unless it is the stackArray embedded in this object.
Edits::~Edits() {
    if(array == stackArray) {
        return;  // built-in storage, nothing to free
    }
    uprv_free(array);
}
|
||||||
|
|
||||||
|
void Edits::reset() {
|
||||||
|
length = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Records a run of unchangedLength unchanged text units.
// Does nothing after an earlier error or for a zero length;
// a negative length sets U_ILLEGAL_ARGUMENT_ERROR.
void Edits::addUnchanged(int32_t unchangedLength) {
    if(U_FAILURE(errorCode)) { return; }
    if(unchangedLength == 0) { return; }
    if(unchangedLength < 0) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    // If the newest unit is an unchanged-text record that is not yet full,
    // extend it first. (lastUnit() yields 0xffff for an empty array,
    // which never passes this test.)
    const int32_t tail = lastUnit();
    if(tail < MAX_UNCHANGED) {
        const int32_t room = MAX_UNCHANGED - tail;
        if(unchangedLength <= room) {
            setLastUnit(tail + unchangedLength);
            return;
        }
        setLastUnit(MAX_UNCHANGED);
        unchangedLength -= room;
    }

    // Emit as many full-size units as the remaining length needs.
    for(; unchangedLength >= MAX_UNCHANGED_LENGTH; unchangedLength -= MAX_UNCHANGED_LENGTH) {
        append(MAX_UNCHANGED);
    }

    // Whatever is left fits into one unit, which stores length-1.
    if(unchangedLength > 0) {
        append(unchangedLength - 1);
    }
}
|
||||||
|
|
||||||
|
void Edits::addReplace(int32_t oldLength, int32_t newLength) {
|
||||||
|
if(U_FAILURE(errorCode)) { return; }
|
||||||
|
if(oldLength == newLength && 0 < oldLength && oldLength <= MAX_SHORT_WIDTH) {
|
||||||
|
// Replacement of short oldLength text units by same-length new text.
|
||||||
|
// Merge into previous short-replacement record, if any.
|
||||||
|
int32_t last = lastUnit();
|
||||||
|
if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE &&
|
||||||
|
(last >> 12) == oldLength && (last & 0xfff) < MAX_SHORT_CHANGE_LENGTH) {
|
||||||
|
setLastUnit(last + 1);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
append(oldLength << 12);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(oldLength < 0 || newLength < 0) {
|
||||||
|
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (oldLength == 0 && newLength == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
int32_t newDelta = newLength - oldLength;
|
||||||
|
if (newDelta != 0) {
|
||||||
|
if (newDelta > 0 ? newDelta > (INT32_MAX - delta) : newDelta < (INT32_MIN - delta)) {
|
||||||
|
// Integer overflow or underflow.
|
||||||
|
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
delta += newDelta;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint16_t buffer[5];
|
||||||
|
int32_t bLength = 1;
|
||||||
|
int32_t head = 0x7000;
|
||||||
|
if(oldLength < LENGTH_IN_1TRAIL) {
|
||||||
|
head |= oldLength << 6;
|
||||||
|
} else if(oldLength <= 0x7fff) {
|
||||||
|
head |= LENGTH_IN_1TRAIL << 6;
|
||||||
|
buffer[bLength++] = (uint16_t)(0x8000 | oldLength);
|
||||||
|
} else {
|
||||||
|
head |= (LENGTH_IN_2TRAIL + (oldLength >> 30)) << 6;
|
||||||
|
buffer[bLength++] = (uint16_t)(0x8000 | (oldLength >> 15));
|
||||||
|
buffer[bLength++] = (uint16_t)(0x8000 | oldLength);
|
||||||
|
}
|
||||||
|
if(newLength < LENGTH_IN_1TRAIL) {
|
||||||
|
head |= newLength;
|
||||||
|
} else if(newLength <= 0x7fff) {
|
||||||
|
head |= LENGTH_IN_1TRAIL;
|
||||||
|
buffer[bLength++] = (uint16_t)(0x8000 | newLength);
|
||||||
|
} else {
|
||||||
|
head |= LENGTH_IN_2TRAIL + (newLength >> 30);
|
||||||
|
buffer[bLength++] = (uint16_t)(0x8000 | (newLength >> 15));
|
||||||
|
buffer[bLength++] = (uint16_t)(0x8000 | newLength);
|
||||||
|
}
|
||||||
|
if(bLength == 1) {
|
||||||
|
append(head);
|
||||||
|
} else {
|
||||||
|
buffer[0] = (uint16_t)head;
|
||||||
|
append(buffer, bLength);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stores one unit at the end of the array, growing the array when it is full.
// When growing fails, nothing is stored (growArray() sets errorCode).
void Edits::append(int32_t r) {
    if(length >= capacity && !growArray()) {
        return;
    }
    array[length++] = (uint16_t)r;
}
|
||||||
|
|
||||||
|
void Edits::append(const uint16_t *buffer, int32_t bLength) {
|
||||||
|
if(bLength > (INT32_MAX - length)) {
|
||||||
|
errorCode = U_INDEX_OUTOFBOUNDS_ERROR; // Integer overflow.
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if((length + bLength) < capacity || growArray()) {
|
||||||
|
int32_t i = 0;
|
||||||
|
do {
|
||||||
|
array[length++] = buffer[i++];
|
||||||
|
} while (i < bLength);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Moves the records into a larger, newly allocated array.
// Returns TRUE on success. On failure sets errorCode
// (U_BUFFER_OVERFLOW_ERROR when the array cannot grow enough,
// U_MEMORY_ALLOCATION_ERROR when the allocation fails), leaves the
// current array untouched and returns FALSE.
UBool Edits::growArray() {
    int32_t grown;
    if(array == stackArray) {
        // First move off the built-in array.
        grown = 2000;
    } else {
        if(capacity == INT32_MAX) {
            errorCode = U_BUFFER_OVERFLOW_ERROR;
            return FALSE;
        }
        // Double the capacity, clamped to INT32_MAX.
        grown = (capacity >= (INT32_MAX / 2)) ? INT32_MAX : 2 * capacity;
    }

    // Grow by at least 5 units so that a maximal change record will fit.
    if((grown - capacity) < 5) {
        errorCode = U_BUFFER_OVERFLOW_ERROR;
        return FALSE;
    }

    // Each unit is a uint16_t, hence two bytes per unit.
    uint16_t *bigger = (uint16_t *)uprv_malloc((size_t)grown * 2);
    if(bigger == NULL) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return FALSE;
    }
    uprv_memcpy(bigger, array, (size_t)length * 2);

    // The built-in stackArray is never freed.
    if(array != stackArray) {
        uprv_free(array);
    }
    array = bigger;
    capacity = grown;
    return TRUE;
}
|
||||||
|
|
||||||
|
// Copies the error recorded by this object into outErrorCode,
// unless outErrorCode already holds a failure, which is kept.
// Returns TRUE if outErrorCode indicates a failure afterwards.
UBool Edits::setErrorCode(UErrorCode &outErrorCode) {
    if(U_SUCCESS(outErrorCode)) {
        if(U_SUCCESS(errorCode)) {
            return FALSE;  // nothing to report
        }
        outErrorCode = errorCode;
    }
    return TRUE;
}
|
||||||
|
|
||||||
|
U_NAMESPACE_END
|
||||||
|
|
||||||
U_NAMESPACE_USE
|
U_NAMESPACE_USE
|
||||||
|
|
||||||
/* string casing ------------------------------------------------------------ */
|
/* string casing ------------------------------------------------------------ */
|
||||||
@ -39,21 +223,43 @@ U_NAMESPACE_USE
|
|||||||
/* Appends a full case mapping result, see UCASE_MAX_STRING_LENGTH. */
|
/* Appends a full case mapping result, see UCASE_MAX_STRING_LENGTH. */
|
||||||
static inline int32_t
|
static inline int32_t
|
||||||
appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
|
appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
|
||||||
int32_t result, const UChar *s) {
|
int32_t result, const UChar *s,
|
||||||
|
uint32_t options, int32_t cpLength, icu::Edits *edits) {
|
||||||
UChar32 c;
|
UChar32 c;
|
||||||
int32_t length;
|
int32_t length;
|
||||||
|
|
||||||
/* decode the result */
|
/* decode the result */
|
||||||
if(result<0) {
|
if(result<0) {
|
||||||
/* (not) original code point */
|
/* (not) original code point */
|
||||||
|
if(edits!=NULL) {
|
||||||
|
edits->addUnchanged(cpLength);
|
||||||
|
}
|
||||||
|
if(options & UCASEMAP_OMIT_UNCHANGED) {
|
||||||
|
return destIndex;
|
||||||
|
}
|
||||||
c=~result;
|
c=~result;
|
||||||
length=U16_LENGTH(c);
|
if(destIndex<destCapacity && c<=0xffff) { // BMP slightly-fastpath
|
||||||
} else if(result<=UCASE_MAX_STRING_LENGTH) {
|
dest[destIndex++]=(UChar)c;
|
||||||
c=U_SENTINEL;
|
return destIndex;
|
||||||
length=result;
|
}
|
||||||
|
length=cpLength;
|
||||||
} else {
|
} else {
|
||||||
c=result;
|
if(result<=UCASE_MAX_STRING_LENGTH) {
|
||||||
length=U16_LENGTH(c);
|
c=U_SENTINEL;
|
||||||
|
length=result;
|
||||||
|
} else if(destIndex<destCapacity && result<=0xffff) { // BMP slightly-fastpath
|
||||||
|
dest[destIndex++]=(UChar)result;
|
||||||
|
if(edits!=NULL) {
|
||||||
|
edits->addReplace(cpLength, 1);
|
||||||
|
}
|
||||||
|
return destIndex;
|
||||||
|
} else {
|
||||||
|
c=result;
|
||||||
|
length=U16_LENGTH(c);
|
||||||
|
}
|
||||||
|
if(edits!=NULL) {
|
||||||
|
edits->addReplace(cpLength, length);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if(length>(INT32_MAX-destIndex)) {
|
if(length>(INT32_MAX-destIndex)) {
|
||||||
return -1; // integer overflow
|
return -1; // integer overflow
|
||||||
@ -99,8 +305,8 @@ appendUChar(UChar *dest, int32_t destIndex, int32_t destCapacity, UChar c) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static inline int32_t
|
static inline int32_t
|
||||||
appendString(UChar *dest, int32_t destIndex, int32_t destCapacity,
|
appendUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity,
|
||||||
const UChar *s, int32_t length) {
|
const UChar *s, int32_t length, icu::Edits *edits) {
|
||||||
if(length>0) {
|
if(length>0) {
|
||||||
if(length>(INT32_MAX-destIndex)) {
|
if(length>(INT32_MAX-destIndex)) {
|
||||||
return -1; // integer overflow
|
return -1; // integer overflow
|
||||||
@ -109,6 +315,9 @@ appendString(UChar *dest, int32_t destIndex, int32_t destCapacity,
|
|||||||
u_memcpy(dest+destIndex, s, length);
|
u_memcpy(dest+destIndex, s, length);
|
||||||
}
|
}
|
||||||
destIndex+=length;
|
destIndex+=length;
|
||||||
|
if(edits!=NULL) {
|
||||||
|
edits->addUnchanged(length);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return destIndex;
|
return destIndex;
|
||||||
}
|
}
|
||||||
@ -154,31 +363,26 @@ _caseMap(const UCaseMap *csm, UCaseMapFull *map,
|
|||||||
UChar *dest, int32_t destCapacity,
|
UChar *dest, int32_t destCapacity,
|
||||||
const UChar *src, UCaseContext *csc,
|
const UChar *src, UCaseContext *csc,
|
||||||
int32_t srcStart, int32_t srcLimit,
|
int32_t srcStart, int32_t srcLimit,
|
||||||
|
icu::Edits *edits,
|
||||||
UErrorCode *pErrorCode) {
|
UErrorCode *pErrorCode) {
|
||||||
const UChar *s;
|
int32_t locCache=csm->locCache;
|
||||||
UChar32 c, c2 = 0;
|
|
||||||
int32_t srcIndex, destIndex;
|
|
||||||
int32_t locCache;
|
|
||||||
|
|
||||||
locCache=csm->locCache;
|
|
||||||
|
|
||||||
/* case mapping loop */
|
/* case mapping loop */
|
||||||
srcIndex=srcStart;
|
int32_t srcIndex=srcStart;
|
||||||
destIndex=0;
|
int32_t destIndex=0;
|
||||||
while(srcIndex<srcLimit) {
|
while(srcIndex<srcLimit) {
|
||||||
csc->cpStart=srcIndex;
|
int32_t cpStart;
|
||||||
|
csc->cpStart=cpStart=srcIndex;
|
||||||
|
UChar32 c;
|
||||||
U16_NEXT(src, srcIndex, srcLimit, c);
|
U16_NEXT(src, srcIndex, srcLimit, c);
|
||||||
csc->cpLimit=srcIndex;
|
csc->cpLimit=srcIndex;
|
||||||
|
const UChar *s;
|
||||||
c=map(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &locCache);
|
c=map(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &locCache);
|
||||||
if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) {
|
destIndex = appendResult(dest, destIndex, destCapacity, c, s,
|
||||||
/* fast path version of appendResult() for BMP results */
|
csm->options, srcIndex - cpStart, edits);
|
||||||
dest[destIndex++]=(UChar)c2;
|
if (destIndex < 0) {
|
||||||
} else {
|
*pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||||
destIndex=appendResult(dest, destIndex, destCapacity, c, s);
|
return 0;
|
||||||
if(destIndex<0) {
|
|
||||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -194,12 +398,8 @@ U_CFUNC int32_t U_CALLCONV
|
|||||||
ustrcase_internalToTitle(const UCaseMap *csm,
|
ustrcase_internalToTitle(const UCaseMap *csm,
|
||||||
UChar *dest, int32_t destCapacity,
|
UChar *dest, int32_t destCapacity,
|
||||||
const UChar *src, int32_t srcLength,
|
const UChar *src, int32_t srcLength,
|
||||||
|
icu::Edits *edits,
|
||||||
UErrorCode *pErrorCode) {
|
UErrorCode *pErrorCode) {
|
||||||
const UChar *s;
|
|
||||||
UChar32 c;
|
|
||||||
int32_t prev, titleStart, titleLimit, idx, destIndex;
|
|
||||||
UBool isFirstIndex;
|
|
||||||
|
|
||||||
if(U_FAILURE(*pErrorCode)) {
|
if(U_FAILURE(*pErrorCode)) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -213,13 +413,14 @@ ustrcase_internalToTitle(const UCaseMap *csm,
|
|||||||
UCaseContext csc=UCASECONTEXT_INITIALIZER;
|
UCaseContext csc=UCASECONTEXT_INITIALIZER;
|
||||||
csc.p=(void *)src;
|
csc.p=(void *)src;
|
||||||
csc.limit=srcLength;
|
csc.limit=srcLength;
|
||||||
destIndex=0;
|
int32_t destIndex=0;
|
||||||
prev=0;
|
int32_t prev=0;
|
||||||
isFirstIndex=TRUE;
|
UBool isFirstIndex=TRUE;
|
||||||
|
|
||||||
/* titlecasing loop */
|
/* titlecasing loop */
|
||||||
while(prev<srcLength) {
|
while(prev<srcLength) {
|
||||||
/* find next index where to titlecase */
|
/* find next index where to titlecase */
|
||||||
|
int32_t idx;
|
||||||
if(isFirstIndex) {
|
if(isFirstIndex) {
|
||||||
isFirstIndex=FALSE;
|
isFirstIndex=FALSE;
|
||||||
idx=bi->first();
|
idx=bi->first();
|
||||||
@ -245,7 +446,9 @@ ustrcase_internalToTitle(const UCaseMap *csm,
|
|||||||
*/
|
*/
|
||||||
if(prev<idx) {
|
if(prev<idx) {
|
||||||
/* find and copy uncased characters [prev..titleStart[ */
|
/* find and copy uncased characters [prev..titleStart[ */
|
||||||
titleStart=titleLimit=prev;
|
int32_t titleStart=prev;
|
||||||
|
int32_t titleLimit=prev;
|
||||||
|
UChar32 c;
|
||||||
U16_NEXT(src, titleLimit, idx, c);
|
U16_NEXT(src, titleLimit, idx, c);
|
||||||
if((csm->options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(csm->csp, c)) {
|
if((csm->options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(csm->csp, c)) {
|
||||||
/* Adjust the titlecasing index (titleStart) to the next cased character. */
|
/* Adjust the titlecasing index (titleStart) to the next cased character. */
|
||||||
@ -263,7 +466,8 @@ ustrcase_internalToTitle(const UCaseMap *csm,
|
|||||||
break; /* cased letter at [titleStart..titleLimit[ */
|
break; /* cased letter at [titleStart..titleLimit[ */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
destIndex=appendString(dest, destIndex, destCapacity, src+prev, titleStart-prev);
|
destIndex=appendUnchanged(dest, destIndex, destCapacity,
|
||||||
|
src+prev, titleStart-prev, edits);
|
||||||
if(destIndex<0) {
|
if(destIndex<0) {
|
||||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||||
return 0;
|
return 0;
|
||||||
@ -274,8 +478,11 @@ ustrcase_internalToTitle(const UCaseMap *csm,
|
|||||||
/* titlecase c which is from [titleStart..titleLimit[ */
|
/* titlecase c which is from [titleStart..titleLimit[ */
|
||||||
csc.cpStart=titleStart;
|
csc.cpStart=titleStart;
|
||||||
csc.cpLimit=titleLimit;
|
csc.cpLimit=titleLimit;
|
||||||
c=ucase_toFullTitle(csm->csp, c, utf16_caseContextIterator, &csc, &s, csm->locale, &locCache);
|
const UChar *s;
|
||||||
destIndex=appendResult(dest, destIndex, destCapacity, c, s);
|
c=ucase_toFullTitle(csm->csp, c, utf16_caseContextIterator, &csc, &s,
|
||||||
|
csm->locale, &locCache);
|
||||||
|
destIndex=appendResult(dest, destIndex, destCapacity, c, s,
|
||||||
|
csm->options, titleLimit-titleStart, edits);
|
||||||
if(destIndex<0) {
|
if(destIndex<0) {
|
||||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||||
return 0;
|
return 0;
|
||||||
@ -291,6 +498,9 @@ ustrcase_internalToTitle(const UCaseMap *csm,
|
|||||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
if(edits!=NULL) {
|
||||||
|
edits->addReplace(1, 1);
|
||||||
|
}
|
||||||
titleLimit++;
|
titleLimit++;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -304,7 +514,7 @@ ustrcase_internalToTitle(const UCaseMap *csm,
|
|||||||
dest+destIndex, destCapacity-destIndex,
|
dest+destIndex, destCapacity-destIndex,
|
||||||
src, &csc,
|
src, &csc,
|
||||||
titleLimit, idx,
|
titleLimit, idx,
|
||||||
pErrorCode);
|
edits, pErrorCode);
|
||||||
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
|
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
|
||||||
*pErrorCode=U_ZERO_ERROR;
|
*pErrorCode=U_ZERO_ERROR;
|
||||||
}
|
}
|
||||||
@ -313,7 +523,8 @@ ustrcase_internalToTitle(const UCaseMap *csm,
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
/* Optionally just copy the rest of the word unchanged. */
|
/* Optionally just copy the rest of the word unchanged. */
|
||||||
destIndex=appendString(dest, destIndex, destCapacity, src+titleLimit, idx-titleLimit);
|
destIndex=appendUnchanged(dest, destIndex, destCapacity,
|
||||||
|
src+titleLimit, idx-titleLimit, edits);
|
||||||
if(destIndex<0) {
|
if(destIndex<0) {
|
||||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||||
return 0;
|
return 0;
|
||||||
@ -816,6 +1027,7 @@ UBool isFollowedByCasedLetter(const UCaseProps *csp, const UChar *s, int32_t i,
|
|||||||
int32_t toUpper(const UCaseMap *csm,
|
int32_t toUpper(const UCaseMap *csm,
|
||||||
UChar *dest, int32_t destCapacity,
|
UChar *dest, int32_t destCapacity,
|
||||||
const UChar *src, int32_t srcLength,
|
const UChar *src, int32_t srcLength,
|
||||||
|
Edits *edits,
|
||||||
UErrorCode *pErrorCode) {
|
UErrorCode *pErrorCode) {
|
||||||
int32_t locCache = UCASE_LOC_GREEK;
|
int32_t locCache = UCASE_LOC_GREEK;
|
||||||
int32_t destIndex=0;
|
int32_t destIndex=0;
|
||||||
@ -890,35 +1102,64 @@ int32_t toUpper(const UCaseMap *csm,
|
|||||||
data &= ~HAS_EITHER_DIALYTIKA;
|
data &= ~HAS_EITHER_DIALYTIKA;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
destIndex=appendUChar(dest, destIndex, destCapacity, (UChar)upper);
|
|
||||||
if (destIndex >= 0 && (data & HAS_EITHER_DIALYTIKA) != 0) {
|
UBool change;
|
||||||
destIndex=appendUChar(dest, destIndex, destCapacity, 0x308); // restore or add a dialytika
|
if ((csm->options & UCASEMAP_OMIT_UNCHANGED) == 0 && edits == NULL) {
|
||||||
}
|
change = TRUE; // common, simple usage
|
||||||
if (destIndex >= 0 && addTonos) {
|
|
||||||
destIndex=appendUChar(dest, destIndex, destCapacity, 0x301);
|
|
||||||
}
|
|
||||||
while (destIndex >= 0 && numYpogegrammeni > 0) {
|
|
||||||
destIndex=appendUChar(dest, destIndex, destCapacity, 0x399);
|
|
||||||
--numYpogegrammeni;
|
|
||||||
}
|
|
||||||
if(destIndex<0) {
|
|
||||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
const UChar *s;
|
|
||||||
UChar32 c2 = 0;
|
|
||||||
c=ucase_toFullUpper(csm->csp, c, NULL, NULL, &s, csm->locale, &locCache);
|
|
||||||
if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) {
|
|
||||||
/* fast path version of appendResult() for BMP results */
|
|
||||||
dest[destIndex++]=(UChar)c2;
|
|
||||||
} else {
|
} else {
|
||||||
destIndex=appendResult(dest, destIndex, destCapacity, c, s);
|
// Find out first whether we are changing the text.
|
||||||
|
change = src[i] != upper || numYpogegrammeni > 0;
|
||||||
|
int32_t i2 = i + 1;
|
||||||
|
if ((data & HAS_EITHER_DIALYTIKA) != 0) {
|
||||||
|
change |= i2 >= nextIndex || src[i2] != 0x308;
|
||||||
|
++i2;
|
||||||
|
}
|
||||||
|
if (addTonos) {
|
||||||
|
change |= i2 >= nextIndex || src[i2] != 0x301;
|
||||||
|
++i2;
|
||||||
|
}
|
||||||
|
int32_t oldLength = nextIndex - i;
|
||||||
|
int32_t newLength = (i2 - i) + numYpogegrammeni;
|
||||||
|
change |= oldLength != newLength;
|
||||||
|
if (change) {
|
||||||
|
if (edits != NULL) {
|
||||||
|
edits->addReplace(oldLength, newLength);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (edits != NULL) {
|
||||||
|
edits->addUnchanged(oldLength);
|
||||||
|
}
|
||||||
|
// Write unchanged text?
|
||||||
|
change |= (csm->options & UCASEMAP_OMIT_UNCHANGED) == 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (change) {
|
||||||
|
destIndex=appendUChar(dest, destIndex, destCapacity, (UChar)upper);
|
||||||
|
if (destIndex >= 0 && (data & HAS_EITHER_DIALYTIKA) != 0) {
|
||||||
|
destIndex=appendUChar(dest, destIndex, destCapacity, 0x308); // restore or add a dialytika
|
||||||
|
}
|
||||||
|
if (destIndex >= 0 && addTonos) {
|
||||||
|
destIndex=appendUChar(dest, destIndex, destCapacity, 0x301);
|
||||||
|
}
|
||||||
|
while (destIndex >= 0 && numYpogegrammeni > 0) {
|
||||||
|
destIndex=appendUChar(dest, destIndex, destCapacity, 0x399);
|
||||||
|
--numYpogegrammeni;
|
||||||
|
}
|
||||||
if(destIndex<0) {
|
if(destIndex<0) {
|
||||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
const UChar *s;
|
||||||
|
c=ucase_toFullUpper(csm->csp, c, NULL, NULL, &s, csm->locale, &locCache);
|
||||||
|
destIndex = appendResult(dest, destIndex, destCapacity, c, s,
|
||||||
|
csm->options, nextIndex - i, edits);
|
||||||
|
if (destIndex < 0) {
|
||||||
|
*pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
i = nextIndex;
|
i = nextIndex;
|
||||||
state = nextState;
|
state = nextState;
|
||||||
@ -939,6 +1180,7 @@ U_CFUNC int32_t U_CALLCONV
|
|||||||
ustrcase_internalToLower(const UCaseMap *csm,
|
ustrcase_internalToLower(const UCaseMap *csm,
|
||||||
UChar *dest, int32_t destCapacity,
|
UChar *dest, int32_t destCapacity,
|
||||||
const UChar *src, int32_t srcLength,
|
const UChar *src, int32_t srcLength,
|
||||||
|
icu::Edits *edits,
|
||||||
UErrorCode *pErrorCode) {
|
UErrorCode *pErrorCode) {
|
||||||
UCaseContext csc=UCASECONTEXT_INITIALIZER;
|
UCaseContext csc=UCASECONTEXT_INITIALIZER;
|
||||||
csc.p=(void *)src;
|
csc.p=(void *)src;
|
||||||
@ -947,17 +1189,18 @@ ustrcase_internalToLower(const UCaseMap *csm,
|
|||||||
csm, ucase_toFullLower,
|
csm, ucase_toFullLower,
|
||||||
dest, destCapacity,
|
dest, destCapacity,
|
||||||
src, &csc, 0, srcLength,
|
src, &csc, 0, srcLength,
|
||||||
pErrorCode);
|
edits, pErrorCode);
|
||||||
}
|
}
|
||||||
|
|
||||||
U_CFUNC int32_t U_CALLCONV
|
U_CFUNC int32_t U_CALLCONV
|
||||||
ustrcase_internalToUpper(const UCaseMap *csm,
|
ustrcase_internalToUpper(const UCaseMap *csm,
|
||||||
UChar *dest, int32_t destCapacity,
|
UChar *dest, int32_t destCapacity,
|
||||||
const UChar *src, int32_t srcLength,
|
const UChar *src, int32_t srcLength,
|
||||||
|
icu::Edits *edits,
|
||||||
UErrorCode *pErrorCode) {
|
UErrorCode *pErrorCode) {
|
||||||
int32_t locCache = csm->locCache;
|
int32_t locCache = csm->locCache;
|
||||||
if (ucase_getCaseLocale(csm->locale, &locCache) == UCASE_LOC_GREEK) {
|
if (ucase_getCaseLocale(csm->locale, &locCache) == UCASE_LOC_GREEK) {
|
||||||
return GreekUpper::toUpper(csm, dest, destCapacity, src, srcLength, pErrorCode);
|
return GreekUpper::toUpper(csm, dest, destCapacity, src, srcLength, edits, pErrorCode);
|
||||||
}
|
}
|
||||||
UCaseContext csc=UCASECONTEXT_INITIALIZER;
|
UCaseContext csc=UCASECONTEXT_INITIALIZER;
|
||||||
csc.p=(void *)src;
|
csc.p=(void *)src;
|
||||||
@ -966,49 +1209,37 @@ ustrcase_internalToUpper(const UCaseMap *csm,
|
|||||||
csm, ucase_toFullUpper,
|
csm, ucase_toFullUpper,
|
||||||
dest, destCapacity,
|
dest, destCapacity,
|
||||||
src, &csc, 0, srcLength,
|
src, &csc, 0, srcLength,
|
||||||
pErrorCode);
|
edits, pErrorCode);
|
||||||
}
|
|
||||||
|
|
||||||
static int32_t
|
|
||||||
ustr_foldCase(const UCaseProps *csp,
|
|
||||||
UChar *dest, int32_t destCapacity,
|
|
||||||
const UChar *src, int32_t srcLength,
|
|
||||||
uint32_t options,
|
|
||||||
UErrorCode *pErrorCode) {
|
|
||||||
int32_t srcIndex, destIndex;
|
|
||||||
|
|
||||||
const UChar *s;
|
|
||||||
UChar32 c, c2 = 0;
|
|
||||||
|
|
||||||
/* case mapping loop */
|
|
||||||
srcIndex=destIndex=0;
|
|
||||||
while(srcIndex<srcLength) {
|
|
||||||
U16_NEXT(src, srcIndex, srcLength, c);
|
|
||||||
c=ucase_toFullFolding(csp, c, &s, options);
|
|
||||||
if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) {
|
|
||||||
/* fast path version of appendResult() for BMP results */
|
|
||||||
dest[destIndex++]=(UChar)c2;
|
|
||||||
} else {
|
|
||||||
destIndex=appendResult(dest, destIndex, destCapacity, c, s);
|
|
||||||
if(destIndex<0) {
|
|
||||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if(destIndex>destCapacity) {
|
|
||||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
|
||||||
}
|
|
||||||
return destIndex;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
U_CFUNC int32_t U_CALLCONV
|
U_CFUNC int32_t U_CALLCONV
|
||||||
ustrcase_internalFold(const UCaseMap *csm,
|
ustrcase_internalFold(const UCaseMap *csm,
|
||||||
UChar *dest, int32_t destCapacity,
|
UChar *dest, int32_t destCapacity,
|
||||||
const UChar *src, int32_t srcLength,
|
const UChar *src, int32_t srcLength,
|
||||||
|
icu::Edits *edits,
|
||||||
UErrorCode *pErrorCode) {
|
UErrorCode *pErrorCode) {
|
||||||
return ustr_foldCase(csm->csp, dest, destCapacity, src, srcLength, csm->options, pErrorCode);
|
/* case mapping loop */
|
||||||
|
int32_t srcIndex = 0;
|
||||||
|
int32_t destIndex = 0;
|
||||||
|
while (srcIndex < srcLength) {
|
||||||
|
int32_t cpStart = srcIndex;
|
||||||
|
UChar32 c;
|
||||||
|
U16_NEXT(src, srcIndex, srcLength, c);
|
||||||
|
const UChar *s;
|
||||||
|
c = ucase_toFullFolding(csm->csp, c, &s, csm->options);
|
||||||
|
destIndex = appendResult(dest, destIndex, destCapacity, c, s,
|
||||||
|
csm->options, srcIndex - cpStart, edits);
|
||||||
|
if (destIndex < 0) {
|
||||||
|
*pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: are these internal functions called where destIndex>destCapacity is not already checked? (see u_terminateUChars())
|
||||||
|
if (destIndex > destCapacity) {
|
||||||
|
*pErrorCode = U_BUFFER_OVERFLOW_ERROR;
|
||||||
|
}
|
||||||
|
return destIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
U_CFUNC int32_t
|
U_CFUNC int32_t
|
||||||
@ -1016,6 +1247,7 @@ ustrcase_map(const UCaseMap *csm,
|
|||||||
UChar *dest, int32_t destCapacity,
|
UChar *dest, int32_t destCapacity,
|
||||||
const UChar *src, int32_t srcLength,
|
const UChar *src, int32_t srcLength,
|
||||||
UStringCaseMapper *stringCaseMapper,
|
UStringCaseMapper *stringCaseMapper,
|
||||||
|
icu::Edits *edits,
|
||||||
UErrorCode *pErrorCode) {
|
UErrorCode *pErrorCode) {
|
||||||
UChar buffer[300];
|
UChar buffer[300];
|
||||||
UChar *temp;
|
UChar *temp;
|
||||||
@ -1061,14 +1293,11 @@ ustrcase_map(const UCaseMap *csm,
|
|||||||
temp=dest;
|
temp=dest;
|
||||||
}
|
}
|
||||||
|
|
||||||
destLength=stringCaseMapper(csm, temp, destCapacity, src, srcLength, pErrorCode);
|
destLength=stringCaseMapper(csm, temp, destCapacity, src, srcLength, edits, pErrorCode);
|
||||||
if(temp!=dest) {
|
if(temp!=dest) {
|
||||||
/* copy the result string to the destination buffer */
|
/* copy the result string to the destination buffer */
|
||||||
if(destLength>0) {
|
if (U_SUCCESS(*pErrorCode) && 0 < destLength && destLength <= destCapacity) {
|
||||||
int32_t copyLength= destLength<=destCapacity ? destLength : destCapacity;
|
u_memmove(dest, temp, destLength);
|
||||||
if(copyLength>0) {
|
|
||||||
u_memmove(dest, temp, copyLength);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if(temp!=buffer) {
|
if(temp!=buffer) {
|
||||||
uprv_free(temp);
|
uprv_free(temp);
|
||||||
@ -1092,7 +1321,7 @@ u_strFoldCase(UChar *dest, int32_t destCapacity,
|
|||||||
&csm,
|
&csm,
|
||||||
dest, destCapacity,
|
dest, destCapacity,
|
||||||
src, srcLength,
|
src, srcLength,
|
||||||
ustrcase_internalFold, pErrorCode);
|
ustrcase_internalFold, NULL, pErrorCode);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* case-insensitive string comparisons -------------------------------------- */
|
/* case-insensitive string comparisons -------------------------------------- */
|
||||||
|
@ -94,7 +94,7 @@ u_strToLower(UChar *dest, int32_t destCapacity,
|
|||||||
&csm,
|
&csm,
|
||||||
dest, destCapacity,
|
dest, destCapacity,
|
||||||
src, srcLength,
|
src, srcLength,
|
||||||
ustrcase_internalToLower, pErrorCode);
|
ustrcase_internalToLower, NULL, pErrorCode);
|
||||||
}
|
}
|
||||||
|
|
||||||
U_CAPI int32_t U_EXPORT2
|
U_CAPI int32_t U_EXPORT2
|
||||||
@ -108,5 +108,5 @@ u_strToUpper(UChar *dest, int32_t destCapacity,
|
|||||||
&csm,
|
&csm,
|
||||||
dest, destCapacity,
|
dest, destCapacity,
|
||||||
src, srcLength,
|
src, srcLength,
|
||||||
ustrcase_internalToUpper, pErrorCode);
|
ustrcase_internalToUpper, NULL, pErrorCode);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user