ICU-12410 move new code into new files, split ucasemap_imp.h from ustr_imp.h
X-SVN-Rev: 39655
This commit is contained in:
parent
1c2a1da83b
commit
5da94f206a
@ -94,6 +94,7 @@ stringtriebuilder.o bytestriebuilder.o \
|
||||
bytestrie.o bytestrieiterator.o \
|
||||
ucharstrie.o ucharstriebuilder.o ucharstrieiterator.o \
|
||||
dictionarydata.o \
|
||||
edits.o \
|
||||
appendable.o ustr_cnv.o unistr_cnv.o unistr.o unistr_case.o unistr_props.o \
|
||||
utf_impl.o ustring.o ustrcase.o ucasemap.o ucasemap_titlecase_brkiter.o cstring.o ustrfmt.o ustrtrns.o ustr_wcs.o utext.o \
|
||||
unistr_case_locale.o ustrcase_locale.o unistr_titlecase_brkiter.o ustr_titlecase_brkiter.o \
|
||||
|
@ -449,6 +449,7 @@
|
||||
<ClCompile Include="cstring.cpp" />
|
||||
<ClCompile Include="cstr.cpp" />
|
||||
<ClCompile Include="cwchar.cpp" />
|
||||
<ClCompile Include="edits.cpp" />
|
||||
<ClCompile Include="messagepattern.cpp" />
|
||||
<ClCompile Include="schriter.cpp" />
|
||||
<ClCompile Include="stringpiece.cpp" />
|
||||
@ -1511,6 +1512,20 @@
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">copy "%(FullPath)" ..\..\include\unicode
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
|
||||
</CustomBuild>
|
||||
<CustomBuild Include="unicode\casemap.h">
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">copy "%(FullPath)" ..\..\include\unicode
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">copy "%(FullPath)" ..\..\include\unicode
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">copy "%(FullPath)" ..\..\include\unicode
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
|
||||
</CustomBuild>
|
||||
@ -1532,6 +1547,20 @@
|
||||
<ClInclude Include="cstring.h" />
|
||||
<ClInclude Include="cstr.h" />
|
||||
<ClInclude Include="cwchar.h" />
|
||||
<CustomBuild Include="unicode\edits.h">
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">copy "%(FullPath)" ..\..\include\unicode
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">copy "%(FullPath)" ..\..\include\unicode
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">copy "%(FullPath)" ..\..\include\unicode
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
|
||||
</CustomBuild>
|
||||
<CustomBuild Include="unicode\messagepattern.h">
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode
|
||||
</Command>
|
||||
@ -1616,6 +1645,7 @@
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
|
||||
</CustomBuild>
|
||||
<ClInclude Include="ucasemap_imp.h" />
|
||||
<CustomBuild Include="unicode\ucharstrie.h">
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode
|
||||
</Command>
|
||||
|
@ -478,6 +478,9 @@
|
||||
<ClCompile Include="cwchar.cpp">
|
||||
<Filter>strings</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="edits.cpp">
|
||||
<Filter>strings</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="schriter.cpp">
|
||||
<Filter>strings</Filter>
|
||||
</ClCompile>
|
||||
@ -870,6 +873,9 @@
|
||||
<ClInclude Include="cwchar.h">
|
||||
<Filter>strings</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="ucasemap_imp.h">
|
||||
<Filter>strings</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="uinvchar.h">
|
||||
<Filter>strings</Filter>
|
||||
</ClInclude>
|
||||
@ -1096,9 +1102,15 @@
|
||||
<CustomBuild Include="unicode\bytestream.h">
|
||||
<Filter>strings</Filter>
|
||||
</CustomBuild>
|
||||
<CustomBuild Include="unicode\casemap.h">
|
||||
<Filter>strings</Filter>
|
||||
</CustomBuild>
|
||||
<CustomBuild Include="unicode\chariter.h">
|
||||
<Filter>strings</Filter>
|
||||
</CustomBuild>
|
||||
<CustomBuild Include="unicode\edits.h">
|
||||
<Filter>strings</Filter>
|
||||
</CustomBuild>
|
||||
<CustomBuild Include="unicode\rep.h">
|
||||
<Filter>strings</Filter>
|
||||
</CustomBuild>
|
||||
|
342
icu4c/source/common/edits.cpp
Normal file
342
icu4c/source/common/edits.cpp
Normal file
@ -0,0 +1,342 @@
|
||||
// Copyright (C) 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
// edits.cpp
|
||||
// created: 2017feb08 Markus W. Scherer
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/edits.h"
|
||||
#include "cmemory.h"
|
||||
#include "uassert.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
namespace {

// Layout of the 16-bit units in the edits array, selected by their leading bits.

// 0000uuuuuuuuuuuu
// A run of u+1 unchanged text units.
const int32_t MAX_UNCHANGED_LENGTH = 0x1000;
const int32_t MAX_UNCHANGED = MAX_UNCHANGED_LENGTH - 1;

// 0wwwcccccccccccc, w=1..6
// ccc+1 consecutive replacements, each of w text units by w text units,
// so the total length does not change.
const int32_t MAX_SHORT_WIDTH = 6;
const int32_t MAX_SHORT_CHANGE_LENGTH = 0xfff;
const int32_t MAX_SHORT_CHANGE = 0x6fff;

// 0111mmmmmmnnnnnn
// One replacement of m text units with n text units.
// m or n = 61: the actual length follows in the next edits array unit.
// m or n = 62..63: the actual length follows in the next two edits array units,
//                  and bit 30 of the actual length is stored in the head unit.
// Trailing units always have bit 15 set.
const int32_t LENGTH_IN_1TRAIL = 61;
const int32_t LENGTH_IN_2TRAIL = 62;

}  // namespace
|
||||
|
||||
// Frees the edits array, unless it is still the built-in stackArray.
Edits::~Edits() {
    if (array == stackArray) {
        return;  // Nothing was allocated.
    }
    uprv_free(array);
}
|
||||
|
||||
void Edits::reset() {
|
||||
length = 0;
|
||||
}
|
||||
|
||||
void Edits::addUnchanged(int32_t unchangedLength) {
|
||||
if(U_FAILURE(errorCode) || unchangedLength == 0) { return; }
|
||||
if(unchangedLength < 0) {
|
||||
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
// Merge into previous unchanged-text record, if any.
|
||||
int32_t last = lastUnit();
|
||||
if(last < MAX_UNCHANGED) {
|
||||
int32_t remaining = MAX_UNCHANGED - last;
|
||||
if (remaining >= unchangedLength) {
|
||||
setLastUnit(last + unchangedLength);
|
||||
return;
|
||||
}
|
||||
setLastUnit(MAX_UNCHANGED);
|
||||
unchangedLength -= remaining;
|
||||
}
|
||||
// Split large lengths into multiple units.
|
||||
while(unchangedLength >= MAX_UNCHANGED_LENGTH) {
|
||||
append(MAX_UNCHANGED);
|
||||
unchangedLength -= MAX_UNCHANGED_LENGTH;
|
||||
}
|
||||
// Write a small (remaining) length.
|
||||
if(unchangedLength > 0) {
|
||||
append(unchangedLength - 1);
|
||||
}
|
||||
}
|
||||
|
||||
void Edits::addReplace(int32_t oldLength, int32_t newLength) {
|
||||
if(U_FAILURE(errorCode)) { return; }
|
||||
if(oldLength == newLength && 0 < oldLength && oldLength <= MAX_SHORT_WIDTH) {
|
||||
// Replacement of short oldLength text units by same-length new text.
|
||||
// Merge into previous short-replacement record, if any.
|
||||
int32_t last = lastUnit();
|
||||
if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE &&
|
||||
(last >> 12) == oldLength && (last & 0xfff) < MAX_SHORT_CHANGE_LENGTH) {
|
||||
setLastUnit(last + 1);
|
||||
return;
|
||||
}
|
||||
append(oldLength << 12);
|
||||
return;
|
||||
}
|
||||
|
||||
if(oldLength < 0 || newLength < 0) {
|
||||
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
if (oldLength == 0 && newLength == 0) {
|
||||
return;
|
||||
}
|
||||
int32_t newDelta = newLength - oldLength;
|
||||
if (newDelta != 0) {
|
||||
if (newDelta > 0 ? newDelta > (INT32_MAX - delta) : newDelta < (INT32_MIN - delta)) {
|
||||
// Integer overflow or underflow.
|
||||
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return;
|
||||
}
|
||||
delta += newDelta;
|
||||
}
|
||||
|
||||
int32_t head = 0x7000;
|
||||
if (oldLength < LENGTH_IN_1TRAIL && newLength < LENGTH_IN_1TRAIL) {
|
||||
head |= oldLength << 6;
|
||||
head |= newLength;
|
||||
append(head);
|
||||
} else if ((capacity - length) >= 5 || growArray()) {
|
||||
int32_t limit = length + 1;
|
||||
if(oldLength < LENGTH_IN_1TRAIL) {
|
||||
head |= oldLength << 6;
|
||||
} else if(oldLength <= 0x7fff) {
|
||||
head |= LENGTH_IN_1TRAIL << 6;
|
||||
array[limit++] = (uint16_t)(0x8000 | oldLength);
|
||||
} else {
|
||||
head |= (LENGTH_IN_2TRAIL + (oldLength >> 30)) << 6;
|
||||
array[limit++] = (uint16_t)(0x8000 | (oldLength >> 15));
|
||||
array[limit++] = (uint16_t)(0x8000 | oldLength);
|
||||
}
|
||||
if(newLength < LENGTH_IN_1TRAIL) {
|
||||
head |= newLength;
|
||||
} else if(newLength <= 0x7fff) {
|
||||
head |= LENGTH_IN_1TRAIL;
|
||||
array[limit++] = (uint16_t)(0x8000 | newLength);
|
||||
} else {
|
||||
head |= LENGTH_IN_2TRAIL + (newLength >> 30);
|
||||
array[limit++] = (uint16_t)(0x8000 | (newLength >> 15));
|
||||
array[limit++] = (uint16_t)(0x8000 | newLength);
|
||||
}
|
||||
array[length] = (uint16_t)head;
|
||||
length = limit;
|
||||
}
|
||||
}
|
||||
|
||||
void Edits::append(int32_t r) {
|
||||
if(length < capacity || growArray()) {
|
||||
array[length++] = (uint16_t)r;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Replaces the edits array with a larger heap-allocated one and copies the
 * current contents over.
 * The first growth away from stackArray uses a capacity of 2000 units;
 * afterwards the capacity doubles, capped at INT32_MAX.
 *
 * @return TRUE on success. On failure returns FALSE and sets errorCode to
 *         U_BUFFER_OVERFLOW_ERROR (cannot grow by at least 5 units) or
 *         U_MEMORY_ALLOCATION_ERROR (allocation failed).
 */
UBool Edits::growArray() {
    const UBool onStack = (array == stackArray);
    int32_t grownCapacity;
    if (onStack) {
        grownCapacity = 2000;
    } else {
        if (capacity == INT32_MAX) {
            errorCode = U_BUFFER_OVERFLOW_ERROR;
            return FALSE;
        }
        grownCapacity = (capacity < (INT32_MAX / 2)) ? 2 * capacity : INT32_MAX;
    }
    // Grow by at least 5 units so that a maximal change record will fit.
    if ((grownCapacity - capacity) < 5) {
        errorCode = U_BUFFER_OVERFLOW_ERROR;
        return FALSE;
    }
    uint16_t *grown = (uint16_t *)uprv_malloc((size_t)grownCapacity * 2);
    if (grown == NULL) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return FALSE;
    }
    uprv_memcpy(grown, array, (size_t)length * 2);
    if (!onStack) {
        uprv_free(array);
    }
    array = grown;
    capacity = grownCapacity;
    return TRUE;
}
|
||||
|
||||
/**
 * Propagates this object's error, if any, into outErrorCode.
 * An outErrorCode that already indicates a failure is left untouched.
 *
 * @return TRUE if outErrorCode indicates a failure when this function returns.
 */
UBool Edits::copyErrorTo(UErrorCode &outErrorCode) {
    if (U_SUCCESS(outErrorCode)) {
        if (U_SUCCESS(errorCode)) {
            return FALSE;
        }
        outErrorCode = errorCode;
    }
    return TRUE;
}
|
||||
|
||||
/**
 * @return TRUE if the accumulated length delta is non-zero or if any recorded unit
 *         is something other than an unchanged-text record.
 */
UBool Edits::hasChanges() const {
    if (delta != 0) {
        return TRUE;
    }
    const uint16_t *limit = array + length;
    for (const uint16_t *unit = array; unit < limit; ++unit) {
        if (*unit > MAX_UNCHANGED) {
            return TRUE;
        }
    }
    return FALSE;
}
|
||||
|
||||
// Creates an iterator over the len edits units starting at a.
// oc is stored as onlyChanges: next() then skips unchanged spans.
// crs is stored as coarse: next() then merges adjacent changes into one span.
// All indexes and span lengths start at 0.
Edits::Iterator::Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs) :
        array(a),
        index(0),
        length(len),
        remaining(0),
        onlyChanges(oc),
        coarse(crs),
        changed(FALSE),
        oldLength_(0),
        newLength_(0),
        srcIndex(0),
        replIndex(0),
        destIndex(0) {
}
|
||||
|
||||
int32_t Edits::Iterator::readLength(int32_t head) {
|
||||
if (head < LENGTH_IN_1TRAIL) {
|
||||
return head;
|
||||
} else if (head < LENGTH_IN_2TRAIL) {
|
||||
U_ASSERT(index < length);
|
||||
U_ASSERT(array[index] >= 0x8000);
|
||||
return array[index++];
|
||||
} else {
|
||||
U_ASSERT((index + 2) <= length);
|
||||
U_ASSERT(array[index] >= 0x8000);
|
||||
U_ASSERT(array[index + 1] >= 0x8000);
|
||||
int32_t len = ((head & 1) << 30) |
|
||||
((int32_t)(array[index] & 0x7fff) << 15) |
|
||||
(array[index + 1] & 0x7fff);
|
||||
index += 2;
|
||||
return len;
|
||||
}
|
||||
}
|
||||
|
||||
void Edits::Iterator::updateIndexes() {
|
||||
srcIndex += oldLength_;
|
||||
if (changed) {
|
||||
replIndex += newLength_;
|
||||
}
|
||||
destIndex += newLength_;
|
||||
}
|
||||
|
||||
// Marks the end of the iteration: the current span becomes an empty span
// beyond the string. Always returns FALSE so that callers can return the result.
UBool Edits::Iterator::noNext() {
    newLength_ = 0;
    oldLength_ = 0;
    return FALSE;
}
|
||||
|
||||
/**
 * Advances to the next span.
 *
 * Adjacent unchanged records are always merged into one span. With onlyChanges,
 * unchanged spans are skipped rather than returned. With coarse, adjacent change
 * records are merged into one span; otherwise each single change is returned
 * separately, including each change of a compressed same-length sequence.
 *
 * @param errorCode In/out error code. Nothing is done if it already indicates a failure.
 * @return TRUE if there is another span, FALSE at the end or on error.
 */
UBool Edits::Iterator::next(UErrorCode &errorCode) {
    // The errorCode is available in case integer overflows need to be guarded against.
    // Bailing out on a pre-set error also keeps caller loops simple.
    if (U_FAILURE(errorCode)) { return FALSE; }
    // Step over the span that the previous call returned.
    updateIndexes();
    if (remaining > 0) {
        // Fine-grained iterator: Continue a sequence of equal-length changes.
        --remaining;
        return TRUE;
    }
    if (index >= length) {
        return noNext();
    }
    int32_t unit = array[index++];
    if (unit <= MAX_UNCHANGED) {
        // Merge this and all directly following unchanged records.
        changed = FALSE;
        oldLength_ = unit + 1;
        for (; index < length && (unit = array[index]) <= MAX_UNCHANGED; ++index) {
            oldLength_ += unit + 1;
        }
        newLength_ = oldLength_;
        if (!onlyChanges) {
            return TRUE;
        }
        // Skip the unchanged span and continue with the change that follows it.
        updateIndexes();
        if (index >= length) {
            return noNext();
        }
        // unit already holds array[index], which is > MAX_UNCHANGED.
        ++index;
    }
    changed = TRUE;
    if (unit > MAX_SHORT_CHANGE) {
        // Head unit of a single replacement with explicit old and new lengths.
        U_ASSERT(unit <= 0x7fff);
        oldLength_ = readLength((unit >> 6) & 0x3f);
        newLength_ = readLength(unit & 0x3f);
        if (!coarse) {
            return TRUE;
        }
    } else if (!coarse) {
        // Split a sequence of equal-length changes that was compressed into one unit.
        oldLength_ = newLength_ = unit >> 12;
        remaining = unit & 0xfff;
        return TRUE;
    } else {
        const int32_t width = unit >> 12;
        const int32_t count = (unit & 0xfff) + 1;
        oldLength_ = newLength_ = count * width;
    }
    // Coarse iterator: merge all directly following change records into this span.
    while (index < length && (unit = array[index]) > MAX_UNCHANGED) {
        ++index;
        if (unit <= MAX_SHORT_CHANGE) {
            const int32_t width = unit >> 12;
            const int32_t total = ((unit & 0xfff) + 1) * width;
            oldLength_ += total;
            newLength_ += total;
        } else {
            U_ASSERT(unit <= 0x7fff);
            // The old length's trail units precede the new length's, so read old first.
            const int32_t oldLen = readLength((unit >> 6) & 0x3f);
            const int32_t newLen = readLength(unit & 0x3f);
            oldLength_ += oldLen;
            newLength_ += newLen;
        }
    }
    return TRUE;
}
|
||||
|
||||
/**
 * Moves the iterator to the span that contains source index i.
 * Iterates forward from the current position, or restarts from the beginning
 * if i lies before the current span.
 *
 * @param i Source index to look for.
 * @param errorCode In/out error code. Nothing is done if it already indicates a failure.
 * @return TRUE if a span containing i was found; FALSE if i is negative,
 *         on error, or if the iteration ended before reaching i.
 */
UBool Edits::Iterator::findSourceIndex(int32_t i, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode) || i < 0) { return FALSE; }
    if (i < srcIndex) {
        // Reset the iterator to the start.
        // The span lengths must be cleared as well: next() begins with updateIndexes(),
        // which would otherwise add the stale lengths of the old span to the fresh indexes.
        index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
    } else if (i < (srcIndex + oldLength_)) {
        // The index is in the current span.
        return TRUE;
    }
    while (next(errorCode)) {
        if (i < (srcIndex + oldLength_)) {
            // The index is in the current span.
            return TRUE;
        }
        if (remaining > 0) {
            // Is the index in one of the remaining compressed edits?
            // srcIndex is the start of the current span, before the remaining ones.
            int32_t len = (remaining + 1) * oldLength_;
            if (i < (srcIndex + len)) {
                int32_t n = (i - srcIndex) / oldLength_;  // 1 <= n <= remaining
                // Jump directly to the n-th following edit; these edits do not change
                // the length, so all three indexes advance by the same amount.
                len = n * oldLength_;
                srcIndex += len;
                replIndex += len;
                destIndex += len;
                remaining -= n;
                return TRUE;
            }
            // Make next() skip all of these edits at once.
            oldLength_ = newLength_ = len;
            remaining = 0;
        }
    }
    return FALSE;
}
|
||||
|
||||
U_NAMESPACE_END
|
@ -22,6 +22,7 @@
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/putil.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/uloc.h"
|
||||
#include "unicode/ures.h"
|
||||
#include "unicode/uscript.h"
|
||||
|
@ -33,6 +33,7 @@
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "ucase.h"
|
||||
#include "ucasemap_imp.h"
|
||||
#include "ustr_imp.h"
|
||||
|
||||
U_NAMESPACE_USE
|
||||
|
236
icu4c/source/common/ucasemap_imp.h
Normal file
236
icu4c/source/common/ucasemap_imp.h
Normal file
@ -0,0 +1,236 @@
|
||||
// Copyright (C) 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
// ucasemap_imp.h
|
||||
// created: 2017feb08 Markus W. Scherer
|
||||
|
||||
#ifndef __UCASEMAP_IMP_H__
|
||||
#define __UCASEMAP_IMP_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/ucasemap.h"
|
||||
#include "ucase.h"
|
||||
|
||||
#ifndef U_COMPARE_IGNORE_CASE
|
||||
/* see also unorm.h */
|
||||
/**
|
||||
* Option bit for unorm_compare:
|
||||
* Perform case-insensitive comparison.
|
||||
*/
|
||||
#define U_COMPARE_IGNORE_CASE 0x10000
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Internal API, used by u_strcasecmp() etc.
|
||||
* Compare strings case-insensitively,
|
||||
* in code point order or code unit order.
|
||||
*/
|
||||
U_CFUNC int32_t
|
||||
u_strcmpFold(const UChar *s1, int32_t length1,
|
||||
const UChar *s2, int32_t length2,
|
||||
uint32_t options,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Internal API, used for detecting length of
|
||||
* shared prefix case-insensitively.
|
||||
* @param s1 input string 1
|
||||
* @param length1 length of string 1, or -1 (NUL-terminated)
|
||||
* @param s2 input string 2
|
||||
* @param length2 length of string 2, or -1 (NUL-terminated)
|
||||
* @param options compare options
|
||||
* @param matchLen1 (output) length of partial prefix match in s1
|
||||
* @param matchLen2 (output) length of partial prefix match in s2
|
||||
* @param pErrorCode receives error status
|
||||
*/
|
||||
U_CAPI void
|
||||
u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1,
|
||||
const UChar *s2, int32_t length2,
|
||||
uint32_t options,
|
||||
int32_t *matchLen1, int32_t *matchLen2,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Are the Unicode properties loaded?
|
||||
* This must be used before internal functions are called that do
|
||||
* not perform this check.
|
||||
* Generate a debug assertion failure if data is not loaded.
|
||||
*/
|
||||
U_CFUNC UBool
|
||||
uprv_haveProperties(UErrorCode *pErrorCode);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
#include "unicode/unistr.h" // for UStringCaseMapper
|
||||
|
||||
/*
|
||||
* Internal string casing functions implementing
|
||||
* ustring.h/ustrcase.cpp and UnicodeString case mapping functions.
|
||||
*/
|
||||
|
||||
struct UCaseMap : public icu::UMemory {
|
||||
/** Implements most of ucasemap_open(). */
|
||||
UCaseMap(const char *localeID, uint32_t opts, UErrorCode *pErrorCode);
|
||||
~UCaseMap();
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
icu::BreakIterator *iter; /* We adopt the iterator, so we own it. */
|
||||
#endif
|
||||
char locale[32];
|
||||
int32_t caseLocale;
|
||||
uint32_t options;
|
||||
};
|
||||
|
||||
#if UCONFIG_NO_BREAK_ITERATION
|
||||
# define UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
# define UCASEMAP_BREAK_ITERATOR_UNUSED
|
||||
# define UCASEMAP_BREAK_ITERATOR
|
||||
# define UCASEMAP_BREAK_ITERATOR_NULL
|
||||
#else
|
||||
# define UCASEMAP_BREAK_ITERATOR_PARAM icu::BreakIterator *iter,
|
||||
# define UCASEMAP_BREAK_ITERATOR_UNUSED icu::BreakIterator *,
|
||||
# define UCASEMAP_BREAK_ITERATOR iter,
|
||||
# define UCASEMAP_BREAK_ITERATOR_NULL NULL,
|
||||
#endif
|
||||
|
||||
U_CFUNC int32_t
|
||||
ustrcase_getCaseLocale(const char *locale);
|
||||
|
||||
// TODO: swap src / dest if approved for new public api
|
||||
/** Implements UStringCaseMapper. */
|
||||
U_CFUNC int32_t U_CALLCONV
|
||||
ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
/** Implements UStringCaseMapper. */
|
||||
U_CFUNC int32_t U_CALLCONV
|
||||
ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
/** Implements UStringCaseMapper. */
|
||||
U_CFUNC int32_t U_CALLCONV
|
||||
ustrcase_internalToTitle(int32_t caseLocale, uint32_t options,
|
||||
icu::BreakIterator *iter,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
#endif
|
||||
|
||||
/** Implements UStringCaseMapper. */
|
||||
U_CFUNC int32_t U_CALLCONV
|
||||
ustrcase_internalFold(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Common string case mapping implementation for ucasemap_toXyz() and UnicodeString::toXyz().
|
||||
* Implements argument checking.
|
||||
*/
|
||||
U_CFUNC int32_t
|
||||
ustrcase_map(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UStringCaseMapper *stringCaseMapper,
|
||||
icu::Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Common string case mapping implementation for old-fashioned u_strToXyz() functions
|
||||
* that allow the source string to overlap the destination buffer.
|
||||
* Implements argument checking and internally works with an intermediate buffer if necessary.
|
||||
*/
|
||||
U_CFUNC int32_t
|
||||
ustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UStringCaseMapper *stringCaseMapper,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* UTF-8 string case mapping function type, used by ucasemap_mapUTF8().
|
||||
* UTF-8 version of UStringCaseMapper.
|
||||
* All error checking must be done.
|
||||
* The UCaseMap must be fully initialized, with locale and/or iter set as needed.
|
||||
* src and dest must not overlap.
|
||||
*/
|
||||
typedef int32_t U_CALLCONV
|
||||
UTF8CaseMapper(int32_t caseLocale, uint32_t options,
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
icu::BreakIterator *iter,
|
||||
#endif
|
||||
uint8_t *dest, int32_t destCapacity,
|
||||
const uint8_t *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
/** Implements UTF8CaseMapper. */
|
||||
U_CFUNC int32_t U_CALLCONV
|
||||
ucasemap_internalUTF8ToTitle(int32_t caseLocale, uint32_t options,
|
||||
icu::BreakIterator *iter,
|
||||
uint8_t *dest, int32_t destCapacity,
|
||||
const uint8_t *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Implements argument checking and buffer handling
|
||||
* for UTF-8 string case mapping as a common function.
|
||||
*/
|
||||
U_CFUNC int32_t
|
||||
ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
uint8_t *dest, int32_t destCapacity,
|
||||
const uint8_t *src, int32_t srcLength,
|
||||
UTF8CaseMapper *stringCaseMapper,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
namespace GreekUpper {
|
||||
|
||||
// Data bits.
|
||||
static const uint32_t UPPER_MASK = 0x3ff;
|
||||
static const uint32_t HAS_VOWEL = 0x1000;
|
||||
static const uint32_t HAS_YPOGEGRAMMENI = 0x2000;
|
||||
static const uint32_t HAS_ACCENT = 0x4000;
|
||||
static const uint32_t HAS_DIALYTIKA = 0x8000;
|
||||
// Further bits during data building and processing, not stored in the data map.
|
||||
static const uint32_t HAS_COMBINING_DIALYTIKA = 0x10000;
|
||||
static const uint32_t HAS_OTHER_GREEK_DIACRITIC = 0x20000;
|
||||
|
||||
static const uint32_t HAS_VOWEL_AND_ACCENT = HAS_VOWEL | HAS_ACCENT;
|
||||
static const uint32_t HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA =
|
||||
HAS_VOWEL_AND_ACCENT | HAS_DIALYTIKA;
|
||||
static const uint32_t HAS_EITHER_DIALYTIKA = HAS_DIALYTIKA | HAS_COMBINING_DIALYTIKA;
|
||||
|
||||
// State bits.
|
||||
static const uint32_t AFTER_CASED = 1;
|
||||
static const uint32_t AFTER_VOWEL_WITH_ACCENT = 2;
|
||||
|
||||
uint32_t getLetterData(UChar32 c);
|
||||
|
||||
/**
|
||||
* Returns a non-zero value for each of the Greek combining diacritics
|
||||
* listed in The Unicode Standard, version 8, chapter 7.2 Greek,
|
||||
* plus some perispomeni look-alikes.
|
||||
*/
|
||||
uint32_t getDiacriticData(UChar32 c);
|
||||
|
||||
} // namespace GreekUpper
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // __cplusplus
|
||||
|
||||
#endif // __UCASEMAP_IMP_H__
|
@ -26,7 +26,7 @@
|
||||
#include "unicode/ucasemap.h"
|
||||
#include "cmemory.h"
|
||||
#include "ucase.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "ucasemap_imp.h"
|
||||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
|
193
icu4c/source/common/unicode/casemap.h
Normal file
193
icu4c/source/common/unicode/casemap.h
Normal file
@ -0,0 +1,193 @@
|
||||
// Copyright (C) 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
// casemap.h
|
||||
// created: 2017jan12 Markus W. Scherer
|
||||
|
||||
#ifndef __CASEMAP_H__
|
||||
#define __CASEMAP_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uobject.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Low-level C++ case mapping functions.
|
||||
*/
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
|
||||
class BreakIterator;
|
||||
class Edits;
|
||||
|
||||
/**
|
||||
* Low-level C++ case mapping functions.
|
||||
*
|
||||
* @draft ICU 59
|
||||
*/
|
||||
class U_COMMON_API CaseMap final : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Lowercases a UTF-16 string and optionally records edits.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
|
||||
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents is undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
|
||||
* dest may be NULL and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* This function calls edits->reset() first. edits can be NULL.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful - or in case of a buffer overflow,
|
||||
* in which case it will be greater than destCapacity.
|
||||
*
|
||||
* @see u_strToLower
|
||||
* @draft ICU 59
|
||||
*/
|
||||
static int32_t toLower(
|
||||
const char *locale, uint32_t options,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UChar *dest, int32_t destCapacity, Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Uppercases a UTF-16 string and optionally records edits.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
|
||||
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents is undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
|
||||
* dest may be NULL and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* This function calls edits->reset() first. edits can be NULL.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful - or in case of a buffer overflow,
|
||||
* in which case it will be greater than destCapacity.
|
||||
*
|
||||
* @see u_strToUpper
|
||||
* @draft ICU 59
|
||||
*/
|
||||
static int32_t toUpper(
|
||||
const char *locale, uint32_t options,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UChar *dest, int32_t destCapacity, Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
/**
|
||||
* Titlecases a UTF-16 string and optionally records edits.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* Titlecasing uses a break iterator to find the first characters of words
|
||||
* that are to be titlecased. It titlecases those characters and lowercases
|
||||
* all others. (This can be modified with options bits.)
|
||||
*
|
||||
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
|
||||
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT,
|
||||
* U_TITLECASE_NO_LOWERCASE, U_TITLECASE_NO_BREAK_ADJUSTMENT.
|
||||
* @param iter A break iterator to find the first characters of words that are to be titlecased.
|
||||
* It is set to the source string (setText())
|
||||
* and used one or more times for iteration (first() and next()).
|
||||
* If NULL, then a word break iterator for the locale is used
|
||||
* (or something equivalent).
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents is undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
|
||||
* dest may be NULL and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* This function calls edits->reset() first. edits can be NULL.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful - or in case of a buffer overflow,
|
||||
* in which case it will be greater than destCapacity.
|
||||
*
|
||||
* @see u_strToTitle
|
||||
* @see ucasemap_toTitle
|
||||
* @draft ICU 59
|
||||
*/
|
||||
static int32_t toTitle(
|
||||
const char *locale, uint32_t options, BreakIterator *iter,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UChar *dest, int32_t destCapacity, Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
#endif // UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
/**
|
||||
* Case-folds a UTF-16 string and optionally records edits.
|
||||
*
|
||||
* Case-folding is locale-independent and not context-sensitive,
|
||||
* but there is an option for whether to include or exclude mappings for dotted I
|
||||
* and dotless i that are marked with 'T' in CaseFolding.txt.
|
||||
*
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT,
|
||||
* U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents is undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
|
||||
* dest may be NULL and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* This function calls edits->reset() first. edits can be NULL.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful - or in case of a buffer overflow,
|
||||
* in which case it will be greater than destCapacity.
|
||||
*
|
||||
* @see u_strFoldCase
|
||||
* @draft ICU 59
|
||||
*/
|
||||
static int32_t foldCase(
|
||||
uint32_t options,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UChar *dest, int32_t destCapacity, Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
private:
|
||||
CaseMap() = delete;
|
||||
CaseMap(const CaseMap &other) = delete;
|
||||
CaseMap &operator=(const CaseMap &other) = delete;
|
||||
};
|
||||
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // __CASEMAP_H__
|
244
icu4c/source/common/unicode/edits.h
Normal file
244
icu4c/source/common/unicode/edits.h
Normal file
@ -0,0 +1,244 @@
|
||||
// Copyright (C) 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
// edits.h
|
||||
// created: 2016dec30 Markus W. Scherer
|
||||
|
||||
#ifndef __EDITS_H__
|
||||
#define __EDITS_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uobject.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: C++ class Edits for low-level string transformations on styled text.
|
||||
*/
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Records lengths of string edits but not replacement text.
|
||||
* Supports replacements, insertions, deletions in linear progression.
|
||||
* Does not support moving/reordering of text.
|
||||
*
|
||||
* An Edits object tracks a separate UErrorCode, but ICU string transformation functions
|
||||
* (e.g., case mapping functions) merge any such errors into their API's UErrorCode.
|
||||
*
|
||||
* @draft ICU 59
|
||||
*/
|
||||
class U_COMMON_API Edits final : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Constructs an empty object.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
Edits() :
|
||||
array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0),
|
||||
errorCode(U_ZERO_ERROR) {}
|
||||
/**
|
||||
* Destructor.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
~Edits();
|
||||
|
||||
/**
|
||||
* Resets the data but may not release memory.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
void reset();
|
||||
|
||||
/**
|
||||
* Adds a record for an unchanged segment of text.
|
||||
* Normally called from inside ICU string transformation functions, not user code.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
void addUnchanged(int32_t unchangedLength);
|
||||
/**
|
||||
* Adds a record for a text replacement/insertion/deletion.
|
||||
* Normally called from inside ICU string transformation functions, not user code.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
void addReplace(int32_t oldLength, int32_t newLength);
|
||||
/**
|
||||
* Sets the UErrorCode if an error occurred while recording edits.
|
||||
* Preserves older error codes in the outErrorCode.
|
||||
* Normally called from inside ICU string transformation functions, not user code.
|
||||
* @return TRUE if U_FAILURE(outErrorCode)
|
||||
* @draft ICU 59
|
||||
*/
|
||||
UBool copyErrorTo(UErrorCode &outErrorCode);
|
||||
|
||||
/**
|
||||
* How much longer is the new text compared with the old text?
|
||||
* @return new length minus old length
|
||||
* @draft ICU 59
|
||||
*/
|
||||
int32_t lengthDelta() const { return delta; }
|
||||
/**
|
||||
* @return TRUE if there are any change edits
|
||||
* @draft ICU 59
|
||||
*/
|
||||
UBool hasChanges() const;
|
||||
|
||||
/**
|
||||
* Access to the list of edits.
|
||||
* @see getCoarseIterator
|
||||
* @see getFineIterator
|
||||
* @draft ICU 59
|
||||
*/
|
||||
struct Iterator final : public UMemory {
|
||||
/**
|
||||
* Copy constructor.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
Iterator(const Iterator &other) = default;
|
||||
/**
|
||||
* Assignment operator.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
Iterator &operator=(const Iterator &other) = default;
|
||||
|
||||
/**
|
||||
* Advances to the next edit.
|
||||
* @return TRUE if there is another edit
|
||||
* @draft ICU 59
|
||||
*/
|
||||
UBool next(UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Finds the edit that contains the source index.
|
||||
* The source index may be found in a non-change
|
||||
* even if normal iteration would skip non-changes.
|
||||
* Normal iteration can continue from a found edit.
|
||||
*
|
||||
* The iterator state before this search logically does not matter.
|
||||
* (It may affect the performance of the search.)
|
||||
*
|
||||
* The iterator state after this search is undefined
|
||||
* if the source index is out of bounds for the source string.
|
||||
*
|
||||
* @param i source index
|
||||
* @return TRUE if the edit for the source index was found
|
||||
* @draft ICU 59
|
||||
*/
|
||||
UBool findSourceIndex(int32_t i, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* @return TRUE if this edit replaces oldLength() units with newLength() different ones.
|
||||
* FALSE if oldLength units remain unchanged.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
UBool hasChange() const { return changed; }
|
||||
/**
|
||||
* @return the number of units in the original string which are replaced or remain unchanged.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
int32_t oldLength() const { return oldLength_; }
|
||||
/**
|
||||
* @return the number of units in the modified string, if hasChange() is TRUE.
|
||||
* Same as oldLength if hasChange() is FALSE.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
int32_t newLength() const { return newLength_; }
|
||||
|
||||
/**
|
||||
* @return the current index into the source string
|
||||
* @draft ICU 59
|
||||
*/
|
||||
int32_t sourceIndex() const { return srcIndex; }
|
||||
/**
|
||||
* @return the current index into the replacement-characters-only string,
|
||||
* not counting unchanged spans
|
||||
* @draft ICU 59
|
||||
*/
|
||||
int32_t replacementIndex() const { return replIndex; }
|
||||
/**
|
||||
* @return the current index into the full destination string
|
||||
* @draft ICU 59
|
||||
*/
|
||||
int32_t destinationIndex() const { return destIndex; }
|
||||
|
||||
private:
|
||||
friend class Edits;
|
||||
|
||||
Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs);
|
||||
|
||||
int32_t readLength(int32_t head);
|
||||
void updateIndexes();
|
||||
UBool noNext();
|
||||
|
||||
const uint16_t *array;
|
||||
int32_t index, length;
|
||||
int32_t remaining;
|
||||
UBool onlyChanges, coarse;
|
||||
|
||||
UBool changed;
|
||||
int32_t oldLength_, newLength_;
|
||||
int32_t srcIndex, replIndex, destIndex;
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns an Iterator for coarse-grained changes for simple string updates.
|
||||
* Skips non-changes.
|
||||
* @return an Iterator that merges adjacent changes.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
Iterator getCoarseChangesIterator() const {
|
||||
return Iterator(array, length, TRUE, TRUE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an Iterator for coarse-grained changes and non-changes for simple string updates.
|
||||
* @return an Iterator that merges adjacent changes.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
Iterator getCoarseIterator() const {
|
||||
return Iterator(array, length, FALSE, TRUE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an Iterator for fine-grained changes for modifying styled text.
|
||||
* Skips non-changes.
|
||||
* @return an Iterator that separates adjacent changes.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
Iterator getFineChangesIterator() const {
|
||||
return Iterator(array, length, TRUE, FALSE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an Iterator for fine-grained changes and non-changes for modifying styled text.
|
||||
* @return an Iterator that separates adjacent changes.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
Iterator getFineIterator() const {
|
||||
return Iterator(array, length, FALSE, FALSE);
|
||||
}
|
||||
|
||||
private:
|
||||
Edits(const Edits &) = delete;
|
||||
Edits &operator=(const Edits &) = delete;
|
||||
|
||||
void setLastUnit(int32_t last) { array[length - 1] = (uint16_t)last; }
|
||||
int32_t lastUnit() const { return length > 0 ? array[length - 1] : 0xffff; }
|
||||
|
||||
void append(int32_t r);
|
||||
UBool growArray();
|
||||
|
||||
static const int32_t STACK_CAPACITY = 100;
|
||||
uint16_t *array;
|
||||
int32_t capacity;
|
||||
int32_t length;
|
||||
int32_t delta;
|
||||
UErrorCode errorCode;
|
||||
uint16_t stackArray[STACK_CAPACITY];
|
||||
};
|
||||
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // __EDITS_H__
|
@ -23,11 +23,6 @@
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/localpointer.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
#include "unicode/uobject.h"
|
||||
#endif // U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/ustring.h"
|
||||
|
||||
/**
|
||||
@ -88,8 +83,6 @@ ucasemap_close(UCaseMap *csm);
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class BreakIterator;
|
||||
|
||||
/**
|
||||
* \class LocalUCaseMapPointer
|
||||
* "Smart pointer" class, closes a UCaseMap via ucasemap_close().
|
||||
@ -101,401 +94,6 @@ class BreakIterator;
|
||||
*/
|
||||
U_DEFINE_LOCAL_OPEN_POINTER(LocalUCaseMapPointer, UCaseMap, ucasemap_close);
|
||||
|
||||
// TODO: move to new C++ unicode/casemap.h
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Records lengths of string edits but not replacement text.
|
||||
* Supports replacements, insertions, deletions in linear progression.
|
||||
* Does not support moving/reordering of text.
|
||||
*
|
||||
* An Edits object tracks a separate UErrorCode, but ICU string transformation functions
|
||||
* (e.g., case mapping functions) merge any such errors into their API's UErrorCode.
|
||||
*
|
||||
* @draft ICU 59
|
||||
*/
|
||||
class U_COMMON_API Edits final : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Constructs an empty object.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
Edits() :
|
||||
array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0),
|
||||
errorCode(U_ZERO_ERROR) {}
|
||||
/**
|
||||
* Destructor.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
~Edits();
|
||||
|
||||
/**
|
||||
* Resets the data but may not release memory.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
void reset();
|
||||
|
||||
/**
|
||||
* Adds a record for an unchanged segment of text.
|
||||
* Normally called from inside ICU string transformation functions, not user code.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
void addUnchanged(int32_t unchangedLength);
|
||||
/**
|
||||
* Adds a record for a text replacement/insertion/deletion.
|
||||
* Normally called from inside ICU string transformation functions, not user code.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
void addReplace(int32_t oldLength, int32_t newLength);
|
||||
/**
|
||||
* Sets the UErrorCode if an error occurred while recording edits.
|
||||
* Preserves older error codes in the outErrorCode.
|
||||
* Normally called from inside ICU string transformation functions, not user code.
|
||||
* @return TRUE if U_FAILURE(outErrorCode)
|
||||
* @draft ICU 59
|
||||
*/
|
||||
UBool copyErrorTo(UErrorCode &outErrorCode);
|
||||
|
||||
/**
|
||||
* How much longer is the new text compared with the old text?
|
||||
* @return new length minus old length
|
||||
* @draft ICU 59
|
||||
*/
|
||||
int32_t lengthDelta() const { return delta; }
|
||||
/**
|
||||
* @return TRUE if there are any change edits
|
||||
* @draft ICU 59
|
||||
*/
|
||||
UBool hasChanges() const;
|
||||
|
||||
/**
|
||||
* Access to the list of edits.
|
||||
* @see getCoarseIterator
|
||||
* @see getFineIterator
|
||||
* @draft ICU 59
|
||||
*/
|
||||
struct Iterator final : public UMemory {
|
||||
/**
|
||||
* Copy constructor.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
Iterator(const Iterator &other) = default;
|
||||
/**
|
||||
* Assignment operator.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
Iterator &operator=(const Iterator &other) = default;
|
||||
|
||||
/**
|
||||
* Advances to the next edit.
|
||||
* @return TRUE if there is another edit
|
||||
* @draft ICU 59
|
||||
*/
|
||||
UBool next(UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Finds the edit that contains the source index.
|
||||
* The source index may be found in a non-change
|
||||
* even if normal iteration would skip non-changes.
|
||||
* Normal iteration can continue from a found edit.
|
||||
*
|
||||
* The iterator state before this search logically does not matter.
|
||||
* (It may affect the performance of the search.)
|
||||
*
|
||||
* The iterator state after this search is undefined
|
||||
* if the source index is out of bounds for the source string.
|
||||
*
|
||||
* @param i source index
|
||||
* @return TRUE if the edit for the source index was found
|
||||
* @draft ICU 59
|
||||
*/
|
||||
UBool findSourceIndex(int32_t i, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* @return TRUE if this edit replaces oldLength() units with newLength() different ones.
|
||||
* FALSE if oldLength units remain unchanged.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
UBool hasChange() const { return changed; }
|
||||
/**
|
||||
* @return the number of units in the original string which are replaced or remain unchanged.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
int32_t oldLength() const { return oldLength_; }
|
||||
/**
|
||||
* @return the number of units in the modified string, if hasChange() is TRUE.
|
||||
* Same as oldLength if hasChange() is FALSE.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
int32_t newLength() const { return newLength_; }
|
||||
|
||||
/**
|
||||
* @return the current index into the source string
|
||||
* @draft ICU 59
|
||||
*/
|
||||
int32_t sourceIndex() const { return srcIndex; }
|
||||
/**
|
||||
* @return the current index into the replacement-characters-only string,
|
||||
* not counting unchanged spans
|
||||
* @draft ICU 59
|
||||
*/
|
||||
int32_t replacementIndex() const { return replIndex; }
|
||||
/**
|
||||
* @return the current index into the full destination string
|
||||
* @draft ICU 59
|
||||
*/
|
||||
int32_t destinationIndex() const { return destIndex; }
|
||||
|
||||
private:
|
||||
friend class Edits;
|
||||
|
||||
Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs);
|
||||
|
||||
int32_t readLength(int32_t head);
|
||||
void updateIndexes();
|
||||
UBool noNext();
|
||||
|
||||
const uint16_t *array;
|
||||
int32_t index, length;
|
||||
int32_t remaining;
|
||||
UBool onlyChanges, coarse;
|
||||
|
||||
UBool changed;
|
||||
int32_t oldLength_, newLength_;
|
||||
int32_t srcIndex, replIndex, destIndex;
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns an Iterator for coarse-grained changes for simple string updates.
|
||||
* Skips non-changes.
|
||||
* @return an Iterator that merges adjacent changes.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
Iterator getCoarseChangesIterator() const {
|
||||
return Iterator(array, length, TRUE, TRUE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an Iterator for coarse-grained changes and non-changes for simple string updates.
|
||||
* @return an Iterator that merges adjacent changes.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
Iterator getCoarseIterator() const {
|
||||
return Iterator(array, length, FALSE, TRUE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an Iterator for fine-grained changes for modifying styled text.
|
||||
* Skips non-changes.
|
||||
* @return an Iterator that separates adjacent changes.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
Iterator getFineChangesIterator() const {
|
||||
return Iterator(array, length, TRUE, FALSE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an Iterator for fine-grained changes and non-changes for modifying styled text.
|
||||
* @return an Iterator that separates adjacent changes.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
Iterator getFineIterator() const {
|
||||
return Iterator(array, length, FALSE, FALSE);
|
||||
}
|
||||
|
||||
private:
|
||||
Edits(const Edits &) = delete;
|
||||
Edits &operator=(const Edits &) = delete;
|
||||
|
||||
void setLastUnit(int32_t last) { array[length - 1] = (uint16_t)last; }
|
||||
int32_t lastUnit() const { return length > 0 ? array[length - 1] : 0xffff; }
|
||||
|
||||
void append(int32_t r);
|
||||
UBool growArray();
|
||||
|
||||
static const int32_t STACK_CAPACITY = 100;
|
||||
uint16_t *array;
|
||||
int32_t capacity;
|
||||
int32_t length;
|
||||
int32_t delta;
|
||||
UErrorCode errorCode;
|
||||
uint16_t stackArray[STACK_CAPACITY];
|
||||
};
|
||||
|
||||
/**
|
||||
* Low-level C++ case mapping functions.
|
||||
*
|
||||
* @draft ICU 59
|
||||
*/
|
||||
class U_COMMON_API CaseMap final : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Lowercases a UTF-16 string and optionally records edits.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
|
||||
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents is undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
|
||||
* dest may be NULL and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* This function calls edits->reset() first. edits can be NULL.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful - or in case of a buffer overflow,
|
||||
* in which case it will be greater than destCapacity.
|
||||
*
|
||||
* @see u_strToLower
|
||||
* @draft ICU 59
|
||||
*/
|
||||
static int32_t toLower(
|
||||
const char *locale, uint32_t options,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UChar *dest, int32_t destCapacity, Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Uppercases a UTF-16 string and optionally records edits.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
|
||||
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents is undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
|
||||
* dest may be NULL and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* This function calls edits->reset() first. edits can be NULL.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful - or in case of a buffer overflow,
|
||||
* in which case it will be greater than destCapacity.
|
||||
*
|
||||
* @see u_strToUpper
|
||||
* @draft ICU 59
|
||||
*/
|
||||
static int32_t toUpper(
|
||||
const char *locale, uint32_t options,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UChar *dest, int32_t destCapacity, Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
/**
|
||||
* Titlecases a UTF-16 string and optionally records edits.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* Titlecasing uses a break iterator to find the first characters of words
|
||||
* that are to be titlecased. It titlecases those characters and lowercases
|
||||
* all others. (This can be modified with options bits.)
|
||||
*
|
||||
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
|
||||
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT,
|
||||
* U_TITLECASE_NO_LOWERCASE, U_TITLECASE_NO_BREAK_ADJUSTMENT.
|
||||
* @param iter A break iterator to find the first characters of words that are to be titlecased.
|
||||
* It is set to the source string (setText())
|
||||
* and used one or more times for iteration (first() and next()).
|
||||
* If NULL, then a word break iterator for the locale is used
|
||||
* (or something equivalent).
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents is undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
|
||||
* dest may be NULL and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* This function calls edits->reset() first. edits can be NULL.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful - or in case of a buffer overflow,
|
||||
* in which case it will be greater than destCapacity.
|
||||
*
|
||||
* @see u_strToTitle
|
||||
* @see ucasemap_toTitle
|
||||
* @draft ICU 59
|
||||
*/
|
||||
static int32_t toTitle(
|
||||
const char *locale, uint32_t options, BreakIterator *iter,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UChar *dest, int32_t destCapacity, Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
#endif // UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
/**
|
||||
* Case-folds a UTF-16 string and optionally records edits.
|
||||
*
|
||||
* Case-folding is locale-independent and not context-sensitive,
|
||||
* but there is an option for whether to include or exclude mappings for dotted I
|
||||
* and dotless i that are marked with 'T' in CaseFolding.txt.
|
||||
*
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT,
|
||||
* U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents is undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
|
||||
* dest may be NULL and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* This function calls edits->reset() first. edits can be NULL.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful - or in case of a buffer overflow,
|
||||
* in which case it will be greater than destCapacity.
|
||||
*
|
||||
* @see u_strFoldCase
|
||||
* @draft ICU 59
|
||||
*/
|
||||
static int32_t foldCase(
|
||||
uint32_t options,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UChar *dest, int32_t destCapacity, Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
private:
|
||||
CaseMap() = delete;
|
||||
CaseMap(const CaseMap &other) = delete;
|
||||
CaseMap &operator=(const CaseMap &other) = delete;
|
||||
};
|
||||
|
||||
/**
|
||||
* Omit unchanged text when case-mapping with Edits.
|
||||
*
|
||||
* @draft ICU 59
|
||||
*/
|
||||
#define UCASEMAP_OMIT_UNCHANGED_TEXT 0x4000
|
||||
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
@ -587,6 +185,15 @@ ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode);
|
||||
*/
|
||||
#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
|
||||
|
||||
/**
|
||||
* Omit unchanged text when case-mapping with Edits.
|
||||
*
|
||||
* @see CaseMap
|
||||
* @see Edits
|
||||
* @draft ICU 59
|
||||
*/
|
||||
#define UCASEMAP_OMIT_UNCHANGED_TEXT 0x4000
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
/**
|
||||
|
@ -19,6 +19,8 @@
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/casemap.h"
|
||||
#include "unicode/edits.h"
|
||||
#include "unicode/putil.h"
|
||||
#include "cstring.h"
|
||||
#include "cmemory.h"
|
||||
@ -26,8 +28,8 @@
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "uassert.h"
|
||||
#include "ucasemap_imp.h"
|
||||
#include "uelement.h"
|
||||
#include "ustr_imp.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -21,7 +21,7 @@
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/ucasemap.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "ucasemap_imp.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -25,7 +25,7 @@
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/ucasemap.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "ucasemap_imp.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -18,24 +18,6 @@
|
||||
#define __USTR_IMP_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/ucasemap.h"
|
||||
#include "unicode/uiter.h"
|
||||
#include "ucase.h"
|
||||
|
||||
/** Simple declaration to avoid including unicode/ubrk.h. */
|
||||
#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
|
||||
# define UBRK_TYPEDEF_UBREAK_ITERATOR
|
||||
typedef struct UBreakIterator UBreakIterator;
|
||||
#endif
|
||||
|
||||
#ifndef U_COMPARE_IGNORE_CASE
|
||||
/* see also unorm.h */
|
||||
/**
|
||||
* Option bit for unorm_compare:
|
||||
* Perform case-insensitive comparison.
|
||||
*/
|
||||
#define U_COMPARE_IGNORE_CASE 0x10000
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Internal option for unorm_cmpEquivFold() for strncmp style.
|
||||
@ -54,230 +36,6 @@ uprv_strCompare(const UChar *s1, int32_t length1,
|
||||
const UChar *s2, int32_t length2,
|
||||
UBool strncmpStyle, UBool codePointOrder);
|
||||
|
||||
/**
|
||||
* Internal API, used by u_strcasecmp() etc.
|
||||
* Compare strings case-insensitively,
|
||||
* in code point order or code unit order.
|
||||
*/
|
||||
U_CFUNC int32_t
|
||||
u_strcmpFold(const UChar *s1, int32_t length1,
|
||||
const UChar *s2, int32_t length2,
|
||||
uint32_t options,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Internal API, used for detecting length of
|
||||
* shared prefix case-insensitively.
|
||||
* @param s1 input string 1
|
||||
* @param length1 length of string 1, or -1 (NUL-terminated)
|
||||
* @param s2 input string 2
|
||||
* @param length2 length of string 2, or -1 (NUL-terminated)
|
||||
* @param options compare options
|
||||
* @param matchLen1 (output) length of partial prefix match in s1
|
||||
* @param matchLen2 (output) length of partial prefix match in s2
|
||||
* @param pErrorCode receives error status
|
||||
*/
|
||||
U_CAPI void
|
||||
u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1,
|
||||
const UChar *s2, int32_t length2,
|
||||
uint32_t options,
|
||||
int32_t *matchLen1, int32_t *matchLen2,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Are the Unicode properties loaded?
|
||||
* This must be used before internal functions are called that do
|
||||
* not perform this check.
|
||||
* Generate a debug assertion failure if data is not loaded.
|
||||
*/
|
||||
U_CFUNC UBool
|
||||
uprv_haveProperties(UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Load the Unicode property data.
|
||||
* Intended primarily for use from u_init().
|
||||
* Has no effect if property data is already loaded.
|
||||
* NOT thread safe.
|
||||
*/
|
||||
/*U_CFUNC int8_t
|
||||
uprv_loadPropsData(UErrorCode *errorCode);*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
// TODO: Consider moving these case mapping definitions
|
||||
// into a new internal header like ucasemap_imp.h.
|
||||
|
||||
#include "unicode/unistr.h" // for UStringCaseMapper
|
||||
|
||||
/*
|
||||
* Internal string casing functions implementing
|
||||
* ustring.h/ustrcase.c and UnicodeString case mapping functions.
|
||||
*/
|
||||
|
||||
/**
 * Definition of the UCaseMap object.
 * Holds a locale ID buffer, a case-locale value, an options word and,
 * unless UCONFIG_NO_BREAK_ITERATION is set, an owned break iterator.
 */
struct UCaseMap : public icu::UMemory {
|
||||
/** Implements most of ucasemap_open(). */
|
||||
UCaseMap(const char *localeID, uint32_t opts, UErrorCode *pErrorCode);
|
||||
~UCaseMap();
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
icu::BreakIterator *iter; /* We adopt the iterator, so we own it. */
|
||||
#endif
|
||||
char locale[32]; /* locale ID storage; fixed-size buffer of 32 chars */
|
||||
int32_t caseLocale; /* NOTE(review): presumably the result of ustrcase_getCaseLocale() for this locale -- confirm */
|
||||
uint32_t options; /* NOTE(review): presumably the options bit set passed as opts to the constructor -- confirm */
|
||||
};
|
||||
|
||||
/*
 * Helper macros for function signatures and calls that carry a break iterator
 * only when break iteration is compiled in.
 *
 * If UCONFIG_NO_BREAK_ITERATION is set, all four macros expand to nothing.
 * Otherwise they expand to:
 *   UCASEMAP_BREAK_ITERATOR_PARAM   "icu::BreakIterator *iter,"  (named parameter)
 *   UCASEMAP_BREAK_ITERATOR_UNUSED  "icu::BreakIterator *,"      (unnamed parameter)
 *   UCASEMAP_BREAK_ITERATOR         "iter,"
 *   UCASEMAP_BREAK_ITERATOR_NULL    "NULL,"
 *
 * The trailing comma is part of each expansion, so the macro must not be
 * followed by a comma where it is used.
 */
#if UCONFIG_NO_BREAK_ITERATION
|
||||
# define UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
# define UCASEMAP_BREAK_ITERATOR_UNUSED
|
||||
# define UCASEMAP_BREAK_ITERATOR
|
||||
# define UCASEMAP_BREAK_ITERATOR_NULL
|
||||
#else
|
||||
# define UCASEMAP_BREAK_ITERATOR_PARAM icu::BreakIterator *iter,
|
||||
# define UCASEMAP_BREAK_ITERATOR_UNUSED icu::BreakIterator *,
|
||||
# define UCASEMAP_BREAK_ITERATOR iter,
|
||||
# define UCASEMAP_BREAK_ITERATOR_NULL NULL,
|
||||
#endif
|
||||
|
||||
U_CFUNC int32_t
|
||||
ustrcase_getCaseLocale(const char *locale);
|
||||
|
||||
// TODO: swap src / dest if approved for new public api
|
||||
/** Implements UStringCaseMapper. */
|
||||
U_CFUNC int32_t U_CALLCONV
|
||||
ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
/** Implements UStringCaseMapper. */
|
||||
U_CFUNC int32_t U_CALLCONV
|
||||
ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
/** Implements UStringCaseMapper. */
|
||||
U_CFUNC int32_t U_CALLCONV
|
||||
ustrcase_internalToTitle(int32_t caseLocale, uint32_t options,
|
||||
icu::BreakIterator *iter,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
#endif
|
||||
|
||||
/** Implements UStringCaseMapper. */
|
||||
U_CFUNC int32_t U_CALLCONV
|
||||
ustrcase_internalFold(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Common string case mapping implementation for ucasemap_toXyz() and UnicodeString::toXyz().
|
||||
* Implements argument checking.
|
||||
*/
|
||||
U_CFUNC int32_t
|
||||
ustrcase_map(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UStringCaseMapper *stringCaseMapper,
|
||||
icu::Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Common string case mapping implementation for old-fashioned u_strToXyz() functions
|
||||
* that allow the source string to overlap the destination buffer.
|
||||
* Implements argument checking and internally works with an intermediate buffer if necessary.
|
||||
*/
|
||||
U_CFUNC int32_t
|
||||
ustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UStringCaseMapper *stringCaseMapper,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* UTF-8 string case mapping function type, used by ucasemap_mapUTF8().
|
||||
* UTF-8 version of UStringCaseMapper.
|
||||
* All error checking must be done.
|
||||
* The UCaseMap must be fully initialized, with locale and/or iter set as needed.
|
||||
* src and dest must not overlap.
|
||||
*/
|
||||
typedef int32_t U_CALLCONV
|
||||
UTF8CaseMapper(int32_t caseLocale, uint32_t options,
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
icu::BreakIterator *iter,
|
||||
#endif
|
||||
uint8_t *dest, int32_t destCapacity,
|
||||
const uint8_t *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
/** Implements UTF8CaseMapper. */
|
||||
U_CFUNC int32_t U_CALLCONV
|
||||
ucasemap_internalUTF8ToTitle(int32_t caseLocale, uint32_t options,
|
||||
icu::BreakIterator *iter,
|
||||
uint8_t *dest, int32_t destCapacity,
|
||||
const uint8_t *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Implements argument checking and buffer handling
|
||||
* for UTF-8 string case mapping as a common function.
|
||||
*/
|
||||
U_CFUNC int32_t
|
||||
ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
uint8_t *dest, int32_t destCapacity,
|
||||
const uint8_t *src, int32_t srcLength,
|
||||
UTF8CaseMapper *stringCaseMapper,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
namespace GreekUpper {
|
||||
|
||||
// Data bits.
|
||||
static const uint32_t UPPER_MASK = 0x3ff;
|
||||
static const uint32_t HAS_VOWEL = 0x1000;
|
||||
static const uint32_t HAS_YPOGEGRAMMENI = 0x2000;
|
||||
static const uint32_t HAS_ACCENT = 0x4000;
|
||||
static const uint32_t HAS_DIALYTIKA = 0x8000;
|
||||
// Further bits during data building and processing, not stored in the data map.
|
||||
static const uint32_t HAS_COMBINING_DIALYTIKA = 0x10000;
|
||||
static const uint32_t HAS_OTHER_GREEK_DIACRITIC = 0x20000;
|
||||
|
||||
static const uint32_t HAS_VOWEL_AND_ACCENT = HAS_VOWEL | HAS_ACCENT;
|
||||
static const uint32_t HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA =
|
||||
HAS_VOWEL_AND_ACCENT | HAS_DIALYTIKA;
|
||||
static const uint32_t HAS_EITHER_DIALYTIKA = HAS_DIALYTIKA | HAS_COMBINING_DIALYTIKA;
|
||||
|
||||
// State bits.
|
||||
static const uint32_t AFTER_CASED = 1;
|
||||
static const uint32_t AFTER_VOWEL_WITH_ACCENT = 2;
|
||||
|
||||
uint32_t getLetterData(UChar32 c);
|
||||
|
||||
/**
|
||||
* Returns a non-zero value for each of the Greek combining diacritics
|
||||
* listed in The Unicode Standard, version 8, chapter 7.2 Greek,
|
||||
* plus some perispomeni look-alikes.
|
||||
*/
|
||||
uint32_t getDiacriticData(UChar32 c);
|
||||
|
||||
} // namespace GreekUpper
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // __cplusplus
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ustr_hashUCharsN(const UChar *str, int32_t length);
|
||||
|
||||
|
@ -22,12 +22,13 @@
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
#include "unicode/brkiter.h"
|
||||
#include "unicode/casemap.h"
|
||||
#include "unicode/localpointer.h"
|
||||
#include "unicode/ubrk.h"
|
||||
#include "unicode/ucasemap.h"
|
||||
#include "cmemory.h"
|
||||
#include "ucase.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "ucasemap_imp.h"
|
||||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
|
@ -22,6 +22,8 @@
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/brkiter.h"
|
||||
#include "unicode/casemap.h"
|
||||
#include "unicode/edits.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/ucasemap.h"
|
||||
#include "unicode/ubrk.h"
|
||||
@ -29,6 +31,7 @@
|
||||
#include "unicode/utf16.h"
|
||||
#include "cmemory.h"
|
||||
#include "ucase.h"
|
||||
#include "ucasemap_imp.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "uassert.h"
|
||||
|
||||
@ -36,334 +39,6 @@ U_NAMESPACE_BEGIN
|
||||
|
||||
namespace {
|
||||
|
||||
// 0000uuuuuuuuuuuu records u+1 unchanged text units.
|
||||
const int32_t MAX_UNCHANGED_LENGTH = 0x1000;
|
||||
const int32_t MAX_UNCHANGED = MAX_UNCHANGED_LENGTH - 1;
|
||||
|
||||
// 0wwwcccccccccccc with w=1..6 records ccc+1 replacements of w:w text units.
|
||||
// No length change.
|
||||
const int32_t MAX_SHORT_WIDTH = 6;
|
||||
const int32_t MAX_SHORT_CHANGE_LENGTH = 0xfff;
|
||||
const int32_t MAX_SHORT_CHANGE = 0x6fff;
|
||||
|
||||
// 0111mmmmmmnnnnnn records a replacement of m text units with n.
|
||||
// m or n = 61: actual length follows in the next edits array unit.
|
||||
// m or n = 62..63: actual length follows in the next two edits array units.
|
||||
// Bit 30 of the actual length is in the head unit.
|
||||
// Trailing units have bit 15 set.
|
||||
const int32_t LENGTH_IN_1TRAIL = 61;
|
||||
const int32_t LENGTH_IN_2TRAIL = 62;
|
||||
|
||||
} // namespace
|
||||
|
||||
/** Releases the edits buffer unless it is still the object's own stackArray. */
Edits::~Edits() {
    if(array == stackArray) { return; }
    uprv_free(array);
}
|
||||
|
||||
void Edits::reset() {
|
||||
length = 0;
|
||||
}
|
||||
|
||||
/**
 * Records a run of unchanged text.
 * Does nothing if an error is already recorded or the length is 0;
 * a negative length sets U_ILLEGAL_ARGUMENT_ERROR.
 *
 * @param unchangedLength number of unchanged text units
 */
void Edits::addUnchanged(int32_t unchangedLength) {
    if(U_FAILURE(errorCode)) { return; }
    if(unchangedLength <= 0) {
        if(unchangedLength < 0) {
            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        }
        return;
    }

    // If the most recent unit is an unchanged-text record that is not yet full,
    // top it up first.
    const int32_t prev = lastUnit();
    if(prev < MAX_UNCHANGED) {
        const int32_t room = MAX_UNCHANGED - prev;
        if(unchangedLength <= room) {
            setLastUnit(prev + unchangedLength);
            return;
        }
        setLastUnit(MAX_UNCHANGED);
        unchangedLength -= room;
    }

    // Emit as many full units as the rest of the length needs ...
    for(; unchangedLength >= MAX_UNCHANGED_LENGTH; unchangedLength -= MAX_UNCHANGED_LENGTH) {
        append(MAX_UNCHANGED);
    }
    // ... and one partial unit for what is left. A unit stores length-1.
    if(unchangedLength > 0) {
        append(unchangedLength - 1);
    }
}
|
||||
|
||||
/**
 * Records the replacement of oldLength text units by newLength text units.
 * Does nothing if an error is already recorded or both lengths are 0.
 * A negative length sets U_ILLEGAL_ARGUMENT_ERROR; an overflow of the
 * accumulated length delta sets U_INDEX_OUTOFBOUNDS_ERROR.
 *
 * @param oldLength number of text units replaced
 * @param newLength number of text units in the replacement
 */
void Edits::addReplace(int32_t oldLength, int32_t newLength) {
    if(U_FAILURE(errorCode)) { return; }

    // Compact form: short old text replaced by new text of the same length.
    if(oldLength == newLength && 0 < oldLength && oldLength <= MAX_SHORT_WIDTH) {
        const int32_t prev = lastUnit();
        // The previous unit can absorb this edit if it is a short-replacement
        // record of the same width whose counter is not yet full.
        const UBool mergeable =
            MAX_UNCHANGED < prev && prev < MAX_SHORT_CHANGE &&
            (prev >> 12) == oldLength && (prev & 0xfff) < MAX_SHORT_CHANGE_LENGTH;
        if(mergeable) {
            setLastUnit(prev + 1);
        } else {
            append(oldLength << 12);
        }
        return;
    }

    if(oldLength < 0 || newLength < 0) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    if(oldLength == 0 && newLength == 0) {
        return;
    }

    // Track the overall length change, guarding the running sum against
    // integer overflow and underflow.
    const int32_t change = newLength - oldLength;
    if(change > 0) {
        if(change > (INT32_MAX - delta)) {
            errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
            return;
        }
    } else if(change < 0) {
        if(change < (INT32_MIN - delta)) {
            errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
            return;
        }
    }
    delta += change;

    // General form: head unit 0111mmmmmmnnnnnn, optionally followed by trail units.
    if(oldLength < LENGTH_IN_1TRAIL && newLength < LENGTH_IN_1TRAIL) {
        // Both lengths fit into the head unit itself.
        append(0x7000 | (oldLength << 6) | newLength);
        return;
    }

    // A maximal record is one head unit plus 2+2 trail units.
    if((capacity - length) < 5 && !growArray()) {
        return;
    }

    int32_t head = 0x7000;
    int32_t end = length + 1;  // trail units go after the head unit's slot

    if(oldLength >= LENGTH_IN_1TRAIL) {
        if(oldLength <= 0x7fff) {
            head |= LENGTH_IN_1TRAIL << 6;
            array[end++] = (uint16_t)(0x8000 | oldLength);
        } else {
            head |= (LENGTH_IN_2TRAIL + (oldLength >> 30)) << 6;
            array[end++] = (uint16_t)(0x8000 | (oldLength >> 15));
            array[end++] = (uint16_t)(0x8000 | oldLength);
        }
    } else {
        head |= oldLength << 6;
    }

    if(newLength >= LENGTH_IN_1TRAIL) {
        if(newLength <= 0x7fff) {
            head |= LENGTH_IN_1TRAIL;
            array[end++] = (uint16_t)(0x8000 | newLength);
        } else {
            head |= LENGTH_IN_2TRAIL + (newLength >> 30);
            array[end++] = (uint16_t)(0x8000 | (newLength >> 15));
            array[end++] = (uint16_t)(0x8000 | newLength);
        }
    } else {
        head |= newLength;
    }

    // The head unit is written last because the trail decisions complete it.
    array[length] = (uint16_t)head;
    length = end;
}
|
||||
|
||||
/**
 * Stores one unit at the end of the edits array, growing the array first if it is full.
 * The unit is silently dropped if growing fails (growArray() records the error).
 *
 * @param r the unit value; it is truncated to 16 bits
 */
void Edits::append(int32_t r) {
    if(length >= capacity && !growArray()) {
        return;
    }
    array[length++] = (uint16_t)r;
}
|
||||
|
||||
/**
 * Moves the edits into a larger heap buffer.
 * The first growth away from stackArray goes to 2000 units; after that the
 * capacity doubles, capped at INT32_MAX.
 *
 * @return TRUE on success. On failure, returns FALSE and sets errorCode to
 *         U_BUFFER_OVERFLOW_ERROR (cannot grow enough) or
 *         U_MEMORY_ALLOCATION_ERROR (allocation failed).
 */
UBool Edits::growArray() {
    const UBool onStack = (array == stackArray);

    int32_t grown;
    if(onStack) {
        grown = 2000;
    } else {
        if(capacity == INT32_MAX) {
            errorCode = U_BUFFER_OVERFLOW_ERROR;
            return FALSE;
        }
        grown = (capacity >= (INT32_MAX / 2)) ? INT32_MAX : 2 * capacity;
    }

    // A maximal change record takes 5 units; require at least that much extra room.
    if((grown - capacity) < 5) {
        errorCode = U_BUFFER_OVERFLOW_ERROR;
        return FALSE;
    }

    uint16_t *bigger = (uint16_t *)uprv_malloc((size_t)grown * 2);
    if(bigger == NULL) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return FALSE;
    }
    uprv_memcpy(bigger, array, (size_t)length * 2);

    // Only a previous heap buffer needs to be released.
    if(!onStack) {
        uprv_free(array);
    }
    array = bigger;
    capacity = grown;
    return TRUE;
}
|
||||
|
||||
/**
 * Propagates this object's error, if any, to the caller's error code.
 * An error already present in outErrorCode is left untouched.
 *
 * @param outErrorCode in/out error code
 * @return TRUE if outErrorCode holds a failure afterwards, FALSE otherwise
 */
UBool Edits::copyErrorTo(UErrorCode &outErrorCode) {
    if(U_SUCCESS(outErrorCode)) {
        if(U_SUCCESS(errorCode)) {
            return FALSE;
        }
        outErrorCode = errorCode;
    }
    return TRUE;
}
|
||||
|
||||
/**
 * @return TRUE if the accumulated length delta is non-zero or if any
 *         recorded unit is something other than an unchanged-text record
 */
UBool Edits::hasChanges() const {
    // A non-zero delta can only come from a replacement.
    if(delta != 0) {
        return TRUE;
    }
    // Otherwise look for any unit above the unchanged-text range.
    const uint16_t *unit = array;
    const uint16_t *const limit = array + length;
    for(; unit < limit; ++unit) {
        if(*unit > MAX_UNCHANGED) {
            return TRUE;
        }
    }
    return FALSE;
}
|
||||
|
||||
/**
 * Sets up an iterator positioned before the first edit.
 *
 * @param a   the edits array to read; it is not copied
 * @param len number of units in a
 * @param oc  TRUE to skip unchanged spans (next() consults this flag)
 * @param crs TRUE to merge adjacent changes into one span (next() consults this flag)
 */
Edits::Iterator::Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs)
        : array(a),
          index(0),
          length(len),
          remaining(0),
          onlyChanges(oc),
          coarse(crs),
          changed(FALSE),
          oldLength_(0),
          newLength_(0),
          srcIndex(0),
          replIndex(0),
          destIndex(0) {
}
|
||||
|
||||
/**
 * Decodes one length field of a general replacement record.
 *
 * @param head the 6-bit length field taken from the head unit:
 *             below LENGTH_IN_1TRAIL it is the length itself;
 *             LENGTH_IN_1TRAIL means the length is in the next unit;
 *             otherwise the length is in the next two units, and the low bit
 *             of head supplies bit 30
 * @return the decoded length; index is advanced past any trail units consumed
 */
int32_t Edits::Iterator::readLength(int32_t head) {
    if (head < LENGTH_IN_1TRAIL) {
        return head;
    } else if (head < LENGTH_IN_2TRAIL) {
        U_ASSERT(index < length);
        U_ASSERT(array[index] >= 0x8000);
        // Trail units carry a marker in bit 15 (the writer stores 0x8000 | length);
        // strip it so that only the 15 payload bits are returned.
        return array[index++] & 0x7fff;
    } else {
        U_ASSERT((index + 2) <= length);
        U_ASSERT(array[index] >= 0x8000);
        U_ASSERT(array[index + 1] >= 0x8000);
        // Bit 30 from the head field, bits 29..15 and 14..0 from the two trail units.
        int32_t len = ((head & 1) << 30) |
                ((int32_t)(array[index] & 0x7fff) << 15) |
                (array[index + 1] & 0x7fff);
        index += 2;
        return len;
    }
}
|
||||
|
||||
void Edits::Iterator::updateIndexes() {
|
||||
srcIndex += oldLength_;
|
||||
if (changed) {
|
||||
replIndex += newLength_;
|
||||
}
|
||||
destIndex += newLength_;
|
||||
}
|
||||
|
||||
/**
 * Puts the iterator into its end state: an empty, unchanged span beyond the string.
 * Clearing `changed` keeps the end state from still claiming to be a change
 * when the last real span was a replacement.
 *
 * @return always FALSE, so callers can write "return noNext();"
 */
UBool Edits::Iterator::noNext() {
    changed = FALSE;
    oldLength_ = newLength_ = 0;
    return FALSE;
}
|
||||
|
||||
/**
 * Advances to the next span of edits.
 * Adjacent unchanged units are always reported as one span. With onlyChanges,
 * unchanged spans are skipped (the indexes still advance past them). With
 * coarse, adjacent changes are merged into one span; otherwise each change,
 * including each one packed into a compressed unit, is reported separately.
 *
 * @param errorCode in/out error code; nothing happens if it is already a failure
 * @return TRUE if there is a current span, FALSE at the end or on error
 */
UBool Edits::Iterator::next(UErrorCode &errorCode) {
    // The error code is here in case overflow guards are needed later, and so
    // that caller loops stop when an error was set elsewhere.
    if (U_FAILURE(errorCode)) { return FALSE; }

    // Step over the span reported by the previous call.
    updateIndexes();

    if (remaining > 0) {
        // Fine-grained mode: still inside a compressed run of equal-length changes.
        --remaining;
        return TRUE;
    }
    if (index >= length) {
        return noNext();
    }

    int32_t u = array[index++];

    if (u <= MAX_UNCHANGED) {
        // Gather this unit and all directly following unchanged units.
        changed = FALSE;
        int32_t run = u + 1;
        while (index < length) {
            u = array[index];
            if (u > MAX_UNCHANGED) { break; }
            ++index;
            run += u + 1;
        }
        oldLength_ = newLength_ = run;
        if (!onlyChanges) {
            return TRUE;
        }
        // Skip the unchanged span and carry on with the change that follows it.
        updateIndexes();
        if (index >= length) {
            return noNext();
        }
        // u already holds the change unit found at index; just consume it.
        ++index;
    }

    changed = TRUE;
    if (u > MAX_SHORT_CHANGE) {
        // General replacement record; the old length is encoded before the new length.
        U_ASSERT(u <= 0x7fff);
        oldLength_ = readLength((u >> 6) & 0x3f);
        newLength_ = readLength(u & 0x3f);
        if (!coarse) {
            return TRUE;
        }
    } else if (!coarse) {
        // Report the first change of a compressed run now, the rest on later calls.
        oldLength_ = newLength_ = u >> 12;
        remaining = u & 0xfff;
        return TRUE;
    } else {
        // Coarse mode: the whole compressed run counts as one change.
        oldLength_ = newLength_ = ((u & 0xfff) + 1) * (u >> 12);
    }

    // Coarse mode only: fold all directly following changes into this span.
    while (index < length) {
        u = array[index];
        if (u <= MAX_UNCHANGED) { break; }
        ++index;
        if (u <= MAX_SHORT_CHANGE) {
            const int32_t total = ((u & 0xfff) + 1) * (u >> 12);
            oldLength_ += total;
            newLength_ += total;
        } else {
            U_ASSERT(u <= 0x7fff);
            const int32_t oldLen = readLength((u >> 6) & 0x3f);
            const int32_t newLen = readLength(u & 0x3f);
            oldLength_ += oldLen;
            newLength_ += newLen;
        }
    }
    return TRUE;
}
|
||||
|
||||
/**
 * Positions the iterator on the span that contains source index i.
 * If i lies before the current span, the search restarts from the beginning.
 *
 * @param i         source text index to find
 * @param errorCode in/out error code; nothing happens if it is already a failure
 * @return TRUE if a span containing i was found (it is now the current span);
 *         FALSE if i is negative, beyond the recorded edits, or on error
 */
UBool Edits::Iterator::findSourceIndex(int32_t i, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode) || i < 0) { return FALSE; }
    if (i < srcIndex) {
        // Reset the iterator to the start.
        // The current span lengths must be cleared as well: next() begins with
        // updateIndexes(), which would otherwise add the stale span to the
        // freshly zeroed indexes and offset everything that follows.
        index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
    } else if (i < (srcIndex + oldLength_)) {
        // The index is in the current span.
        return TRUE;
    }
    while (next(errorCode)) {
        if (i < (srcIndex + oldLength_)) {
            // The index is in the current span.
            return TRUE;
        }
        if (remaining > 0) {
            // Is the index in one of the remaining compressed edits?
            // srcIndex is the start of the current span, before the remaining ones.
            int32_t len = (remaining + 1) * oldLength_;
            if (i < (srcIndex + len)) {
                // Jump straight to the edit that contains i.
                int32_t n = (i - srcIndex) / oldLength_;  // 1 <= n <= remaining
                len = n * oldLength_;
                // Old and new lengths are equal within a compressed run,
                // so all three indexes advance by the same amount.
                srcIndex += len;
                replIndex += len;
                destIndex += len;
                remaining -= n;
                return TRUE;
            }
            // Make next() skip all of these edits at once.
            oldLength_ = newLength_ = len;
            remaining = 0;
        }
    }
    return FALSE;
}
|
||||
|
||||
namespace {
|
||||
|
||||
int32_t checkOverflowAndEditsError(int32_t destIndex, int32_t destCapacity,
|
||||
Edits *edits, UErrorCode &errorCode) {
|
||||
if (U_SUCCESS(errorCode)) {
|
||||
|
@ -20,11 +20,12 @@
|
||||
#include "unicode/utypes.h"
|
||||
#include "uassert.h"
|
||||
#include "unicode/brkiter.h"
|
||||
#include "unicode/casemap.h"
|
||||
#include "unicode/ucasemap.h"
|
||||
#include "unicode/uloc.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "ucase.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "ucasemap_imp.h"
|
||||
|
||||
U_CFUNC int32_t
|
||||
ustrcase_getCaseLocale(const char *locale) {
|
||||
|
@ -19,6 +19,7 @@
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/putil.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "cstring.h"
|
||||
|
@ -26,6 +26,7 @@
|
||||
#include "unicode/decimfmt.h"
|
||||
#include "uresimp.h"
|
||||
#include "unicode/ures.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "ureslocs.h"
|
||||
#include "cstring.h"
|
||||
#include "mutex.h"
|
||||
|
@ -15,6 +15,7 @@
|
||||
#if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
#include "unicode/dtfmtsym.h"
|
||||
#include "unicode/ucasemap.h"
|
||||
#include "unicode/ureldatefmt.h"
|
||||
#include "unicode/udisplaycontext.h"
|
||||
#include "unicode/unum.h"
|
||||
|
@ -48,6 +48,7 @@
|
||||
#include "unicode/simpletz.h"
|
||||
#include "unicode/rbtz.h"
|
||||
#include "unicode/tzfmt.h"
|
||||
#include "unicode/ucasemap.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "unicode/vtzone.h"
|
||||
#include "unicode/udisplaycontext.h"
|
||||
@ -64,6 +65,7 @@
|
||||
#include <float.h>
|
||||
#include "smpdtfst.h"
|
||||
#include "sharednumberformat.h"
|
||||
#include "ucasemap_imp.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "charstr.h"
|
||||
#include "uvector.h"
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include "unicode/ucasemap.h"
|
||||
#include "cmemory.h"
|
||||
#include "cintltst.h"
|
||||
#include "ucasemap_imp.h"
|
||||
#include "ustr_imp.h"
|
||||
|
||||
/* test string case mapping functions --------------------------------------- */
|
||||
|
Loading…
Reference in New Issue
Block a user