ICU-12410 class Edits, class CaseMap with new low-level functions that work with Edits, simpler case properties code, some cleanup

X-SVN-Rev: 39684
This commit is contained in:
Markus Scherer 2017-02-17 21:03:35 +00:00
commit 4c2fad3e36
76 changed files with 3885 additions and 1432 deletions

View File

@ -94,6 +94,7 @@ stringtriebuilder.o bytestriebuilder.o \
bytestrie.o bytestrieiterator.o \ bytestrie.o bytestrieiterator.o \
ucharstrie.o ucharstriebuilder.o ucharstrieiterator.o \ ucharstrie.o ucharstriebuilder.o ucharstrieiterator.o \
dictionarydata.o \ dictionarydata.o \
edits.o \
appendable.o ustr_cnv.o unistr_cnv.o unistr.o unistr_case.o unistr_props.o \ appendable.o ustr_cnv.o unistr_cnv.o unistr.o unistr_case.o unistr_props.o \
utf_impl.o ustring.o ustrcase.o ucasemap.o ucasemap_titlecase_brkiter.o cstring.o ustrfmt.o ustrtrns.o ustr_wcs.o utext.o \ utf_impl.o ustring.o ustrcase.o ucasemap.o ucasemap_titlecase_brkiter.o cstring.o ustrfmt.o ustrtrns.o ustr_wcs.o utext.o \
unistr_case_locale.o ustrcase_locale.o unistr_titlecase_brkiter.o ustr_titlecase_brkiter.o \ unistr_case_locale.o ustrcase_locale.o unistr_titlecase_brkiter.o ustr_titlecase_brkiter.o \

View File

@ -15,6 +15,7 @@
*/ */
#include "unicode/utypes.h" #include "unicode/utypes.h"
#include "unicode/putil.h"
#include "charstr.h" #include "charstr.h"
#include "cmemory.h" #include "cmemory.h"
#include "cstring.h" #include "cstring.h"

View File

@ -453,6 +453,7 @@
<ClCompile Include="cstring.cpp" /> <ClCompile Include="cstring.cpp" />
<ClCompile Include="cstr.cpp" /> <ClCompile Include="cstr.cpp" />
<ClCompile Include="cwchar.cpp" /> <ClCompile Include="cwchar.cpp" />
<ClCompile Include="edits.cpp" />
<ClCompile Include="messagepattern.cpp" /> <ClCompile Include="messagepattern.cpp" />
<ClCompile Include="schriter.cpp" /> <ClCompile Include="schriter.cpp" />
<ClCompile Include="stringpiece.cpp" /> <ClCompile Include="stringpiece.cpp" />
@ -1515,6 +1516,20 @@
</Command> </Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">copy "%(FullPath)" ..\..\include\unicode <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">copy "%(FullPath)" ..\..\include\unicode
</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\casemap.h">
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode
</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">copy "%(FullPath)" ..\..\include\unicode
</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">copy "%(FullPath)" ..\..\include\unicode
</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">copy "%(FullPath)" ..\..\include\unicode
</Command> </Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
@ -1536,6 +1551,20 @@
<ClInclude Include="cstring.h" /> <ClInclude Include="cstring.h" />
<ClInclude Include="cstr.h" /> <ClInclude Include="cstr.h" />
<ClInclude Include="cwchar.h" /> <ClInclude Include="cwchar.h" />
<CustomBuild Include="unicode\edits.h">
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode
</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">copy "%(FullPath)" ..\..\include\unicode
</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">copy "%(FullPath)" ..\..\include\unicode
</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">copy "%(FullPath)" ..\..\include\unicode
</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\messagepattern.h"> <CustomBuild Include="unicode\messagepattern.h">
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode
</Command> </Command>
@ -1620,6 +1649,7 @@
</Command> </Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<ClInclude Include="ucasemap_imp.h" />
<CustomBuild Include="unicode\ucharstrie.h"> <CustomBuild Include="unicode\ucharstrie.h">
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode
</Command> </Command>

View File

@ -478,6 +478,9 @@
<ClCompile Include="cwchar.cpp"> <ClCompile Include="cwchar.cpp">
<Filter>strings</Filter> <Filter>strings</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="edits.cpp">
<Filter>strings</Filter>
</ClCompile>
<ClCompile Include="schriter.cpp"> <ClCompile Include="schriter.cpp">
<Filter>strings</Filter> <Filter>strings</Filter>
</ClCompile> </ClCompile>
@ -870,6 +873,9 @@
<ClInclude Include="cwchar.h"> <ClInclude Include="cwchar.h">
<Filter>strings</Filter> <Filter>strings</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="ucasemap_imp.h">
<Filter>strings</Filter>
</ClInclude>
<ClInclude Include="uinvchar.h"> <ClInclude Include="uinvchar.h">
<Filter>strings</Filter> <Filter>strings</Filter>
</ClInclude> </ClInclude>
@ -1096,9 +1102,15 @@
<CustomBuild Include="unicode\bytestream.h"> <CustomBuild Include="unicode\bytestream.h">
<Filter>strings</Filter> <Filter>strings</Filter>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\casemap.h">
<Filter>strings</Filter>
</CustomBuild>
<CustomBuild Include="unicode\chariter.h"> <CustomBuild Include="unicode\chariter.h">
<Filter>strings</Filter> <Filter>strings</Filter>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\edits.h">
<Filter>strings</Filter>
</CustomBuild>
<CustomBuild Include="unicode\rep.h"> <CustomBuild Include="unicode\rep.h">
<Filter>strings</Filter> <Filter>strings</Filter>
</CustomBuild> </CustomBuild>

View File

@ -0,0 +1,346 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// edits.cpp
// created: 2017feb08 Markus W. Scherer
#include "unicode/utypes.h"
#include "unicode/edits.h"
#include "cmemory.h"
#include "uassert.h"
U_NAMESPACE_BEGIN
namespace {

// Each edit is stored as one or more 16-bit units; the top bits of the
// first ("head") unit select one of three record formats.

// Format 0000uuuuuuuuuuuu: a run of u+1 unchanged text units.
const int32_t MAX_UNCHANGED = 0x0fff;
const int32_t MAX_UNCHANGED_LENGTH = MAX_UNCHANGED + 1;

// Format 0wwwcccccccccccc with w=1..6: ccc+1 consecutive replacements,
// each one replacing w text units with w text units (no length change).
const int32_t MAX_SHORT_WIDTH = 6;
const int32_t MAX_SHORT_CHANGE_LENGTH = 0xfff;
const int32_t MAX_SHORT_CHANGE = (MAX_SHORT_WIDTH << 12) | MAX_SHORT_CHANGE_LENGTH;

// Format 0111mmmmmmnnnnnn: one replacement of m text units with n.
//   m or n == 61:     the actual length follows in the next array unit.
//   m or n == 62..63: the actual length follows in the next two array units,
//                     and bit 30 of that length is kept in the head unit.
// Every trailing unit has bit 15 set.
const int32_t LENGTH_IN_1TRAIL = 61;
const int32_t LENGTH_IN_2TRAIL = LENGTH_IN_1TRAIL + 1;

}  // namespace
Edits::~Edits() {
    // stackArray must never be handed to uprv_free(); only a buffer that
    // replaced it (see growArray()) is released here.
    if(array == stackArray) { return; }
    uprv_free(array);
}
void Edits::reset() {
length = delta = 0;
}
/**
 * Records a run of unchangedLength unchanged text units.
 * A zero length is a no-op; a negative length sets U_ILLEGAL_ARGUMENT_ERROR.
 * Does nothing if an error was recorded earlier.
 */
void Edits::addUnchanged(int32_t unchangedLength) {
    if(U_FAILURE(errorCode)) { return; }
    if(unchangedLength == 0) { return; }
    if(unchangedLength < 0) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    // If the newest record is an unchanged-text record that is not yet full,
    // top it up before writing any new units.
    const int32_t tail = lastUnit();
    if(tail < MAX_UNCHANGED) {
        const int32_t room = MAX_UNCHANGED - tail;
        if(unchangedLength <= room) {
            setLastUnit(tail + unchangedLength);
            return;
        }
        setLastUnit(MAX_UNCHANGED);
        unchangedLength -= room;
    }

    // Emit as many maximal records as the length still requires.
    for(; unchangedLength >= MAX_UNCHANGED_LENGTH; unchangedLength -= MAX_UNCHANGED_LENGTH) {
        append(MAX_UNCHANGED);
    }

    // Whatever is left fits into one record, which stores length-1.
    if(unchangedLength > 0) {
        append(unchangedLength - 1);
    }
}
/**
 * Records the replacement of oldLength text units with newLength text units.
 * Negative lengths set U_ILLEGAL_ARGUMENT_ERROR; a 0:0 replacement is a no-op.
 * Sets U_INDEX_OUTOFBOUNDS_ERROR if the accumulated length delta would
 * overflow int32_t. Does nothing if an error was recorded earlier.
 */
void Edits::addReplace(int32_t oldLength, int32_t newLength) {
    if(U_FAILURE(errorCode)) { return; }

    const bool shortSameLength =
        oldLength == newLength && 0 < oldLength && oldLength <= MAX_SHORT_WIDTH;
    if(shortSameLength) {
        // Compact form: same-length replacement of at most MAX_SHORT_WIDTH units.
        // Bump the counter of the previous record when it has the same width
        // and its counter is not yet saturated; otherwise start a new record.
        const int32_t tail = lastUnit();
        const bool mergeable =
            MAX_UNCHANGED < tail && tail < MAX_SHORT_CHANGE &&
            (tail >> 12) == oldLength && (tail & 0xfff) < MAX_SHORT_CHANGE_LENGTH;
        if(mergeable) {
            setLastUnit(tail + 1);
        } else {
            append(oldLength << 12);
        }
        return;
    }

    if(oldLength < 0 || newLength < 0) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    if(oldLength == 0 && newLength == 0) {
        return;
    }

    const int32_t change = newLength - oldLength;
    if(change != 0) {
        // Guard the running delta against leaving the int32_t range.
        const bool overflow = change > 0 && delta >= 0 && change > (INT32_MAX - delta);
        const bool underflow = change < 0 && delta < 0 && change < (INT32_MIN - delta);
        if(overflow || underflow) {
            errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
            return;
        }
        delta += change;
    }

    if(oldLength < LENGTH_IN_1TRAIL && newLength < LENGTH_IN_1TRAIL) {
        // Both lengths fit directly into the head unit.
        append(0x7000 | (oldLength << 6) | newLength);
        return;
    }

    // A record can take up to 5 units (head + 2 trail units per length);
    // make sure that many are available before writing.
    if((capacity - length) < 5 && !growArray()) {
        return;
    }

    int32_t head = 0x7000;
    uint16_t *headSlot = array + length;
    uint16_t *trail = headSlot + 1;

    // Old length: field m in bits 11..6 of the head, then its trail units.
    if(oldLength > 0x7fff) {
        head |= (LENGTH_IN_2TRAIL + (oldLength >> 30)) << 6;
        *trail++ = (uint16_t)(0x8000 | (oldLength >> 15));
        *trail++ = (uint16_t)(0x8000 | oldLength);
    } else if(oldLength >= LENGTH_IN_1TRAIL) {
        head |= LENGTH_IN_1TRAIL << 6;
        *trail++ = (uint16_t)(0x8000 | oldLength);
    } else {
        head |= oldLength << 6;
    }

    // New length: field n in bits 5..0 of the head, then its trail units.
    if(newLength > 0x7fff) {
        head |= LENGTH_IN_2TRAIL + (newLength >> 30);
        *trail++ = (uint16_t)(0x8000 | (newLength >> 15));
        *trail++ = (uint16_t)(0x8000 | newLength);
    } else if(newLength >= LENGTH_IN_1TRAIL) {
        head |= LENGTH_IN_1TRAIL;
        *trail++ = (uint16_t)(0x8000 | newLength);
    } else {
        head |= newLength;
    }

    *headSlot = (uint16_t)head;
    length = (int32_t)(trail - array);
}
/**
 * Appends one 16-bit unit to the edits array, growing the array when full.
 * If growing fails, nothing is written (growArray() records the error).
 */
void Edits::append(int32_t r) {
    if(length >= capacity && !growArray()) {
        return;
    }
    array[length] = (uint16_t)r;
    ++length;
}
/**
 * Replaces the edits array with a larger heap-allocated one and copies the
 * existing units over.
 * @return TRUE on success; FALSE with errorCode set if the array cannot grow
 *         any further or the allocation fails.
 */
UBool Edits::growArray() {
    const bool usingStackArray = (array == stackArray);

    if (!usingStackArray && capacity == INT32_MAX) {
        // Not U_BUFFER_OVERFLOW_ERROR because that could be confused on a string transform API
        // with a result-string-buffer overflow.
        errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
        return FALSE;
    }

    // First growth jumps to a fixed size; later growths double, clamped to INT32_MAX.
    const int32_t grownCapacity =
        usingStackArray ? 2000 :
        (capacity >= (INT32_MAX / 2)) ? INT32_MAX :
        2 * capacity;

    // A maximal change record needs 5 units, so growing by less is useless.
    if ((grownCapacity - capacity) < 5) {
        errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
        return FALSE;
    }

    uint16_t *grownArray = (uint16_t *)uprv_malloc((size_t)grownCapacity * 2);
    if (grownArray == NULL) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return FALSE;
    }
    uprv_memcpy(grownArray, array, (size_t)length * 2);

    // Only a previously heap-allocated array may be freed.
    if (!usingStackArray) {
        uprv_free(array);
    }
    array = grownArray;
    capacity = grownCapacity;
    return TRUE;
}
/**
 * Propagates this object's error state to the caller's error code.
 * An already-failed outErrorCode is left untouched.
 * @return TRUE if outErrorCode holds a failure on return, FALSE otherwise.
 */
UBool Edits::copyErrorTo(UErrorCode &outErrorCode) {
    if (U_SUCCESS(outErrorCode)) {
        if (U_SUCCESS(errorCode)) {
            return FALSE;
        }
        outErrorCode = errorCode;
    }
    return TRUE;
}
/**
 * @return TRUE if any replacement has been recorded, that is, if the overall
 *         length changed or any stored unit is not an unchanged-text record.
 */
UBool Edits::hasChanges() const {
    if (delta != 0) {
        return TRUE;
    }
    // Same-length replacements leave delta at 0, so scan for any unit
    // above the unchanged-record range.
    const uint16_t *unit = array;
    const uint16_t *const limit = array + length;
    while (unit < limit) {
        if (*unit++ > MAX_UNCHANGED) {
            return TRUE;
        }
    }
    return FALSE;
}
/**
 * Creates an iterator positioned before the first edit.
 * @param units           the encoded edits array to iterate over
 * @param unitCount       number of units in that array
 * @param onlyChangesFlag stored in onlyChanges_
 * @param coarseFlag      TRUE to have next() merge adjacent changes into one span
 */
Edits::Iterator::Iterator(const uint16_t *units, int32_t unitCount,
                          UBool onlyChangesFlag, UBool coarseFlag) :
        array(units),
        index(0),
        length(unitCount),
        remaining(0),
        onlyChanges_(onlyChangesFlag),
        coarse(coarseFlag),
        changed(FALSE),
        oldLength_(0),
        newLength_(0),
        srcIndex(0),
        replIndex(0),
        destIndex(0) {
}
/**
 * Decodes one length of a 0111mmmmmmnnnnnn record.
 * @param head the 6-bit m or n field from the head unit
 * @return head itself for small values; otherwise the length assembled from
 *         the following one or two trail units, which are consumed.
 */
int32_t Edits::Iterator::readLength(int32_t head) {
    if (head < LENGTH_IN_1TRAIL) {
        // The field is the length.
        return head;
    }

    if (head < LENGTH_IN_2TRAIL) {
        // One trail unit carries 15 bits.
        U_ASSERT(index < length);
        U_ASSERT(array[index] >= 0x8000);
        const int32_t value = array[index] & 0x7fff;
        ++index;
        return value;
    }

    // Two trail units carry 30 bits; the lowest bit of the field is bit 30.
    U_ASSERT((index + 2) <= length);
    U_ASSERT(array[index] >= 0x8000);
    U_ASSERT(array[index + 1] >= 0x8000);
    const int32_t topBit = (head & 1) << 30;
    const int32_t upper = (int32_t)(array[index] & 0x7fff) << 15;
    const int32_t lower = array[index + 1] & 0x7fff;
    index += 2;
    return topBit | upper | lower;
}
/**
 * Advances the source, destination and replacement indexes past the current span.
 * The replacement index only moves for spans that are changes.
 */
void Edits::Iterator::updateIndexes() {
    srcIndex += oldLength_;
    destIndex += newLength_;
    if (changed) {
        replIndex += newLength_;
    }
}
/**
 * Puts the iterator into its "past the end" state: an empty, unchanged span.
 * @return always FALSE, so callers can write "return noNext();"
 */
UBool Edits::Iterator::noNext() {
    oldLength_ = 0;
    newLength_ = 0;
    changed = FALSE;
    return FALSE;
}
/**
 * Advances to the next span of edits.
 * @param onlyChanges TRUE to skip over unchanged spans
 * @param errorCode   in/out error code; nothing happens if it already failed
 * @return TRUE if the iterator is now on a span, FALSE at the end or on error
 */
UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
    // The errorCode is available in case integer-overflow guards are needed later;
    // bailing out on an existing error also keeps caller loops simple.
    if (U_FAILURE(errorCode)) { return FALSE; }

    // Step past the span that the previous call reported.
    updateIndexes();

    if (remaining > 0) {
        // Fine-grained mode: still inside a compressed run of equal-length changes.
        --remaining;
        return TRUE;
    }
    if (index >= length) {
        return noNext();
    }

    int32_t unit = array[index];
    ++index;

    if (unit <= MAX_UNCHANGED) {
        // Merge this and all directly following unchanged records into one span.
        changed = FALSE;
        int32_t span = unit + 1;
        for (; index < length; ++index) {
            unit = array[index];
            if (unit > MAX_UNCHANGED) {
                break;
            }
            span += unit + 1;
        }
        oldLength_ = newLength_ = span;
        if (!onlyChanges) {
            return TRUE;
        }
        // Skip the unchanged span and continue with the change that follows, if any.
        updateIndexes();
        if (index >= length) {
            return noNext();
        }
        // unit already holds the change record at index; consume it.
        ++index;
    }

    changed = TRUE;
    if (unit > MAX_SHORT_CHANGE) {
        // Single replacement with explicit old and new lengths.
        // The old length must be read first: readLength() consumes trail units in order.
        U_ASSERT(unit <= 0x7fff);
        oldLength_ = readLength((unit >> 6) & 0x3f);
        newLength_ = readLength(unit & 0x3f);
        if (!coarse) {
            return TRUE;
        }
    } else if (!coarse) {
        // Fine-grained mode: hand out the compressed run one replacement at a time.
        oldLength_ = newLength_ = unit >> 12;
        remaining = unit & 0xfff;
        return TRUE;
    } else {
        // Coarse mode: the whole compressed run is one span of count * width units.
        const int32_t width = unit >> 12;
        const int32_t count = (unit & 0xfff) + 1;
        oldLength_ = newLength_ = count * width;
    }

    // Coarse mode only: fold all directly following change records into this span.
    while (index < length) {
        unit = array[index];
        if (unit <= MAX_UNCHANGED) {
            break;
        }
        ++index;
        if (unit <= MAX_SHORT_CHANGE) {
            const int32_t width = unit >> 12;
            const int32_t count = (unit & 0xfff) + 1;
            const int32_t runLength = count * width;
            oldLength_ += runLength;
            newLength_ += runLength;
        } else {
            U_ASSERT(unit <= 0x7fff);
            const int32_t oldLen = readLength((unit >> 6) & 0x3f);
            const int32_t newLen = readLength(unit & 0x3f);
            oldLength_ += oldLen;
            newLength_ += newLen;
        }
    }
    return TRUE;
}
/**
 * Moves the iterator to the span that contains source index i.
 * @param i         source text index; negative values are rejected
 * @param errorCode in/out error code; nothing happens if it already failed
 * @return TRUE if a span containing i was found, FALSE otherwise
 */
UBool Edits::Iterator::findSourceIndex(int32_t i, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) { return FALSE; }
    if (i < 0) { return FALSE; }

    if (i < srcIndex) {
        // The target lies before the current span: restart from the beginning.
        index = 0;
        remaining = 0;
        oldLength_ = 0;
        newLength_ = 0;
        srcIndex = 0;
        replIndex = 0;
        destIndex = 0;
    } else if (i < (srcIndex + oldLength_)) {
        // Already on the right span.
        return TRUE;
    }

    for (;;) {
        if (!next(FALSE, errorCode)) {
            return FALSE;
        }
        if (i < (srcIndex + oldLength_)) {
            // The span just reached contains the target.
            return TRUE;
        }
        if (remaining <= 0) {
            continue;
        }

        // A compressed run of equal-length edits starts here. srcIndex is the
        // start of its first edit; the run covers remaining+1 edits in total.
        const int32_t runLength = (remaining + 1) * oldLength_;
        if (i >= (srcIndex + runLength)) {
            // The target is beyond the run: let the next call to next() skip all of it.
            oldLength_ = newLength_ = runLength;
            remaining = 0;
            continue;
        }

        // The target is inside one of the later edits of the run: jump to it.
        const int32_t skipped = (i - srcIndex) / oldLength_;  // 1 <= skipped <= remaining
        const int32_t offset = skipped * oldLength_;
        srcIndex += offset;
        replIndex += offset;
        destIndex += offset;
        remaining -= skipped;
        return TRUE;
    }
}
U_NAMESPACE_END

View File

@ -13,6 +13,7 @@
#include "unicode/locdspnm.h" #include "unicode/locdspnm.h"
#include "unicode/simpleformatter.h" #include "unicode/simpleformatter.h"
#include "unicode/ucasemap.h"
#include "unicode/ures.h" #include "unicode/ures.h"
#include "unicode/udisplaycontext.h" #include "unicode/udisplaycontext.h"
#include "unicode/brkiter.h" #include "unicode/brkiter.h"

View File

@ -33,6 +33,7 @@
#include "unicode/locid.h" #include "unicode/locid.h"
#include "unicode/strenum.h"
#include "unicode/uloc.h" #include "unicode/uloc.h"
#include "putilimp.h" #include "putilimp.h"
#include "mutex.h" #include "mutex.h"

View File

@ -22,6 +22,7 @@
#include "unicode/utypes.h" #include "unicode/utypes.h"
#include "unicode/locid.h" #include "unicode/locid.h"
#include "unicode/putil.h" #include "unicode/putil.h"
#include "unicode/uchar.h"
#include "unicode/uloc.h" #include "unicode/uloc.h"
#include "unicode/ures.h" #include "unicode/ures.h"
#include "unicode/uscript.h" #include "unicode/uscript.h"

View File

@ -46,13 +46,6 @@ struct UCaseProps {
#define INCLUDED_FROM_UCASE_CPP #define INCLUDED_FROM_UCASE_CPP
#include "ucase_props_data.h" #include "ucase_props_data.h"
/* UCaseProps singleton ----------------------------------------------------- */
U_CAPI const UCaseProps * U_EXPORT2
ucase_getSingleton() {
return &ucase_props_singleton;
}
/* set of property starts for UnicodeSet ------------------------------------ */ /* set of property starts for UnicodeSet ------------------------------------ */
static UBool U_CALLCONV static UBool U_CALLCONV
@ -64,13 +57,13 @@ _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*end*/, ui
} }
U_CFUNC void U_EXPORT2 U_CFUNC void U_EXPORT2
ucase_addPropertyStarts(const UCaseProps *csp, const USetAdder *sa, UErrorCode *pErrorCode) { ucase_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
if(U_FAILURE(*pErrorCode)) { if(U_FAILURE(*pErrorCode)) {
return; return;
} }
/* add the start code point of each same-value range of the trie */ /* add the start code point of each same-value range of the trie */
utrie2_enum(&csp->trie, NULL, _enumPropertyStartsRange, sa); utrie2_enum(&ucase_props_singleton.trie, NULL, _enumPropertyStartsRange, sa);
/* add code points with hardcoded properties, plus the ones following them */ /* add code points with hardcoded properties, plus the ones following them */
@ -133,14 +126,14 @@ static const uint8_t flagsOffset[256]={
/* simple case mappings ----------------------------------------------------- */ /* simple case mappings ----------------------------------------------------- */
U_CAPI UChar32 U_EXPORT2 U_CAPI UChar32 U_EXPORT2
ucase_tolower(const UCaseProps *csp, UChar32 c) { ucase_tolower(UChar32 c) {
uint16_t props=UTRIE2_GET16(&csp->trie, c); uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
if(!PROPS_HAS_EXCEPTION(props)) { if(!PROPS_HAS_EXCEPTION(props)) {
if(UCASE_GET_TYPE(props)>=UCASE_UPPER) { if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
c+=UCASE_GET_DELTA(props); c+=UCASE_GET_DELTA(props);
} }
} else { } else {
const uint16_t *pe=GET_EXCEPTIONS(csp, props); const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
uint16_t excWord=*pe++; uint16_t excWord=*pe++;
if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) { if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
GET_SLOT_VALUE(excWord, UCASE_EXC_LOWER, pe, c); GET_SLOT_VALUE(excWord, UCASE_EXC_LOWER, pe, c);
@ -150,14 +143,14 @@ ucase_tolower(const UCaseProps *csp, UChar32 c) {
} }
U_CAPI UChar32 U_EXPORT2 U_CAPI UChar32 U_EXPORT2
ucase_toupper(const UCaseProps *csp, UChar32 c) { ucase_toupper(UChar32 c) {
uint16_t props=UTRIE2_GET16(&csp->trie, c); uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
if(!PROPS_HAS_EXCEPTION(props)) { if(!PROPS_HAS_EXCEPTION(props)) {
if(UCASE_GET_TYPE(props)==UCASE_LOWER) { if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
c+=UCASE_GET_DELTA(props); c+=UCASE_GET_DELTA(props);
} }
} else { } else {
const uint16_t *pe=GET_EXCEPTIONS(csp, props); const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
uint16_t excWord=*pe++; uint16_t excWord=*pe++;
if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) { if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) {
GET_SLOT_VALUE(excWord, UCASE_EXC_UPPER, pe, c); GET_SLOT_VALUE(excWord, UCASE_EXC_UPPER, pe, c);
@ -167,14 +160,14 @@ ucase_toupper(const UCaseProps *csp, UChar32 c) {
} }
U_CAPI UChar32 U_EXPORT2 U_CAPI UChar32 U_EXPORT2
ucase_totitle(const UCaseProps *csp, UChar32 c) { ucase_totitle(UChar32 c) {
uint16_t props=UTRIE2_GET16(&csp->trie, c); uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
if(!PROPS_HAS_EXCEPTION(props)) { if(!PROPS_HAS_EXCEPTION(props)) {
if(UCASE_GET_TYPE(props)==UCASE_LOWER) { if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
c+=UCASE_GET_DELTA(props); c+=UCASE_GET_DELTA(props);
} }
} else { } else {
const uint16_t *pe=GET_EXCEPTIONS(csp, props); const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
uint16_t excWord=*pe++; uint16_t excWord=*pe++;
int32_t idx; int32_t idx;
if(HAS_SLOT(excWord, UCASE_EXC_TITLE)) { if(HAS_SLOT(excWord, UCASE_EXC_TITLE)) {
@ -198,7 +191,7 @@ static const UChar iDotTilde[3] = { 0x69, 0x307, 0x303 };
U_CFUNC void U_EXPORT2 U_CFUNC void U_EXPORT2
ucase_addCaseClosure(const UCaseProps *csp, UChar32 c, const USetAdder *sa) { ucase_addCaseClosure(UChar32 c, const USetAdder *sa) {
uint16_t props; uint16_t props;
/* /*
@ -229,7 +222,7 @@ ucase_addCaseClosure(const UCaseProps *csp, UChar32 c, const USetAdder *sa) {
break; break;
} }
props=UTRIE2_GET16(&csp->trie, c); props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
if(!PROPS_HAS_EXCEPTION(props)) { if(!PROPS_HAS_EXCEPTION(props)) {
if(UCASE_GET_TYPE(props)!=UCASE_NONE) { if(UCASE_GET_TYPE(props)!=UCASE_NONE) {
/* add the one simple case mapping, no matter what type it is */ /* add the one simple case mapping, no matter what type it is */
@ -243,7 +236,7 @@ ucase_addCaseClosure(const UCaseProps *csp, UChar32 c, const USetAdder *sa) {
* c has exceptions, so there may be multiple simple and/or * c has exceptions, so there may be multiple simple and/or
* full case mappings. Add them all. * full case mappings. Add them all.
*/ */
const uint16_t *pe0, *pe=GET_EXCEPTIONS(csp, props); const uint16_t *pe0, *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
const UChar *closure; const UChar *closure;
uint16_t excWord=*pe++; uint16_t excWord=*pe++;
int32_t idx, closureLength, fullLength, length; int32_t idx, closureLength, fullLength, length;
@ -338,10 +331,10 @@ strcmpMax(const UChar *s, int32_t length, const UChar *t, int32_t max) {
} }
U_CFUNC UBool U_EXPORT2 U_CFUNC UBool U_EXPORT2
ucase_addStringCaseClosure(const UCaseProps *csp, const UChar *s, int32_t length, const USetAdder *sa) { ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa) {
int32_t i, start, limit, result, unfoldRows, unfoldRowWidth, unfoldStringWidth; int32_t i, start, limit, result, unfoldRows, unfoldRowWidth, unfoldStringWidth;
if(csp->unfold==NULL || s==NULL) { if(ucase_props_singleton.unfold==NULL || s==NULL) {
return FALSE; /* no reverse case folding data, or no string */ return FALSE; /* no reverse case folding data, or no string */
} }
if(length<=1) { if(length<=1) {
@ -355,7 +348,7 @@ ucase_addStringCaseClosure(const UCaseProps *csp, const UChar *s, int32_t length
return FALSE; return FALSE;
} }
const uint16_t *unfold=csp->unfold; const uint16_t *unfold=ucase_props_singleton.unfold;
unfoldRows=unfold[UCASE_UNFOLD_ROWS]; unfoldRows=unfold[UCASE_UNFOLD_ROWS];
unfoldRowWidth=unfold[UCASE_UNFOLD_ROW_WIDTH]; unfoldRowWidth=unfold[UCASE_UNFOLD_ROW_WIDTH];
unfoldStringWidth=unfold[UCASE_UNFOLD_STRING_WIDTH]; unfoldStringWidth=unfold[UCASE_UNFOLD_STRING_WIDTH];
@ -381,7 +374,7 @@ ucase_addStringCaseClosure(const UCaseProps *csp, const UChar *s, int32_t length
for(i=unfoldStringWidth; i<unfoldRowWidth && p[i]!=0;) { for(i=unfoldStringWidth; i<unfoldRowWidth && p[i]!=0;) {
U16_NEXT_UNSAFE(p, i, c); U16_NEXT_UNSAFE(p, i, c);
sa->add(sa->set, c); sa->add(sa->set, c);
ucase_addCaseClosure(csp, c, sa); ucase_addCaseClosure(c, sa);
} }
return TRUE; return TRUE;
} else if(result<0) { } else if(result<0) {
@ -430,38 +423,38 @@ U_NAMESPACE_END
/** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */ /** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
ucase_getType(const UCaseProps *csp, UChar32 c) { ucase_getType(UChar32 c) {
uint16_t props=UTRIE2_GET16(&csp->trie, c); uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
return UCASE_GET_TYPE(props); return UCASE_GET_TYPE(props);
} }
/** @return same as ucase_getType() and set bit 2 if c is case-ignorable */ /** @return same as ucase_getType() and set bit 2 if c is case-ignorable */
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
ucase_getTypeOrIgnorable(const UCaseProps *csp, UChar32 c) { ucase_getTypeOrIgnorable(UChar32 c) {
uint16_t props=UTRIE2_GET16(&csp->trie, c); uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
return UCASE_GET_TYPE_AND_IGNORABLE(props); return UCASE_GET_TYPE_AND_IGNORABLE(props);
} }
/** @return UCASE_NO_DOT, UCASE_SOFT_DOTTED, UCASE_ABOVE, UCASE_OTHER_ACCENT */ /** @return UCASE_NO_DOT, UCASE_SOFT_DOTTED, UCASE_ABOVE, UCASE_OTHER_ACCENT */
static inline int32_t static inline int32_t
getDotType(const UCaseProps *csp, UChar32 c) { getDotType(UChar32 c) {
uint16_t props=UTRIE2_GET16(&csp->trie, c); uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
if(!PROPS_HAS_EXCEPTION(props)) { if(!PROPS_HAS_EXCEPTION(props)) {
return props&UCASE_DOT_MASK; return props&UCASE_DOT_MASK;
} else { } else {
const uint16_t *pe=GET_EXCEPTIONS(csp, props); const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
return (*pe>>UCASE_EXC_DOT_SHIFT)&UCASE_DOT_MASK; return (*pe>>UCASE_EXC_DOT_SHIFT)&UCASE_DOT_MASK;
} }
} }
U_CAPI UBool U_EXPORT2 U_CAPI UBool U_EXPORT2
ucase_isSoftDotted(const UCaseProps *csp, UChar32 c) { ucase_isSoftDotted(UChar32 c) {
return (UBool)(getDotType(csp, c)==UCASE_SOFT_DOTTED); return (UBool)(getDotType(c)==UCASE_SOFT_DOTTED);
} }
U_CAPI UBool U_EXPORT2 U_CAPI UBool U_EXPORT2
ucase_isCaseSensitive(const UCaseProps *csp, UChar32 c) { ucase_isCaseSensitive(UChar32 c) {
uint16_t props=UTRIE2_GET16(&csp->trie, c); uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
return (UBool)((props&UCASE_SENSITIVE)!=0); return (UBool)((props&UCASE_SENSITIVE)!=0);
} }
@ -545,12 +538,10 @@ ucase_isCaseSensitive(const UCaseProps *csp, UChar32 c) {
* zero or more case-ignorable characters. * zero or more case-ignorable characters.
*/ */
#define is_a(c) ((c)=='a' || (c)=='A')
#define is_d(c) ((c)=='d' || (c)=='D') #define is_d(c) ((c)=='d' || (c)=='D')
#define is_e(c) ((c)=='e' || (c)=='E') #define is_e(c) ((c)=='e' || (c)=='E')
#define is_i(c) ((c)=='i' || (c)=='I') #define is_i(c) ((c)=='i' || (c)=='I')
#define is_l(c) ((c)=='l' || (c)=='L') #define is_l(c) ((c)=='l' || (c)=='L')
#define is_n(c) ((c)=='n' || (c)=='N')
#define is_r(c) ((c)=='r' || (c)=='R') #define is_r(c) ((c)=='r' || (c)=='R')
#define is_t(c) ((c)=='t' || (c)=='T') #define is_t(c) ((c)=='t' || (c)=='T')
#define is_u(c) ((c)=='u' || (c)=='U') #define is_u(c) ((c)=='u' || (c)=='U')
@ -565,16 +556,7 @@ ucase_isCaseSensitive(const UCaseProps *csp, UChar32 c) {
* Accepts both 2- and 3-letter codes and accepts case variants. * Accepts both 2- and 3-letter codes and accepts case variants.
*/ */
U_CFUNC int32_t U_CFUNC int32_t
ucase_getCaseLocale(const char *locale, int32_t *locCache) { ucase_getCaseLocale(const char *locale) {
int32_t result;
char c;
if(locCache!=NULL && (result=*locCache)!=UCASE_LOC_UNKNOWN) {
return result;
}
result=UCASE_LOC_ROOT;
/* /*
* This function used to use uloc_getLanguage(), but the current code * This function used to use uloc_getLanguage(), but the current code
* removes the dependency of this low-level code on uloc implementation code * removes the dependency of this low-level code on uloc implementation code
@ -584,44 +566,12 @@ ucase_getCaseLocale(const char *locale, int32_t *locCache) {
* Because this code does not want to depend on uloc, the caller must * Because this code does not want to depend on uloc, the caller must
* pass in a non-NULL locale, i.e., may need to call uloc_getDefault(). * pass in a non-NULL locale, i.e., may need to call uloc_getDefault().
*/ */
c=*locale++; char c=*locale++;
if(is_t(c)) { // Fastpath for English "en" which is often used for default (=root locale) case mappings,
/* tr or tur? */ // and for Chinese "zh": Very common but no special case mapping behavior.
c=*locale++; // Then check lowercase vs. uppercase to reduce the number of comparisons
if(is_u(c)) { // for other locales without special behavior.
c=*locale++; if(c=='e') {
}
if(is_r(c)) {
c=*locale;
if(is_sep(c)) {
result=UCASE_LOC_TURKISH;
}
}
} else if(is_a(c)) {
/* az or aze? */
c=*locale++;
if(is_z(c)) {
c=*locale++;
if(is_e(c)) {
c=*locale;
}
if(is_sep(c)) {
result=UCASE_LOC_TURKISH;
}
}
} else if(is_l(c)) {
/* lt or lit? */
c=*locale++;
if(is_i(c)) {
c=*locale++;
}
if(is_t(c)) {
c=*locale;
if(is_sep(c)) {
result=UCASE_LOC_LITHUANIAN;
}
}
} else if(is_e(c)) {
/* el or ell? */ /* el or ell? */
c=*locale++; c=*locale++;
if(is_l(c)) { if(is_l(c)) {
@ -630,27 +580,135 @@ ucase_getCaseLocale(const char *locale, int32_t *locCache) {
c=*locale; c=*locale;
} }
if(is_sep(c)) { if(is_sep(c)) {
result=UCASE_LOC_GREEK; return UCASE_LOC_GREEK;
} }
} }
} else if(is_n(c)) { // en, es, ... -> root
/* nl or nld? */ } else if(c=='z') {
c=*locale++; return UCASE_LOC_ROOT;
if(is_l(c)) { #if U_CHARSET_FAMILY==U_ASCII_FAMILY
} else if(c>='a') { // ASCII a-z = 0x61..0x7a, after A-Z
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
} else if(c<='z') { // EBCDIC a-z = 0x81..0xa9 with two gaps, before A-Z
#else
# error Unknown charset family!
#endif
// lowercase c
if(c=='t') {
/* tr or tur? */
c=*locale++; c=*locale++;
if(is_d(c)) { if(is_u(c)) {
c=*locale; c=*locale++;
} }
if(is_sep(c)) { if(is_r(c)) {
result=UCASE_LOC_DUTCH; c=*locale;
if(is_sep(c)) {
return UCASE_LOC_TURKISH;
}
}
} else if(c=='a') {
/* az or aze? */
c=*locale++;
if(is_z(c)) {
c=*locale++;
if(is_e(c)) {
c=*locale;
}
if(is_sep(c)) {
return UCASE_LOC_TURKISH;
}
}
} else if(c=='l') {
/* lt or lit? */
c=*locale++;
if(is_i(c)) {
c=*locale++;
}
if(is_t(c)) {
c=*locale;
if(is_sep(c)) {
return UCASE_LOC_LITHUANIAN;
}
}
} else if(c=='n') {
/* nl or nld? */
c=*locale++;
if(is_l(c)) {
c=*locale++;
if(is_d(c)) {
c=*locale;
}
if(is_sep(c)) {
return UCASE_LOC_DUTCH;
}
}
}
} else {
// uppercase c
// Same code as for lowercase c but also check for 'E'.
if(c=='T') {
/* tr or tur? */
c=*locale++;
if(is_u(c)) {
c=*locale++;
}
if(is_r(c)) {
c=*locale;
if(is_sep(c)) {
return UCASE_LOC_TURKISH;
}
}
} else if(c=='A') {
/* az or aze? */
c=*locale++;
if(is_z(c)) {
c=*locale++;
if(is_e(c)) {
c=*locale;
}
if(is_sep(c)) {
return UCASE_LOC_TURKISH;
}
}
} else if(c=='L') {
/* lt or lit? */
c=*locale++;
if(is_i(c)) {
c=*locale++;
}
if(is_t(c)) {
c=*locale;
if(is_sep(c)) {
return UCASE_LOC_LITHUANIAN;
}
}
} else if(c=='E') {
/* el or ell? */
c=*locale++;
if(is_l(c)) {
c=*locale++;
if(is_l(c)) {
c=*locale;
}
if(is_sep(c)) {
return UCASE_LOC_GREEK;
}
}
} else if(c=='N') {
/* nl or nld? */
c=*locale++;
if(is_l(c)) {
c=*locale++;
if(is_d(c)) {
c=*locale;
}
if(is_sep(c)) {
return UCASE_LOC_DUTCH;
}
} }
} }
} }
return UCASE_LOC_ROOT;
if(locCache!=NULL) {
*locCache=result;
}
return result;
} }
/* /*
@ -662,7 +720,7 @@ ucase_getCaseLocale(const char *locale, int32_t *locCache) {
* it is also cased or not. * it is also cased or not.
*/ */
static UBool static UBool
isFollowedByCasedLetter(const UCaseProps *csp, UCaseContextIterator *iter, void *context, int8_t dir) { isFollowedByCasedLetter(UCaseContextIterator *iter, void *context, int8_t dir) {
UChar32 c; UChar32 c;
if(iter==NULL) { if(iter==NULL) {
@ -670,7 +728,7 @@ isFollowedByCasedLetter(const UCaseProps *csp, UCaseContextIterator *iter, void
} }
for(/* dir!=0 sets direction */; (c=iter(context, dir))>=0; dir=0) { for(/* dir!=0 sets direction */; (c=iter(context, dir))>=0; dir=0) {
int32_t type=ucase_getTypeOrIgnorable(csp, c); int32_t type=ucase_getTypeOrIgnorable(c);
if(type&4) { if(type&4) {
/* case-ignorable, continue with the loop */ /* case-ignorable, continue with the loop */
} else if(type!=UCASE_NONE) { } else if(type!=UCASE_NONE) {
@ -685,7 +743,7 @@ isFollowedByCasedLetter(const UCaseProps *csp, UCaseContextIterator *iter, void
/* Is preceded by Soft_Dotted character with no intervening cc=230 ? */ /* Is preceded by Soft_Dotted character with no intervening cc=230 ? */
static UBool static UBool
isPrecededBySoftDotted(const UCaseProps *csp, UCaseContextIterator *iter, void *context) { isPrecededBySoftDotted(UCaseContextIterator *iter, void *context) {
UChar32 c; UChar32 c;
int32_t dotType; int32_t dotType;
int8_t dir; int8_t dir;
@ -695,7 +753,7 @@ isPrecededBySoftDotted(const UCaseProps *csp, UCaseContextIterator *iter, void *
} }
for(dir=-1; (c=iter(context, dir))>=0; dir=0) { for(dir=-1; (c=iter(context, dir))>=0; dir=0) {
dotType=getDotType(csp, c); dotType=getDotType(c);
if(dotType==UCASE_SOFT_DOTTED) { if(dotType==UCASE_SOFT_DOTTED) {
return TRUE; /* preceded by TYPE_i */ return TRUE; /* preceded by TYPE_i */
} else if(dotType!=UCASE_OTHER_ACCENT) { } else if(dotType!=UCASE_OTHER_ACCENT) {
@ -742,7 +800,7 @@ isPrecededBySoftDotted(const UCaseProps *csp, UCaseContextIterator *iter, void *
/* Is preceded by base character 'I' with no intervening cc=230 ? */ /* Is preceded by base character 'I' with no intervening cc=230 ? */
static UBool static UBool
isPrecededBy_I(const UCaseProps *csp, UCaseContextIterator *iter, void *context) { isPrecededBy_I(UCaseContextIterator *iter, void *context) {
UChar32 c; UChar32 c;
int32_t dotType; int32_t dotType;
int8_t dir; int8_t dir;
@ -755,7 +813,7 @@ isPrecededBy_I(const UCaseProps *csp, UCaseContextIterator *iter, void *context)
if(c==0x49) { if(c==0x49) {
return TRUE; /* preceded by I */ return TRUE; /* preceded by I */
} }
dotType=getDotType(csp, c); dotType=getDotType(c);
if(dotType!=UCASE_OTHER_ACCENT) { if(dotType!=UCASE_OTHER_ACCENT) {
return FALSE; /* preceded by different base character (not I), or intervening cc==230 */ return FALSE; /* preceded by different base character (not I), or intervening cc==230 */
} }
@ -766,7 +824,7 @@ isPrecededBy_I(const UCaseProps *csp, UCaseContextIterator *iter, void *context)
/* Is followed by one or more cc==230 ? */ /* Is followed by one or more cc==230 ? */
static UBool static UBool
isFollowedByMoreAbove(const UCaseProps *csp, UCaseContextIterator *iter, void *context) { isFollowedByMoreAbove(UCaseContextIterator *iter, void *context) {
UChar32 c; UChar32 c;
int32_t dotType; int32_t dotType;
int8_t dir; int8_t dir;
@ -776,7 +834,7 @@ isFollowedByMoreAbove(const UCaseProps *csp, UCaseContextIterator *iter, void *c
} }
for(dir=1; (c=iter(context, dir))>=0; dir=0) { for(dir=1; (c=iter(context, dir))>=0; dir=0) {
dotType=getDotType(csp, c); dotType=getDotType(c);
if(dotType==UCASE_ABOVE) { if(dotType==UCASE_ABOVE) {
return TRUE; /* at least one cc==230 following */ return TRUE; /* at least one cc==230 following */
} else if(dotType!=UCASE_OTHER_ACCENT) { } else if(dotType!=UCASE_OTHER_ACCENT) {
@ -789,7 +847,7 @@ isFollowedByMoreAbove(const UCaseProps *csp, UCaseContextIterator *iter, void *c
/* Is followed by a dot above (without cc==230 in between) ? */ /* Is followed by a dot above (without cc==230 in between) ? */
static UBool static UBool
isFollowedByDotAbove(const UCaseProps *csp, UCaseContextIterator *iter, void *context) { isFollowedByDotAbove(UCaseContextIterator *iter, void *context) {
UChar32 c; UChar32 c;
int32_t dotType; int32_t dotType;
int8_t dir; int8_t dir;
@ -802,7 +860,7 @@ isFollowedByDotAbove(const UCaseProps *csp, UCaseContextIterator *iter, void *co
if(c==0x307) { if(c==0x307) {
return TRUE; return TRUE;
} }
dotType=getDotType(csp, c); dotType=getDotType(c);
if(dotType!=UCASE_OTHER_ACCENT) { if(dotType!=UCASE_OTHER_ACCENT) {
return FALSE; /* next base character or cc==230 in between */ return FALSE; /* next base character or cc==230 in between */
} }
@ -812,20 +870,20 @@ isFollowedByDotAbove(const UCaseProps *csp, UCaseContextIterator *iter, void *co
} }
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
ucase_toFullLower(const UCaseProps *csp, UChar32 c, ucase_toFullLower(UChar32 c,
UCaseContextIterator *iter, void *context, UCaseContextIterator *iter, void *context,
const UChar **pString, const UChar **pString,
const char *locale, int32_t *locCache) { int32_t loc) {
// The sign of the result has meaning, input must be non-negative so that it can be returned as is. // The sign of the result has meaning, input must be non-negative so that it can be returned as is.
U_ASSERT(c >= 0); U_ASSERT(c >= 0);
UChar32 result=c; UChar32 result=c;
uint16_t props=UTRIE2_GET16(&csp->trie, c); uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
if(!PROPS_HAS_EXCEPTION(props)) { if(!PROPS_HAS_EXCEPTION(props)) {
if(UCASE_GET_TYPE(props)>=UCASE_UPPER) { if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
result=c+UCASE_GET_DELTA(props); result=c+UCASE_GET_DELTA(props);
} }
} else { } else {
const uint16_t *pe=GET_EXCEPTIONS(csp, props), *pe2; const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props), *pe2;
uint16_t excWord=*pe++; uint16_t excWord=*pe++;
int32_t full; int32_t full;
@ -833,7 +891,6 @@ ucase_toFullLower(const UCaseProps *csp, UChar32 c,
if(excWord&UCASE_EXC_CONDITIONAL_SPECIAL) { if(excWord&UCASE_EXC_CONDITIONAL_SPECIAL) {
/* use hardcoded conditions and mappings */ /* use hardcoded conditions and mappings */
int32_t loc=ucase_getCaseLocale(locale, locCache);
/* /*
* Test for conditional mappings first * Test for conditional mappings first
@ -844,7 +901,7 @@ ucase_toFullLower(const UCaseProps *csp, UChar32 c,
if( loc==UCASE_LOC_LITHUANIAN && if( loc==UCASE_LOC_LITHUANIAN &&
/* base characters, find accents above */ /* base characters, find accents above */
(((c==0x49 || c==0x4a || c==0x12e) && (((c==0x49 || c==0x4a || c==0x12e) &&
isFollowedByMoreAbove(csp, iter, context)) || isFollowedByMoreAbove(iter, context)) ||
/* precomposed with accent above, no need to find one */ /* precomposed with accent above, no need to find one */
(c==0xcc || c==0xcd || c==0x128)) (c==0xcc || c==0xcd || c==0x128))
) { ) {
@ -896,7 +953,7 @@ ucase_toFullLower(const UCaseProps *csp, UChar32 c,
0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE 0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE
*/ */
return 0x69; return 0x69;
} else if(loc==UCASE_LOC_TURKISH && c==0x307 && isPrecededBy_I(csp, iter, context)) { } else if(loc==UCASE_LOC_TURKISH && c==0x307 && isPrecededBy_I(iter, context)) {
/* /*
# When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i. # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
# This matches the behavior of the canonically equivalent I-dot_above # This matches the behavior of the canonically equivalent I-dot_above
@ -905,7 +962,7 @@ ucase_toFullLower(const UCaseProps *csp, UChar32 c,
0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
*/ */
return 0; /* remove the dot (continue without output) */ return 0; /* remove the dot (continue without output) */
} else if(loc==UCASE_LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(csp, iter, context)) { } else if(loc==UCASE_LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter, context)) {
/* /*
# When lowercasing, unless an I is before a dot_above, it turns into a dotless i. # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
@ -922,8 +979,8 @@ ucase_toFullLower(const UCaseProps *csp, UChar32 c,
*pString=iDot; *pString=iDot;
return 2; return 2;
} else if( c==0x3a3 && } else if( c==0x3a3 &&
!isFollowedByCasedLetter(csp, iter, context, 1) && !isFollowedByCasedLetter(iter, context, 1) &&
isFollowedByCasedLetter(csp, iter, context, -1) /* -1=preceded */ isFollowedByCasedLetter(iter, context, -1) /* -1=preceded */
) { ) {
/* greek capital sigma maps depending on surrounding cased letters (see SpecialCasing.txt) */ /* greek capital sigma maps depending on surrounding cased letters (see SpecialCasing.txt) */
/* /*
@ -957,21 +1014,21 @@ ucase_toFullLower(const UCaseProps *csp, UChar32 c,
/* internal */ /* internal */
static int32_t static int32_t
toUpperOrTitle(const UCaseProps *csp, UChar32 c, toUpperOrTitle(UChar32 c,
UCaseContextIterator *iter, void *context, UCaseContextIterator *iter, void *context,
const UChar **pString, const UChar **pString,
const char *locale, int32_t *locCache, int32_t loc,
UBool upperNotTitle) { UBool upperNotTitle) {
// The sign of the result has meaning, input must be non-negative so that it can be returned as is. // The sign of the result has meaning, input must be non-negative so that it can be returned as is.
U_ASSERT(c >= 0); U_ASSERT(c >= 0);
UChar32 result=c; UChar32 result=c;
uint16_t props=UTRIE2_GET16(&csp->trie, c); uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
if(!PROPS_HAS_EXCEPTION(props)) { if(!PROPS_HAS_EXCEPTION(props)) {
if(UCASE_GET_TYPE(props)==UCASE_LOWER) { if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
result=c+UCASE_GET_DELTA(props); result=c+UCASE_GET_DELTA(props);
} }
} else { } else {
const uint16_t *pe=GET_EXCEPTIONS(csp, props), *pe2; const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props), *pe2;
uint16_t excWord=*pe++; uint16_t excWord=*pe++;
int32_t full, idx; int32_t full, idx;
@ -979,8 +1036,6 @@ toUpperOrTitle(const UCaseProps *csp, UChar32 c,
if(excWord&UCASE_EXC_CONDITIONAL_SPECIAL) { if(excWord&UCASE_EXC_CONDITIONAL_SPECIAL) {
/* use hardcoded conditions and mappings */ /* use hardcoded conditions and mappings */
int32_t loc=ucase_getCaseLocale(locale, locCache);
if(loc==UCASE_LOC_TURKISH && c==0x69) { if(loc==UCASE_LOC_TURKISH && c==0x69) {
/* /*
# Turkish and Azeri # Turkish and Azeri
@ -994,7 +1049,7 @@ toUpperOrTitle(const UCaseProps *csp, UChar32 c,
0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I 0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I
*/ */
return 0x130; return 0x130;
} else if(loc==UCASE_LOC_LITHUANIAN && c==0x307 && isPrecededBySoftDotted(csp, iter, context)) { } else if(loc==UCASE_LOC_LITHUANIAN && c==0x307 && isPrecededBySoftDotted(iter, context)) {
/* /*
# Lithuanian # Lithuanian
@ -1052,19 +1107,19 @@ toUpperOrTitle(const UCaseProps *csp, UChar32 c,
} }
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
ucase_toFullUpper(const UCaseProps *csp, UChar32 c, ucase_toFullUpper(UChar32 c,
UCaseContextIterator *iter, void *context, UCaseContextIterator *iter, void *context,
const UChar **pString, const UChar **pString,
const char *locale, int32_t *locCache) { int32_t caseLocale) {
return toUpperOrTitle(csp, c, iter, context, pString, locale, locCache, TRUE); return toUpperOrTitle(c, iter, context, pString, caseLocale, TRUE);
} }
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
ucase_toFullTitle(const UCaseProps *csp, UChar32 c, ucase_toFullTitle(UChar32 c,
UCaseContextIterator *iter, void *context, UCaseContextIterator *iter, void *context,
const UChar **pString, const UChar **pString,
const char *locale, int32_t *locCache) { int32_t caseLocale) {
return toUpperOrTitle(csp, c, iter, context, pString, locale, locCache, FALSE); return toUpperOrTitle(c, iter, context, pString, caseLocale, FALSE);
} }
/* case folding ------------------------------------------------------------- */ /* case folding ------------------------------------------------------------- */
@ -1110,14 +1165,14 @@ ucase_toFullTitle(const UCaseProps *csp, UChar32 c,
/* return the simple case folding mapping for c */ /* return the simple case folding mapping for c */
U_CAPI UChar32 U_EXPORT2 U_CAPI UChar32 U_EXPORT2
ucase_fold(const UCaseProps *csp, UChar32 c, uint32_t options) { ucase_fold(UChar32 c, uint32_t options) {
uint16_t props=UTRIE2_GET16(&csp->trie, c); uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
if(!PROPS_HAS_EXCEPTION(props)) { if(!PROPS_HAS_EXCEPTION(props)) {
if(UCASE_GET_TYPE(props)>=UCASE_UPPER) { if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
c+=UCASE_GET_DELTA(props); c+=UCASE_GET_DELTA(props);
} }
} else { } else {
const uint16_t *pe=GET_EXCEPTIONS(csp, props); const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
uint16_t excWord=*pe++; uint16_t excWord=*pe++;
int32_t idx; int32_t idx;
if(excWord&UCASE_EXC_CONDITIONAL_FOLD) { if(excWord&UCASE_EXC_CONDITIONAL_FOLD) {
@ -1170,19 +1225,19 @@ ucase_fold(const UCaseProps *csp, UChar32 c, uint32_t options) {
*/ */
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
ucase_toFullFolding(const UCaseProps *csp, UChar32 c, ucase_toFullFolding(UChar32 c,
const UChar **pString, const UChar **pString,
uint32_t options) { uint32_t options) {
// The sign of the result has meaning, input must be non-negative so that it can be returned as is. // The sign of the result has meaning, input must be non-negative so that it can be returned as is.
U_ASSERT(c >= 0); U_ASSERT(c >= 0);
UChar32 result=c; UChar32 result=c;
uint16_t props=UTRIE2_GET16(&csp->trie, c); uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
if(!PROPS_HAS_EXCEPTION(props)) { if(!PROPS_HAS_EXCEPTION(props)) {
if(UCASE_GET_TYPE(props)>=UCASE_UPPER) { if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
result=c+UCASE_GET_DELTA(props); result=c+UCASE_GET_DELTA(props);
} }
} else { } else {
const uint16_t *pe=GET_EXCEPTIONS(csp, props), *pe2; const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props), *pe2;
uint16_t excWord=*pe++; uint16_t excWord=*pe++;
int32_t full, idx; int32_t full, idx;
@ -1244,66 +1299,59 @@ ucase_toFullFolding(const UCaseProps *csp, UChar32 c,
/* case mapping properties API ---------------------------------------------- */ /* case mapping properties API ---------------------------------------------- */
#define GET_CASE_PROPS() &ucase_props_singleton
/* public API (see uchar.h) */ /* public API (see uchar.h) */
U_CAPI UBool U_EXPORT2 U_CAPI UBool U_EXPORT2
u_isULowercase(UChar32 c) { u_isULowercase(UChar32 c) {
return (UBool)(UCASE_LOWER==ucase_getType(GET_CASE_PROPS(), c)); return (UBool)(UCASE_LOWER==ucase_getType(c));
} }
U_CAPI UBool U_EXPORT2 U_CAPI UBool U_EXPORT2
u_isUUppercase(UChar32 c) { u_isUUppercase(UChar32 c) {
return (UBool)(UCASE_UPPER==ucase_getType(GET_CASE_PROPS(), c)); return (UBool)(UCASE_UPPER==ucase_getType(c));
} }
/* Transforms the Unicode character to its lower case equivalent.*/ /* Transforms the Unicode character to its lower case equivalent.*/
U_CAPI UChar32 U_EXPORT2 U_CAPI UChar32 U_EXPORT2
u_tolower(UChar32 c) { u_tolower(UChar32 c) {
return ucase_tolower(GET_CASE_PROPS(), c); return ucase_tolower(c);
} }
/* Transforms the Unicode character to its upper case equivalent.*/ /* Transforms the Unicode character to its upper case equivalent.*/
U_CAPI UChar32 U_EXPORT2 U_CAPI UChar32 U_EXPORT2
u_toupper(UChar32 c) { u_toupper(UChar32 c) {
return ucase_toupper(GET_CASE_PROPS(), c); return ucase_toupper(c);
} }
/* Transforms the Unicode character to its title case equivalent.*/ /* Transforms the Unicode character to its title case equivalent.*/
U_CAPI UChar32 U_EXPORT2 U_CAPI UChar32 U_EXPORT2
u_totitle(UChar32 c) { u_totitle(UChar32 c) {
return ucase_totitle(GET_CASE_PROPS(), c); return ucase_totitle(c);
} }
/* return the simple case folding mapping for c */ /* return the simple case folding mapping for c */
U_CAPI UChar32 U_EXPORT2 U_CAPI UChar32 U_EXPORT2
u_foldCase(UChar32 c, uint32_t options) { u_foldCase(UChar32 c, uint32_t options) {
return ucase_fold(GET_CASE_PROPS(), c, options); return ucase_fold(c, options);
} }
U_CFUNC int32_t U_EXPORT2 U_CFUNC int32_t U_EXPORT2
ucase_hasBinaryProperty(UChar32 c, UProperty which) { ucase_hasBinaryProperty(UChar32 c, UProperty which) {
/* case mapping properties */ /* case mapping properties */
const UChar *resultString; const UChar *resultString;
int32_t locCache;
const UCaseProps *csp=GET_CASE_PROPS();
if(csp==NULL) {
return FALSE;
}
switch(which) { switch(which) {
case UCHAR_LOWERCASE: case UCHAR_LOWERCASE:
return (UBool)(UCASE_LOWER==ucase_getType(csp, c)); return (UBool)(UCASE_LOWER==ucase_getType(c));
case UCHAR_UPPERCASE: case UCHAR_UPPERCASE:
return (UBool)(UCASE_UPPER==ucase_getType(csp, c)); return (UBool)(UCASE_UPPER==ucase_getType(c));
case UCHAR_SOFT_DOTTED: case UCHAR_SOFT_DOTTED:
return ucase_isSoftDotted(csp, c); return ucase_isSoftDotted(c);
case UCHAR_CASE_SENSITIVE: case UCHAR_CASE_SENSITIVE:
return ucase_isCaseSensitive(csp, c); return ucase_isCaseSensitive(c);
case UCHAR_CASED: case UCHAR_CASED:
return (UBool)(UCASE_NONE!=ucase_getType(csp, c)); return (UBool)(UCASE_NONE!=ucase_getType(c));
case UCHAR_CASE_IGNORABLE: case UCHAR_CASE_IGNORABLE:
return (UBool)(ucase_getTypeOrIgnorable(csp, c)>>2); return (UBool)(ucase_getTypeOrIgnorable(c)>>2);
/* /*
* Note: The following Changes_When_Xyz are defined as testing whether * Note: The following Changes_When_Xyz are defined as testing whether
* the NFD form of the input changes when Xyz-case-mapped. * the NFD form of the input changes when Xyz-case-mapped.
@ -1317,21 +1365,17 @@ ucase_hasBinaryProperty(UChar32 c, UProperty which) {
* start sets for normalization and case mappings. * start sets for normalization and case mappings.
*/ */
case UCHAR_CHANGES_WHEN_LOWERCASED: case UCHAR_CHANGES_WHEN_LOWERCASED:
locCache=UCASE_LOC_ROOT; return (UBool)(ucase_toFullLower(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
return (UBool)(ucase_toFullLower(csp, c, NULL, NULL, &resultString, "", &locCache)>=0);
case UCHAR_CHANGES_WHEN_UPPERCASED: case UCHAR_CHANGES_WHEN_UPPERCASED:
locCache=UCASE_LOC_ROOT; return (UBool)(ucase_toFullUpper(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
return (UBool)(ucase_toFullUpper(csp, c, NULL, NULL, &resultString, "", &locCache)>=0);
case UCHAR_CHANGES_WHEN_TITLECASED: case UCHAR_CHANGES_WHEN_TITLECASED:
locCache=UCASE_LOC_ROOT; return (UBool)(ucase_toFullTitle(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
return (UBool)(ucase_toFullTitle(csp, c, NULL, NULL, &resultString, "", &locCache)>=0);
/* case UCHAR_CHANGES_WHEN_CASEFOLDED: -- in uprops.c */ /* case UCHAR_CHANGES_WHEN_CASEFOLDED: -- in uprops.c */
case UCHAR_CHANGES_WHEN_CASEMAPPED: case UCHAR_CHANGES_WHEN_CASEMAPPED:
locCache=UCASE_LOC_ROOT;
return (UBool)( return (UBool)(
ucase_toFullLower(csp, c, NULL, NULL, &resultString, "", &locCache)>=0 || ucase_toFullLower(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0 ||
ucase_toFullUpper(csp, c, NULL, NULL, &resultString, "", &locCache)>=0 || ucase_toFullUpper(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0 ||
ucase_toFullTitle(csp, c, NULL, NULL, &resultString, "", &locCache)>=0); ucase_toFullTitle(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
default: default:
return FALSE; return FALSE;
} }

View File

@ -37,18 +37,8 @@ U_NAMESPACE_END
/* library API -------------------------------------------------------------- */ /* library API -------------------------------------------------------------- */
U_CDECL_BEGIN
struct UCaseProps;
typedef struct UCaseProps UCaseProps;
U_CDECL_END
U_CAPI const UCaseProps * U_EXPORT2
ucase_getSingleton(void);
U_CFUNC void U_EXPORT2 U_CFUNC void U_EXPORT2
ucase_addPropertyStarts(const UCaseProps *csp, const USetAdder *sa, UErrorCode *pErrorCode); ucase_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode);
/** /**
* Requires non-NULL locale ID but otherwise does the equivalent of * Requires non-NULL locale ID but otherwise does the equivalent of
@ -56,7 +46,7 @@ ucase_addPropertyStarts(const UCaseProps *csp, const USetAdder *sa, UErrorCode *
* Accepts both 2- and 3-letter codes and accepts case variants. * Accepts both 2- and 3-letter codes and accepts case variants.
*/ */
U_CFUNC int32_t U_CFUNC int32_t
ucase_getCaseLocale(const char *locale, int32_t *locCache); ucase_getCaseLocale(const char *locale);
/* Casing locale types for ucase_getCaseLocale */ /* Casing locale types for ucase_getCaseLocale */
enum { enum {
@ -87,16 +77,16 @@ enum {
/* single-code point functions */ /* single-code point functions */
U_CAPI UChar32 U_EXPORT2 U_CAPI UChar32 U_EXPORT2
ucase_tolower(const UCaseProps *csp, UChar32 c); ucase_tolower(UChar32 c);
U_CAPI UChar32 U_EXPORT2 U_CAPI UChar32 U_EXPORT2
ucase_toupper(const UCaseProps *csp, UChar32 c); ucase_toupper(UChar32 c);
U_CAPI UChar32 U_EXPORT2 U_CAPI UChar32 U_EXPORT2
ucase_totitle(const UCaseProps *csp, UChar32 c); ucase_totitle(UChar32 c);
U_CAPI UChar32 U_EXPORT2 U_CAPI UChar32 U_EXPORT2
ucase_fold(const UCaseProps *csp, UChar32 c, uint32_t options); ucase_fold(UChar32 c, uint32_t options);
/** /**
* Adds all simple case mappings and the full case folding for c to sa, * Adds all simple case mappings and the full case folding for c to sa,
@ -108,7 +98,7 @@ ucase_fold(const UCaseProps *csp, UChar32 c, uint32_t options);
* - for k include the Kelvin sign * - for k include the Kelvin sign
*/ */
U_CFUNC void U_EXPORT2 U_CFUNC void U_EXPORT2
ucase_addCaseClosure(const UCaseProps *csp, UChar32 c, const USetAdder *sa); ucase_addCaseClosure(UChar32 c, const USetAdder *sa);
/** /**
* Maps the string to single code points and adds the associated case closure * Maps the string to single code points and adds the associated case closure
@ -123,7 +113,7 @@ ucase_addCaseClosure(const UCaseProps *csp, UChar32 c, const USetAdder *sa);
* @return TRUE if the string was found * @return TRUE if the string was found
*/ */
U_CFUNC UBool U_EXPORT2 U_CFUNC UBool U_EXPORT2
ucase_addStringCaseClosure(const UCaseProps *csp, const UChar *s, int32_t length, const USetAdder *sa); ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa);
#ifdef __cplusplus #ifdef __cplusplus
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
@ -157,17 +147,17 @@ U_NAMESPACE_END
/** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */ /** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
ucase_getType(const UCaseProps *csp, UChar32 c); ucase_getType(UChar32 c);
/** @return like ucase_getType() but also sets UCASE_IGNORABLE if c is case-ignorable */ /** @return like ucase_getType() but also sets UCASE_IGNORABLE if c is case-ignorable */
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
ucase_getTypeOrIgnorable(const UCaseProps *csp, UChar32 c); ucase_getTypeOrIgnorable(UChar32 c);
U_CAPI UBool U_EXPORT2 U_CAPI UBool U_EXPORT2
ucase_isSoftDotted(const UCaseProps *csp, UChar32 c); ucase_isSoftDotted(UChar32 c);
U_CAPI UBool U_EXPORT2 U_CAPI UBool U_EXPORT2
ucase_isCaseSensitive(const UCaseProps *csp, UChar32 c); ucase_isCaseSensitive(UChar32 c);
/* string case mapping functions */ /* string case mapping functions */
@ -240,10 +230,7 @@ enum {
* @param context Pointer to be passed into iter. * @param context Pointer to be passed into iter.
* @param pString If the mapping result is a string, then the pointer is * @param pString If the mapping result is a string, then the pointer is
* written to *pString. * written to *pString.
* @param locale Locale ID for locale-dependent mappings. * @param caseLocale Case locale value from ucase_getCaseLocale().
* @param locCache Initialize to 0; may be used to cache the result of parsing
* the locale ID for subsequent calls.
* Can be NULL.
* @return Output code point or string length, see UCASE_MAX_STRING_LENGTH. * @return Output code point or string length, see UCASE_MAX_STRING_LENGTH.
* *
* @see UCaseContextIterator * @see UCaseContextIterator
@ -251,25 +238,25 @@ enum {
* @internal * @internal
*/ */
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
ucase_toFullLower(const UCaseProps *csp, UChar32 c, ucase_toFullLower(UChar32 c,
UCaseContextIterator *iter, void *context, UCaseContextIterator *iter, void *context,
const UChar **pString, const UChar **pString,
const char *locale, int32_t *locCache); int32_t caseLocale);
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
ucase_toFullUpper(const UCaseProps *csp, UChar32 c, ucase_toFullUpper(UChar32 c,
UCaseContextIterator *iter, void *context, UCaseContextIterator *iter, void *context,
const UChar **pString, const UChar **pString,
const char *locale, int32_t *locCache); int32_t caseLocale);
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
ucase_toFullTitle(const UCaseProps *csp, UChar32 c, ucase_toFullTitle(UChar32 c,
UCaseContextIterator *iter, void *context, UCaseContextIterator *iter, void *context,
const UChar **pString, const UChar **pString,
const char *locale, int32_t *locCache); int32_t caseLocale);
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
ucase_toFullFolding(const UCaseProps *csp, UChar32 c, ucase_toFullFolding(UChar32 c,
const UChar **pString, const UChar **pString,
uint32_t options); uint32_t options);
@ -283,10 +270,10 @@ U_CDECL_BEGIN
* @internal * @internal
*/ */
typedef int32_t U_CALLCONV typedef int32_t U_CALLCONV
UCaseMapFull(const UCaseProps *csp, UChar32 c, UCaseMapFull(UChar32 c,
UCaseContextIterator *iter, void *context, UCaseContextIterator *iter, void *context,
const UChar **pString, const UChar **pString,
const char *locale, int32_t *locCache); int32_t caseLocale);
U_CDECL_END U_CDECL_END

View File

@ -33,46 +33,46 @@
#include "cmemory.h" #include "cmemory.h"
#include "cstring.h" #include "cstring.h"
#include "ucase.h" #include "ucase.h"
#include "ucasemap_imp.h"
#include "ustr_imp.h" #include "ustr_imp.h"
U_NAMESPACE_USE U_NAMESPACE_USE
/* UCaseMap service object -------------------------------------------------- */ /* UCaseMap service object -------------------------------------------------- */
UCaseMap::UCaseMap(const char *localeID, uint32_t opts, UErrorCode *pErrorCode) :
#if !UCONFIG_NO_BREAK_ITERATION
iter(NULL),
#endif
caseLocale(UCASE_LOC_UNKNOWN), options(opts) {
ucasemap_setLocale(this, localeID, pErrorCode);
}
UCaseMap::~UCaseMap() {
#if !UCONFIG_NO_BREAK_ITERATION
delete iter;
#endif
}
U_CAPI UCaseMap * U_EXPORT2 U_CAPI UCaseMap * U_EXPORT2
ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode) { ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode) {
UCaseMap *csm;
if(U_FAILURE(*pErrorCode)) { if(U_FAILURE(*pErrorCode)) {
return NULL; return NULL;
} }
UCaseMap *csm = new UCaseMap(locale, options, pErrorCode);
csm=(UCaseMap *)uprv_malloc(sizeof(UCaseMap));
if(csm==NULL) { if(csm==NULL) {
*pErrorCode = U_MEMORY_ALLOCATION_ERROR;
return NULL;
} else if (U_FAILURE(*pErrorCode)) {
delete csm;
return NULL; return NULL;
} }
uprv_memset(csm, 0, sizeof(UCaseMap));
csm->csp=ucase_getSingleton();
ucasemap_setLocale(csm, locale, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
uprv_free(csm);
return NULL;
}
csm->options=options;
return csm; return csm;
} }
U_CAPI void U_EXPORT2 U_CAPI void U_EXPORT2
ucasemap_close(UCaseMap *csm) { ucasemap_close(UCaseMap *csm) {
if(csm!=NULL) { delete csm;
#if !UCONFIG_NO_BREAK_ITERATION
// Do not call ubrk_close() so that we do not depend on all of the BreakIterator code.
delete reinterpret_cast<BreakIterator *>(csm->iter);
#endif
uprv_free(csm);
}
} }
U_CAPI const char * U_EXPORT2 U_CAPI const char * U_EXPORT2
@ -87,13 +87,16 @@ ucasemap_getOptions(const UCaseMap *csm) {
U_CAPI void U_EXPORT2 U_CAPI void U_EXPORT2
ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) { ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) {
int32_t length;
if(U_FAILURE(*pErrorCode)) { if(U_FAILURE(*pErrorCode)) {
return; return;
} }
if (locale != NULL && *locale == 0) {
csm->locale[0] = 0;
csm->caseLocale = UCASE_LOC_ROOT;
return;
}
length=uloc_getName(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode); int32_t length=uloc_getName(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode);
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || length==sizeof(csm->locale)) { if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || length==sizeof(csm->locale)) {
*pErrorCode=U_ZERO_ERROR; *pErrorCode=U_ZERO_ERROR;
/* we only really need the language code for case mappings */ /* we only really need the language code for case mappings */
@ -102,16 +105,20 @@ ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) {
if(length==sizeof(csm->locale)) { if(length==sizeof(csm->locale)) {
*pErrorCode=U_BUFFER_OVERFLOW_ERROR; *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
} }
csm->locCache=0;
if(U_SUCCESS(*pErrorCode)) { if(U_SUCCESS(*pErrorCode)) {
ucase_getCaseLocale(csm->locale, &csm->locCache); csm->caseLocale=UCASE_LOC_UNKNOWN;
csm->caseLocale = ucase_getCaseLocale(csm->locale);
} else { } else {
csm->locale[0]=0; csm->locale[0]=0;
csm->caseLocale = UCASE_LOC_ROOT;
} }
} }
U_CAPI void U_EXPORT2 U_CAPI void U_EXPORT2
ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode * /*pErrorCode*/) { ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode) {
if(U_FAILURE(*pErrorCode)) {
return;
}
csm->options=options; csm->options=options;
} }
@ -258,7 +265,7 @@ utf8_caseContextIterator(void *context, int8_t dir) {
* context [0..srcLength[ into account. * context [0..srcLength[ into account.
*/ */
static int32_t static int32_t
_caseMap(const UCaseMap *csm, UCaseMapFull *map, _caseMap(int32_t caseLocale, uint32_t /* TODO: options */, UCaseMapFull *map,
uint8_t *dest, int32_t destCapacity, uint8_t *dest, int32_t destCapacity,
const uint8_t *src, UCaseContext *csc, const uint8_t *src, UCaseContext *csc,
int32_t srcStart, int32_t srcLimit, int32_t srcStart, int32_t srcLimit,
@ -266,9 +273,6 @@ _caseMap(const UCaseMap *csm, UCaseMapFull *map,
const UChar *s = NULL; const UChar *s = NULL;
UChar32 c, c2 = 0; UChar32 c, c2 = 0;
int32_t srcIndex, destIndex; int32_t srcIndex, destIndex;
int32_t locCache;
locCache=csm->locCache;
/* case mapping loop */ /* case mapping loop */
srcIndex=srcStart; srcIndex=srcStart;
@ -286,7 +290,7 @@ _caseMap(const UCaseMap *csm, UCaseMapFull *map,
} }
continue; continue;
} }
c=map(csm->csp, c, utf8_caseContextIterator, csc, &s, csm->locale, &locCache); c=map(c, utf8_caseContextIterator, csc, &s, caseLocale);
if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) { if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) {
/* fast path version of appendResult() for ASCII results */ /* fast path version of appendResult() for ASCII results */
dest[destIndex++]=(uint8_t)c2; dest[destIndex++]=(uint8_t)c2;
@ -308,10 +312,11 @@ _caseMap(const UCaseMap *csm, UCaseMapFull *map,
#if !UCONFIG_NO_BREAK_ITERATION #if !UCONFIG_NO_BREAK_ITERATION
U_CFUNC int32_t U_CALLCONV U_CFUNC int32_t U_CALLCONV
ucasemap_internalUTF8ToTitle(const UCaseMap *csm, ucasemap_internalUTF8ToTitle(
uint8_t *dest, int32_t destCapacity, int32_t caseLocale, uint32_t options, BreakIterator *iter,
const uint8_t *src, int32_t srcLength, uint8_t *dest, int32_t destCapacity,
UErrorCode *pErrorCode) { const uint8_t *src, int32_t srcLength,
UErrorCode *pErrorCode) {
const UChar *s; const UChar *s;
UChar32 c; UChar32 c;
int32_t prev, titleStart, titleLimit, idx, destIndex; int32_t prev, titleStart, titleLimit, idx, destIndex;
@ -321,12 +326,7 @@ ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
return 0; return 0;
} }
// Use the C++ abstract base class to minimize dependencies.
// TODO: Change UCaseMap.iter to store a BreakIterator directly.
BreakIterator *bi=reinterpret_cast<BreakIterator *>(csm->iter);
/* set up local variables */ /* set up local variables */
int32_t locCache=csm->locCache;
UCaseContext csc=UCASECONTEXT_INITIALIZER; UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src; csc.p=(void *)src;
csc.limit=srcLength; csc.limit=srcLength;
@ -339,9 +339,9 @@ ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
/* find next index where to titlecase */ /* find next index where to titlecase */
if(isFirstIndex) { if(isFirstIndex) {
isFirstIndex=FALSE; isFirstIndex=FALSE;
idx=bi->first(); idx=iter->first();
} else { } else {
idx=bi->next(); idx=iter->next();
} }
if(idx==UBRK_DONE || idx>srcLength) { if(idx==UBRK_DONE || idx>srcLength) {
idx=srcLength; idx=srcLength;
@ -364,7 +364,7 @@ ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
/* find and copy uncased characters [prev..titleStart[ */ /* find and copy uncased characters [prev..titleStart[ */
titleStart=titleLimit=prev; titleStart=titleLimit=prev;
U8_NEXT(src, titleLimit, idx, c); U8_NEXT(src, titleLimit, idx, c);
if((csm->options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(csm->csp, c)) { if((options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(c)) {
/* Adjust the titlecasing index (titleStart) to the next cased character. */ /* Adjust the titlecasing index (titleStart) to the next cased character. */
for(;;) { for(;;) {
titleStart=titleLimit; titleStart=titleLimit;
@ -376,7 +376,7 @@ ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
break; break;
} }
U8_NEXT(src, titleLimit, idx, c); U8_NEXT(src, titleLimit, idx, c);
if(UCASE_NONE!=ucase_getType(csm->csp, c)) { if(UCASE_NONE!=ucase_getType(c)) {
break; /* cased letter at [titleStart..titleLimit[ */ break; /* cased letter at [titleStart..titleLimit[ */
} }
} }
@ -392,7 +392,7 @@ ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
if(c>=0) { if(c>=0) {
csc.cpStart=titleStart; csc.cpStart=titleStart;
csc.cpLimit=titleLimit; csc.cpLimit=titleLimit;
c=ucase_toFullTitle(csm->csp, c, utf8_caseContextIterator, &csc, &s, csm->locale, &locCache); c=ucase_toFullTitle(c, utf8_caseContextIterator, &csc, &s, caseLocale);
destIndex=appendResult(dest, destIndex, destCapacity, c, s); destIndex=appendResult(dest, destIndex, destCapacity, c, s);
} else { } else {
// Malformed UTF-8. // Malformed UTF-8.
@ -405,7 +405,7 @@ ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
/* Special case Dutch IJ titlecasing */ /* Special case Dutch IJ titlecasing */
if (titleStart+1 < idx && if (titleStart+1 < idx &&
ucase_getCaseLocale(csm->locale, &locCache) == UCASE_LOC_DUTCH && caseLocale == UCASE_LOC_DUTCH &&
(src[titleStart] == 0x0049 || src[titleStart] == 0x0069) && (src[titleStart] == 0x0049 || src[titleStart] == 0x0069) &&
(src[titleStart+1] == 0x004A || src[titleStart+1] == 0x006A)) { (src[titleStart+1] == 0x004A || src[titleStart+1] == 0x006A)) {
destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A); destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A);
@ -413,11 +413,11 @@ ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
} }
/* lowercase [titleLimit..index[ */ /* lowercase [titleLimit..index[ */
if(titleLimit<idx) { if(titleLimit<idx) {
if((csm->options&U_TITLECASE_NO_LOWERCASE)==0) { if((options&U_TITLECASE_NO_LOWERCASE)==0) {
/* Normal operation: Lowercase the rest of the word. */ /* Normal operation: Lowercase the rest of the word. */
destIndex+= destIndex+=
_caseMap( _caseMap(
csm, ucase_toFullLower, caseLocale, options, ucase_toFullLower,
dest+destIndex, destCapacity-destIndex, dest+destIndex, destCapacity-destIndex,
src, &csc, src, &csc,
titleLimit, idx, titleLimit, idx,
@ -454,11 +454,11 @@ ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
namespace GreekUpper { namespace GreekUpper {
UBool isFollowedByCasedLetter(const UCaseProps *csp, const uint8_t *s, int32_t i, int32_t length) { UBool isFollowedByCasedLetter(const uint8_t *s, int32_t i, int32_t length) {
while (i < length) { while (i < length) {
UChar32 c; UChar32 c;
U8_NEXT(s, i, length, c); U8_NEXT(s, i, length, c);
int32_t type = ucase_getTypeOrIgnorable(csp, c); int32_t type = ucase_getTypeOrIgnorable(c);
if ((type & UCASE_IGNORABLE) != 0) { if ((type & UCASE_IGNORABLE) != 0) {
// Case-ignorable, continue with the loop. // Case-ignorable, continue with the loop.
} else if (type != UCASE_NONE) { } else if (type != UCASE_NONE) {
@ -471,11 +471,10 @@ UBool isFollowedByCasedLetter(const UCaseProps *csp, const uint8_t *s, int32_t i
} }
// Keep this consistent with the UTF-16 version in ustrcase.cpp and the Java version in CaseMap.java. // Keep this consistent with the UTF-16 version in ustrcase.cpp and the Java version in CaseMap.java.
int32_t toUpper(const UCaseMap *csm, int32_t toUpper(int32_t caseLocale, uint32_t /* TODO: options */,
uint8_t *dest, int32_t destCapacity, uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength, const uint8_t *src, int32_t srcLength,
UErrorCode *pErrorCode) { UErrorCode *pErrorCode) {
int32_t locCache = UCASE_LOC_GREEK;
int32_t destIndex=0; int32_t destIndex=0;
uint32_t state = 0; uint32_t state = 0;
for (int32_t i = 0; i < srcLength;) { for (int32_t i = 0; i < srcLength;) {
@ -483,7 +482,7 @@ int32_t toUpper(const UCaseMap *csm,
UChar32 c; UChar32 c;
U8_NEXT(src, nextIndex, srcLength, c); U8_NEXT(src, nextIndex, srcLength, c);
uint32_t nextState = 0; uint32_t nextState = 0;
int32_t type = ucase_getTypeOrIgnorable(csm->csp, c); int32_t type = ucase_getTypeOrIgnorable(c);
if ((type & UCASE_IGNORABLE) != 0) { if ((type & UCASE_IGNORABLE) != 0) {
// c is case-ignorable // c is case-ignorable
nextState |= (state & AFTER_CASED); nextState |= (state & AFTER_CASED);
@ -533,7 +532,7 @@ int32_t toUpper(const UCaseMap *csm,
(data & HAS_ACCENT) != 0 && (data & HAS_ACCENT) != 0 &&
numYpogegrammeni == 0 && numYpogegrammeni == 0 &&
(state & AFTER_CASED) == 0 && (state & AFTER_CASED) == 0 &&
!isFollowedByCasedLetter(csm->csp, src, nextIndex, srcLength)) { !isFollowedByCasedLetter(src, nextIndex, srcLength)) {
// Keep disjunctive "or" with (only) a tonos. // Keep disjunctive "or" with (only) a tonos.
// We use the same "word boundary" conditions as for the Final_Sigma test. // We use the same "word boundary" conditions as for the Final_Sigma test.
if (i == nextIndex) { if (i == nextIndex) {
@ -569,7 +568,7 @@ int32_t toUpper(const UCaseMap *csm,
} else if(c>=0) { } else if(c>=0) {
const UChar *s; const UChar *s;
UChar32 c2 = 0; UChar32 c2 = 0;
c=ucase_toFullUpper(csm->csp, c, NULL, NULL, &s, csm->locale, &locCache); c=ucase_toFullUpper(c, NULL, NULL, &s, caseLocale);
if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) { if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) {
/* fast path version of appendResult() for ASCII results */ /* fast path version of appendResult() for ASCII results */
dest[destIndex++]=(uint8_t)c2; dest[destIndex++]=(uint8_t)c2;
@ -602,7 +601,7 @@ int32_t toUpper(const UCaseMap *csm,
U_NAMESPACE_END U_NAMESPACE_END
static int32_t U_CALLCONV static int32_t U_CALLCONV
ucasemap_internalUTF8ToLower(const UCaseMap *csm, ucasemap_internalUTF8ToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
uint8_t *dest, int32_t destCapacity, uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength, const uint8_t *src, int32_t srcLength,
UErrorCode *pErrorCode) { UErrorCode *pErrorCode) {
@ -610,37 +609,35 @@ ucasemap_internalUTF8ToLower(const UCaseMap *csm,
csc.p=(void *)src; csc.p=(void *)src;
csc.limit=srcLength; csc.limit=srcLength;
return _caseMap( return _caseMap(
csm, ucase_toFullLower, caseLocale, options, ucase_toFullLower,
dest, destCapacity, dest, destCapacity,
src, &csc, 0, srcLength, src, &csc, 0, srcLength,
pErrorCode); pErrorCode);
} }
static int32_t U_CALLCONV static int32_t U_CALLCONV
ucasemap_internalUTF8ToUpper(const UCaseMap *csm, ucasemap_internalUTF8ToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
uint8_t *dest, int32_t destCapacity, uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength, const uint8_t *src, int32_t srcLength,
UErrorCode *pErrorCode) { UErrorCode *pErrorCode) {
int32_t locCache = csm->locCache; if (caseLocale == UCASE_LOC_GREEK) {
if (ucase_getCaseLocale(csm->locale, &locCache) == UCASE_LOC_GREEK) { return GreekUpper::toUpper(caseLocale, options, dest, destCapacity, src, srcLength, pErrorCode);
return GreekUpper::toUpper(csm, dest, destCapacity, src, srcLength, pErrorCode);
} }
UCaseContext csc=UCASECONTEXT_INITIALIZER; UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src; csc.p=(void *)src;
csc.limit=srcLength; csc.limit=srcLength;
return _caseMap( return _caseMap(
csm, ucase_toFullUpper, caseLocale, options, ucase_toFullUpper,
dest, destCapacity, dest, destCapacity,
src, &csc, 0, srcLength, src, &csc, 0, srcLength,
pErrorCode); pErrorCode);
} }
static int32_t static int32_t U_CALLCONV
utf8_foldCase(const UCaseProps *csp, ucasemap_internalUTF8Fold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
uint8_t *dest, int32_t destCapacity, uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength, const uint8_t *src, int32_t srcLength,
uint32_t options, UErrorCode *pErrorCode) {
UErrorCode *pErrorCode) {
int32_t srcIndex, destIndex; int32_t srcIndex, destIndex;
const UChar *s; const UChar *s;
@ -661,7 +658,7 @@ utf8_foldCase(const UCaseProps *csp,
} }
continue; continue;
} }
c=ucase_toFullFolding(csp, c, &s, options); c=ucase_toFullFolding(c, &s, options);
if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) { if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) {
/* fast path version of appendResult() for ASCII results */ /* fast path version of appendResult() for ASCII results */
dest[destIndex++]=(uint8_t)c2; dest[destIndex++]=(uint8_t)c2;
@ -680,16 +677,8 @@ utf8_foldCase(const UCaseProps *csp,
return destIndex; return destIndex;
} }
static int32_t U_CALLCONV
ucasemap_internalUTF8Fold(const UCaseMap *csm,
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength,
UErrorCode *pErrorCode) {
return utf8_foldCase(csm->csp, dest, destCapacity, src, srcLength, csm->options, pErrorCode);
}
U_CFUNC int32_t U_CFUNC int32_t
ucasemap_mapUTF8(const UCaseMap *csm, ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
uint8_t *dest, int32_t destCapacity, uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength, const uint8_t *src, int32_t srcLength,
UTF8CaseMapper *stringCaseMapper, UTF8CaseMapper *stringCaseMapper,
@ -723,7 +712,8 @@ ucasemap_mapUTF8(const UCaseMap *csm,
return 0; return 0;
} }
destLength=stringCaseMapper(csm, dest, destCapacity, src, srcLength, pErrorCode); destLength=stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
dest, destCapacity, src, srcLength, pErrorCode);
return u_terminateChars((char *)dest, destCapacity, destLength, pErrorCode); return u_terminateChars((char *)dest, destCapacity, destLength, pErrorCode);
} }
@ -734,10 +724,11 @@ ucasemap_utf8ToLower(const UCaseMap *csm,
char *dest, int32_t destCapacity, char *dest, int32_t destCapacity,
const char *src, int32_t srcLength, const char *src, int32_t srcLength,
UErrorCode *pErrorCode) { UErrorCode *pErrorCode) {
return ucasemap_mapUTF8(csm, return ucasemap_mapUTF8(
(uint8_t *)dest, destCapacity, csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
(const uint8_t *)src, srcLength, (uint8_t *)dest, destCapacity,
ucasemap_internalUTF8ToLower, pErrorCode); (const uint8_t *)src, srcLength,
ucasemap_internalUTF8ToLower, pErrorCode);
} }
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
@ -745,10 +736,11 @@ ucasemap_utf8ToUpper(const UCaseMap *csm,
char *dest, int32_t destCapacity, char *dest, int32_t destCapacity,
const char *src, int32_t srcLength, const char *src, int32_t srcLength,
UErrorCode *pErrorCode) { UErrorCode *pErrorCode) {
return ucasemap_mapUTF8(csm, return ucasemap_mapUTF8(
(uint8_t *)dest, destCapacity, csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
(const uint8_t *)src, srcLength, (uint8_t *)dest, destCapacity,
ucasemap_internalUTF8ToUpper, pErrorCode); (const uint8_t *)src, srcLength,
ucasemap_internalUTF8ToUpper, pErrorCode);
} }
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
@ -756,8 +748,9 @@ ucasemap_utf8FoldCase(const UCaseMap *csm,
char *dest, int32_t destCapacity, char *dest, int32_t destCapacity,
const char *src, int32_t srcLength, const char *src, int32_t srcLength,
UErrorCode *pErrorCode) { UErrorCode *pErrorCode) {
return ucasemap_mapUTF8(csm, return ucasemap_mapUTF8(
(uint8_t *)dest, destCapacity, UCASE_LOC_ROOT, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
(const uint8_t *)src, srcLength, (uint8_t *)dest, destCapacity,
ucasemap_internalUTF8Fold, pErrorCode); (const uint8_t *)src, srcLength,
ucasemap_internalUTF8Fold, pErrorCode);
} }

View File

@ -0,0 +1,236 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// ucasemap_imp.h
// created: 2017feb08 Markus W. Scherer
#ifndef __UCASEMAP_IMP_H__
#define __UCASEMAP_IMP_H__
#include "unicode/utypes.h"
#include "unicode/ucasemap.h"
#include "ucase.h"
#ifndef U_COMPARE_IGNORE_CASE
/* see also unorm.h */
/**
* Option bit for unorm_compare:
* Perform case-insensitive comparison.
*/
#define U_COMPARE_IGNORE_CASE 0x10000
#endif
/**
* Internal API, used by u_strcasecmp() etc.
* Compare strings case-insensitively,
* in code point order or code unit order.
*/
U_CFUNC int32_t
u_strcmpFold(const UChar *s1, int32_t length1,
const UChar *s2, int32_t length2,
uint32_t options,
UErrorCode *pErrorCode);
/**
* Internal API, used for detecting length of
* shared prefix case-insensitively.
* @param s1 input string 1
* @param length1 length of string 1, or -1 (NULL terminated)
* @param s2 input string 2
* @param length2 length of string 2, or -1 (NULL terminated)
* @param options compare options
* @param matchLen1 (output) length of partial prefix match in s1
* @param matchLen2 (output) length of partial prefix match in s2
* @param pErrorCode receives error status
*/
U_CAPI void
u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1,
const UChar *s2, int32_t length2,
uint32_t options,
int32_t *matchLen1, int32_t *matchLen2,
UErrorCode *pErrorCode);
/**
* Are the Unicode properties loaded?
* This must be used before internal functions are called that do
* not perform this check.
* Generate a debug assertion failure if data is not loaded.
*/
U_CFUNC UBool
uprv_haveProperties(UErrorCode *pErrorCode);
#ifdef __cplusplus
#include "unicode/unistr.h" // for UStringCaseMapper
/*
* Internal string casing functions implementing
* ustring.h/ustrcase.cpp and UnicodeString case mapping functions.
*/
/**
* State behind the C-level UCaseMap handle: the locale ID string, the case-mapping
* locale value derived for it, the options bit set and, when break iteration is
* compiled in, an adopted BreakIterator.
* The case mapping entry points read caseLocale, options and iter from this struct
* and pass them on as separate arguments.
*/
struct UCaseMap : public icu::UMemory {
/** Implements most of ucasemap_open(). */
UCaseMap(const char *localeID, uint32_t opts, UErrorCode *pErrorCode);
/* NOTE(review): presumably releases the adopted iter -- confirm in the implementation. */
~UCaseMap();
#if !UCONFIG_NO_BREAK_ITERATION
/* May be NULL; ucasemap_utf8ToTitle() then creates a word break iterator for `locale` on first use. */
icu::BreakIterator *iter; /* We adopt the iterator, so we own it. */
#endif
/* Locale ID string in a fixed-size buffer; used to construct the Locale for the default word break iterator. */
char locale[32];
/* Case-mapping locale value (compared against UCASE_LOC_* constants such as UCASE_LOC_GREEK and UCASE_LOC_DUTCH). */
int32_t caseLocale;
/* Options bit set, tested against flags such as U_TITLECASE_NO_LOWERCASE and U_TITLECASE_NO_BREAK_ADJUSTMENT. */
uint32_t options;
};
/*
* Helper macros for the optional BreakIterator parameter of the internal
* case mapping functions.
*
* UCASEMAP_BREAK_ITERATOR_PARAM   declares the named parameter `iter` in a signature.
* UCASEMAP_BREAK_ITERATOR_UNUSED  declares the same parameter unnamed, for functions that ignore it.
* UCASEMAP_BREAK_ITERATOR         forwards `iter` as a call argument.
* UCASEMAP_BREAK_ITERATOR_NULL    passes NULL as the iterator argument.
*
* Each non-empty expansion already ends with a comma, so the macro is written
* directly in front of the next parameter/argument without a separating comma.
* With UCONFIG_NO_BREAK_ITERATION all four expand to nothing, which removes the
* parameter from signatures and calls alike.
*/
#if UCONFIG_NO_BREAK_ITERATION
# define UCASEMAP_BREAK_ITERATOR_PARAM
# define UCASEMAP_BREAK_ITERATOR_UNUSED
# define UCASEMAP_BREAK_ITERATOR
# define UCASEMAP_BREAK_ITERATOR_NULL
#else
# define UCASEMAP_BREAK_ITERATOR_PARAM icu::BreakIterator *iter,
# define UCASEMAP_BREAK_ITERATOR_UNUSED icu::BreakIterator *,
# define UCASEMAP_BREAK_ITERATOR iter,
# define UCASEMAP_BREAK_ITERATOR_NULL NULL,
#endif
U_CFUNC int32_t
ustrcase_getCaseLocale(const char *locale);
// TODO: swap src / dest if approved for new public api
/** Implements UStringCaseMapper. */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
icu::Edits *edits,
UErrorCode &errorCode);
/** Implements UStringCaseMapper. */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
icu::Edits *edits,
UErrorCode &errorCode);
#if !UCONFIG_NO_BREAK_ITERATION
/** Implements UStringCaseMapper. */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalToTitle(int32_t caseLocale, uint32_t options,
icu::BreakIterator *iter,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
icu::Edits *edits,
UErrorCode &errorCode);
#endif
/** Implements UStringCaseMapper. */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalFold(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
icu::Edits *edits,
UErrorCode &errorCode);
/**
* Common string case mapping implementation for ucasemap_toXyz() and UnicodeString::toXyz().
* Implements argument checking.
*/
U_CFUNC int32_t
ustrcase_map(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UStringCaseMapper *stringCaseMapper,
icu::Edits *edits,
UErrorCode &errorCode);
/**
* Common string case mapping implementation for old-fashioned u_strToXyz() functions
* that allow the source string to overlap the destination buffer.
* Implements argument checking and internally works with an intermediate buffer if necessary.
*/
U_CFUNC int32_t
ustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UStringCaseMapper *stringCaseMapper,
UErrorCode &errorCode);
/**
* UTF-8 string case mapping function type, used by ucasemap_mapUTF8().
* UTF-8 version of UStringCaseMapper.
* All error checking must already have been done by the caller
* (ucasemap_mapUTF8() performs the argument checking before invoking the mapper).
* The mapper receives caseLocale and options directly rather than a UCaseMap;
* when break iteration is compiled in it also receives iter, which must be
* set up by the caller if the mapper uses it.
* src and dest must not overlap.
*
* The iter parameter exists only if !UCONFIG_NO_BREAK_ITERATION, matching the
* UCASEMAP_BREAK_ITERATOR_* macros used by the implementing functions.
*/
typedef int32_t U_CALLCONV
UTF8CaseMapper(int32_t caseLocale, uint32_t options,
#if !UCONFIG_NO_BREAK_ITERATION
icu::BreakIterator *iter,
#endif
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength,
UErrorCode *pErrorCode);
#if !UCONFIG_NO_BREAK_ITERATION
/** Implements UTF8CaseMapper. */
U_CFUNC int32_t U_CALLCONV
ucasemap_internalUTF8ToTitle(int32_t caseLocale, uint32_t options,
icu::BreakIterator *iter,
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength,
UErrorCode *pErrorCode);
#endif
/**
* Implements argument checking and buffer handling
* for UTF-8 string case mapping as a common function.
*/
U_CFUNC int32_t
ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength,
UTF8CaseMapper *stringCaseMapper,
UErrorCode *pErrorCode);
U_NAMESPACE_BEGIN
/*
* Constants and data lookup functions for Greek-specific uppercasing
* (see GreekUpper::toUpper() in the case mapping implementation files).
* The constants are bit flags that are combined and tested with bitwise operators.
*/
namespace GreekUpper {
// Data bits.
static const uint32_t UPPER_MASK = 0x3ff;
static const uint32_t HAS_VOWEL = 0x1000;
static const uint32_t HAS_YPOGEGRAMMENI = 0x2000;
static const uint32_t HAS_ACCENT = 0x4000;
static const uint32_t HAS_DIALYTIKA = 0x8000;
// Further bits during data building and processing, not stored in the data map.
static const uint32_t HAS_COMBINING_DIALYTIKA = 0x10000;
static const uint32_t HAS_OTHER_GREEK_DIACRITIC = 0x20000;
// Convenience combinations of the bits above.
static const uint32_t HAS_VOWEL_AND_ACCENT = HAS_VOWEL | HAS_ACCENT;
static const uint32_t HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA =
HAS_VOWEL_AND_ACCENT | HAS_DIALYTIKA;
static const uint32_t HAS_EITHER_DIALYTIKA = HAS_DIALYTIKA | HAS_COMBINING_DIALYTIKA;
// State bits.
static const uint32_t AFTER_CASED = 1;
static const uint32_t AFTER_VOWEL_WITH_ACCENT = 2;
// NOTE(review): presumably returns a combination of the "data bits" above for the
// code point c (0 if there is no data) -- confirm against the implementation.
uint32_t getLetterData(UChar32 c);
/**
* Returns a non-zero value for each of the Greek combining diacritics
* listed in The Unicode Standard, version 8, chapter 7.2 Greek,
* plus some perispomeni look-alikes.
*/
uint32_t getDiacriticData(UChar32 c);
} // namespace GreekUpper
U_NAMESPACE_END
#endif // __cplusplus
#endif // __UCASEMAP_IMP_H__

View File

@ -26,20 +26,22 @@
#include "unicode/ucasemap.h" #include "unicode/ucasemap.h"
#include "cmemory.h" #include "cmemory.h"
#include "ucase.h" #include "ucase.h"
#include "ustr_imp.h" #include "ucasemap_imp.h"
U_NAMESPACE_USE U_NAMESPACE_USE
U_CAPI const UBreakIterator * U_EXPORT2 U_CAPI const UBreakIterator * U_EXPORT2
ucasemap_getBreakIterator(const UCaseMap *csm) { ucasemap_getBreakIterator(const UCaseMap *csm) {
return csm->iter; return reinterpret_cast<UBreakIterator *>(csm->iter);
} }
U_CAPI void U_EXPORT2 U_CAPI void U_EXPORT2
ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode * /*pErrorCode*/) { ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode *pErrorCode) {
// Do not call ubrk_close() so that we do not depend on all of the BreakIterator code. if(U_FAILURE(*pErrorCode)) {
delete reinterpret_cast<BreakIterator *>(csm->iter); return;
csm->iter=iterToAdopt; }
delete csm->iter;
csm->iter=reinterpret_cast<BreakIterator *>(iterToAdopt);
} }
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
@ -47,21 +49,23 @@ ucasemap_utf8ToTitle(UCaseMap *csm,
char *dest, int32_t destCapacity, char *dest, int32_t destCapacity,
const char *src, int32_t srcLength, const char *src, int32_t srcLength,
UErrorCode *pErrorCode) { UErrorCode *pErrorCode) {
UText utext=UTEXT_INITIALIZER; if (U_FAILURE(*pErrorCode)) {
utext_openUTF8(&utext, (const char *)src, srcLength, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
return 0; return 0;
} }
UText utext=UTEXT_INITIALIZER;
utext_openUTF8(&utext, (const char *)src, srcLength, pErrorCode);
if(csm->iter==NULL) { if(csm->iter==NULL) {
csm->iter=ubrk_open(UBRK_WORD, csm->locale, csm->iter=BreakIterator::createWordInstance(Locale(csm->locale), *pErrorCode);
NULL, 0,
pErrorCode);
} }
ubrk_setUText(csm->iter, &utext, pErrorCode); if (U_FAILURE(*pErrorCode)) {
int32_t length=ucasemap_mapUTF8(csm, return 0;
(uint8_t *)dest, destCapacity, }
(const uint8_t *)src, srcLength, csm->iter->setText(&utext, *pErrorCode);
ucasemap_internalUTF8ToTitle, pErrorCode); int32_t length=ucasemap_mapUTF8(
csm->caseLocale, csm->options, csm->iter,
(uint8_t *)dest, destCapacity,
(const uint8_t *)src, srcLength,
ucasemap_internalUTF8ToTitle, pErrorCode);
utext_close(&utext); utext_close(&utext);
return length; return length;
} }

View File

@ -23,6 +23,7 @@
#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
#include "unicode/uset.h" #include "unicode/uset.h"
#include "unicode/ustring.h"
#include "ucnv_bld.h" #include "ucnv_bld.h"
#include "ucnv_cnv.h" #include "ucnv_cnv.h"
#include "ucnv_ext.h" #include "ucnv_ext.h"

View File

@ -0,0 +1,197 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// casemap.h
// created: 2017jan12 Markus W. Scherer
#ifndef __CASEMAP_H__
#define __CASEMAP_H__
#include "unicode/utypes.h"
#include "unicode/uobject.h"
/**
* \file
* \brief C++ API: Low-level C++ case mapping functions.
*/
U_NAMESPACE_BEGIN
#ifndef U_HIDE_DRAFT_API
class BreakIterator;
class Edits;
/**
* Low-level C++ case mapping functions.
*
* All functions are static and work on caller-provided buffers;
* the class itself cannot be instantiated or copied.
* Each function can optionally record the changes it makes in an Edits object.
*
* @draft ICU 59
*/
class U_COMMON_API CaseMap final : public UMemory {
public:
/**
* Lowercases a UTF-16 string and optionally records edits.
* Casing is locale-dependent and context-sensitive.
* The result may be longer or shorter than the original.
* The source string and the destination buffer must not overlap.
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT.
* @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param dest A buffer for the result string. The result will be NUL-terminated if
* the buffer is large enough.
* The contents are undefined in case of failure.
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
* dest may be NULL and the function will only return the length of the result
* without writing any of the result string.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* This function calls edits->reset() first. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful.
* When the result would be longer than destCapacity,
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
*
* @see u_strToLower
* @draft ICU 59
*/
static int32_t toLower(
const char *locale, uint32_t options,
const UChar *src, int32_t srcLength,
UChar *dest, int32_t destCapacity, Edits *edits,
UErrorCode &errorCode);
/**
* Uppercases a UTF-16 string and optionally records edits.
* Casing is locale-dependent and context-sensitive.
* The result may be longer or shorter than the original.
* The source string and the destination buffer must not overlap.
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT.
* @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param dest A buffer for the result string. The result will be NUL-terminated if
* the buffer is large enough.
* The contents are undefined in case of failure.
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
* dest may be NULL and the function will only return the length of the result
* without writing any of the result string.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* This function calls edits->reset() first. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful.
* When the result would be longer than destCapacity,
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
*
* @see u_strToUpper
* @draft ICU 59
*/
static int32_t toUpper(
const char *locale, uint32_t options,
const UChar *src, int32_t srcLength,
UChar *dest, int32_t destCapacity, Edits *edits,
UErrorCode &errorCode);
#if !UCONFIG_NO_BREAK_ITERATION
/**
* Titlecases a UTF-16 string and optionally records edits.
* Casing is locale-dependent and context-sensitive.
* The result may be longer or shorter than the original.
* The source string and the destination buffer must not overlap.
*
* Titlecasing uses a break iterator to find the first characters of words
* that are to be titlecased. It titlecases those characters and lowercases
* all others. (This can be modified with options bits.)
*
* This function is only available when break iteration is compiled in
* (!UCONFIG_NO_BREAK_ITERATION).
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT,
* U_TITLECASE_NO_LOWERCASE, U_TITLECASE_NO_BREAK_ADJUSTMENT.
* @param iter A break iterator to find the first characters of words that are to be titlecased.
* It is set to the source string (setText())
* and used one or more times for iteration (first() and next()).
* If NULL, then a word break iterator for the locale is used
* (or something equivalent).
* @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param dest A buffer for the result string. The result will be NUL-terminated if
* the buffer is large enough.
* The contents are undefined in case of failure.
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
* dest may be NULL and the function will only return the length of the result
* without writing any of the result string.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* This function calls edits->reset() first. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful.
* When the result would be longer than destCapacity,
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
*
* @see u_strToTitle
* @see ucasemap_toTitle
* @draft ICU 59
*/
static int32_t toTitle(
const char *locale, uint32_t options, BreakIterator *iter,
const UChar *src, int32_t srcLength,
UChar *dest, int32_t destCapacity, Edits *edits,
UErrorCode &errorCode);
#endif // UCONFIG_NO_BREAK_ITERATION
/**
* Case-folds a UTF-16 string and optionally records edits.
*
* Case folding is locale-independent and not context-sensitive,
* but there is an option for whether to include or exclude mappings for dotted I
* and dotless i that are marked with 'T' in CaseFolding.txt.
* Accordingly, this function takes no locale parameter.
*
* The result may be longer or shorter than the original.
* The source string and the destination buffer must not overlap.
*
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT,
* U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
* @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param dest A buffer for the result string. The result will be NUL-terminated if
* the buffer is large enough.
* The contents are undefined in case of failure.
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
* dest may be NULL and the function will only return the length of the result
* without writing any of the result string.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* This function calls edits->reset() first. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful.
* When the result would be longer than destCapacity,
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
*
* @see u_strFoldCase
* @draft ICU 59
*/
static int32_t fold(
uint32_t options,
const UChar *src, int32_t srcLength,
UChar *dest, int32_t destCapacity, Edits *edits,
UErrorCode &errorCode);
private:
// Static-only class: construction, copying and assignment are deleted
// so that no CaseMap object can ever be created.
CaseMap() = delete;
CaseMap(const CaseMap &other) = delete;
CaseMap &operator=(const CaseMap &other) = delete;
};
#endif // U_HIDE_DRAFT_API
U_NAMESPACE_END
#endif // __CASEMAP_H__

View File

@ -0,0 +1,245 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// edits.h
// created: 2016dec30 Markus W. Scherer
#ifndef __EDITS_H__
#define __EDITS_H__
#include "unicode/utypes.h"
#include "unicode/uobject.h"
/**
* \file
* \brief C++ API: C++ class Edits for low-level string transformations on styled text.
*/
U_NAMESPACE_BEGIN
#ifndef U_HIDE_DRAFT_API
/**
* Records lengths of string edits but not replacement text.
* Supports replacements, insertions, deletions in linear progression.
* Does not support moving/reordering of text.
*
* An Edits object tracks a separate UErrorCode, but ICU string transformation functions
* (e.g., case mapping functions) merge any such errors into their API's UErrorCode.
*
* @draft ICU 59
*/
class U_COMMON_API Edits final : public UMemory {
public:
/**
* Constructs an empty object.
* The object starts out using its built-in stackArray (STACK_CAPACITY units),
* with no recorded edits, a length delta of 0, and errorCode set to U_ZERO_ERROR.
* @draft ICU 59
*/
Edits() :
array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0),
errorCode(U_ZERO_ERROR) {}
/**
* Destructor.
* @draft ICU 59
*/
~Edits();
/**
* Resets the data but may not release memory.
* @draft ICU 59
*/
void reset();
/**
* Adds a record for an unchanged segment of text.
* Normally called from inside ICU string transformation functions, not user code.
* @param unchangedLength the number of units in the unchanged segment
* @draft ICU 59
*/
void addUnchanged(int32_t unchangedLength);
/**
* Adds a record for a text replacement/insertion/deletion.
* Normally called from inside ICU string transformation functions, not user code.
* @param oldLength the number of units in the original text that are replaced
* @param newLength the number of units in the replacement text
* @draft ICU 59
*/
void addReplace(int32_t oldLength, int32_t newLength);
/**
* Sets the UErrorCode if an error occurred while recording edits.
* Preserves older error codes in the outErrorCode.
* Normally called from inside ICU string transformation functions, not user code.
* @param outErrorCode the caller's error code which receives this object's error, if any
* @return TRUE if U_FAILURE(outErrorCode)
* @draft ICU 59
*/
UBool copyErrorTo(UErrorCode &outErrorCode);
/**
* How much longer is the new text compared with the old text?
* @return new length minus old length
* @draft ICU 59
*/
int32_t lengthDelta() const { return delta; }
/**
* @return TRUE if there are any change edits
* @draft ICU 59
*/
UBool hasChanges() const;
/**
* Access to the list of edits.
* Instances are created only by the Edits::get...Iterator() factory functions
* (the constructor is private and Edits is a friend).
* @see getCoarseIterator
* @see getFineIterator
* @draft ICU 59
*/
struct Iterator final : public UMemory {
/**
* Copy constructor.
* @draft ICU 59
*/
Iterator(const Iterator &other) = default;
/**
* Assignment operator.
* @draft ICU 59
*/
Iterator &operator=(const Iterator &other) = default;
/**
* Advances to the next edit.
* Whether non-change edits are skipped depends on how this iterator was created;
* this overload forwards the stored onlyChanges_ flag to the private next().
* @param errorCode error code reference, passed through to the private next()
* @return TRUE if there is another edit
* @draft ICU 59
*/
UBool next(UErrorCode &errorCode) { return next(onlyChanges_, errorCode); }
/**
* Finds the edit that contains the source index.
* The source index may be found in a non-change
* even if normal iteration would skip non-changes.
* Normal iteration can continue from a found edit.
*
* The iterator state before this search logically does not matter.
* (It may affect the performance of the search.)
*
* The iterator state after this search is undefined
* if the source index is out of bounds for the source string.
*
* @param i source index
* @param errorCode error code reference; handled by the out-of-line implementation.
*        NOTE(review): presumably the usual ICU in/out error code
*        which must not indicate a failure on entry -- confirm in edits.cpp.
* @return TRUE if the edit for the source index was found
* @draft ICU 59
*/
UBool findSourceIndex(int32_t i, UErrorCode &errorCode);
/**
* @return TRUE if this edit replaces oldLength() units with newLength() different ones.
* FALSE if oldLength units remain unchanged.
* @draft ICU 59
*/
UBool hasChange() const { return changed; }
/**
* @return the number of units in the original string which are replaced or remain unchanged.
* @draft ICU 59
*/
int32_t oldLength() const { return oldLength_; }
/**
* @return the number of units in the modified string, if hasChange() is TRUE.
* Same as oldLength if hasChange() is FALSE.
* @draft ICU 59
*/
int32_t newLength() const { return newLength_; }
/**
* @return the current index into the source string
* @draft ICU 59
*/
int32_t sourceIndex() const { return srcIndex; }
/**
* @return the current index into the replacement-characters-only string,
* not counting unchanged spans
* @draft ICU 59
*/
int32_t replacementIndex() const { return replIndex; }
/**
* @return the current index into the full destination string
* @draft ICU 59
*/
int32_t destinationIndex() const { return destIndex; }
private:
// Edits constructs Iterator objects in its get...Iterator() functions.
friend class Edits;
// a/len: the edits array and its length, as passed by the Edits factory functions.
// oc: TRUE for the "Changes" iterators which skip non-changes.
// crs: TRUE for the "Coarse" iterators which merge adjacent changes.
Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs);
int32_t readLength(int32_t head);
void updateIndexes();
UBool noNext();
// Implementation of the public next(); the public overload passes onlyChanges_.
UBool next(UBool onlyChanges, UErrorCode &errorCode);
// The edits data being iterated over; not modified through the iterator (pointer to const).
const uint16_t *array;
int32_t index, length;
int32_t remaining;
// Iteration mode flags; see the constructor parameters oc and crs.
UBool onlyChanges_, coarse;
// State of the current edit, exposed via hasChange(), oldLength() and newLength().
UBool changed;
int32_t oldLength_, newLength_;
// Current positions, exposed via sourceIndex(), replacementIndex() and destinationIndex().
int32_t srcIndex, replIndex, destIndex;
};
/**
* Returns an Iterator for coarse-grained changes for simple string updates.
* Skips non-changes.
* @return an Iterator that merges adjacent changes.
* @draft ICU 59
*/
Iterator getCoarseChangesIterator() const {
return Iterator(array, length, TRUE, TRUE);
}
/**
* Returns an Iterator for coarse-grained changes and non-changes for simple string updates.
* @return an Iterator that merges adjacent changes.
* @draft ICU 59
*/
Iterator getCoarseIterator() const {
return Iterator(array, length, FALSE, TRUE);
}
/**
* Returns an Iterator for fine-grained changes for modifying styled text.
* Skips non-changes.
* @return an Iterator that separates adjacent changes.
* @draft ICU 59
*/
Iterator getFineChangesIterator() const {
return Iterator(array, length, TRUE, FALSE);
}
/**
* Returns an Iterator for fine-grained changes and non-changes for modifying styled text.
* @return an Iterator that separates adjacent changes.
* @draft ICU 59
*/
Iterator getFineIterator() const {
return Iterator(array, length, FALSE, FALSE);
}
private:
// Not copyable: copy construction and copy assignment are deleted.
Edits(const Edits &) = delete;
Edits &operator=(const Edits &) = delete;
// Overwrites the last recorded unit, truncated to 16 bits.
// There is no bounds check here: the caller must ensure length > 0.
void setLastUnit(int32_t last) { array[length - 1] = (uint16_t)last; }
// Returns the last recorded unit, or 0xffff if nothing has been recorded yet.
int32_t lastUnit() const { return length > 0 ? array[length - 1] : 0xffff; }
void append(int32_t r);
UBool growArray();
// Number of uint16_t units in the built-in stackArray.
static const int32_t STACK_CAPACITY = 100;
// The edits storage; points to stackArray after construction.
// NOTE(review): presumably replaced by a larger buffer in growArray() -- confirm in edits.cpp.
uint16_t *array;
// capacity is initialized to STACK_CAPACITY; length is the number of units in use.
int32_t capacity;
int32_t length;
// New text length minus old text length; returned by lengthDelta().
int32_t delta;
// Error state of this object, initially U_ZERO_ERROR; see copyErrorTo().
UErrorCode errorCode;
uint16_t stackArray[STACK_CAPACITY];
};
#endif // U_HIDE_DRAFT_API
U_NAMESPACE_END
#endif // __EDITS_H__

View File

@ -33,10 +33,8 @@
#include "unicode/utypes.h" #include "unicode/utypes.h"
#include "unicode/uobject.h" #include "unicode/uobject.h"
#include "unicode/unistr.h"
#include "unicode/putil.h" #include "unicode/putil.h"
#include "unicode/uloc.h" #include "unicode/uloc.h"
#include "unicode/strenum.h"
/** /**
* \file * \file
@ -48,6 +46,9 @@ U_NAMESPACE_BEGIN
// Forward Declarations // Forward Declarations
void U_CALLCONV locale_available_init(); /**< @internal */ void U_CALLCONV locale_available_init(); /**< @internal */
class StringEnumeration;
class UnicodeString;
/** /**
* A <code>Locale</code> object represents a specific geographical, political, * A <code>Locale</code> object represents a specific geographical, political,
* or cultural region. An operation that requires a <code>Locale</code> to perform * or cultural region. An operation that requires a <code>Locale</code> to perform

View File

@ -22,8 +22,8 @@
#define __UCASEMAP_H__ #define __UCASEMAP_H__
#include "unicode/utypes.h" #include "unicode/utypes.h"
#include "unicode/ustring.h"
#include "unicode/localpointer.h" #include "unicode/localpointer.h"
#include "unicode/ustring.h"
/** /**
* \file * \file
@ -185,6 +185,15 @@ ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode);
*/ */
#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200 #define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
/**
* Omit unchanged text when case-mapping with Edits.
*
* @see CaseMap
* @see Edits
* @draft ICU 59
*/
#define UCASEMAP_OMIT_UNCHANGED_TEXT 0x4000
#if !UCONFIG_NO_BREAK_ITERATION #if !UCONFIG_NO_BREAK_ITERATION
/** /**
@ -253,7 +262,7 @@ ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode
* @param dest A buffer for the result string. The result will be NUL-terminated if * @param dest A buffer for the result string. The result will be NUL-terminated if
* the buffer is large enough. * the buffer is large enough.
* The contents is undefined in case of failure. * The contents is undefined in case of failure.
* @param destCapacity The size of the buffer (number of bytes). If it is 0, then * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
* dest may be NULL and the function will only return the length of the result * dest may be NULL and the function will only return the length of the result
* without writing any of the result string. * without writing any of the result string.
* @param src The original string. * @param src The original string.
@ -272,7 +281,7 @@ ucasemap_toTitle(UCaseMap *csm,
const UChar *src, int32_t srcLength, const UChar *src, int32_t srcLength,
UErrorCode *pErrorCode); UErrorCode *pErrorCode);
#endif #endif // UCONFIG_NO_BREAK_ITERATION
/** /**
* Lowercase the characters in a UTF-8 string. * Lowercase the characters in a UTF-8 string.

View File

@ -23,7 +23,9 @@
#include "unicode/localpointer.h" #include "unicode/localpointer.h"
#if U_SHOW_CPLUSPLUS_API #if U_SHOW_CPLUSPLUS_API
#include "unicode/strenum.h" U_NAMESPACE_BEGIN
class StringEnumeration;
U_NAMESPACE_END
#endif #endif
/** /**

View File

@ -33,7 +33,6 @@
#include "unicode/std_string.h" #include "unicode/std_string.h"
#include "unicode/stringpiece.h" #include "unicode/stringpiece.h"
#include "unicode/bytestream.h" #include "unicode/bytestream.h"
#include "unicode/ucasemap.h"
struct UConverter; // unicode/ucnv.h struct UConverter; // unicode/ucnv.h
@ -55,30 +54,34 @@ U_STABLE int32_t U_EXPORT2
u_strlen(const UChar *s); u_strlen(const UChar *s);
#endif #endif
/**
* \def U_STRING_CASE_MAPPER_DEFINED
* @internal
*/
#ifndef U_STRING_CASE_MAPPER_DEFINED
#define U_STRING_CASE_MAPPER_DEFINED
/**
* Internal string case mapping function type.
* @internal
*/
typedef int32_t U_CALLCONV
UStringCaseMapper(const UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UErrorCode *pErrorCode);
#endif
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
#if !UCONFIG_NO_BREAK_ITERATION #if !UCONFIG_NO_BREAK_ITERATION
class BreakIterator; // unicode/brkiter.h class BreakIterator; // unicode/brkiter.h
#endif #endif
class Edits;
U_NAMESPACE_END
// Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper.
/**
* Internal string case mapping function type.
* All error checking must be done.
* src and dest must not overlap.
* @internal
*/
typedef int32_t U_CALLCONV
UStringCaseMapper(int32_t caseLocale, uint32_t options,
#if !UCONFIG_NO_BREAK_ITERATION
icu::BreakIterator *iter,
#endif
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
icu::Edits *edits,
UErrorCode &errorCode);
U_NAMESPACE_BEGIN
class Locale; // unicode/locid.h class Locale; // unicode/locid.h
class StringCharacterIterator; class StringCharacterIterator;
class UnicodeStringAppendable; // unicode/appendable.h class UnicodeStringAppendable; // unicode/appendable.h
@ -3592,7 +3595,11 @@ private:
* as in ustr_imp.h for ustrcase_map(). * as in ustr_imp.h for ustrcase_map().
*/ */
UnicodeString & UnicodeString &
caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper); caseMap(int32_t caseLocale, uint32_t options,
#if !UCONFIG_NO_BREAK_ITERATION
BreakIterator *iter,
#endif
UStringCaseMapper *stringCaseMapper);
// ref counting // ref counting
void addRef(void); void addRef(void);

View File

@ -184,7 +184,6 @@ UnicodeSet& UnicodeSet::closeOver(int32_t attribute) {
return *this; return *this;
} }
if (attribute & (USET_CASE_INSENSITIVE | USET_ADD_CASE_MAPPINGS)) { if (attribute & (USET_CASE_INSENSITIVE | USET_ADD_CASE_MAPPINGS)) {
const UCaseProps *csp = ucase_getSingleton();
{ {
UnicodeSet foldSet(*this); UnicodeSet foldSet(*this);
UnicodeString str; UnicodeString str;
@ -207,7 +206,6 @@ UnicodeSet& UnicodeSet::closeOver(int32_t attribute) {
int32_t n = getRangeCount(); int32_t n = getRangeCount();
UChar32 result; UChar32 result;
const UChar *full; const UChar *full;
int32_t locCache = 0;
for (int32_t i=0; i<n; ++i) { for (int32_t i=0; i<n; ++i) {
UChar32 start = getRangeStart(i); UChar32 start = getRangeStart(i);
@ -216,22 +214,22 @@ UnicodeSet& UnicodeSet::closeOver(int32_t attribute) {
if (attribute & USET_CASE_INSENSITIVE) { if (attribute & USET_CASE_INSENSITIVE) {
// full case closure // full case closure
for (UChar32 cp=start; cp<=end; ++cp) { for (UChar32 cp=start; cp<=end; ++cp) {
ucase_addCaseClosure(csp, cp, &sa); ucase_addCaseClosure(cp, &sa);
} }
} else { } else {
// add case mappings // add case mappings
// (does not add long s for regular s, or Kelvin for k, for example) // (does not add long s for regular s, or Kelvin for k, for example)
for (UChar32 cp=start; cp<=end; ++cp) { for (UChar32 cp=start; cp<=end; ++cp) {
result = ucase_toFullLower(csp, cp, NULL, NULL, &full, "", &locCache); result = ucase_toFullLower(cp, NULL, NULL, &full, UCASE_LOC_ROOT);
addCaseMapping(foldSet, result, full, str); addCaseMapping(foldSet, result, full, str);
result = ucase_toFullTitle(csp, cp, NULL, NULL, &full, "", &locCache); result = ucase_toFullTitle(cp, NULL, NULL, &full, UCASE_LOC_ROOT);
addCaseMapping(foldSet, result, full, str); addCaseMapping(foldSet, result, full, str);
result = ucase_toFullUpper(csp, cp, NULL, NULL, &full, "", &locCache); result = ucase_toFullUpper(cp, NULL, NULL, &full, UCASE_LOC_ROOT);
addCaseMapping(foldSet, result, full, str); addCaseMapping(foldSet, result, full, str);
result = ucase_toFullFolding(csp, cp, &full, 0); result = ucase_toFullFolding(cp, &full, 0);
addCaseMapping(foldSet, result, full, str); addCaseMapping(foldSet, result, full, str);
} }
} }
@ -241,7 +239,7 @@ UnicodeSet& UnicodeSet::closeOver(int32_t attribute) {
for (int32_t j=0; j<strings->size(); ++j) { for (int32_t j=0; j<strings->size(); ++j) {
str = *(const UnicodeString *) strings->elementAt(j); str = *(const UnicodeString *) strings->elementAt(j);
str.foldCase(); str.foldCase();
if(!ucase_addStringCaseClosure(csp, str.getBuffer(), str.length(), &sa)) { if(!ucase_addStringCaseClosure(str.getBuffer(), str.length(), &sa)) {
foldSet.add(str); // does not map to code points: add the folded string itself foldSet.add(str); // does not map to code points: add the folded string itself
} }
} }

View File

@ -195,7 +195,7 @@ void U_CALLCONV UnicodeSet_initInclusion(int32_t src, UErrorCode &status) {
if(U_SUCCESS(status)) { if(U_SUCCESS(status)) {
impl->addPropertyStarts(&sa, status); impl->addPropertyStarts(&sa, status);
} }
ucase_addPropertyStarts(ucase_getSingleton(), &sa, &status); ucase_addPropertyStarts(&sa, &status);
break; break;
} }
case UPROPS_SRC_NFC: { case UPROPS_SRC_NFC: {
@ -228,7 +228,7 @@ void U_CALLCONV UnicodeSet_initInclusion(int32_t src, UErrorCode &status) {
} }
#endif #endif
case UPROPS_SRC_CASE: case UPROPS_SRC_CASE:
ucase_addPropertyStarts(ucase_getSingleton(), &sa, &status); ucase_addPropertyStarts(&sa, &status);
break; break;
case UPROPS_SRC_BIDI: case UPROPS_SRC_BIDI:
ubidi_addPropertyStarts(ubidi_getSingleton(), &sa, &status); ubidi_addPropertyStarts(ubidi_getSingleton(), &sa, &status);

View File

@ -19,14 +19,17 @@
*/ */
#include "unicode/utypes.h" #include "unicode/utypes.h"
#include "unicode/casemap.h"
#include "unicode/edits.h"
#include "unicode/putil.h" #include "unicode/putil.h"
#include "cstring.h" #include "cstring.h"
#include "cmemory.h" #include "cmemory.h"
#include "unicode/ustring.h" #include "unicode/ustring.h"
#include "unicode/unistr.h" #include "unicode/unistr.h"
#include "unicode/uchar.h" #include "unicode/uchar.h"
#include "uassert.h"
#include "ucasemap_imp.h"
#include "uelement.h" #include "uelement.h"
#include "ustr_imp.h"
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
@ -87,56 +90,104 @@ UnicodeString::doCaseCompare(int32_t start,
//======================================== //========================================
UnicodeString & UnicodeString &
UnicodeString::caseMap(const UCaseMap *csm, UnicodeString::caseMap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
UStringCaseMapper *stringCaseMapper) { UStringCaseMapper *stringCaseMapper) {
if(isEmpty() || !isWritable()) { if(isEmpty() || !isWritable()) {
// nothing to do // nothing to do
return *this; return *this;
} }
UChar oldBuffer[2 * US_STACKBUF_SIZE];
UChar *oldArray;
int32_t oldLength = length();
int32_t newLength;
UBool writable = isBufferWritable();
UErrorCode errorCode = U_ZERO_ERROR;
// Try to avoid heap-allocating a new character array for this string.
if (writable ? oldLength <= UPRV_LENGTHOF(oldBuffer) : oldLength < US_STACKBUF_SIZE) {
// Short string: Copy the contents into a temporary buffer and
// case-map back into the current array, or into the stack buffer.
UChar *buffer = getArrayStart();
int32_t capacity;
oldArray = oldBuffer;
u_memcpy(oldBuffer, buffer, oldLength);
if (writable) {
capacity = getCapacity();
} else {
// Switch from the read-only alias or shared heap buffer to the stack buffer.
if (!cloneArrayIfNeeded(US_STACKBUF_SIZE, US_STACKBUF_SIZE, /* doCopyArray= */ FALSE)) {
return *this;
}
U_ASSERT(fUnion.fFields.fLengthAndFlags & kUsingStackBuffer);
buffer = fUnion.fStackFields.fBuffer;
capacity = US_STACKBUF_SIZE;
}
newLength = stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
buffer, capacity,
oldArray, oldLength, NULL, errorCode);
if (U_SUCCESS(errorCode)) {
setLength(newLength);
return *this;
} else if (errorCode == U_BUFFER_OVERFLOW_ERROR) {
// common overflow handling below
} else {
setToBogus();
return *this;
}
} else {
// Longer string or read-only buffer:
// Collect only changes and then apply them to this string.
// Case mapping often changes only small parts of a string,
// and often does not change its length.
oldArray = getArrayStart();
Edits edits;
UChar replacementChars[200];
stringCaseMapper(caseLocale, options | UCASEMAP_OMIT_UNCHANGED_TEXT, UCASEMAP_BREAK_ITERATOR
replacementChars, UPRV_LENGTHOF(replacementChars),
oldArray, oldLength, &edits, errorCode);
if (U_SUCCESS(errorCode)) {
// Grow the buffer at most once, not for multiple doReplace() calls.
newLength = oldLength + edits.lengthDelta();
if (newLength > oldLength && !cloneArrayIfNeeded(newLength, newLength)) {
return *this;
}
for (Edits::Iterator ei = edits.getCoarseChangesIterator(); ei.next(errorCode);) {
doReplace(ei.destinationIndex(), ei.oldLength(),
replacementChars, ei.replacementIndex(), ei.newLength());
}
if (U_FAILURE(errorCode)) {
setToBogus();
}
return *this;
} else if (errorCode == U_BUFFER_OVERFLOW_ERROR) {
// common overflow handling below
newLength = oldLength + edits.lengthDelta();
} else {
setToBogus();
return *this;
}
}
// Handle buffer overflow, newLength is known.
// We need to allocate a new buffer for the internal string case mapping function. // We need to allocate a new buffer for the internal string case mapping function.
// This is very similar to how doReplace() keeps the old array pointer // This is very similar to how doReplace() keeps the old array pointer
// and deletes the old array itself after it is done. // and deletes the old array itself after it is done.
// In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array. // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array.
UChar oldStackBuffer[US_STACKBUF_SIZE];
UChar *oldArray;
int32_t oldLength;
if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
// copy the stack buffer contents because it will be overwritten
oldArray = oldStackBuffer;
oldLength = getShortLength();
u_memcpy(oldStackBuffer, fUnion.fStackFields.fBuffer, oldLength);
} else {
oldArray = getArrayStart();
oldLength = length();
}
int32_t capacity;
if(oldLength <= US_STACKBUF_SIZE) {
capacity = US_STACKBUF_SIZE;
} else {
capacity = oldLength + 20;
}
int32_t *bufferToDelete = 0; int32_t *bufferToDelete = 0;
if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) { if (!cloneArrayIfNeeded(newLength, newLength, FALSE, &bufferToDelete, TRUE)) {
return *this; return *this;
} }
errorCode = U_ZERO_ERROR;
// Case-map, and if the result is too long, then reallocate and repeat. newLength = stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
UErrorCode errorCode; getArrayStart(), getCapacity(),
int32_t newLength; oldArray, oldLength, NULL, errorCode);
do {
errorCode = U_ZERO_ERROR;
newLength = stringCaseMapper(csm, getArrayStart(), getCapacity(),
oldArray, oldLength, &errorCode);
setLength(newLength);
} while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(newLength, newLength, FALSE));
if (bufferToDelete) { if (bufferToDelete) {
uprv_free(bufferToDelete); uprv_free(bufferToDelete);
} }
if(U_FAILURE(errorCode)) { if (U_SUCCESS(errorCode)) {
setLength(newLength);
} else {
setToBogus(); setToBogus();
} }
return *this; return *this;
@ -144,10 +195,7 @@ UnicodeString::caseMap(const UCaseMap *csm,
UnicodeString & UnicodeString &
UnicodeString::foldCase(uint32_t options) { UnicodeString::foldCase(uint32_t options) {
UCaseMap csm=UCASEMAP_INITIALIZER; return caseMap(UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalFold);
csm.csp=ucase_getSingleton();
csm.options=options;
return caseMap(&csm, ustrcase_internalFold);
} }
U_NAMESPACE_END U_NAMESPACE_END

View File

@ -19,9 +19,9 @@
#include "unicode/utypes.h" #include "unicode/utypes.h"
#include "unicode/locid.h" #include "unicode/locid.h"
#include "unicode/ucasemap.h"
#include "unicode/unistr.h" #include "unicode/unistr.h"
#include "cmemory.h" #include "ucasemap_imp.h"
#include "ustr_imp.h"
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
@ -29,44 +29,28 @@ U_NAMESPACE_BEGIN
// Write implementation // Write implementation
//======================================== //========================================
/*
* Set parameters on an empty UCaseMap, for UCaseMap-less API functions.
* Do this fast because it is called with every function call.
*/
static inline void
setTempCaseMap(UCaseMap *csm, const char *locale) {
if(csm->csp==NULL) {
csm->csp=ucase_getSingleton();
}
if(locale!=NULL && locale[0]==0) {
csm->locale[0]=0;
} else {
ustrcase_setTempCaseMapLocale(csm, locale);
}
}
UnicodeString & UnicodeString &
UnicodeString::toLower() { UnicodeString::toLower() {
return toLower(Locale::getDefault()); return caseMap(ustrcase_getCaseLocale(NULL), 0,
UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToLower);
} }
UnicodeString & UnicodeString &
UnicodeString::toLower(const Locale &locale) { UnicodeString::toLower(const Locale &locale) {
UCaseMap csm=UCASEMAP_INITIALIZER; return caseMap(ustrcase_getCaseLocale(locale.getBaseName()), 0,
setTempCaseMap(&csm, locale.getName()); UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToLower);
return caseMap(&csm, ustrcase_internalToLower);
} }
UnicodeString & UnicodeString &
UnicodeString::toUpper() { UnicodeString::toUpper() {
return toUpper(Locale::getDefault()); return caseMap(ustrcase_getCaseLocale(NULL), 0,
UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToUpper);
} }
UnicodeString & UnicodeString &
UnicodeString::toUpper(const Locale &locale) { UnicodeString::toUpper(const Locale &locale) {
UCaseMap csm=UCASEMAP_INITIALIZER; return caseMap(ustrcase_getCaseLocale(locale.getBaseName()), 0,
setTempCaseMap(&csm, locale.getName()); UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToUpper);
return caseMap(&csm, ustrcase_internalToUpper);
} }
U_NAMESPACE_END U_NAMESPACE_END

View File

@ -22,36 +22,10 @@
#if !UCONFIG_NO_BREAK_ITERATION #if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/brkiter.h" #include "unicode/brkiter.h"
#include "unicode/ubrk.h" #include "unicode/locid.h"
#include "unicode/ucasemap.h"
#include "unicode/unistr.h" #include "unicode/unistr.h"
#include "unicode/ustring.h" #include "ucasemap_imp.h"
#include "cmemory.h"
#include "ustr_imp.h"
static int32_t U_CALLCONV
unistr_case_internalToTitle(const UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UErrorCode *pErrorCode) {
ubrk_setText(csm->iter, src, srcLength, pErrorCode);
return ustrcase_internalToTitle(csm, dest, destCapacity, src, srcLength, pErrorCode);
}
/*
* Set parameters on an empty UCaseMap, for UCaseMap-less API functions.
* Do this fast because it is called with every function call.
*/
static inline void
setTempCaseMap(UCaseMap *csm, const char *locale) {
if(csm->csp==NULL) {
csm->csp=ucase_getSingleton();
}
if(locale!=NULL && locale[0]==0) {
csm->locale[0]=0;
} else {
ustrcase_setTempCaseMapLocale(csm, locale);
}
}
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
@ -67,9 +41,6 @@ UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) {
UnicodeString & UnicodeString &
UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options) { UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options) {
UCaseMap csm=UCASEMAP_INITIALIZER;
csm.options=options;
setTempCaseMap(&csm, locale.getName());
BreakIterator *bi=titleIter; BreakIterator *bi=titleIter;
if(bi==NULL) { if(bi==NULL) {
UErrorCode errorCode=U_ZERO_ERROR; UErrorCode errorCode=U_ZERO_ERROR;
@ -79,8 +50,8 @@ UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t
return *this; return *this;
} }
} }
csm.iter=reinterpret_cast<UBreakIterator *>(bi); bi->setText(*this);
caseMap(&csm, unistr_case_internalToTitle); caseMap(ustrcase_getCaseLocale(locale.getBaseName()), options, bi, ustrcase_internalToTitle);
if(titleIter==NULL) { if(titleIter==NULL) {
delete bi; delete bi;
} }

View File

@ -145,7 +145,6 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1,
uint32_t options, uint32_t options,
UErrorCode *pErrorCode) { UErrorCode *pErrorCode) {
const Normalizer2Impl *nfcImpl; const Normalizer2Impl *nfcImpl;
const UCaseProps *csp;
/* current-level start/limit - s1/s2 as current */ /* current-level start/limit - s1/s2 as current */
const UChar *start1, *start2, *limit1, *limit2; const UChar *start1, *start2, *limit1, *limit2;
@ -183,11 +182,6 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1,
} else { } else {
nfcImpl=NULL; nfcImpl=NULL;
} }
if((options&U_COMPARE_IGNORE_CASE)!=0) {
csp=ucase_getSingleton();
} else {
csp=NULL;
}
if(U_FAILURE(*pErrorCode)) { if(U_FAILURE(*pErrorCode)) {
return 0; return 0;
} }
@ -319,7 +313,7 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1,
*/ */
if( level1==0 && (options&U_COMPARE_IGNORE_CASE) && if( level1==0 && (options&U_COMPARE_IGNORE_CASE) &&
(length=ucase_toFullFolding(csp, (UChar32)cp1, &p, options))>=0 (length=ucase_toFullFolding((UChar32)cp1, &p, options))>=0
) { ) {
/* cp1 case-folds to the code point "length" or to p[length] */ /* cp1 case-folds to the code point "length" or to p[length] */
if(U_IS_SURROGATE(c1)) { if(U_IS_SURROGATE(c1)) {
@ -364,7 +358,7 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1,
} }
if( level2==0 && (options&U_COMPARE_IGNORE_CASE) && if( level2==0 && (options&U_COMPARE_IGNORE_CASE) &&
(length=ucase_toFullFolding(csp, (UChar32)cp2, &p, options))>=0 (length=ucase_toFullFolding((UChar32)cp2, &p, options))>=0
) { ) {
/* cp2 case-folds to the code point "length" or to p[length] */ /* cp2 case-folds to the code point "length" or to p[length] */
if(U_IS_SURROGATE(c2)) { if(U_IS_SURROGATE(c2)) {

View File

@ -128,9 +128,8 @@ static UBool changesWhenCasefolded(const BinaryProperty &/*prop*/, UChar32 c, UP
} }
if(c>=0) { if(c>=0) {
/* single code point */ /* single code point */
const UCaseProps *csp=ucase_getSingleton();
const UChar *resultString; const UChar *resultString;
return (UBool)(ucase_toFullFolding(csp, c, &resultString, U_FOLD_CASE_DEFAULT)>=0); return (UBool)(ucase_toFullFolding(c, &resultString, U_FOLD_CASE_DEFAULT)>=0);
} else { } else {
/* guess some large but stack-friendly capacity */ /* guess some large but stack-friendly capacity */
UChar dest[2*UCASE_MAX_STRING_LENGTH]; UChar dest[2*UCASE_MAX_STRING_LENGTH];
@ -576,14 +575,13 @@ u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *p
// case folding and NFKC.) // case folding and NFKC.)
// For the derivation, see Unicode's DerivedNormalizationProps.txt. // For the derivation, see Unicode's DerivedNormalizationProps.txt.
const Normalizer2 *nfkc=Normalizer2::getNFKCInstance(*pErrorCode); const Normalizer2 *nfkc=Normalizer2::getNFKCInstance(*pErrorCode);
const UCaseProps *csp=ucase_getSingleton();
if(U_FAILURE(*pErrorCode)) { if(U_FAILURE(*pErrorCode)) {
return 0; return 0;
} }
// first: b = NFKC(Fold(a)) // first: b = NFKC(Fold(a))
UnicodeString folded1String; UnicodeString folded1String;
const UChar *folded1; const UChar *folded1;
int32_t folded1Length=ucase_toFullFolding(csp, c, &folded1, U_FOLD_CASE_DEFAULT); int32_t folded1Length=ucase_toFullFolding(c, &folded1, U_FOLD_CASE_DEFAULT);
if(folded1Length<0) { if(folded1Length<0) {
const Normalizer2Impl *nfkcImpl=Normalizer2Factory::getImpl(nfkc); const Normalizer2Impl *nfkcImpl=Normalizer2Factory::getImpl(nfkc);
if(nfkcImpl->getCompQuickCheck(nfkcImpl->getNorm16(c))!=UNORM_NO) { if(nfkcImpl->getCompQuickCheck(nfkcImpl->getNorm16(c))!=UNORM_NO) {

View File

@ -18,23 +18,6 @@
#define __USTR_IMP_H__ #define __USTR_IMP_H__
#include "unicode/utypes.h" #include "unicode/utypes.h"
#include "unicode/uiter.h"
#include "ucase.h"
/** Simple declaration to avoid including unicode/ubrk.h. */
#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
# define UBRK_TYPEDEF_UBREAK_ITERATOR
typedef struct UBreakIterator UBreakIterator;
#endif
#ifndef U_COMPARE_IGNORE_CASE
/* see also unorm.h */
/**
* Option bit for unorm_compare:
* Perform case-insensitive comparison.
*/
#define U_COMPARE_IGNORE_CASE 0x10000
#endif
/** /**
* Internal option for unorm_cmpEquivFold() for strncmp style. * Internal option for unorm_cmpEquivFold() for strncmp style.
@ -53,211 +36,6 @@ uprv_strCompare(const UChar *s1, int32_t length1,
const UChar *s2, int32_t length2, const UChar *s2, int32_t length2,
UBool strncmpStyle, UBool codePointOrder); UBool strncmpStyle, UBool codePointOrder);
/**
* Internal API, used by u_strcasecmp() etc.
* Compare strings case-insensitively,
* in code point order or code unit order.
*/
U_CFUNC int32_t
u_strcmpFold(const UChar *s1, int32_t length1,
const UChar *s2, int32_t length2,
uint32_t options,
UErrorCode *pErrorCode);
/**
* Internal API, used for detecting length of
* shared prefix case-insensitively.
* @param s1 input string 1
* @param length1 length of string 1, or -1 (NULL terminated)
* @param s2 input string 2
* @param length2 length of string 2, or -1 (NULL terminated)
* @param options compare options
* @param matchLen1 (output) length of partial prefix match in s1
* @param matchLen2 (output) length of partial prefix match in s2
* @param pErrorCode receives error status
*/
U_CAPI void
u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1,
const UChar *s2, int32_t length2,
uint32_t options,
int32_t *matchLen1, int32_t *matchLen2,
UErrorCode *pErrorCode);
/**
* Are the Unicode properties loaded?
* This must be used before internal functions are called that do
* not perform this check.
* Generate a debug assertion failure if data is not loaded.
*/
U_CFUNC UBool
uprv_haveProperties(UErrorCode *pErrorCode);
/**
* Load the Unicode property data.
* Intended primarily for use from u_init().
* Has no effect if property data is already loaded.
* NOT thread safe.
*/
/*U_CFUNC int8_t
uprv_loadPropsData(UErrorCode *errorCode);*/
/*
* Internal string casing functions implementing
* ustring.h/ustrcase.c and UnicodeString case mapping functions.
*/
struct UCaseMap {
const UCaseProps *csp;
#if !UCONFIG_NO_BREAK_ITERATION
UBreakIterator *iter; /* We adopt the iterator, so we own it. */
#endif
char locale[32];
int32_t locCache;
uint32_t options;
};
#ifndef __UCASEMAP_H__
typedef struct UCaseMap UCaseMap;
#endif
#if UCONFIG_NO_BREAK_ITERATION
# define UCASEMAP_INITIALIZER { NULL, { 0 }, 0, 0 }
#else
# define UCASEMAP_INITIALIZER { NULL, NULL, { 0 }, 0, 0 }
#endif
U_CFUNC void
ustrcase_setTempCaseMapLocale(UCaseMap *csm, const char *locale);
#ifndef U_STRING_CASE_MAPPER_DEFINED
#define U_STRING_CASE_MAPPER_DEFINED
/**
* String case mapping function type, used by ustrcase_map().
* All error checking must be done.
* The UCaseMap must be fully initialized, with locale and/or iter set as needed.
* src and dest must not overlap.
*/
typedef int32_t U_CALLCONV
UStringCaseMapper(const UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UErrorCode *pErrorCode);
#endif
/** Implements UStringCaseMapper. */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalToLower(const UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UErrorCode *pErrorCode);
/** Implements UStringCaseMapper. */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalToUpper(const UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UErrorCode *pErrorCode);
#if !UCONFIG_NO_BREAK_ITERATION
/** Implements UStringCaseMapper. */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalToTitle(const UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UErrorCode *pErrorCode);
#endif
/**
 * Implements UStringCaseMapper.
 * Case-folds src[0..srcLength[ code point by code point with
 * ucase_toFullFolding, using csm->csp and csm->options; the locale is not used.
 * Sets U_BUFFER_OVERFLOW_ERROR when the result is longer than destCapacity.
 */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalFold(const UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UErrorCode *pErrorCode);
/**
 * Implements argument checking and buffer handling
 * for string case mapping as a common function.
 *
 * Returns 0 if *pErrorCode is already a failure. Sets U_ILLEGAL_ARGUMENT_ERROR
 * and returns 0 when destCapacity<0, dest==NULL with destCapacity>0,
 * src==NULL, or srcLength<-1. The mapper may be run into a temporary buffer
 * (stack array or uprv_malloc; U_MEMORY_ALLOCATION_ERROR if that fails) whose
 * contents are then copied into dest. The result is finished with
 * u_terminateUChars(), whose return value is returned.
 *
 * @param stringCaseMapper the UStringCaseMapper that performs the actual mapping
 */
U_CFUNC int32_t
ustrcase_map(const UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UStringCaseMapper *stringCaseMapper,
UErrorCode *pErrorCode);
/**
 * UTF-8 string case mapping function type, used by ucasemap_mapUTF8().
 * UTF-8 version of UStringCaseMapper.
 * All error checking must be done.
 * The UCaseMap must be fully initialized, with locale and/or iter set as needed.
 * src and dest must not overlap.
 *
 * Same parameter layout as UStringCaseMapper, except that dest and src
 * are uint8_t buffers and the capacities/lengths count bytes of those buffers.
 */
typedef int32_t U_CALLCONV
UTF8CaseMapper(const UCaseMap *csm,
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength,
UErrorCode *pErrorCode);
/**
 * Implements UTF8CaseMapper.
 * NOTE(review): by its name this is the UTF-8 counterpart of
 * ustrcase_internalToTitle(); the implementation is not in this header -- confirm.
 */
U_CFUNC int32_t U_CALLCONV
ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength,
UErrorCode *pErrorCode);
/**
 * Implements argument checking and buffer handling
 * for UTF-8 string case mapping as a common function.
 *
 * @param stringCaseMapper the UTF8CaseMapper to invoke (despite the parameter
 *                         name, this is the UTF-8 mapper type, not UStringCaseMapper)
 * NOTE(review): presumably mirrors ustrcase_map() for uint8_t buffers -- confirm
 * against the implementation.
 */
U_CFUNC int32_t
ucasemap_mapUTF8(const UCaseMap *csm,
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength,
UTF8CaseMapper *stringCaseMapper,
UErrorCode *pErrorCode);
#ifdef __cplusplus
U_NAMESPACE_BEGIN
// Constants and data accessors for the Greek-specific uppercasing code
// (GreekUpper::toUpper), which tests these bits on a per-character data word.
namespace GreekUpper {
// Data bits.
// Mask for the low 10 bits of a data word.
// NOTE(review): presumably the uppercase base letter -- confirm in getLetterData().
static const uint32_t UPPER_MASK = 0x3ff;
static const uint32_t HAS_VOWEL = 0x1000;
static const uint32_t HAS_YPOGEGRAMMENI = 0x2000;
static const uint32_t HAS_ACCENT = 0x4000;
static const uint32_t HAS_DIALYTIKA = 0x8000;
// Further bits during data building and processing, not stored in the data map.
static const uint32_t HAS_COMBINING_DIALYTIKA = 0x10000;
static const uint32_t HAS_OTHER_GREEK_DIACRITIC = 0x20000;
// Convenience combinations of the single-bit flags above.
static const uint32_t HAS_VOWEL_AND_ACCENT = HAS_VOWEL | HAS_ACCENT;
static const uint32_t HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA =
HAS_VOWEL_AND_ACCENT | HAS_DIALYTIKA;
// Precomposed or combining dialytika; when set, toUpper() writes U+0308 after the letter.
static const uint32_t HAS_EITHER_DIALYTIKA = HAS_DIALYTIKA | HAS_COMBINING_DIALYTIKA;
// State bits.
// Carried from one character to the next by toUpper(); AFTER_CASED is
// preserved across case-ignorable characters.
static const uint32_t AFTER_CASED = 1;
static const uint32_t AFTER_VOWEL_WITH_ACCENT = 2;
// NOTE(review): presumably returns the data bits above for code point c -- confirm.
uint32_t getLetterData(UChar32 c);
/**
 * Returns a non-zero value for each of the Greek combining diacritics
 * listed in The Unicode Standard, version 8, chapter 7.2 Greek,
 * plus some perispomeni look-alikes.
 */
uint32_t getDiacriticData(UChar32 c);
} // namespace GreekUpper
U_NAMESPACE_END
#endif // __cplusplus
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
ustr_hashUCharsN(const UChar *str, int32_t length); ustr_hashUCharsN(const UChar *str, int32_t length);

View File

@ -22,31 +22,18 @@
#if !UCONFIG_NO_BREAK_ITERATION #if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/brkiter.h" #include "unicode/brkiter.h"
#include "unicode/casemap.h"
#include "unicode/localpointer.h"
#include "unicode/ubrk.h" #include "unicode/ubrk.h"
#include "unicode/ucasemap.h" #include "unicode/ucasemap.h"
#include "cmemory.h" #include "cmemory.h"
#include "ucase.h" #include "ucase.h"
#include "ustr_imp.h" #include "ucasemap_imp.h"
U_NAMESPACE_USE
/* functions available in the common library (for unistr_case.cpp) */ /* functions available in the common library (for unistr_case.cpp) */
/*
* Set parameters on an empty UCaseMap, for UCaseMap-less API functions.
* Do this fast because it is called with every function call.
* Duplicate of the same function in ustrcase.cpp, to keep it inline.
*/
static inline void
setTempCaseMap(UCaseMap *csm, const char *locale) {
if(csm->csp==NULL) {
csm->csp=ucase_getSingleton();
}
if(locale!=NULL && locale[0]==0) {
csm->locale[0]=0;
} else {
ustrcase_setTempCaseMapLocale(csm, locale);
}
}
/* public API functions */ /* public API functions */
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
@ -55,39 +42,73 @@ u_strToTitle(UChar *dest, int32_t destCapacity,
UBreakIterator *titleIter, UBreakIterator *titleIter,
const char *locale, const char *locale,
UErrorCode *pErrorCode) { UErrorCode *pErrorCode) {
UCaseMap csm=UCASEMAP_INITIALIZER; LocalPointer<BreakIterator> ownedIter;
setTempCaseMap(&csm, locale); BreakIterator *iter;
if(titleIter!=NULL) { if(titleIter!=NULL) {
ubrk_setText(csm.iter=titleIter, src, srcLength, pErrorCode); iter=reinterpret_cast<BreakIterator *>(titleIter);
} else { } else {
csm.iter=ubrk_open(UBRK_WORD, csm.locale, src, srcLength, pErrorCode); iter=BreakIterator::createWordInstance(Locale(locale), *pErrorCode);
ownedIter.adoptInstead(iter);
} }
int32_t length=ustrcase_map( if(U_FAILURE(*pErrorCode)) {
&csm, return 0;
}
UnicodeString s(srcLength<0, src, srcLength);
iter->setText(s);
return ustrcase_mapWithOverlap(
ustrcase_getCaseLocale(locale), 0, iter,
dest, destCapacity, dest, destCapacity,
src, srcLength, src, srcLength,
ustrcase_internalToTitle, pErrorCode); ustrcase_internalToTitle, *pErrorCode);
if(titleIter==NULL && csm.iter!=NULL) {
ubrk_close(csm.iter);
}
return length;
} }
U_NAMESPACE_BEGIN
int32_t CaseMap::toTitle(
const char *locale, uint32_t options, BreakIterator *iter,
const UChar *src, int32_t srcLength,
UChar *dest, int32_t destCapacity, Edits *edits,
UErrorCode &errorCode) {
LocalPointer<BreakIterator> ownedIter;
if(iter==NULL) {
iter=BreakIterator::createWordInstance(Locale(locale), errorCode);
ownedIter.adoptInstead(iter);
}
if(U_FAILURE(errorCode)) {
return 0;
}
UnicodeString s(srcLength<0, src, srcLength);
iter->setText(s);
return ustrcase_map(
ustrcase_getCaseLocale(locale), options, iter,
dest, destCapacity,
src, srcLength,
ustrcase_internalToTitle, edits, errorCode);
}
U_NAMESPACE_END
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
ucasemap_toTitle(UCaseMap *csm, ucasemap_toTitle(UCaseMap *csm,
UChar *dest, int32_t destCapacity, UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength, const UChar *src, int32_t srcLength,
UErrorCode *pErrorCode) { UErrorCode *pErrorCode) {
if(csm->iter!=NULL) { if (U_FAILURE(*pErrorCode)) {
ubrk_setText(csm->iter, src, srcLength, pErrorCode); return 0;
} else {
csm->iter=ubrk_open(UBRK_WORD, csm->locale, src, srcLength, pErrorCode);
} }
if (csm->iter == NULL) {
csm->iter = BreakIterator::createWordInstance(Locale(csm->locale), *pErrorCode);
}
if (U_FAILURE(*pErrorCode)) {
return 0;
}
UnicodeString s(srcLength<0, src, srcLength);
csm->iter->setText(s);
return ustrcase_map( return ustrcase_map(
csm, csm->caseLocale, csm->options, csm->iter,
dest, destCapacity, dest, destCapacity,
src, srcLength, src, srcLength,
ustrcase_internalToTitle, pErrorCode); ustrcase_internalToTitle, NULL, *pErrorCode);
} }
#endif // !UCONFIG_NO_BREAK_ITERATION #endif // !UCONFIG_NO_BREAK_ITERATION

View File

@ -22,6 +22,8 @@
#include "unicode/utypes.h" #include "unicode/utypes.h"
#include "unicode/brkiter.h" #include "unicode/brkiter.h"
#include "unicode/casemap.h"
#include "unicode/edits.h"
#include "unicode/ustring.h" #include "unicode/ustring.h"
#include "unicode/ucasemap.h" #include "unicode/ucasemap.h"
#include "unicode/ubrk.h" #include "unicode/ubrk.h"
@ -29,9 +31,30 @@
#include "unicode/utf16.h" #include "unicode/utf16.h"
#include "cmemory.h" #include "cmemory.h"
#include "ucase.h" #include "ucase.h"
#include "ucasemap_imp.h"
#include "ustr_imp.h" #include "ustr_imp.h"
#include "uassert.h" #include "uassert.h"
U_NAMESPACE_BEGIN
namespace {
int32_t checkOverflowAndEditsError(int32_t destIndex, int32_t destCapacity,
Edits *edits, UErrorCode &errorCode) {
if (U_SUCCESS(errorCode)) {
if (destIndex > destCapacity) {
errorCode = U_BUFFER_OVERFLOW_ERROR;
} else if (edits != NULL) {
edits->copyErrorTo(errorCode);
}
}
return destIndex;
}
} // namespace
U_NAMESPACE_END
U_NAMESPACE_USE U_NAMESPACE_USE
/* string casing ------------------------------------------------------------ */ /* string casing ------------------------------------------------------------ */
@ -39,21 +62,43 @@ U_NAMESPACE_USE
/* Appends a full case mapping result, see UCASE_MAX_STRING_LENGTH. */ /* Appends a full case mapping result, see UCASE_MAX_STRING_LENGTH. */
static inline int32_t static inline int32_t
appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity, appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
int32_t result, const UChar *s) { int32_t result, const UChar *s,
int32_t cpLength, uint32_t options, icu::Edits *edits) {
UChar32 c; UChar32 c;
int32_t length; int32_t length;
/* decode the result */ /* decode the result */
if(result<0) { if(result<0) {
/* (not) original code point */ /* (not) original code point */
if(edits!=NULL) {
edits->addUnchanged(cpLength);
if(options & UCASEMAP_OMIT_UNCHANGED_TEXT) {
return destIndex;
}
}
c=~result; c=~result;
length=U16_LENGTH(c); if(destIndex<destCapacity && c<=0xffff) { // BMP slightly-fastpath
} else if(result<=UCASE_MAX_STRING_LENGTH) { dest[destIndex++]=(UChar)c;
c=U_SENTINEL; return destIndex;
length=result; }
length=cpLength;
} else { } else {
c=result; if(result<=UCASE_MAX_STRING_LENGTH) {
length=U16_LENGTH(c); c=U_SENTINEL;
length=result;
} else if(destIndex<destCapacity && result<=0xffff) { // BMP slightly-fastpath
dest[destIndex++]=(UChar)result;
if(edits!=NULL) {
edits->addReplace(cpLength, 1);
}
return destIndex;
} else {
c=result;
length=U16_LENGTH(c);
}
if(edits!=NULL) {
edits->addReplace(cpLength, length);
}
} }
if(length>(INT32_MAX-destIndex)) { if(length>(INT32_MAX-destIndex)) {
return -1; // integer overflow return -1; // integer overflow
@ -99,9 +144,15 @@ appendUChar(UChar *dest, int32_t destIndex, int32_t destCapacity, UChar c) {
} }
static inline int32_t static inline int32_t
appendString(UChar *dest, int32_t destIndex, int32_t destCapacity, appendUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity,
const UChar *s, int32_t length) { const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) {
if(length>0) { if(length>0) {
if(edits!=NULL) {
edits->addUnchanged(length);
if(options & UCASEMAP_OMIT_UNCHANGED_TEXT) {
return destIndex;
}
}
if(length>(INT32_MAX-destIndex)) { if(length>(INT32_MAX-destIndex)) {
return -1; // integer overflow return -1; // integer overflow
} }
@ -150,84 +201,66 @@ utf16_caseContextIterator(void *context, int8_t dir) {
* context [0..srcLength[ into account. * context [0..srcLength[ into account.
*/ */
static int32_t static int32_t
_caseMap(const UCaseMap *csm, UCaseMapFull *map, _caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map,
UChar *dest, int32_t destCapacity, UChar *dest, int32_t destCapacity,
const UChar *src, UCaseContext *csc, const UChar *src, UCaseContext *csc,
int32_t srcStart, int32_t srcLimit, int32_t srcStart, int32_t srcLimit,
UErrorCode *pErrorCode) { icu::Edits *edits,
const UChar *s; UErrorCode &errorCode) {
UChar32 c, c2 = 0;
int32_t srcIndex, destIndex;
int32_t locCache;
locCache=csm->locCache;
/* case mapping loop */ /* case mapping loop */
srcIndex=srcStart; int32_t srcIndex=srcStart;
destIndex=0; int32_t destIndex=0;
while(srcIndex<srcLimit) { while(srcIndex<srcLimit) {
csc->cpStart=srcIndex; int32_t cpStart;
csc->cpStart=cpStart=srcIndex;
UChar32 c;
U16_NEXT(src, srcIndex, srcLimit, c); U16_NEXT(src, srcIndex, srcLimit, c);
csc->cpLimit=srcIndex; csc->cpLimit=srcIndex;
c=map(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &locCache); const UChar *s;
if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) { c=map(c, utf16_caseContextIterator, csc, &s, caseLocale);
/* fast path version of appendResult() for BMP results */ destIndex = appendResult(dest, destIndex, destCapacity, c, s,
dest[destIndex++]=(UChar)c2; srcIndex - cpStart, options, edits);
} else { if (destIndex < 0) {
destIndex=appendResult(dest, destIndex, destCapacity, c, s); errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
if(destIndex<0) { return 0;
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
} }
} }
if(destIndex>destCapacity) {
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
}
return destIndex; return destIndex;
} }
#if !UCONFIG_NO_BREAK_ITERATION #if !UCONFIG_NO_BREAK_ITERATION
U_CFUNC int32_t U_CALLCONV U_CFUNC int32_t U_CALLCONV
ustrcase_internalToTitle(const UCaseMap *csm, ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *iter,
UChar *dest, int32_t destCapacity, UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength, const UChar *src, int32_t srcLength,
UErrorCode *pErrorCode) { icu::Edits *edits,
const UChar *s; UErrorCode &errorCode) {
UChar32 c; if(U_FAILURE(errorCode)) {
int32_t prev, titleStart, titleLimit, idx, destIndex;
UBool isFirstIndex;
if(U_FAILURE(*pErrorCode)) {
return 0; return 0;
} }
// Use the C++ abstract base class to minimize dependencies.
// TODO: Change UCaseMap.iter to store a BreakIterator directly.
BreakIterator *bi=reinterpret_cast<BreakIterator *>(csm->iter);
/* set up local variables */ /* set up local variables */
int32_t locCache=csm->locCache;
UCaseContext csc=UCASECONTEXT_INITIALIZER; UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src; csc.p=(void *)src;
csc.limit=srcLength; csc.limit=srcLength;
destIndex=0; int32_t destIndex=0;
prev=0; int32_t prev=0;
isFirstIndex=TRUE; UBool isFirstIndex=TRUE;
/* titlecasing loop */ /* titlecasing loop */
while(prev<srcLength) { while(prev<srcLength) {
/* find next index where to titlecase */ /* find next index where to titlecase */
int32_t index;
if(isFirstIndex) { if(isFirstIndex) {
isFirstIndex=FALSE; isFirstIndex=FALSE;
idx=bi->first(); index=iter->first();
} else { } else {
idx=bi->next(); index=iter->next();
} }
if(idx==UBRK_DONE || idx>srcLength) { if(index==UBRK_DONE || index>srcLength) {
idx=srcLength; index=srcLength;
} }
/* /*
@ -243,29 +276,32 @@ ustrcase_internalToTitle(const UCaseMap *csm,
* b) first case letter (titlecase) [titleStart..titleLimit[ * b) first case letter (titlecase) [titleStart..titleLimit[
* c) subsequent characters (lowercase) [titleLimit..index[ * c) subsequent characters (lowercase) [titleLimit..index[
*/ */
if(prev<idx) { if(prev<index) {
/* find and copy uncased characters [prev..titleStart[ */ /* find and copy uncased characters [prev..titleStart[ */
titleStart=titleLimit=prev; int32_t titleStart=prev;
U16_NEXT(src, titleLimit, idx, c); int32_t titleLimit=prev;
if((csm->options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(csm->csp, c)) { UChar32 c;
U16_NEXT(src, titleLimit, index, c);
if((options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(c)) {
/* Adjust the titlecasing index (titleStart) to the next cased character. */ /* Adjust the titlecasing index (titleStart) to the next cased character. */
for(;;) { for(;;) {
titleStart=titleLimit; titleStart=titleLimit;
if(titleLimit==idx) { if(titleLimit==index) {
/* /*
* only uncased characters in [prev..index[ * only uncased characters in [prev..index[
* stop with titleStart==titleLimit==index * stop with titleStart==titleLimit==index
*/ */
break; break;
} }
U16_NEXT(src, titleLimit, idx, c); U16_NEXT(src, titleLimit, index, c);
if(UCASE_NONE!=ucase_getType(csm->csp, c)) { if(UCASE_NONE!=ucase_getType(c)) {
break; /* cased letter at [titleStart..titleLimit[ */ break; /* cased letter at [titleStart..titleLimit[ */
} }
} }
destIndex=appendString(dest, destIndex, destCapacity, src+prev, titleStart-prev); destIndex=appendUnchanged(dest, destIndex, destCapacity,
src+prev, titleStart-prev, options, edits);
if(destIndex<0) { if(destIndex<0) {
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0; return 0;
} }
} }
@ -274,48 +310,64 @@ ustrcase_internalToTitle(const UCaseMap *csm,
/* titlecase c which is from [titleStart..titleLimit[ */ /* titlecase c which is from [titleStart..titleLimit[ */
csc.cpStart=titleStart; csc.cpStart=titleStart;
csc.cpLimit=titleLimit; csc.cpLimit=titleLimit;
c=ucase_toFullTitle(csm->csp, c, utf16_caseContextIterator, &csc, &s, csm->locale, &locCache); const UChar *s;
destIndex=appendResult(dest, destIndex, destCapacity, c, s); c=ucase_toFullTitle(c, utf16_caseContextIterator, &csc, &s, caseLocale);
destIndex=appendResult(dest, destIndex, destCapacity, c, s,
titleLimit-titleStart, options, edits);
if(destIndex<0) { if(destIndex<0) {
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0; return 0;
} }
/* Special case Dutch IJ titlecasing */ /* Special case Dutch IJ titlecasing */
if (titleStart+1 < idx && if (titleStart+1 < index &&
ucase_getCaseLocale(csm->locale,&locCache) == UCASE_LOC_DUTCH && caseLocale == UCASE_LOC_DUTCH &&
(src[titleStart] == 0x0049 || src[titleStart] == 0x0069) && (src[titleStart] == 0x0049 || src[titleStart] == 0x0069)) {
(src[titleStart+1] == 0x004A || src[titleStart+1] == 0x006A)) { if (src[titleStart+1] == 0x006A) {
destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A); destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A);
if(destIndex<0) { if(destIndex<0) {
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0; return 0;
}
if(edits!=NULL) {
edits->addReplace(1, 1);
}
titleLimit++;
} else if (src[titleStart+1] == 0x004A) {
// Keep the capital J from getting lowercased.
destIndex=appendUnchanged(dest, destIndex, destCapacity,
src+titleStart+1, 1, options, edits);
if(destIndex<0) {
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
titleLimit++;
} }
titleLimit++;
} }
/* lowercase [titleLimit..index[ */ /* lowercase [titleLimit..index[ */
if(titleLimit<idx) { if(titleLimit<index) {
if((csm->options&U_TITLECASE_NO_LOWERCASE)==0) { if((options&U_TITLECASE_NO_LOWERCASE)==0) {
/* Normal operation: Lowercase the rest of the word. */ /* Normal operation: Lowercase the rest of the word. */
destIndex+= destIndex+=
_caseMap( _caseMap(
csm, ucase_toFullLower, caseLocale, options, ucase_toFullLower,
dest+destIndex, destCapacity-destIndex, dest+destIndex, destCapacity-destIndex,
src, &csc, src, &csc,
titleLimit, idx, titleLimit, index,
pErrorCode); edits, errorCode);
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
*pErrorCode=U_ZERO_ERROR; errorCode=U_ZERO_ERROR;
} }
if(U_FAILURE(*pErrorCode)) { if(U_FAILURE(errorCode)) {
return destIndex; return destIndex;
} }
} else { } else {
/* Optionally just copy the rest of the word unchanged. */ /* Optionally just copy the rest of the word unchanged. */
destIndex=appendString(dest, destIndex, destCapacity, src+titleLimit, idx-titleLimit); destIndex=appendUnchanged(dest, destIndex, destCapacity,
src+titleLimit, index-titleLimit, options, edits);
if(destIndex<0) { if(destIndex<0) {
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0; return 0;
} }
} }
@ -323,13 +375,10 @@ ustrcase_internalToTitle(const UCaseMap *csm,
} }
} }
prev=idx; prev=index;
} }
if(destIndex>destCapacity) { return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
}
return destIndex;
} }
#endif // !UCONFIG_NO_BREAK_ITERATION #endif // !UCONFIG_NO_BREAK_ITERATION
@ -791,11 +840,11 @@ uint32_t getDiacriticData(UChar32 c) {
} }
} }
UBool isFollowedByCasedLetter(const UCaseProps *csp, const UChar *s, int32_t i, int32_t length) { UBool isFollowedByCasedLetter(const UChar *s, int32_t i, int32_t length) {
while (i < length) { while (i < length) {
UChar32 c; UChar32 c;
U16_NEXT(s, i, length, c); U16_NEXT(s, i, length, c);
int32_t type = ucase_getTypeOrIgnorable(csp, c); int32_t type = ucase_getTypeOrIgnorable(c);
if ((type & UCASE_IGNORABLE) != 0) { if ((type & UCASE_IGNORABLE) != 0) {
// Case-ignorable, continue with the loop. // Case-ignorable, continue with the loop.
} else if (type != UCASE_NONE) { } else if (type != UCASE_NONE) {
@ -813,11 +862,11 @@ UBool isFollowedByCasedLetter(const UCaseProps *csp, const UChar *s, int32_t i,
* for each character. * for each character.
* TODO: Try to re-consolidate one way or another with the non-Greek function. * TODO: Try to re-consolidate one way or another with the non-Greek function.
*/ */
int32_t toUpper(const UCaseMap *csm, int32_t toUpper(uint32_t options,
UChar *dest, int32_t destCapacity, UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength, const UChar *src, int32_t srcLength,
UErrorCode *pErrorCode) { Edits *edits,
int32_t locCache = UCASE_LOC_GREEK; UErrorCode &errorCode) {
int32_t destIndex=0; int32_t destIndex=0;
uint32_t state = 0; uint32_t state = 0;
for (int32_t i = 0; i < srcLength;) { for (int32_t i = 0; i < srcLength;) {
@ -825,7 +874,7 @@ int32_t toUpper(const UCaseMap *csm,
UChar32 c; UChar32 c;
U16_NEXT(src, nextIndex, srcLength, c); U16_NEXT(src, nextIndex, srcLength, c);
uint32_t nextState = 0; uint32_t nextState = 0;
int32_t type = ucase_getTypeOrIgnorable(csm->csp, c); int32_t type = ucase_getTypeOrIgnorable(c);
if ((type & UCASE_IGNORABLE) != 0) { if ((type & UCASE_IGNORABLE) != 0) {
// c is case-ignorable // c is case-ignorable
nextState |= (state & AFTER_CASED); nextState |= (state & AFTER_CASED);
@ -872,7 +921,7 @@ int32_t toUpper(const UCaseMap *csm,
(data & HAS_ACCENT) != 0 && (data & HAS_ACCENT) != 0 &&
numYpogegrammeni == 0 && numYpogegrammeni == 0 &&
(state & AFTER_CASED) == 0 && (state & AFTER_CASED) == 0 &&
!isFollowedByCasedLetter(csm->csp, src, nextIndex, srcLength)) { !isFollowedByCasedLetter(src, nextIndex, srcLength)) {
// Keep disjunctive "or" with (only) a tonos. // Keep disjunctive "or" with (only) a tonos.
// We use the same "word boundary" conditions as for the Final_Sigma test. // We use the same "word boundary" conditions as for the Final_Sigma test.
if (i == nextIndex) { if (i == nextIndex) {
@ -890,44 +939,68 @@ int32_t toUpper(const UCaseMap *csm,
data &= ~HAS_EITHER_DIALYTIKA; data &= ~HAS_EITHER_DIALYTIKA;
} }
} }
destIndex=appendUChar(dest, destIndex, destCapacity, (UChar)upper);
if (destIndex >= 0 && (data & HAS_EITHER_DIALYTIKA) != 0) { UBool change = TRUE;
destIndex=appendUChar(dest, destIndex, destCapacity, 0x308); // restore or add a dialytika if (edits != NULL) {
// Find out first whether we are changing the text.
change = src[i] != upper || numYpogegrammeni > 0;
int32_t i2 = i + 1;
if ((data & HAS_EITHER_DIALYTIKA) != 0) {
change |= i2 >= nextIndex || src[i2] != 0x308;
++i2;
}
if (addTonos) {
change |= i2 >= nextIndex || src[i2] != 0x301;
++i2;
}
int32_t oldLength = nextIndex - i;
int32_t newLength = (i2 - i) + numYpogegrammeni;
change |= oldLength != newLength;
if (change) {
if (edits != NULL) {
edits->addReplace(oldLength, newLength);
}
} else {
if (edits != NULL) {
edits->addUnchanged(oldLength);
}
// Write unchanged text?
change = (options & UCASEMAP_OMIT_UNCHANGED_TEXT) == 0;
}
} }
if (destIndex >= 0 && addTonos) {
destIndex=appendUChar(dest, destIndex, destCapacity, 0x301); if (change) {
} destIndex=appendUChar(dest, destIndex, destCapacity, (UChar)upper);
while (destIndex >= 0 && numYpogegrammeni > 0) { if (destIndex >= 0 && (data & HAS_EITHER_DIALYTIKA) != 0) {
destIndex=appendUChar(dest, destIndex, destCapacity, 0x399); destIndex=appendUChar(dest, destIndex, destCapacity, 0x308); // restore or add a dialytika
--numYpogegrammeni; }
} if (destIndex >= 0 && addTonos) {
if(destIndex<0) { destIndex=appendUChar(dest, destIndex, destCapacity, 0x301);
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; }
return 0; while (destIndex >= 0 && numYpogegrammeni > 0) {
destIndex=appendUChar(dest, destIndex, destCapacity, 0x399);
--numYpogegrammeni;
}
if(destIndex<0) {
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
} }
} else { } else {
const UChar *s; const UChar *s;
UChar32 c2 = 0; c=ucase_toFullUpper(c, NULL, NULL, &s, UCASE_LOC_GREEK);
c=ucase_toFullUpper(csm->csp, c, NULL, NULL, &s, csm->locale, &locCache); destIndex = appendResult(dest, destIndex, destCapacity, c, s,
if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) { nextIndex - i, options, edits);
/* fast path version of appendResult() for BMP results */ if (destIndex < 0) {
dest[destIndex++]=(UChar)c2; errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
} else { return 0;
destIndex=appendResult(dest, destIndex, destCapacity, c, s);
if(destIndex<0) {
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
} }
} }
i = nextIndex; i = nextIndex;
state = nextState; state = nextState;
} }
if(destIndex>destCapacity) { return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
}
return destIndex;
} }
} // namespace GreekUpper } // namespace GreekUpper
@ -936,94 +1009,79 @@ U_NAMESPACE_END
/* functions available in the common library (for unistr_case.cpp) */ /* functions available in the common library (for unistr_case.cpp) */
U_CFUNC int32_t U_CALLCONV U_CFUNC int32_t U_CALLCONV
ustrcase_internalToLower(const UCaseMap *csm, ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
UChar *dest, int32_t destCapacity, UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength, const UChar *src, int32_t srcLength,
UErrorCode *pErrorCode) { icu::Edits *edits,
UErrorCode &errorCode) {
UCaseContext csc=UCASECONTEXT_INITIALIZER; UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src; csc.p=(void *)src;
csc.limit=srcLength; csc.limit=srcLength;
return _caseMap( int32_t destIndex = _caseMap(
csm, ucase_toFullLower, caseLocale, options, ucase_toFullLower,
dest, destCapacity, dest, destCapacity,
src, &csc, 0, srcLength, src, &csc, 0, srcLength,
pErrorCode); edits, errorCode);
return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
} }
U_CFUNC int32_t U_CALLCONV U_CFUNC int32_t U_CALLCONV
ustrcase_internalToUpper(const UCaseMap *csm, ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
UChar *dest, int32_t destCapacity, UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength, const UChar *src, int32_t srcLength,
UErrorCode *pErrorCode) { icu::Edits *edits,
int32_t locCache = csm->locCache; UErrorCode &errorCode) {
if (ucase_getCaseLocale(csm->locale, &locCache) == UCASE_LOC_GREEK) { if (caseLocale == UCASE_LOC_GREEK) {
return GreekUpper::toUpper(csm, dest, destCapacity, src, srcLength, pErrorCode); return GreekUpper::toUpper(options, dest, destCapacity, src, srcLength, edits, errorCode);
} }
UCaseContext csc=UCASECONTEXT_INITIALIZER; UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src; csc.p=(void *)src;
csc.limit=srcLength; csc.limit=srcLength;
return _caseMap( int32_t destIndex = _caseMap(
csm, ucase_toFullUpper, caseLocale, options, ucase_toFullUpper,
dest, destCapacity, dest, destCapacity,
src, &csc, 0, srcLength, src, &csc, 0, srcLength,
pErrorCode); edits, errorCode);
return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
} }
static int32_t U_CFUNC int32_t U_CALLCONV
ustr_foldCase(const UCaseProps *csp, ustrcase_internalFold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
UChar *dest, int32_t destCapacity, UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength, const UChar *src, int32_t srcLength,
uint32_t options, icu::Edits *edits,
UErrorCode *pErrorCode) { UErrorCode &errorCode) {
int32_t srcIndex, destIndex;
const UChar *s;
UChar32 c, c2 = 0;
/* case mapping loop */ /* case mapping loop */
srcIndex=destIndex=0; int32_t srcIndex = 0;
while(srcIndex<srcLength) { int32_t destIndex = 0;
while (srcIndex < srcLength) {
int32_t cpStart = srcIndex;
UChar32 c;
U16_NEXT(src, srcIndex, srcLength, c); U16_NEXT(src, srcIndex, srcLength, c);
c=ucase_toFullFolding(csp, c, &s, options); const UChar *s;
if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) { c = ucase_toFullFolding(c, &s, options);
/* fast path version of appendResult() for BMP results */ destIndex = appendResult(dest, destIndex, destCapacity, c, s,
dest[destIndex++]=(UChar)c2; srcIndex - cpStart, options, edits);
} else { if (destIndex < 0) {
destIndex=appendResult(dest, destIndex, destCapacity, c, s); errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
if(destIndex<0) { return 0;
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
} }
} }
if(destIndex>destCapacity) { return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
}
return destIndex;
}
U_CFUNC int32_t U_CALLCONV
ustrcase_internalFold(const UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UErrorCode *pErrorCode) {
return ustr_foldCase(csm->csp, dest, destCapacity, src, srcLength, csm->options, pErrorCode);
} }
U_CFUNC int32_t U_CFUNC int32_t
ustrcase_map(const UCaseMap *csm, ustrcase_map(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
UChar *dest, int32_t destCapacity, UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength, const UChar *src, int32_t srcLength,
UStringCaseMapper *stringCaseMapper, UStringCaseMapper *stringCaseMapper,
UErrorCode *pErrorCode) { icu::Edits *edits,
UChar buffer[300]; UErrorCode &errorCode) {
UChar *temp;
int32_t destLength; int32_t destLength;
/* check argument values */ /* check argument values */
if(U_FAILURE(*pErrorCode)) { if(U_FAILURE(errorCode)) {
return 0; return 0;
} }
if( destCapacity<0 || if( destCapacity<0 ||
@ -1031,7 +1089,53 @@ ustrcase_map(const UCaseMap *csm,
src==NULL || src==NULL ||
srcLength<-1 srcLength<-1
) { ) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
/* get the string length */
if(srcLength==-1) {
srcLength=u_strlen(src);
}
/* check for overlapping source and destination */
if( dest!=NULL &&
((src>=dest && src<(dest+destCapacity)) ||
(dest>=src && dest<(src+srcLength)))
) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
if(edits!=NULL) {
edits->reset();
}
destLength=stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
dest, destCapacity, src, srcLength, edits, errorCode);
return u_terminateUChars(dest, destCapacity, destLength, &errorCode);
}
U_CFUNC int32_t
ustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UStringCaseMapper *stringCaseMapper,
UErrorCode &errorCode) {
UChar buffer[300];
UChar *temp;
int32_t destLength;
/* check argument values */
if(U_FAILURE(errorCode)) {
return 0;
}
if( destCapacity<0 ||
(dest==NULL && destCapacity>0) ||
src==NULL ||
srcLength<-1
) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0; return 0;
} }
@ -1053,7 +1157,7 @@ ustrcase_map(const UCaseMap *csm,
/* allocate a buffer */ /* allocate a buffer */
temp=(UChar *)uprv_malloc(destCapacity*U_SIZEOF_UCHAR); temp=(UChar *)uprv_malloc(destCapacity*U_SIZEOF_UCHAR);
if(temp==NULL) { if(temp==NULL) {
*pErrorCode=U_MEMORY_ALLOCATION_ERROR; errorCode=U_MEMORY_ALLOCATION_ERROR;
return 0; return 0;
} }
} }
@ -1061,21 +1165,19 @@ ustrcase_map(const UCaseMap *csm,
temp=dest; temp=dest;
} }
destLength=stringCaseMapper(csm, temp, destCapacity, src, srcLength, pErrorCode); destLength=stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
temp, destCapacity, src, srcLength, NULL, errorCode);
if(temp!=dest) { if(temp!=dest) {
/* copy the result string to the destination buffer */ /* copy the result string to the destination buffer */
if(destLength>0) { if (U_SUCCESS(errorCode) && 0 < destLength && destLength <= destCapacity) {
int32_t copyLength= destLength<=destCapacity ? destLength : destCapacity; u_memmove(dest, temp, destLength);
if(copyLength>0) {
u_memmove(dest, temp, copyLength);
}
} }
if(temp!=buffer) { if(temp!=buffer) {
uprv_free(temp); uprv_free(temp);
} }
} }
return u_terminateUChars(dest, destCapacity, destLength, pErrorCode); return u_terminateUChars(dest, destCapacity, destLength, &errorCode);
} }
/* public API functions */ /* public API functions */
@ -1085,16 +1187,29 @@ u_strFoldCase(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength, const UChar *src, int32_t srcLength,
uint32_t options, uint32_t options,
UErrorCode *pErrorCode) { UErrorCode *pErrorCode) {
UCaseMap csm=UCASEMAP_INITIALIZER; return ustrcase_mapWithOverlap(
csm.csp=ucase_getSingleton(); UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
csm.options=options;
return ustrcase_map(
&csm,
dest, destCapacity, dest, destCapacity,
src, srcLength, src, srcLength,
ustrcase_internalFold, pErrorCode); ustrcase_internalFold, *pErrorCode);
} }
U_NAMESPACE_BEGIN
/*
 * Case-folds src into dest by forwarding to ustrcase_map() with
 * ustrcase_internalFold as the per-string mapper.
 *
 * Folding takes no locale parameter here; UCASE_LOC_ROOT is always passed
 * as the case locale.
 *
 * options     passed through unchanged to ustrcase_map()
 * src         source text
 * srcLength   passed through unchanged to ustrcase_map()
 * dest        output buffer
 * destCapacity capacity of dest, passed through unchanged
 * edits       passed through unchanged (presumably records the changes made
 *             and may be NULL -- TODO confirm against ustrcase_map())
 * errorCode   in/out error code, passed by reference to ustrcase_map()
 *
 * Returns whatever ustrcase_map() returns (presumably the result length).
 */
int32_t CaseMap::fold(
uint32_t options,
const UChar *src, int32_t srcLength,
UChar *dest, int32_t destCapacity, Edits *edits,
UErrorCode &errorCode) {
return ustrcase_map(
// NOTE(review): no comma after UCASEMAP_BREAK_ITERATOR_NULL -- the macro
// presumably expands to its own "argument," text or to nothing; do not add one.
UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
dest, destCapacity,
src, srcLength,
ustrcase_internalFold, edits, errorCode);
}
U_NAMESPACE_END
/* case-insensitive string comparisons -------------------------------------- */ /* case-insensitive string comparisons -------------------------------------- */
/* /*
@ -1134,8 +1249,6 @@ static int32_t _cmpFold(
UErrorCode *pErrorCode) { UErrorCode *pErrorCode) {
int32_t cmpRes = 0; int32_t cmpRes = 0;
const UCaseProps *csp;
/* current-level start/limit - s1/s2 as current */ /* current-level start/limit - s1/s2 as current */
const UChar *start1, *start2, *limit1, *limit2; const UChar *start1, *start2, *limit1, *limit2;
@ -1167,7 +1280,6 @@ static int32_t _cmpFold(
* assume that at least the option U_COMPARE_IGNORE_CASE is set * assume that at least the option U_COMPARE_IGNORE_CASE is set
* otherwise this function would have to behave exactly as uprv_strCompare() * otherwise this function would have to behave exactly as uprv_strCompare()
*/ */
csp=ucase_getSingleton();
if(U_FAILURE(*pErrorCode)) { if(U_FAILURE(*pErrorCode)) {
return 0; return 0;
} }
@ -1349,7 +1461,7 @@ static int32_t _cmpFold(
*/ */
if( level1==0 && if( level1==0 &&
(length=ucase_toFullFolding(csp, (UChar32)cp1, &p, options))>=0 (length=ucase_toFullFolding((UChar32)cp1, &p, options))>=0
) { ) {
/* cp1 case-folds to the code point "length" or to p[length] */ /* cp1 case-folds to the code point "length" or to p[length] */
if(U_IS_SURROGATE(c1)) { if(U_IS_SURROGATE(c1)) {
@ -1395,7 +1507,7 @@ static int32_t _cmpFold(
} }
if( level2==0 && if( level2==0 &&
(length=ucase_toFullFolding(csp, (UChar32)cp2, &p, options))>=0 (length=ucase_toFullFolding((UChar32)cp2, &p, options))>=0
) { ) {
/* cp2 case-folds to the code point "length" or to p[length] */ /* cp2 case-folds to the code point "length" or to p[length] */
if(U_IS_SURROGATE(c2)) { if(U_IS_SURROGATE(c2)) {

View File

@ -18,66 +18,24 @@
*/ */
#include "unicode/utypes.h" #include "unicode/utypes.h"
#include "uassert.h"
#include "unicode/brkiter.h"
#include "unicode/casemap.h"
#include "unicode/ucasemap.h" #include "unicode/ucasemap.h"
#include "unicode/uloc.h" #include "unicode/uloc.h"
#include "unicode/ustring.h" #include "unicode/ustring.h"
#include "ucase.h" #include "ucase.h"
#include "ustr_imp.h" #include "ucasemap_imp.h"
U_CFUNC void U_CFUNC int32_t
ustrcase_setTempCaseMapLocale(UCaseMap *csm, const char *locale) { ustrcase_getCaseLocale(const char *locale) {
/* if (locale == NULL) {
* We could call ucasemap_setLocale(), but here we really only care about locale = uloc_getDefault();
* the initial language subtag, we need not return the real string via
* ucasemap_getLocale(), and we don't care about only getting "x" from
* "x-some-thing" etc.
*
* We ignore locales with a longer-than-3 initial subtag.
*
* We also do not fill in the locCache because it is rarely used,
* and not worth setting unless we reuse it for many case mapping operations.
* (That's why UCaseMap was created.)
*/
int i;
char c;
/* the internal functions require locale!=NULL */
if(locale==NULL) {
// Do not call uprv_getDefaultLocaleID() because that does not see
// changes to the default locale via uloc_setDefault().
// It would also be inefficient if used frequently because uprv_getDefaultLocaleID()
// does not cache the locale ID.
//
// Unfortunately, uloc_getDefault() has many dependencies.
// We only care about a small set of language subtags,
// and we do not need the locale ID to be canonicalized.
//
// Best is to not call case mapping functions with a NULL locale ID.
locale=uloc_getDefault();
} }
for(i=0; i<4 && (c=locale[i])!=0 && c!='-' && c!='_'; ++i) { if (*locale == 0) {
csm->locale[i]=c; return UCASE_LOC_ROOT;
}
if(i<=3) {
csm->locale[i]=0; /* Up to 3 non-separator characters. */
} else { } else {
csm->locale[0]=0; /* Longer-than-3 initial subtag: Ignore. */ return ucase_getCaseLocale(locale);
}
}
/*
* Set parameters on an empty UCaseMap, for UCaseMap-less API functions.
* Do this fast because it is called with every function call.
*/
static inline void
setTempCaseMap(UCaseMap *csm, const char *locale) {
if(csm->csp==NULL) {
csm->csp=ucase_getSingleton();
}
if(locale!=NULL && locale[0]==0) {
csm->locale[0]=0;
} else {
ustrcase_setTempCaseMapLocale(csm, locale);
} }
} }
@ -88,13 +46,11 @@ u_strToLower(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength, const UChar *src, int32_t srcLength,
const char *locale, const char *locale,
UErrorCode *pErrorCode) { UErrorCode *pErrorCode) {
UCaseMap csm=UCASEMAP_INITIALIZER; return ustrcase_mapWithOverlap(
setTempCaseMap(&csm, locale); ustrcase_getCaseLocale(locale), 0, UCASEMAP_BREAK_ITERATOR_NULL
return ustrcase_map(
&csm,
dest, destCapacity, dest, destCapacity,
src, srcLength, src, srcLength,
ustrcase_internalToLower, pErrorCode); ustrcase_internalToLower, *pErrorCode);
} }
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
@ -102,11 +58,37 @@ u_strToUpper(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength, const UChar *src, int32_t srcLength,
const char *locale, const char *locale,
UErrorCode *pErrorCode) { UErrorCode *pErrorCode) {
UCaseMap csm=UCASEMAP_INITIALIZER; return ustrcase_mapWithOverlap(
setTempCaseMap(&csm, locale); ustrcase_getCaseLocale(locale), 0, UCASEMAP_BREAK_ITERATOR_NULL
return ustrcase_map(
&csm,
dest, destCapacity, dest, destCapacity,
src, srcLength, src, srcLength,
ustrcase_internalToUpper, pErrorCode); ustrcase_internalToUpper, *pErrorCode);
} }
U_NAMESPACE_BEGIN
/*
 * Lowercases src into dest by forwarding to ustrcase_map() with
 * ustrcase_internalToLower as the per-string mapper.
 *
 * locale      locale ID string; converted to an integer case locale with
 *             ustrcase_getCaseLocale() before the call
 * options     passed through unchanged to ustrcase_map()
 * src         source text
 * srcLength   passed through unchanged to ustrcase_map()
 * dest        output buffer
 * destCapacity capacity of dest, passed through unchanged
 * edits       passed through unchanged (presumably records the changes made
 *             -- TODO confirm against ustrcase_map())
 * errorCode   in/out error code, passed by reference to ustrcase_map()
 *
 * Returns whatever ustrcase_map() returns (presumably the result length).
 */
int32_t CaseMap::toLower(
const char *locale, uint32_t options,
const UChar *src, int32_t srcLength,
UChar *dest, int32_t destCapacity, Edits *edits,
UErrorCode &errorCode) {
return ustrcase_map(
// NOTE(review): no comma after UCASEMAP_BREAK_ITERATOR_NULL -- the macro
// presumably supplies its own separator; do not add one.
ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
dest, destCapacity,
src, srcLength,
ustrcase_internalToLower, edits, errorCode);
}
/*
 * Uppercases src into dest by forwarding to ustrcase_map() with
 * ustrcase_internalToUpper as the per-string mapper.
 *
 * locale      locale ID string; converted to an integer case locale with
 *             ustrcase_getCaseLocale() before the call
 * options     passed through unchanged to ustrcase_map()
 * src         source text
 * srcLength   passed through unchanged to ustrcase_map()
 * dest        output buffer
 * destCapacity capacity of dest, passed through unchanged
 * edits       passed through unchanged (presumably records the changes made
 *             -- TODO confirm against ustrcase_map())
 * errorCode   in/out error code, passed by reference to ustrcase_map()
 *
 * Returns whatever ustrcase_map() returns (presumably the result length).
 */
int32_t CaseMap::toUpper(
const char *locale, uint32_t options,
const UChar *src, int32_t srcLength,
UChar *dest, int32_t destCapacity, Edits *edits,
UErrorCode &errorCode) {
return ustrcase_map(
// NOTE(review): no comma after UCASEMAP_BREAK_ITERATOR_NULL -- the macro
// presumably supplies its own separator; do not add one.
ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
dest, destCapacity,
src, srcLength,
ustrcase_internalToUpper, edits, errorCode);
}
U_NAMESPACE_END

View File

@ -19,6 +19,7 @@
#include "unicode/utypes.h" #include "unicode/utypes.h"
#include "unicode/putil.h" #include "unicode/putil.h"
#include "unicode/uchar.h"
#include "unicode/ustring.h" #include "unicode/ustring.h"
#include "unicode/utf16.h" #include "unicode/utf16.h"
#include "cstring.h" #include "cstring.h"

View File

@ -13,7 +13,9 @@
#include "unicode/dcfmtsym.h" #include "unicode/dcfmtsym.h"
#include "unicode/plurrule.h" #include "unicode/plurrule.h"
#include "unicode/strenum.h"
#include "unicode/ucurr.h" #include "unicode/ucurr.h"
#include "unicode/ustring.h"
#include "affixpatternparser.h" #include "affixpatternparser.h"
#include "charstr.h" #include "charstr.h"
#include "precision.h" #include "precision.h"

View File

@ -92,7 +92,6 @@ UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(CaseMapTransliterator)
*/ */
CaseMapTransliterator::CaseMapTransliterator(const UnicodeString &id, UCaseMapFull *map) : CaseMapTransliterator::CaseMapTransliterator(const UnicodeString &id, UCaseMapFull *map) :
Transliterator(id, 0), Transliterator(id, 0),
fCsp(ucase_getSingleton()),
fMap(map) fMap(map)
{ {
// TODO test incremental mode with context-sensitive text (e.g. greek sigma) // TODO test incremental mode with context-sensitive text (e.g. greek sigma)
@ -110,7 +109,7 @@ CaseMapTransliterator::~CaseMapTransliterator() {
*/ */
CaseMapTransliterator::CaseMapTransliterator(const CaseMapTransliterator& o) : CaseMapTransliterator::CaseMapTransliterator(const CaseMapTransliterator& o) :
Transliterator(o), Transliterator(o),
fCsp(o.fCsp), fMap(o.fMap) fMap(o.fMap)
{ {
} }
@ -119,7 +118,6 @@ CaseMapTransliterator::CaseMapTransliterator(const CaseMapTransliterator& o) :
*/ */
/*CaseMapTransliterator& CaseMapTransliterator::operator=(const CaseMapTransliterator& o) { /*CaseMapTransliterator& CaseMapTransliterator::operator=(const CaseMapTransliterator& o) {
Transliterator::operator=(o); Transliterator::operator=(o);
fCsp = o.fCsp;
fMap = o.fMap; fMap = o.fMap;
return *this; return *this;
}*/ }*/
@ -151,14 +149,14 @@ void CaseMapTransliterator::handleTransliterate(Replaceable& text,
UnicodeString tmp; UnicodeString tmp;
const UChar *s; const UChar *s;
UChar32 c; UChar32 c;
int32_t textPos, delta, result, locCache=0; int32_t textPos, delta, result;
for(textPos=offsets.start; textPos<offsets.limit;) { for(textPos=offsets.start; textPos<offsets.limit;) {
csc.cpStart=textPos; csc.cpStart=textPos;
c=text.char32At(textPos); c=text.char32At(textPos);
csc.cpLimit=textPos+=U16_LENGTH(c); csc.cpLimit=textPos+=U16_LENGTH(c);
result=fMap(fCsp, c, utrans_rep_caseContextIterator, &csc, &s, "", &locCache); result=fMap(c, utrans_rep_caseContextIterator, &csc, &s, UCASE_LOC_ROOT);
if(csc.b1 && isIncremental) { if(csc.b1 && isIncremental) {
// fMap() tried to look beyond the context limit // fMap() tried to look beyond the context limit

View File

@ -84,7 +84,6 @@ protected:
UTransPosition& offsets, UTransPosition& offsets,
UBool isIncremental) const; UBool isIncremental) const;
const UCaseProps *fCsp;
UCaseMapFull *fMap; UCaseMapFull *fMap;
private: private:

View File

@ -20,6 +20,7 @@
#include "unicode/locid.h" #include "unicode/locid.h"
#include "unicode/plurrule.h" #include "unicode/plurrule.h"
#include "unicode/strenum.h"
#include "unicode/ures.h" #include "unicode/ures.h"
#include "unicode/numsys.h" #include "unicode/numsys.h"
#include "cstring.h" #include "cstring.h"

View File

@ -49,6 +49,7 @@
#include "unicode/utf16.h" #include "unicode/utf16.h"
#include "unicode/numsys.h" #include "unicode/numsys.h"
#include "unicode/localpointer.h" #include "unicode/localpointer.h"
#include "unicode/ustring.h"
#include "uresimp.h" #include "uresimp.h"
#include "ucurrimp.h" #include "ucurrimp.h"
#include "charstr.h" #include "charstr.h"

View File

@ -26,6 +26,7 @@
#include "unicode/decimfmt.h" #include "unicode/decimfmt.h"
#include "uresimp.h" #include "uresimp.h"
#include "unicode/ures.h" #include "unicode/ures.h"
#include "unicode/ustring.h"
#include "ureslocs.h" #include "ureslocs.h"
#include "cstring.h" #include "cstring.h"
#include "mutex.h" #include "mutex.h"

View File

@ -24,6 +24,7 @@
#include "unicode/format.h" #include "unicode/format.h"
#include "unicode/locid.h" #include "unicode/locid.h"
#include "unicode/parseerr.h" #include "unicode/parseerr.h"
#include "unicode/strenum.h"
#include "unicode/ures.h" #include "unicode/ures.h"
#include "uvector.h" #include "uvector.h"
#include "hash.h" #include "hash.h"

View File

@ -27,6 +27,7 @@
#include "unicode/udata.h" #include "unicode/udata.h"
#include "unicode/udisplaycontext.h" #include "unicode/udisplaycontext.h"
#include "unicode/brkiter.h" #include "unicode/brkiter.h"
#include "unicode/ucasemap.h"
#include "cmemory.h" #include "cmemory.h"
#include "cstring.h" #include "cstring.h"

View File

@ -19,8 +19,7 @@
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
CaseFoldingUTextIterator::CaseFoldingUTextIterator(UText &text) : CaseFoldingUTextIterator::CaseFoldingUTextIterator(UText &text) :
fUText(text), fcsp(NULL), fFoldChars(NULL), fFoldLength(0) { fUText(text), fFoldChars(NULL), fFoldLength(0) {
fcsp = ucase_getSingleton();
} }
CaseFoldingUTextIterator::~CaseFoldingUTextIterator() {} CaseFoldingUTextIterator::~CaseFoldingUTextIterator() {}
@ -35,7 +34,7 @@ UChar32 CaseFoldingUTextIterator::next() {
if (originalC == U_SENTINEL) { if (originalC == U_SENTINEL) {
return originalC; return originalC;
} }
fFoldLength = ucase_toFullFolding(fcsp, originalC, &fFoldChars, U_FOLD_CASE_DEFAULT); fFoldLength = ucase_toFullFolding(originalC, &fFoldChars, U_FOLD_CASE_DEFAULT);
if (fFoldLength >= UCASE_MAX_STRING_LENGTH || fFoldLength < 0) { if (fFoldLength >= UCASE_MAX_STRING_LENGTH || fFoldLength < 0) {
// input code point folds to a single code point, possibly itself. // input code point folds to a single code point, possibly itself.
// See comment in ucase.h for explanation of return values from ucase_toFullFoldings. // See comment in ucase.h for explanation of return values from ucase_toFullFoldings.
@ -65,8 +64,7 @@ UBool CaseFoldingUTextIterator::inExpansion() {
CaseFoldingUCharIterator::CaseFoldingUCharIterator(const UChar *chars, int64_t start, int64_t limit) : CaseFoldingUCharIterator::CaseFoldingUCharIterator(const UChar *chars, int64_t start, int64_t limit) :
fChars(chars), fIndex(start), fLimit(limit), fcsp(NULL), fFoldChars(NULL), fFoldLength(0) { fChars(chars), fIndex(start), fLimit(limit), fFoldChars(NULL), fFoldLength(0) {
fcsp = ucase_getSingleton();
} }
@ -84,7 +82,7 @@ UChar32 CaseFoldingUCharIterator::next() {
} }
U16_NEXT(fChars, fIndex, fLimit, originalC); U16_NEXT(fChars, fIndex, fLimit, originalC);
fFoldLength = ucase_toFullFolding(fcsp, originalC, &fFoldChars, U_FOLD_CASE_DEFAULT); fFoldLength = ucase_toFullFolding(originalC, &fFoldChars, U_FOLD_CASE_DEFAULT);
if (fFoldLength >= UCASE_MAX_STRING_LENGTH || fFoldLength < 0) { if (fFoldLength >= UCASE_MAX_STRING_LENGTH || fFoldLength < 0) {
// input code point folds to a single code point, possibly itself. // input code point folds to a single code point, possibly itself.
// See comment in ucase.h for explanation of return values from ucase_toFullFoldings. // See comment in ucase.h for explanation of return values from ucase_toFullFoldings.

View File

@ -374,7 +374,6 @@ class CaseFoldingUTextIterator: public UMemory {
// folding of the same code point from the orignal UText. // folding of the same code point from the orignal UText.
private: private:
UText &fUText; UText &fUText;
const UCaseProps *fcsp;
const UChar *fFoldChars; const UChar *fFoldChars;
int32_t fFoldLength; int32_t fFoldLength;
int32_t fFoldIndex; int32_t fFoldIndex;
@ -404,7 +403,6 @@ class CaseFoldingUCharIterator: public UMemory {
const UChar *fChars; const UChar *fChars;
int64_t fIndex; int64_t fIndex;
int64_t fLimit; int64_t fLimit;
const UCaseProps *fcsp;
const UChar *fFoldChars; const UChar *fFoldChars;
int32_t fFoldLength; int32_t fFoldLength;
int32_t fFoldIndex; int32_t fFoldIndex;

View File

@ -15,6 +15,7 @@
#if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_BREAK_ITERATION #if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_BREAK_ITERATION
#include "unicode/dtfmtsym.h" #include "unicode/dtfmtsym.h"
#include "unicode/ucasemap.h"
#include "unicode/ureldatefmt.h" #include "unicode/ureldatefmt.h"
#include "unicode/udisplaycontext.h" #include "unicode/udisplaycontext.h"
#include "unicode/unum.h" #include "unicode/unum.h"

View File

@ -20,7 +20,7 @@
#include "unicode/udisplaycontext.h" #include "unicode/udisplaycontext.h"
#include "unicode/uchar.h" #include "unicode/uchar.h"
#include "unicode/brkiter.h" #include "unicode/brkiter.h"
#include "unicode/ucasemap.h"
#include "reldtfmt.h" #include "reldtfmt.h"
#include "cmemory.h" #include "cmemory.h"
#include "uresimp.h" #include "uresimp.h"

View File

@ -48,6 +48,7 @@
#include "unicode/simpletz.h" #include "unicode/simpletz.h"
#include "unicode/rbtz.h" #include "unicode/rbtz.h"
#include "unicode/tzfmt.h" #include "unicode/tzfmt.h"
#include "unicode/ucasemap.h"
#include "unicode/utf16.h" #include "unicode/utf16.h"
#include "unicode/vtzone.h" #include "unicode/vtzone.h"
#include "unicode/udisplaycontext.h" #include "unicode/udisplaycontext.h"
@ -64,6 +65,7 @@
#include <float.h> #include <float.h>
#include "smpdtfst.h" #include "smpdtfst.h"
#include "sharednumberformat.h" #include "sharednumberformat.h"
#include "ucasemap_imp.h"
#include "ustr_imp.h" #include "ustr_imp.h"
#include "charstr.h" #include "charstr.h"
#include "uvector.h" #include "uvector.h"

View File

@ -97,7 +97,7 @@ void TitlecaseTransliterator::handleTransliterate(
int32_t start; int32_t start;
for (start = offsets.start - 1; start >= offsets.contextStart; start -= U16_LENGTH(c)) { for (start = offsets.start - 1; start >= offsets.contextStart; start -= U16_LENGTH(c)) {
c = text.char32At(start); c = text.char32At(start);
type=ucase_getTypeOrIgnorable(fCsp, c); type=ucase_getTypeOrIgnorable(c);
if(type>0) { // cased if(type>0) { // cased
doTitle=FALSE; doTitle=FALSE;
break; break;
@ -118,19 +118,19 @@ void TitlecaseTransliterator::handleTransliterate(
UnicodeString tmp; UnicodeString tmp;
const UChar *s; const UChar *s;
int32_t textPos, delta, result, locCache=0; int32_t textPos, delta, result;
for(textPos=offsets.start; textPos<offsets.limit;) { for(textPos=offsets.start; textPos<offsets.limit;) {
csc.cpStart=textPos; csc.cpStart=textPos;
c=text.char32At(textPos); c=text.char32At(textPos);
csc.cpLimit=textPos+=U16_LENGTH(c); csc.cpLimit=textPos+=U16_LENGTH(c);
type=ucase_getTypeOrIgnorable(fCsp, c); type=ucase_getTypeOrIgnorable(c);
if(type>=0) { // not case-ignorable if(type>=0) { // not case-ignorable
if(doTitle) { if(doTitle) {
result=ucase_toFullTitle(fCsp, c, utrans_rep_caseContextIterator, &csc, &s, "", &locCache); result=ucase_toFullTitle(c, utrans_rep_caseContextIterator, &csc, &s, UCASE_LOC_ROOT);
} else { } else {
result=ucase_toFullLower(fCsp, c, utrans_rep_caseContextIterator, &csc, &s, "", &locCache); result=ucase_toFullLower(c, utrans_rep_caseContextIterator, &csc, &s, UCASE_LOC_ROOT);
} }
doTitle = (UBool)(type==0); // doTitle=isUncased doTitle = (UBool)(type==0); // doTitle=isUncased

View File

@ -14,8 +14,10 @@
#include "unicode/calendar.h" #include "unicode/calendar.h"
#include "unicode/tzfmt.h" #include "unicode/tzfmt.h"
#include "unicode/numsys.h" #include "unicode/numsys.h"
#include "unicode/strenum.h"
#include "unicode/uchar.h" #include "unicode/uchar.h"
#include "unicode/udat.h" #include "unicode/udat.h"
#include "unicode/ustring.h"
#include "tzgnames.h" #include "tzgnames.h"
#include "cmemory.h" #include "cmemory.h"
#include "cstring.h" #include "cstring.h"

View File

@ -18,6 +18,7 @@
#include "unicode/rbtz.h" #include "unicode/rbtz.h"
#include "unicode/simpleformatter.h" #include "unicode/simpleformatter.h"
#include "unicode/simpletz.h" #include "unicode/simpletz.h"
#include "unicode/strenum.h"
#include "unicode/vtzone.h" #include "unicode/vtzone.h"
#include "cmemory.h" #include "cmemory.h"

View File

@ -15,6 +15,7 @@
#if !UCONFIG_NO_FORMATTING #if !UCONFIG_NO_FORMATTING
#include "unicode/strenum.h"
#include "unicode/ustring.h" #include "unicode/ustring.h"
#include "unicode/timezone.h" #include "unicode/timezone.h"

View File

@ -15,6 +15,7 @@
#include "unicode/utypes.h" #include "unicode/utypes.h"
#include "unicode/uobject.h" #include "unicode/uobject.h"
#include "unicode/locid.h" #include "unicode/locid.h"
#include "unicode/unistr.h"
#if !UCONFIG_NO_COLLATION #if !UCONFIG_NO_COLLATION

View File

@ -47,6 +47,7 @@ U_NAMESPACE_BEGIN
struct CollationData; struct CollationData;
class CharacterIterator;
class CollationIterator; class CollationIterator;
class RuleBasedCollator; class RuleBasedCollator;
class UCollationPCE; class UCollationPCE;

View File

@ -35,6 +35,7 @@
#include "unicode/uobject.h" #include "unicode/uobject.h"
#include "unicode/locid.h" #include "unicode/locid.h"
#include "unicode/unum.h" #include "unicode/unum.h"
#include "unicode/unistr.h"
/** /**
* \file * \file

View File

@ -230,6 +230,7 @@ typedef enum UDateDirection {
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
class BreakIterator;
class RelativeDateTimeCacheData; class RelativeDateTimeCacheData;
class SharedNumberFormat; class SharedNumberFormat;
class SharedPluralRules; class SharedPluralRules;

View File

@ -17,7 +17,7 @@
#include "unicode/ustring.h" #include "unicode/ustring.h"
#include "unicode/putil.h" #include "unicode/putil.h"
#include "unicode/simpletz.h" #include "unicode/simpletz.h"
#include "unicode/strenum.h"
#include "umutex.h" #include "umutex.h"
#include "uvector.h" #include "uvector.h"
#include "cmemory.h" #include "cmemory.h"

View File

@ -27,6 +27,7 @@
#include "unicode/ucasemap.h" #include "unicode/ucasemap.h"
#include "cmemory.h" #include "cmemory.h"
#include "cintltst.h" #include "cintltst.h"
#include "ucasemap_imp.h"
#include "ustr_imp.h" #include "ustr_imp.h"
/* test string case mapping functions --------------------------------------- */ /* test string case mapping functions --------------------------------------- */
@ -744,11 +745,12 @@ TestUCaseMap(void) {
if(0!=strcmp(locale, "tr")) { if(0!=strcmp(locale, "tr")) {
log_err("ucasemap_getLocale(ucasemap_open(\"tur\"))==%s!=\"tr\"\n", locale); log_err("ucasemap_getLocale(ucasemap_open(\"tur\"))==%s!=\"tr\"\n", locale);
} }
/* overly long locale IDs get truncated to their language code to avoid unnecessary allocation */ /* overly long locale IDs may get truncated to their language code to avoid unnecessary allocation */
ucasemap_setLocale(csm, "I-kLInGOn-the-quick-brown-fox-jumps-over-the-lazy-dog", &errorCode); ucasemap_setLocale(csm, "I-kLInGOn-the-quick-brown-fox-jumps-over-the-lazy-dog", &errorCode);
locale=ucasemap_getLocale(csm); locale=ucasemap_getLocale(csm);
if(0!=strcmp(locale, "i-klingon")) { if(0!=strncmp(locale, "i-klingon", 9)) {
log_err("ucasemap_getLocale(ucasemap_setLocale(\"I-kLInGOn-the-quick-br...\"))==%s!=\"i-klingon\"\n", locale); log_err("ucasemap_getLocale(ucasemap_setLocale(\"I-kLInGOn-the-quick-br...\"))==%s\n"
" does not start with \"i-klingon\"\n", locale);
} }
errorCode=U_ZERO_ERROR; errorCode=U_ZERO_ERROR;

View File

@ -34,7 +34,7 @@
#include "uprops.h" #include "uprops.h"
#include "uset_imp.h" #include "uset_imp.h"
#include "usc_impl.h" #include "usc_impl.h"
#include "udatamem.h" /* for testing ucase_openBinary() */ #include "udatamem.h"
#include "cucdapi.h" #include "cucdapi.h"
#include "cmemory.h" #include "cmemory.h"
@ -59,7 +59,6 @@ static void TestNumericProperties(void);
static void TestPropertyNames(void); static void TestPropertyNames(void);
static void TestPropertyValues(void); static void TestPropertyValues(void);
static void TestConsistency(void); static void TestConsistency(void);
static void TestUCase(void);
static void TestUBiDiProps(void); static void TestUBiDiProps(void);
static void TestCaseFolding(void); static void TestCaseFolding(void);
@ -196,7 +195,6 @@ void addUnicodeTest(TestNode** root)
addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames"); addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames");
addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues"); addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues");
addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency"); addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency");
addTest(root, &TestUCase, "tsutil/cucdtst/TestUCase");
addTest(root, &TestUBiDiProps, "tsutil/cucdtst/TestUBiDiProps"); addTest(root, &TestUBiDiProps, "tsutil/cucdtst/TestUBiDiProps");
addTest(root, &TestCaseFolding, "tsutil/cucdtst/TestCaseFolding"); addTest(root, &TestCaseFolding, "tsutil/cucdtst/TestCaseFolding");
} }
@ -3256,47 +3254,6 @@ TestConsistency() {
*/ */
#define HARDCODED_DATA_4497 1 #define HARDCODED_DATA_4497 1
/* API coverage for ucase.c */
static void TestUCase() {
#if !HARDCODED_DATA_4497
UDataMemory *pData;
UCaseProps *csp;
const UCaseProps *ccsp;
UErrorCode errorCode;
/* coverage for ucase_openBinary() */
errorCode=U_ZERO_ERROR;
pData=udata_open(NULL, UCASE_DATA_TYPE, UCASE_DATA_NAME, &errorCode);
if(U_FAILURE(errorCode)) {
log_data_err("unable to open " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
u_errorName(errorCode));
return;
}
csp=ucase_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
if(U_FAILURE(errorCode)) {
log_err("ucase_openBinary() fails for the contents of " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
u_errorName(errorCode));
udata_close(pData);
return;
}
if(UCASE_LOWER!=ucase_getType(csp, 0xdf)) { /* verify islower(sharp s) */
log_err("ucase_openBinary() does not seem to return working UCaseProps\n");
}
ucase_close(csp);
udata_close(pData);
/* coverage for ucase_getDummy() */
errorCode=U_ZERO_ERROR;
ccsp=ucase_getDummy(&errorCode);
if(ucase_tolower(ccsp, 0x41)!=0x41) {
log_err("ucase_tolower(dummy, A)!=A\n");
}
#endif
}
/* API coverage for ubidi_props.c */ /* API coverage for ubidi_props.c */
static void TestUBiDiProps() { static void TestUBiDiProps() {
#if !HARDCODED_DATA_4497 #if !HARDCODED_DATA_4497

View File

@ -41,6 +41,7 @@
#include "apicoll.h" #include "apicoll.h"
#include "unicode/chariter.h" #include "unicode/chariter.h"
#include "unicode/schriter.h" #include "unicode/schriter.h"
#include "unicode/strenum.h"
#include "unicode/ustring.h" #include "unicode/ustring.h"
#include "unicode/ucol.h" #include "unicode/ucol.h"

View File

@ -7,6 +7,7 @@
*********************************************************************/ *********************************************************************/
#include "locnmtst.h" #include "locnmtst.h"
#include "unicode/ustring.h"
#include "cstring.h" #include "cstring.h"
/* /*

View File

@ -21,8 +21,10 @@
#include "unicode/measfmt.h" #include "unicode/measfmt.h"
#include "unicode/measure.h" #include "unicode/measure.h"
#include "unicode/measunit.h" #include "unicode/measunit.h"
#include "unicode/strenum.h"
#include "unicode/tmunit.h" #include "unicode/tmunit.h"
#include "unicode/plurrule.h" #include "unicode/plurrule.h"
#include "unicode/ustring.h"
#include "charstr.h" #include "charstr.h"
#include "cstr.h" #include "cstr.h"
#include "unicode/reldatefmt.h" #include "unicode/reldatefmt.h"

View File

@ -22,6 +22,7 @@
#include "unicode/ustring.h" #include "unicode/ustring.h"
#include "unicode/measfmt.h" #include "unicode/measfmt.h"
#include "unicode/curramt.h" #include "unicode/curramt.h"
#include "unicode/strenum.h"
#include "digitlst.h" #include "digitlst.h"
#include "textfile.h" #include "textfile.h"
#include "tokiter.h" #include "tokiter.h"

View File

@ -19,6 +19,8 @@
*/ */
#include "unicode/std_string.h" #include "unicode/std_string.h"
#include "unicode/casemap.h"
#include "unicode/edits.h"
#include "unicode/uchar.h" #include "unicode/uchar.h"
#include "unicode/ures.h" #include "unicode/ures.h"
#include "unicode/uloc.h" #include "unicode/uloc.h"
@ -31,10 +33,52 @@
#include "unicode/tstdtmod.h" #include "unicode/tstdtmod.h"
#include "cmemory.h" #include "cmemory.h"
// One expected entry of an Edits iteration, as checked by
// StringCaseTest::checkEditsIter().
struct EditChange {
// Expected value of Edits::Iterator::hasChange() for this entry.
UBool change;
// Expected values of Edits::Iterator::oldLength() and newLength().
int32_t oldLength, newLength;
};
// Test suite for string case mapping, including the Edits / CaseMap tests
// (TestEdits, TestCaseMapWithEdits). Individual tests are dispatched by
// runIndexedTest().
class StringCaseTest: public IntlTest {
public:
StringCaseTest();
virtual ~StringCaseTest();
// Test dispatcher: maps an index to a test method and its name.
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=0);
void TestCaseConversion();
// Shared helper for TestCasing(); parameters describe one input/expected-output
// pair plus the mapping kind, iterator, locale and options to use.
void TestCasingImpl(const UnicodeString &input,
const UnicodeString &output,
int32_t whichCase,
void *iter, const char *localeID, uint32_t options);
void TestCasing();
void TestFullCaseFoldingIterator();
void TestGreekUpper();
void TestLongUpper();
void TestMalformedUTF8();
void TestBufferOverflow();
void TestEdits();
void TestCaseMapWithEdits();
void TestLongUnicodeString();
private:
void assertGreekUpper(const char *s, const char *expected);
// Compares what two equal Edits iterators report against expected[]:
// ei1 is advanced with next(), ei2 is positioned with findSourceIndex().
void checkEditsIter(
const UnicodeString &name, Edits::Iterator ei1, Edits::Iterator ei2, // two equal iterators
const EditChange expected[], int32_t expLength, UBool withUnchanged,
UErrorCode &errorCode);
// Initialized to Locale("el") by the constructor.
Locale GREEK_LOCALE_;
};
StringCaseTest::StringCaseTest() : GREEK_LOCALE_("el") {} StringCaseTest::StringCaseTest() : GREEK_LOCALE_("el") {}
StringCaseTest::~StringCaseTest() {} StringCaseTest::~StringCaseTest() {}
// Factory with external linkage: returns a newly allocated StringCaseTest as an
// IntlTest pointer. The object is created with new; the caller is presumably
// responsible for deleting it -- TODO confirm at the call site.
extern IntlTest *createStringCaseTest() {
return new StringCaseTest();
}
void void
StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
if(exec) { if(exec) {
@ -50,6 +94,9 @@ StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, cha
TESTCASE_AUTO(TestLongUpper); TESTCASE_AUTO(TestLongUpper);
TESTCASE_AUTO(TestMalformedUTF8); TESTCASE_AUTO(TestMalformedUTF8);
TESTCASE_AUTO(TestBufferOverflow); TESTCASE_AUTO(TestBufferOverflow);
TESTCASE_AUTO(TestEdits);
TESTCASE_AUTO(TestCaseMapWithEdits);
TESTCASE_AUTO(TestLongUnicodeString);
TESTCASE_AUTO_END; TESTCASE_AUTO_END;
} }
@ -848,3 +895,214 @@ void StringCaseTest::TestBufferOverflow() {
errorCode.reset(); errorCode.reset();
#endif // U_HAVE_STD_STRING #endif // U_HAVE_STD_STRING
} }
// Verifies that Edits iterators report exactly the entries in expected[].
//
// name           prefix for every assertion message
// ei1            iterator that is advanced sequentially with next()
// ei2            a second, equal iterator (both are passed by value) that is
//                positioned with findSourceIndex() instead
// expected       expected entries, in order
// expLength      number of entries in expected[]
// withUnchanged  TRUE if ei1 is expected to also deliver entries whose
//                change flag is FALSE; if FALSE, such entries are not
//                checked against ei1
// errorCode      passed to every iterator call
void StringCaseTest::checkEditsIter(
const UnicodeString &name,
Edits::Iterator ei1, Edits::Iterator ei2, // two equal iterators
const EditChange expected[], int32_t expLength, UBool withUnchanged,
UErrorCode &errorCode) {
// A negative source index must never be found.
assertFalse(name, ei2.findSourceIndex(-1, errorCode));
// Running totals of what the iterators should report as their current
// source, destination and replacement indexes.
int32_t expSrcIndex = 0;
int32_t expDestIndex = 0;
int32_t expReplIndex = 0;
for (int32_t expIndex = 0; expIndex < expLength; ++expIndex) {
const EditChange &expect = expected[expIndex];
// Message is "<name> <expIndex>" so a failure identifies the entry.
UnicodeString msg = UnicodeString(name).append(u' ') + expIndex;
// Sequential iteration: only step ei1 for entries it is expected to deliver.
if (withUnchanged || expect.change) {
assertTrue(msg, ei1.next(errorCode));
assertEquals(msg, expect.change, ei1.hasChange());
assertEquals(msg, expect.oldLength, ei1.oldLength());
assertEquals(msg, expect.newLength, ei1.newLength());
assertEquals(msg, expSrcIndex, ei1.sourceIndex());
assertEquals(msg, expDestIndex, ei1.destinationIndex());
assertEquals(msg, expReplIndex, ei1.replacementIndex());
}
// Lookup by source index: only done for entries that cover at least one
// source unit (entries with oldLength 0 are not looked up).
if (expect.oldLength > 0) {
assertTrue(msg, ei2.findSourceIndex(expSrcIndex, errorCode));
assertEquals(msg, expect.change, ei2.hasChange());
assertEquals(msg, expect.oldLength, ei2.oldLength());
assertEquals(msg, expect.newLength, ei2.newLength());
assertEquals(msg, expSrcIndex, ei2.sourceIndex());
assertEquals(msg, expDestIndex, ei2.destinationIndex());
assertEquals(msg, expReplIndex, ei2.replacementIndex());
if (!withUnchanged) {
// For some iterators, move past the current range
// so that findSourceIndex() has to look before the current index.
ei2.next(errorCode);
ei2.next(errorCode);
}
}
expSrcIndex += expect.oldLength;
expDestIndex += expect.newLength;
// The replacement index only advances over the new text of changed entries.
if (expect.change) {
expReplIndex += expect.newLength;
}
}
// TODO: remove casts from u"" when merging into trunk
UnicodeString msg = UnicodeString(name).append((const UChar *)u" end");
// After the last expected entry: next() fails, the iterator reports an empty,
// unchanged span, and its indexes equal the accumulated totals.
assertFalse(msg, ei1.next(errorCode));
assertFalse(msg, ei1.hasChange());
assertEquals(msg, 0, ei1.oldLength());
assertEquals(msg, 0, ei1.newLength());
assertEquals(msg, expSrcIndex, ei1.sourceIndex());
assertEquals(msg, expDestIndex, ei1.destinationIndex());
assertEquals(msg, expReplIndex, ei1.replacementIndex());
// The index one past the end of the source must not be found either.
assertFalse(name, ei2.findSourceIndex(expSrcIndex, errorCode));
}
// Builds an Edits object from a fixed sequence of addUnchanged()/addReplace()
// calls and checks hasChanges(), lengthDelta(), copyErrorTo(), all four
// iterator flavors (coarse/fine, with/without unchanged entries) and reset().
void StringCaseTest::TestEdits() {
IcuTestErrorCode errorCode(*this, "TestEdits");
Edits edits;
assertFalse("new Edits", edits.hasChanges());
assertEquals("new Edits", 0, edits.lengthDelta());
edits.addUnchanged(1); // multiple unchanged ranges are combined
edits.addUnchanged(10000); // too long, and they are split
// A (0, 0) replacement is added here; the assertions below still expect
// no changes to be reported.
edits.addReplace(0, 0);
edits.addUnchanged(2);
// 1 + 10000 + 2 = 10003 unchanged units so far.
assertFalse("unchanged 10003", edits.hasChanges());
assertEquals("unchanged 10003", 0, edits.lengthDelta());
edits.addReplace(1, 1); // multiple short equal-length edits are compressed
// A zero-length unchanged range is added between the replacements; the fine
// expectations below list three consecutive (1, 1) changes with nothing between.
edits.addUnchanged(0);
edits.addReplace(1, 1);
edits.addReplace(1, 1);
edits.addReplace(0, 10);
edits.addReplace(100, 0);
edits.addReplace(3000, 4000); // variable-length encoding
edits.addReplace(100000, 100000);
assertTrue("some edits", edits.hasChanges());
// Delta: +10 from (0, 10), -100 from (100, 0), +1000 from (3000, 4000);
// the equal-length replacements contribute 0.
assertEquals("some edits", 10 - 100 + 1000, edits.lengthDelta());
UErrorCode outErrorCode = U_ZERO_ERROR;
assertFalse("edits done: copyErrorTo", edits.copyErrorTo(outErrorCode));
// Coarse view: one unchanged entry followed by one change that merges all
// replacements: old 1+1+1+0+100+3000+100000 = 103103,
// new 1+1+1+10+0+4000+100000 = 104013.
static const EditChange coarseExpectedChanges[] = {
{ FALSE, 10003, 10003 },
{ TRUE, 103103, 104013 }
};
checkEditsIter((const UChar *)u"coarse",
edits.getCoarseIterator(), edits.getCoarseIterator(),
coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), TRUE, errorCode);
checkEditsIter((const UChar *)u"coarse changes",
edits.getCoarseChangesIterator(), edits.getCoarseChangesIterator(),
coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), FALSE, errorCode);
// Fine view: the unchanged entry, then each replacement individually in the
// order it was added.
static const EditChange fineExpectedChanges[] = {
{ FALSE, 10003, 10003 },
{ TRUE, 1, 1 },
{ TRUE, 1, 1 },
{ TRUE, 1, 1 },
{ TRUE, 0, 10 },
{ TRUE, 100, 0 },
{ TRUE, 3000, 4000 },
{ TRUE, 100000, 100000 }
};
checkEditsIter((const UChar *)u"fine",
edits.getFineIterator(), edits.getFineIterator(),
fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), TRUE, errorCode);
checkEditsIter((const UChar *)u"fine changes",
edits.getFineChangesIterator(), edits.getFineChangesIterator(),
fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), FALSE, errorCode);
// After reset() the object must look like a new one again.
edits.reset();
assertFalse("reset", edits.hasChanges());
assertEquals("reset", 0, edits.lengthDelta());
Edits::Iterator ei = edits.getCoarseChangesIterator();
assertFalse("reset then iterator", ei.next(errorCode));
}
// Checks the low-level CaseMap functions (toLower, toUpper, toTitle, fold) when they
// are given an Edits object together with UCASEMAP_OMIT_UNCHANGED_TEXT: the destination
// buffer is expected to contain only the changed text, while the Edits record describes
// both the unchanged and the changed ranges of the source string.
void StringCaseTest::TestCaseMapWithEdits() {
    // Use this test's own name so that a reported error is attributed to the right test
    // (the name was previously copy-pasted as "TestEdits").
    IcuTestErrorCode errorCode(*this, "TestCaseMapWithEdits");
    UChar dest[20];
    Edits edits;

    // Turkish lowercasing: only the 'I' (-> dotless i) and the 'B' are changed,
    // so only those two mapped characters are expected in dest.
    int32_t length = CaseMap::toLower("tr", UCASEMAP_OMIT_UNCHANGED_TEXT,
                                      (const UChar *)u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
    assertEquals((const UChar *)u"toLower(Istanbul)", UnicodeString((const UChar *)u"ıb"), UnicodeString(TRUE, dest, length));
    static const EditChange lowerExpectedChanges[] = {
        { TRUE, 1, 1 },
        { FALSE, 4, 4 },
        { TRUE, 1, 1 },
        { FALSE, 2, 2 }
    };
    checkEditsIter((const UChar *)u"toLower(Istanbul)",
                   edits.getFineIterator(), edits.getFineIterator(),
                   lowerExpectedChanges, UPRV_LENGTHOF(lowerExpectedChanges),
                   TRUE, errorCode);

    // Greek uppercasing: the leading capital is unchanged; each of the remaining
    // five letters is expected to be replaced 1:1 (the accented one loses its accent).
    edits.reset();
    length = CaseMap::toUpper("el", UCASEMAP_OMIT_UNCHANGED_TEXT,
                              (const UChar *)u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
    assertEquals((const UChar *)u"toUpper(Πατάτα)", UnicodeString((const UChar *)u"ΑΤΑΤΑ"), UnicodeString(TRUE, dest, length));
    static const EditChange upperExpectedChanges[] = {
        { FALSE, 1, 1 },
        { TRUE, 1, 1 },
        { TRUE, 1, 1 },
        { TRUE, 1, 1 },
        { TRUE, 1, 1 },
        { TRUE, 1, 1 }
    };
    checkEditsIter((const UChar *)u"toUpper(Πατάτα)",
                   edits.getFineIterator(), edits.getFineIterator(),
                   upperExpectedChanges, UPRV_LENGTHOF(upperExpectedChanges),
                   TRUE, errorCode);

    // Dutch titlecasing without break adjustment and without lowercasing the rest:
    // the only expected change is the 'j' of the initial "Ij" becoming 'J'.
    edits.reset();
    length = CaseMap::toTitle("nl",
                              UCASEMAP_OMIT_UNCHANGED_TEXT |
                              U_TITLECASE_NO_BREAK_ADJUSTMENT |
                              U_TITLECASE_NO_LOWERCASE,
                              NULL, (const UChar *)u"IjssEL IglOo", 12,
                              dest, UPRV_LENGTHOF(dest), &edits, errorCode);
    assertEquals((const UChar *)u"toTitle(IjssEL IglOo)", UnicodeString((const UChar *)u"J"), UnicodeString(TRUE, dest, length));
    static const EditChange titleExpectedChanges[] = {
        { FALSE, 1, 1 },
        { TRUE, 1, 1 },
        { FALSE, 10, 10 }
    };
    checkEditsIter((const UChar *)u"toTitle(IjssEL IglOo)",
                   edits.getFineIterator(), edits.getFineIterator(),
                   titleExpectedChanges, UPRV_LENGTHOF(titleExpectedChanges),
                   TRUE, errorCode);

    // Case folding with U_FOLD_CASE_EXCLUDE_SPECIAL_I: 'I' -> dotless i (1:1),
    // sharp s -> "ss" (1:2), and 'B' -> 'b' (1:1); everything else is unchanged.
    edits.reset();
    length = CaseMap::fold(UCASEMAP_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
                           (const UChar *)u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
    assertEquals((const UChar *)u"foldCase(IßtanBul)", UnicodeString((const UChar *)u"ıssb"), UnicodeString(TRUE, dest, length));
    static const EditChange foldExpectedChanges[] = {
        { TRUE, 1, 1 },
        { TRUE, 1, 2 },
        { FALSE, 3, 3 },
        { TRUE, 1, 1 },
        { FALSE, 2, 2 }
    };
    checkEditsIter((const UChar *)u"foldCase(IßtanBul)",
                   edits.getFineIterator(), edits.getFineIterator(),
                   foldExpectedChanges, UPRV_LENGTHOF(foldExpectedChanges),
                   TRUE, errorCode);
}
void StringCaseTest::TestLongUnicodeString() {
// Code coverage for UnicodeString case mapping code handling
// long strings or many changes in a string.
UnicodeString s(TRUE,
(const UChar *)
u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF", 6 * 51);
UnicodeString expected(TRUE,
(const UChar *)
u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF", 6 * 51);
s.toUpper(Locale::getRoot());
assertEquals("string length 306", expected, s);
}

View File

@ -32,6 +32,7 @@
#include "unicode/messagepattern.h" #include "unicode/messagepattern.h"
#include "unicode/selfmt.h" #include "unicode/selfmt.h"
#include "unicode/gregocal.h" #include "unicode/gregocal.h"
#include "unicode/strenum.h"
#include <stdio.h> #include <stdio.h>
void void

View File

@ -18,6 +18,7 @@
#include "unicode/tzrule.h" #include "unicode/tzrule.h"
#include "unicode/calendar.h" #include "unicode/calendar.h"
#include "unicode/gregocal.h" #include "unicode/gregocal.h"
#include "unicode/strenum.h"
#include "unicode/ucal.h" #include "unicode/ucal.h"
#include "unicode/unistr.h" #include "unicode/unistr.h"
#include "unicode/ustring.h" #include "unicode/ustring.h"

View File

@ -13,6 +13,7 @@
#include "unicode/uchar.h" #include "unicode/uchar.h"
#include "unicode/ustring.h" #include "unicode/ustring.h"
#include "unicode/locid.h" #include "unicode/locid.h"
#include "unicode/strenum.h"
#include "unicode/ucnv.h" #include "unicode/ucnv.h"
#include "unicode/uenum.h" #include "unicode/uenum.h"
#include "unicode/utf16.h" #include "unicode/utf16.h"
@ -29,11 +30,13 @@ using namespace std;
UnicodeStringTest::~UnicodeStringTest() {} UnicodeStringTest::~UnicodeStringTest() {}
extern IntlTest *createStringCaseTest();
void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char *par) void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char *par)
{ {
if (exec) logln("TestSuite UnicodeStringTest: "); if (exec) logln("TestSuite UnicodeStringTest: ");
TESTCASE_AUTO_BEGIN; TESTCASE_AUTO_BEGIN;
TESTCASE_AUTO_CLASS(StringCaseTest); TESTCASE_AUTO_CREATE_CLASS(StringCaseTest);
TESTCASE_AUTO(TestBasicManipulation); TESTCASE_AUTO(TestBasicManipulation);
TESTCASE_AUTO(TestCompare); TESTCASE_AUTO(TestCompare);
TESTCASE_AUTO(TestExtract); TESTCASE_AUTO(TestExtract);

View File

@ -94,30 +94,4 @@ public:
void TestMoveSwap(); void TestMoveSwap();
}; };
class StringCaseTest: public IntlTest {
public:
StringCaseTest();
virtual ~StringCaseTest();
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=0);
void TestCaseConversion();
void TestCasingImpl(const UnicodeString &input,
const UnicodeString &output,
int32_t whichCase,
void *iter, const char *localeID, uint32_t options);
void TestCasing();
void TestFullCaseFoldingIterator();
void TestGreekUpper();
void TestLongUpper();
void TestMalformedUTF8();
void TestBufferOverflow();
private:
void assertGreekUpper(const char *s, const char *expected);
Locale GREEK_LOCALE_;
};
#endif #endif

View File

@ -22,6 +22,7 @@
#include "unicode/ucharstrie.h" #include "unicode/ucharstrie.h"
#include "unicode/bytestrie.h" #include "unicode/bytestrie.h"
#include "unicode/ucnv.h" #include "unicode/ucnv.h"
#include "unicode/ustring.h"
#include "unicode/utf16.h" #include "unicode/utf16.h"
#include "charstr.h" #include "charstr.h"

View File

@ -2,9 +2,14 @@
// License & terms of use: http://www.unicode.org/copyright.html#License // License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl; package com.ibm.icu.impl;
import com.ibm.icu.util.ULocale; import java.io.IOException;
public final class CaseMap { import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.Edits;
import com.ibm.icu.util.ICUUncheckedIOException;
public final class CaseMapImpl {
/** /**
* Implementation of UCaseProps.ContextIterator, iterates over a String. * Implementation of UCaseProps.ContextIterator, iterates over a String.
* See ustrcase.c/utf16_caseContextIterator(). * See ustrcase.c/utf16_caseContextIterator().
@ -12,11 +17,11 @@ public final class CaseMap {
public static final class StringContextIterator implements UCaseProps.ContextIterator { public static final class StringContextIterator implements UCaseProps.ContextIterator {
/** /**
* Constructor. * Constructor.
* @param s String to iterate over. * @param src String to iterate over.
*/ */
public StringContextIterator(String s) { public StringContextIterator(CharSequence src) {
this.s=s; this.s=src;
limit=s.length(); limit=src.length();
cpStart=cpLimit=index=0; cpStart=cpLimit=index=0;
dir=0; dir=0;
} }
@ -60,7 +65,7 @@ public final class CaseMap {
public int nextCaseMapCP() { public int nextCaseMapCP() {
cpStart=cpLimit; cpStart=cpLimit;
if(cpLimit<limit) { if(cpLimit<limit) {
int c=s.codePointAt(cpLimit); int c=Character.codePointAt(s, cpLimit);
cpLimit+=Character.charCount(c); cpLimit+=Character.charCount(c);
return c; return c;
} else { } else {
@ -84,6 +89,10 @@ public final class CaseMap {
return cpLimit; return cpLimit;
} }
public int getCPLength() {
return cpLimit-cpStart;
}
// implement UCaseProps.ContextIterator // implement UCaseProps.ContextIterator
// The following code is not used anywhere in this private class // The following code is not used anywhere in this private class
@Override @Override
@ -108,11 +117,11 @@ public final class CaseMap {
int c; int c;
if(dir>0 && index<s.length()) { if(dir>0 && index<s.length()) {
c=s.codePointAt(index); c=Character.codePointAt(s, index);
index+=Character.charCount(c); index+=Character.charCount(c);
return c; return c;
} else if(dir<0 && index>0) { } else if(dir<0 && index>0) {
c=s.codePointBefore(index); c=Character.codePointBefore(s, index);
index-=Character.charCount(c); index-=Character.charCount(c);
return c; return c;
} }
@ -120,44 +129,242 @@ public final class CaseMap {
} }
// variables // variables
protected String s; protected CharSequence s;
protected int index, limit, cpStart, cpLimit; protected int index, limit, cpStart, cpLimit;
protected int dir; // 0=initial state >0=forward <0=backward protected int dir; // 0=initial state >0=forward <0=backward
} }
/** Appends a full case mapping result, see {@link UCaseProps#MAX_STRING_LENGTH}. */ /**
private static final void appendResult(int c, StringBuilder result) { * Omit unchanged text when case-mapping with Edits.
// Decode the result. */
if (c < 0) { public static final int OMIT_UNCHANGED_TEXT = 0x4000;
// (not) original code point
result.appendCodePoint(~c); private static int appendCodePoint(Appendable a, int c) throws IOException {
} else if (c <= UCaseProps.MAX_STRING_LENGTH) { if (c <= Character.MAX_VALUE) {
// The mapping has already been appended to result. a.append((char)c);
return 1;
} else { } else {
// Append the single-code point mapping. a.append((char)(0xd7c0 + (c >> 10)));
result.appendCodePoint(c); a.append((char)(Character.MIN_LOW_SURROGATE + (c & 0x3ff)));
return 2;
} }
} }
// TODO: Move the other string case mapping functions from UCharacter to here, too. /**
* Appends a full case mapping result, see {@link UCaseProps#MAX_STRING_LENGTH}.
public static String toUpper(ULocale locale, String str) { * @throws IOException
if (locale == null) { */
locale = ULocale.getDefault(); private static void appendResult(int result, Appendable dest,
} int cpLength, int options, Edits edits) throws IOException {
int[] locCache = new int[] { UCaseProps.getCaseLocale(locale, null) }; // Decode the result.
if (locCache[0] == UCaseProps.LOC_GREEK) { if (result < 0) {
return GreekUpper.toUpper(str, locCache); // (not) original code point
if (edits != null) {
edits.addUnchanged(cpLength);
if ((options & OMIT_UNCHANGED_TEXT) != 0) {
return;
}
}
appendCodePoint(dest, ~result);
} else if (result <= UCaseProps.MAX_STRING_LENGTH) {
// The mapping has already been appended to result.
if (edits != null) {
edits.addReplace(cpLength, result);
}
} else {
// Append the single-code point mapping.
int length = appendCodePoint(dest, result);
if (edits != null) {
edits.addReplace(cpLength, length);
}
} }
}
StringContextIterator iter = new StringContextIterator(str); private static final void appendUnchanged(CharSequence src, int start, int length,
StringBuilder result = new StringBuilder(str.length()); Appendable dest, int options, Edits edits) throws IOException {
if (length > 0) {
if (edits != null) {
edits.addUnchanged(length);
if ((options & OMIT_UNCHANGED_TEXT) != 0) {
return;
}
}
dest.append(src, start, start + length);
}
}
private static void internalToLower(int caseLocale, int options, StringContextIterator iter,
Appendable dest, Edits edits) throws IOException {
int c; int c;
while((c=iter.nextCaseMapCP())>=0) { while ((c = iter.nextCaseMapCP()) >= 0) {
c = UCaseProps.INSTANCE.toFullUpper(c, iter, result, locale, locCache); c = UCaseProps.INSTANCE.toFullLower(c, iter, dest, caseLocale);
appendResult(c, result); appendResult(c, dest, iter.getCPLength(), options, edits);
}
}
public static <A extends Appendable> A toLower(int caseLocale, int options,
CharSequence src, A dest, Edits edits) {
try {
if (edits != null) {
edits.reset();
}
StringContextIterator iter = new StringContextIterator(src);
internalToLower(caseLocale, options, iter, dest, edits);
return dest;
} catch (IOException e) {
throw new ICUUncheckedIOException(e);
}
}
public static <A extends Appendable> A toUpper(int caseLocale, int options,
CharSequence src, A dest, Edits edits) {
try {
if (edits != null) {
edits.reset();
}
if (caseLocale == UCaseProps.LOC_GREEK) {
return GreekUpper.toUpper(options, src, dest, edits);
}
StringContextIterator iter = new StringContextIterator(src);
int c;
while ((c = iter.nextCaseMapCP()) >= 0) {
c = UCaseProps.INSTANCE.toFullUpper(c, iter, dest, caseLocale);
appendResult(c, dest, iter.getCPLength(), options, edits);
}
return dest;
} catch (IOException e) {
throw new ICUUncheckedIOException(e);
}
}
public static <A extends Appendable> A toTitle(
int caseLocale, int options, BreakIterator titleIter,
CharSequence src, A dest, Edits edits) {
try {
if (edits != null) {
edits.reset();
}
/* set up local variables */
StringContextIterator iter = new StringContextIterator(src);
int srcLength = src.length();
int prev=0;
boolean isFirstIndex=true;
/* titlecasing loop */
while(prev<srcLength) {
/* find next index where to titlecase */
int index;
if(isFirstIndex) {
isFirstIndex=false;
index=titleIter.first();
} else {
index=titleIter.next();
}
if(index==BreakIterator.DONE || index>srcLength) {
index=srcLength;
}
/*
* Unicode 4 & 5 section 3.13 Default Case Operations:
*
* R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
* #29, "Text Boundaries." Between each pair of word boundaries, find the first
* cased character F. If F exists, map F to default_title(F); then map each
* subsequent character C to default_lower(C).
*
* In this implementation, segment [prev..index[ into 3 parts:
* a) uncased characters (copy as-is) [prev..titleStart[
* b) first case letter (titlecase) [titleStart..titleLimit[
* c) subsequent characters (lowercase) [titleLimit..index[
*/
if(prev<index) {
// find and copy uncased characters [prev..titleStart[
int titleStart=prev;
iter.setLimit(index);
int c=iter.nextCaseMapCP();
if((options&UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT)==0
&& UCaseProps.NONE==UCaseProps.INSTANCE.getType(c)) {
// Adjust the titlecasing index (titleStart) to the next cased character.
while((c=iter.nextCaseMapCP())>=0
&& UCaseProps.NONE==UCaseProps.INSTANCE.getType(c)) {}
// If c<0 then we have only uncased characters in [prev..index[
// and stopped with titleStart==titleLimit==index.
titleStart=iter.getCPStart();
appendUnchanged(src, prev, titleStart-prev, dest, options, edits);
}
if(titleStart<index) {
int titleLimit=iter.getCPLimit();
// titlecase c which is from [titleStart..titleLimit[
c = UCaseProps.INSTANCE.toFullTitle(c, iter, dest, caseLocale);
appendResult(c, dest, iter.getCPLength(), options, edits);
// Special case Dutch IJ titlecasing
if (titleStart+1 < index && caseLocale == UCaseProps.LOC_DUTCH) {
char c1 = src.charAt(titleStart);
if ((c1 == 'i' || c1 == 'I')) {
char c2 = src.charAt(titleStart+1);
if (c2 == 'j') {
dest.append('J');
if (edits != null) {
edits.addReplace(1, 1);
}
c = iter.nextCaseMapCP();
titleLimit++;
assert c == c2;
assert titleLimit == iter.getCPLimit();
} else if (c2 == 'J') {
// Keep the capital J from getting lowercased.
appendUnchanged(src, titleStart + 1, 1, dest, options, edits);
c = iter.nextCaseMapCP();
titleLimit++;
assert c == c2;
assert titleLimit == iter.getCPLimit();
}
}
}
// lowercase [titleLimit..index[
if(titleLimit<index) {
if((options&UCharacter.TITLECASE_NO_LOWERCASE)==0) {
// Normal operation: Lowercase the rest of the word.
internalToLower(caseLocale, options, iter, dest, edits);
} else {
// Optionally just copy the rest of the word unchanged.
appendUnchanged(src, titleLimit, index-titleLimit, dest, options, edits);
iter.moveToLimit();
}
}
}
}
prev=index;
}
return dest;
} catch (IOException e) {
throw new ICUUncheckedIOException(e);
}
}
public static <A extends Appendable> A fold(int options,
CharSequence src, A dest, Edits edits) {
try {
if (edits != null) {
edits.reset();
}
int length = src.length();
for (int i = 0; i < length;) {
int c = Character.codePointAt(src, i);
int cpLength = Character.charCount(c);
i += cpLength;
c = UCaseProps.INSTANCE.toFullFolding(c, dest, options);
appendResult(c, dest, cpLength, options, edits);
}
return dest;
} catch (IOException e) {
throw new ICUUncheckedIOException(e);
} }
return result.toString();
} }
private static final class GreekUpper { private static final class GreekUpper {
@ -661,12 +868,13 @@ public final class CaseMap {
* TODO: Try to re-consolidate one way or another with the non-Greek function. * TODO: Try to re-consolidate one way or another with the non-Greek function.
* *
* <p>Keep this consistent with the C++ versions in ustrcase.cpp (UTF-16) and ucasemap.cpp (UTF-8). * <p>Keep this consistent with the C++ versions in ustrcase.cpp (UTF-16) and ucasemap.cpp (UTF-8).
* @throws IOException
*/ */
private static String toUpper(CharSequence s, int[] locCache) { private static <A extends Appendable> A toUpper(int options,
StringBuilder result = new StringBuilder(s.length()); CharSequence src, A dest, Edits edits) throws IOException {
int state = 0; int state = 0;
for (int i = 0; i < s.length();) { for (int i = 0; i < src.length();) {
int c = Character.codePointAt(s, i); int c = Character.codePointAt(src, i);
int nextIndex = i + Character.charCount(c); int nextIndex = i + Character.charCount(c);
int nextState = 0; int nextState = 0;
int type = UCaseProps.INSTANCE.getTypeOrIgnorable(c); int type = UCaseProps.INSTANCE.getTypeOrIgnorable(c);
@ -695,8 +903,8 @@ public final class CaseMap {
numYpogegrammeni = 1; numYpogegrammeni = 1;
} }
// Skip combining diacritics after this Greek letter. // Skip combining diacritics after this Greek letter.
while (nextIndex < s.length()) { while (nextIndex < src.length()) {
int diacriticData = getDiacriticData(s.charAt(nextIndex)); int diacriticData = getDiacriticData(src.charAt(nextIndex));
if (diacriticData != 0) { if (diacriticData != 0) {
data |= diacriticData; data |= diacriticData;
if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) { if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) {
@ -716,7 +924,7 @@ public final class CaseMap {
(data & HAS_ACCENT) != 0 && (data & HAS_ACCENT) != 0 &&
numYpogegrammeni == 0 && numYpogegrammeni == 0 &&
(state & AFTER_CASED) == 0 && (state & AFTER_CASED) == 0 &&
!isFollowedByCasedLetter(s, nextIndex)) { !isFollowedByCasedLetter(src, nextIndex)) {
// Keep disjunctive "or" with (only) a tonos. // Keep disjunctive "or" with (only) a tonos.
// We use the same "word boundary" conditions as for the Final_Sigma test. // We use the same "word boundary" conditions as for the Final_Sigma test.
if (i == nextIndex) { if (i == nextIndex) {
@ -734,25 +942,59 @@ public final class CaseMap {
data &= ~HAS_EITHER_DIALYTIKA; data &= ~HAS_EITHER_DIALYTIKA;
} }
} }
result.appendCodePoint(upper);
if ((data & HAS_EITHER_DIALYTIKA) != 0) { boolean change;
result.append('\u0308'); // restore or add a dialytika if (edits == null) {
change = true; // common, simple usage
} else {
// Find out first whether we are changing the text.
change = src.charAt(i) != upper || numYpogegrammeni > 0;
int i2 = i + 1;
if ((data & HAS_EITHER_DIALYTIKA) != 0) {
change |= i2 >= nextIndex || src.charAt(i2) != 0x308;
++i2;
}
if (addTonos) {
change |= i2 >= nextIndex || src.charAt(i2) != 0x301;
++i2;
}
int oldLength = nextIndex - i;
int newLength = (i2 - i) + numYpogegrammeni;
change |= oldLength != newLength;
if (change) {
if (edits != null) {
edits.addReplace(oldLength, newLength);
}
} else {
if (edits != null) {
edits.addUnchanged(oldLength);
}
// Write unchanged text?
change = (options & OMIT_UNCHANGED_TEXT) == 0;
}
} }
if (addTonos) {
result.append('\u0301'); if (change) {
} dest.append((char)upper);
while (numYpogegrammeni > 0) { if ((data & HAS_EITHER_DIALYTIKA) != 0) {
result.append('Ι'); dest.append('\u0308'); // restore or add a dialytika
--numYpogegrammeni; }
if (addTonos) {
dest.append('\u0301');
}
while (numYpogegrammeni > 0) {
dest.append('Ι');
--numYpogegrammeni;
}
} }
} else { } else {
c = UCaseProps.INSTANCE.toFullUpper(c, null, result, null, locCache); c = UCaseProps.INSTANCE.toFullUpper(c, null, dest, UCaseProps.LOC_GREEK);
appendResult(c, result); appendResult(c, dest, nextIndex - i, options, edits);
} }
i = nextIndex; i = nextIndex;
state = nextState; state = nextState;
} }
return result.toString(); return dest;
} }
} }
} }

View File

@ -24,6 +24,7 @@ package com.ibm.icu.impl;
import java.io.IOException; import java.io.IOException;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.util.Iterator; import java.util.Iterator;
import java.util.Locale;
import com.ibm.icu.lang.UCharacter; import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UProperty; import com.ibm.icu.lang.UProperty;
@ -71,7 +72,7 @@ public final class UCaseProps {
// read exceptions[] // read exceptions[]
count=indexes[IX_EXC_LENGTH]; count=indexes[IX_EXC_LENGTH];
if(count>0) { if(count>0) {
exceptions=ICUBinary.getChars(bytes, count, 0); exceptions=ICUBinary.getString(bytes, count, 0);
} }
// read unfold[] // read unfold[]
@ -150,7 +151,7 @@ public final class UCaseProps {
* *
* @param excWord (in) initial exceptions word * @param excWord (in) initial exceptions word
* @param index (in) desired slot index * @param index (in) desired slot index
* @param excOffset (in) offset into exceptions[] after excWord=exceptions[excOffset++]; * @param excOffset (in) offset into exceptions[] after excWord=exceptions.charAt(excOffset++);
* @return bits 31..0: slot value * @return bits 31..0: slot value
* 63..32: modified excOffset, moved to the last char of the value, use +1 for beginning of next slot * 63..32: modified excOffset, moved to the last char of the value, use +1 for beginning of next slot
*/ */
@ -158,11 +159,11 @@ public final class UCaseProps {
long value; long value;
if((excWord&EXC_DOUBLE_SLOTS)==0) { if((excWord&EXC_DOUBLE_SLOTS)==0) {
excOffset+=slotOffset(excWord, index); excOffset+=slotOffset(excWord, index);
value=exceptions[excOffset]; value=exceptions.charAt(excOffset);
} else { } else {
excOffset+=2*slotOffset(excWord, index); excOffset+=2*slotOffset(excWord, index);
value=exceptions[excOffset++]; value=exceptions.charAt(excOffset++);
value=(value<<16)|exceptions[excOffset]; value=(value<<16)|exceptions.charAt(excOffset);
} }
return value |((long)excOffset<<32); return value |((long)excOffset<<32);
} }
@ -172,11 +173,11 @@ public final class UCaseProps {
int value; int value;
if((excWord&EXC_DOUBLE_SLOTS)==0) { if((excWord&EXC_DOUBLE_SLOTS)==0) {
excOffset+=slotOffset(excWord, index); excOffset+=slotOffset(excWord, index);
value=exceptions[excOffset]; value=exceptions.charAt(excOffset);
} else { } else {
excOffset+=2*slotOffset(excWord, index); excOffset+=2*slotOffset(excWord, index);
value=exceptions[excOffset++]; value=exceptions.charAt(excOffset++);
value=(value<<16)|exceptions[excOffset]; value=(value<<16)|exceptions.charAt(excOffset);
} }
return value; return value;
} }
@ -191,7 +192,7 @@ public final class UCaseProps {
} }
} else { } else {
int excOffset=getExceptionsOffset(props); int excOffset=getExceptionsOffset(props);
int excWord=exceptions[excOffset++]; int excWord=exceptions.charAt(excOffset++);
if(hasSlot(excWord, EXC_LOWER)) { if(hasSlot(excWord, EXC_LOWER)) {
c=getSlotValue(excWord, EXC_LOWER, excOffset); c=getSlotValue(excWord, EXC_LOWER, excOffset);
} }
@ -207,7 +208,7 @@ public final class UCaseProps {
} }
} else { } else {
int excOffset=getExceptionsOffset(props); int excOffset=getExceptionsOffset(props);
int excWord=exceptions[excOffset++]; int excWord=exceptions.charAt(excOffset++);
if(hasSlot(excWord, EXC_UPPER)) { if(hasSlot(excWord, EXC_UPPER)) {
c=getSlotValue(excWord, EXC_UPPER, excOffset); c=getSlotValue(excWord, EXC_UPPER, excOffset);
} }
@ -223,7 +224,7 @@ public final class UCaseProps {
} }
} else { } else {
int excOffset=getExceptionsOffset(props); int excOffset=getExceptionsOffset(props);
int excWord=exceptions[excOffset++]; int excWord=exceptions.charAt(excOffset++);
int index; int index;
if(hasSlot(excWord, EXC_TITLE)) { if(hasSlot(excWord, EXC_TITLE)) {
index=EXC_TITLE; index=EXC_TITLE;
@ -291,7 +292,7 @@ public final class UCaseProps {
*/ */
int excOffset0, excOffset=getExceptionsOffset(props); int excOffset0, excOffset=getExceptionsOffset(props);
int closureOffset; int closureOffset;
int excWord=exceptions[excOffset++]; int excWord=exceptions.charAt(excOffset++);
int index, closureLength, fullLength, length; int index, closureLength, fullLength, length;
excOffset0=excOffset; excOffset0=excOffset;
@ -334,7 +335,7 @@ public final class UCaseProps {
/* add the full case folding string */ /* add the full case folding string */
length=fullLength&0xf; length=fullLength&0xf;
if(length!=0) { if(length!=0) {
set.add(new String(exceptions, excOffset, length)); set.add(exceptions.substring(excOffset, excOffset+length));
excOffset+=length; excOffset+=length;
} }
@ -348,8 +349,9 @@ public final class UCaseProps {
} }
/* add each code point in the closure string */ /* add each code point in the closure string */
for(index=0; index<closureLength; index+=UTF16.getCharCount(c)) { int limit=closureOffset+closureLength;
c=UTF16.charAt(exceptions, closureOffset, exceptions.length, index); for(index=closureOffset; index<limit; index+=UTF16.getCharCount(c)) {
c=exceptions.codePointAt(index);
set.add(c); set.add(c);
} }
} }
@ -468,7 +470,7 @@ public final class UCaseProps {
if(!propsHasException(props)) { if(!propsHasException(props)) {
return props&DOT_MASK; return props&DOT_MASK;
} else { } else {
return (exceptions[getExceptionsOffset(props)]>>EXC_DOT_SHIFT)&DOT_MASK; return (exceptions.charAt(getExceptionsOffset(props))>>EXC_DOT_SHIFT)&DOT_MASK;
} }
} }
@ -605,38 +607,49 @@ public final class UCaseProps {
*/ */
public static final int MAX_STRING_LENGTH=0x1f; public static final int MAX_STRING_LENGTH=0x1f;
private static final int LOC_UNKNOWN=0; //ivate static final int LOC_UNKNOWN=0;
private static final int LOC_ROOT=1; public static final int LOC_ROOT=1;
private static final int LOC_TURKISH=2; private static final int LOC_TURKISH=2;
private static final int LOC_LITHUANIAN=3; private static final int LOC_LITHUANIAN=3;
static final int LOC_GREEK=4; static final int LOC_GREEK=4;
public static final int LOC_DUTCH=5;
/* public static final int getCaseLocale(Locale locale) {
* Checks and caches the type of locale ID as it is relevant for case mapping. return getCaseLocale(locale.getLanguage());
* If the locCache is not null, then it must be initialized with locCache[0]=0 . }
*/ public static final int getCaseLocale(ULocale locale) {
static final int getCaseLocale(ULocale locale, int[] locCache) { return getCaseLocale(locale.getLanguage());
int result; }
/** Accepts both 2- and 3-letter language subtags. */
if(locCache!=null && (result=locCache[0])!=LOC_UNKNOWN) { private static final int getCaseLocale(String language) {
return result; // Check the subtag length to reduce the number of comparisons
// for locales without special behavior.
// Fastpath for English "en" which is often used for default (=root locale) case mappings,
// and for Chinese "zh": Very common but no special case mapping behavior.
if(language.length()==2) {
if(language.equals("en") || language.charAt(0)>'t') {
return LOC_ROOT;
} else if(language.equals("tr") || language.equals("az")) {
return LOC_TURKISH;
} else if(language.equals("el")) {
return LOC_GREEK;
} else if(language.equals("lt")) {
return LOC_LITHUANIAN;
} else if(language.equals("nl")) {
return LOC_DUTCH;
}
} else if(language.length()==3) {
if(language.equals("tur") || language.equals("aze")) {
return LOC_TURKISH;
} else if(language.equals("ell")) {
return LOC_GREEK;
} else if(language.equals("lit")) {
return LOC_LITHUANIAN;
} else if(language.equals("nld")) {
return LOC_DUTCH;
}
} }
return LOC_ROOT;
result=LOC_ROOT;
String language=locale.getLanguage();
if(language.equals("tr") || language.equals("tur") || language.equals("az") || language.equals("aze")) {
result=LOC_TURKISH;
} else if(language.equals("el") || language.equals("ell")) {
result=LOC_GREEK;
} else if(language.equals("lt") || language.equals("lit")) {
result=LOC_LITHUANIAN;
}
if(locCache!=null) {
locCache[0]=result;
}
return result;
} }
/* Is followed by {case-ignorable}* cased ? (dir determines looking forward/backward) */ /* Is followed by {case-ignorable}* cased ? (dir determines looking forward/backward) */
@ -797,19 +810,14 @@ public final class UCaseProps {
* See ContextIterator for details. * See ContextIterator for details.
* If iter==null then a context-independent result is returned. * If iter==null then a context-independent result is returned.
* @param out If the mapping result is a string, then it is appended to out. * @param out If the mapping result is a string, then it is appended to out.
* @param locale Locale ID for locale-dependent mappings. * @param caseLocale Case locale value from ucase_getCaseLocale().
* @param locCache Initialize locCache[0] to 0; may be used to cache the result of parsing
* the locale ID for subsequent calls.
* Can be null.
* @return Output code point or string length, see MAX_STRING_LENGTH. * @return Output code point or string length, see MAX_STRING_LENGTH.
* *
* @see ContextIterator * @see ContextIterator
* @see #MAX_STRING_LENGTH * @see #MAX_STRING_LENGTH
* @internal * @internal
*/ */
public final int toFullLower(int c, ContextIterator iter, public final int toFullLower(int c, ContextIterator iter, Appendable out, int caseLocale) {
StringBuilder out,
ULocale locale, int[] locCache) {
int result, props; int result, props;
result=c; result=c;
@ -820,22 +828,20 @@ public final class UCaseProps {
} }
} else { } else {
int excOffset=getExceptionsOffset(props), excOffset2; int excOffset=getExceptionsOffset(props), excOffset2;
int excWord=exceptions[excOffset++]; int excWord=exceptions.charAt(excOffset++);
int full; int full;
excOffset2=excOffset; excOffset2=excOffset;
if((excWord&EXC_CONDITIONAL_SPECIAL)!=0) { if((excWord&EXC_CONDITIONAL_SPECIAL)!=0) {
/* use hardcoded conditions and mappings */ /* use hardcoded conditions and mappings */
int loc=getCaseLocale(locale, locCache);
/* /*
* Test for conditional mappings first * Test for conditional mappings first
* (otherwise the unconditional default mappings are always taken), * (otherwise the unconditional default mappings are always taken),
* then test for characters that have unconditional mappings in SpecialCasing.txt, * then test for characters that have unconditional mappings in SpecialCasing.txt,
* then get the UnicodeData.txt mappings. * then get the UnicodeData.txt mappings.
*/ */
if( loc==LOC_LITHUANIAN && if( caseLocale==LOC_LITHUANIAN &&
/* base characters, find accents above */ /* base characters, find accents above */
(((c==0x49 || c==0x4a || c==0x12e) && (((c==0x49 || c==0x4a || c==0x12e) &&
isFollowedByMoreAbove(iter)) || isFollowedByMoreAbove(iter)) ||
@ -858,30 +864,34 @@ public final class UCaseProps {
00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
*/ */
switch(c) { try {
case 0x49: /* LATIN CAPITAL LETTER I */ switch(c) {
out.append(iDot); case 0x49: /* LATIN CAPITAL LETTER I */
return 2; out.append(iDot);
case 0x4a: /* LATIN CAPITAL LETTER J */ return 2;
out.append(jDot); case 0x4a: /* LATIN CAPITAL LETTER J */
return 2; out.append(jDot);
case 0x12e: /* LATIN CAPITAL LETTER I WITH OGONEK */ return 2;
out.append(iOgonekDot); case 0x12e: /* LATIN CAPITAL LETTER I WITH OGONEK */
return 2; out.append(iOgonekDot);
case 0xcc: /* LATIN CAPITAL LETTER I WITH GRAVE */ return 2;
out.append(iDotGrave); case 0xcc: /* LATIN CAPITAL LETTER I WITH GRAVE */
return 3; out.append(iDotGrave);
case 0xcd: /* LATIN CAPITAL LETTER I WITH ACUTE */ return 3;
out.append(iDotAcute); case 0xcd: /* LATIN CAPITAL LETTER I WITH ACUTE */
return 3; out.append(iDotAcute);
case 0x128: /* LATIN CAPITAL LETTER I WITH TILDE */ return 3;
out.append(iDotTilde); case 0x128: /* LATIN CAPITAL LETTER I WITH TILDE */
return 3; out.append(iDotTilde);
default: return 3;
return 0; /* will not occur */ default:
return 0; /* will not occur */
}
} catch (IOException e) {
throw new ICUUncheckedIOException(e);
} }
/* # Turkish and Azeri */ /* # Turkish and Azeri */
} else if(loc==LOC_TURKISH && c==0x130) { } else if(caseLocale==LOC_TURKISH && c==0x130) {
/* /*
# I and i-dotless; I-dot and i are case pairs in Turkish and Azeri # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
# The following rules handle those cases. # The following rules handle those cases.
@ -890,7 +900,7 @@ public final class UCaseProps {
0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE 0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE
*/ */
return 0x69; return 0x69;
} else if(loc==LOC_TURKISH && c==0x307 && isPrecededBy_I(iter)) { } else if(caseLocale==LOC_TURKISH && c==0x307 && isPrecededBy_I(iter)) {
/* /*
# When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i. # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
# This matches the behavior of the canonically equivalent I-dot_above # This matches the behavior of the canonically equivalent I-dot_above
@ -899,7 +909,7 @@ public final class UCaseProps {
0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
*/ */
return 0; /* remove the dot (continue without output) */ return 0; /* remove the dot (continue without output) */
} else if(loc==LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter)) { } else if(caseLocale==LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter)) {
/* /*
# When lowercasing, unless an I is before a dot_above, it turns into a dotless i. # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
@ -913,8 +923,12 @@ public final class UCaseProps {
0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE 0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE
*/ */
out.append(iDot); try {
return 2; out.append(iDot);
return 2;
} catch (IOException e) {
throw new ICUUncheckedIOException(e);
}
} else if( c==0x3a3 && } else if( c==0x3a3 &&
!isFollowedByCasedLetter(iter, 1) && !isFollowedByCasedLetter(iter, 1) &&
isFollowedByCasedLetter(iter, -1) /* -1=preceded */ isFollowedByCasedLetter(iter, -1) /* -1=preceded */
@ -936,11 +950,15 @@ public final class UCaseProps {
/* start of full case mapping strings */ /* start of full case mapping strings */
excOffset=(int)(value>>32)+1; excOffset=(int)(value>>32)+1;
/* set the output pointer to the lowercase mapping */ try {
out.append(exceptions, excOffset, full); // append the lowercase mapping
out.append(exceptions, excOffset, excOffset+full);
/* return the string length */ /* return the string length */
return full; return full;
} catch (IOException e) {
throw new ICUUncheckedIOException(e);
}
} }
} }
@ -954,8 +972,8 @@ public final class UCaseProps {
/* internal */ /* internal */
private final int toUpperOrTitle(int c, ContextIterator iter, private final int toUpperOrTitle(int c, ContextIterator iter,
StringBuilder out, Appendable out,
ULocale locale, int[] locCache, int loc,
boolean upperNotTitle) { boolean upperNotTitle) {
int result; int result;
int props; int props;
@ -968,15 +986,13 @@ public final class UCaseProps {
} }
} else { } else {
int excOffset=getExceptionsOffset(props), excOffset2; int excOffset=getExceptionsOffset(props), excOffset2;
int excWord=exceptions[excOffset++]; int excWord=exceptions.charAt(excOffset++);
int full, index; int full, index;
excOffset2=excOffset; excOffset2=excOffset;
if((excWord&EXC_CONDITIONAL_SPECIAL)!=0) { if((excWord&EXC_CONDITIONAL_SPECIAL)!=0) {
/* use hardcoded conditions and mappings */ /* use hardcoded conditions and mappings */
int loc=getCaseLocale(locale, locCache);
if(loc==LOC_TURKISH && c==0x69) { if(loc==LOC_TURKISH && c==0x69) {
/* /*
# Turkish and Azeri # Turkish and Azeri
@ -1026,11 +1042,15 @@ public final class UCaseProps {
} }
if(full!=0) { if(full!=0) {
/* set the output pointer to the result string */ try {
out.append(exceptions, excOffset, full); // append the result string
out.append(exceptions, excOffset, excOffset+full);
/* return the string length */ /* return the string length */
return full; return full;
} catch (IOException e) {
throw new ICUUncheckedIOException(e);
}
} }
} }
@ -1049,15 +1069,15 @@ public final class UCaseProps {
} }
public final int toFullUpper(int c, ContextIterator iter, public final int toFullUpper(int c, ContextIterator iter,
StringBuilder out, Appendable out,
ULocale locale, int[] locCache) { int caseLocale) {
return toUpperOrTitle(c, iter, out, locale, locCache, true); return toUpperOrTitle(c, iter, out, caseLocale, true);
} }
public final int toFullTitle(int c, ContextIterator iter, public final int toFullTitle(int c, ContextIterator iter,
StringBuilder out, Appendable out,
ULocale locale, int[] locCache) { int caseLocale) {
return toUpperOrTitle(c, iter, out, locale, locCache, false); return toUpperOrTitle(c, iter, out, caseLocale, false);
} }
/* case folding ------------------------------------------------------------- */ /* case folding ------------------------------------------------------------- */
@ -1117,7 +1137,7 @@ public final class UCaseProps {
} }
} else { } else {
int excOffset=getExceptionsOffset(props); int excOffset=getExceptionsOffset(props);
int excWord=exceptions[excOffset++]; int excWord=exceptions.charAt(excOffset++);
int index; int index;
if((excWord&EXC_CONDITIONAL_FOLD)!=0) { if((excWord&EXC_CONDITIONAL_FOLD)!=0) {
/* special case folding mappings, hardcoded */ /* special case folding mappings, hardcoded */
@ -1168,7 +1188,7 @@ public final class UCaseProps {
* together in a way that they still fold to common result strings. * together in a way that they still fold to common result strings.
*/ */
public final int toFullFolding(int c, StringBuilder out, int options) { public final int toFullFolding(int c, Appendable out, int options) {
int result; int result;
int props; int props;
@ -1180,7 +1200,7 @@ public final class UCaseProps {
} }
} else { } else {
int excOffset=getExceptionsOffset(props), excOffset2; int excOffset=getExceptionsOffset(props), excOffset2;
int excWord=exceptions[excOffset++]; int excWord=exceptions.charAt(excOffset++);
int full, index; int full, index;
excOffset2=excOffset; excOffset2=excOffset;
@ -1194,8 +1214,12 @@ public final class UCaseProps {
return 0x69; return 0x69;
} else if(c==0x130) { } else if(c==0x130) {
/* 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE */ /* 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
out.append(iDot); try {
return 2; out.append(iDot);
return 2;
} catch (IOException e) {
throw new ICUUncheckedIOException(e);
}
} }
} else { } else {
/* Turkic mappings */ /* Turkic mappings */
@ -1219,11 +1243,15 @@ public final class UCaseProps {
full=(full>>4)&0xf; full=(full>>4)&0xf;
if(full!=0) { if(full!=0) {
/* set the output pointer to the result string */ try {
out.append(exceptions, excOffset, full); // append the result string
out.append(exceptions, excOffset, excOffset+full);
/* return the string length */ /* return the string length */
return full; return full;
} catch (IOException e) {
throw new ICUUncheckedIOException(e);
}
} }
} }
@ -1242,7 +1270,6 @@ public final class UCaseProps {
/* case mapping properties API ---------------------------------------------- */ /* case mapping properties API ---------------------------------------------- */
private static final int[] rootLocCache = { LOC_ROOT };
/* /*
* We need a StringBuilder for multi-code point output from the * We need a StringBuilder for multi-code point output from the
* full case mapping functions. However, we do not actually use that output, * full case mapping functions. However, we do not actually use that output,
@ -1282,20 +1309,20 @@ public final class UCaseProps {
*/ */
case UProperty.CHANGES_WHEN_LOWERCASED: case UProperty.CHANGES_WHEN_LOWERCASED:
dummyStringBuilder.setLength(0); dummyStringBuilder.setLength(0);
return toFullLower(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0; return toFullLower(c, null, dummyStringBuilder, LOC_ROOT)>=0;
case UProperty.CHANGES_WHEN_UPPERCASED: case UProperty.CHANGES_WHEN_UPPERCASED:
dummyStringBuilder.setLength(0); dummyStringBuilder.setLength(0);
return toFullUpper(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0; return toFullUpper(c, null, dummyStringBuilder, LOC_ROOT)>=0;
case UProperty.CHANGES_WHEN_TITLECASED: case UProperty.CHANGES_WHEN_TITLECASED:
dummyStringBuilder.setLength(0); dummyStringBuilder.setLength(0);
return toFullTitle(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0; return toFullTitle(c, null, dummyStringBuilder, LOC_ROOT)>=0;
/* case UProperty.CHANGES_WHEN_CASEFOLDED: -- in UCharacterProperty.java */ /* case UProperty.CHANGES_WHEN_CASEFOLDED: -- in UCharacterProperty.java */
case UProperty.CHANGES_WHEN_CASEMAPPED: case UProperty.CHANGES_WHEN_CASEMAPPED:
dummyStringBuilder.setLength(0); dummyStringBuilder.setLength(0);
return return
toFullLower(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0 || toFullLower(c, null, dummyStringBuilder, LOC_ROOT)>=0 ||
toFullUpper(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0 || toFullUpper(c, null, dummyStringBuilder, LOC_ROOT)>=0 ||
toFullTitle(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0; toFullTitle(c, null, dummyStringBuilder, LOC_ROOT)>=0;
default: default:
return false; return false;
} }
@ -1303,7 +1330,7 @@ public final class UCaseProps {
// data members -------------------------------------------------------- *** // data members -------------------------------------------------------- ***
private int indexes[]; private int indexes[];
private char exceptions[]; private String exceptions;
private char unfold[]; private char unfold[];
private Trie2_16 trie; private Trie2_16 trie;

View File

@ -15,8 +15,7 @@ import java.util.Iterator;
import java.util.Locale; import java.util.Locale;
import java.util.Map; import java.util.Map;
import com.ibm.icu.impl.CaseMap; import com.ibm.icu.impl.CaseMapImpl;
import com.ibm.icu.impl.CaseMap.StringContextIterator;
import com.ibm.icu.impl.IllegalIcuArgumentException; import com.ibm.icu.impl.IllegalIcuArgumentException;
import com.ibm.icu.impl.Trie2; import com.ibm.icu.impl.Trie2;
import com.ibm.icu.impl.UBiDiProps; import com.ibm.icu.impl.UBiDiProps;
@ -29,6 +28,7 @@ import com.ibm.icu.impl.UPropertyAliases;
import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory; import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory;
import com.ibm.icu.lang.UCharacterEnums.ECharacterDirection; import com.ibm.icu.lang.UCharacterEnums.ECharacterDirection;
import com.ibm.icu.text.BreakIterator; import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.Edits;
import com.ibm.icu.text.Normalizer2; import com.ibm.icu.text.Normalizer2;
import com.ibm.icu.util.RangeValueIterator; import com.ibm.icu.util.RangeValueIterator;
import com.ibm.icu.util.ULocale; import com.ibm.icu.util.ULocale;
@ -4875,7 +4875,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
*/ */
public static String toUpperCase(String str) public static String toUpperCase(String str)
{ {
return toUpperCase(ULocale.getDefault(), str); return toUpperCase(getDefaultCaseLocale(), str);
} }
/** /**
@ -4887,7 +4887,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
*/ */
public static String toLowerCase(String str) public static String toLowerCase(String str)
{ {
return toLowerCase(ULocale.getDefault(), str); return toLowerCase(getDefaultCaseLocale(), str);
} }
/** /**
@ -4910,7 +4910,94 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
*/ */
public static String toTitleCase(String str, BreakIterator breakiter) public static String toTitleCase(String str, BreakIterator breakiter)
{ {
return toTitleCase(ULocale.getDefault(), str, breakiter); return toTitleCase(Locale.getDefault(), str, breakiter, 0);
}
/** Returns the case locale value that UCaseProps derives from the JVM default {@link Locale}. */
private static int getDefaultCaseLocale() {
    final Locale defaultLocale = Locale.getDefault();
    return UCaseProps.getCaseLocale(defaultLocale);
}
/**
 * Maps a {@link Locale} to its UCaseProps case locale value.
 *
 * @param locale the locale to map; null selects {@link Locale#getDefault()}
 * @return the case locale value for the effective locale
 */
private static int getCaseLocale(Locale locale) {
    final Locale effective = (locale != null) ? locale : Locale.getDefault();
    return UCaseProps.getCaseLocale(effective);
}
/**
 * Maps a {@link ULocale} to its UCaseProps case locale value.
 *
 * @param locale the locale to map; null selects {@link ULocale#getDefault()}
 * @return the case locale value for the effective locale
 */
private static int getCaseLocale(ULocale locale) {
    final ULocale effective = (locale != null) ? locale : ULocale.getDefault();
    return UCaseProps.getCaseLocale(effective);
}
/**
 * Lowercases {@code str} for an already-resolved case locale value.
 *
 * <p>Inputs of at most 100 chars are mapped with {@link Edits} so that only
 * the changed pieces are collected and then spliced into the original;
 * longer inputs are mapped in full into a presized buffer.
 *
 * @param caseLocale case locale value as produced by UCaseProps.getCaseLocale()
 * @param str the string to lowercase
 * @return the lowercased string; {@code str} itself when it is empty
 */
private static String toLowerCase(int caseLocale, String str) {
    final int length = str.length();
    if (length > 100) {
        // Long input: write the complete result into a buffer sized like the source.
        StringBuilder mapped = new StringBuilder(length);
        return CaseMapImpl.toLower(caseLocale, 0, str, mapped, null).toString();
    }
    if (length == 0) {
        return str;
    }
    // Short input: record only the changes.
    // Cheap when little or nothing changes, slow when a lot changes.
    Edits changes = new Edits();
    StringBuilder changedText = CaseMapImpl.toLower(
            caseLocale, CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), changes);
    return applyEdits(str, changedText, changes);
}
/**
 * Uppercases {@code str} for an already-resolved case locale value.
 *
 * <p>Inputs of at most 100 chars are mapped with {@link Edits} so that only
 * the changed pieces are collected and then spliced into the original;
 * longer inputs are mapped in full into a presized buffer.
 *
 * @param caseLocale case locale value as produced by UCaseProps.getCaseLocale()
 * @param str the string to uppercase
 * @return the uppercased string; {@code str} itself when it is empty
 */
private static String toUpperCase(int caseLocale, String str) {
    final int length = str.length();
    if (length > 100) {
        // Long input: write the complete result into a buffer sized like the source.
        StringBuilder mapped = new StringBuilder(length);
        return CaseMapImpl.toUpper(caseLocale, 0, str, mapped, null).toString();
    }
    if (length == 0) {
        return str;
    }
    // Short input: record only the changes.
    // Cheap when little or nothing changes, slow when a lot changes.
    Edits changes = new Edits();
    StringBuilder changedText = CaseMapImpl.toUpper(
            caseLocale, CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), changes);
    return applyEdits(str, changedText, changes);
}
/**
 * Titlecases {@code str} for an already-resolved case locale value.
 *
 * <p>Inputs of at most 100 chars are mapped with {@link Edits} so that only
 * the changed pieces are collected and then spliced into the original;
 * longer inputs are mapped in full into a presized buffer.
 *
 * @param caseLocale case locale value as produced by UCaseProps.getCaseLocale()
 * @param options titlecasing option bits, passed through to CaseMapImpl.toTitle()
 * @param titleIter break iterator handed to CaseMapImpl.toTitle()
 * @param str the string to titlecase
 * @return the titlecased string; {@code str} itself when it is empty
 */
private static String toTitleCase(int caseLocale, int options, BreakIterator titleIter, String str) {
    final int length = str.length();
    if (length > 100) {
        // Long input: write the complete result into a buffer sized like the source.
        StringBuilder mapped = new StringBuilder(length);
        return CaseMapImpl.toTitle(caseLocale, options, titleIter, str, mapped, null).toString();
    }
    if (length == 0) {
        return str;
    }
    // Short input: record only the changes.
    // Cheap when little or nothing changes, slow when a lot changes.
    Edits changes = new Edits();
    int changesOnlyOptions = options | CaseMapImpl.OMIT_UNCHANGED_TEXT;
    StringBuilder changedText = CaseMapImpl.toTitle(
            caseLocale, changesOnlyOptions, titleIter, str, new StringBuilder(), changes);
    return applyEdits(str, changedText, changes);
}
/**
 * Rebuilds the full result string from the original text plus the recorded changes.
 *
 * @param str the original string
 * @param replacementChars the replacement text for the changed spans
 * @param edits the edits recorded while mapping str
 * @return str itself when edits has no changes, otherwise a new string
 */
private static String applyEdits(String str, StringBuilder replacementChars, Edits edits) {
if (!edits.hasChanges()) {
return str;
}
// Presize for the final length: source length adjusted by the recorded length delta.
StringBuilder result = new StringBuilder(str.length() + edits.lengthDelta());
for (Edits.Iterator ei = edits.getCoarseIterator(); ei.next();) {
if (ei.hasChange()) {
// Changed span: take newLength() chars from the replacement buffer.
int i = ei.replacementIndex();
result.append(replacementChars, i, i + ei.newLength());
} else {
// Unchanged span: copy oldLength() chars straight from the source.
int i = ei.sourceIndex();
result.append(str, i, i + ei.oldLength());
}
}
return result.toString();
} }
/** /**
@ -4923,7 +5010,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
*/ */
public static String toUpperCase(Locale locale, String str) public static String toUpperCase(Locale locale, String str)
{ {
return toUpperCase(ULocale.forLocale(locale), str); return toUpperCase(getCaseLocale(locale), str);
} }
/** /**
@ -4935,7 +5022,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
* @stable ICU 3.2 * @stable ICU 3.2
*/ */
public static String toUpperCase(ULocale locale, String str) { public static String toUpperCase(ULocale locale, String str) {
return CaseMap.toUpper(locale, str); return toUpperCase(getCaseLocale(locale), str);
} }
/** /**
@ -4948,7 +5035,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
*/ */
public static String toLowerCase(Locale locale, String str) public static String toLowerCase(Locale locale, String str)
{ {
return toLowerCase(ULocale.forLocale(locale), str); return toLowerCase(getCaseLocale(locale), str);
} }
/** /**
@ -4960,31 +5047,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
* @stable ICU 3.2 * @stable ICU 3.2
*/ */
public static String toLowerCase(ULocale locale, String str) { public static String toLowerCase(ULocale locale, String str) {
StringContextIterator iter = new StringContextIterator(str); return toLowerCase(getCaseLocale(locale), str);
StringBuilder result = new StringBuilder(str.length());
int[] locCache = new int[1];
int c;
if (locale == null) {
locale = ULocale.getDefault();
}
locCache[0]=0;
while((c=iter.nextCaseMapCP())>=0) {
c = UCaseProps.INSTANCE.toFullLower(c, iter, result, locale, locCache);
/* decode the result */
if(c<0) {
/* (not) original code point */
c=~c;
} else if(c<=UCaseProps.MAX_STRING_LENGTH) {
/* mapping already appended to result */
continue;
/* } else { append single-code point mapping */
}
result.appendCodePoint(c);
}
return result.toString();
} }
/** /**
@ -5009,7 +5072,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
public static String toTitleCase(Locale locale, String str, public static String toTitleCase(Locale locale, String str,
BreakIterator breakiter) BreakIterator breakiter)
{ {
return toTitleCase(ULocale.forLocale(locale), str, breakiter); return toTitleCase(locale, str, breakiter, 0);
} }
/** /**
@ -5059,126 +5122,15 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
* @see #TITLECASE_NO_BREAK_ADJUSTMENT * @see #TITLECASE_NO_BREAK_ADJUSTMENT
*/ */
public static String toTitleCase(ULocale locale, String str, public static String toTitleCase(ULocale locale, String str,
BreakIterator titleIter, BreakIterator titleIter, int options) {
int options) {
StringContextIterator iter = new StringContextIterator(str);
StringBuilder result = new StringBuilder(str.length());
int[] locCache = new int[1];
int c, nc, srcLength = str.length();
if (locale == null) {
locale = ULocale.getDefault();
}
locCache[0]=0;
if(titleIter == null) { if(titleIter == null) {
if (locale == null) {
locale = ULocale.getDefault();
}
titleIter = BreakIterator.getWordInstance(locale); titleIter = BreakIterator.getWordInstance(locale);
} }
titleIter.setText(str); titleIter.setText(str);
return toTitleCase(getCaseLocale(locale), options, titleIter, str);
int prev, titleStart, index;
boolean isFirstIndex;
boolean isDutch = locale.getLanguage().equals("nl");
boolean FirstIJ = true;
/* set up local variables */
prev=0;
isFirstIndex=true;
/* titlecasing loop */
while(prev<srcLength) {
/* find next index where to titlecase */
if(isFirstIndex) {
isFirstIndex=false;
index=titleIter.first();
} else {
index=titleIter.next();
}
if(index==BreakIterator.DONE || index>srcLength) {
index=srcLength;
}
/*
* Unicode 4 & 5 section 3.13 Default Case Operations:
*
* R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
* #29, "Text Boundaries." Between each pair of word boundaries, find the first
* cased character F. If F exists, map F to default_title(F); then map each
* subsequent character C to default_lower(C).
*
* In this implementation, segment [prev..index[ into 3 parts:
* a) uncased characters (copy as-is) [prev..titleStart[
* b) first case letter (titlecase) [titleStart..titleLimit[
* c) subsequent characters (lowercase) [titleLimit..index[
*/
if(prev<index) {
/* find and copy uncased characters [prev..titleStart[ */
iter.setLimit(index);
c=iter.nextCaseMapCP();
if((options&TITLECASE_NO_BREAK_ADJUSTMENT)==0
&& UCaseProps.NONE==UCaseProps.INSTANCE.getType(c)) {
while((c=iter.nextCaseMapCP())>=0
&& UCaseProps.NONE==UCaseProps.INSTANCE.getType(c)) {}
titleStart=iter.getCPStart();
if(prev<titleStart) {
result.append(str, prev, titleStart);
}
} else {
titleStart=prev;
}
if(titleStart<index) {
FirstIJ = true;
/* titlecase c which is from titleStart */
c = UCaseProps.INSTANCE.toFullTitle(c, iter, result, locale, locCache);
/* decode the result and lowercase up to index */
for(;;) {
if(c<0) {
/* (not) original code point */
c=~c;
result.appendCodePoint(c);
} else if(c<=UCaseProps.MAX_STRING_LENGTH) {
/* mapping already appended to result */
} else {
/* append single-code point mapping */
result.appendCodePoint(c);
}
if((options&TITLECASE_NO_LOWERCASE)!=0) {
/* Optionally just copy the rest of the word unchanged. */
int titleLimit=iter.getCPLimit();
if(titleLimit<index) {
/* Special Case - Dutch IJ Titlecasing */
if (isDutch && c == 0x0049 && str.charAt(titleLimit) == 'j') {
result.append('J').append(str, titleLimit + 1, index);
} else {
result.append(str, titleLimit, index);
}
}
iter.moveToLimit();
break;
} else if((nc=iter.nextCaseMapCP())>=0) {
if (isDutch && (nc == 0x004A || nc == 0x006A)
&& (c == 0x0049) && (FirstIJ == true)) {
c = 0x004A; /* J */
FirstIJ = false;
} else {
/* Normal operation: Lowercase the rest of the word. */
c = UCaseProps.INSTANCE.toFullLower(nc, iter, result, locale,
locCache);
}
} else {
break;
}
}
}
}
prev=index;
}
return result.toString();
} }
@ -5281,7 +5233,11 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
public static String toTitleCase(Locale locale, String str, public static String toTitleCase(Locale locale, String str,
BreakIterator titleIter, BreakIterator titleIter,
int options) { int options) {
return toTitleCase(ULocale.forLocale(locale), str, titleIter, options); if(titleIter == null) {
titleIter = BreakIterator.getWordInstance(locale);
}
titleIter.setText(str);
return toTitleCase(getCaseLocale(locale), options, titleIter, str);
} }
/** /**
@ -5398,27 +5354,19 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
* @stable ICU 2.6 * @stable ICU 2.6
*/ */
public static final String foldCase(String str, int options) { public static final String foldCase(String str, int options) {
StringBuilder result = new StringBuilder(str.length()); if (str.length() <= 100) {
int c, i, length; if (str.isEmpty()) {
return str;
length = str.length();
for(i=0; i<length;) {
c=str.codePointAt(i);
i+=Character.charCount(c);
c = UCaseProps.INSTANCE.toFullFolding(c, result, options);
/* decode the result */
if(c<0) {
/* (not) original code point */
c=~c;
} else if(c<=UCaseProps.MAX_STRING_LENGTH) {
/* mapping already appended to result */
continue;
/* } else { append single-code point mapping */
} }
result.appendCodePoint(c); // Collect and apply only changes.
// Good if no or few changes. Bad (slow) if many changes.
Edits edits = new Edits();
StringBuilder replacementChars = CaseMapImpl.fold(
options | CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), edits);
return applyEdits(str, replacementChars, edits);
} else {
return CaseMapImpl.fold(options, str, new StringBuilder(str.length()), null).toString();
} }
return result.toString();
} }
/** /**

View File

@ -0,0 +1,339 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.text;
import java.util.Locale;
import com.ibm.icu.impl.CaseMapImpl;
import com.ibm.icu.impl.UCaseProps;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.util.ULocale;
/**
* Low-level case mapping options and methods. Immutable.
* "Setters" return instances with the union of the current and new options set.
*
* This class is not intended for public subclassing.
*
* @draft ICU 59
* @provisional This API might change or be removed in a future release.
*/
public abstract class CaseMap {
/**
* @internal
* @deprecated This API is ICU internal only.
*/
@Deprecated
protected int internalOptions;
/** Stores the option bits; private, so only the nested subclasses can be instantiated. */
private CaseMap(int opt) {
    this.internalOptions = opt;
}
/**
 * Maps a {@link Locale} to its UCaseProps case locale value.
 *
 * @param locale the locale to map; null selects {@link Locale#getDefault()}
 * @return the case locale value for the effective locale
 */
private static int getCaseLocale(Locale locale) {
    final Locale effective = (locale != null) ? locale : Locale.getDefault();
    return UCaseProps.getCaseLocale(effective);
}
/**
 * Entry point for lowercasing.
 *
 * @return the shared {@link Lower} instance that has no options set.
 * @draft ICU 59
 * @provisional This API might change or be removed in a future release.
 */
public static Lower toLower() {
    return Lower.DEFAULT;
}
/**
 * Entry point for uppercasing.
 *
 * @return the shared {@link Upper} instance that has no options set.
 * @draft ICU 59
 * @provisional This API might change or be removed in a future release.
 */
public static Upper toUpper() {
    return Upper.DEFAULT;
}
/**
 * Entry point for titlecasing.
 *
 * @return the shared {@link Title} instance that has no options set.
 * @draft ICU 59
 * @provisional This API might change or be removed in a future release.
 */
public static Title toTitle() {
    return Title.DEFAULT;
}
/**
 * Entry point for case folding.
 *
 * @return the shared {@link Fold} instance that has no options set.
 * @draft ICU 59
 * @provisional This API might change or be removed in a future release.
 */
public static Fold fold() {
    return Fold.DEFAULT;
}
/**
 * Returns an instance that behaves like this one but
 * omits unchanged text when case-mapping with {@link Edits}.
 *
 * <p>This object is not modified. The {@link Lower}, {@link Upper} and
 * {@link Title} overrides narrow the return type to their own class.
 *
 * @return an options object with this option.
 * @draft ICU 59
 * @provisional This API might change or be removed in a future release.
 */
public abstract CaseMap omitUnchangedText();
/**
* Lowercasing options and methods. Immutable.
*
* @see #toLower()
* @draft ICU 59
* @provisional This API might change or be removed in a future release.
*/
public static final class Lower extends CaseMap {
    private static final Lower DEFAULT = new Lower(0);
    private static final Lower OMIT_UNCHANGED = new Lower(CaseMapImpl.OMIT_UNCHANGED_TEXT);

    private Lower(int options) {
        super(options);
    }

    /**
     * {@inheritDoc}
     * @draft ICU 59
     * @provisional This API might change or be removed in a future release.
     */
    @Override
    public Lower omitUnchangedText() {
        // Only one option exists for lowercasing, so a single shared instance suffices.
        return OMIT_UNCHANGED;
    }

    /**
     * Lowercases a string and optionally records edits (see {@link #omitUnchangedText}).
     * Casing is locale-dependent and context-sensitive.
     * The result may be longer or shorter than the original.
     *
     * @param locale The locale ID. Can be null for {@link Locale#getDefault}.
     *        (See {@link ULocale#toLocale}.)
     * @param src The original string.
     * @param dest A buffer for the result string. Must not be null.
     * @param edits Records edits for index mapping, working with styled text,
     *        and getting only changes (if any).
     *        This function calls edits.reset() first. edits can be null.
     * @return dest with the result string (or only changes) appended.
     *
     * @see UCharacter#toLowerCase(Locale, String)
     * @draft ICU 59
     * @provisional This API might change or be removed in a future release.
     */
    public <A extends Appendable> A apply(
            Locale locale, CharSequence src, A dest, Edits edits) {
        final int caseLocale = getCaseLocale(locale);
        return CaseMapImpl.toLower(caseLocale, internalOptions, src, dest, edits);
    }
}
/**
* Uppercasing options and methods. Immutable.
*
* @see #toUpper()
* @draft ICU 59
* @provisional This API might change or be removed in a future release.
*/
public static final class Upper extends CaseMap {
    private static final Upper DEFAULT = new Upper(0);
    private static final Upper OMIT_UNCHANGED = new Upper(CaseMapImpl.OMIT_UNCHANGED_TEXT);

    private Upper(int options) {
        super(options);
    }

    /**
     * {@inheritDoc}
     * @draft ICU 59
     * @provisional This API might change or be removed in a future release.
     */
    @Override
    public Upper omitUnchangedText() {
        // Only one option exists for uppercasing, so a single shared instance suffices.
        return OMIT_UNCHANGED;
    }

    /**
     * Uppercases a string and optionally records edits (see {@link #omitUnchangedText}).
     * Casing is locale-dependent and context-sensitive.
     * The result may be longer or shorter than the original.
     *
     * @param locale The locale ID. Can be null for {@link Locale#getDefault}.
     *        (See {@link ULocale#toLocale}.)
     * @param src The original string.
     * @param dest A buffer for the result string. Must not be null.
     * @param edits Records edits for index mapping, working with styled text,
     *        and getting only changes (if any).
     *        This function calls edits.reset() first. edits can be null.
     * @return dest with the result string (or only changes) appended.
     *
     * @see UCharacter#toUpperCase(Locale, String)
     * @draft ICU 59
     * @provisional This API might change or be removed in a future release.
     */
    public <A extends Appendable> A apply(
            Locale locale, CharSequence src, A dest, Edits edits) {
        final int caseLocale = getCaseLocale(locale);
        return CaseMapImpl.toUpper(caseLocale, internalOptions, src, dest, edits);
    }
}
/**
* Titlecasing options and methods. Immutable.
*
* @see #toTitle()
* @draft ICU 59
* @provisional This API might change or be removed in a future release.
*/
public static final class Title extends CaseMap {
private static final Title DEFAULT = new Title(0);
private static final Title OMIT_UNCHANGED = new Title(CaseMapImpl.OMIT_UNCHANGED_TEXT);
private Title(int opt) { super(opt); }
/**
* {@inheritDoc}
* @draft ICU 59
* @provisional This API might change or be removed in a future release.
*/
@Override
public Title omitUnchangedText() {
if (internalOptions == 0 || internalOptions == CaseMapImpl.OMIT_UNCHANGED_TEXT) {
return OMIT_UNCHANGED;
}
return new Title(internalOptions | CaseMapImpl.OMIT_UNCHANGED_TEXT);
}
/**
* Returns an instance that behaves like this one but
* does not lowercase non-initial parts of words when titlecasing.
*
* <p>By default, titlecasing will titlecase the first cased character
* of a word and lowercase all other characters.
* With this option, the other characters will not be modified.
*
* @return an options object with this option.
* @see UCharacter#TITLECASE_NO_LOWERCASE
* @draft ICU 59
* @provisional This API might change or be removed in a future release.
*/
public Title noLowercase() {
return new Title(internalOptions | UCharacter.TITLECASE_NO_LOWERCASE);
}
// TODO: update references to the Unicode Standard for recent version
/**
* Returns an instance that behaves like this one but
* does not adjust the titlecasing indexes from BreakIterator::next() indexes;
* titlecases exactly the characters at breaks from the iterator.
*
* <p>By default, titlecasing will take each break iterator index,
* adjust it by looking for the next cased character, and titlecase that one.
* Other characters are lowercased.
*
* <p>This follows Unicode 4 &amp; 5 section 3.13 Default Case Operations:
*
* R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
* #29, "Text Boundaries." Between each pair of word boundaries, find the first
* cased character F. If F exists, map F to default_title(F); then map each
* subsequent character C to default_lower(C).
*
* @return an options object with this option.
* @see UCharacter#TITLECASE_NO_BREAK_ADJUSTMENT
* @draft ICU 59
* @provisional This API might change or be removed in a future release.
*/
public Title noBreakAdjustment() {
    // Keep all existing option bits and add the "no break adjustment" bit.
    final int combined = internalOptions | UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT;
    return new Title(combined);
}
/**
* Titlecases a string and optionally records edits (see {@link #omitUnchangedText}).
* Casing is locale-dependent and context-sensitive.
* The result may be longer or shorter than the original.
*
* <p>Titlecasing uses a break iterator to find the first characters of words
* that are to be titlecased. It titlecases those characters and lowercases
* all others. (This can be modified with options bits.)
*
* @param locale The locale ID. Can be null for {@link Locale#getDefault}.
* (See {@link ULocale#toLocale}.)
* @param iter A break iterator to find the first characters of words that are to be titlecased.
* It is set to the source string (setText())
* and used one or more times for iteration (first() and next()).
* If null, then a word break iterator for the locale is used
* (or something equivalent).
* @param src The original string.
* @param dest A buffer for the result string. Must not be null.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* This function calls edits.reset() first. edits can be null.
* @return dest with the result string (or only changes) appended.
*
* @see UCharacter#toTitleCase(Locale, String, BreakIterator, int)
* @draft ICU 59
* @provisional This API might change or be removed in a future release.
*/
public <A extends Appendable> A apply(
        Locale locale, BreakIterator iter, CharSequence src, A dest, Edits edits) {
    if (iter == null) {
        // The documented contract allows a null locale (meaning the default locale),
        // but BreakIterator.getWordInstance() does not accept null.
        // Substitute the default locale here so that apply(null, null, ...) works
        // as documented instead of failing with a NullPointerException.
        Locale breakLocale = (locale != null) ? locale : Locale.getDefault();
        iter = BreakIterator.getWordInstance(breakLocale);
    }
    // The break iterator must iterate over exactly the text being titlecased.
    iter.setText(src.toString());
    // The case locale is still derived from the caller's locale argument, unchanged.
    return CaseMapImpl.toTitle(
            getCaseLocale(locale), internalOptions, iter, src, dest, edits);
}
}
/**
* Case folding options and methods. Immutable.
*
* @see #fold()
* @draft ICU 59
* @provisional This API might change or be removed in a future release.
*/
public static final class Fold extends CaseMap {
    // One shared immutable instance per combination of the two supported option bits.
    private static final Fold DEFAULT = new Fold(0);
    private static final Fold TURKIC = new Fold(UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I);
    private static final Fold OMIT_UNCHANGED = new Fold(CaseMapImpl.OMIT_UNCHANGED_TEXT);
    private static final Fold TURKIC_OMIT_UNCHANGED = new Fold(
            UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I | CaseMapImpl.OMIT_UNCHANGED_TEXT);

    private Fold(int opt) {
        super(opt);
    }

    /**
     * {@inheritDoc}
     * @draft ICU 59
     * @provisional This API might change or be removed in a future release.
     */
    @Override
    public Fold omitUnchangedText() {
        // Preserve the Turkic bit of this instance; the result always omits unchanged text.
        final boolean isTurkic =
                (internalOptions & UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I) != 0;
        if (isTurkic) {
            return TURKIC_OMIT_UNCHANGED;
        }
        return OMIT_UNCHANGED;
    }

    /**
     * Returns an instance that behaves like this one but
     * uses the Turkic-specific mappings for dotted I and dotless i (tr, az).
     *
     * <p>These are the Unicode CaseFolding.txt mappings marked with 'T':
     * they are excluded from the default mappings and
     * included in the Turkic-specific ones.
     *
     * @return an options object with this option.
     * @see UCharacter#FOLD_CASE_EXCLUDE_SPECIAL_I
     * @draft ICU 59
     * @provisional This API might change or be removed in a future release.
     */
    public Fold turkic() {
        // Preserve the omit-unchanged bit of this instance; the result is always Turkic.
        final boolean omitsUnchanged =
                (internalOptions & CaseMapImpl.OMIT_UNCHANGED_TEXT) != 0;
        if (omitsUnchanged) {
            return TURKIC_OMIT_UNCHANGED;
        }
        return TURKIC;
    }

    /**
     * Case-folds a string and optionally records edits (see {@link #omitUnchangedText}).
     *
     * <p>Case folding does not depend on a locale or on context.
     * The only variation is the option for whether to include or exclude the mappings
     * for dotted I and dotless i that are marked with 'T' in CaseFolding.txt.
     *
     * <p>The result may be longer or shorter than the original.
     *
     * @param src       The original string.
     * @param dest      A buffer for the result string. Must not be null.
     * @param edits     Records edits for index mapping, working with styled text,
     *                  and getting only changes (if any).
     *                  This function calls edits.reset() first. edits can be null.
     * @return dest with the result string (or only changes) appended.
     *
     * @see UCharacter#foldCase(String, int)
     * @draft ICU 59
     * @provisional This API might change or be removed in a future release.
     */
    public <A extends Appendable> A apply(CharSequence src, A dest, Edits edits) {
        final int options = internalOptions;
        return CaseMapImpl.fold(options, src, dest, edits);
    }
}
}

View File

@ -0,0 +1,494 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.text;
import java.nio.BufferOverflowException;
import java.util.Arrays;
/**
* Records lengths of string edits but not replacement text.
* Supports replacements, insertions, deletions in linear progression.
* Does not support moving/reordering of text.
*
* @draft ICU 59
* @provisional This API might change or be removed in a future release.
*/
public final class Edits {
// Encoding overview: each edit record is stored in one or more 16-bit units (Java chars).
// The value range of the head unit determines the record type.
//
// 0000uuuuuuuuuuuu records u+1 unchanged text units.
private static final int MAX_UNCHANGED_LENGTH = 0x1000;
private static final int MAX_UNCHANGED = MAX_UNCHANGED_LENGTH - 1;
// 0wwwcccccccccccc with w=1..6 records ccc+1 replacements of w:w text units.
// No length change.
private static final int MAX_SHORT_WIDTH = 6;
private static final int MAX_SHORT_CHANGE_LENGTH = 0xfff;
private static final int MAX_SHORT_CHANGE = 0x6fff;
// 0111mmmmmmnnnnnn records a replacement of m text units with n.
// m or n = 61: actual length follows in the next edits array unit.
// m or n = 62..63: actual length follows in the next two edits array units.
// Bit 30 of the actual length is in the head unit.
// Trailing units have bit 15 set.
private static final int LENGTH_IN_1TRAIL = 61;
private static final int LENGTH_IN_2TRAIL = 62;
// Initial capacity of the units array (see the constructor and growArray()).
private static final int STACK_CAPACITY = 100;
// The encoded edit records; only the first "length" units are in use.
private char[] array;
// Number of units of array that are in use.
private int length;
// Sum of (newLength - oldLength) over all recorded replacements.
private int delta;
/**
* Constructs an empty object.
* @draft ICU 59
* @provisional This API might change or be removed in a future release.
*/
public Edits() {
array = new char[STACK_CAPACITY];
}
/**
* Resets the data but may not release memory.
* @draft ICU 59
* @provisional This API might change or be removed in a future release.
*/
public void reset() {
length = delta = 0;
}
// Overwrites the last unit in use. The caller must ensure that length > 0.
private void setLastUnit(int last) {
array[length - 1] = (char)last;
}
// Returns the last unit in use, or 0xffff if there is none.
// 0xffff is above both MAX_UNCHANGED and MAX_SHORT_CHANGE,
// so an empty array never looks like a record that could be merged into.
private int lastUnit() {
return length > 0 ? array[length - 1] : 0xffff;
}
/**
* Adds a record for an unchanged segment of text.
* Normally called from inside ICU string transformation functions, not user code.
* @param unchangedLength the number of unchanged text units; must not be negative
* @throws IllegalArgumentException if unchangedLength is negative
* @draft ICU 59
* @provisional This API might change or be removed in a future release.
*/
public void addUnchanged(int unchangedLength) {
if(unchangedLength < 0) {
throw new IllegalArgumentException(
"addUnchanged(" + unchangedLength + "): length must not be negative");
}
// Merge into previous unchanged-text record, if any.
int last = lastUnit();
if(last < MAX_UNCHANGED) {
// The previous unit is an unchanged-text record that is not yet full.
int remaining = MAX_UNCHANGED - last;
if (remaining >= unchangedLength) {
setLastUnit(last + unchangedLength);
return;
}
// Fill up the previous record and continue with the rest.
setLastUnit(MAX_UNCHANGED);
unchangedLength -= remaining;
}
// Split large lengths into multiple units.
while(unchangedLength >= MAX_UNCHANGED_LENGTH) {
append(MAX_UNCHANGED);
unchangedLength -= MAX_UNCHANGED_LENGTH;
}
// Write a small (remaining) length.
if(unchangedLength > 0) {
// A unit value u records u+1 unchanged text units.
append(unchangedLength - 1);
}
}
/**
* Adds a record for a text replacement/insertion/deletion.
* Normally called from inside ICU string transformation functions, not user code.
* A call with both lengths zero records nothing.
* @param oldLength the number of text units that are replaced; must not be negative
* @param newLength the number of text units in the replacement; must not be negative
* @throws IllegalArgumentException if oldLength or newLength is negative
* @throws IndexOutOfBoundsException if the accumulated length delta would overflow an int
* @draft ICU 59
* @provisional This API might change or be removed in a future release.
*/
public void addReplace(int oldLength, int newLength) {
if(oldLength == newLength && 0 < oldLength && oldLength <= MAX_SHORT_WIDTH) {
// Replacement of short oldLength text units by same-length new text.
// Merge into previous short-replacement record, if any.
int last = lastUnit();
if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE &&
(last >> 12) == oldLength && (last & 0xfff) < MAX_SHORT_CHANGE_LENGTH) {
// Same width and the counter is not yet full: just count one more replacement.
setLastUnit(last + 1);
return;
}
// Start a new short-replacement record: width in bits 14..12, counter 0 means one.
append(oldLength << 12);
return;
}
if(oldLength < 0 || newLength < 0) {
throw new IllegalArgumentException(
"addReplace(" + oldLength + ", " + newLength +
"): both lengths must be non-negative");
}
if (oldLength == 0 && newLength == 0) {
return;
}
int newDelta = newLength - oldLength;
if (newDelta != 0) {
if ((newDelta > 0 && delta >= 0 && newDelta > (Integer.MAX_VALUE - delta)) ||
(newDelta < 0 && delta < 0 && newDelta < (Integer.MIN_VALUE - delta))) {
// Integer overflow or underflow.
throw new IndexOutOfBoundsException();
}
delta += newDelta;
}
int head = 0x7000;
if (oldLength < LENGTH_IN_1TRAIL && newLength < LENGTH_IN_1TRAIL) {
// Both lengths fit into the 6-bit fields of the head unit.
head |= oldLength << 6;
head |= newLength;
append(head);
} else if ((array.length - length) >= 5 || growArray()) {
// A maximal record needs 5 units: the head plus two trail units per length.
// The trail units are written first; the head is stored once its fields are known.
int limit = length + 1;
if(oldLength < LENGTH_IN_1TRAIL) {
head |= oldLength << 6;
} else if(oldLength <= 0x7fff) {
head |= LENGTH_IN_1TRAIL << 6;
array[limit++] = (char)(0x8000 | oldLength);
} else {
// Bit 30 of the length goes into the head field (62 or 63),
// bits 29..15 and 14..0 go into the two trail units.
head |= (LENGTH_IN_2TRAIL + (oldLength >> 30)) << 6;
array[limit++] = (char)(0x8000 | (oldLength >> 15));
array[limit++] = (char)(0x8000 | oldLength);
}
if(newLength < LENGTH_IN_1TRAIL) {
head |= newLength;
} else if(newLength <= 0x7fff) {
head |= LENGTH_IN_1TRAIL;
array[limit++] = (char)(0x8000 | newLength);
} else {
head |= LENGTH_IN_2TRAIL + (newLength >> 30);
array[limit++] = (char)(0x8000 | (newLength >> 15));
array[limit++] = (char)(0x8000 | newLength);
}
array[length] = (char)head;
length = limit;
}
}
// Appends one unit, growing the array first if it is full.
private void append(int r) {
if(length < array.length || growArray()) {
array[length++] = (char)r;
}
}
// Enlarges the array: to 2000 units when growing from the initial capacity,
// otherwise to double the current capacity (capped at Integer.MAX_VALUE).
// Always returns true; throws BufferOverflowException when the array
// cannot grow by at least 5 units.
private boolean growArray() {
int newCapacity;
if (array.length == STACK_CAPACITY) {
newCapacity = 2000;
} else if (array.length == Integer.MAX_VALUE) {
throw new BufferOverflowException();
} else if (array.length >= (Integer.MAX_VALUE / 2)) {
newCapacity = Integer.MAX_VALUE;
} else {
newCapacity = 2 * array.length;
}
// Grow by at least 5 units so that a maximal change record will fit.
if ((newCapacity - array.length) < 5) {
throw new BufferOverflowException();
}
array = Arrays.copyOf(array, newCapacity);
return true;
}
/**
* How much longer is the new text compared with the old text?
* @return new length minus old length
* @draft ICU 59
* @provisional This API might change or be removed in a future release.
*/
public int lengthDelta() { return delta; }
/**
* @return true if there are any change edits
* @draft ICU 59
* @provisional This API might change or be removed in a future release.
*/
public boolean hasChanges() {
if (delta != 0) {
return true;
}
// The delta can be zero even with changes (equal-length replacements),
// so look for any unit that is not an unchanged-text record.
for (int i = 0; i < length; ++i) {
if (array[i] > MAX_UNCHANGED) {
return true;
}
}
return false;
}
/**
* Access to the list of edits.
* @see #getCoarseIterator
* @see #getFineIterator
* @draft ICU 59
* @provisional This API might change or be removed in a future release.
*/
public static final class Iterator {
// The units array and the number of units in use, as passed to the constructor.
// The array is referenced, not copied.
private final char[] array;
// Index of the next unit to read from array.
private int index;
private final int length;
// Fine-grained iteration only: number of further equal-length changes
// that still have to be returned from the current compressed short-change unit.
private int remaining;
// onlyChanges_: next() skips unchanged spans. coarse: adjacent changes are merged.
private final boolean onlyChanges_, coarse;
// State of the current span.
private boolean changed;
private int oldLength_, newLength_;
// Start indexes of the current span in the source string,
// the replacement-only string, and the full destination string.
private int srcIndex, replIndex, destIndex;
private Iterator(char[] a, int len, boolean oc, boolean crs) {
array = a;
length = len;
onlyChanges_ = oc;
coarse = crs;
}
// Decodes one 6-bit length field of a 0111mmmmmmnnnnnn head unit.
// Values below 61 are the length itself; otherwise the length is read
// from one or two trail units, and index is advanced past them.
private int readLength(int head) {
if (head < LENGTH_IN_1TRAIL) {
return head;
} else if (head < LENGTH_IN_2TRAIL) {
assert(index < length);
assert(array[index] >= 0x8000);
return array[index++] & 0x7fff;
} else {
assert((index + 2) <= length);
assert(array[index] >= 0x8000);
assert(array[index + 1] >= 0x8000);
// The low bit of the head field (62 or 63) is bit 30 of the length.
int len = ((head & 1) << 30) |
((array[index] & 0x7fff) << 15) |
(array[index + 1] & 0x7fff);
index += 2;
return len;
}
}
// Moves the three start indexes past the current span.
// The replacement index only advances over changed spans.
private void updateIndexes() {
srcIndex += oldLength_;
if (changed) {
replIndex += newLength_;
}
destIndex += newLength_;
}
// Sets the "no more edits" state (empty unchanged span) and returns false.
private boolean noNext() {
// No change beyond the string.
changed = false;
oldLength_ = newLength_ = 0;
return false;
}
/**
* Advances to the next edit.
* @return true if there is another edit
* @draft ICU 59
* @provisional This API might change or be removed in a future release.
*/
public boolean next() {
return next(onlyChanges_);
}
// Advances to the next span. If onlyChanges is true, unchanged spans are skipped
// (their lengths are still added to the source and destination indexes).
private boolean next(boolean onlyChanges) {
// First move the indexes past the span that the previous call left as current.
updateIndexes();
if (remaining > 0) {
// Fine-grained iterator: Continue a sequence of equal-length changes.
--remaining;
return true;
}
if (index >= length) {
return noNext();
}
int u = array[index++];
if (u <= MAX_UNCHANGED) {
// Combine adjacent unchanged ranges.
changed = false;
oldLength_ = u + 1;
while (index < length && (u = array[index]) <= MAX_UNCHANGED) {
++index;
oldLength_ += u + 1;
}
newLength_ = oldLength_;
if (onlyChanges) {
// Skip the unchanged span: account for it in the indexes and fall through
// to decode the change record that follows, if any.
updateIndexes();
if (index >= length) {
return noNext();
}
// already fetched u > MAX_UNCHANGED at index
++index;
} else {
return true;
}
}
changed = true;
if (u <= MAX_SHORT_CHANGE) {
if (coarse) {
// Treat the whole compressed sequence as one change.
int w = u >> 12;
int len = (u & 0xfff) + 1;
oldLength_ = newLength_ = len * w;
} else {
// Split a sequence of equal-length changes that was compressed into one unit.
oldLength_ = newLength_ = u >> 12;
remaining = u & 0xfff;
return true;
}
} else {
assert(u <= 0x7fff);
oldLength_ = readLength((u >> 6) & 0x3f);
newLength_ = readLength(u & 0x3f);
if (!coarse) {
return true;
}
}
// Coarse iterator only from here on.
// Combine adjacent changes.
while (index < length && (u = array[index]) > MAX_UNCHANGED) {
++index;
if (u <= MAX_SHORT_CHANGE) {
int w = u >> 12;
int len = (u & 0xfff) + 1;
len = len * w;
oldLength_ += len;
newLength_ += len;
} else {
assert(u <= 0x7fff);
int oldLen = readLength((u >> 6) & 0x3f);
int newLen = readLength(u & 0x3f);
oldLength_ += oldLen;
newLength_ += newLen;
}
}
return true;
}
/**
* Finds the edit that contains the source index.
* The source index may be found in a non-change
* even if normal iteration would skip non-changes.
* Normal iteration can continue from a found edit.
*
* <p>The iterator state before this search logically does not matter.
* (It may affect the performance of the search.)
*
* <p>The iterator state after this search is undefined
* if the source index is out of bounds for the source string.
*
* @param i source index
* @return true if the edit for the source index was found
* @draft ICU 59
* @provisional This API might change or be removed in a future release.
*/
public boolean findSourceIndex(int i) {
if (i < 0) { return false; }
if (i < srcIndex) {
// Reset the iterator to the start.
index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
} else if (i < (srcIndex + oldLength_)) {
// The index is in the current span.
return true;
}
// Search forward, visiting unchanged spans as well as changes.
while (next(false)) {
if (i < (srcIndex + oldLength_)) {
// The index is in the current span.
return true;
}
if (remaining > 0) {
// Is the index in one of the remaining compressed edits?
// srcIndex is the start of the current span, before the remaining ones.
int len = (remaining + 1) * oldLength_;
if (i < (srcIndex + len)) {
// Jump directly to the n-th following edit of the compressed sequence.
// Old and new lengths are equal here, so all three indexes move by len.
int n = (i - srcIndex) / oldLength_; // 1 <= n <= remaining
len = n * oldLength_;
srcIndex += len;
replIndex += len;
destIndex += len;
remaining -= n;
return true;
}
// Make next() skip all of these edits at once.
oldLength_ = newLength_ = len;
remaining = 0;
}
}
return false;
}
/**
* @return true if this edit replaces oldLength() units with newLength() different ones.
* false if oldLength units remain unchanged.
* @draft ICU 59
* @provisional This API might change or be removed in a future release.
*/
public boolean hasChange() { return changed; }
/**
* @return the number of units in the original string which are replaced or remain unchanged.
* @draft ICU 59
* @provisional This API might change or be removed in a future release.
*/
public int oldLength() { return oldLength_; }
/**
* @return the number of units in the modified string, if hasChange() is true.
* Same as oldLength if hasChange() is false.
* @draft ICU 59
* @provisional This API might change or be removed in a future release.
*/
public int newLength() { return newLength_; }
/**
* @return the current index into the source string
* @draft ICU 59
* @provisional This API might change or be removed in a future release.
*/
public int sourceIndex() { return srcIndex; }
/**
* @return the current index into the replacement-characters-only string,
* not counting unchanged spans
* @draft ICU 59
* @provisional This API might change or be removed in a future release.
*/
public int replacementIndex() { return replIndex; }
/**
* @return the current index into the full destination string
* @draft ICU 59
* @provisional This API might change or be removed in a future release.
*/
public int destinationIndex() { return destIndex; }
};
/**
* Returns an Iterator for coarse-grained changes for simple string updates.
* Skips non-changes.
* @return an Iterator that merges adjacent changes.
* @draft ICU 59
* @provisional This API might change or be removed in a future release.
*/
public Iterator getCoarseChangesIterator() {
return new Iterator(array, length, true, true);
}
/**
* Returns an Iterator for coarse-grained changes and non-changes for simple string updates.
* @return an Iterator that merges adjacent changes.
* @draft ICU 59
* @provisional This API might change or be removed in a future release.
*/
public Iterator getCoarseIterator() {
return new Iterator(array, length, false, true);
}
/**
* Returns an Iterator for fine-grained changes for modifying styled text.
* Skips non-changes.
* @return an Iterator that separates adjacent changes.
* @draft ICU 59
* @provisional This API might change or be removed in a future release.
*/
public Iterator getFineChangesIterator() {
return new Iterator(array, length, true, false);
}
/**
* Returns an Iterator for fine-grained changes and non-changes for modifying styled text.
* @return an Iterator that separates adjacent changes.
* @draft ICU 59
* @provisional This API might change or be removed in a future release.
*/
public Iterator getFineIterator() {
return new Iterator(array, length, false, false);
}
}

View File

@ -3866,7 +3866,6 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
int n = getRangeCount(); int n = getRangeCount();
int result; int result;
StringBuilder full = new StringBuilder(); StringBuilder full = new StringBuilder();
int locCache[] = new int[1];
for (int i=0; i<n; ++i) { for (int i=0; i<n; ++i) {
int start = getRangeStart(i); int start = getRangeStart(i);
@ -3881,13 +3880,13 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
// add case mappings // add case mappings
// (does not add long s for regular s, or Kelvin for k, for example) // (does not add long s for regular s, or Kelvin for k, for example)
for (int cp=start; cp<=end; ++cp) { for (int cp=start; cp<=end; ++cp) {
result = csp.toFullLower(cp, null, full, root, locCache); result = csp.toFullLower(cp, null, full, UCaseProps.LOC_ROOT);
addCaseMapping(foldSet, result, full); addCaseMapping(foldSet, result, full);
result = csp.toFullTitle(cp, null, full, root, locCache); result = csp.toFullTitle(cp, null, full, UCaseProps.LOC_ROOT);
addCaseMapping(foldSet, result, full); addCaseMapping(foldSet, result, full);
result = csp.toFullUpper(cp, null, full, root, locCache); result = csp.toFullUpper(cp, null, full, UCaseProps.LOC_ROOT);
addCaseMapping(foldSet, result, full); addCaseMapping(foldSet, result, full);
result = csp.toFullFolding(cp, full, 0); result = csp.toFullFolding(cp, full, 0);
@ -3906,6 +3905,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
} else { } else {
BreakIterator bi = BreakIterator.getWordInstance(root); BreakIterator bi = BreakIterator.getWordInstance(root);
for (String str : strings) { for (String str : strings) {
// TODO: call lower-level functions
foldSet.add(UCharacter.toLowerCase(root, str)); foldSet.add(UCharacter.toLowerCase(root, str));
foldSet.add(UCharacter.toTitleCase(root, str, bi)); foldSet.add(UCharacter.toTitleCase(root, str, bi));
foldSet.add(UCharacter.toUpperCase(root, str)); foldSet.add(UCharacter.toUpperCase(root, str));

View File

@ -44,7 +44,7 @@ class LowercaseTransliterator extends Transliterator{
private final UCaseProps csp; private final UCaseProps csp;
private ReplaceableContextIterator iter; private ReplaceableContextIterator iter;
private StringBuilder result; private StringBuilder result;
private int[] locCache; private int caseLocale;
/** /**
* Constructs a transliterator. * Constructs a transliterator.
@ -56,8 +56,7 @@ class LowercaseTransliterator extends Transliterator{
csp=UCaseProps.INSTANCE; csp=UCaseProps.INSTANCE;
iter=new ReplaceableContextIterator(); iter=new ReplaceableContextIterator();
result = new StringBuilder(); result = new StringBuilder();
locCache = new int[1]; caseLocale = UCaseProps.getCaseLocale(locale);
locCache[0]=0;
} }
/** /**
@ -85,7 +84,7 @@ class LowercaseTransliterator extends Transliterator{
iter.setLimit(offsets.limit); iter.setLimit(offsets.limit);
iter.setContextLimits(offsets.contextStart, offsets.contextLimit); iter.setContextLimits(offsets.contextStart, offsets.contextLimit);
while((c=iter.nextCaseMapCP())>=0) { while((c=iter.nextCaseMapCP())>=0) {
c=csp.toFullLower(c, iter, result, locale, locCache); c=csp.toFullLower(c, iter, result, caseLocale);
if(iter.didReachLimit() && isIncremental) { if(iter.didReachLimit() && isIncremental) {
// the case mapping function tried to look beyond the context limit // the case mapping function tried to look beyond the context limit

View File

@ -42,7 +42,7 @@ class TitlecaseTransliterator extends Transliterator {
private final UCaseProps csp; private final UCaseProps csp;
private ReplaceableContextIterator iter; private ReplaceableContextIterator iter;
private StringBuilder result; private StringBuilder result;
private int[] locCache; private int caseLocale;
/** /**
* Constructs a transliterator. * Constructs a transliterator.
@ -55,8 +55,7 @@ class TitlecaseTransliterator extends Transliterator {
csp=UCaseProps.INSTANCE; csp=UCaseProps.INSTANCE;
iter=new ReplaceableContextIterator(); iter=new ReplaceableContextIterator();
result = new StringBuilder(); result = new StringBuilder();
locCache = new int[1]; caseLocale = UCaseProps.getCaseLocale(locale);
locCache[0]=0;
} }
/** /**
@ -119,9 +118,9 @@ class TitlecaseTransliterator extends Transliterator {
type=csp.getTypeOrIgnorable(c); type=csp.getTypeOrIgnorable(c);
if(type>=0) { // not case-ignorable if(type>=0) { // not case-ignorable
if(doTitle) { if(doTitle) {
c=csp.toFullTitle(c, iter, result, locale, locCache); c=csp.toFullTitle(c, iter, result, caseLocale);
} else { } else {
c=csp.toFullLower(c, iter, result, locale, locCache); c=csp.toFullLower(c, iter, result, caseLocale);
} }
doTitle = type==0; // doTitle=isUncased doTitle = type==0; // doTitle=isUncased

View File

@ -41,7 +41,7 @@ class UppercaseTransliterator extends Transliterator {
private final UCaseProps csp; private final UCaseProps csp;
private ReplaceableContextIterator iter; private ReplaceableContextIterator iter;
private StringBuilder result; private StringBuilder result;
private int[] locCache; private int caseLocale;
/** /**
* Constructs a transliterator. * Constructs a transliterator.
@ -52,8 +52,7 @@ class UppercaseTransliterator extends Transliterator {
csp=UCaseProps.INSTANCE; csp=UCaseProps.INSTANCE;
iter=new ReplaceableContextIterator(); iter=new ReplaceableContextIterator();
result = new StringBuilder(); result = new StringBuilder();
locCache = new int[1]; caseLocale = UCaseProps.getCaseLocale(locale);
locCache[0]=0;
} }
/** /**
@ -81,7 +80,7 @@ class UppercaseTransliterator extends Transliterator {
iter.setLimit(offsets.limit); iter.setLimit(offsets.limit);
iter.setContextLimits(offsets.contextStart, offsets.contextLimit); iter.setContextLimits(offsets.contextStart, offsets.contextLimit);
while((c=iter.nextCaseMapCP())>=0) { while((c=iter.nextCaseMapCP())>=0) {
c=csp.toFullUpper(c, iter, result, locale, locCache); c=csp.toFullUpper(c, iter, result, caseLocale);
if(iter.didReachLimit() && isIncremental) { if(iter.didReachLimit() && isIncremental) {
// the case mapping function tried to look beyond the context limit // the case mapping function tried to look beyond the context limit

View File

@ -24,6 +24,8 @@ import com.ibm.icu.impl.Utility;
import com.ibm.icu.lang.UCharacter; import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UProperty; import com.ibm.icu.lang.UProperty;
import com.ibm.icu.text.BreakIterator; import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.CaseMap;
import com.ibm.icu.text.Edits;
import com.ibm.icu.text.RuleBasedBreakIterator; import com.ibm.icu.text.RuleBasedBreakIterator;
import com.ibm.icu.text.UTF16; import com.ibm.icu.text.UTF16;
import com.ibm.icu.util.ULocale; import com.ibm.icu.util.ULocale;
@ -708,6 +710,191 @@ public final class UCharacterCaseTest extends TestFmwk
assertGreekUpper("ρωμέικα", "ΡΩΜΕΪΚΑ"); assertGreekUpper("ρωμέικα", "ΡΩΜΕΪΚΑ");
} }
/**
 * Expected state of one edit span: whether it is a change,
 * and its lengths in the old and new strings.
 * Immutable; the fields are set once by the constructor.
 */
private static final class EditChange {
    private final boolean change;
    private final int oldLength, newLength;

    EditChange(boolean change, int oldLength, int newLength) {
        this.change = change;
        this.oldLength = oldLength;
        this.newLength = newLength;
    }
}
/**
 * Walks two equal iterators over the same edits and compares them with the expected spans:
 * ei1 is advanced with next(), ei2 is positioned with findSourceIndex().
 *
 * @param name label used as the prefix of all assertion messages
 * @param ei1 iterator that is checked via next()
 * @param ei2 iterator that is checked via findSourceIndex()
 * @param expected the expected spans, in order
 * @param withUnchanged true if next() on these iterators also returns unchanged spans
 */
private static void checkEditsIter(
        String name, Edits.Iterator ei1, Edits.Iterator ei2,  // two equal iterators
        EditChange[] expected, boolean withUnchanged) {
    // A negative source index is never found.
    assertFalse(name, ei2.findSourceIndex(-1));

    int srcPos = 0;
    int destPos = 0;
    int replPos = 0;
    int ordinal = 0;
    for (EditChange expect : expected) {
        final String msg = name + ' ' + ordinal;
        ++ordinal;

        // next() only stops at this span if it is a change
        // or the iterator also returns unchanged spans.
        final boolean returnedByNext = withUnchanged || expect.change;
        if (returnedByNext) {
            assertTrue(msg, ei1.next());
            assertEditsIterState(msg, ei1, expect, srcPos, destPos, replPos);
        }

        // An empty source span cannot contain any source index.
        if (expect.oldLength > 0) {
            assertTrue(msg, ei2.findSourceIndex(srcPos));
            assertEditsIterState(msg, ei2, expect, srcPos, destPos, replPos);
            if (!withUnchanged) {
                // For some iterators, move past the current range
                // so that findSourceIndex() has to look before the current index.
                ei2.next();
                ei2.next();
            }
        }

        srcPos += expect.oldLength;
        destPos += expect.newLength;
        if (expect.change) {
            replPos += expect.newLength;
        }
    }

    // After the last span: no more edits, an empty unchanged span,
    // and all indexes at the end of their strings.
    final String endMsg = name + " end";
    assertFalse(endMsg, ei1.next());
    assertFalse(endMsg, ei1.hasChange());
    assertEquals(endMsg, 0, ei1.oldLength());
    assertEquals(endMsg, 0, ei1.newLength());
    assertEquals(endMsg, srcPos, ei1.sourceIndex());
    assertEquals(endMsg, destPos, ei1.destinationIndex());
    assertEquals(endMsg, replPos, ei1.replacementIndex());

    // The index just past the end of the source string is not found either.
    assertFalse(name, ei2.findSourceIndex(srcPos));
}

/** Asserts that the iterator's current span matches the expected span and start indexes. */
private static void assertEditsIterState(
        String msg, Edits.Iterator iter, EditChange expect,
        int srcPos, int destPos, int replPos) {
    assertEquals(msg, expect.change, iter.hasChange());
    assertEquals(msg, expect.oldLength, iter.oldLength());
    assertEquals(msg, expect.newLength, iter.newLength());
    assertEquals(msg, srcPos, iter.sourceIndex());
    assertEquals(msg, destPos, iter.destinationIndex());
    assertEquals(msg, replPos, iter.replacementIndex());
}
// Exercises Edits directly: records a mix of unchanged spans and replacements,
// then checks hasChanges(), lengthDelta() and all four iterator flavors.
@Test
public void TestEdits() {
Edits edits = new Edits();
assertFalse("new Edits", edits.hasChanges());
assertEquals("new Edits", 0, edits.lengthDelta());
edits.addUnchanged(1); // multiple unchanged ranges are combined
edits.addUnchanged(10000); // too long, and they are split
// An empty replacement records nothing.
edits.addReplace(0, 0);
edits.addUnchanged(2);
// 1 + 10000 + 2 unchanged units so far, and no changes.
assertFalse("unchanged 10003", edits.hasChanges());
assertEquals("unchanged 10003", 0, edits.lengthDelta());
edits.addReplace(1, 1); // multiple short equal-length edits are compressed
// An empty unchanged span in between must not separate the short edits.
edits.addUnchanged(0);
edits.addReplace(1, 1);
edits.addReplace(1, 1);
// Pure insertion, then pure deletion.
edits.addReplace(0, 10);
edits.addReplace(100, 0);
edits.addReplace(3000, 4000); // variable-length encoding
edits.addReplace(100000, 100000);
assertTrue("some edits", edits.hasChanges());
assertEquals("some edits", 10 - 100 + 1000, edits.lengthDelta());
// Coarse iteration merges all adjacent changes into one span:
// old = 1+1+1+0+100+3000+100000, new = 1+1+1+10+0+4000+100000.
EditChange[] coarseExpectedChanges = new EditChange[] {
new EditChange(false, 10003, 10003),
new EditChange(true, 103103, 104013)
};
checkEditsIter("coarse",
edits.getCoarseIterator(), edits.getCoarseIterator(),
coarseExpectedChanges, true);
checkEditsIter("coarse changes",
edits.getCoarseChangesIterator(), edits.getCoarseChangesIterator(),
coarseExpectedChanges, false);
// Fine iteration returns every recorded replacement separately.
EditChange[] fineExpectedChanges = new EditChange[] {
new EditChange(false, 10003, 10003),
new EditChange(true, 1, 1),
new EditChange(true, 1, 1),
new EditChange(true, 1, 1),
new EditChange(true, 0, 10),
new EditChange(true, 100, 0),
new EditChange(true, 3000, 4000),
new EditChange(true, 100000, 100000)
};
checkEditsIter("fine",
edits.getFineIterator(), edits.getFineIterator(),
fineExpectedChanges, true);
checkEditsIter("fine changes",
edits.getFineChangesIterator(), edits.getFineChangesIterator(),
fineExpectedChanges, false);
// After reset() the object must look like a new one.
edits.reset();
assertFalse("reset", edits.hasChanges());
assertEquals("reset", 0, edits.lengthDelta());
Edits.Iterator ei = edits.getCoarseChangesIterator();
assertFalse("reset then iterator", ei.next());
}
// Runs each CaseMap operation with omitUnchangedText() and checks both
// the appended output (only the changed text) and the recorded edits.
// The StringBuilder and the Edits object are cleared before each further case.
@Test
public void TestCaseMapWithEdits() {
StringBuilder sb = new StringBuilder();
Edits edits = new Edits();
// Turkish lowercasing: only the first letter and the 'B' are expected to change.
sb = CaseMap.toLower().omitUnchangedText().apply(TURKISH_LOCALE_, "IstanBul", sb, edits);
assertEquals("toLower(Istanbul)", "ıb", sb.toString());
EditChange[] lowerExpectedChanges = new EditChange[] {
new EditChange(true, 1, 1),
new EditChange(false, 4, 4),
new EditChange(true, 1, 1),
new EditChange(false, 2, 2)
};
checkEditsIter("toLower(Istanbul)",
edits.getFineIterator(), edits.getFineIterator(),
lowerExpectedChanges, true);
sb.delete(0, sb.length());
edits.reset();
// Greek uppercasing: the initial capital stays, the other five letters are expected to change.
sb = CaseMap.toUpper().omitUnchangedText().apply(GREEK_LOCALE_, "Πατάτα", sb, edits);
assertEquals("toUpper(Πατάτα)", "ΑΤΑΤΑ", sb.toString());
EditChange[] upperExpectedChanges = new EditChange[] {
new EditChange(false, 1, 1),
new EditChange(true, 1, 1),
new EditChange(true, 1, 1),
new EditChange(true, 1, 1),
new EditChange(true, 1, 1),
new EditChange(true, 1, 1)
};
checkEditsIter("toUpper(Πατάτα)",
edits.getFineIterator(), edits.getFineIterator(),
upperExpectedChanges, true);
sb.delete(0, sb.length());
edits.reset();
// Dutch titlecasing with a null break iterator, no break adjustment and no lowercasing:
// the only expected change is the second letter of the initial "Ij".
sb = CaseMap.toTitle().omitUnchangedText().noBreakAdjustment().noLowercase().apply(
new Locale("nl"), null, "IjssEL IglOo", sb, edits);
assertEquals("toTitle(IjssEL IglOo)", "J", sb.toString());
EditChange[] titleExpectedChanges = new EditChange[] {
new EditChange(false, 1, 1),
new EditChange(true, 1, 1),
new EditChange(false, 10, 10)
};
checkEditsIter("toTitle(IjssEL IglOo)",
edits.getFineIterator(), edits.getFineIterator(),
titleExpectedChanges, true);
sb.delete(0, sb.length());
edits.reset();
// Turkic case folding: the second edit is expected to grow from one unit to two.
sb = CaseMap.fold().omitUnchangedText().turkic().apply("IßtanBul", sb, edits);
assertEquals("fold(IßtanBul)", "ıssb", sb.toString());
EditChange[] foldExpectedChanges = new EditChange[] {
new EditChange(true, 1, 1),
new EditChange(true, 1, 2),
new EditChange(false, 3, 3),
new EditChange(true, 1, 1),
new EditChange(false, 2, 2)
};
checkEditsIter("fold(IßtanBul)",
edits.getFineIterator(), edits.getFineIterator(),
foldExpectedChanges, true);
}
// private data members - test data -------------------------------------- // private data members - test data --------------------------------------
private static final Locale TURKISH_LOCALE_ = new Locale("tr", "TR"); private static final Locale TURKISH_LOCALE_ = new Locale("tr", "TR");
@ -945,7 +1132,7 @@ public final class UCharacterCaseTest extends TestFmwk
// private methods ------------------------------------------------------- // private methods -------------------------------------------------------
/** /**
* Converting the hex numbers represented betwee n ';' to Unicode strings * Converting the hex numbers represented between ';' to Unicode strings
* @param str string to break up into Unicode strings * @param str string to break up into Unicode strings
* @return array of Unicode strings ending with a null * @return array of Unicode strings ending with a null
*/ */