ICU-12410 move new code into new files, split ucasemap_imp.h from ustr_imp.h

X-SVN-Rev: 39655
This commit is contained in:
Markus Scherer 2017-02-09 21:15:34 +00:00
parent 1c2a1da83b
commit 5da94f206a
23 changed files with 1088 additions and 978 deletions

View File

@ -94,6 +94,7 @@ stringtriebuilder.o bytestriebuilder.o \
bytestrie.o bytestrieiterator.o \
ucharstrie.o ucharstriebuilder.o ucharstrieiterator.o \
dictionarydata.o \
edits.o \
appendable.o ustr_cnv.o unistr_cnv.o unistr.o unistr_case.o unistr_props.o \
utf_impl.o ustring.o ustrcase.o ucasemap.o ucasemap_titlecase_brkiter.o cstring.o ustrfmt.o ustrtrns.o ustr_wcs.o utext.o \
unistr_case_locale.o ustrcase_locale.o unistr_titlecase_brkiter.o ustr_titlecase_brkiter.o \

View File

@ -449,6 +449,7 @@
<ClCompile Include="cstring.cpp" />
<ClCompile Include="cstr.cpp" />
<ClCompile Include="cwchar.cpp" />
<ClCompile Include="edits.cpp" />
<ClCompile Include="messagepattern.cpp" />
<ClCompile Include="schriter.cpp" />
<ClCompile Include="stringpiece.cpp" />
@ -1511,6 +1512,20 @@
</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">copy "%(FullPath)" ..\..\include\unicode
</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\casemap.h">
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode
</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">copy "%(FullPath)" ..\..\include\unicode
</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">copy "%(FullPath)" ..\..\include\unicode
</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">copy "%(FullPath)" ..\..\include\unicode
</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
@ -1532,6 +1547,20 @@
<ClInclude Include="cstring.h" />
<ClInclude Include="cstr.h" />
<ClInclude Include="cwchar.h" />
<CustomBuild Include="unicode\edits.h">
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode
</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">copy "%(FullPath)" ..\..\include\unicode
</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">copy "%(FullPath)" ..\..\include\unicode
</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">copy "%(FullPath)" ..\..\include\unicode
</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\messagepattern.h">
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode
</Command>
@ -1616,6 +1645,7 @@
</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="ucasemap_imp.h" />
<CustomBuild Include="unicode\ucharstrie.h">
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode
</Command>

View File

@ -478,6 +478,9 @@
<ClCompile Include="cwchar.cpp">
<Filter>strings</Filter>
</ClCompile>
<ClCompile Include="edits.cpp">
<Filter>strings</Filter>
</ClCompile>
<ClCompile Include="schriter.cpp">
<Filter>strings</Filter>
</ClCompile>
@ -870,6 +873,9 @@
<ClInclude Include="cwchar.h">
<Filter>strings</Filter>
</ClInclude>
<ClInclude Include="ucasemap_imp.h">
<Filter>strings</Filter>
</ClInclude>
<ClInclude Include="uinvchar.h">
<Filter>strings</Filter>
</ClInclude>
@ -1096,9 +1102,15 @@
<CustomBuild Include="unicode\bytestream.h">
<Filter>strings</Filter>
</CustomBuild>
<CustomBuild Include="unicode\casemap.h">
<Filter>strings</Filter>
</CustomBuild>
<CustomBuild Include="unicode\chariter.h">
<Filter>strings</Filter>
</CustomBuild>
<CustomBuild Include="unicode\edits.h">
<Filter>strings</Filter>
</CustomBuild>
<CustomBuild Include="unicode\rep.h">
<Filter>strings</Filter>
</CustomBuild>

View File

@ -0,0 +1,342 @@
// Copyright (C) 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// edits.cpp
// created: 2017feb08 Markus W. Scherer
#include "unicode/utypes.h"
#include "unicode/edits.h"
#include "cmemory.h"
#include "uassert.h"
U_NAMESPACE_BEGIN
namespace {

// Layout of the 16-bit units stored in the edits array.

// 0000uuuuuuuuuuuu: one unit records a run of u+1 unchanged text units.
const int32_t MAX_UNCHANGED = 0x0fff;
const int32_t MAX_UNCHANGED_LENGTH = MAX_UNCHANGED + 1;

// 0wwwcccccccccccc with w=1..6: one unit records ccc+1 consecutive replacements,
// each replacing w text units with w text units (no length change).
const int32_t MAX_SHORT_WIDTH = 6;
const int32_t MAX_SHORT_CHANGE_LENGTH = 0xfff;
const int32_t MAX_SHORT_CHANGE = (MAX_SHORT_WIDTH << 12) | 0xfff;

// 0111mmmmmmnnnnnn: head unit of a replacement of m text units with n.
// m or n = 61: the actual length follows in the next edits array unit.
// m or n = 62..63: the actual length follows in the next two edits array units,
//                  and bit 30 of the actual length is kept in the head unit.
// Trailing units always have bit 15 set.
const int32_t LENGTH_IN_1TRAIL = 61;
const int32_t LENGTH_IN_2TRAIL = 62;

}  // namespace
/**
 * Destructor.
 * Frees the edits array with uprv_free() unless the object is still using
 * its built-in stackArray.
 */
Edits::~Edits() {
    if(array == stackArray) {
        return;  // Nothing was allocated.
    }
    uprv_free(array);
}
void Edits::reset() {
length = 0;
}
/**
 * Records a run of unchangedLength unchanged text units.
 *
 * Does nothing if an error is already stored or if unchangedLength is 0.
 * A negative unchangedLength sets U_ILLEGAL_ARGUMENT_ERROR.
 * Each array unit holds at most MAX_UNCHANGED_LENGTH unchanged text units
 * (stored as length-1), so the run is first merged into the last unit where
 * possible and then spread over as many further units as needed.
 */
void Edits::addUnchanged(int32_t unchangedLength) {
    if(U_FAILURE(errorCode)) { return; }
    if(unchangedLength == 0) { return; }
    if(unchangedLength < 0) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    // Top up the previous unit if it is an unchanged-text record with room left.
    const int32_t prev = lastUnit();
    if(prev < MAX_UNCHANGED) {
        const int32_t room = MAX_UNCHANGED - prev;
        if(unchangedLength <= room) {
            setLastUnit(prev + unchangedLength);
            return;
        }
        setLastUnit(MAX_UNCHANGED);
        unchangedLength -= room;
    }
    // Emit full units while a whole unit's worth is left.
    for(; unchangedLength >= MAX_UNCHANGED_LENGTH; unchangedLength -= MAX_UNCHANGED_LENGTH) {
        append(MAX_UNCHANGED);
    }
    // Emit the remainder, if any, as one partially filled unit.
    if(unchangedLength > 0) {
        append(unchangedLength - 1);
    }
}
/**
 * Records a replacement of oldLength text units with newLength text units
 * (oldLength == 0 is an insertion, newLength == 0 a deletion).
 *
 * Does nothing if an error is already stored, or if both lengths are 0.
 * Negative lengths set U_ILLEGAL_ARGUMENT_ERROR.
 * If the accumulated length delta would leave the int32_t range,
 * U_INDEX_OUTOFBOUNDS_ERROR is set and nothing is recorded.
 *
 * Encoding:
 * - Same-length replacements of 1..MAX_SHORT_WIDTH units use the compact
 *   0wwwcccccccccccc form and are merged into a preceding unit of the same width.
 * - Everything else uses a 0111mmmmmmnnnnnn head unit, followed by up to
 *   two trailing units per length when a length does not fit into 6 bits.
 */
void Edits::addReplace(int32_t oldLength, int32_t newLength) {
    if(U_FAILURE(errorCode)) { return; }
    if(oldLength == newLength && 0 < oldLength && oldLength <= MAX_SHORT_WIDTH) {
        // Replacement of short oldLength text units by same-length new text.
        // Merge into previous short-replacement record, if any.
        int32_t last = lastUnit();
        if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE &&
                (last >> 12) == oldLength && (last & 0xfff) < MAX_SHORT_CHANGE_LENGTH) {
            setLastUnit(last + 1);
            return;
        }
        append(oldLength << 12);
        return;
    }

    if(oldLength < 0 || newLength < 0) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    if (oldLength == 0 && newLength == 0) {
        return;
    }
    // Both lengths are non-negative here, so this subtraction cannot overflow.
    int32_t newDelta = newLength - oldLength;
    if (newDelta != 0) {
        // The sum can only overflow when both operands have the same sign.
        // Testing the signs first also keeps the guard itself well-defined:
        // INT32_MAX - delta overflows for negative delta, and
        // INT32_MIN - delta overflows for positive delta.
        if ((newDelta > 0 && delta >= 0 && newDelta > (INT32_MAX - delta)) ||
                (newDelta < 0 && delta < 0 && newDelta < (INT32_MIN - delta))) {
            // Integer overflow or underflow.
            errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
            return;
        }
        delta += newDelta;
    }

    int32_t head = 0x7000;
    if (oldLength < LENGTH_IN_1TRAIL && newLength < LENGTH_IN_1TRAIL) {
        // Both lengths fit into the head unit.
        head |= oldLength << 6;
        head |= newLength;
        append(head);
    } else if ((capacity - length) >= 5 || growArray()) {
        // A maximal record is 1 head unit + 2 trailing units per length = 5 units.
        int32_t limit = length + 1;
        if(oldLength < LENGTH_IN_1TRAIL) {
            head |= oldLength << 6;
        } else if(oldLength <= 0x7fff) {
            head |= LENGTH_IN_1TRAIL << 6;
            array[limit++] = (uint16_t)(0x8000 | oldLength);
        } else {
            // Bit 30 of the length goes into the head; bits 29..15 and 14..0 follow.
            head |= (LENGTH_IN_2TRAIL + (oldLength >> 30)) << 6;
            array[limit++] = (uint16_t)(0x8000 | (oldLength >> 15));
            array[limit++] = (uint16_t)(0x8000 | oldLength);
        }
        if(newLength < LENGTH_IN_1TRAIL) {
            head |= newLength;
        } else if(newLength <= 0x7fff) {
            head |= LENGTH_IN_1TRAIL;
            array[limit++] = (uint16_t)(0x8000 | newLength);
        } else {
            head |= LENGTH_IN_2TRAIL + (newLength >> 30);
            array[limit++] = (uint16_t)(0x8000 | (newLength >> 15));
            array[limit++] = (uint16_t)(0x8000 | newLength);
        }
        // The head is written last, once all of its length fields are known.
        array[length] = (uint16_t)head;
        length = limit;
    }
}
/**
 * Appends one unit (the low 16 bits of r) to the edits array.
 * Calls growArray() when the array is full; if that fails, nothing is written.
 */
void Edits::append(int32_t r) {
    if(length >= capacity && !growArray()) {
        return;
    }
    array[length] = (uint16_t)r;
    ++length;
}
/**
 * Replaces the edits array with a larger heap-allocated one and copies the
 * existing units into it.
 *
 * The first growth away from stackArray goes to 2000 units; after that the
 * capacity doubles, capped at INT32_MAX.
 *
 * @return TRUE on success. On failure returns FALSE, leaves the array untouched,
 *         and sets errorCode to U_BUFFER_OVERFLOW_ERROR (cannot grow by at
 *         least 5 units) or U_MEMORY_ALLOCATION_ERROR (allocation failed).
 */
UBool Edits::growArray() {
    int32_t grown;
    if (array == stackArray) {
        grown = 2000;
    } else {
        if (capacity == INT32_MAX) {
            errorCode = U_BUFFER_OVERFLOW_ERROR;
            return FALSE;
        }
        grown = (capacity >= (INT32_MAX / 2)) ? INT32_MAX : 2 * capacity;
    }
    // Grow by at least 5 units so that a maximal change record will fit.
    if ((grown - capacity) < 5) {
        errorCode = U_BUFFER_OVERFLOW_ERROR;
        return FALSE;
    }
    uint16_t *bigger = (uint16_t *)uprv_malloc((size_t)grown * 2);
    if (bigger == NULL) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return FALSE;
    }
    uprv_memcpy(bigger, array, (size_t)length * 2);
    // Only a previously heap-allocated array needs to be released.
    if (array != stackArray) {
        uprv_free(array);
    }
    capacity = grown;
    array = bigger;
    return TRUE;
}
/**
 * Merges this object's error state into outErrorCode.
 * An outErrorCode that already indicates a failure is left as is; otherwise
 * it receives this object's error code if that one is a failure.
 *
 * @return TRUE if outErrorCode indicates a failure when this function returns
 */
UBool Edits::copyErrorTo(UErrorCode &outErrorCode) {
    if (U_SUCCESS(outErrorCode)) {
        if (U_SUCCESS(errorCode)) {
            return FALSE;
        }
        outErrorCode = errorCode;
    }
    return TRUE;
}
/**
 * @return TRUE if the length delta is non-zero or if any recorded unit is
 *         not an unchanged-text unit (that is, greater than MAX_UNCHANGED)
 */
UBool Edits::hasChanges() const {
    if (delta != 0) {
        return TRUE;
    }
    const uint16_t *unit = array;
    const uint16_t *const limit = array + length;
    while (unit < limit) {
        if (*unit++ > MAX_UNCHANGED) {
            return TRUE;
        }
    }
    return FALSE;
}
/**
 * Creates an iterator positioned before the first edit.
 *
 * @param a   the edits array to read
 * @param len number of units in a
 * @param oc  stored as onlyChanges: next() then skips unchanged spans
 * @param crs stored as coarse: next() then merges adjacent changes into one span
 */
Edits::Iterator::Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs)
        : array(a),
          index(0),
          length(len),
          remaining(0),
          onlyChanges(oc),
          coarse(crs),
          changed(FALSE),
          oldLength_(0),
          newLength_(0),
          srcIndex(0),
          replIndex(0),
          destIndex(0) {
}
/**
 * Decodes one length of a 0111mmmmmmnnnnnn replacement record.
 *
 * @param head the 6-bit length field (m or n) taken from the head unit:
 *             below LENGTH_IN_1TRAIL it is the length itself;
 *             LENGTH_IN_1TRAIL means that the length is in the next array unit;
 *             LENGTH_IN_2TRAIL and above mean that the length is spread over the
 *             next two array units, with bit 30 of the length in bit 0 of head.
 * @return the decoded length; index is advanced past any trailing units consumed
 */
int32_t Edits::Iterator::readLength(int32_t head) {
    if (head < LENGTH_IN_1TRAIL) {
        return head;
    } else if (head < LENGTH_IN_2TRAIL) {
        U_ASSERT(index < length);
        U_ASSERT(array[index] >= 0x8000);
        // Trailing units carry a marker in bit 15 (addReplace() writes
        // 0x8000 | length). Strip it, or the result is 0x8000 too large.
        return array[index++] & 0x7fff;
    } else {
        U_ASSERT((index + 2) <= length);
        U_ASSERT(array[index] >= 0x8000);
        U_ASSERT(array[index + 1] >= 0x8000);
        int32_t len = ((head & 1) << 30) |
                ((int32_t)(array[index] & 0x7fff) << 15) |
                (array[index + 1] & 0x7fff);
        index += 2;
        return len;
    }
}
/**
 * Moves the source, replacement and destination indexes past the current span.
 * The replacement index only advances for a span that is marked as changed.
 */
void Edits::Iterator::updateIndexes() {
    destIndex += newLength_;
    srcIndex += oldLength_;
    if (!changed) {
        return;
    }
    replIndex += newLength_;
}
/**
 * Puts the iterator on an empty span beyond the string.
 * @return FALSE, so that next() can return the result directly
 */
UBool Edits::Iterator::noNext() {
    oldLength_ = 0;
    newLength_ = 0;
    return FALSE;
}
/**
 * Advances to the next span and sets changed, oldLength_ and newLength_ for it.
 *
 * The indexes are first moved past the span reported by the previous call.
 * Adjacent unchanged units are always merged into one span; if onlyChanges is set,
 * that span is skipped (but still counted in the indexes).
 * If coarse is set, adjacent change units are merged into one span; otherwise each
 * replacement is reported separately, including each replacement that was
 * compressed into a shared short-change unit.
 *
 * @param errorCode in/out error code; if it already indicates a failure,
 *                  the function returns FALSE without changing the iterator
 * @return TRUE if the iterator is on a span, FALSE if there are no more spans
 */
UBool Edits::Iterator::next(UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return FALSE; }
// We have an errorCode in case we need to start guarding against integer overflows.
// It is also convenient for caller loops if we bail out when an error was set elsewhere.
// Step the indexes over the span that the previous call reported.
updateIndexes();
if (remaining > 0) {
// Fine-grained iterator: Continue a sequence of equal-length changes.
// oldLength_, newLength_ and changed still describe one such change.
--remaining;
return TRUE;
}
if (index >= length) {
// All units have been consumed.
return noNext();
}
int32_t u = array[index++];
if (u <= MAX_UNCHANGED) {
// Combine adjacent unchanged ranges.
// Each unchanged unit stores its length minus 1.
changed = FALSE;
oldLength_ = u + 1;
while (index < length && (u = array[index]) <= MAX_UNCHANGED) {
++index;
oldLength_ += u + 1;
}
newLength_ = oldLength_;
if (onlyChanges) {
// Skip the unchanged span, but account for it in the indexes right away.
updateIndexes();
if (index >= length) {
return noNext();
}
// already fetched u > MAX_UNCHANGED at index
// (the loop above stopped on it without consuming it)
++index;
} else {
return TRUE;
}
}
// From here on, u is a change unit.
changed = TRUE;
if (u <= MAX_SHORT_CHANGE) {
// Short-change unit: bits 14..12 = width w, bits 11..0 = number of replacements minus 1.
if (coarse) {
int32_t w = u >> 12;
int32_t len = (u & 0xfff) + 1;
oldLength_ = newLength_ = len * w;
} else {
// Split a sequence of equal-length changes that was compressed into one unit.
oldLength_ = newLength_ = u >> 12;
remaining = u & 0xfff;
return TRUE;
}
} else {
// Head unit with two 6-bit length fields; readLength() consumes trailing units if any.
U_ASSERT(u <= 0x7fff);
oldLength_ = readLength((u >> 6) & 0x3f);
newLength_ = readLength(u & 0x3f);
if (!coarse) {
return TRUE;
}
}
// Combine adjacent changes.
// Only reached for a coarse iterator; adds up all directly following change units.
while (index < length && (u = array[index]) > MAX_UNCHANGED) {
++index;
if (u <= MAX_SHORT_CHANGE) {
int32_t w = u >> 12;
int32_t len = (u & 0xfff) + 1;
len = len * w;
oldLength_ += len;
newLength_ += len;
} else {
U_ASSERT(u <= 0x7fff);
int32_t oldLen = readLength((u >> 6) & 0x3f);
int32_t newLen = readLength(u & 0x3f);
oldLength_ += oldLen;
newLength_ += newLen;
}
}
return TRUE;
}
/**
 * Moves the iterator to the span that contains source index i.
 *
 * If i lies before the current span, the iterator restarts from the beginning;
 * otherwise it searches forward from the current span.
 *
 * @param i source index; a negative value is rejected
 * @param errorCode in/out error code; if it already indicates a failure,
 *                  the function returns FALSE immediately
 * @return TRUE if a span containing i was found, FALSE if i is negative,
 *         an error is set, or the spans ran out first
 */
UBool Edits::Iterator::findSourceIndex(int32_t i, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode) || i < 0) { return FALSE; }
    if (i < srcIndex) {
        // Reset the iterator to the start.
        // The span lengths must be cleared too: next() begins with updateIndexes(),
        // which would otherwise add the stale lengths of the old span
        // to the freshly zeroed indexes.
        index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
    } else if (i < (srcIndex + oldLength_)) {
        // The index is in the current span.
        return TRUE;
    }
    // NOTE(review): next() skips unchanged spans when onlyChanges is set. For such
    // an iterator an index inside a skipped span appears to be reported as found in
    // the following change span - confirm against the intended API contract.
    while (next(errorCode)) {
        if (i < (srcIndex + oldLength_)) {
            // The index is in the current span.
            return TRUE;
        }
        if (remaining > 0) {
            // Is the index in one of the remaining compressed edits?
            // srcIndex is the start of the current span, before the remaining ones.
            int32_t len = (remaining + 1) * oldLength_;
            if (i < (srcIndex + len)) {
                // Jump directly to the n-th following edit of this compressed unit.
                int32_t n = (i - srcIndex) / oldLength_;  // 1 <= n <= remaining
                len = n * oldLength_;
                // Old and new lengths are equal in a compressed unit,
                // so all three indexes advance by the same amount.
                srcIndex += len;
                replIndex += len;
                destIndex += len;
                remaining -= n;
                return TRUE;
            }
            // Make next() skip all of these edits at once.
            oldLength_ = newLength_ = len;
            remaining = 0;
        }
    }
    return FALSE;
}
U_NAMESPACE_END

View File

@ -22,6 +22,7 @@
#include "unicode/utypes.h"
#include "unicode/locid.h"
#include "unicode/putil.h"
#include "unicode/uchar.h"
#include "unicode/uloc.h"
#include "unicode/ures.h"
#include "unicode/uscript.h"

View File

@ -33,6 +33,7 @@
#include "cmemory.h"
#include "cstring.h"
#include "ucase.h"
#include "ucasemap_imp.h"
#include "ustr_imp.h"
U_NAMESPACE_USE

View File

@ -0,0 +1,236 @@
// Copyright (C) 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// ucasemap_imp.h
// created: 2017feb08 Markus W. Scherer
#ifndef __UCASEMAP_IMP_H__
#define __UCASEMAP_IMP_H__
#include "unicode/utypes.h"
#include "unicode/ucasemap.h"
#include "ucase.h"
#ifndef U_COMPARE_IGNORE_CASE
/* see also unorm.h */
/**
* Option bit for unorm_compare:
* Perform case-insensitive comparison.
*/
#define U_COMPARE_IGNORE_CASE 0x10000
#endif
/**
* Internal API, used by u_strcasecmp() etc.
* Compare strings case-insensitively,
* in code point order or code unit order.
*/
U_CFUNC int32_t
u_strcmpFold(const UChar *s1, int32_t length1,
const UChar *s2, int32_t length2,
uint32_t options,
UErrorCode *pErrorCode);
/**
* Internal API, used for detecting length of
* shared prefix case-insensitively.
* @param s1 input string 1
* @param length1 length of string 1, or -1 (NULL terminated)
* @param s2 input string 2
* @param length2 length of string 2, or -1 (NULL terminated)
* @param options compare options
* @param matchLen1 (output) length of partial prefix match in s1
* @param matchLen2 (output) length of partial prefix match in s2
* @param pErrorCode receives error status
*/
U_CAPI void
u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1,
const UChar *s2, int32_t length2,
uint32_t options,
int32_t *matchLen1, int32_t *matchLen2,
UErrorCode *pErrorCode);
/**
* Are the Unicode properties loaded?
* This must be used before internal functions are called that do
* not perform this check.
* Generate a debug assertion failure if data is not loaded.
*/
U_CFUNC UBool
uprv_haveProperties(UErrorCode *pErrorCode);
#ifdef __cplusplus
#include "unicode/unistr.h" // for UStringCaseMapper
/*
* Internal string casing functions implementing
* ustring.h/ustrcase.cpp and UnicodeString case mapping functions.
*/
/**
 * Definition of the UCaseMap object declared by unicode/ucasemap.h.
 * Holds a locale, a case-mapping options word and, unless break iteration
 * is configured out, an owned BreakIterator.
 */
struct UCaseMap : public icu::UMemory {
/** Implements most of ucasemap_open(). */
UCaseMap(const char *localeID, uint32_t opts, UErrorCode *pErrorCode);
~UCaseMap();
#if !UCONFIG_NO_BREAK_ITERATION
icu::BreakIterator *iter; /* We adopt the iterator, so we own it. */
#endif
// NOTE(review): presumably the locale ID as a NUL-terminated string; the fixed
// 32-char buffer limits its length - confirm how the constructor handles longer IDs.
char locale[32];
// NOTE(review): presumably the value that ustrcase_getCaseLocale() yields for
// this locale, as passed to the ustrcase_internalToXyz() functions - confirm.
int32_t caseLocale;
// NOTE(review): presumably the options bit set received as opts - confirm.
uint32_t options;
};
/*
 * Helper macros for functions that take a BreakIterator only when break
 * iteration is compiled in.
 * With UCONFIG_NO_BREAK_ITERATION set, they all expand to nothing.
 * Otherwise each expands to one list item including its trailing comma,
 * so that it can be written directly in front of the next item:
 *   UCASEMAP_BREAK_ITERATOR_PARAM   named parameter declaration "iter"
 *   UCASEMAP_BREAK_ITERATOR_UNUSED  unnamed parameter declaration
 *   UCASEMAP_BREAK_ITERATOR         argument passing "iter" along
 *   UCASEMAP_BREAK_ITERATOR_NULL    argument passing NULL
 */
#if UCONFIG_NO_BREAK_ITERATION
# define UCASEMAP_BREAK_ITERATOR_PARAM
# define UCASEMAP_BREAK_ITERATOR_UNUSED
# define UCASEMAP_BREAK_ITERATOR
# define UCASEMAP_BREAK_ITERATOR_NULL
#else
# define UCASEMAP_BREAK_ITERATOR_PARAM icu::BreakIterator *iter,
# define UCASEMAP_BREAK_ITERATOR_UNUSED icu::BreakIterator *,
# define UCASEMAP_BREAK_ITERATOR iter,
# define UCASEMAP_BREAK_ITERATOR_NULL NULL,
#endif
U_CFUNC int32_t
ustrcase_getCaseLocale(const char *locale);
// TODO: swap src / dest if approved for new public api
/** Implements UStringCaseMapper. */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
icu::Edits *edits,
UErrorCode &errorCode);
/** Implements UStringCaseMapper. */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
icu::Edits *edits,
UErrorCode &errorCode);
#if !UCONFIG_NO_BREAK_ITERATION
/** Implements UStringCaseMapper. */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalToTitle(int32_t caseLocale, uint32_t options,
icu::BreakIterator *iter,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
icu::Edits *edits,
UErrorCode &errorCode);
#endif
/** Implements UStringCaseMapper. */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalFold(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
icu::Edits *edits,
UErrorCode &errorCode);
/**
* Common string case mapping implementation for ucasemap_toXyz() and UnicodeString::toXyz().
* Implements argument checking.
*/
U_CFUNC int32_t
ustrcase_map(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UStringCaseMapper *stringCaseMapper,
icu::Edits *edits,
UErrorCode &errorCode);
/**
* Common string case mapping implementation for old-fashioned u_strToXyz() functions
* that allow the source string to overlap the destination buffer.
* Implements argument checking and internally works with an intermediate buffer if necessary.
*/
U_CFUNC int32_t
ustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UStringCaseMapper *stringCaseMapper,
UErrorCode &errorCode);
/**
* UTF-8 string case mapping function type, used by ucasemap_mapUTF8().
* UTF-8 version of UStringCaseMapper.
* All error checking must be done.
* The UCaseMap must be fully initialized, with locale and/or iter set as needed.
* src and dest must not overlap.
*/
typedef int32_t U_CALLCONV
UTF8CaseMapper(int32_t caseLocale, uint32_t options,
#if !UCONFIG_NO_BREAK_ITERATION
icu::BreakIterator *iter,
#endif
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength,
UErrorCode *pErrorCode);
#if !UCONFIG_NO_BREAK_ITERATION
/** Implements UTF8CaseMapper. */
U_CFUNC int32_t U_CALLCONV
ucasemap_internalUTF8ToTitle(int32_t caseLocale, uint32_t options,
icu::BreakIterator *iter,
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength,
UErrorCode *pErrorCode);
#endif
/**
* Implements argument checking and buffer handling
* for UTF-8 string case mapping as a common function.
*/
U_CFUNC int32_t
ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength,
UTF8CaseMapper *stringCaseMapper,
UErrorCode *pErrorCode);
U_NAMESPACE_BEGIN
/**
 * Constants and data accessors shared by the Greek uppercasing code.
 * The bit constants below are flags in 32-bit data words;
 * the two functions are only declared here and are defined elsewhere.
 */
namespace GreekUpper {
// Data bits.
// UPPER_MASK covers the low 10 bits; the HAS_* flags are single bits above it.
static const uint32_t UPPER_MASK = 0x3ff;
static const uint32_t HAS_VOWEL = 0x1000;
static const uint32_t HAS_YPOGEGRAMMENI = 0x2000;
static const uint32_t HAS_ACCENT = 0x4000;
static const uint32_t HAS_DIALYTIKA = 0x8000;
// Further bits during data building and processing, not stored in the data map.
static const uint32_t HAS_COMBINING_DIALYTIKA = 0x10000;
static const uint32_t HAS_OTHER_GREEK_DIACRITIC = 0x20000;
// Convenience combinations of the flags above.
static const uint32_t HAS_VOWEL_AND_ACCENT = HAS_VOWEL | HAS_ACCENT;
static const uint32_t HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA =
HAS_VOWEL_AND_ACCENT | HAS_DIALYTIKA;
static const uint32_t HAS_EITHER_DIALYTIKA = HAS_DIALYTIKA | HAS_COMBINING_DIALYTIKA;
// State bits.
static const uint32_t AFTER_CASED = 1;
static const uint32_t AFTER_VOWEL_WITH_ACCENT = 2;
// NOTE(review): presumably returns the data bits (UPPER_MASK value plus HAS_* flags)
// for code point c - the definition is not in this header; confirm there.
uint32_t getLetterData(UChar32 c);
/**
 * Returns a non-zero value for each of the Greek combining diacritics
 * listed in The Unicode Standard, version 8, chapter 7.2 Greek,
 * plus some perispomeni look-alikes.
 */
uint32_t getDiacriticData(UChar32 c);
} // namespace GreekUpper
U_NAMESPACE_END
#endif // __cplusplus
#endif // __UCASEMAP_IMP_H__

View File

@ -26,7 +26,7 @@
#include "unicode/ucasemap.h"
#include "cmemory.h"
#include "ucase.h"
#include "ustr_imp.h"
#include "ucasemap_imp.h"
U_NAMESPACE_USE

View File

@ -0,0 +1,193 @@
// Copyright (C) 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// casemap.h
// created: 2017jan12 Markus W. Scherer
#ifndef __CASEMAP_H__
#define __CASEMAP_H__
#include "unicode/utypes.h"
#include "unicode/uobject.h"
/**
* \file
* \brief C++ API: Low-level C++ case mapping functions.
*/
U_NAMESPACE_BEGIN
#ifndef U_HIDE_DRAFT_API
class BreakIterator;
class Edits;
/**
* Low-level C++ case mapping functions.
*
* @draft ICU 59
*/
class U_COMMON_API CaseMap final : public UMemory {
public:
/**
* Lowercases a UTF-16 string and optionally records edits.
* Casing is locale-dependent and context-sensitive.
* The result may be longer or shorter than the original.
* The source string and the destination buffer must not overlap.
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT.
* @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param dest A buffer for the result string. The result will be NUL-terminated if
* the buffer is large enough.
* The contents are undefined in case of failure.
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
* dest may be NULL and the function will only return the length of the result
* without writing any of the result string.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* This function calls edits->reset() first. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful - or in case of a buffer overflow,
* in which case it will be greater than destCapacity.
*
* @see u_strToLower
* @draft ICU 59
*/
static int32_t toLower(
const char *locale, uint32_t options,
const UChar *src, int32_t srcLength,
UChar *dest, int32_t destCapacity, Edits *edits,
UErrorCode &errorCode);
/**
* Uppercases a UTF-16 string and optionally records edits.
* Casing is locale-dependent and context-sensitive.
* The result may be longer or shorter than the original.
* The source string and the destination buffer must not overlap.
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT.
* @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param dest A buffer for the result string. The result will be NUL-terminated if
* the buffer is large enough.
* The contents are undefined in case of failure.
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
* dest may be NULL and the function will only return the length of the result
* without writing any of the result string.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* This function calls edits->reset() first. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful - or in case of a buffer overflow,
* in which case it will be greater than destCapacity.
*
* @see u_strToUpper
* @draft ICU 59
*/
static int32_t toUpper(
const char *locale, uint32_t options,
const UChar *src, int32_t srcLength,
UChar *dest, int32_t destCapacity, Edits *edits,
UErrorCode &errorCode);
#if !UCONFIG_NO_BREAK_ITERATION
/**
* Titlecases a UTF-16 string and optionally records edits.
* Casing is locale-dependent and context-sensitive.
* The result may be longer or shorter than the original.
* The source string and the destination buffer must not overlap.
*
* Titlecasing uses a break iterator to find the first characters of words
* that are to be titlecased. It titlecases those characters and lowercases
* all others. (This can be modified with options bits.)
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT,
* U_TITLECASE_NO_LOWERCASE, U_TITLECASE_NO_BREAK_ADJUSTMENT.
* @param iter A break iterator to find the first characters of words that are to be titlecased.
* It is set to the source string (setText())
* and used one or more times for iteration (first() and next()).
* If NULL, then a word break iterator for the locale is used
* (or something equivalent).
* @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param dest A buffer for the result string. The result will be NUL-terminated if
* the buffer is large enough.
* The contents are undefined in case of failure.
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
* dest may be NULL and the function will only return the length of the result
* without writing any of the result string.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* This function calls edits->reset() first. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful - or in case of a buffer overflow,
* in which case it will be greater than destCapacity.
*
* @see u_strToTitle
* @see ucasemap_toTitle
* @draft ICU 59
*/
static int32_t toTitle(
const char *locale, uint32_t options, BreakIterator *iter,
const UChar *src, int32_t srcLength,
UChar *dest, int32_t destCapacity, Edits *edits,
UErrorCode &errorCode);
#endif // UCONFIG_NO_BREAK_ITERATION
/**
* Case-folds a UTF-16 string and optionally records edits.
*
* Case-folding is locale-independent and not context-sensitive,
* but there is an option for whether to include or exclude mappings for dotted I
* and dotless i that are marked with 'T' in CaseFolding.txt.
*
* The result may be longer or shorter than the original.
* The source string and the destination buffer must not overlap.
*
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT,
* U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
* @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param dest A buffer for the result string. The result will be NUL-terminated if
* the buffer is large enough.
* The contents are undefined in case of failure.
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
* dest may be NULL and the function will only return the length of the result
* without writing any of the result string.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* This function calls edits->reset() first. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful - or in case of a buffer overflow,
* in which case it will be greater than destCapacity.
*
* @see u_strFoldCase
* @draft ICU 59
*/
static int32_t foldCase(
uint32_t options,
const UChar *src, int32_t srcLength,
UChar *dest, int32_t destCapacity, Edits *edits,
UErrorCode &errorCode);
private:
CaseMap() = delete;
CaseMap(const CaseMap &other) = delete;
CaseMap &operator=(const CaseMap &other) = delete;
};
#endif // U_HIDE_DRAFT_API
U_NAMESPACE_END
#endif // __CASEMAP_H__

View File

@ -0,0 +1,244 @@
// Copyright (C) 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// edits.h
// created: 2016dec30 Markus W. Scherer
#ifndef __EDITS_H__
#define __EDITS_H__
#include "unicode/utypes.h"
#include "unicode/uobject.h"
/**
* \file
* \brief C++ API: C++ class Edits for low-level string transformations on styled text.
*/
U_NAMESPACE_BEGIN
#ifndef U_HIDE_DRAFT_API
/**
* Records lengths of string edits but not replacement text.
* Supports replacements, insertions, deletions in linear progression.
* Does not support moving/reordering of text.
*
* An Edits object tracks a separate UErrorCode, but ICU string transformation functions
* (e.g., case mapping functions) merge any such errors into their API's UErrorCode.
*
* @draft ICU 59
*/
class U_COMMON_API Edits final : public UMemory {
public:
/**
* Constructs an empty object.
* @draft ICU 59
*/
Edits() :
array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0),
errorCode(U_ZERO_ERROR) {}
/**
* Destructor.
* @draft ICU 59
*/
~Edits();
/**
* Resets the data but may not release memory.
* @draft ICU 59
*/
void reset();
/**
* Adds a record for an unchanged segment of text.
* Normally called from inside ICU string transformation functions, not user code.
* @draft ICU 59
*/
void addUnchanged(int32_t unchangedLength);
/**
* Adds a record for a text replacement/insertion/deletion.
* Normally called from inside ICU string transformation functions, not user code.
* @draft ICU 59
*/
void addReplace(int32_t oldLength, int32_t newLength);
/**
* Sets the UErrorCode if an error occurred while recording edits.
* Preserves older error codes in the outErrorCode.
* Normally called from inside ICU string transformation functions, not user code.
* @return TRUE if U_FAILURE(outErrorCode)
* @draft ICU 59
*/
UBool copyErrorTo(UErrorCode &outErrorCode);
/**
* How much longer is the new text compared with the old text?
* @return new length minus old length
* @draft ICU 59
*/
int32_t lengthDelta() const { return delta; }
/**
* @return TRUE if there are any change edits
* @draft ICU 59
*/
UBool hasChanges() const;
/**
* Access to the list of edits.
* @see getCoarseIterator
* @see getFineIterator
* @draft ICU 59
*/
struct Iterator final : public UMemory {
/**
* Copy constructor.
* @draft ICU 59
*/
Iterator(const Iterator &other) = default;
/**
* Assignment operator.
* @draft ICU 59
*/
Iterator &operator=(const Iterator &other) = default;
/**
* Advances to the next edit.
* @return TRUE if there is another edit
* @draft ICU 59
*/
UBool next(UErrorCode &errorCode);
/**
* Finds the edit that contains the source index.
* The source index may be found in a non-change
* even if normal iteration would skip non-changes.
* Normal iteration can continue from a found edit.
*
* The iterator state before this search logically does not matter.
* (It may affect the performance of the search.)
*
* The iterator state after this search is undefined
* if the source index is out of bounds for the source string.
*
* @param i source index
* @return TRUE if the edit for the source index was found
* @draft ICU 59
*/
UBool findSourceIndex(int32_t i, UErrorCode &errorCode);
/**
* @return TRUE if this edit replaces oldLength() units with newLength() different ones.
* FALSE if oldLength units remain unchanged.
* @draft ICU 59
*/
UBool hasChange() const { return changed; }
/**
* @return the number of units in the original string which are replaced or remain unchanged.
* @draft ICU 59
*/
int32_t oldLength() const { return oldLength_; }
/**
* @return the number of units in the modified string, if hasChange() is TRUE.
* Same as oldLength if hasChange() is FALSE.
* @draft ICU 59
*/
int32_t newLength() const { return newLength_; }
/**
* @return the current index into the source string
* @draft ICU 59
*/
int32_t sourceIndex() const { return srcIndex; }
/**
* @return the current index into the replacement-characters-only string,
* not counting unchanged spans
* @draft ICU 59
*/
int32_t replacementIndex() const { return replIndex; }
/**
* @return the current index into the full destination string
* @draft ICU 59
*/
int32_t destinationIndex() const { return destIndex; }
private:
friend class Edits;
Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs);
int32_t readLength(int32_t head);
void updateIndexes();
UBool noNext();
const uint16_t *array;
int32_t index, length;
int32_t remaining;
UBool onlyChanges, coarse;
UBool changed;
int32_t oldLength_, newLength_;
int32_t srcIndex, replIndex, destIndex;
};
/**
* Returns an Iterator for coarse-grained changes for simple string updates.
* Skips non-changes.
* @return an Iterator that merges adjacent changes.
* @draft ICU 59
*/
Iterator getCoarseChangesIterator() const {
return Iterator(array, length, TRUE, TRUE);
}
/**
* Returns an Iterator for coarse-grained changes and non-changes for simple string updates.
* @return an Iterator that merges adjacent changes.
* @draft ICU 59
*/
Iterator getCoarseIterator() const {
return Iterator(array, length, FALSE, TRUE);
}
/**
* Returns an Iterator for fine-grained changes for modifying styled text.
* Skips non-changes.
* @return an Iterator that separates adjacent changes.
* @draft ICU 59
*/
Iterator getFineChangesIterator() const {
return Iterator(array, length, TRUE, FALSE);
}
/**
* Returns an Iterator for fine-grained changes and non-changes for modifying styled text.
* @return an Iterator that separates adjacent changes.
* @draft ICU 59
*/
Iterator getFineIterator() const {
return Iterator(array, length, FALSE, FALSE);
}
private:
Edits(const Edits &) = delete;
Edits &operator=(const Edits &) = delete;
void setLastUnit(int32_t last) { array[length - 1] = (uint16_t)last; }
int32_t lastUnit() const { return length > 0 ? array[length - 1] : 0xffff; }
void append(int32_t r);
UBool growArray();
static const int32_t STACK_CAPACITY = 100;
uint16_t *array;
int32_t capacity;
int32_t length;
int32_t delta;
UErrorCode errorCode;
uint16_t stackArray[STACK_CAPACITY];
};
#endif // U_HIDE_DRAFT_API
U_NAMESPACE_END
#endif // __EDITS_H__

View File

@ -23,11 +23,6 @@
#include "unicode/utypes.h"
#include "unicode/localpointer.h"
#if U_SHOW_CPLUSPLUS_API
#include "unicode/uobject.h"
#endif // U_SHOW_CPLUSPLUS_API
#include "unicode/ustring.h"
/**
@ -88,8 +83,6 @@ ucasemap_close(UCaseMap *csm);
U_NAMESPACE_BEGIN
class BreakIterator;
/**
* \class LocalUCaseMapPointer
* "Smart pointer" class, closes a UCaseMap via ucasemap_close().
@ -101,401 +94,6 @@ class BreakIterator;
*/
U_DEFINE_LOCAL_OPEN_POINTER(LocalUCaseMapPointer, UCaseMap, ucasemap_close);
// TODO: move to new C++ unicode/casemap.h
#ifndef U_HIDE_DRAFT_API
/**
* Records lengths of string edits but not replacement text.
* Supports replacements, insertions, deletions in linear progression.
* Does not support moving/reordering of text.
*
* An Edits object tracks a separate UErrorCode, but ICU string transformation functions
* (e.g., case mapping functions) merge any such errors into their API's UErrorCode.
*
* @draft ICU 59
*/
class U_COMMON_API Edits final : public UMemory {
public:
/**
* Constructs an empty object.
* @draft ICU 59
*/
Edits() :
array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0),
errorCode(U_ZERO_ERROR) {}
/**
* Destructor.
* @draft ICU 59
*/
~Edits();
/**
* Resets the data but may not release memory.
* @draft ICU 59
*/
void reset();
/**
* Adds a record for an unchanged segment of text.
* Normally called from inside ICU string transformation functions, not user code.
* @draft ICU 59
*/
void addUnchanged(int32_t unchangedLength);
/**
* Adds a record for a text replacement/insertion/deletion.
* Normally called from inside ICU string transformation functions, not user code.
* @draft ICU 59
*/
void addReplace(int32_t oldLength, int32_t newLength);
/**
* Sets the UErrorCode if an error occurred while recording edits.
* Preserves older error codes in the outErrorCode.
* Normally called from inside ICU string transformation functions, not user code.
* @return TRUE if U_FAILURE(outErrorCode)
* @draft ICU 59
*/
UBool copyErrorTo(UErrorCode &outErrorCode);
/**
* How much longer is the new text compared with the old text?
* @return new length minus old length
* @draft ICU 59
*/
int32_t lengthDelta() const { return delta; }
/**
* @return TRUE if there are any change edits
* @draft ICU 59
*/
UBool hasChanges() const;
/**
* Access to the list of edits.
* @see getCoarseIterator
* @see getFineIterator
* @draft ICU 59
*/
struct Iterator final : public UMemory {
/**
* Copy constructor.
* @draft ICU 59
*/
Iterator(const Iterator &other) = default;
/**
* Assignment operator.
* @draft ICU 59
*/
Iterator &operator=(const Iterator &other) = default;
/**
* Advances to the next edit.
* @return TRUE if there is another edit
* @draft ICU 59
*/
UBool next(UErrorCode &errorCode);
/**
* Finds the edit that contains the source index.
* The source index may be found in a non-change
* even if normal iteration would skip non-changes.
* Normal iteration can continue from a found edit.
*
* The iterator state before this search logically does not matter.
* (It may affect the performance of the search.)
*
* The iterator state after this search is undefined
* if the source index is out of bounds for the source string.
*
* @param i source index
* @return TRUE if the edit for the source index was found
* @draft ICU 59
*/
UBool findSourceIndex(int32_t i, UErrorCode &errorCode);
/**
* @return TRUE if this edit replaces oldLength() units with newLength() different ones.
* FALSE if oldLength units remain unchanged.
* @draft ICU 59
*/
UBool hasChange() const { return changed; }
/**
* @return the number of units in the original string which are replaced or remain unchanged.
* @draft ICU 59
*/
int32_t oldLength() const { return oldLength_; }
/**
* @return the number of units in the modified string, if hasChange() is TRUE.
* Same as oldLength if hasChange() is FALSE.
* @draft ICU 59
*/
int32_t newLength() const { return newLength_; }
/**
* @return the current index into the source string
* @draft ICU 59
*/
int32_t sourceIndex() const { return srcIndex; }
/**
* @return the current index into the replacement-characters-only string,
* not counting unchanged spans
* @draft ICU 59
*/
int32_t replacementIndex() const { return replIndex; }
/**
* @return the current index into the full destination string
* @draft ICU 59
*/
int32_t destinationIndex() const { return destIndex; }
private:
friend class Edits;
Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs);
int32_t readLength(int32_t head);
void updateIndexes();
UBool noNext();
const uint16_t *array;
int32_t index, length;
int32_t remaining;
UBool onlyChanges, coarse;
UBool changed;
int32_t oldLength_, newLength_;
int32_t srcIndex, replIndex, destIndex;
};
/**
* Returns an Iterator for coarse-grained changes for simple string updates.
* Skips non-changes.
* @return an Iterator that merges adjacent changes.
* @draft ICU 59
*/
Iterator getCoarseChangesIterator() const {
return Iterator(array, length, TRUE, TRUE);
}
/**
* Returns an Iterator for coarse-grained changes and non-changes for simple string updates.
* @return an Iterator that merges adjacent changes.
* @draft ICU 59
*/
Iterator getCoarseIterator() const {
return Iterator(array, length, FALSE, TRUE);
}
/**
* Returns an Iterator for fine-grained changes for modifying styled text.
* Skips non-changes.
* @return an Iterator that separates adjacent changes.
* @draft ICU 59
*/
Iterator getFineChangesIterator() const {
return Iterator(array, length, TRUE, FALSE);
}
/**
* Returns an Iterator for fine-grained changes and non-changes for modifying styled text.
* @return an Iterator that separates adjacent changes.
* @draft ICU 59
*/
Iterator getFineIterator() const {
return Iterator(array, length, FALSE, FALSE);
}
private:
Edits(const Edits &) = delete;
Edits &operator=(const Edits &) = delete;
void setLastUnit(int32_t last) { array[length - 1] = (uint16_t)last; }
int32_t lastUnit() const { return length > 0 ? array[length - 1] : 0xffff; }
void append(int32_t r);
UBool growArray();
static const int32_t STACK_CAPACITY = 100;
uint16_t *array;
int32_t capacity;
int32_t length;
int32_t delta;
UErrorCode errorCode;
uint16_t stackArray[STACK_CAPACITY];
};
/**
* Low-level C++ case mapping functions.
*
* @draft ICU 59
*/
class U_COMMON_API CaseMap final : public UMemory {
public:
/**
* Lowercases a UTF-16 string and optionally records edits.
* Casing is locale-dependent and context-sensitive.
* The result may be longer or shorter than the original.
* The source string and the destination buffer must not overlap.
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT.
* @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param dest A buffer for the result string. The result will be NUL-terminated if
* the buffer is large enough.
* The contents is undefined in case of failure.
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
* dest may be NULL and the function will only return the length of the result
* without writing any of the result string.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* This function calls edits->reset() first. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful - or in case of a buffer overflow,
* in which case it will be greater than destCapacity.
*
* @see u_strToLower
* @draft ICU 59
*/
static int32_t toLower(
const char *locale, uint32_t options,
const UChar *src, int32_t srcLength,
UChar *dest, int32_t destCapacity, Edits *edits,
UErrorCode &errorCode);
/**
* Uppercases a UTF-16 string and optionally records edits.
* Casing is locale-dependent and context-sensitive.
* The result may be longer or shorter than the original.
* The source string and the destination buffer must not overlap.
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT.
* @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param dest A buffer for the result string. The result will be NUL-terminated if
* the buffer is large enough.
* The contents is undefined in case of failure.
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
* dest may be NULL and the function will only return the length of the result
* without writing any of the result string.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* This function calls edits->reset() first. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful - or in case of a buffer overflow,
* in which case it will be greater than destCapacity.
*
* @see u_strToUpper
* @draft ICU 59
*/
static int32_t toUpper(
const char *locale, uint32_t options,
const UChar *src, int32_t srcLength,
UChar *dest, int32_t destCapacity, Edits *edits,
UErrorCode &errorCode);
#if !UCONFIG_NO_BREAK_ITERATION
/**
* Titlecases a UTF-16 string and optionally records edits.
* Casing is locale-dependent and context-sensitive.
* The result may be longer or shorter than the original.
* The source string and the destination buffer must not overlap.
*
* Titlecasing uses a break iterator to find the first characters of words
* that are to be titlecased. It titlecases those characters and lowercases
* all others. (This can be modified with options bits.)
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT,
* U_TITLECASE_NO_LOWERCASE, U_TITLECASE_NO_BREAK_ADJUSTMENT.
* @param iter A break iterator to find the first characters of words that are to be titlecased.
* It is set to the source string (setText())
* and used one or more times for iteration (first() and next()).
* If NULL, then a word break iterator for the locale is used
* (or something equivalent).
* @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param dest A buffer for the result string. The result will be NUL-terminated if
* the buffer is large enough.
* The contents is undefined in case of failure.
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
* dest may be NULL and the function will only return the length of the result
* without writing any of the result string.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* This function calls edits->reset() first. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful - or in case of a buffer overflow,
* in which case it will be greater than destCapacity.
*
* @see u_strToTitle
* @see ucasemap_toTitle
* @draft ICU 59
*/
static int32_t toTitle(
const char *locale, uint32_t options, BreakIterator *iter,
const UChar *src, int32_t srcLength,
UChar *dest, int32_t destCapacity, Edits *edits,
UErrorCode &errorCode);
#endif // UCONFIG_NO_BREAK_ITERATION
/**
* Case-folds a UTF-16 string and optionally records edits.
*
* Case-folding is locale-independent and not context-sensitive,
* but there is an option for whether to include or exclude mappings for dotted I
* and dotless i that are marked with 'T' in CaseFolding.txt.
*
* The result may be longer or shorter than the original.
* The source string and the destination buffer must not overlap.
*
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT,
* U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
* @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param dest A buffer for the result string. The result will be NUL-terminated if
* the buffer is large enough.
* The contents is undefined in case of failure.
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
* dest may be NULL and the function will only return the length of the result
* without writing any of the result string.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* This function calls edits->reset() first. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful - or in case of a buffer overflow,
* in which case it will be greater than destCapacity.
*
* @see u_strFoldCase
* @draft ICU 59
*/
static int32_t foldCase(
uint32_t options,
const UChar *src, int32_t srcLength,
UChar *dest, int32_t destCapacity, Edits *edits,
UErrorCode &errorCode);
private:
CaseMap() = delete;
CaseMap(const CaseMap &other) = delete;
CaseMap &operator=(const CaseMap &other) = delete;
};
/**
* Omit unchanged text when case-mapping with Edits.
*
* @draft ICU 59
*/
#define UCASEMAP_OMIT_UNCHANGED_TEXT 0x4000
#endif // U_HIDE_DRAFT_API
U_NAMESPACE_END
#endif
@ -587,6 +185,15 @@ ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode);
*/
#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
/**
* Omit unchanged text when case-mapping with Edits.
*
* @see CaseMap
* @see Edits
* @draft ICU 59
*/
#define UCASEMAP_OMIT_UNCHANGED_TEXT 0x4000
#if !UCONFIG_NO_BREAK_ITERATION
/**

View File

@ -19,6 +19,8 @@
*/
#include "unicode/utypes.h"
#include "unicode/casemap.h"
#include "unicode/edits.h"
#include "unicode/putil.h"
#include "cstring.h"
#include "cmemory.h"
@ -26,8 +28,8 @@
#include "unicode/unistr.h"
#include "unicode/uchar.h"
#include "uassert.h"
#include "ucasemap_imp.h"
#include "uelement.h"
#include "ustr_imp.h"
U_NAMESPACE_BEGIN

View File

@ -21,7 +21,7 @@
#include "unicode/locid.h"
#include "unicode/ucasemap.h"
#include "unicode/unistr.h"
#include "ustr_imp.h"
#include "ucasemap_imp.h"
U_NAMESPACE_BEGIN

View File

@ -25,7 +25,7 @@
#include "unicode/locid.h"
#include "unicode/ucasemap.h"
#include "unicode/unistr.h"
#include "ustr_imp.h"
#include "ucasemap_imp.h"
U_NAMESPACE_BEGIN

View File

@ -18,24 +18,6 @@
#define __USTR_IMP_H__
#include "unicode/utypes.h"
#include "unicode/ucasemap.h"
#include "unicode/uiter.h"
#include "ucase.h"
/** Simple declaration to avoid including unicode/ubrk.h. */
#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
# define UBRK_TYPEDEF_UBREAK_ITERATOR
typedef struct UBreakIterator UBreakIterator;
#endif
#ifndef U_COMPARE_IGNORE_CASE
/* see also unorm.h */
/**
* Option bit for unorm_compare:
* Perform case-insensitive comparison.
*/
#define U_COMPARE_IGNORE_CASE 0x10000
#endif
/**
* Internal option for unorm_cmpEquivFold() for strncmp style.
@ -54,230 +36,6 @@ uprv_strCompare(const UChar *s1, int32_t length1,
const UChar *s2, int32_t length2,
UBool strncmpStyle, UBool codePointOrder);
/**
* Internal API, used by u_strcasecmp() etc.
* Compare strings case-insensitively,
* in code point order or code unit order.
*/
U_CFUNC int32_t
u_strcmpFold(const UChar *s1, int32_t length1,
const UChar *s2, int32_t length2,
uint32_t options,
UErrorCode *pErrorCode);
/**
* Internal API, used for detecting length of
* shared prefix case-insensitively.
* @param s1 input string 1
* @param length1 length of string 1, or -1 (NUL-terminated)
* @param s2 input string 2
* @param length2 length of string 2, or -1 (NUL-terminated)
* @param options compare options
* @param matchLen1 (output) length of partial prefix match in s1
* @param matchLen2 (output) length of partial prefix match in s2
* @param pErrorCode receives error status
*/
U_CAPI void
u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1,
const UChar *s2, int32_t length2,
uint32_t options,
int32_t *matchLen1, int32_t *matchLen2,
UErrorCode *pErrorCode);
/**
* Are the Unicode properties loaded?
* This must be used before internal functions are called that do
* not perform this check.
* Generate a debug assertion failure if data is not loaded.
*/
U_CFUNC UBool
uprv_haveProperties(UErrorCode *pErrorCode);
/**
* Load the Unicode property data.
* Intended primarily for use from u_init().
* Has no effect if property data is already loaded.
* NOT thread safe.
*/
/*U_CFUNC int8_t
uprv_loadPropsData(UErrorCode *errorCode);*/
#ifdef __cplusplus
// TODO: Consider moving these case mapping definitions
// into a new internal header like ucasemap_imp.h.
#include "unicode/unistr.h" // for UStringCaseMapper
/*
* Internal string casing functions implementing
* ustring.h/ustrcase.c and UnicodeString case mapping functions.
*/
struct UCaseMap : public icu::UMemory {
/** Implements most of ucasemap_open(). */
UCaseMap(const char *localeID, uint32_t opts, UErrorCode *pErrorCode);
~UCaseMap();
#if !UCONFIG_NO_BREAK_ITERATION
icu::BreakIterator *iter; /* We adopt the iterator, so we own it. */
#endif
char locale[32];
int32_t caseLocale;
uint32_t options;
};
#if UCONFIG_NO_BREAK_ITERATION
# define UCASEMAP_BREAK_ITERATOR_PARAM
# define UCASEMAP_BREAK_ITERATOR_UNUSED
# define UCASEMAP_BREAK_ITERATOR
# define UCASEMAP_BREAK_ITERATOR_NULL
#else
# define UCASEMAP_BREAK_ITERATOR_PARAM icu::BreakIterator *iter,
# define UCASEMAP_BREAK_ITERATOR_UNUSED icu::BreakIterator *,
# define UCASEMAP_BREAK_ITERATOR iter,
# define UCASEMAP_BREAK_ITERATOR_NULL NULL,
#endif
U_CFUNC int32_t
ustrcase_getCaseLocale(const char *locale);
// TODO: swap src / dest if approved for new public api
/** Implements UStringCaseMapper. */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
icu::Edits *edits,
UErrorCode &errorCode);
/** Implements UStringCaseMapper. */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
icu::Edits *edits,
UErrorCode &errorCode);
#if !UCONFIG_NO_BREAK_ITERATION
/** Implements UStringCaseMapper. */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalToTitle(int32_t caseLocale, uint32_t options,
icu::BreakIterator *iter,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
icu::Edits *edits,
UErrorCode &errorCode);
#endif
/** Implements UStringCaseMapper. */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalFold(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
icu::Edits *edits,
UErrorCode &errorCode);
/**
* Common string case mapping implementation for ucasemap_toXyz() and UnicodeString::toXyz().
* Implements argument checking.
*/
U_CFUNC int32_t
ustrcase_map(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UStringCaseMapper *stringCaseMapper,
icu::Edits *edits,
UErrorCode &errorCode);
/**
* Common string case mapping implementation for old-fashioned u_strToXyz() functions
* that allow the source string to overlap the destination buffer.
* Implements argument checking and internally works with an intermediate buffer if necessary.
*/
U_CFUNC int32_t
ustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UStringCaseMapper *stringCaseMapper,
UErrorCode &errorCode);
/**
* UTF-8 string case mapping function type, used by ucasemap_mapUTF8().
* UTF-8 version of UStringCaseMapper.
* All error checking must be done.
* The UCaseMap must be fully initialized, with locale and/or iter set as needed.
* src and dest must not overlap.
*/
typedef int32_t U_CALLCONV
UTF8CaseMapper(int32_t caseLocale, uint32_t options,
#if !UCONFIG_NO_BREAK_ITERATION
icu::BreakIterator *iter,
#endif
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength,
UErrorCode *pErrorCode);
#if !UCONFIG_NO_BREAK_ITERATION
/** Implements UTF8CaseMapper. */
U_CFUNC int32_t U_CALLCONV
ucasemap_internalUTF8ToTitle(int32_t caseLocale, uint32_t options,
icu::BreakIterator *iter,
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength,
UErrorCode *pErrorCode);
#endif
/**
* Implements argument checking and buffer handling
* for UTF-8 string case mapping as a common function.
*/
U_CFUNC int32_t
ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength,
UTF8CaseMapper *stringCaseMapper,
UErrorCode *pErrorCode);
U_NAMESPACE_BEGIN
namespace GreekUpper {
// Data bits.
static const uint32_t UPPER_MASK = 0x3ff;
static const uint32_t HAS_VOWEL = 0x1000;
static const uint32_t HAS_YPOGEGRAMMENI = 0x2000;
static const uint32_t HAS_ACCENT = 0x4000;
static const uint32_t HAS_DIALYTIKA = 0x8000;
// Further bits during data building and processing, not stored in the data map.
static const uint32_t HAS_COMBINING_DIALYTIKA = 0x10000;
static const uint32_t HAS_OTHER_GREEK_DIACRITIC = 0x20000;
static const uint32_t HAS_VOWEL_AND_ACCENT = HAS_VOWEL | HAS_ACCENT;
static const uint32_t HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA =
HAS_VOWEL_AND_ACCENT | HAS_DIALYTIKA;
static const uint32_t HAS_EITHER_DIALYTIKA = HAS_DIALYTIKA | HAS_COMBINING_DIALYTIKA;
// State bits.
static const uint32_t AFTER_CASED = 1;
static const uint32_t AFTER_VOWEL_WITH_ACCENT = 2;
uint32_t getLetterData(UChar32 c);
/**
* Returns a non-zero value for each of the Greek combining diacritics
* listed in The Unicode Standard, version 8, chapter 7.2 Greek,
* plus some perispomeni look-alikes.
*/
uint32_t getDiacriticData(UChar32 c);
} // namespace GreekUpper
U_NAMESPACE_END
#endif // __cplusplus
U_CAPI int32_t U_EXPORT2
ustr_hashUCharsN(const UChar *str, int32_t length);

View File

@ -22,12 +22,13 @@
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/brkiter.h"
#include "unicode/casemap.h"
#include "unicode/localpointer.h"
#include "unicode/ubrk.h"
#include "unicode/ucasemap.h"
#include "cmemory.h"
#include "ucase.h"
#include "ustr_imp.h"
#include "ucasemap_imp.h"
U_NAMESPACE_USE

View File

@ -22,6 +22,8 @@
#include "unicode/utypes.h"
#include "unicode/brkiter.h"
#include "unicode/casemap.h"
#include "unicode/edits.h"
#include "unicode/ustring.h"
#include "unicode/ucasemap.h"
#include "unicode/ubrk.h"
@ -29,6 +31,7 @@
#include "unicode/utf16.h"
#include "cmemory.h"
#include "ucase.h"
#include "ucasemap_imp.h"
#include "ustr_imp.h"
#include "uassert.h"
@ -36,334 +39,6 @@ U_NAMESPACE_BEGIN
namespace {
// Layout of the 16-bit units stored in the Edits array.
// The value range of a record's head unit determines the record type.
// 0000uuuuuuuuuuuu records u+1 unchanged text units.
const int32_t MAX_UNCHANGED_LENGTH = 0x1000;
// Largest head-unit value that still denotes an unchanged-text record.
const int32_t MAX_UNCHANGED = MAX_UNCHANGED_LENGTH - 1;
// 0wwwcccccccccccc with w=1..6 records ccc+1 replacements of w:w text units.
// No length change.
const int32_t MAX_SHORT_WIDTH = 6;
// Largest value of the 12-bit repeat counter ccc in a short-change unit.
const int32_t MAX_SHORT_CHANGE_LENGTH = 0xfff;
// Largest head-unit value that still denotes a short-change record (w=6, ccc=0xfff).
const int32_t MAX_SHORT_CHANGE = 0x6fff;
// 0111mmmmmmnnnnnn records a replacement of m text units with n.
// m or n = 61: actual length follows in the next edits array unit.
// m or n = 62..63: actual length follows in the next two edits array units.
// Bit 30 of the actual length is in the head unit.
// Trailing units have bit 15 set.
const int32_t LENGTH_IN_1TRAIL = 61;
const int32_t LENGTH_IN_2TRAIL = 62;
} // namespace
/**
 * Destructor.
 * Frees the edits array unless it still points at the built-in stackArray.
 */
Edits::~Edits() {
    const UBool usesStackBuffer = (array == stackArray);
    if (usesStackBuffer) {
        return;
    }
    uprv_free(array);
}
void Edits::reset() {
length = 0;
}
/**
 * Records a run of unchangedLength text units that were copied unmodified.
 *
 * Does nothing if an error is already stored or if the length is 0.
 * A negative length stores U_ILLEGAL_ARGUMENT_ERROR.
 * The run is merged into a preceding unchanged-text unit where possible and
 * otherwise split into as many units as needed (each unit holds at most
 * MAX_UNCHANGED_LENGTH text units).
 *
 * @param unchangedLength number of unchanged text units; must not be negative
 */
void Edits::addUnchanged(int32_t unchangedLength) {
    if (U_FAILURE(errorCode)) {
        return;
    }
    if (unchangedLength == 0) {
        return;
    }
    if (unchangedLength < 0) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    int32_t pending = unchangedLength;

    // Top up the previous unit if it is an unchanged-text unit that is not full yet.
    // (lastUnit() yields 0xffff for an empty array, which never qualifies.)
    const int32_t tail = lastUnit();
    if (tail < MAX_UNCHANGED) {
        const int32_t room = MAX_UNCHANGED - tail;
        if (pending <= room) {
            setLastUnit(tail + pending);
            return;
        }
        setLastUnit(MAX_UNCHANGED);
        pending -= room;
    }

    // Emit full units while at least a whole unit's worth remains.
    for (; pending >= MAX_UNCHANGED_LENGTH; pending -= MAX_UNCHANGED_LENGTH) {
        append(MAX_UNCHANGED);
    }

    // Emit the leftover; a unit value u stands for u+1 text units.
    if (pending > 0) {
        append(pending - 1);
    }
}
/**
 * Records that oldLength text units were replaced by newLength text units
 * (an insertion if oldLength is 0, a deletion if newLength is 0).
 *
 * Does nothing if an error is already stored or if both lengths are 0.
 * A negative length stores U_ILLEGAL_ARGUMENT_ERROR.
 * If the accumulated length delta would leave the int32_t range,
 * U_INDEX_OUTOFBOUNDS_ERROR is stored and nothing is recorded.
 *
 * Encoding:
 * - Same-length replacements of 1..MAX_SHORT_WIDTH units use the compact
 *   0wwwcccccccccccc form and are merged into a preceding unit of the same width
 *   while its repeat counter has room.
 * - Everything else uses a 0111mmmmmmnnnnnn head unit, followed by up to two
 *   trail units (bit 15 set) per length when a length does not fit into 6 bits.
 *
 * @param oldLength number of text units in the original; must not be negative
 * @param newLength number of text units in the replacement; must not be negative
 */
void Edits::addReplace(int32_t oldLength, int32_t newLength) {
    if(U_FAILURE(errorCode)) { return; }
    if(oldLength == newLength && 0 < oldLength && oldLength <= MAX_SHORT_WIDTH) {
        // Replacement of short oldLength text units by same-length new text.
        // Merge into previous short-replacement record, if any.
        int32_t last = lastUnit();
        if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE &&
                (last >> 12) == oldLength && (last & 0xfff) < MAX_SHORT_CHANGE_LENGTH) {
            setLastUnit(last + 1);
            return;
        }
        append(oldLength << 12);
        return;
    }

    if(oldLength < 0 || newLength < 0) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    if (oldLength == 0 && newLength == 0) {
        return;
    }
    int32_t newDelta = newLength - oldLength;
    if (newDelta != 0) {
        // delta + newDelta can only leave the int32_t range when both have the same sign.
        // Checking the sign of delta first also keeps (INT32_MAX - delta) and
        // (INT32_MIN - delta) themselves from overflowing, which previously could
        // report a bogus error when a growth followed a shrink (or vice versa).
        if ((newDelta > 0 && delta >= 0 && newDelta > (INT32_MAX - delta)) ||
                (newDelta < 0 && delta < 0 && newDelta < (INT32_MIN - delta))) {
            // Integer overflow or underflow.
            errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
            return;
        }
        delta += newDelta;
    }

    int32_t head = 0x7000;
    if (oldLength < LENGTH_IN_1TRAIL && newLength < LENGTH_IN_1TRAIL) {
        // Both lengths fit directly into the 6-bit fields of the head unit.
        head |= oldLength << 6;
        head |= newLength;
        append(head);
    } else if ((capacity - length) >= 5 || growArray()) {
        // A maximal record needs 5 units: the head plus two trail units per length.
        // Trail units are written first; the head goes in last, once its
        // length-marker fields are known.
        int32_t limit = length + 1;
        if(oldLength < LENGTH_IN_1TRAIL) {
            head |= oldLength << 6;
        } else if(oldLength <= 0x7fff) {
            head |= LENGTH_IN_1TRAIL << 6;
            array[limit++] = (uint16_t)(0x8000 | oldLength);
        } else {
            // Bit 30 of the length is carried in the low bit of the 6-bit marker (62 or 63).
            head |= (LENGTH_IN_2TRAIL + (oldLength >> 30)) << 6;
            array[limit++] = (uint16_t)(0x8000 | (oldLength >> 15));
            array[limit++] = (uint16_t)(0x8000 | oldLength);
        }
        if(newLength < LENGTH_IN_1TRAIL) {
            head |= newLength;
        } else if(newLength <= 0x7fff) {
            head |= LENGTH_IN_1TRAIL;
            array[limit++] = (uint16_t)(0x8000 | newLength);
        } else {
            head |= LENGTH_IN_2TRAIL + (newLength >> 30);
            array[limit++] = (uint16_t)(0x8000 | (newLength >> 15));
            array[limit++] = (uint16_t)(0x8000 | newLength);
        }
        array[length] = (uint16_t)head;
        length = limit;
    }
}
/**
 * Appends one unit to the edits array, growing the array first if it is full.
 * If growing fails, the unit is dropped (growArray() has stored the error).
 *
 * @param r unit value; only the low 16 bits are stored
 */
void Edits::append(int32_t r) {
    if (length >= capacity && !growArray()) {
        return;
    }
    array[length] = (uint16_t)r;
    ++length;
}
/**
 * Replaces the edits array with a larger heap-allocated one and copies the
 * existing units over.
 *
 * Growth policy: 2000 units when leaving the stack buffer, otherwise double
 * the capacity, capped at INT32_MAX.
 *
 * @return TRUE if the array was enlarged; FALSE if it could not be, in which
 *         case errorCode is set to U_BUFFER_OVERFLOW_ERROR (no room to grow)
 *         or U_MEMORY_ALLOCATION_ERROR (allocation failed)
 */
UBool Edits::growArray() {
    const UBool onStack = (array == stackArray);
    if (!onStack && capacity == INT32_MAX) {
        errorCode = U_BUFFER_OVERFLOW_ERROR;
        return FALSE;
    }

    int32_t grownCapacity;
    if (onStack) {
        grownCapacity = 2000;
    } else {
        grownCapacity = (capacity >= (INT32_MAX / 2)) ? INT32_MAX : 2 * capacity;
    }

    // Grow by at least 5 units so that a maximal change record will fit.
    if ((grownCapacity - capacity) < 5) {
        errorCode = U_BUFFER_OVERFLOW_ERROR;
        return FALSE;
    }

    // Sizes are in bytes: 2 per 16-bit unit.
    uint16_t *grown = (uint16_t *)uprv_malloc((size_t)grownCapacity * 2);
    if (grown == NULL) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return FALSE;
    }
    uprv_memcpy(grown, array, (size_t)length * 2);

    if (!onStack) {
        uprv_free(array);
    }
    array = grown;
    capacity = grownCapacity;
    return TRUE;
}
/**
 * Merges this object's stored error into the caller's error code.
 * An outErrorCode that already indicates a failure is left untouched.
 *
 * @param outErrorCode in/out error code; receives the stored error only if it
 *        was a success code and this object has recorded a failure
 * @return TRUE if outErrorCode indicates a failure on return
 */
UBool Edits::copyErrorTo(UErrorCode &outErrorCode) {
    if (U_SUCCESS(outErrorCode)) {
        if (U_SUCCESS(errorCode)) {
            return FALSE;
        }
        outErrorCode = errorCode;
    }
    return TRUE;
}
/**
 * Reports whether any change (as opposed to unchanged text) has been recorded.
 *
 * A non-zero length delta settles the question immediately; otherwise the
 * array is scanned for any unit above MAX_UNCHANGED, i.e. anything that is
 * not an unchanged-text unit.
 *
 * @return TRUE if at least one change edit is present
 */
UBool Edits::hasChanges() const {
    if (delta != 0) {
        return TRUE;
    }
    int32_t i = length;
    while (i > 0) {
        --i;
        if (array[i] > MAX_UNCHANGED) {
            return TRUE;
        }
    }
    return FALSE;
}
/**
 * Creates an iterator positioned before the first edit.
 *
 * @param a   the edits array to read; it is not copied
 * @param len number of units in the array
 * @param oc  TRUE if next() should skip unchanged spans (only changes)
 * @param crs TRUE for coarse iteration (adjacent changes are merged),
 *            FALSE for fine-grained iteration
 */
Edits::Iterator::Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs) :
        array(a),
        index(0),
        length(len),
        remaining(0),
        onlyChanges(oc),
        coarse(crs),
        changed(FALSE),
        oldLength_(0),
        newLength_(0),
        srcIndex(0),
        replIndex(0),
        destIndex(0) {
}
/**
 * Decodes one length of a 0111mmmmmmnnnnnn change record and advances index
 * past any trail units that belong to it.
 *
 * @param head the 6-bit m or n field from the record's head unit:
 *        - below LENGTH_IN_1TRAIL (61): the field is the length itself
 *        - LENGTH_IN_1TRAIL (61): the length is in the next unit (15 bits)
 *        - LENGTH_IN_2TRAIL..63: the length is in the next two units
 *          (15 bits each), with bit 30 taken from the low bit of the field
 * @return the decoded length
 */
int32_t Edits::Iterator::readLength(int32_t head) {
    if (head < LENGTH_IN_1TRAIL) {
        return head;
    } else if (head < LENGTH_IN_2TRAIL) {
        U_ASSERT(index < length);
        U_ASSERT(array[index] >= 0x8000);
        // Trail units are stored as 0x8000 | length; strip the marker bit.
        // Returning the raw unit would make every such length 0x8000 too large.
        return array[index++] & 0x7fff;
    } else {
        U_ASSERT((index + 2) <= length);
        U_ASSERT(array[index] >= 0x8000);
        U_ASSERT(array[index + 1] >= 0x8000);
        int32_t len = ((head & 1) << 30) |
                ((int32_t)(array[index] & 0x7fff) << 15) |
                (array[index + 1] & 0x7fff);
        index += 2;
        return len;
    }
}
// Advances the running indexes past the current span:
// srcIndex by the old length, destIndex by the new length, and replIndex
// by the new length only when the current span is a change.
void Edits::Iterator::updateIndexes() {
    srcIndex += oldLength_;
    destIndex += newLength_;
    if (changed) {
        replIndex += newLength_;
    }
}
// Marks the end of iteration: leaves an empty span (both lengths zero)
// so that further index updates add nothing, and always returns FALSE.
UBool Edits::Iterator::noNext() {
    newLength_ = 0;
    oldLength_ = 0;
    return FALSE;
}
// Advances the iterator to the next span and fills in changed, oldLength_ and newLength_.
// Returns FALSE if errorCode is already a failure on entry, or if there are no more units
// (in which case the span lengths are set to 0); returns TRUE otherwise.
// Behavior depends on the two flags set in the constructor:
//  - onlyChanges: runs of unchanged units are skipped (their lengths are still added to the indexes).
//  - coarse: adjacent change records are merged into one span; otherwise each change is
//    returned separately, and a compressed run of equal-length changes is returned one at a time.
UBool Edits::Iterator::next(UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return FALSE; }
// We have an errorCode in case we need to start guarding against integer overflows.
// It is also convenient for caller loops if we bail out when an error was set elsewhere.
// Move the indexes past the span returned by the previous call.
updateIndexes();
if (remaining > 0) {
// Fine-grained iterator: Continue a sequence of equal-length changes.
// The lengths and the changed flag stay as they were for the previous change in the run.
--remaining;
return TRUE;
}
if (index >= length) {
return noNext();
}
int32_t u = array[index++];
if (u <= MAX_UNCHANGED) {
// Combine adjacent unchanged ranges.
// Each such unit u stands for an unchanged run of u + 1.
changed = FALSE;
oldLength_ = u + 1;
// Note: the loop condition assigns u, so after the loop u holds the first
// unit that is not an unchanged unit (if index < length).
while (index < length && (u = array[index]) <= MAX_UNCHANGED) {
++index;
oldLength_ += u + 1;
}
newLength_ = oldLength_;
if (onlyChanges) {
// Skip the unchanged span: account for it in the indexes and fall through
// to decode the change unit that follows.
updateIndexes();
if (index >= length) {
return noNext();
}
// already fetched u > MAX_UNCHANGED at index
++index;
} else {
return TRUE;
}
}
changed = TRUE;
if (u <= MAX_SHORT_CHANGE) {
// Short change unit: bits 12 and up hold the per-change length,
// the low 12 bits hold (number of such changes - 1).
if (coarse) {
int32_t w = u >> 12;
int32_t len = (u & 0xfff) + 1;
oldLength_ = newLength_ = len * w;
} else {
// Split a sequence of equal-length changes that was compressed into one unit.
oldLength_ = newLength_ = u >> 12;
remaining = u & 0xfff;
return TRUE;
}
} else {
// Long change record: two 6-bit length fields in the head unit,
// each possibly continued in following units (see readLength()).
U_ASSERT(u <= 0x7fff);
oldLength_ = readLength((u >> 6) & 0x3f);
newLength_ = readLength(u & 0x3f);
if (!coarse) {
return TRUE;
}
}
// Combine adjacent changes.
// Only reached for the coarse iterator; accumulates every following change record.
while (index < length && (u = array[index]) > MAX_UNCHANGED) {
++index;
if (u <= MAX_SHORT_CHANGE) {
int32_t w = u >> 12;
int32_t len = (u & 0xfff) + 1;
len = len * w;
oldLength_ += len;
newLength_ += len;
} else {
U_ASSERT(u <= 0x7fff);
int32_t oldLen = readLength((u >> 6) & 0x3f);
int32_t newLen = readLength(u & 0x3f);
oldLength_ += oldLen;
newLength_ += newLen;
}
}
return TRUE;
}
// Positions the iterator on the span that contains source index i.
//
// Returns TRUE when i lies inside the (possibly newly reached) current span,
// i.e. srcIndex <= i < srcIndex + oldLength_.
// Returns FALSE if errorCode is already a failure, if i is negative, or if
// iteration ends before a span containing i is found.
// If i is before the current span, iteration restarts from the beginning.
UBool Edits::Iterator::findSourceIndex(int32_t i, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode) || i < 0) { return FALSE; }
    if (i < srcIndex) {
        // Reset the iterator to the start.
        // The span lengths must be cleared too: next() begins by calling
        // updateIndexes(), which would otherwise add the stale lengths of the
        // old current span to the freshly zeroed indexes.
        index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
    } else if (i < (srcIndex + oldLength_)) {
        // The index is in the current span.
        return TRUE;
    }
    while (next(errorCode)) {
        if (i < (srcIndex + oldLength_)) {
            // The index is in the current span.
            return TRUE;
        }
        if (remaining > 0) {
            // Is the index in one of the remaining compressed edits?
            // srcIndex is the start of the current span, before the remaining ones.
            // All edits in a compressed run have the same old and new length,
            // so one offset can be applied to all three indexes.
            int32_t len = (remaining + 1) * oldLength_;
            if (i < (srcIndex + len)) {
                int32_t n = (i - srcIndex) / oldLength_;  // 1 <= n <= remaining
                len = n * oldLength_;
                srcIndex += len;
                replIndex += len;
                destIndex += len;
                remaining -= n;
                return TRUE;
            }
            // Make next() skip all of these edits at once.
            oldLength_ = newLength_ = len;
            remaining = 0;
        }
    }
    return FALSE;
}
namespace {
int32_t checkOverflowAndEditsError(int32_t destIndex, int32_t destCapacity,
Edits *edits, UErrorCode &errorCode) {
if (U_SUCCESS(errorCode)) {

View File

@ -20,11 +20,12 @@
#include "unicode/utypes.h"
#include "uassert.h"
#include "unicode/brkiter.h"
#include "unicode/casemap.h"
#include "unicode/ucasemap.h"
#include "unicode/uloc.h"
#include "unicode/ustring.h"
#include "ucase.h"
#include "ustr_imp.h"
#include "ucasemap_imp.h"
U_CFUNC int32_t
ustrcase_getCaseLocale(const char *locale) {

View File

@ -19,6 +19,7 @@
#include "unicode/utypes.h"
#include "unicode/putil.h"
#include "unicode/uchar.h"
#include "unicode/ustring.h"
#include "unicode/utf16.h"
#include "cstring.h"

View File

@ -26,6 +26,7 @@
#include "unicode/decimfmt.h"
#include "uresimp.h"
#include "unicode/ures.h"
#include "unicode/ustring.h"
#include "ureslocs.h"
#include "cstring.h"
#include "mutex.h"

View File

@ -15,6 +15,7 @@
#if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_BREAK_ITERATION
#include "unicode/dtfmtsym.h"
#include "unicode/ucasemap.h"
#include "unicode/ureldatefmt.h"
#include "unicode/udisplaycontext.h"
#include "unicode/unum.h"

View File

@ -48,6 +48,7 @@
#include "unicode/simpletz.h"
#include "unicode/rbtz.h"
#include "unicode/tzfmt.h"
#include "unicode/ucasemap.h"
#include "unicode/utf16.h"
#include "unicode/vtzone.h"
#include "unicode/udisplaycontext.h"
@ -64,6 +65,7 @@
#include <float.h>
#include "smpdtfst.h"
#include "sharednumberformat.h"
#include "ucasemap_imp.h"
#include "ustr_imp.h"
#include "charstr.h"
#include "uvector.h"

View File

@ -27,6 +27,7 @@
#include "unicode/ucasemap.h"
#include "cmemory.h"
#include "cintltst.h"
#include "ucasemap_imp.h"
#include "ustr_imp.h"
/* test string case mapping functions --------------------------------------- */