ICU-8575 option for not including utf headers by default; replace uses of deprecated utf_old.h macros
X-SVN-Rev: 30430
This commit is contained in:
parent
df14bcec37
commit
e31ce99b84
@ -594,7 +594,8 @@
|
||||
This can lead to inadvertent construction of a <code>UnicodeString</code>
|
||||
with a single character by using an integer,
|
||||
and it can lead to inadvertent dependency on the conversion framework
|
||||
by using a C string literal.
|
||||
by using a C string literal.<br>
|
||||
Beginning with ICU 49, you should do the following:
|
||||
<ul>
|
||||
<li>Consider marking the from-<code>UChar</code>
|
||||
and from-<code>UChar32</code> constructors explicit via
|
||||
@ -602,7 +603,18 @@
|
||||
<li>Consider marking the from-<code>const char*</code> and
|
||||
from-<code>const UChar*</code> constructors explicit via
|
||||
<code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> or similar.</li>
|
||||
Note: The ICU test suites cannot be compiled with these settings.
|
||||
</li>
|
||||
<li><b>utf.h, utf8.h, utf16.h, utf_old.h:</b>
|
||||
By default, utypes.h (and thus almost every public ICU header)
|
||||
includes all of these header files.
|
||||
Often, none of them are needed, or only one or two of them.
|
||||
All of utf_old.h is deprecated or obsolete.<br>
|
||||
Beginning with ICU 49,
|
||||
you should define <code>U_NO_DEFAULT_INCLUDE_UTF_HEADERS</code> to 1
|
||||
(via -D or uconfig.h, as above)
|
||||
and include those header files explicitly that you actually need.<br>
|
||||
Note: The ICU test suites cannot be compiled with this setting.</li>
|
||||
<li><b>.dat file:</b> By default, the ICU data is built into
|
||||
a shared library (DLL). This is convenient because it requires no
|
||||
install-time or runtime configuration,
|
||||
|
@ -14,6 +14,7 @@
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/appendable.h"
|
||||
#include "unicode/utf16.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2008, International Business Machines
|
||||
* Copyright (C) 2007-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
@ -16,6 +16,8 @@
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/utf8.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "cmemory.h"
|
||||
#include "bmpset.h"
|
||||
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/usetiter.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "cmemory.h"
|
||||
#include "hash.h"
|
||||
#include "normalizer2impl.h"
|
||||
@ -207,14 +208,14 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
|
||||
|
||||
// i should initially be the number of code units at the
|
||||
// start of the string
|
||||
i = UTF16_CHAR_LENGTH(source.char32At(0));
|
||||
i = U16_LENGTH(source.char32At(0));
|
||||
//int32_t i = 1;
|
||||
// find the segments
|
||||
// This code iterates through the source string and
|
||||
// extracts segments that end up on a codepoint that
|
||||
// doesn't start any decompositions. (Analysis is done
|
||||
// on the NFD form - see above).
|
||||
for (; i < source.length(); i += UTF16_CHAR_LENGTH(cp)) {
|
||||
for (; i < source.length(); i += U16_LENGTH(cp)) {
|
||||
cp = source.char32At(i);
|
||||
if (nfcImpl.isCanonSegmentStarter(cp)) {
|
||||
source.extract(start, i-start, list[list_length++]); // add up to i
|
||||
@ -290,7 +291,7 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros
|
||||
}
|
||||
subpermute.setValueDeleter(uprv_deleteUObject);
|
||||
|
||||
for (i = 0; i < source.length(); i += UTF16_CHAR_LENGTH(cp)) {
|
||||
for (i = 0; i < source.length(); i += U16_LENGTH(cp)) {
|
||||
cp = source.char32At(i);
|
||||
const UHashElement *ne = NULL;
|
||||
int32_t el = -1;
|
||||
@ -308,7 +309,7 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros
|
||||
|
||||
// see what the permutations of the characters before and after this one are
|
||||
//Hashtable *subpermute = permute(source.substring(0,i) + source.substring(i + UTF16.getCharCount(cp)));
|
||||
permute(subPermuteString.replace(i, UTF16_CHAR_LENGTH(cp), NULL, 0), skipZeros, &subpermute, status);
|
||||
permute(subPermuteString.replace(i, U16_LENGTH(cp), NULL, 0), skipZeros, &subpermute, status);
|
||||
/* Test for buffer overflows */
|
||||
if(U_FAILURE(status)) {
|
||||
return;
|
||||
@ -442,9 +443,9 @@ Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const UCh
|
||||
|
||||
// cycle through all the characters
|
||||
UChar32 cp;
|
||||
for (int32_t i = 0; i < segLen; i += UTF16_CHAR_LENGTH(cp)) {
|
||||
for (int32_t i = 0; i < segLen; i += U16_LENGTH(cp)) {
|
||||
// see if any character is at the start of some decomposition
|
||||
UTF_GET_CHAR(segment, 0, i, segLen, cp);
|
||||
U16_GET(segment, 0, i, segLen, cp);
|
||||
if (!nfcImpl.getCanonStartSet(cp, starts)) {
|
||||
continue;
|
||||
}
|
||||
|
@ -18,6 +18,7 @@
|
||||
|
||||
#include "unicode/messagepattern.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "messageimpl.h"
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "unicode/normalizer2.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "cmemory.h"
|
||||
#include "mutex.h"
|
||||
#include "normalizer2impl.h"
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include "unicode/udata.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/unorm.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "mutex.h"
|
||||
#include "uset_imp.h"
|
||||
#include "utrie2.h"
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "unicode/schriter.h"
|
||||
#include "unicode/uchriter.h"
|
||||
#include "unicode/normlzr.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "cmemory.h"
|
||||
#include "normalizer2impl.h"
|
||||
#include "uprops.h" // for uniset_getUnicode32Instance()
|
||||
@ -262,7 +263,7 @@ UChar32 Normalizer::current() {
|
||||
UChar32 Normalizer::next() {
|
||||
if(bufferPos<buffer.length() || nextNormalize()) {
|
||||
UChar32 c=buffer.char32At(bufferPos);
|
||||
bufferPos+=UTF_CHAR_LENGTH(c);
|
||||
bufferPos+=U16_LENGTH(c);
|
||||
return c;
|
||||
} else {
|
||||
return DONE;
|
||||
@ -277,7 +278,7 @@ UChar32 Normalizer::next() {
|
||||
UChar32 Normalizer::previous() {
|
||||
if(bufferPos>0 || previousNormalize()) {
|
||||
UChar32 c=buffer.char32At(bufferPos-1);
|
||||
bufferPos-=UTF_CHAR_LENGTH(c);
|
||||
bufferPos-=U16_LENGTH(c);
|
||||
return c;
|
||||
} else {
|
||||
return DONE;
|
||||
|
@ -46,11 +46,13 @@ Disclaimer and license
|
||||
|
||||
#if !UCONFIG_NO_IDNA
|
||||
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/utf.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "cstring.h"
|
||||
#include "cmemory.h"
|
||||
#include "punycode.h"
|
||||
#include "unicode/ustring.h"
|
||||
|
||||
|
||||
/* Punycode ----------------------------------------------------------------- */
|
||||
@ -222,11 +224,11 @@ u_strToPunycode(const UChar *src, int32_t srcLength,
|
||||
++destLength;
|
||||
} else {
|
||||
n=(caseFlags!=NULL && caseFlags[j])<<31L;
|
||||
if(UTF_IS_SINGLE(c)) {
|
||||
if(U16_IS_SINGLE(c)) {
|
||||
n|=c;
|
||||
} else if(UTF_IS_LEAD(c) && UTF_IS_TRAIL(c2=src[j+1])) {
|
||||
} else if(U16_IS_LEAD(c) && U16_IS_TRAIL(c2=src[j+1])) {
|
||||
++j;
|
||||
n|=(int32_t)UTF16_GET_PAIR_VALUE(c, c2);
|
||||
n|=(int32_t)U16_GET_SUPPLEMENTARY(c, c2);
|
||||
} else {
|
||||
/* error: unmatched surrogate */
|
||||
*pErrorCode=U_INVALID_CHAR_FOUND;
|
||||
@ -255,11 +257,11 @@ u_strToPunycode(const UChar *src, int32_t srcLength,
|
||||
++destLength;
|
||||
} else {
|
||||
n=(caseFlags!=NULL && caseFlags[j])<<31L;
|
||||
if(UTF_IS_SINGLE(c)) {
|
||||
if(U16_IS_SINGLE(c)) {
|
||||
n|=c;
|
||||
} else if(UTF_IS_LEAD(c) && (j+1)<srcLength && UTF_IS_TRAIL(c2=src[j+1])) {
|
||||
} else if(U16_IS_LEAD(c) && (j+1)<srcLength && U16_IS_TRAIL(c2=src[j+1])) {
|
||||
++j;
|
||||
n|=(int32_t)UTF16_GET_PAIR_VALUE(c, c2);
|
||||
n|=(int32_t)U16_GET_SUPPLEMENTARY(c, c2);
|
||||
} else {
|
||||
/* error: unmatched surrogate */
|
||||
*pErrorCode=U_INVALID_CHAR_FOUND;
|
||||
@ -510,14 +512,14 @@ u_strFromPunycode(const UChar *src, int32_t srcLength,
|
||||
/* not needed for Punycode: */
|
||||
/* if (decode_digit(n) <= BASE) return punycode_invalid_input; */
|
||||
|
||||
if(n>0x10ffff || UTF_IS_SURROGATE(n)) {
|
||||
if(n>0x10ffff || U_IS_SURROGATE(n)) {
|
||||
/* Unicode code point overflow */
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Insert n at position i of the output: */
|
||||
cpLength=UTF_CHAR_LENGTH(n);
|
||||
cpLength=U16_LENGTH(n);
|
||||
if((destLength+cpLength)<=destCapacity) {
|
||||
int32_t codeUnitIndex;
|
||||
|
||||
@ -540,7 +542,7 @@ u_strFromPunycode(const UChar *src, int32_t srcLength,
|
||||
}
|
||||
} else {
|
||||
codeUnitIndex=firstSupplementaryIndex;
|
||||
UTF_FWD_N(dest, codeUnitIndex, destLength, i-codeUnitIndex);
|
||||
U16_FWD_N(dest, codeUnitIndex, destLength, i-codeUnitIndex);
|
||||
}
|
||||
|
||||
/* use the UChar index codeUnitIndex instead of the code point index i */
|
||||
@ -559,8 +561,8 @@ u_strFromPunycode(const UChar *src, int32_t srcLength,
|
||||
dest[codeUnitIndex]=(UChar)n;
|
||||
} else {
|
||||
/* supplementary character, insert two code units */
|
||||
dest[codeUnitIndex]=UTF16_LEAD(n);
|
||||
dest[codeUnitIndex+1]=UTF16_TRAIL(n);
|
||||
dest[codeUnitIndex]=U16_LEAD(n);
|
||||
dest[codeUnitIndex+1]=U16_TRAIL(n);
|
||||
}
|
||||
if(caseFlags!=NULL) {
|
||||
/* Case of last character determines uppercase flag: */
|
||||
|
@ -10,8 +10,9 @@
|
||||
*/
|
||||
#include "ruleiter.h"
|
||||
#include "unicode/parsepos.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/symtable.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "patternprops.h"
|
||||
|
||||
/* \U87654321 or \ud800\udc00 */
|
||||
@ -40,7 +41,7 @@ UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCod
|
||||
|
||||
for (;;) {
|
||||
c = _current();
|
||||
_advance(UTF_CHAR_LENGTH(c));
|
||||
_advance(U16_LENGTH(c));
|
||||
|
||||
if (c == SymbolTable::SYMBOL_REF && buf == 0 &&
|
||||
(options & PARSE_VARIABLES) != 0 && sym != 0) {
|
||||
@ -102,7 +103,7 @@ void RuleCharacterIterator::skipIgnored(int32_t options) {
|
||||
for (;;) {
|
||||
UChar32 a = _current();
|
||||
if (!PatternProps::isWhiteSpace(a)) break;
|
||||
_advance(UTF_CHAR_LENGTH(a));
|
||||
_advance(U16_LENGTH(a));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/ubidi.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "ubidi_props.h"
|
||||
#include "ubidiimp.h"
|
||||
#include "uassert.h"
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2000-2007, International Business Machines
|
||||
* Copyright (C) 2000-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
@ -26,6 +26,7 @@
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/ubidi.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "cmemory.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "ubidiimp.h"
|
||||
@ -86,9 +87,9 @@ doWriteForward(const UChar *src, int32_t srcLength,
|
||||
return srcLength;
|
||||
}
|
||||
do {
|
||||
UTF_NEXT_CHAR(src, i, srcLength, c);
|
||||
U16_NEXT(src, i, srcLength, c);
|
||||
c=u_charMirror(c);
|
||||
UTF_APPEND_CHAR_UNSAFE(dest, j, c);
|
||||
U16_APPEND_UNSAFE(dest, j, c);
|
||||
} while(i<srcLength);
|
||||
return srcLength;
|
||||
}
|
||||
@ -123,7 +124,7 @@ doWriteForward(const UChar *src, int32_t srcLength,
|
||||
UChar32 c;
|
||||
do {
|
||||
i=0;
|
||||
UTF_NEXT_CHAR(src, i, srcLength, c);
|
||||
U16_NEXT(src, i, srcLength, c);
|
||||
src+=i;
|
||||
srcLength-=i;
|
||||
if(!IS_BIDI_CONTROL_CHAR(c)) {
|
||||
@ -142,7 +143,7 @@ doWriteForward(const UChar *src, int32_t srcLength,
|
||||
return destSize-remaining;
|
||||
}
|
||||
c=u_charMirror(c);
|
||||
UTF_APPEND_CHAR_UNSAFE(dest, j, c);
|
||||
U16_APPEND_UNSAFE(dest, j, c);
|
||||
}
|
||||
} while(srcLength>0);
|
||||
return j;
|
||||
@ -197,7 +198,7 @@ doWriteReverse(const UChar *src, int32_t srcLength,
|
||||
i=srcLength;
|
||||
|
||||
/* collect code units for one base character */
|
||||
UTF_BACK_1(src, 0, srcLength);
|
||||
U16_BACK_1(src, 0, srcLength);
|
||||
|
||||
/* copy this base character */
|
||||
j=srcLength;
|
||||
@ -226,7 +227,7 @@ doWriteReverse(const UChar *src, int32_t srcLength,
|
||||
|
||||
/* collect code units and modifier letters for one base character */
|
||||
do {
|
||||
UTF_PREV_CHAR(src, 0, srcLength, c);
|
||||
U16_PREV(src, 0, srcLength, c);
|
||||
} while(srcLength>0 && IS_COMBINING(u_charType(c)));
|
||||
|
||||
/* copy this "user character" */
|
||||
@ -274,11 +275,11 @@ doWriteReverse(const UChar *src, int32_t srcLength,
|
||||
i=srcLength;
|
||||
|
||||
/* collect code units for one base character */
|
||||
UTF_PREV_CHAR(src, 0, srcLength, c);
|
||||
U16_PREV(src, 0, srcLength, c);
|
||||
if(options&UBIDI_KEEP_BASE_COMBINING) {
|
||||
/* collect modifier letters for this base character */
|
||||
while(srcLength>0 && IS_COMBINING(u_charType(c))) {
|
||||
UTF_PREV_CHAR(src, 0, srcLength, c);
|
||||
U16_PREV(src, 0, srcLength, c);
|
||||
}
|
||||
}
|
||||
|
||||
@ -293,7 +294,7 @@ doWriteReverse(const UChar *src, int32_t srcLength,
|
||||
/* mirror only the base character */
|
||||
int32_t k=0;
|
||||
c=u_charMirror(c);
|
||||
UTF_APPEND_CHAR_UNSAFE(dest, k, c);
|
||||
U16_APPEND_UNSAFE(dest, k, c);
|
||||
dest+=k;
|
||||
j+=k;
|
||||
}
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uset.h"
|
||||
#include "unicode/udata.h" /* UDataInfo */
|
||||
#include "unicode/utf16.h"
|
||||
#include "ucmndata.h" /* DataHeader */
|
||||
#include "udatamem.h"
|
||||
#include "umutex.h"
|
||||
|
@ -25,6 +25,9 @@
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
#include "unicode/utext.h"
|
||||
#endif
|
||||
#include "unicode/utf.h"
|
||||
#include "unicode/utf8.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "ucase.h"
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "unicode/appendable.h"
|
||||
#include "unicode/ucharstrie.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "cmemory.h"
|
||||
#include "uassert.h"
|
||||
|
||||
@ -37,6 +38,24 @@ UCharsTrie::current() const {
|
||||
}
|
||||
}
|
||||
|
||||
// Starts a traversal with the code point cp instead of a single unit.
// For cp<=0xffff this is exactly first(cp).
// For larger cp, two units are fed in sequence: first(U16_LEAD(cp)), and,
// only if that result satisfies USTRINGTRIE_HAS_NEXT, next(U16_TRAIL(cp)).
// If the first of the two units cannot be continued, returns USTRINGTRIE_NO_MATCH.
UStringTrieResult
|
||||
UCharsTrie::firstForCodePoint(UChar32 cp) {
|
||||
return cp<=0xffff ?
|
||||
first(cp) :
|
||||
(USTRINGTRIE_HAS_NEXT(first(U16_LEAD(cp))) ?
|
||||
next(U16_TRAIL(cp)) :
|
||||
USTRINGTRIE_NO_MATCH);
|
||||
}
|
||||
|
||||
// Continues a traversal with the code point cp instead of a single unit.
// For cp<=0xffff this is exactly next(cp).
// For larger cp, two units are fed in sequence: next(U16_LEAD(cp)), and,
// only if that result satisfies USTRINGTRIE_HAS_NEXT, next(U16_TRAIL(cp)).
// If the first of the two units cannot be continued, returns USTRINGTRIE_NO_MATCH.
UStringTrieResult
|
||||
UCharsTrie::nextForCodePoint(UChar32 cp) {
|
||||
return cp<=0xffff ?
|
||||
next(cp) :
|
||||
(USTRINGTRIE_HAS_NEXT(next(U16_LEAD(cp))) ?
|
||||
next(U16_TRAIL(cp)) :
|
||||
USTRINGTRIE_NO_MATCH);
|
||||
}
|
||||
|
||||
UStringTrieResult
|
||||
UCharsTrie::branchNext(const UChar *pos, int32_t length, int32_t uchar) {
|
||||
// Branch according to the current unit.
|
||||
|
@ -9,6 +9,7 @@
|
||||
|
||||
#include "unicode/uchriter.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "ustr_imp.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
@ -191,7 +192,7 @@ UCharCharacterIterator::first32() {
|
||||
if(pos < end) {
|
||||
int32_t i = pos;
|
||||
UChar32 c;
|
||||
UTF_NEXT_CHAR(text, i, end, c);
|
||||
U16_NEXT(text, i, end, c);
|
||||
return c;
|
||||
} else {
|
||||
return DONE;
|
||||
@ -203,7 +204,7 @@ UCharCharacterIterator::first32PostInc() {
|
||||
pos = begin;
|
||||
if(pos < end) {
|
||||
UChar32 c;
|
||||
UTF_NEXT_CHAR(text, pos, end, c);
|
||||
U16_NEXT(text, pos, end, c);
|
||||
return c;
|
||||
} else {
|
||||
return DONE;
|
||||
@ -215,7 +216,7 @@ UCharCharacterIterator::last32() {
|
||||
pos = end;
|
||||
if(pos > begin) {
|
||||
UChar32 c;
|
||||
UTF_PREV_CHAR(text, begin, pos, c);
|
||||
U16_PREV(text, begin, pos, c);
|
||||
return c;
|
||||
} else {
|
||||
return DONE;
|
||||
@ -230,10 +231,10 @@ UCharCharacterIterator::setIndex32(int32_t position) {
|
||||
position = end;
|
||||
}
|
||||
if(position < end) {
|
||||
UTF_SET_CHAR_START(text, begin, position);
|
||||
U16_SET_CP_START(text, begin, position);
|
||||
int32_t i = this->pos = position;
|
||||
UChar32 c;
|
||||
UTF_NEXT_CHAR(text, i, end, c);
|
||||
U16_NEXT(text, i, end, c);
|
||||
return c;
|
||||
} else {
|
||||
this->pos = position;
|
||||
@ -245,7 +246,7 @@ UChar32
|
||||
UCharCharacterIterator::current32() const {
|
||||
if (pos >= begin && pos < end) {
|
||||
UChar32 c;
|
||||
UTF_GET_CHAR(text, begin, pos, end, c);
|
||||
U16_GET(text, begin, pos, end, c);
|
||||
return c;
|
||||
} else {
|
||||
return DONE;
|
||||
@ -255,11 +256,11 @@ UCharCharacterIterator::current32() const {
|
||||
UChar32
|
||||
UCharCharacterIterator::next32() {
|
||||
if (pos < end) {
|
||||
UTF_FWD_1(text, pos, end);
|
||||
U16_FWD_1(text, pos, end);
|
||||
if(pos < end) {
|
||||
int32_t i = pos;
|
||||
UChar32 c;
|
||||
UTF_NEXT_CHAR(text, i, end, c);
|
||||
U16_NEXT(text, i, end, c);
|
||||
return c;
|
||||
}
|
||||
}
|
||||
@ -272,7 +273,7 @@ UChar32
|
||||
UCharCharacterIterator::next32PostInc() {
|
||||
if (pos < end) {
|
||||
UChar32 c;
|
||||
UTF_NEXT_CHAR(text, pos, end, c);
|
||||
U16_NEXT(text, pos, end, c);
|
||||
return c;
|
||||
} else {
|
||||
return DONE;
|
||||
@ -283,7 +284,7 @@ UChar32
|
||||
UCharCharacterIterator::previous32() {
|
||||
if (pos > begin) {
|
||||
UChar32 c;
|
||||
UTF_PREV_CHAR(text, begin, pos, c);
|
||||
U16_PREV(text, begin, pos, c);
|
||||
return c;
|
||||
} else {
|
||||
return DONE;
|
||||
@ -323,20 +324,20 @@ UCharCharacterIterator::move32(int32_t delta, CharacterIterator::EOrigin origin)
|
||||
case kStart:
|
||||
pos = begin;
|
||||
if(delta > 0) {
|
||||
UTF_FWD_N(text, pos, end, delta);
|
||||
U16_FWD_N(text, pos, end, delta);
|
||||
}
|
||||
break;
|
||||
case kCurrent:
|
||||
if(delta > 0) {
|
||||
UTF_FWD_N(text, pos, end, delta);
|
||||
U16_FWD_N(text, pos, end, delta);
|
||||
} else {
|
||||
UTF_BACK_N(text, begin, pos, -delta);
|
||||
U16_BACK_N(text, begin, pos, -delta);
|
||||
}
|
||||
break;
|
||||
case kEnd:
|
||||
pos = end;
|
||||
if(delta < 0) {
|
||||
UTF_BACK_N(text, begin, pos, -delta);
|
||||
U16_BACK_N(text, begin, pos, -delta);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
|
@ -27,6 +27,8 @@
|
||||
#include "unicode/ucnv.h"
|
||||
#include "unicode/ucnv_err.h"
|
||||
#include "unicode/uset.h"
|
||||
#include "unicode/utf.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "putilimp.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
|
@ -34,6 +34,7 @@
|
||||
#include "unicode/uset.h"
|
||||
#include "unicode/ucnv_err.h"
|
||||
#include "unicode/ucnv_cb.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "ucnv_imp.h"
|
||||
#include "ucnv_bld.h"
|
||||
#include "ucnv_cnv.h"
|
||||
@ -1645,16 +1646,16 @@ UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args
|
||||
|
||||
sourceChar = *(source++);
|
||||
/*check if the char is a First surrogate*/
|
||||
if(UTF_IS_SURROGATE(sourceChar)) {
|
||||
if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
|
||||
if(U16_IS_SURROGATE(sourceChar)) {
|
||||
if(U16_IS_SURROGATE_LEAD(sourceChar)) {
|
||||
getTrail:
|
||||
/*look ahead to find the trail surrogate*/
|
||||
if(source < sourceLimit) {
|
||||
/* test the following code unit */
|
||||
UChar trail=(UChar) *source;
|
||||
if(UTF_IS_SECOND_SURROGATE(trail)) {
|
||||
if(U16_IS_TRAIL(trail)) {
|
||||
source++;
|
||||
sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
|
||||
sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
|
||||
cnv->fromUChar32=0x00;
|
||||
/* convert this supplementary code point */
|
||||
/* exit this condition tree */
|
||||
@ -2422,16 +2423,16 @@ UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args
|
||||
*/
|
||||
|
||||
/*check if the char is a First surrogate*/
|
||||
if(UTF_IS_SURROGATE(sourceChar)) {
|
||||
if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
|
||||
if(U16_IS_SURROGATE(sourceChar)) {
|
||||
if(U16_IS_SURROGATE_LEAD(sourceChar)) {
|
||||
getTrail:
|
||||
/*look ahead to find the trail surrogate*/
|
||||
if(source < sourceLimit) {
|
||||
/* test the following code unit */
|
||||
UChar trail=(UChar) *source;
|
||||
if(UTF_IS_SECOND_SURROGATE(trail)) {
|
||||
if(U16_IS_TRAIL(trail)) {
|
||||
source++;
|
||||
sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
|
||||
sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
/* convert this surrogate code point */
|
||||
/* exit this condition tree */
|
||||
@ -2894,16 +2895,16 @@ UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args
|
||||
|
||||
sourceChar = *(source++);
|
||||
/*check if the char is a First surrogate*/
|
||||
if(UTF_IS_SURROGATE(sourceChar)) {
|
||||
if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
|
||||
if(U16_IS_SURROGATE(sourceChar)) {
|
||||
if(U16_IS_SURROGATE_LEAD(sourceChar)) {
|
||||
getTrail:
|
||||
/*look ahead to find the trail surrogate*/
|
||||
if(source < sourceLimit) {
|
||||
/* test the following code unit */
|
||||
UChar trail=(UChar) *source;
|
||||
if(UTF_IS_SECOND_SURROGATE(trail)) {
|
||||
if(U16_IS_TRAIL(trail)) {
|
||||
source++;
|
||||
sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
|
||||
sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
|
||||
cnv->fromUChar32=0x00;
|
||||
/* convert this supplementary code point */
|
||||
/* exit this condition tree */
|
||||
|
@ -23,6 +23,7 @@
|
||||
|
||||
#include "unicode/ucnv.h"
|
||||
#include "unicode/ucnv_err.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "ucnv_cnv.h"
|
||||
#include "ucnvmbcs.h"
|
||||
#include "ucnv_ext.h"
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "unicode/uset.h"
|
||||
#include "unicode/ucnv_err.h"
|
||||
#include "unicode/ucnv_cb.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "ucnv_imp.h"
|
||||
#include "ucnv_bld.h"
|
||||
#include "ucnv_cnv.h"
|
||||
@ -356,16 +357,16 @@ UConverter_fromUnicode_CompoundText_OFFSETS(UConverterFromUnicodeArgs* args, UEr
|
||||
|
||||
sourceChar = *(source++);
|
||||
/*check if the char is a First surrogate*/
|
||||
if(UTF_IS_SURROGATE(sourceChar)) {
|
||||
if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
|
||||
if(U16_IS_SURROGATE(sourceChar)) {
|
||||
if(U16_IS_SURROGATE_LEAD(sourceChar)) {
|
||||
getTrail:
|
||||
/*look ahead to find the trail surrogate*/
|
||||
if(source < sourceLimit) {
|
||||
/* test the following code unit */
|
||||
UChar trail=(UChar) *source;
|
||||
if(UTF_IS_SECOND_SURROGATE(trail)) {
|
||||
if(U16_IS_TRAIL(trail)) {
|
||||
source++;
|
||||
sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
|
||||
sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
|
||||
cnv->fromUChar32=0x00;
|
||||
/* convert this supplementary code point */
|
||||
/* exit this condition tree */
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2002-2009, International Business Machines
|
||||
* Copyright (C) 2002-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: ucnv_u32.c
|
||||
@ -19,6 +19,7 @@
|
||||
#if !UCONFIG_NO_CONVERSION
|
||||
|
||||
#include "unicode/ucnv.h"
|
||||
#include "unicode/utf.h"
|
||||
#include "ucnv_bld.h"
|
||||
#include "ucnv_cnv.h"
|
||||
#include "cmemory.h"
|
||||
@ -246,7 +247,7 @@ T_UConverter_fromUnicode_UTF32_BE(UConverterFromUnicodeArgs * args,
|
||||
while (mySource < sourceLimit && myTarget < targetLimit) {
|
||||
ch = *(mySource++);
|
||||
|
||||
if (UTF_IS_SURROGATE(ch)) {
|
||||
if (U_IS_SURROGATE(ch)) {
|
||||
if (U_IS_LEAD(ch)) {
|
||||
lowsurogate:
|
||||
if (mySource < sourceLimit) {
|
||||
@ -350,7 +351,7 @@ T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args,
|
||||
while (mySource < sourceLimit && myTarget < targetLimit) {
|
||||
ch = *(mySource++);
|
||||
|
||||
if (UTF_IS_SURROGATE(ch)) {
|
||||
if (U_IS_SURROGATE(ch)) {
|
||||
if (U_IS_LEAD(ch)) {
|
||||
lowsurogate:
|
||||
if (mySource < sourceLimit) {
|
||||
@ -726,14 +727,14 @@ T_UConverter_fromUnicode_UTF32_LE(UConverterFromUnicodeArgs * args,
|
||||
{
|
||||
ch = *(mySource++);
|
||||
|
||||
if (UTF_IS_SURROGATE(ch)) {
|
||||
if (U_IS_LEAD(ch))
|
||||
if (U16_IS_SURROGATE(ch)) {
|
||||
if (U16_IS_LEAD(ch))
|
||||
{
|
||||
lowsurogate:
|
||||
if (mySource < sourceLimit)
|
||||
{
|
||||
ch2 = *mySource;
|
||||
if (U_IS_TRAIL(ch2)) {
|
||||
if (U16_IS_TRAIL(ch2)) {
|
||||
ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
|
||||
mySource++;
|
||||
}
|
||||
@ -838,14 +839,14 @@ T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args,
|
||||
{
|
||||
ch = *(mySource++);
|
||||
|
||||
if (UTF_IS_SURROGATE(ch)) {
|
||||
if (U_IS_LEAD(ch))
|
||||
if (U16_IS_SURROGATE(ch)) {
|
||||
if (U16_IS_LEAD(ch))
|
||||
{
|
||||
lowsurogate:
|
||||
if (mySource < sourceLimit)
|
||||
{
|
||||
ch2 = *mySource;
|
||||
if (U_IS_TRAIL(ch2))
|
||||
if (U16_IS_TRAIL(ch2))
|
||||
{
|
||||
ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
|
||||
mySource++;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2002-2007, International Business Machines
|
||||
* Copyright (C) 2002-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: ucnv_u8.c
|
||||
@ -23,6 +23,9 @@
|
||||
#if !UCONFIG_NO_CONVERSION
|
||||
|
||||
#include "unicode/ucnv.h"
|
||||
#include "unicode/utf.h"
|
||||
#include "unicode/utf8.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "ucnv_bld.h"
|
||||
#include "ucnv_cnv.h"
|
||||
#include "cmemory.h"
|
||||
@ -130,7 +133,7 @@ morebytes:
|
||||
if (mySource < sourceLimit)
|
||||
{
|
||||
toUBytes[i] = (char) (ch2 = *mySource);
|
||||
if (!UTF8_IS_TRAIL(ch2))
|
||||
if (!U8_IS_TRAIL(ch2))
|
||||
{
|
||||
break; /* i < inBytes */
|
||||
}
|
||||
@ -164,7 +167,7 @@ morebytes:
|
||||
* In CESU-8, only surrogates, not supplementary code points, are encoded directly.
|
||||
*/
|
||||
if (i == inBytes && ch <= MAXIMUM_UTF && ch >= utf8_minChar32[i] &&
|
||||
(isCESU8 ? i <= 3 : !UTF_IS_SURROGATE(ch)))
|
||||
(isCESU8 ? i <= 3 : !U_IS_SURROGATE(ch)))
|
||||
{
|
||||
/* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
|
||||
if (ch <= MAXIMUM_UCS2)
|
||||
@ -259,7 +262,7 @@ morebytes:
|
||||
if (mySource < sourceLimit)
|
||||
{
|
||||
toUBytes[i] = (char) (ch2 = *mySource);
|
||||
if (!UTF8_IS_TRAIL(ch2))
|
||||
if (!U8_IS_TRAIL(ch2))
|
||||
{
|
||||
break; /* i < inBytes */
|
||||
}
|
||||
@ -292,7 +295,7 @@ morebytes:
|
||||
* In CESU-8, only surrogates, not supplementary code points, are encoded directly.
|
||||
*/
|
||||
if (i == inBytes && ch <= MAXIMUM_UTF && ch >= utf8_minChar32[i] &&
|
||||
(isCESU8 ? i <= 3 : !UTF_IS_SURROGATE(ch)))
|
||||
(isCESU8 ? i <= 3 : !U_IS_SURROGATE(ch)))
|
||||
{
|
||||
/* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
|
||||
if (ch <= MAXIMUM_UCS2)
|
||||
@ -387,13 +390,13 @@ U_CFUNC void ucnv_fromUnicode_UTF8 (UConverterFromUnicodeArgs * args,
|
||||
}
|
||||
else {
|
||||
/* Check for surrogates */
|
||||
if(UTF_IS_SURROGATE(ch) && isNotCESU8) {
|
||||
if(U16_IS_SURROGATE(ch) && isNotCESU8) {
|
||||
lowsurrogate:
|
||||
if (mySource < sourceLimit) {
|
||||
/* test both code units */
|
||||
if(UTF_IS_SURROGATE_FIRST(ch) && UTF_IS_SECOND_SURROGATE(*mySource)) {
|
||||
if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(*mySource)) {
|
||||
/* convert and consume this supplementary code point */
|
||||
ch=UTF16_GET_PAIR_VALUE(ch, *mySource);
|
||||
ch=U16_GET_SUPPLEMENTARY(ch, *mySource);
|
||||
++mySource;
|
||||
/* exit this condition tree */
|
||||
}
|
||||
@ -513,13 +516,13 @@ U_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeArgs * ar
|
||||
{
|
||||
nextSourceIndex = offsetNum + 1;
|
||||
|
||||
if(UTF_IS_SURROGATE(ch) && isNotCESU8) {
|
||||
if(U16_IS_SURROGATE(ch) && isNotCESU8) {
|
||||
lowsurrogate:
|
||||
if (mySource < sourceLimit) {
|
||||
/* test both code units */
|
||||
if(UTF_IS_SURROGATE_FIRST(ch) && UTF_IS_SECOND_SURROGATE(*mySource)) {
|
||||
if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(*mySource)) {
|
||||
/* convert and consume this supplementary code point */
|
||||
ch=UTF16_GET_PAIR_VALUE(ch, *mySource);
|
||||
ch=U16_GET_SUPPLEMENTARY(ch, *mySource);
|
||||
++mySource;
|
||||
++nextSourceIndex;
|
||||
/* exit this condition tree */
|
||||
@ -662,7 +665,7 @@ static UChar32 ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
|
||||
case 6:
|
||||
ch += (myByte = *source);
|
||||
ch <<= 6;
|
||||
if (!UTF8_IS_TRAIL(myByte))
|
||||
if (!U8_IS_TRAIL(myByte))
|
||||
{
|
||||
isLegalSequence = 0;
|
||||
break;
|
||||
@ -671,7 +674,7 @@ static UChar32 ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
|
||||
case 5:
|
||||
ch += (myByte = *source);
|
||||
ch <<= 6;
|
||||
if (!UTF8_IS_TRAIL(myByte))
|
||||
if (!U8_IS_TRAIL(myByte))
|
||||
{
|
||||
isLegalSequence = 0;
|
||||
break;
|
||||
@ -680,7 +683,7 @@ static UChar32 ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
|
||||
case 4:
|
||||
ch += (myByte = *source);
|
||||
ch <<= 6;
|
||||
if (!UTF8_IS_TRAIL(myByte))
|
||||
if (!U8_IS_TRAIL(myByte))
|
||||
{
|
||||
isLegalSequence = 0;
|
||||
break;
|
||||
@ -689,7 +692,7 @@ static UChar32 ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
|
||||
case 3:
|
||||
ch += (myByte = *source);
|
||||
ch <<= 6;
|
||||
if (!UTF8_IS_TRAIL(myByte))
|
||||
if (!U8_IS_TRAIL(myByte))
|
||||
{
|
||||
isLegalSequence = 0;
|
||||
break;
|
||||
@ -697,7 +700,7 @@ static UChar32 ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
|
||||
++source;
|
||||
case 2:
|
||||
ch += (myByte = *source);
|
||||
if (!UTF8_IS_TRAIL(myByte))
|
||||
if (!U8_IS_TRAIL(myByte))
|
||||
{
|
||||
isLegalSequence = 0;
|
||||
break;
|
||||
|
@ -23,6 +23,7 @@
|
||||
|
||||
#include "unicode/ucnv.h"
|
||||
#include "unicode/ucnv_cb.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "ucnv_bld.h"
|
||||
#include "ucnv_cnv.h"
|
||||
|
||||
@ -476,15 +477,15 @@ fastSingle:
|
||||
continue;
|
||||
}
|
||||
|
||||
if(UTF_IS_LEAD(c)) {
|
||||
if(U16_IS_LEAD(c)) {
|
||||
getTrail:
|
||||
if(source<sourceLimit) {
|
||||
/* test the following code unit */
|
||||
UChar trail=*source;
|
||||
if(UTF_IS_SECOND_SURROGATE(trail)) {
|
||||
if(U16_IS_TRAIL(trail)) {
|
||||
++source;
|
||||
++nextSourceIndex;
|
||||
c=UTF16_GET_PAIR_VALUE(c, trail);
|
||||
c=U16_GET_SUPPLEMENTARY(c, trail);
|
||||
}
|
||||
} else {
|
||||
/* no more input */
|
||||
@ -711,14 +712,14 @@ fastSingle:
|
||||
continue;
|
||||
}
|
||||
|
||||
if(UTF_IS_LEAD(c)) {
|
||||
if(U16_IS_LEAD(c)) {
|
||||
getTrail:
|
||||
if(source<sourceLimit) {
|
||||
/* test the following code unit */
|
||||
UChar trail=*source;
|
||||
if(UTF_IS_SECOND_SURROGATE(trail)) {
|
||||
if(U16_IS_TRAIL(trail)) {
|
||||
++source;
|
||||
c=UTF16_GET_PAIR_VALUE(c, trail);
|
||||
c=U16_GET_SUPPLEMENTARY(c, trail);
|
||||
}
|
||||
} else {
|
||||
/* no more input */
|
||||
@ -1110,15 +1111,15 @@ getTrail:
|
||||
*offsets++=sourceIndex;
|
||||
} else {
|
||||
/* output surrogate pair */
|
||||
*target++=UTF16_LEAD(c);
|
||||
*target++=U16_LEAD(c);
|
||||
if(target<targetLimit) {
|
||||
*target++=UTF16_TRAIL(c);
|
||||
*target++=U16_TRAIL(c);
|
||||
*offsets++=sourceIndex;
|
||||
*offsets++=sourceIndex;
|
||||
} else {
|
||||
/* target overflow */
|
||||
*offsets++=sourceIndex;
|
||||
cnv->UCharErrorBuffer[0]=UTF16_TRAIL(c);
|
||||
cnv->UCharErrorBuffer[0]=U16_TRAIL(c);
|
||||
cnv->UCharErrorBufferLength=1;
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
break;
|
||||
@ -1315,12 +1316,12 @@ getTrail:
|
||||
*target++=(UChar)c;
|
||||
} else {
|
||||
/* output surrogate pair */
|
||||
*target++=UTF16_LEAD(c);
|
||||
*target++=U16_LEAD(c);
|
||||
if(target<targetLimit) {
|
||||
*target++=UTF16_TRAIL(c);
|
||||
*target++=U16_TRAIL(c);
|
||||
} else {
|
||||
/* target overflow */
|
||||
cnv->UCharErrorBuffer[0]=UTF16_TRAIL(c);
|
||||
cnv->UCharErrorBuffer[0]=U16_TRAIL(c);
|
||||
cnv->UCharErrorBufferLength=1;
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
break;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2000-2009, International Business Machines
|
||||
* Copyright (C) 2000-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: ucnvhz.c
|
||||
@ -22,6 +22,7 @@
|
||||
#include "unicode/ucnv.h"
|
||||
#include "unicode/ucnv_cb.h"
|
||||
#include "unicode/uset.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "ucnv_bld.h"
|
||||
#include "ucnv_cnv.h"
|
||||
#include "ucnv_imp.h"
|
||||
@ -446,17 +447,17 @@ UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
|
||||
/* oops.. the code point is unassigned */
|
||||
/*Handle surrogates */
|
||||
/*check if the char is a First surrogate*/
|
||||
if(UTF_IS_SURROGATE(mySourceChar)) {
|
||||
if(UTF_IS_SURROGATE_FIRST(mySourceChar)) {
|
||||
if(U16_IS_SURROGATE(mySourceChar)) {
|
||||
if(U16_IS_SURROGATE_LEAD(mySourceChar)) {
|
||||
args->converter->fromUChar32=mySourceChar;
|
||||
getTrail:
|
||||
/*look ahead to find the trail surrogate*/
|
||||
if(mySourceIndex < mySourceLength) {
|
||||
/* test the following code unit */
|
||||
UChar trail=(UChar) args->source[mySourceIndex];
|
||||
if(UTF_IS_SECOND_SURROGATE(trail)) {
|
||||
if(U16_IS_TRAIL(trail)) {
|
||||
++mySourceIndex;
|
||||
mySourceChar=UTF16_GET_PAIR_VALUE(args->converter->fromUChar32, trail);
|
||||
mySourceChar=U16_GET_SUPPLEMENTARY(args->converter->fromUChar32, trail);
|
||||
args->converter->fromUChar32=0x00;
|
||||
/* there are no surrogates in GB2312*/
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
|
@ -19,11 +19,12 @@
|
||||
|
||||
#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
|
||||
|
||||
#include "unicode/ucnv.h"
|
||||
#include "unicode/ucnv_cb.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "cmemory.h"
|
||||
#include "ucnv_bld.h"
|
||||
#include "unicode/ucnv.h"
|
||||
#include "ucnv_cnv.h"
|
||||
#include "unicode/ucnv_cb.h"
|
||||
#include "cstring.h"
|
||||
|
||||
#define UCNV_OPTIONS_VERSION_MASK 0xf
|
||||
@ -1054,16 +1055,16 @@ static void UConverter_fromUnicode_ISCII_OFFSETS_LOGIC(
|
||||
} else {
|
||||
/* oops.. the code point is unassigned */
|
||||
/*check if the char is a First surrogate*/
|
||||
if (UTF_IS_SURROGATE(sourceChar)) {
|
||||
if (UTF_IS_SURROGATE_FIRST(sourceChar)) {
|
||||
if (U16_IS_SURROGATE(sourceChar)) {
|
||||
if (U16_IS_SURROGATE_LEAD(sourceChar)) {
|
||||
getTrail:
|
||||
/*look ahead to find the trail surrogate*/
|
||||
if (source < sourceLimit) {
|
||||
/* test the following code unit */
|
||||
UChar trail= (*source);
|
||||
if (UTF_IS_SECOND_SURROGATE(trail)) {
|
||||
if (U16_IS_TRAIL(trail)) {
|
||||
source++;
|
||||
sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
|
||||
sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
|
||||
*err =U_INVALID_CHAR_FOUND;
|
||||
/* convert this surrogate code point */
|
||||
/* exit this condition tree */
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2000-2007, International Business Machines
|
||||
* Copyright (C) 2000-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: ucnvlat1.cpp
|
||||
@ -18,6 +18,7 @@
|
||||
|
||||
#include "unicode/ucnv.h"
|
||||
#include "unicode/uset.h"
|
||||
#include "unicode/utf8.h"
|
||||
#include "ucnv_bld.h"
|
||||
#include "ucnv_cnv.h"
|
||||
|
||||
|
@ -48,6 +48,8 @@
|
||||
#include "unicode/ucnv_cb.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "unicode/uset.h"
|
||||
#include "unicode/utf8.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "ucnv_bld.h"
|
||||
#include "ucnvmbcs.h"
|
||||
#include "ucnv_ext.h"
|
||||
@ -3352,16 +3354,16 @@ ucnv_MBCSDoubleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
|
||||
* If it does, then surrogates are not paired but mapped separately.
|
||||
* Note that in this case unmatched surrogates are not detected.
|
||||
*/
|
||||
if(UTF_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) {
|
||||
if(UTF_IS_SURROGATE_FIRST(c)) {
|
||||
if(U16_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) {
|
||||
if(U16_IS_SURROGATE_LEAD(c)) {
|
||||
getTrail:
|
||||
if(source<sourceLimit) {
|
||||
/* test the following code unit */
|
||||
UChar trail=*source;
|
||||
if(UTF_IS_SECOND_SURROGATE(trail)) {
|
||||
if(U16_IS_TRAIL(trail)) {
|
||||
++source;
|
||||
++nextSourceIndex;
|
||||
c=UTF16_GET_PAIR_VALUE(c, trail);
|
||||
c=U16_GET_SUPPLEMENTARY(c, trail);
|
||||
if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
|
||||
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
|
||||
/* callback(unassigned) */
|
||||
@ -3557,16 +3559,16 @@ ucnv_MBCSSingleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
|
||||
*/
|
||||
c=*source++;
|
||||
++nextSourceIndex;
|
||||
if(UTF_IS_SURROGATE(c)) {
|
||||
if(UTF_IS_SURROGATE_FIRST(c)) {
|
||||
if(U16_IS_SURROGATE(c)) {
|
||||
if(U16_IS_SURROGATE_LEAD(c)) {
|
||||
getTrail:
|
||||
if(source<sourceLimit) {
|
||||
/* test the following code unit */
|
||||
UChar trail=*source;
|
||||
if(UTF_IS_SECOND_SURROGATE(trail)) {
|
||||
if(U16_IS_TRAIL(trail)) {
|
||||
++source;
|
||||
++nextSourceIndex;
|
||||
c=UTF16_GET_PAIR_VALUE(c, trail);
|
||||
c=U16_GET_SUPPLEMENTARY(c, trail);
|
||||
if(!hasSupplementary) {
|
||||
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
|
||||
/* callback(unassigned) */
|
||||
@ -3805,16 +3807,16 @@ unrolled:
|
||||
/* normal end of conversion: prepare for a new character */
|
||||
c=0;
|
||||
continue;
|
||||
} else if(!UTF_IS_SURROGATE(c)) {
|
||||
} else if(!U16_IS_SURROGATE(c)) {
|
||||
/* normal, unassigned BMP character */
|
||||
} else if(UTF_IS_SURROGATE_FIRST(c)) {
|
||||
} else if(U16_IS_SURROGATE_LEAD(c)) {
|
||||
getTrail:
|
||||
if(source<sourceLimit) {
|
||||
/* test the following code unit */
|
||||
UChar trail=*source;
|
||||
if(UTF_IS_SECOND_SURROGATE(trail)) {
|
||||
if(U16_IS_TRAIL(trail)) {
|
||||
++source;
|
||||
c=UTF16_GET_PAIR_VALUE(c, trail);
|
||||
c=U16_GET_SUPPLEMENTARY(c, trail);
|
||||
/* this codepage does not map supplementary code points */
|
||||
/* callback(unassigned) */
|
||||
} else {
|
||||
@ -4235,16 +4237,16 @@ ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
|
||||
* If it does, then surrogates are not paired but mapped separately.
|
||||
* Note that in this case unmatched surrogates are not detected.
|
||||
*/
|
||||
if(UTF_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) {
|
||||
if(UTF_IS_SURROGATE_FIRST(c)) {
|
||||
if(U16_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) {
|
||||
if(U16_IS_SURROGATE_LEAD(c)) {
|
||||
getTrail:
|
||||
if(source<sourceLimit) {
|
||||
/* test the following code unit */
|
||||
UChar trail=*source;
|
||||
if(UTF_IS_SECOND_SURROGATE(trail)) {
|
||||
if(U16_IS_TRAIL(trail)) {
|
||||
++source;
|
||||
++nextSourceIndex;
|
||||
c=UTF16_GET_PAIR_VALUE(c, trail);
|
||||
c=U16_GET_SUPPLEMENTARY(c, trail);
|
||||
if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
|
||||
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
|
||||
cnv->fromUnicodeStatus=prevLength; /* save the old state */
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2000-2009, International Business Machines
|
||||
* Copyright (C) 2000-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
@ -25,6 +25,7 @@
|
||||
|
||||
#include "unicode/ucnv.h"
|
||||
#include "unicode/ucnv_cb.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "ucnv_bld.h"
|
||||
#include "ucnv_cnv.h"
|
||||
#include "cmemory.h"
|
||||
@ -1098,17 +1099,17 @@ loop:
|
||||
*offsets++=sourceIndex;
|
||||
}
|
||||
--targetCapacity;
|
||||
} else if(UTF_IS_SURROGATE(c)) {
|
||||
if(UTF_IS_SURROGATE_FIRST(c)) {
|
||||
} else if(U16_IS_SURROGATE(c)) {
|
||||
if(U16_IS_SURROGATE_LEAD(c)) {
|
||||
getTrailSingle:
|
||||
lead=(UChar)c;
|
||||
if(source<sourceLimit) {
|
||||
/* test the following code unit */
|
||||
trail=*source;
|
||||
if(UTF_IS_SECOND_SURROGATE(trail)) {
|
||||
if(U16_IS_TRAIL(trail)) {
|
||||
++source;
|
||||
++nextSourceIndex;
|
||||
c=UTF16_GET_PAIR_VALUE(c, trail);
|
||||
c=U16_GET_SUPPLEMENTARY(c, trail);
|
||||
/* convert this surrogate code point */
|
||||
/* exit this condition tree */
|
||||
} else {
|
||||
@ -1296,16 +1297,16 @@ getTrailSingle:
|
||||
goto outputBytes;
|
||||
} else if(c<0xe000) {
|
||||
/* c is a surrogate */
|
||||
if(UTF_IS_SURROGATE_FIRST(c)) {
|
||||
if(U16_IS_SURROGATE_LEAD(c)) {
|
||||
getTrailUnicode:
|
||||
lead=(UChar)c;
|
||||
if(source<sourceLimit) {
|
||||
/* test the following code unit */
|
||||
trail=*source;
|
||||
if(UTF_IS_SECOND_SURROGATE(trail)) {
|
||||
if(U16_IS_TRAIL(trail)) {
|
||||
++source;
|
||||
++nextSourceIndex;
|
||||
c=UTF16_GET_PAIR_VALUE(c, trail);
|
||||
c=U16_GET_SUPPLEMENTARY(c, trail);
|
||||
/* convert this surrogate code point */
|
||||
/* exit this condition tree */
|
||||
} else {
|
||||
@ -1573,16 +1574,16 @@ loop:
|
||||
/* use the current dynamic window */
|
||||
*target++=(uint8_t)(delta|0x80);
|
||||
--targetCapacity;
|
||||
} else if(UTF_IS_SURROGATE(c)) {
|
||||
if(UTF_IS_SURROGATE_FIRST(c)) {
|
||||
} else if(U16_IS_SURROGATE(c)) {
|
||||
if(U16_IS_SURROGATE_LEAD(c)) {
|
||||
getTrailSingle:
|
||||
lead=(UChar)c;
|
||||
if(source<sourceLimit) {
|
||||
/* test the following code unit */
|
||||
trail=*source;
|
||||
if(UTF_IS_SECOND_SURROGATE(trail)) {
|
||||
if(U16_IS_TRAIL(trail)) {
|
||||
++source;
|
||||
c=UTF16_GET_PAIR_VALUE(c, trail);
|
||||
c=U16_GET_SUPPLEMENTARY(c, trail);
|
||||
/* convert this surrogate code point */
|
||||
/* exit this condition tree */
|
||||
} else {
|
||||
@ -1758,15 +1759,15 @@ getTrailSingle:
|
||||
goto outputBytes;
|
||||
} else if(c<0xe000) {
|
||||
/* c is a surrogate */
|
||||
if(UTF_IS_SURROGATE_FIRST(c)) {
|
||||
if(U16_IS_SURROGATE_LEAD(c)) {
|
||||
getTrailUnicode:
|
||||
lead=(UChar)c;
|
||||
if(source<sourceLimit) {
|
||||
/* test the following code unit */
|
||||
trail=*source;
|
||||
if(UTF_IS_SECOND_SURROGATE(trail)) {
|
||||
if(U16_IS_TRAIL(trail)) {
|
||||
++source;
|
||||
c=UTF16_GET_PAIR_VALUE(c, trail);
|
||||
c=U16_GET_SUPPLEMENTARY(c, trail);
|
||||
/* convert this surrogate code point */
|
||||
/* exit this condition tree */
|
||||
} else {
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002-2006, International Business Machines
|
||||
* Copyright (C) 2002-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
@ -19,6 +19,9 @@
|
||||
#include "unicode/chariter.h"
|
||||
#include "unicode/rep.h"
|
||||
#include "unicode/uiter.h"
|
||||
#include "unicode/utf.h"
|
||||
#include "unicode/utf8.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "cstring.h"
|
||||
|
||||
U_NAMESPACE_USE
|
||||
@ -1038,22 +1041,22 @@ uiter_current32(UCharIterator *iter) {
|
||||
UChar32 c, c2;
|
||||
|
||||
c=iter->current(iter);
|
||||
if(UTF_IS_SURROGATE(c)) {
|
||||
if(UTF_IS_SURROGATE_FIRST(c)) {
|
||||
if(U16_IS_SURROGATE(c)) {
|
||||
if(U16_IS_SURROGATE_LEAD(c)) {
|
||||
/*
|
||||
* go to the next code unit
|
||||
* we know that we are not at the limit because c!=U_SENTINEL
|
||||
*/
|
||||
iter->move(iter, 1, UITER_CURRENT);
|
||||
if(UTF_IS_SECOND_SURROGATE(c2=iter->current(iter))) {
|
||||
c=UTF16_GET_PAIR_VALUE(c, c2);
|
||||
if(U16_IS_TRAIL(c2=iter->current(iter))) {
|
||||
c=U16_GET_SUPPLEMENTARY(c, c2);
|
||||
}
|
||||
|
||||
/* undo index movement */
|
||||
iter->move(iter, -1, UITER_CURRENT);
|
||||
} else {
|
||||
if(UTF_IS_FIRST_SURROGATE(c2=iter->previous(iter))) {
|
||||
c=UTF16_GET_PAIR_VALUE(c2, c);
|
||||
if(U16_IS_LEAD(c2=iter->previous(iter))) {
|
||||
c=U16_GET_SUPPLEMENTARY(c2, c);
|
||||
}
|
||||
if(c2>=0) {
|
||||
/* undo index movement */
|
||||
@ -1069,9 +1072,9 @@ uiter_next32(UCharIterator *iter) {
|
||||
UChar32 c, c2;
|
||||
|
||||
c=iter->next(iter);
|
||||
if(UTF_IS_FIRST_SURROGATE(c)) {
|
||||
if(UTF_IS_SECOND_SURROGATE(c2=iter->next(iter))) {
|
||||
c=UTF16_GET_PAIR_VALUE(c, c2);
|
||||
if(U16_IS_LEAD(c)) {
|
||||
if(U16_IS_TRAIL(c2=iter->next(iter))) {
|
||||
c=U16_GET_SUPPLEMENTARY(c, c2);
|
||||
} else if(c2>=0) {
|
||||
/* unmatched first surrogate, undo index movement */
|
||||
iter->move(iter, -1, UITER_CURRENT);
|
||||
@ -1085,9 +1088,9 @@ uiter_previous32(UCharIterator *iter) {
|
||||
UChar32 c, c2;
|
||||
|
||||
c=iter->previous(iter);
|
||||
if(UTF_IS_SECOND_SURROGATE(c)) {
|
||||
if(UTF_IS_FIRST_SURROGATE(c2=iter->previous(iter))) {
|
||||
c=UTF16_GET_PAIR_VALUE(c2, c);
|
||||
if(U16_IS_TRAIL(c)) {
|
||||
if(U16_IS_LEAD(c2=iter->previous(iter))) {
|
||||
c=U16_GET_SUPPLEMENTARY(c2, c);
|
||||
} else if(c2>=0) {
|
||||
/* unmatched second surrogate, undo index movement */
|
||||
iter->move(iter, 1, UITER_CURRENT);
|
||||
|
@ -18,6 +18,8 @@
|
||||
#include "unicode/putil.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "unicode/utf.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "umutex.h"
|
||||
#include "cmemory.h"
|
||||
@ -442,12 +444,12 @@ compareName(UCharNames *names,
|
||||
static uint8_t getCharCat(UChar32 cp) {
|
||||
uint8_t cat;
|
||||
|
||||
if (UTF_IS_UNICODE_NONCHAR(cp)) {
|
||||
if (U_IS_UNICODE_NONCHAR(cp)) {
|
||||
return U_NONCHARACTER_CODE_POINT;
|
||||
}
|
||||
|
||||
if ((cat = u_charType(cp)) == U_SURROGATE) {
|
||||
cat = UTF_IS_LEAD(cp) ? U_LEAD_SURROGATE : U_TRAIL_SURROGATE;
|
||||
cat = U_IS_LEAD(cp) ? U_LEAD_SURROGATE : U_TRAIL_SURROGATE;
|
||||
}
|
||||
|
||||
return cat;
|
||||
|
@ -172,13 +172,7 @@ public:
|
||||
* @return The match/value Result.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
inline UStringTrieResult firstForCodePoint(UChar32 cp) {
|
||||
return cp<=0xffff ?
|
||||
first(cp) :
|
||||
(USTRINGTRIE_HAS_NEXT(first(U16_LEAD(cp))) ?
|
||||
next(U16_TRAIL(cp)) :
|
||||
USTRINGTRIE_NO_MATCH);
|
||||
}
|
||||
UStringTrieResult firstForCodePoint(UChar32 cp);
|
||||
|
||||
/**
|
||||
* Traverses the trie from the current state for this input UChar.
|
||||
@ -195,13 +189,7 @@ public:
|
||||
* @return The match/value Result.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
inline UStringTrieResult nextForCodePoint(UChar32 cp) {
|
||||
return cp<=0xffff ?
|
||||
next(cp) :
|
||||
(USTRINGTRIE_HAS_NEXT(next(U16_LEAD(cp))) ?
|
||||
next(U16_TRAIL(cp)) :
|
||||
USTRINGTRIE_NO_MATCH);
|
||||
}
|
||||
UStringTrieResult nextForCodePoint(UChar32 cp);
|
||||
|
||||
/**
|
||||
* Traverses the trie from the current state for this string.
|
||||
|
@ -120,6 +120,24 @@
|
||||
#define U_DISABLE_RENAMING 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_NO_DEFAULT_INCLUDE_UTF_HEADERS
|
||||
* Determines whether utypes.h includes utf.h, utf8.h, utf16.h and utf_old.h.
|
||||
* utypes.h includes those headers if this macro is defined to 0.
|
||||
* Otherwise, each of those headers must be included explicitly when using one of their macros.
|
||||
* Defaults to 0 for backward compatibility, except inside ICU.
|
||||
* @draft ICU 49
|
||||
*/
|
||||
#ifdef U_NO_DEFAULT_INCLUDE_UTF_HEADERS
|
||||
/* Use the predefined value. */
|
||||
#elif defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || \
|
||||
defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUT_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION) || \
|
||||
defined(U_TOOLUTIL_IMPLEMENTATION)
|
||||
# define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 1
|
||||
#else
|
||||
# define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_OVERRIDE_CXX_ALLOCATION
|
||||
* Determines whether to override new and delete.
|
||||
|
@ -1296,7 +1296,7 @@ public:
|
||||
* or 0xffff if the offset is not valid for this string
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
inline UChar32 char32At(int32_t offset) const;
|
||||
UChar32 char32At(int32_t offset) const;
|
||||
|
||||
/**
|
||||
* Adjust a random-access offset so that
|
||||
@ -1313,7 +1313,7 @@ public:
|
||||
* @see U16_SET_CP_START
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
inline int32_t getChar32Start(int32_t offset) const;
|
||||
int32_t getChar32Start(int32_t offset) const;
|
||||
|
||||
/**
|
||||
* Adjust a random-access offset so that
|
||||
@ -1331,7 +1331,7 @@ public:
|
||||
* @see U16_SET_CP_LIMIT
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
inline int32_t getChar32Limit(int32_t offset) const;
|
||||
int32_t getChar32Limit(int32_t offset) const;
|
||||
|
||||
/**
|
||||
* Move the code unit index along the string by delta code points.
|
||||
@ -2122,7 +2122,7 @@ public:
|
||||
* @return a reference to this
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
inline UnicodeString& append(UChar32 srcChar);
|
||||
UnicodeString& append(UChar32 srcChar);
|
||||
|
||||
|
||||
/* Insert operations */
|
||||
@ -2317,9 +2317,7 @@ public:
|
||||
* @return a reference to this
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
inline UnicodeString& replace(int32_t start,
|
||||
int32_t length,
|
||||
UChar32 srcChar);
|
||||
UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
|
||||
|
||||
/**
|
||||
* Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
|
||||
@ -4125,17 +4123,6 @@ UnicodeString::replace(int32_t start,
|
||||
UChar srcChar)
|
||||
{ return doReplace(start, _length, &srcChar, 0, 1); }
|
||||
|
||||
inline UnicodeString&
|
||||
UnicodeString::replace(int32_t start,
|
||||
int32_t _length,
|
||||
UChar32 srcChar) {
|
||||
UChar buffer[U16_MAX_LENGTH];
|
||||
int32_t count = 0;
|
||||
UBool isError = FALSE;
|
||||
U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
|
||||
return doReplace(start, _length, buffer, 0, count);
|
||||
}
|
||||
|
||||
inline UnicodeString&
|
||||
UnicodeString::replaceBetween(int32_t start,
|
||||
int32_t limit,
|
||||
@ -4234,43 +4221,6 @@ inline UChar
|
||||
UnicodeString::operator[] (int32_t offset) const
|
||||
{ return doCharAt(offset); }
|
||||
|
||||
inline UChar32
|
||||
UnicodeString::char32At(int32_t offset) const
|
||||
{
|
||||
int32_t len = length();
|
||||
if((uint32_t)offset < (uint32_t)len) {
|
||||
const UChar *array = getArrayStart();
|
||||
UChar32 c;
|
||||
U16_GET(array, 0, offset, len, c);
|
||||
return c;
|
||||
} else {
|
||||
return kInvalidUChar;
|
||||
}
|
||||
}
|
||||
|
||||
inline int32_t
|
||||
UnicodeString::getChar32Start(int32_t offset) const {
|
||||
if((uint32_t)offset < (uint32_t)length()) {
|
||||
const UChar *array = getArrayStart();
|
||||
U16_SET_CP_START(array, 0, offset);
|
||||
return offset;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
inline int32_t
|
||||
UnicodeString::getChar32Limit(int32_t offset) const {
|
||||
int32_t len = length();
|
||||
if((uint32_t)offset < (uint32_t)len) {
|
||||
const UChar *array = getArrayStart();
|
||||
U16_SET_CP_LIMIT(array, 0, offset, len);
|
||||
return offset;
|
||||
} else {
|
||||
return len;
|
||||
}
|
||||
}
|
||||
|
||||
inline UBool
|
||||
UnicodeString::isEmpty() const {
|
||||
return fShortLength == 0;
|
||||
@ -4423,15 +4373,6 @@ inline UnicodeString&
|
||||
UnicodeString::append(UChar srcChar)
|
||||
{ return doReplace(length(), 0, &srcChar, 0, 1); }
|
||||
|
||||
inline UnicodeString&
|
||||
UnicodeString::append(UChar32 srcChar) {
|
||||
UChar buffer[U16_MAX_LENGTH];
|
||||
int32_t _length = 0;
|
||||
UBool isError = FALSE;
|
||||
U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
|
||||
return doReplace(length(), 0, buffer, 0, _length);
|
||||
}
|
||||
|
||||
inline UnicodeString&
|
||||
UnicodeString::operator+= (UChar ch)
|
||||
{ return doReplace(length(), 0, &ch, 0, 1); }
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2010, International Business Machines
|
||||
* Copyright (C) 1999-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
@ -22,15 +22,20 @@
|
||||
* a surrogate or a non-character etc.
|
||||
*
|
||||
* The UChar and UChar32 data types for Unicode code units and code points
|
||||
* are defined in umachines.h because they can be machine-dependent.
|
||||
* are defined in umachine.h because they can be machine-dependent.
|
||||
*
|
||||
* utf.h is included by utypes.h and itself includes utf8.h and utf16.h after some
|
||||
* common definitions. Those files define macros for efficiently getting code points
|
||||
* If U_NO_DEFAULT_INCLUDE_UTF_HEADERS is 0 then utf.h is included by utypes.h
|
||||
* and itself includes utf8.h and utf16.h after some
|
||||
* common definitions.
|
||||
* If U_NO_DEFAULT_INCLUDE_UTF_HEADERS is 1 then each of these headers must be
|
||||
* included explicitly if their definitions are used.
|
||||
*
|
||||
* utf8.h and utf16.h define macros for efficiently getting code points
|
||||
* in and out of UTF-8/16 strings.
|
||||
* utf16.h macros have "U16_" prefixes.
|
||||
* utf8.h defines similar macros with "U8_" prefixes for UTF-8 string handling.
|
||||
*
|
||||
* ICU processes 16-bit Unicode strings.
|
||||
* ICU mostly processes 16-bit Unicode strings.
|
||||
* Most of the time, such strings are well-formed UTF-16.
|
||||
* Single, unpaired surrogates must be handled as well, and are treated in ICU
|
||||
* like regular code points where possible.
|
||||
@ -42,15 +47,16 @@
|
||||
* ICU functions handle supplementary code points (U+10000..U+10ffff)
|
||||
* but are optimized for the much more frequently occurring BMP code points.
|
||||
*
|
||||
* utf.h defines UChar to be an unsigned 16-bit integer. If this matches wchar_t, then
|
||||
* UChar is defined to be exactly wchar_t, otherwise uint16_t.
|
||||
* umachine.h defines UChar to be an unsigned 16-bit integer.
|
||||
* Where available, UChar is defined to be a char16_t
|
||||
* or a wchar_t (if that is an unsigned 16-bit type), otherwise uint16_t.
|
||||
*
|
||||
* UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit
|
||||
* Unicode code point (Unicode scalar value, 0..0x10ffff).
|
||||
* Before ICU 2.4, the definition of UChar32 was platform-dependent, similar to
|
||||
* the definition of UChar. For details see the documentation for UChar32 itself.
|
||||
*
|
||||
* utf.h also defines a small number of C macros for single Unicode code points.
|
||||
* utf.h defines a small number of C macros for single Unicode code points.
|
||||
* These are simple checks for surrogates and non-characters.
|
||||
* For actual Unicode character properties see uchar.h.
|
||||
*
|
||||
@ -59,9 +65,6 @@
|
||||
* The macros will detect if a surrogate code unit is unpaired
|
||||
* (lead unit without trail unit or vice versa) and just return the unit itself
|
||||
* as the code point.
|
||||
* (It is an accidental property of Unicode and UTF-16 that all
|
||||
* malformed sequences can be expressed unambiguously with a distinct subrange
|
||||
* of Unicode code points.)
|
||||
*
|
||||
* The regular "safe" macros require that the initial, passed-in string index
|
||||
* is within bounds. They only check the index when they read more than one
|
||||
@ -95,7 +98,7 @@
|
||||
* code point values (0..U+10ffff). They are indicated with negative values instead.
|
||||
*
|
||||
* For more information see the ICU User Guide Strings chapter
|
||||
* (http://icu-project.org/userguide/strings.html).
|
||||
* (http://userguide.icu-project.org/strings).
|
||||
*
|
||||
* <em>Usage:</em>
|
||||
* ICU coding guidelines for if() statements should be followed when using these macros.
|
||||
@ -108,7 +111,7 @@
|
||||
#ifndef __UTF_H__
|
||||
#define __UTF_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/umachine.h"
|
||||
/* include the utfXX.h after the following definitions */
|
||||
|
||||
/* single-code point definitions -------------------------------------------- */
|
||||
@ -227,10 +230,14 @@
|
||||
|
||||
/* include the utfXX.h ------------------------------------------------------ */
|
||||
|
||||
#if !U_NO_DEFAULT_INCLUDE_UTF_HEADERS
|
||||
|
||||
#include "unicode/utf8.h"
|
||||
#include "unicode/utf16.h"
|
||||
|
||||
/* utf_old.h contains deprecated, pre-ICU 2.4 definitions */
|
||||
#include "unicode/utf_old.h"
|
||||
|
||||
#endif
|
||||
#endif /* !U_NO_DEFAULT_INCLUDE_UTF_HEADERS */
|
||||
|
||||
#endif /* __UTF_H__ */
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2010, International Business Machines
|
||||
* Copyright (C) 1999-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
@ -19,11 +19,9 @@
|
||||
* \brief C API: 16-bit Unicode handling macros
|
||||
*
|
||||
* This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
|
||||
* utf16.h is included by utf.h after unicode/umachine.h
|
||||
* and some common definitions.
|
||||
*
|
||||
* For more information see utf.h and the ICU User Guide Strings chapter
|
||||
* (http://icu-project.org/userguide/strings.html).
|
||||
* (http://userguide.icu-project.org/strings).
|
||||
*
|
||||
* <em>Usage:</em>
|
||||
* ICU coding guidelines for if() statements should be followed when using these macros.
|
||||
@ -34,7 +32,7 @@
|
||||
#ifndef __UTF16_H__
|
||||
#define __UTF16_H__
|
||||
|
||||
/* utf.h must be included first. */
|
||||
#include "unicode/umachine.h"
|
||||
#ifndef __UTF_H__
|
||||
# include "unicode/utf.h"
|
||||
#endif
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2009, International Business Machines
|
||||
* Copyright (C) 1999-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
@ -19,11 +19,9 @@
|
||||
* \brief C API: 8-bit Unicode handling macros
|
||||
*
|
||||
* This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings.
|
||||
* utf8.h is included by utf.h after unicode/umachine.h
|
||||
* and some common definitions.
|
||||
*
|
||||
* For more information see utf.h and the ICU User Guide Strings chapter
|
||||
* (http://icu-project.org/userguide/strings.html).
|
||||
* (http://userguide.icu-project.org/strings).
|
||||
*
|
||||
* <em>Usage:</em>
|
||||
* ICU coding guidelines for if() statements should be followed when using these macros.
|
||||
@ -34,7 +32,7 @@
|
||||
#ifndef __UTF8_H__
|
||||
#define __UTF8_H__
|
||||
|
||||
/* utf.h must be included first. */
|
||||
#include "unicode/umachine.h"
|
||||
#ifndef __UTF_H__
|
||||
# include "unicode/utf.h"
|
||||
#endif
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002-2008, International Business Machines
|
||||
* Copyright (C) 2002-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
@ -28,9 +28,6 @@
|
||||
* If you are looking for a new equivalent to an old macro, please see the
|
||||
* comment at the old one.
|
||||
*
|
||||
* utf_old.h is included by utf.h after unicode/umachine.h
|
||||
* and some common definitions, to not break old code.
|
||||
*
|
||||
* Brief summary of reasons for deprecation:
|
||||
* - Switch on UTF_SIZE (selection of UTF-8/16/32 default string processing)
|
||||
* was impractical.
|
||||
@ -148,10 +145,9 @@
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
|
||||
/* utf.h must be included first. */
|
||||
#ifndef __UTF_H__
|
||||
# include "unicode/utf.h"
|
||||
#endif
|
||||
#include "unicode/utf.h"
|
||||
#include "unicode/utf8.h"
|
||||
#include "unicode/utf16.h"
|
||||
|
||||
/* Formerly utf.h, part 1 --------------------------------------------------- */
|
||||
|
||||
|
@ -34,10 +34,13 @@
|
||||
|
||||
|
||||
#include "unicode/umachine.h"
|
||||
#include "unicode/utf.h"
|
||||
#include "unicode/uversion.h"
|
||||
#include "unicode/uconfig.h"
|
||||
#include "float.h"
|
||||
#include <float.h>
|
||||
|
||||
#if !U_NO_DEFAULT_INCLUDE_UTF_HEADERS
|
||||
# include "unicode/utf.h"
|
||||
#endif
|
||||
|
||||
/*!
|
||||
* \file
|
||||
@ -238,7 +241,7 @@ typedef double UDate;
|
||||
* ICU is separated into three libraries.
|
||||
*/
|
||||
|
||||
/*
|
||||
/**
|
||||
* \def U_COMBINED_IMPLEMENTATION
|
||||
* Set to export library symbols from inside the ICU library
|
||||
* when all of ICU is in a single library.
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2001-2004, International Business Machines
|
||||
* Copyright (c) 2001-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
@ -10,6 +10,7 @@
|
||||
|
||||
#include "unicode/unifilt.h"
|
||||
#include "unicode/rep.h"
|
||||
#include "unicode/utf16.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(UnicodeFilter)
|
||||
@ -43,7 +44,7 @@ UMatchDegree UnicodeFilter::matches(const Replaceable& text,
|
||||
UChar32 c;
|
||||
if (offset < limit &&
|
||||
contains(c = text.char32At(offset))) {
|
||||
offset += UTF_CHAR_LENGTH(c);
|
||||
offset += U16_LENGTH(c);
|
||||
return U_MATCH;
|
||||
}
|
||||
if (offset > limit &&
|
||||
@ -53,7 +54,7 @@ UMatchDegree UnicodeFilter::matches(const Replaceable& text,
|
||||
// the lead surrogate).
|
||||
--offset;
|
||||
if (offset >= 0) {
|
||||
offset -= UTF_CHAR_LENGTH(text.char32At(offset)) - 1;
|
||||
offset -= U16_LENGTH(text.char32At(offset)) - 1;
|
||||
}
|
||||
return U_MATCH;
|
||||
}
|
||||
|
@ -9,9 +9,11 @@
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/parsepos.h"
|
||||
#include "unicode/symtable.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/utf8.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "ruleiter.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
@ -1059,7 +1061,7 @@ int32_t UnicodeSet::getSingleCP(const UnicodeString& s) {
|
||||
*/
|
||||
UnicodeSet& UnicodeSet::addAll(const UnicodeString& s) {
|
||||
UChar32 cp;
|
||||
for (int32_t i = 0; i < s.length(); i += UTF_CHAR_LENGTH(cp)) {
|
||||
for (int32_t i = 0; i < s.length(); i += U16_LENGTH(cp)) {
|
||||
cp = s.char32At(i);
|
||||
add(cp);
|
||||
}
|
||||
@ -1892,7 +1894,7 @@ void UnicodeSet::retain(const UChar32* other, int32_t otherLen, int8_t polarity)
|
||||
void UnicodeSet::_appendToPat(UnicodeString& buf, const UnicodeString& s, UBool
|
||||
escapeUnprintable) {
|
||||
UChar32 cp;
|
||||
for (int32_t i = 0; i < s.length(); i += UTF_CHAR_LENGTH(cp)) {
|
||||
for (int32_t i = 0; i < s.length(); i += U16_LENGTH(cp)) {
|
||||
_appendToPat(buf, cp = s.char32At(i), escapeUnprintable);
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007, International Business Machines
|
||||
* Copyright (C) 2007-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
@ -17,6 +17,8 @@
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/utf8.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "cmemory.h"
|
||||
#include "uvector.h"
|
||||
#include "unisetspan.h"
|
||||
|
@ -25,6 +25,8 @@
|
||||
#include "cmemory.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/utf.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "uelement.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "umutex.h"
|
||||
@ -158,7 +160,7 @@ UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count)
|
||||
allocate(capacity);
|
||||
} else {
|
||||
// count > 0, allocate and fill the new string with count c's
|
||||
int32_t unitCount = UTF_CHAR_LENGTH(c), length = count * unitCount;
|
||||
int32_t unitCount = U16_LENGTH(c), length = count * unitCount;
|
||||
if(capacity < length) {
|
||||
capacity = length;
|
||||
}
|
||||
@ -174,8 +176,8 @@ UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count)
|
||||
}
|
||||
} else {
|
||||
// get the code units for c
|
||||
UChar units[UTF_MAX_CHAR_LENGTH];
|
||||
UTF_APPEND_CHAR_UNSAFE(units, i, c);
|
||||
UChar units[U16_MAX_LENGTH];
|
||||
U16_APPEND_UNSAFE(units, i, c);
|
||||
|
||||
// now it must be i==unitCount
|
||||
i = 0;
|
||||
@ -686,6 +688,43 @@ UnicodeString::getChar32At(int32_t offset) const {
|
||||
return char32At(offset);
|
||||
}
|
||||
|
||||
// Returns the code point found at code-unit index `offset`, or kInvalidUChar
// when `offset` is not inside [0, length()).
UChar32
|
||||
UnicodeString::char32At(int32_t offset) const
|
||||
{
|
||||
int32_t len = length();
|
||||
// Comparing as unsigned rejects negative offsets and offsets >= len in one test:
// a negative int32_t converts to a value larger than any valid length.
if((uint32_t)offset < (uint32_t)len) {
|
||||
const UChar *array = getArrayStart();
|
||||
UChar32 c;
|
||||
// U16_GET is given the bounds 0 and len, so it reads within the string only.
// NOTE(review): per the utf16.h documentation it assembles a surrogate pair
// when offset is on either half — confirm against the macro docs.
U16_GET(array, 0, offset, len, c);
|
||||
return c;
|
||||
} else {
|
||||
return kInvalidUChar;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns `offset` after U16_SET_CP_START has adjusted it, or 0 when `offset`
// is not inside [0, length()).
// NOTE(review): the macro is expected to move offset back onto the lead
// surrogate when it points at the trail half of a pair — confirm in utf16.h.
int32_t
|
||||
UnicodeString::getChar32Start(int32_t offset) const {
|
||||
// Single unsigned comparison: rejects negative offsets and offsets >= length().
if((uint32_t)offset < (uint32_t)length()) {
|
||||
const UChar *array = getArrayStart();
|
||||
// Modifies the local copy of offset in place; 0 is the lower bound for the scan.
U16_SET_CP_START(array, 0, offset);
|
||||
return offset;
|
||||
} else {
|
||||
// Out-of-range input yields the start of the string.
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns `offset` after U16_SET_CP_LIMIT has adjusted it, or length() when
// `offset` is not inside [0, length()).
// NOTE(review): the macro is expected to move offset forward past the trail
// surrogate when it sits between the two halves of a pair — confirm in utf16.h.
int32_t
|
||||
UnicodeString::getChar32Limit(int32_t offset) const {
|
||||
int32_t len = length();
|
||||
// Single unsigned comparison: rejects negative offsets and offsets >= len.
if((uint32_t)offset < (uint32_t)len) {
|
||||
const UChar *array = getArrayStart();
|
||||
// Modifies the local copy of offset in place, bounded by 0 and len.
U16_SET_CP_LIMIT(array, 0, offset, len);
|
||||
return offset;
|
||||
} else {
|
||||
// Out-of-range input yields the end of the string.
return len;
|
||||
}
|
||||
}
|
||||
|
||||
int32_t
|
||||
UnicodeString::countChar32(int32_t start, int32_t length) const {
|
||||
pinIndices(start, length);
|
||||
@ -712,9 +751,9 @@ UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
|
||||
|
||||
const UChar *array = getArrayStart();
|
||||
if(delta>0) {
|
||||
UTF_FWD_N(array, index, len, delta);
|
||||
U16_FWD_N(array, index, len, delta);
|
||||
} else {
|
||||
UTF_BACK_N(array, 0, index, -delta);
|
||||
U16_BACK_N(array, 0, index, -delta);
|
||||
}
|
||||
|
||||
return index;
|
||||
@ -1196,6 +1235,26 @@ UnicodeString::setCharAt(int32_t offset,
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Encodes srcChar into a small stack buffer with U16_APPEND and hands the
// resulting code units to doReplace() for the range (start, _length).
// Returns whatever doReplace() returns.
UnicodeString&
|
||||
UnicodeString::replace(int32_t start,
|
||||
int32_t _length,
|
||||
UChar32 srcChar) {
|
||||
UChar buffer[U16_MAX_LENGTH];
|
||||
int32_t count = 0;
|
||||
// isError is written by U16_APPEND but never read afterwards in this function.
UBool isError = FALSE;
|
||||
// NOTE(review): when srcChar cannot be encoded, U16_APPEND presumably leaves
// count at 0, so doReplace() receives an empty replacement — confirm that
// removing the range is the intended result for an invalid code point.
U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
|
||||
return doReplace(start, _length, buffer, 0, count);
|
||||
}
|
||||
|
||||
// Encodes srcChar into a small stack buffer with U16_APPEND and passes the
// code units to doReplace() at position length() with a replaced length of 0,
// i.e. an insertion at the current end. Returns whatever doReplace() returns.
UnicodeString&
|
||||
UnicodeString::append(UChar32 srcChar) {
|
||||
UChar buffer[U16_MAX_LENGTH];
|
||||
// Local _length counts the code units written to buffer; it is distinct from
// the member function length() used below.
int32_t _length = 0;
|
||||
// isError is written by U16_APPEND but never read afterwards in this function.
UBool isError = FALSE;
|
||||
// NOTE(review): when srcChar cannot be encoded, U16_APPEND presumably leaves
// _length at 0, which would make the doReplace() call below add nothing — confirm.
U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
|
||||
return doReplace(length(), 0, buffer, 0, _length);
|
||||
}
|
||||
|
||||
UnicodeString&
|
||||
UnicodeString::doReplace( int32_t start,
|
||||
int32_t length,
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2007, International Business Machines
|
||||
* Copyright (C) 1999-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
@ -19,6 +19,7 @@
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/utf16.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2003-2008, International Business Machines
|
||||
* Copyright (C) 2003-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
@ -20,6 +20,7 @@
|
||||
|
||||
#include "unicode/uiter.h"
|
||||
#include "unicode/unorm.h"
|
||||
#include "unicode/utf.h"
|
||||
#include "unorm_it.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* *
|
||||
* Copyright (C) 1999-2010, International Business Machines Corporation *
|
||||
* Copyright (C) 1999-2011, International Business Machines Corporation *
|
||||
* and others. All Rights Reserved. *
|
||||
* *
|
||||
*******************************************************************************
|
||||
@ -22,6 +22,7 @@
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "uarrsort.h"
|
||||
|
@ -27,17 +27,13 @@
|
||||
|
||||
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
|
||||
|
||||
#if UTF_SIZE<16
|
||||
/*
|
||||
* This implementation assumes that the internal encoding is UTF-16
|
||||
* or UTF-32, not UTF-8.
|
||||
* The main assumption is that the Arabic characters and their
|
||||
* presentation forms each fit into a single UChar.
|
||||
* With UTF-8, they occupy 2 or 3 bytes, and more than the ASCII
|
||||
* characters.
|
||||
*/
|
||||
# error This implementation assumes UTF-16 or UTF-32 (check UTF_SIZE)
|
||||
#endif
|
||||
/*
|
||||
* This implementation is designed for 16-bit Unicode strings.
|
||||
* The main assumption is that the Arabic characters and their
|
||||
* presentation forms each fit into a single UChar.
|
||||
* With UTF-8, they occupy 2 or 3 bytes, and more than the ASCII
|
||||
* characters.
|
||||
*/
|
||||
|
||||
/*
|
||||
* ### TODO in general for letter shaping:
|
||||
|
@ -23,6 +23,8 @@
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/ucasemap.h"
|
||||
#include "unicode/ubrk.h"
|
||||
#include "unicode/utf.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "cmemory.h"
|
||||
#include "ucase.h"
|
||||
#include "ustr_imp.h"
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/putil.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "cstring.h"
|
||||
#include "cwchar.h"
|
||||
#include "cmemory.h"
|
||||
@ -791,8 +792,8 @@ uprv_strCompare(const UChar *s1, int32_t length1,
|
||||
if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
|
||||
/* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
|
||||
if(
|
||||
(c1<=0xdbff && (s1+1)!=limit1 && UTF_IS_TRAIL(*(s1+1))) ||
|
||||
(UTF_IS_TRAIL(c1) && start1!=s1 && UTF_IS_LEAD(*(s1-1)))
|
||||
(c1<=0xdbff && (s1+1)!=limit1 && U16_IS_TRAIL(*(s1+1))) ||
|
||||
(U16_IS_TRAIL(c1) && start1!=s1 && U16_IS_LEAD(*(s1-1)))
|
||||
) {
|
||||
/* part of a surrogate pair, leave >=d800 */
|
||||
} else {
|
||||
@ -801,8 +802,8 @@ uprv_strCompare(const UChar *s1, int32_t length1,
|
||||
}
|
||||
|
||||
if(
|
||||
(c2<=0xdbff && (s2+1)!=limit2 && UTF_IS_TRAIL(*(s2+1))) ||
|
||||
(UTF_IS_TRAIL(c2) && start2!=s2 && UTF_IS_LEAD(*(s2-1)))
|
||||
(c2<=0xdbff && (s2+1)!=limit2 && U16_IS_TRAIL(*(s2+1))) ||
|
||||
(U16_IS_TRAIL(c2) && start2!=s2 && U16_IS_LEAD(*(s2-1)))
|
||||
) {
|
||||
/* part of a surrogate pair, leave >=d800 */
|
||||
} else {
|
||||
@ -853,8 +854,8 @@ u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrde
|
||||
if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
|
||||
/* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
|
||||
if(
|
||||
(c1<=0xdbff && UTF_IS_TRAIL(iter1->current(iter1))) ||
|
||||
(UTF_IS_TRAIL(c1) && (iter1->previous(iter1), UTF_IS_LEAD(iter1->previous(iter1))))
|
||||
(c1<=0xdbff && U16_IS_TRAIL(iter1->current(iter1))) ||
|
||||
(U16_IS_TRAIL(c1) && (iter1->previous(iter1), U16_IS_LEAD(iter1->previous(iter1))))
|
||||
) {
|
||||
/* part of a surrogate pair, leave >=d800 */
|
||||
} else {
|
||||
@ -863,8 +864,8 @@ u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrde
|
||||
}
|
||||
|
||||
if(
|
||||
(c2<=0xdbff && UTF_IS_TRAIL(iter2->current(iter2))) ||
|
||||
(UTF_IS_TRAIL(c2) && (iter2->previous(iter2), UTF_IS_LEAD(iter2->previous(iter2))))
|
||||
(c2<=0xdbff && U16_IS_TRAIL(iter2->current(iter2))) ||
|
||||
(U16_IS_TRAIL(c2) && (iter2->previous(iter2), U16_IS_LEAD(iter2->previous(iter2))))
|
||||
) {
|
||||
/* part of a surrogate pair, leave >=d800 */
|
||||
} else {
|
||||
@ -897,14 +898,14 @@ u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrde
|
||||
void fragment {
|
||||
/* iff a surrogate is part of a surrogate pair, leave >=d800 */
|
||||
if(c1<=0xdbff) {
|
||||
if(!UTF_IS_TRAIL(iter1->current(iter1))) {
|
||||
if(!U16_IS_TRAIL(iter1->current(iter1))) {
|
||||
/* lead surrogate code point - make <d800 */
|
||||
c1-=0x2800;
|
||||
}
|
||||
} else if(c1<=0xdfff) {
|
||||
int32_t idx=iter1->getIndex(iter1, UITER_CURRENT);
|
||||
iter1->previous(iter1); /* ==c1 */
|
||||
if(!UTF_IS_LEAD(iter1->previous(iter1))) {
|
||||
if(!U16_IS_LEAD(iter1->previous(iter1))) {
|
||||
/* trail surrogate code point - make <d800 */
|
||||
c1-=0x2800;
|
||||
}
|
||||
@ -1012,7 +1013,7 @@ u_countChar32(const UChar *s, int32_t length) {
|
||||
if(length>=0) {
|
||||
while(length>0) {
|
||||
++count;
|
||||
if(UTF_IS_LEAD(*s) && length>=2 && UTF_IS_TRAIL(*(s+1))) {
|
||||
if(U16_IS_LEAD(*s) && length>=2 && U16_IS_TRAIL(*(s+1))) {
|
||||
s+=2;
|
||||
length-=2;
|
||||
} else {
|
||||
@ -1033,7 +1034,7 @@ u_countChar32(const UChar *s, int32_t length) {
|
||||
* sufficient to look ahead one because of UTF-16;
|
||||
* safe to look ahead one because at worst that would be the terminating NUL
|
||||
*/
|
||||
if(UTF_IS_LEAD(c) && UTF_IS_TRAIL(*s)) {
|
||||
if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) {
|
||||
++s;
|
||||
}
|
||||
}
|
||||
@ -1306,11 +1307,11 @@ u_unescapeAt(UNESCAPE_CHAR_AT charAt,
|
||||
/* Map \cX to control-X: X & 0x1F */
|
||||
if (c == 0x0063 /*'c'*/ && *offset < length) {
|
||||
c = charAt((*offset)++, context);
|
||||
if (UTF_IS_FIRST_SURROGATE(c) && *offset < length) {
|
||||
if (U16_IS_LEAD(c) && *offset < length) {
|
||||
UChar c2 = charAt(*offset, context);
|
||||
if (UTF_IS_SECOND_SURROGATE(c2)) {
|
||||
if (U16_IS_TRAIL(c2)) {
|
||||
++(*offset);
|
||||
c = (UChar) UTF16_GET_PAIR_VALUE(c, c2); /* [sic] */
|
||||
c = (UChar) U16_GET_SUPPLEMENTARY(c, c2); /* [sic] */
|
||||
}
|
||||
}
|
||||
return 0x1F & c;
|
||||
@ -1319,11 +1320,11 @@ u_unescapeAt(UNESCAPE_CHAR_AT charAt,
|
||||
/* If no special forms are recognized, then consider
|
||||
* the backslash to generically escape the next character.
|
||||
* Deal with surrogate pairs. */
|
||||
if (UTF_IS_FIRST_SURROGATE(c) && *offset < length) {
|
||||
if (U16_IS_LEAD(c) && *offset < length) {
|
||||
UChar c2 = charAt(*offset, context);
|
||||
if (UTF_IS_SECOND_SURROGATE(c2)) {
|
||||
if (U16_IS_TRAIL(c2)) {
|
||||
++(*offset);
|
||||
return UTF16_GET_PAIR_VALUE(c, c2);
|
||||
return U16_GET_SUPPLEMENTARY(c, c2);
|
||||
}
|
||||
}
|
||||
return c;
|
||||
@ -1383,10 +1384,10 @@ u_unescape(const char *src, UChar *dest, int32_t destCapacity) {
|
||||
goto err;
|
||||
}
|
||||
src += lenParsed; /* advance past escape seq. */
|
||||
if (dest != NULL && UTF_CHAR_LENGTH(c32) <= (destCapacity - i)) {
|
||||
UTF_APPEND_CHAR_UNSAFE(dest, i, c32);
|
||||
if (dest != NULL && U16_LENGTH(c32) <= (destCapacity - i)) {
|
||||
U16_APPEND_UNSAFE(dest, i, c32);
|
||||
} else {
|
||||
i += UTF_CHAR_LENGTH(c32);
|
||||
i += U16_LENGTH(c32);
|
||||
}
|
||||
segment = src;
|
||||
} else {
|
||||
|
@ -26,6 +26,9 @@
|
||||
|
||||
#include "unicode/putil.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/utf.h"
|
||||
#include "unicode/utf8.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "cstring.h"
|
||||
#include "cmemory.h"
|
||||
#include "ustr_imp.h"
|
||||
@ -268,8 +271,8 @@ static UChar32
|
||||
utf8_nextCharSafeBodyTerminated(const uint8_t **ps, UChar32 c) {
|
||||
const uint8_t *s=*ps;
|
||||
uint8_t trail, illegal=0;
|
||||
uint8_t count=UTF8_COUNT_TRAIL_BYTES(c);
|
||||
UTF8_MASK_LEAD_BYTE((c), count);
|
||||
uint8_t count=U8_COUNT_TRAIL_BYTES(c);
|
||||
U8_MASK_LEAD_BYTE((c), count);
|
||||
/* count==0 for illegally leading trail bytes and the illegal bytes 0xfe and 0xff */
|
||||
switch(count) {
|
||||
/* each branch falls through to the next one */
|
||||
@ -309,11 +312,11 @@ utf8_nextCharSafeBodyTerminated(const uint8_t **ps, UChar32 c) {
|
||||
|
||||
/* correct sequence - all trail bytes have (b7..b6)==(10)? */
|
||||
/* illegal is also set if count>=4 */
|
||||
if(illegal || c<utf8_minLegal[count] || UTF_IS_SURROGATE(c)) {
|
||||
if(illegal || c<utf8_minLegal[count] || U_IS_SURROGATE(c)) {
|
||||
/* error handling */
|
||||
/* don't go beyond this sequence */
|
||||
s=*ps;
|
||||
while(count>0 && UTF8_IS_TRAIL(*s)) {
|
||||
while(count>0 && U8_IS_TRAIL(*s)) {
|
||||
++s;
|
||||
--count;
|
||||
}
|
||||
@ -336,9 +339,9 @@ static UChar32
|
||||
utf8_nextCharSafeBodyPointer(const uint8_t **ps, const uint8_t *limit, UChar32 c) {
|
||||
const uint8_t *s=*ps;
|
||||
uint8_t trail, illegal=0;
|
||||
uint8_t count=UTF8_COUNT_TRAIL_BYTES(c);
|
||||
uint8_t count=U8_COUNT_TRAIL_BYTES(c);
|
||||
if((limit-s)>=count) {
|
||||
UTF8_MASK_LEAD_BYTE((c), count);
|
||||
U8_MASK_LEAD_BYTE((c), count);
|
||||
/* count==0 for illegally leading trail bytes and the illegal bytes 0xfe and 0xff */
|
||||
switch(count) {
|
||||
/* each branch falls through to the next one */
|
||||
@ -376,11 +379,11 @@ utf8_nextCharSafeBodyPointer(const uint8_t **ps, const uint8_t *limit, UChar32 c
|
||||
|
||||
/* correct sequence - all trail bytes have (b7..b6)==(10)? */
|
||||
/* illegal is also set if count>=4 */
|
||||
if(illegal || c<utf8_minLegal[count] || UTF_IS_SURROGATE(c)) {
|
||||
if(illegal || c<utf8_minLegal[count] || U_IS_SURROGATE(c)) {
|
||||
/* error handling */
|
||||
/* don't go beyond this sequence */
|
||||
s=*ps;
|
||||
while(count>0 && s<limit && UTF8_IS_TRAIL(*s)) {
|
||||
while(count>0 && s<limit && U8_IS_TRAIL(*s)) {
|
||||
++s;
|
||||
--count;
|
||||
}
|
||||
@ -479,9 +482,9 @@ u_strFromUTF8WithSub(UChar *dest,
|
||||
} else if(ch<=0xFFFF) {
|
||||
*(pDest++)=(UChar)ch;
|
||||
} else {
|
||||
*(pDest++)=UTF16_LEAD(ch);
|
||||
*(pDest++)=U16_LEAD(ch);
|
||||
if(pDest<pDestLimit) {
|
||||
*(pDest++)=UTF16_TRAIL(ch);
|
||||
*(pDest++)=U16_TRAIL(ch);
|
||||
} else {
|
||||
reqLength++;
|
||||
break;
|
||||
@ -600,8 +603,8 @@ u_strFromUTF8WithSub(UChar *dest,
|
||||
}else if(ch<=0xFFFF){
|
||||
*(pDest++)=(UChar)ch;
|
||||
}else{
|
||||
*(pDest++)=UTF16_LEAD(ch);
|
||||
*(pDest++)=UTF16_TRAIL(ch);
|
||||
*(pDest++)=U16_LEAD(ch);
|
||||
*(pDest++)=U16_TRAIL(ch);
|
||||
}
|
||||
}
|
||||
} while(--count > 0);
|
||||
@ -646,9 +649,9 @@ u_strFromUTF8WithSub(UChar *dest,
|
||||
}else if(ch<=0xFFFF){
|
||||
*(pDest++)=(UChar)ch;
|
||||
}else{
|
||||
*(pDest++)=UTF16_LEAD(ch);
|
||||
*(pDest++)=U16_LEAD(ch);
|
||||
if(pDest<pDestLimit){
|
||||
*(pDest++)=UTF16_TRAIL(ch);
|
||||
*(pDest++)=U16_TRAIL(ch);
|
||||
}else{
|
||||
reqLength++;
|
||||
break;
|
||||
@ -693,7 +696,7 @@ u_strFromUTF8WithSub(UChar *dest,
|
||||
*pErrorCode = U_INVALID_CHAR_FOUND;
|
||||
return NULL;
|
||||
}
|
||||
reqLength+=UTF_CHAR_LENGTH(ch);
|
||||
reqLength+=U16_LENGTH(ch);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1029,10 +1032,10 @@ u_strToUTF8WithSub(char *dest,
|
||||
} else /* ch is a surrogate */ {
|
||||
int32_t length;
|
||||
|
||||
/*need not check for NUL because NUL fails UTF_IS_TRAIL() anyway*/
|
||||
if(UTF_IS_SURROGATE_FIRST(ch) && UTF_IS_TRAIL(ch2=*pSrc)) {
|
||||
/*need not check for NUL because NUL fails U16_IS_TRAIL() anyway*/
|
||||
if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(ch2=*pSrc)) {
|
||||
++pSrc;
|
||||
ch=UTF16_GET_PAIR_VALUE(ch, ch2);
|
||||
ch=U16_GET_SUPPLEMENTARY(ch, ch2);
|
||||
} else if(subchar>=0) {
|
||||
ch=subchar;
|
||||
++numSubstitutions;
|
||||
@ -1057,9 +1060,9 @@ u_strToUTF8WithSub(char *dest,
|
||||
++reqLength;
|
||||
} else if(ch<=0x7ff) {
|
||||
reqLength+=2;
|
||||
} else if(!UTF_IS_SURROGATE(ch)) {
|
||||
} else if(!U16_IS_SURROGATE(ch)) {
|
||||
reqLength+=3;
|
||||
} else if(UTF_IS_SURROGATE_FIRST(ch) && UTF_IS_TRAIL(ch2=*pSrc)) {
|
||||
} else if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(ch2=*pSrc)) {
|
||||
++pSrc;
|
||||
reqLength+=4;
|
||||
} else if(subchar>=0) {
|
||||
@ -1117,9 +1120,9 @@ u_strToUTF8WithSub(char *dest,
|
||||
break; /* recompute count */
|
||||
}
|
||||
|
||||
if(UTF_IS_SURROGATE_FIRST(ch) && UTF_IS_TRAIL(ch2=*pSrc)) {
|
||||
if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(ch2=*pSrc)) {
|
||||
++pSrc;
|
||||
ch=UTF16_GET_PAIR_VALUE(ch, ch2);
|
||||
ch=U16_GET_SUPPLEMENTARY(ch, ch2);
|
||||
|
||||
/* writing 4 bytes per 2 UChars is ok */
|
||||
*pDest++=(uint8_t)((ch>>18)|0xf0);
|
||||
@ -1172,9 +1175,9 @@ u_strToUTF8WithSub(char *dest,
|
||||
} else /* ch is a surrogate */ {
|
||||
int32_t length;
|
||||
|
||||
if(UTF_IS_SURROGATE_FIRST(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) {
|
||||
if(U16_IS_SURROGATE_LEAD(ch) && pSrc<pSrcLimit && U16_IS_TRAIL(ch2=*pSrc)) {
|
||||
++pSrc;
|
||||
ch=UTF16_GET_PAIR_VALUE(ch, ch2);
|
||||
ch=U16_GET_SUPPLEMENTARY(ch, ch2);
|
||||
} else if(subchar>=0) {
|
||||
ch=subchar;
|
||||
++numSubstitutions;
|
||||
@ -1200,9 +1203,9 @@ u_strToUTF8WithSub(char *dest,
|
||||
++reqLength;
|
||||
} else if(ch<=0x7ff) {
|
||||
reqLength+=2;
|
||||
} else if(!UTF_IS_SURROGATE(ch)) {
|
||||
} else if(!U16_IS_SURROGATE(ch)) {
|
||||
reqLength+=3;
|
||||
} else if(UTF_IS_SURROGATE_FIRST(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) {
|
||||
} else if(U16_IS_SURROGATE_LEAD(ch) && pSrc<pSrcLimit && U16_IS_TRAIL(ch2=*pSrc)) {
|
||||
++pSrc;
|
||||
reqLength+=4;
|
||||
} else if(subchar>=0) {
|
||||
|
@ -19,6 +19,9 @@
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/chariter.h"
|
||||
#include "unicode/utext.h"
|
||||
#include "unicode/utf.h"
|
||||
#include "unicode/utf8.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
@ -118,13 +121,13 @@ utext_setNativeIndex(UText *ut, int64_t index) {
|
||||
// Adjust the index position if it is in the middle of a surrogate pair.
|
||||
if (ut->chunkOffset<ut->chunkLength) {
|
||||
UChar c= ut->chunkContents[ut->chunkOffset];
|
||||
if (UTF16_IS_TRAIL(c)) {
|
||||
if (U16_IS_TRAIL(c)) {
|
||||
if (ut->chunkOffset==0) {
|
||||
ut->pFuncs->access(ut, ut->chunkNativeStart, FALSE);
|
||||
}
|
||||
if (ut->chunkOffset>0) {
|
||||
UChar lead = ut->chunkContents[ut->chunkOffset-1];
|
||||
if (UTF16_IS_LEAD(lead)) {
|
||||
if (U16_IS_LEAD(lead)) {
|
||||
ut->chunkOffset--;
|
||||
}
|
||||
}
|
||||
@ -1768,9 +1771,9 @@ utext_strFromUTF8(UChar *dest,
|
||||
if(U_IS_BMP(ch)){
|
||||
*(pDest++)=(UChar)ch;
|
||||
}else{
|
||||
*(pDest++)=UTF16_LEAD(ch);
|
||||
*(pDest++)=U16_LEAD(ch);
|
||||
if(pDest<pDestLimit){
|
||||
*(pDest++)=UTF16_TRAIL(ch);
|
||||
*(pDest++)=U16_TRAIL(ch);
|
||||
}else{
|
||||
reqLength++;
|
||||
break;
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2006, International Business Machines
|
||||
* Copyright (C) 1999-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
@ -23,6 +23,9 @@
|
||||
#endif
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/utf.h"
|
||||
#include "unicode/utf8.h"
|
||||
#include "unicode/utf_old.h"
|
||||
|
||||
/*
|
||||
* This table could be replaced on many machines by
|
||||
@ -107,11 +110,11 @@ utf8_errorValue[6]={
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict) {
|
||||
int32_t i=*pi;
|
||||
uint8_t count=UTF8_COUNT_TRAIL_BYTES(c);
|
||||
uint8_t count=U8_COUNT_TRAIL_BYTES(c);
|
||||
if((i)+count<=(length)) {
|
||||
uint8_t trail, illegal=0;
|
||||
|
||||
UTF8_MASK_LEAD_BYTE((c), count);
|
||||
U8_MASK_LEAD_BYTE((c), count);
|
||||
/* count==0 for illegally leading trail bytes and the illegal bytes 0xfe and 0xff */
|
||||
switch(count) {
|
||||
/* each branch falls through to the next one */
|
||||
@ -161,12 +164,12 @@ utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c,
|
||||
|
||||
/* correct sequence - all trail bytes have (b7..b6)==(10)? */
|
||||
/* illegal is also set if count>=4 */
|
||||
if(illegal || (c)<utf8_minLegal[count] || (UTF_IS_SURROGATE(c) && strict!=-2)) {
|
||||
if(illegal || (c)<utf8_minLegal[count] || (U_IS_SURROGATE(c) && strict!=-2)) {
|
||||
/* error handling */
|
||||
uint8_t errorCount=count;
|
||||
/* don't go beyond this sequence */
|
||||
i=*pi;
|
||||
while(count>0 && UTF8_IS_TRAIL(s[i])) {
|
||||
while(count>0 && U8_IS_TRAIL(s[i])) {
|
||||
++(i);
|
||||
--count;
|
||||
}
|
||||
@ -175,7 +178,7 @@ utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c,
|
||||
} else {
|
||||
c=U_SENTINEL;
|
||||
}
|
||||
} else if((strict)>0 && UTF_IS_UNICODE_NONCHAR(c)) {
|
||||
} else if((strict)>0 && U_IS_UNICODE_NONCHAR(c)) {
|
||||
/* strict: forbid non-characters like U+fffe */
|
||||
c=utf8_errorValue[count];
|
||||
}
|
||||
@ -183,7 +186,7 @@ utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c,
|
||||
/* error handling */
|
||||
int32_t i0=i;
|
||||
/* don't just set (i)=(length) in case there is an illegal sequence */
|
||||
while((i)<(length) && UTF8_IS_TRAIL(s[i])) {
|
||||
while((i)<(length) && U8_IS_TRAIL(s[i])) {
|
||||
++(i);
|
||||
}
|
||||
if(strict>=0) {
|
||||
@ -265,14 +268,14 @@ utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, U
|
||||
if((uint8_t)(b-0x80)<0x7e) { /* 0x80<=b<0xfe */
|
||||
if(b&0x40) {
|
||||
/* lead byte, this will always end the loop */
|
||||
uint8_t shouldCount=UTF8_COUNT_TRAIL_BYTES(b);
|
||||
uint8_t shouldCount=U8_COUNT_TRAIL_BYTES(b);
|
||||
|
||||
if(count==shouldCount) {
|
||||
/* set the new position */
|
||||
*pi=i;
|
||||
UTF8_MASK_LEAD_BYTE(b, count);
|
||||
U8_MASK_LEAD_BYTE(b, count);
|
||||
c|=(UChar32)b<<shift;
|
||||
if(count>=4 || c>0x10ffff || c<utf8_minLegal[count] || (UTF_IS_SURROGATE(c) && strict!=-2) || (strict>0 && UTF_IS_UNICODE_NONCHAR(c))) {
|
||||
if(count>=4 || c>0x10ffff || c<utf8_minLegal[count] || (U_IS_SURROGATE(c) && strict!=-2) || (strict>0 && U_IS_UNICODE_NONCHAR(c))) {
|
||||
/* illegal sequence or (strict and non-character) */
|
||||
if(count>=4) {
|
||||
count=3;
|
||||
@ -351,7 +354,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i) {
|
||||
if((uint8_t)(b-0x80)>=0x7e) { /* not 0x80<=b<0xfe */
|
||||
break;
|
||||
} else if(b>=0xc0) {
|
||||
if(UTF8_COUNT_TRAIL_BYTES(b)>=(i-I)) {
|
||||
if(U8_COUNT_TRAIL_BYTES(b)>=(i-I)) {
|
||||
return I;
|
||||
} else {
|
||||
break;
|
||||
|
@ -9,6 +9,7 @@
|
||||
*/
|
||||
|
||||
#include "unicode/unimatch.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "patternprops.h"
|
||||
#include "util.h"
|
||||
|
||||
@ -170,9 +171,9 @@ int32_t ICU_Utility::skipWhitespace(const UnicodeString& str, int32_t& pos,
|
||||
//? while (pos != stop &&
|
||||
//? PatternProps::isWhiteSpace(c = text.char32At(pos))) {
|
||||
//? if (isForward) {
|
||||
//? pos += UTF_CHAR_LENGTH(c);
|
||||
//? pos += U16_LENGTH(c);
|
||||
//? } else {
|
||||
//? pos -= UTF_CHAR_LENGTH(c);
|
||||
//? pos -= U16_LENGTH(c);
|
||||
//? }
|
||||
//? }
|
||||
//?
|
||||
@ -242,7 +243,7 @@ int32_t ICU_Utility::parsePattern(const UnicodeString& pat,
|
||||
// parse \s*
|
||||
if (cpat == 126 /*~*/) {
|
||||
if (PatternProps::isWhiteSpace(c)) {
|
||||
index += UTF_CHAR_LENGTH(c);
|
||||
index += U16_LENGTH(c);
|
||||
continue;
|
||||
} else {
|
||||
if (++ipat == pat.length()) {
|
||||
@ -254,8 +255,8 @@ int32_t ICU_Utility::parsePattern(const UnicodeString& pat,
|
||||
|
||||
// parse literal
|
||||
else if (c == cpat) {
|
||||
index += UTF_CHAR_LENGTH(c);
|
||||
ipat += UTF_CHAR_LENGTH(cpat);
|
||||
index += U16_LENGTH(c);
|
||||
ipat += U16_LENGTH(cpat);
|
||||
if (ipat == pat.length()) {
|
||||
return index; // success; c parsed
|
||||
}
|
||||
|
@ -9,6 +9,7 @@
|
||||
*/
|
||||
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "patternprops.h"
|
||||
#include "util.h"
|
||||
|
||||
@ -159,7 +160,7 @@ UnicodeString ICU_Utility::parseUnicodeIdentifier(const UnicodeString& str, int3
|
||||
break;
|
||||
}
|
||||
}
|
||||
p += UTF_CHAR_LENGTH(ch);
|
||||
p += U16_LENGTH(ch);
|
||||
}
|
||||
pos = p;
|
||||
return buf;
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2001-2008, International Business Machines
|
||||
* Copyright (C) 2001-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
@ -18,6 +18,7 @@
|
||||
#define __UTRIE_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "udataswp.h"
|
||||
|
||||
U_CDECL_BEGIN
|
||||
@ -210,7 +211,7 @@ typedef struct UTrie UTrie;
|
||||
(result)=_UTRIE_GET_FROM_BMP(trie, data, c32); \
|
||||
} else if((uint32_t)(c32)<=0x10ffff) { \
|
||||
/* supplementary code point */ \
|
||||
UChar __lead16=UTF16_LEAD(c32); \
|
||||
UChar __lead16=U16_LEAD(c32); \
|
||||
_UTRIE_GET_FROM_PAIR(trie, data, __lead16, c32, result, resultType); \
|
||||
} else { \
|
||||
/* out of range */ \
|
||||
@ -220,10 +221,10 @@ typedef struct UTrie UTrie;
|
||||
/** Internal next-post-increment: get the next code point (c, c2) and its data */
|
||||
#define _UTRIE_NEXT(trie, data, src, limit, c, c2, result, resultType) { \
|
||||
(c)=*(src)++; \
|
||||
if(!UTF_IS_LEAD(c)) { \
|
||||
if(!U16_IS_LEAD(c)) { \
|
||||
(c2)=0; \
|
||||
(result)=_UTRIE_GET_RAW((trie), data, 0, (c)); \
|
||||
} else if((src)!=(limit) && UTF_IS_TRAIL((c2)=*(src))) { \
|
||||
} else if((src)!=(limit) && U16_IS_TRAIL((c2)=*(src))) { \
|
||||
++(src); \
|
||||
_UTRIE_GET_FROM_PAIR((trie), data, (c), (c2), (result), resultType); \
|
||||
} else { \
|
||||
@ -236,12 +237,12 @@ typedef struct UTrie UTrie;
|
||||
/** Internal previous: get the previous code point (c, c2) and its data */
|
||||
#define _UTRIE_PREVIOUS(trie, data, start, src, c, c2, result, resultType) { \
|
||||
(c)=*--(src); \
|
||||
if(!UTF_IS_SURROGATE(c)) { \
|
||||
if(!U16_IS_SURROGATE(c)) { \
|
||||
(c2)=0; \
|
||||
(result)=_UTRIE_GET_RAW((trie), data, 0, (c)); \
|
||||
} else if(!UTF_IS_SURROGATE_FIRST(c)) { \
|
||||
} else if(!U16_IS_SURROGATE_LEAD(c)) { \
|
||||
/* trail surrogate */ \
|
||||
if((start)!=(src) && UTF_IS_LEAD((c2)=*((src)-1))) { \
|
||||
if((start)!=(src) && U16_IS_LEAD((c2)=*((src)-1))) { \
|
||||
--(src); \
|
||||
(result)=(c); (c)=(c2); (c2)=(UChar)(result); /* swap c, c2 */ \
|
||||
_UTRIE_GET_FROM_PAIR((trie), data, (c), (c2), (result), resultType); \
|
||||
|
@ -27,6 +27,9 @@
|
||||
#endif
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/utf.h"
|
||||
#include "unicode/utf8.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "cmemory.h"
|
||||
#include "utrie2.h"
|
||||
#include "utrie2_impl.h"
|
||||
|
@ -620,6 +620,7 @@ U_CDECL_END
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
#include "unicode/utf.h"
|
||||
#include "mutex.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "unicode/normalizer2.h"
|
||||
#include "unicode/uscript.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "punycode.h"
|
||||
|
@ -16,10 +16,11 @@
|
||||
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/bytestream.h"
|
||||
|
||||
#if !UCONFIG_NO_COLLATION
|
||||
|
||||
#include "unicode/bytestream.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "bocsu.h"
|
||||
|
||||
/*
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2001-2010, International Business Machines
|
||||
* Copyright (C) 2001-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
@ -22,6 +22,8 @@
|
||||
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/utf.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "tolowtrn.h"
|
||||
#include "ucase.h"
|
||||
#include "cpputils.h"
|
||||
|
@ -53,6 +53,7 @@
|
||||
#include "unicode/curramt.h"
|
||||
#include "unicode/currpinf.h"
|
||||
#include "unicode/plurrule.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "uresimp.h"
|
||||
#include "ucurrimp.h"
|
||||
#include "charstr.h"
|
||||
|
@ -12,6 +12,7 @@
|
||||
|
||||
#if !UCONFIG_NO_TRANSLITERATION
|
||||
|
||||
#include "unicode/utf16.h"
|
||||
#include "esctrn.h"
|
||||
#include "util.h"
|
||||
|
||||
@ -140,7 +141,7 @@ void EscapeTransliterator::handleTransliterate(Replaceable& text,
|
||||
|
||||
while (start < limit) {
|
||||
int32_t c = grokSupplementals ? text.char32At(start) : text.charAt(start);
|
||||
int32_t charLen = grokSupplementals ? UTF_CHAR_LENGTH(c) : 1;
|
||||
int32_t charLen = grokSupplementals ? U16_LENGTH(c) : 1;
|
||||
|
||||
if ((c & 0xFFFF0000) != 0 && supplementalHandler != NULL) {
|
||||
buf.truncate(0);
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include "unicode/unifilt.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "cmemory.h"
|
||||
#include "name2uni.h"
|
||||
#include "patternprops.h"
|
||||
@ -194,7 +195,7 @@ void NameUnicodeTransliterator::handleTransliterate(Replaceable& text, UTransPos
|
||||
if (U_SUCCESS(status)) {
|
||||
// Lookup succeeded
|
||||
|
||||
// assert(UTF_CHAR_LENGTH(CLOSE_DELIM) == 1);
|
||||
// assert(U16_LENGTH(CLOSE_DELIM) == 1);
|
||||
cursor++; // advance over CLOSE_DELIM
|
||||
|
||||
str.truncate(0);
|
||||
@ -238,7 +239,7 @@ void NameUnicodeTransliterator::handleTransliterate(Replaceable& text, UTransPos
|
||||
break;
|
||||
}
|
||||
|
||||
cursor += UTF_CHAR_LENGTH(c);
|
||||
cursor += U16_LENGTH(c);
|
||||
}
|
||||
|
||||
offsets.contextLimit += limit - offsets.limit;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2001-2010, International Business Machines
|
||||
* Copyright (C) 2001-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
@ -13,6 +13,7 @@
|
||||
#if !UCONFIG_NO_TRANSLITERATION
|
||||
|
||||
#include "unicode/normalizer2.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "cstring.h"
|
||||
#include "nortrans.h"
|
||||
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "cstring.h"
|
||||
#include "funcrepl.h"
|
||||
#include "hash.h"
|
||||
@ -793,7 +794,7 @@ void RuleHalf::removeContext() {
|
||||
UBool RuleHalf::isValidOutput(TransliteratorParser& transParser) {
|
||||
for (int32_t i=0; i<text.length(); ) {
|
||||
UChar32 c = text.char32At(i);
|
||||
i += UTF_CHAR_LENGTH(c);
|
||||
i += U16_LENGTH(c);
|
||||
if (!transParser.parseData->isReplacer(c)) {
|
||||
return FALSE;
|
||||
}
|
||||
@ -808,7 +809,7 @@ UBool RuleHalf::isValidOutput(TransliteratorParser& transParser) {
|
||||
UBool RuleHalf::isValidInput(TransliteratorParser& transParser) {
|
||||
for (int32_t i=0; i<text.length(); ) {
|
||||
UChar32 c = text.char32At(i);
|
||||
i += UTF_CHAR_LENGTH(c);
|
||||
i += U16_LENGTH(c);
|
||||
if (!transParser.parseData->isMatcher(c)) {
|
||||
return FALSE;
|
||||
}
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include "unicode/rep.h"
|
||||
#include "unicode/unifilt.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "rbt_rule.h"
|
||||
#include "rbt_data.h"
|
||||
#include "cmemory.h"
|
||||
@ -315,13 +316,13 @@ UBool TransliterationRule::masks(const TransliterationRule& r2) const {
|
||||
|
||||
static inline int32_t posBefore(const Replaceable& str, int32_t pos) {
|
||||
return (pos > 0) ?
|
||||
pos - UTF_CHAR_LENGTH(str.char32At(pos-1)) :
|
||||
pos - U16_LENGTH(str.char32At(pos-1)) :
|
||||
pos - 1;
|
||||
}
|
||||
|
||||
static inline int32_t posAfter(const Replaceable& str, int32_t pos) {
|
||||
return (pos >= 0 && pos < str.length()) ?
|
||||
pos + UTF_CHAR_LENGTH(str.char32At(pos)) :
|
||||
pos + U16_LENGTH(str.char32At(pos)) :
|
||||
pos + 1;
|
||||
}
|
||||
|
||||
@ -531,7 +532,7 @@ void TransliterationRule::addSourceSetTo(UnicodeSet& toUnionTo) const {
|
||||
int32_t limit = anteContextLength + keyLength;
|
||||
for (int32_t i=anteContextLength; i<limit; ) {
|
||||
UChar32 ch = pattern.char32At(i);
|
||||
i += UTF_CHAR_LENGTH(ch);
|
||||
i += U16_LENGTH(ch);
|
||||
const UnicodeMatcher* matcher = data->lookupMatcher(ch);
|
||||
if (matcher == NULL) {
|
||||
toUnionTo.add(ch);
|
||||
|
@ -14,6 +14,7 @@
|
||||
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "rbt_set.h"
|
||||
#include "rbt_rule.h"
|
||||
#include "cmemory.h"
|
||||
@ -90,7 +91,7 @@ UnicodeString& _escape(const UnicodeString &source,
|
||||
UnicodeString &target) {
|
||||
for (int32_t i = 0; i < source.length(); ) {
|
||||
UChar32 ch = source.char32At(i);
|
||||
i += UTF_CHAR_LENGTH(ch);
|
||||
i += U16_LENGTH(ch);
|
||||
if (ch < 0x09 || (ch > 0x0A && ch < 0x20)|| ch > 0x7E) {
|
||||
if (ch <= 0xFFFF) {
|
||||
target += "\\u";
|
||||
@ -416,7 +417,7 @@ UBool TransliterationRuleSet::transliterate(Replaceable& text,
|
||||
}
|
||||
}
|
||||
// No match or partial match from any rule
|
||||
pos.start += UTF_CHAR_LENGTH(text.char32At(pos.start));
|
||||
pos.start += U16_LENGTH(text.char32At(pos.start));
|
||||
_debugOut("no match", NULL, text, pos);
|
||||
return TRUE;
|
||||
}
|
||||
|
@ -21,6 +21,8 @@
|
||||
#include "unicode/parsepos.h"
|
||||
#include "unicode/parseerr.h"
|
||||
#include "unicode/regex.h"
|
||||
#include "unicode/utf.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "patternprops.h"
|
||||
#include "putilimp.h"
|
||||
#include "cmemory.h"
|
||||
|
@ -1,6 +1,6 @@
|
||||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 2008-2010, International Business Machines Corporation and
|
||||
* Copyright (c) 2008-2011, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
//
|
||||
@ -9,6 +9,7 @@
|
||||
// This file contains utility code for supporting UText in the regular expression engine.
|
||||
//
|
||||
|
||||
#include "unicode/utf.h"
|
||||
#include "regextxt.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
@ -28,7 +29,7 @@ uregex_utext_unescape_charAt(int32_t offset, void *ct) {
|
||||
c = UTEXT_NEXT32(context->text);
|
||||
context->lastOffset = offset;
|
||||
}
|
||||
|
||||
|
||||
// !!!: Doesn't handle characters outside BMP
|
||||
if (U_IS_BMP(c)) {
|
||||
return (UChar)c;
|
||||
|
@ -19,6 +19,8 @@
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/rbbi.h"
|
||||
#include "unicode/utf.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "uassert.h"
|
||||
#include "cmemory.h"
|
||||
#include "uvector.h"
|
||||
|
@ -45,6 +45,7 @@
|
||||
#include "unicode/basictz.h"
|
||||
#include "unicode/simpletz.h"
|
||||
#include "unicode/rbtz.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "unicode/vtzone.h"
|
||||
#include "olsontz.h"
|
||||
#include "patternprops.h"
|
||||
@ -2562,7 +2563,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
|
||||
if (!u_isUWhiteSpace(c) /*||*/ && !PatternProps::isWhiteSpace(c)) {
|
||||
break;
|
||||
}
|
||||
start += UTF_CHAR_LENGTH(c);
|
||||
start += U16_LENGTH(c);
|
||||
}
|
||||
pos.setIndex(start);
|
||||
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "rbt_data.h"
|
||||
#include "util.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/utf16.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
@ -194,7 +195,7 @@ UBool StringMatcher::matchesIndexValue(uint8_t v) const {
|
||||
*/
|
||||
void StringMatcher::addMatchSetTo(UnicodeSet& toUnionTo) const {
|
||||
UChar32 ch;
|
||||
for (int32_t i=0; i<pattern.length(); i+=UTF_CHAR_LENGTH(ch)) {
|
||||
for (int32_t i=0; i<pattern.length(); i+=U16_LENGTH(ch)) {
|
||||
ch = pattern.char32At(i);
|
||||
const UnicodeMatcher* matcher = data->lookupMatcher(ch);
|
||||
if (matcher == NULL) {
|
||||
@ -276,8 +277,8 @@ void StringMatcher::setData(const TransliterationRuleData* d) {
|
||||
if (f != NULL) {
|
||||
f->setData(data);
|
||||
}
|
||||
i += UTF_CHAR_LENGTH(c);
|
||||
}
|
||||
i += U16_LENGTH(c);
|
||||
}
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
@ -12,10 +12,11 @@
|
||||
|
||||
#if !UCONFIG_NO_TRANSLITERATION
|
||||
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "strrepl.h"
|
||||
#include "rbt_data.h"
|
||||
#include "util.h"
|
||||
#include "unicode/uniset.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
@ -142,7 +143,7 @@ int32_t StringReplacer::replace(Replaceable& text,
|
||||
int32_t tempStart = text.length(); // start of temp buffer
|
||||
int32_t destStart = tempStart; // copy new text to here
|
||||
if (start > 0) {
|
||||
int32_t len = UTF_CHAR_LENGTH(text.char32At(start-1));
|
||||
int32_t len = U16_LENGTH(text.char32At(start-1));
|
||||
text.copy(start-len, start, tempStart);
|
||||
destStart += len;
|
||||
} else {
|
||||
@ -176,7 +177,7 @@ int32_t StringReplacer::replace(Replaceable& text,
|
||||
int32_t len = r->replace(text, destLimit, destLimit, cursor);
|
||||
destLimit += len;
|
||||
}
|
||||
oOutput += UTF_CHAR_LENGTH(c);
|
||||
oOutput += U16_LENGTH(c);
|
||||
}
|
||||
// Insert any accumulated straight text.
|
||||
if (buf.length() > 0) {
|
||||
@ -208,7 +209,7 @@ int32_t StringReplacer::replace(Replaceable& text,
|
||||
int32_t n = cursorPos;
|
||||
// Outside the output string, cursorPos counts code points
|
||||
while (n < 0 && newStart > 0) {
|
||||
newStart -= UTF_CHAR_LENGTH(text.char32At(newStart-1));
|
||||
newStart -= U16_LENGTH(text.char32At(newStart-1));
|
||||
++n;
|
||||
}
|
||||
newStart += n;
|
||||
@ -217,7 +218,7 @@ int32_t StringReplacer::replace(Replaceable& text,
|
||||
int32_t n = cursorPos - output.length();
|
||||
// Outside the output string, cursorPos counts code points
|
||||
while (n > 0 && newStart < text.length()) {
|
||||
newStart += UTF_CHAR_LENGTH(text.char32At(newStart));
|
||||
newStart += U16_LENGTH(text.char32At(newStart));
|
||||
--n;
|
||||
}
|
||||
newStart += n;
|
||||
@ -292,7 +293,7 @@ UnicodeString& StringReplacer::toReplacerPattern(UnicodeString& rule,
|
||||
*/
|
||||
void StringReplacer::addReplacementSetTo(UnicodeSet& toUnionTo) const {
|
||||
UChar32 ch;
|
||||
for (int32_t i=0; i<output.length(); i+=UTF_CHAR_LENGTH(ch)) {
|
||||
for (int32_t i=0; i<output.length(); i+=U16_LENGTH(ch)) {
|
||||
ch = output.char32At(i);
|
||||
UnicodeReplacer* r = data->lookupReplacer(ch);
|
||||
if (r == NULL) {
|
||||
@ -315,7 +316,7 @@ void StringReplacer::setData(const TransliterationRuleData* d) {
|
||||
if (f != NULL) {
|
||||
f->setData(data);
|
||||
}
|
||||
i += UTF_CHAR_LENGTH(c);
|
||||
i += U16_LENGTH(c);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2001-2007, International Business Machines
|
||||
* Copyright (C) 2001-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
@ -15,6 +15,7 @@
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "titletrn.h"
|
||||
#include "umutex.h"
|
||||
#include "ucase.h"
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/uscript.h"
|
||||
#include "unicode/strenum.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "cpdtrans.h"
|
||||
#include "nultrans.h"
|
||||
#include "rbt_data.h"
|
||||
@ -368,7 +369,7 @@ void Transliterator::_transliterate(Replaceable& text,
|
||||
}
|
||||
|
||||
if (index.limit > 0 &&
|
||||
UTF_IS_LEAD(text.charAt(index.limit - 1))) {
|
||||
U16_IS_LEAD(text.charAt(index.limit - 1))) {
|
||||
// Oops, there is a dangling lead surrogate in the buffer.
|
||||
// This will break most transliterators, since they will
|
||||
// assume it is part of a pair. Don't transliterate until
|
||||
@ -407,7 +408,7 @@ void Transliterator::_transliterate(Replaceable& text,
|
||||
int32_t n = getMaximumContextLength();
|
||||
while (newCS > originalStart && n-- > 0) {
|
||||
--newCS;
|
||||
newCS -= UTF_CHAR_LENGTH(text.char32At(newCS)) - 1;
|
||||
newCS -= U16_LENGTH(text.char32At(newCS)) - 1;
|
||||
}
|
||||
index.contextStart = uprv_max(newCS, originalStart);
|
||||
#endif
|
||||
@ -478,14 +479,14 @@ void Transliterator::filteredTransliterate(Replaceable& text,
|
||||
UChar32 c;
|
||||
while (index.start < globalLimit &&
|
||||
!filter->contains(c=text.char32At(index.start))) {
|
||||
index.start += UTF_CHAR_LENGTH(c);
|
||||
index.start += U16_LENGTH(c);
|
||||
}
|
||||
|
||||
// Find the end of this run of unfiltered chars
|
||||
index.limit = index.start;
|
||||
while (index.limit < globalLimit &&
|
||||
filter->contains(c=text.char32At(index.limit))) {
|
||||
index.limit += UTF_CHAR_LENGTH(c);
|
||||
index.limit += U16_LENGTH(c);
|
||||
}
|
||||
}
|
||||
|
||||
@ -568,8 +569,7 @@ void Transliterator::filteredTransliterate(Replaceable& text,
|
||||
// transliterations and commit complete transliterations.
|
||||
for (;;) {
|
||||
// Length of additional code point, either one or two
|
||||
int32_t charLength =
|
||||
UTF_CHAR_LENGTH(text.char32At(passLimit));
|
||||
int32_t charLength = U16_LENGTH(text.char32At(passLimit));
|
||||
passLimit += charLength;
|
||||
if (passLimit > runLimit) {
|
||||
break;
|
||||
@ -1144,7 +1144,7 @@ UnicodeString& Transliterator::toRules(UnicodeString& rulesSource,
|
||||
if (!ICU_Utility::escapeUnprintable(rulesSource, c)) {
|
||||
rulesSource.append(c);
|
||||
}
|
||||
i += UTF_CHAR_LENGTH(c);
|
||||
i += U16_LENGTH(c);
|
||||
}
|
||||
} else {
|
||||
rulesSource = getID();
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/uscript.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "normalizer2impl.h"
|
||||
#include "ucol_bld.h"
|
||||
#include "ucol_elm.h"
|
||||
@ -982,7 +983,7 @@ _processUCACompleteIgnorables(const void *context, UChar32 start, UChar32 limit,
|
||||
el.cPoints = el.uchars;
|
||||
|
||||
el.cSize = 0;
|
||||
UTF_APPEND_CHAR(el.uchars, el.cSize, 1024, start);
|
||||
U16_APPEND_UNSAFE(el.uchars, el.cSize, start);
|
||||
|
||||
el.noOfCEs = 1;
|
||||
el.CEs[0] = 0;
|
||||
|
@ -31,6 +31,7 @@
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/ucoleitr.h"
|
||||
#include "unicode/normlzr.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "normalizer2impl.h"
|
||||
#include "ucol_elm.h"
|
||||
#include "ucol_tok.h"
|
||||
@ -767,7 +768,7 @@ static void uprv_uca_unsafeCPAddCCNZ(tempUCATable *t, UErrorCode *status) {
|
||||
for (c=0; c<0xffff; c++) {
|
||||
fcd = unorm_getFCD16(fcdTrieIndex, c);
|
||||
if (fcd >= 0x100 || // if the leading combining class(c) > 0 ||
|
||||
(UTF_IS_LEAD(c) && fcd != 0)) {// c is a leading surrogate with some FCD data
|
||||
(U16_IS_LEAD(c) && fcd != 0)) {// c is a leading surrogate with some FCD data
|
||||
if (buildCMTable) {
|
||||
uint32_t cClass = fcd & 0xff;
|
||||
//uint32_t temp=(cClass<<8)+index[cClass];
|
||||
@ -845,7 +846,7 @@ static uint32_t uprv_uca_addPrefix(tempUCATable *t, uint32_t CE,
|
||||
for (j = 1; j<element->prefixSize; j++) { /* First add NFD prefix chars to unsafe CP hash table */
|
||||
// Unless it is a trail surrogate, which is handled algoritmically and
|
||||
// shouldn't take up space in the table.
|
||||
if(!(UTF_IS_TRAIL(element->prefix[j]))) {
|
||||
if(!(U16_IS_TRAIL(element->prefix[j]))) {
|
||||
unsafeCPSet(t->unsafeCP, element->prefix[j]);
|
||||
}
|
||||
}
|
||||
@ -868,13 +869,13 @@ static uint32_t uprv_uca_addPrefix(tempUCATable *t, uint32_t CE,
|
||||
#endif
|
||||
|
||||
// the first codepoint is also unsafe, as it forms a 'contraction' with the prefix
|
||||
if(!(UTF_IS_TRAIL(element->cPoints[0]))) {
|
||||
if(!(U16_IS_TRAIL(element->cPoints[0]))) {
|
||||
unsafeCPSet(t->unsafeCP, element->cPoints[0]);
|
||||
}
|
||||
|
||||
// Maybe we need this... To handle prefixes completely in the forward direction...
|
||||
//if(element->cSize == 1) {
|
||||
// if(!(UTF_IS_TRAIL(element->cPoints[0]))) {
|
||||
// if(!(U16_IS_TRAIL(element->cPoints[0]))) {
|
||||
// ContrEndCPSet(t->contrEndCP, element->cPoints[0]);
|
||||
// }
|
||||
//}
|
||||
@ -885,12 +886,12 @@ static uint32_t uprv_uca_addPrefix(tempUCATable *t, uint32_t CE,
|
||||
// Add the last char of the contraction to the contraction-end hash table.
|
||||
// unless it is a trail surrogate, which is handled algorithmically and
|
||||
// shouldn't be in the table
|
||||
if(!(UTF_IS_TRAIL(element->cPoints[element->cSize -1]))) {
|
||||
if(!(U16_IS_TRAIL(element->cPoints[element->cSize -1]))) {
|
||||
ContrEndCPSet(t->contrEndCP, element->cPoints[element->cSize -1]);
|
||||
}
|
||||
|
||||
// First we need to check if contractions starts with a surrogate
|
||||
UTF_NEXT_CHAR(element->cPoints, cpsize, element->cSize, cp);
|
||||
U16_NEXT(element->cPoints, cpsize, element->cSize, cp);
|
||||
|
||||
// If there are any Jamos in the contraction, we should turn on special
|
||||
// processing for Jamos
|
||||
@ -943,21 +944,21 @@ static uint32_t uprv_uca_addContraction(tempUCATable *t, uint32_t CE,
|
||||
contractions->currentTag = CONTRACTION_TAG;
|
||||
|
||||
// First we need to check if contractions starts with a surrogate
|
||||
UTF_NEXT_CHAR(element->cPoints, cpsize, element->cSize, cp);
|
||||
U16_NEXT(element->cPoints, cpsize, element->cSize, cp);
|
||||
|
||||
if(cpsize<element->cSize) { // This is a real contraction, if there are other characters after the first
|
||||
uint32_t j = 0;
|
||||
for (j=1; j<element->cSize; j++) { /* First add contraction chars to unsafe CP hash table */
|
||||
// Unless it is a trail surrogate, which is handled algoritmically and
|
||||
// shouldn't take up space in the table.
|
||||
if(!(UTF_IS_TRAIL(element->cPoints[j]))) {
|
||||
if(!(U16_IS_TRAIL(element->cPoints[j]))) {
|
||||
unsafeCPSet(t->unsafeCP, element->cPoints[j]);
|
||||
}
|
||||
}
|
||||
// Add the last char of the contraction to the contraction-end hash table.
|
||||
// unless it is a trail surrogate, which is handled algorithmically and
|
||||
// shouldn't be in the table
|
||||
if(!(UTF_IS_TRAIL(element->cPoints[element->cSize -1]))) {
|
||||
if(!(U16_IS_TRAIL(element->cPoints[element->cSize -1]))) {
|
||||
ContrEndCPSet(t->contrEndCP, element->cPoints[element->cSize -1]);
|
||||
}
|
||||
|
||||
@ -1065,7 +1066,7 @@ static uint32_t uprv_uca_finalizeAddition(tempUCATable *t, UCAElements *element,
|
||||
uint32_t i = 0;
|
||||
if(element->mapCE == 0) {
|
||||
for(i = 0; i < element->cSize; i++) {
|
||||
if(!UTF_IS_TRAIL(element->cPoints[i])) {
|
||||
if(!U16_IS_TRAIL(element->cPoints[i])) {
|
||||
unsafeCPSet(t->unsafeCP, element->cPoints[i]);
|
||||
}
|
||||
}
|
||||
@ -1074,7 +1075,7 @@ static uint32_t uprv_uca_finalizeAddition(tempUCATable *t, UCAElements *element,
|
||||
uint32_t i = 0;
|
||||
UChar32 cp;
|
||||
|
||||
UTF_NEXT_CHAR(element->cPoints, i, element->cSize, cp);
|
||||
U16_NEXT(element->cPoints, i, element->cSize, cp);
|
||||
/*CE = ucmpe32_get(t->mapping, cp);*/
|
||||
CE = utrie_get32(t->mapping, cp, NULL);
|
||||
|
||||
@ -1286,7 +1287,7 @@ uprv_uca_addAnElement(tempUCATable *t, UCAElements *element, UErrorCode *status)
|
||||
// We need to use the canonical iterator here
|
||||
// the way we do it is to generate the canonically equivalent strings
|
||||
// for the contraction and then add the sequences that pass FCD check
|
||||
if(element->cSize > 1 && !(element->cSize==2 && UTF16_IS_LEAD(element->cPoints[0]) && UTF16_IS_TRAIL(element->cPoints[1]))) { // this is a contraction, we should check whether a composed form should also be included
|
||||
if(element->cSize > 1 && !(element->cSize==2 && U16_IS_LEAD(element->cPoints[0]) && U16_IS_TRAIL(element->cPoints[1]))) { // this is a contraction, we should check whether a composed form should also be included
|
||||
UnicodeString source(element->cPoints, element->cSize);
|
||||
CanonicalIterator it(source, *status);
|
||||
source = it.next();
|
||||
@ -1406,7 +1407,7 @@ UBool enumRange(const void *context, UChar32 start, UChar32 limit, uint32_t valu
|
||||
if(start<0x10000) {
|
||||
fprintf(stdout, "%08X, %08X, %08X\n", start, limit, value);
|
||||
} else {
|
||||
fprintf(stdout, "%08X=%04X %04X, %08X=%04X %04X, %08X\n", start, UTF16_LEAD(start), UTF16_TRAIL(start), limit, UTF16_LEAD(limit), UTF16_TRAIL(limit), value);
|
||||
fprintf(stdout, "%08X=%04X %04X, %08X=%04X %04X, %08X\n", start, U16_LEAD(start), U16_TRAIL(start), limit, U16_LEAD(limit), U16_TRAIL(limit), value);
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
@ -27,6 +27,9 @@
|
||||
#define UCOL_IMP_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#ifdef __cplusplus
|
||||
# include "unicode/utf16.h"
|
||||
#endif
|
||||
|
||||
#define UCA_DATA_TYPE "icu"
|
||||
#define UCA_DATA_NAME "ucadata"
|
||||
@ -1104,7 +1107,7 @@ static inline UBool ucol_unsafeCP(UChar c, const UCollator *coll) {
|
||||
|
||||
hash = c;
|
||||
if (hash >= UCOL_UNSAFECP_TABLE_SIZE*8) {
|
||||
if(UTF_IS_SURROGATE(c)) {
|
||||
if(U16_IS_SURROGATE(c)) {
|
||||
/* Lead or trail surrogate */
|
||||
/* These are always considered unsafe. */
|
||||
return TRUE;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2001-2008, International Business Machines
|
||||
* Copyright (c) 2001-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
@ -13,6 +13,7 @@
|
||||
#if !UCONFIG_NO_TRANSLITERATION
|
||||
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "unesctrn.h"
|
||||
#include "util.h"
|
||||
|
||||
@ -229,7 +230,7 @@ void UnescapeTransliterator::handleTransliterate(Replaceable& text, UTransPositi
|
||||
if (digit < 0) {
|
||||
break;
|
||||
}
|
||||
s += UTF_CHAR_LENGTH(ch);
|
||||
s += U16_LENGTH(ch);
|
||||
u = (u * radix) + digit;
|
||||
if (++digitCount == maxDigits) {
|
||||
break;
|
||||
@ -273,7 +274,7 @@ void UnescapeTransliterator::handleTransliterate(Replaceable& text, UTransPositi
|
||||
}
|
||||
|
||||
if (start < limit) {
|
||||
start += UTF_CHAR_LENGTH(text.char32At(start));
|
||||
start += U16_LENGTH(text.char32At(start));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2001-2007, International Business Machines
|
||||
* Copyright (C) 2001-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
@ -14,6 +14,7 @@
|
||||
|
||||
#include "unicode/unifilt.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "uni2name.h"
|
||||
#include "cstring.h"
|
||||
#include "cmemory.h"
|
||||
@ -94,7 +95,7 @@ void UnicodeNameTransliterator::handleTransliterate(Replaceable& text, UTransPos
|
||||
|
||||
while (cursor < limit) {
|
||||
UChar32 c = text.char32At(cursor);
|
||||
int32_t clen = UTF_CHAR_LENGTH(c);
|
||||
int32_t clen = U16_LENGTH(c);
|
||||
status = U_ZERO_ERROR;
|
||||
if ((len = u_charName(c, U_EXTENDED_CHAR_NAME, buf, maxLen, &status)) >0 && !U_FAILURE(status)) {
|
||||
str.truncate(OPEN_DELIM_LEN);
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "umutex.h"
|
||||
#include "uassert.h"
|
||||
#include "cmemory.h"
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include "unicode/usearch.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "normalizer2impl.h"
|
||||
#include "ucol_imp.h"
|
||||
#include "usrchimp.h"
|
||||
@ -459,7 +460,7 @@ inline int16_t initializePattern(UStringSearch *strsrch, UErrorCode *status)
|
||||
pattern->hasPrefixAccents = getFCD(patterntext, &index, length) >>
|
||||
SECOND_LAST_BYTE_SHIFT_;
|
||||
index = length;
|
||||
UTF_BACK_1(patterntext, 0, index);
|
||||
U16_BACK_1(patterntext, 0, index);
|
||||
pattern->hasSuffixAccents = getFCD(patterntext, &index, length) &
|
||||
LAST_BYTE_MASK_;
|
||||
}
|
||||
@ -717,7 +718,7 @@ inline int32_t getNextUStringSearchBaseOffset(UStringSearch *strsrch,
|
||||
textoffset < textlength) {
|
||||
int32_t temp = textoffset;
|
||||
const UChar *text = strsrch->search->text;
|
||||
UTF_BACK_1(text, 0, temp);
|
||||
U16_BACK_1(text, 0, temp);
|
||||
if (getFCD(text, &temp, textlength) & LAST_BYTE_MASK_) {
|
||||
return getNextBaseOffset(text, textoffset, textlength);
|
||||
}
|
||||
@ -847,7 +848,7 @@ UBool checkExtraMatchAccents(const UStringSearch *strsrch, int32_t start,
|
||||
int32_t offset = 0;
|
||||
const UChar *text = strsrch->search->text + start;
|
||||
|
||||
UTF_FWD_1(text, offset, length);
|
||||
U16_FWD_1(text, offset, length);
|
||||
// we are only concerned with the first composite character
|
||||
if (unorm_quickCheck(text, offset, UNORM_NFD, status) == UNORM_NO) {
|
||||
int32_t safeoffset = getNextSafeOffset(strsrch->collator,
|
||||
@ -893,7 +894,7 @@ UBool checkExtraMatchAccents(const UStringSearch *strsrch, int32_t start,
|
||||
ce = ucol_next(coleiter, status);
|
||||
}
|
||||
UChar32 codepoint;
|
||||
UTF_PREV_CHAR(norm, 0, offset, codepoint);
|
||||
U16_PREV(norm, 0, offset, codepoint);
|
||||
result = !ignorable && (u_getCombiningClass(codepoint) != 0);
|
||||
|
||||
if (norm != buffer) {
|
||||
@ -975,7 +976,7 @@ UBool hasAccentsBeforeMatch(const UStringSearch *strsrch, int32_t start,
|
||||
}
|
||||
if (start > 0) {
|
||||
temp = start;
|
||||
UTF_BACK_1(strsrch->search->text, 0, temp);
|
||||
U16_BACK_1(strsrch->search->text, 0, temp);
|
||||
if (getFCD(strsrch->search->text, &temp,
|
||||
strsrch->search->textLength) & LAST_BYTE_MASK_) {
|
||||
setColEIterOffset(coleiter, start);
|
||||
@ -1015,7 +1016,7 @@ UBool hasAccentsAfterMatch(const UStringSearch *strsrch, int32_t start,
|
||||
const UChar *text = strsrch->search->text;
|
||||
int32_t temp = end;
|
||||
int32_t textlength = strsrch->search->textLength;
|
||||
UTF_BACK_1(text, 0, temp);
|
||||
U16_BACK_1(text, 0, temp);
|
||||
if (getFCD(text, &temp, textlength) & LAST_BYTE_MASK_) {
|
||||
int32_t firstce = strsrch->pattern.CE[0];
|
||||
UCollationElements *coleiter = strsrch->textIter;
|
||||
@ -1304,7 +1305,7 @@ inline int32_t getPreviousBaseOffset(const UChar *text,
|
||||
if (textoffset > 0) {
|
||||
for (;;) {
|
||||
int32_t result = textoffset;
|
||||
UTF_BACK_1(text, 0, textoffset);
|
||||
U16_BACK_1(text, 0, textoffset);
|
||||
int32_t temp = textoffset;
|
||||
uint16_t fcd = getFCD(text, &temp, result);
|
||||
if ((fcd >> SECOND_LAST_BYTE_SHIFT_) == 0) {
|
||||
@ -1338,7 +1339,7 @@ inline int getUnblockedAccentIndex(UChar *accents, int32_t *accentsindex)
|
||||
int32_t temp;
|
||||
while (index < length) {
|
||||
temp = index;
|
||||
UTF_NEXT_CHAR(accents, index, length, codepoint);
|
||||
U16_NEXT(accents, index, length, codepoint);
|
||||
if (u_getCombiningClass(codepoint) != cclass) {
|
||||
cclass = u_getCombiningClass(codepoint);
|
||||
accentsindex[result] = temp;
|
||||
@ -1722,7 +1723,7 @@ UBool doNextCanonicalMatch(UStringSearch *strsrch,
|
||||
{
|
||||
const UChar *text = strsrch->search->text;
|
||||
int32_t temp = textoffset;
|
||||
UTF_BACK_1(text, 0, temp);
|
||||
U16_BACK_1(text, 0, temp);
|
||||
if ((getFCD(text, &temp, textoffset) & LAST_BYTE_MASK_) == 0) {
|
||||
UCollationElements *coleiter = strsrch->textIter;
|
||||
int32_t offset = getColElemIterOffset(coleiter, FALSE);
|
||||
@ -2164,7 +2165,7 @@ int32_t doPreviousCanonicalSuffixMatch(UStringSearch *strsrch,
|
||||
const UChar *text = strsrch->search->text;
|
||||
int32_t tempend = end;
|
||||
|
||||
UTF_BACK_1(text, 0, tempend);
|
||||
U16_BACK_1(text, 0, tempend);
|
||||
if (!(getFCD(text, &tempend, strsrch->search->textLength) &
|
||||
LAST_BYTE_MASK_)) {
|
||||
// die... failed at a base character
|
||||
@ -2513,7 +2514,7 @@ UBool checkPreviousCanonicalContractionMatch(UStringSearch *strsrch,
|
||||
// accents may have extra starting ces, this occurs when a
|
||||
// pure accent pattern is matched without rearrangement
|
||||
int32_t expected = patternce[patterncelength - 1];
|
||||
UTF_BACK_1(text, 0, *end);
|
||||
U16_BACK_1(text, 0, *end);
|
||||
if (getFCD(text, end, textlength) & LAST_BYTE_MASK_) {
|
||||
ce = getCE(strsrch, ucol_previous(coleiter, status));
|
||||
while (U_SUCCESS(*status) && ce != expected &&
|
||||
@ -3227,7 +3228,7 @@ U_CAPI int32_t U_EXPORT2 usearch_next(UStringSearch *strsrch,
|
||||
search->matchedIndex = offset;
|
||||
}
|
||||
else { // moves by codepoints
|
||||
UTF_FWD_1(search->text, search->matchedIndex, textlength);
|
||||
U16_FWD_1(search->text, search->matchedIndex, textlength);
|
||||
}
|
||||
|
||||
search->matchedLength = 0;
|
||||
@ -3341,7 +3342,7 @@ U_CAPI int32_t U_EXPORT2 usearch_previous(UStringSearch *strsrch,
|
||||
// status checked below
|
||||
}
|
||||
else { // move by codepoints
|
||||
UTF_BACK_1(search->text, 0, search->matchedIndex);
|
||||
U16_BACK_1(search->text, 0, search->matchedIndex);
|
||||
setColEIterOffset(strsrch->textIter, search->matchedIndex);
|
||||
// status checked below
|
||||
search->matchedLength = 0;
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include "unicode/uspoof.h"
|
||||
#include "unicode/unorm.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "cmemory.h"
|
||||
#include "uspoof_impl.h"
|
||||
#include "uassert.h"
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include "unicode/unorm.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "utrie2.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1998-2004, International Business Machines
|
||||
* Copyright (C) 1998-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
@ -21,10 +21,11 @@
|
||||
#define UFMT_CMN_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/utf16.h"
|
||||
|
||||
#define UFMT_DEFAULT_BUFFER_SIZE 128
|
||||
#define MAX_UCHAR_BUFFER_SIZE(buffer) (sizeof(buffer)/(UTF_MAX_CHAR_LENGTH*sizeof(UChar)))
|
||||
#define MAX_UCHAR_BUFFER_NEEDED(strLen) ((strLen+1)*UTF_MAX_CHAR_LENGTH*sizeof(UChar))
|
||||
#define MAX_UCHAR_BUFFER_SIZE(buffer) (sizeof(buffer)/(U16_MAX_LENGTH*sizeof(UChar)))
|
||||
#define MAX_UCHAR_BUFFER_NEEDED(strLen) ((strLen+1)*U16_MAX_LENGTH*sizeof(UChar))
|
||||
|
||||
/**
|
||||
* Enum representing the possible argument types for uprintf/uscanf
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1998-2008, International Business Machines
|
||||
* Copyright (C) 1998-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
@ -22,7 +22,7 @@
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#include "unicode/ustring.h"
|
||||
|
||||
#include "unicode/utf16.h"
|
||||
#include "uprintf.h"
|
||||
#include "ufmt_cmn.h"
|
||||
#include "cmemory.h"
|
||||
@ -246,7 +246,7 @@ u_printf_char_handler(const u_printf_stream_handler *handler,
|
||||
const u_printf_spec_info *info,
|
||||
const ufmt_args *args)
|
||||
{
|
||||
UChar s[UTF_MAX_CHAR_LENGTH+1];
|
||||
UChar s[U16_MAX_LENGTH+1];
|
||||
int32_t len = 1, written;
|
||||
unsigned char arg = (unsigned char)(args[0].int64Value);
|
||||
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include "unicode/ustream.h"
|
||||
#include "unicode/ucnv.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "ustr_cnv.h"
|
||||
#include "cmemory.h"
|
||||
#include <string.h>
|
||||
|
@ -31,6 +31,7 @@
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/ucnv.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "cmemory.h"
|
||||
#include "cintltst.h"
|
||||
|
||||
@ -715,7 +716,7 @@ writeString(const UChar *s, int32_t length, uint8_t *p) {
|
||||
p0=p;
|
||||
i=0;
|
||||
while(i<length) {
|
||||
UTF_NEXT_CHAR(s, i, length, c);
|
||||
U16_NEXT(s, i, length, c);
|
||||
p+=writePacked(encodeBocu1(&prev, c), p);
|
||||
}
|
||||
return (int32_t)(p-p0);
|
||||
@ -743,7 +744,7 @@ readString(const uint8_t *p, int32_t length, UChar *s) {
|
||||
return -1;
|
||||
}
|
||||
if(c>=0) {
|
||||
UTF_APPEND_CHAR_UNSAFE(s, sLength, c);
|
||||
U16_APPEND_UNSAFE(s, sLength, c);
|
||||
}
|
||||
}
|
||||
return sLength;
|
||||
|
@ -37,6 +37,7 @@
|
||||
#include "unicode/ucnv.h"
|
||||
#include "unicode/ures.h"
|
||||
#include "unicode/uscript.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "uparse.h"
|
||||
#include "putilimp.h"
|
||||
|
||||
@ -1678,7 +1679,7 @@ static void TestComposeDecompose(void) {
|
||||
for(u = 0; u < charsToTestSize; u++) {
|
||||
UChar32 ch = uset_charAt(charsToTest, u);
|
||||
len = 0;
|
||||
UTF_APPEND_CHAR_UNSAFE(comp, len, ch);
|
||||
U16_APPEND_UNSAFE(comp, len, ch);
|
||||
nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
|
||||
nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
|
||||
|
||||
@ -1712,7 +1713,7 @@ static void TestComposeDecompose(void) {
|
||||
uprv_memset(t[noCases], 0, sizeof(tester));
|
||||
t[noCases]->u = u;
|
||||
len = 0;
|
||||
UTF_APPEND_CHAR_UNSAFE(comp, len, u);
|
||||
U16_APPEND_UNSAFE(comp, len, u);
|
||||
comp[len] = 0;
|
||||
nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
|
||||
nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
|
||||
|
@ -16,6 +16,7 @@
|
||||
/*tests for u_normalization*/
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/unorm.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "cintltst.h"
|
||||
|
||||
#if UCONFIG_NO_NORMALIZATION
|
||||
@ -811,13 +812,13 @@ TestNormCoverage() {
|
||||
|
||||
hangulPrefixLength=inLength;
|
||||
|
||||
input[inLength++]=UTF16_LEAD(MUSICAL_HALF_NOTE);
|
||||
input[inLength++]=UTF16_TRAIL(MUSICAL_HALF_NOTE);
|
||||
input[inLength++]=U16_LEAD(MUSICAL_HALF_NOTE);
|
||||
input[inLength++]=U16_TRAIL(MUSICAL_HALF_NOTE);
|
||||
for(i=0; i<200; ++i) {
|
||||
input[inLength++]=UTF16_LEAD(MUSICAL_STACCATO);
|
||||
input[inLength++]=UTF16_TRAIL(MUSICAL_STACCATO);
|
||||
input[inLength++]=UTF16_LEAD(MUSICAL_STEM);
|
||||
input[inLength++]=UTF16_TRAIL(MUSICAL_STEM);
|
||||
input[inLength++]=U16_LEAD(MUSICAL_STACCATO);
|
||||
input[inLength++]=U16_TRAIL(MUSICAL_STACCATO);
|
||||
input[inLength++]=U16_LEAD(MUSICAL_STEM);
|
||||
input[inLength++]=U16_TRAIL(MUSICAL_STEM);
|
||||
}
|
||||
|
||||
/* (compatibility) Jamo L, T do not compose */
|
||||
@ -870,17 +871,17 @@ TestNormCoverage() {
|
||||
|
||||
expect[expectLength++]=HANGUL_AC00+14*28;
|
||||
|
||||
expect[expectLength++]=UTF16_LEAD(MUSICAL_VOID_NOTEHEAD);
|
||||
expect[expectLength++]=UTF16_TRAIL(MUSICAL_VOID_NOTEHEAD);
|
||||
expect[expectLength++]=UTF16_LEAD(MUSICAL_STEM);
|
||||
expect[expectLength++]=UTF16_TRAIL(MUSICAL_STEM);
|
||||
expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
|
||||
expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
|
||||
expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
|
||||
expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
|
||||
for(i=0; i<200; ++i) {
|
||||
expect[expectLength++]=UTF16_LEAD(MUSICAL_STEM);
|
||||
expect[expectLength++]=UTF16_TRAIL(MUSICAL_STEM);
|
||||
expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
|
||||
expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
|
||||
}
|
||||
for(i=0; i<200; ++i) {
|
||||
expect[expectLength++]=UTF16_LEAD(MUSICAL_STACCATO);
|
||||
expect[expectLength++]=UTF16_TRAIL(MUSICAL_STACCATO);
|
||||
expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
|
||||
expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
|
||||
}
|
||||
|
||||
expect[expectLength++]=HANGUL_KIYEOK;
|
||||
@ -921,17 +922,17 @@ TestNormCoverage() {
|
||||
u_memcpy(expect, input, hangulPrefixLength);
|
||||
expectLength=hangulPrefixLength;
|
||||
|
||||
expect[expectLength++]=UTF16_LEAD(MUSICAL_VOID_NOTEHEAD);
|
||||
expect[expectLength++]=UTF16_TRAIL(MUSICAL_VOID_NOTEHEAD);
|
||||
expect[expectLength++]=UTF16_LEAD(MUSICAL_STEM);
|
||||
expect[expectLength++]=UTF16_TRAIL(MUSICAL_STEM);
|
||||
expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
|
||||
expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
|
||||
expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
|
||||
expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
|
||||
for(i=0; i<200; ++i) {
|
||||
expect[expectLength++]=UTF16_LEAD(MUSICAL_STEM);
|
||||
expect[expectLength++]=UTF16_TRAIL(MUSICAL_STEM);
|
||||
expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
|
||||
expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
|
||||
}
|
||||
for(i=0; i<200; ++i) {
|
||||
expect[expectLength++]=UTF16_LEAD(MUSICAL_STACCATO);
|
||||
expect[expectLength++]=UTF16_TRAIL(MUSICAL_STACCATO);
|
||||
expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
|
||||
expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
|
||||
}
|
||||
|
||||
expect[expectLength++]=HANGUL_K_KIYEOK;
|
||||
|
@ -1439,7 +1439,7 @@ static void TestCharLength()
|
||||
for(i=0; i<(int32_t)(sizeof(codepoint)/sizeof(codepoint[0])); i=(int16_t)(i+2)){
|
||||
UChar32 c=codepoint[i+1];
|
||||
if(UTF_CHAR_LENGTH(c) != codepoint[i] || U16_LENGTH(c) != codepoint[i]){
|
||||
log_err("The no: of code units for U+%04x:- Expected: %d Got: %d\n", c, codepoint[i], UTF_CHAR_LENGTH(c));
|
||||
log_err("The no: of code units for U+%04x:- Expected: %d Got: %d\n", c, codepoint[i], U16_LENGTH(c));
|
||||
}
|
||||
multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);
|
||||
if(UTF_NEED_MULTIPLE_UCHAR(c) != multiple){
|
||||
|
@ -1,6 +1,6 @@
|
||||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2010, International Business Machines Corporation and
|
||||
* Copyright (c) 1997-2011, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
/*
|
||||
@ -25,6 +25,8 @@
|
||||
#include "unicode/ustring.h"
|
||||
#include "nccbtst.h"
|
||||
#include "unicode/ucnv_cb.h"
|
||||
#include "unicode/utf16.h"
|
||||
|
||||
#define NEW_MAX_BUFFER 999
|
||||
|
||||
#define nct_min(x,y) ((x<y) ? x : y)
|
||||
@ -1736,10 +1738,10 @@ static void TestSub(int32_t inputsize, int32_t outputsize)
|
||||
in4[]={ 0x00, 0x01, 0x02, 0x03, 0x00, 0x11, 0x12, 0x00, 0x00, 0x00, 0x4e, 0x00 };
|
||||
|
||||
static const UChar
|
||||
out1[]={ UTF16_LEAD(0x100f00), UTF16_TRAIL(0x100f00), 0xfeff },
|
||||
out2[]={ UTF16_LEAD(0x0f1000), UTF16_TRAIL(0x0f1000), 0xfffe },
|
||||
out3[]={ 0xfefe, UTF16_LEAD(0x100f00), UTF16_TRAIL(0x100f00), 0xfffd, 0xfffd },
|
||||
out4[]={ UTF16_LEAD(0x10203), UTF16_TRAIL(0x10203), 0xfffd, 0x4e00 };
|
||||
out1[]={ U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfeff },
|
||||
out2[]={ U16_LEAD(0x0f1000), U16_TRAIL(0x0f1000), 0xfffe },
|
||||
out3[]={ 0xfefe, U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfffd, 0xfffd },
|
||||
out4[]={ U16_LEAD(0x10203), U16_TRAIL(0x10203), 0xfffd, 0x4e00 };
|
||||
|
||||
static const int32_t
|
||||
offsets1[]={ 4, 4, 8 },
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/ucol.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "cmemory.h"
|
||||
#include "nucnvtst.h"
|
||||
|
||||
@ -2972,9 +2973,9 @@ TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
|
||||
log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
|
||||
break;
|
||||
} else {
|
||||
if(UTF_IS_FIRST_SURROGATE(*r)){
|
||||
if(U16_IS_LEAD(*r)){
|
||||
int i =0, len = 2;
|
||||
UTF_NEXT_CHAR_SAFE(r, i, len, exC, FALSE);
|
||||
U16_NEXT(r, i, len, exC);
|
||||
r++;
|
||||
}else{
|
||||
exC = *r;
|
||||
@ -3476,9 +3477,9 @@ unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *s
|
||||
}
|
||||
if(dstIndex < dstLen){
|
||||
if(c>0xFFFF){
|
||||
dst[dstIndex++] = UTF16_LEAD(c);
|
||||
dst[dstIndex++] = U16_LEAD(c);
|
||||
if(dstIndex<dstLen){
|
||||
dst[dstIndex]=UTF16_TRAIL(c);
|
||||
dst[dstIndex]=U16_TRAIL(c);
|
||||
}else{
|
||||
*status=U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
@ -3516,8 +3517,8 @@ TestFullRoundtrip(const char* cp){
|
||||
usource[0] =(UChar) i;
|
||||
len=1;
|
||||
}else{
|
||||
usource[0]=UTF16_LEAD(i);
|
||||
usource[1]=UTF16_TRAIL(i);
|
||||
usource[0]=U16_LEAD(i);
|
||||
usource[1]=U16_TRAIL(i);
|
||||
len=2;
|
||||
}
|
||||
ulen=len;
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include "unicode/putil.h"
|
||||
#include "cintltst.h"
|
||||
#include "unicode/usprep.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "sprpimpl.h"
|
||||
#include "uparse.h"
|
||||
#include "cmemory.h"
|
||||
@ -206,8 +207,8 @@ compareMapping(UStringPrepProfile* data, uint32_t codepoint, uint32_t* mapping,i
|
||||
log_err("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[i], mappingData[index+i]);
|
||||
}
|
||||
}else{
|
||||
UChar lead = UTF16_LEAD(mapping[i]);
|
||||
UChar trail = UTF16_TRAIL(mapping[i]);
|
||||
UChar lead = U16_LEAD(mapping[i]);
|
||||
UChar trail = U16_TRAIL(mapping[i]);
|
||||
if(mappingData[index+i] != lead ||
|
||||
mappingData[index+i+1] != trail){
|
||||
log_err( "Did not get the expected result. Expected: 0x%04X 0x%04X Got: 0x%04X 0x%04X\n", lead, trail, mappingData[index+i], mappingData[index+i+1]);
|
||||
@ -234,7 +235,7 @@ compareFlagsForRange(UStringPrepProfile* data,
|
||||
UTrie trie = data->sprepTrie;
|
||||
/*
|
||||
// supplementary code point
|
||||
UChar __lead16=UTF16_LEAD(0x2323E);
|
||||
UChar __lead16=U16_LEAD(0x2323E);
|
||||
int32_t __offset;
|
||||
|
||||
// get data for lead surrogate
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2001-2008, International Business Machines
|
||||
* Copyright (C) 2001-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
@ -16,6 +16,7 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "utrie.h"
|
||||
#include "cstring.h"
|
||||
#include "cmemory.h"
|
||||
@ -156,7 +157,7 @@ testTrieIteration(const char *testName,
|
||||
c=checkRanges[i].limit;
|
||||
if(c!=0) {
|
||||
--c;
|
||||
UTF_APPEND_CHAR_UNSAFE(s, length, c);
|
||||
U16_APPEND_UNSAFE(s, length, c);
|
||||
values[countValues++]=checkRanges[i].value;
|
||||
}
|
||||
}
|
||||
@ -179,7 +180,7 @@ testTrieIteration(const char *testName,
|
||||
if(
|
||||
c2==0 ?
|
||||
c!=*(p-1) :
|
||||
!UTF_IS_LEAD(c) || !UTF_IS_TRAIL(c2) || c!=*(p-2) || c2!=*(p-1)
|
||||
!U16_IS_LEAD(c) || !U16_IS_TRAIL(c2) || c!=*(p-2) || c2!=*(p-1)
|
||||
) {
|
||||
log_err("error: wrong (c, c2) from UTRIE_NEXT(%s): (U+%04lx, U+%04lx)\n",
|
||||
testName, c, c2);
|
||||
@ -243,7 +244,7 @@ testTrieIteration(const char *testName,
|
||||
if(
|
||||
c2==0 ?
|
||||
c!=*p:
|
||||
!UTF_IS_LEAD(c) || !UTF_IS_TRAIL(c2) || c!=*p || c2!=*(p+1)
|
||||
!U16_IS_LEAD(c) || !U16_IS_TRAIL(c2) || c!=*p || c2!=*(p+1)
|
||||
) {
|
||||
log_err("error: wrong (c, c2) from UTRIE_PREVIOUS(%s): (U+%04lx, U+%04lx)\n",
|
||||
testName, c, c2);
|
||||
@ -384,7 +385,7 @@ testTrieRangesWithMalloc(const char *testName,
|
||||
log_err("error: unserialized trie(%s).fromBMP(U+%04lx)==0x%lx instead of 0x%lx\n",
|
||||
testName, start, value2, value);
|
||||
}
|
||||
if(!UTF_IS_LEAD(start)) {
|
||||
if(!U16_IS_LEAD(start)) {
|
||||
if(dataIs32) {
|
||||
value2=UTRIE_GET32_FROM_LEAD(&trie, start);
|
||||
} else {
|
||||
@ -593,7 +594,7 @@ testTrieRanges(const char *testName,
|
||||
log_err("error: unserialized trie(%s).fromBMP(U+%04lx)==0x%lx instead of 0x%lx\n",
|
||||
testName, start, value2, value);
|
||||
}
|
||||
if(!UTF_IS_LEAD(start)) {
|
||||
if(!U16_IS_LEAD(start)) {
|
||||
if(dataIs32) {
|
||||
value2=UTRIE_GET32_FROM_LEAD(&trie, start);
|
||||
} else {
|
||||
|
@ -1,6 +1,6 @@
|
||||
/****************************************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2010, International Business Machines Corporation and
|
||||
* Copyright (c) 1997-2011, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
* Modification History:
|
||||
*
|
||||
@ -18,6 +18,7 @@
|
||||
#include "unicode/uchriter.h"
|
||||
#include "unicode/uiter.h"
|
||||
#include "unicode/putil.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "citrtest.h"
|
||||
|
||||
|
||||
@ -102,20 +103,20 @@ public:
|
||||
case kStart:
|
||||
pos = begin;
|
||||
if(delta > 0) {
|
||||
UTF_FWD_N(text, pos, end, delta);
|
||||
U16_FWD_N(text, pos, end, delta);
|
||||
}
|
||||
break;
|
||||
case kCurrent:
|
||||
if(delta > 0) {
|
||||
UTF_FWD_N(text, pos, end, delta);
|
||||
U16_FWD_N(text, pos, end, delta);
|
||||
} else {
|
||||
UTF_BACK_N(text, begin, pos, -delta);
|
||||
U16_BACK_N(text, begin, pos, -delta);
|
||||
}
|
||||
break;
|
||||
case kEnd:
|
||||
pos = end;
|
||||
if(delta < 0) {
|
||||
UTF_BACK_N(text, begin, pos, -delta);
|
||||
U16_BACK_N(text, begin, pos, -delta);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
@ -666,7 +667,7 @@ void CharIterTest::TestIterationUChar32() {
|
||||
c=iter.first32PostInc();
|
||||
if(c != text.char32At(i))
|
||||
errln("first32PostInc failed. Expected->%X Got->%X", text.char32At(i), c);
|
||||
if(iter.getIndex() != UTF16_CHAR_LENGTH(c) + i)
|
||||
if(iter.getIndex() != U16_LENGTH(c) + i)
|
||||
errln((UnicodeString)"getIndex() after first32PostInc() failed");
|
||||
|
||||
iter.setToStart();
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2003-2009, International Business Machines
|
||||
* Copyright (C) 2003-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
@ -30,6 +30,7 @@
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "unewdata.h"
|
||||
#include "uoptions.h"
|
||||
#include "uparse.h"
|
||||
@ -381,8 +382,8 @@ compareMapping(uint32_t codepoint, uint32_t* mapping,int32_t mapLength,
|
||||
pTestIDNA->errln("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[i], mappingData[index+i]);
|
||||
}
|
||||
}else{
|
||||
UChar lead = UTF16_LEAD(mapping[i]);
|
||||
UChar trail = UTF16_TRAIL(mapping[i]);
|
||||
UChar lead = U16_LEAD(mapping[i]);
|
||||
UChar trail = U16_TRAIL(mapping[i]);
|
||||
if(mappingData[index+i] != lead ||
|
||||
mappingData[index+i+1] != trail){
|
||||
pTestIDNA->errln( "Did not get the expected result. Expected: 0x%04X 0x%04X Got: 0x%04X 0x%04X", lead, trail, mappingData[index+i], mappingData[index+i+1]);
|
||||
@ -405,9 +406,9 @@ compareFlagsForRange(uint32_t start, uint32_t end,
|
||||
UStringPrepType retType;
|
||||
UBool isIndex=FALSE;
|
||||
int32_t value=0;
|
||||
/*
|
||||
/*
|
||||
// supplementary code point
|
||||
UChar __lead16=UTF16_LEAD(0x2323E);
|
||||
UChar __lead16=U16_LEAD(0x2323E);
|
||||
int32_t __offset;
|
||||
|
||||
// get data for lead surrogate
|
||||
|
@ -26,6 +26,7 @@
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/ulocdata.h"
|
||||
#include "unicode/utf8.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "putilimp.h"
|
||||
#include "cmemory.h"
|
||||
#include "transrt.h"
|
||||
@ -439,7 +440,7 @@ UBool RTTest::isCamel(const UnicodeString& a) {
|
||||
// see if string is of the form aB; e.g. lower, then upper or title
|
||||
UChar32 cp;
|
||||
UBool haveLower = FALSE;
|
||||
for (int32_t i = 0; i < a.length(); i += UTF_CHAR_LENGTH(cp)) {
|
||||
for (int32_t i = 0; i < a.length(); i += U16_LENGTH(cp)) {
|
||||
cp = a.char32At(i);
|
||||
int8_t t = u_charType(cp);
|
||||
switch (t) {
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/usetiter.h"
|
||||
#include "unicode/uscript.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "cpdtrans.h"
|
||||
#include "nultrans.h"
|
||||
#include "rbt.h"
|
||||
@ -3460,7 +3461,7 @@ void TransliteratorTest::TestSurrogateCasing (void) {
|
||||
char buffer[20];
|
||||
UChar buffer2[20];
|
||||
UChar32 dee;
|
||||
UTF_GET_CHAR(DESERET_dee,0, 0, DESERET_dee.length(), dee);
|
||||
U16_GET(DESERET_dee,0, 0, DESERET_dee.length(), dee);
|
||||
UnicodeString DEE(u_totitle(dee));
|
||||
if (DEE != DESERET_DEE) {
|
||||
err("Fails titlecase of surrogates");
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/usetiter.h"
|
||||
#include "unicode/schriter.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "cstring.h"
|
||||
#include "normalizer2impl.h"
|
||||
#include "tstnorm.h"
|
||||
@ -688,8 +689,8 @@ void
|
||||
BasicNormalizerTest::TestPreviousNext() {
|
||||
// src and expect strings
|
||||
static const UChar src[]={
|
||||
UTF16_LEAD(0x2f999), UTF16_TRAIL(0x2f999),
|
||||
UTF16_LEAD(0x1d15f), UTF16_TRAIL(0x1d15f),
|
||||
U16_LEAD(0x2f999), U16_TRAIL(0x2f999),
|
||||
U16_LEAD(0x1d15f), U16_TRAIL(0x1d15f),
|
||||
0xc4,
|
||||
0x1ed0
|
||||
};
|
||||
@ -711,7 +712,7 @@ BasicNormalizerTest::TestPreviousNext() {
|
||||
|
||||
// src and expect strings for regression test for j2911
|
||||
static const UChar src_j2911[]={
|
||||
UTF16_LEAD(0x2f999), UTF16_TRAIL(0x2f999),
|
||||
U16_LEAD(0x2f999), U16_TRAIL(0x2f999),
|
||||
0xdd00, 0xd900, // unpaired surrogates - regression test for j2911
|
||||
0xc4,
|
||||
0x4f, 0x302, 0x301
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/ucnv.h"
|
||||
#include "unicode/uenum.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "cmemory.h"
|
||||
#include "charstr.h"
|
||||
|
||||
@ -1275,7 +1276,7 @@ UnicodeStringTest::TestStackAllocation()
|
||||
|
||||
// test the UChar32 constructor
|
||||
UnicodeString c32Test((UChar32)0x10ff2a);
|
||||
if( c32Test.length() != UTF_CHAR_LENGTH(0x10ff2a) ||
|
||||
if( c32Test.length() != U16_LENGTH(0x10ff2a) ||
|
||||
c32Test.char32At(c32Test.length() - 1) != 0x10ff2a
|
||||
) {
|
||||
errln("The UnicodeString(UChar32) constructor does not work with a 0x10ff2a filler");
|
||||
@ -1283,7 +1284,7 @@ UnicodeStringTest::TestStackAllocation()
|
||||
|
||||
// test the (new) capacity constructor
|
||||
UnicodeString capTest(5, (UChar32)0x2a, 5);
|
||||
if( capTest.length() != 5 * UTF_CHAR_LENGTH(0x2a) ||
|
||||
if( capTest.length() != 5 * U16_LENGTH(0x2a) ||
|
||||
capTest.char32At(0) != 0x2a ||
|
||||
capTest.char32At(4) != 0x2a
|
||||
) {
|
||||
@ -1291,7 +1292,7 @@ UnicodeStringTest::TestStackAllocation()
|
||||
}
|
||||
|
||||
capTest = UnicodeString(5, (UChar32)0x10ff2a, 5);
|
||||
if( capTest.length() != 5 * UTF_CHAR_LENGTH(0x10ff2a) ||
|
||||
if( capTest.length() != 5 * U16_LENGTH(0x10ff2a) ||
|
||||
capTest.char32At(0) != 0x10ff2a ||
|
||||
capTest.char32At(4) != 0x10ff2a
|
||||
) {
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user