ICU-7344 C functions for Java modified UTF-8
X-SVN-Rev: 27261
This commit is contained in:
parent
e6e5208a8d
commit
ad83876755
@ -1,6 +1,6 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1998-2009, International Business Machines
|
||||
* Copyright (C) 1998-2010, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*
|
||||
@ -1602,4 +1602,95 @@ u_strFromUTF32WithSub(UChar *dest,
|
||||
UChar32 subchar, int32_t *pNumSubstitutions,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Convert a 16-bit Unicode string to Java Modified UTF-8.
|
||||
* See http://java.sun.com/javase/6/docs/api/java/io/DataInput.html#modified-utf-8
|
||||
*
|
||||
* This function behaves according to the documentation for Java DataOutput.writeUTF()
|
||||
* except that it does not encode the output length in the destination buffer
|
||||
* and does not have an output length restriction.
|
||||
* See http://java.sun.com/javase/6/docs/api/java/io/DataOutput.html#writeUTF(java.lang.String)
|
||||
*
|
||||
* The input string need not be well-formed UTF-16.
|
||||
* (Therefore there is no subchar parameter.)
|
||||
*
|
||||
* @param dest A buffer for the result string. The result will be zero-terminated if
|
||||
* the buffer is large enough.
|
||||
* @param destCapacity The size of the buffer (number of chars). If it is 0, then
|
||||
* dest may be NULL and the function will only return the length of the
|
||||
* result without writing any of the result string (pre-flighting).
|
||||
* @param pDestLength A pointer to receive the number of units written to the destination. If
|
||||
* pDestLength!=NULL then *pDestLength is always set to the
|
||||
* number of output units corresponding to the transformation of
|
||||
* all the input units, even in case of a buffer overflow.
|
||||
* @param src The original source string
|
||||
* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
|
||||
* @param pErrorCode Pointer to a standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return The pointer to destination buffer.
|
||||
* @draft ICU 4.4
|
||||
* @see u_strToUTF8WithSub
|
||||
* @see u_strFromJavaModifiedUTF8WithSub
|
||||
*/
|
||||
U_DRAFT char* U_EXPORT2
|
||||
u_strToJavaModifiedUTF8(
|
||||
char *dest,
|
||||
int32_t destCapacity,
|
||||
int32_t *pDestLength,
|
||||
const UChar *src,
|
||||
int32_t srcLength,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Convert a Java Modified UTF-8 string to a 16-bit Unicode string.
|
||||
* If the input string is not well-formed and no substitution char is specified (subchar is U_SENTINEL), then the U_INVALID_CHAR_FOUND error code is set.
|
||||
*
|
||||
* This function behaves according to the documentation for Java DataInput.readUTF()
|
||||
* except that it takes a length parameter rather than
|
||||
* interpreting the first two input bytes as the length.
|
||||
* See http://java.sun.com/javase/6/docs/api/java/io/DataInput.html#readUTF()
|
||||
*
|
||||
* The output string may not be well-formed UTF-16.
|
||||
*
|
||||
* @param dest A buffer for the result string. The result will be zero-terminated if
|
||||
* the buffer is large enough.
|
||||
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
|
||||
* dest may be NULL and the function will only return the length of the
|
||||
* result without writing any of the result string (pre-flighting).
|
||||
* @param pDestLength A pointer to receive the number of units written to the destination. If
|
||||
* pDestLength!=NULL then *pDestLength is always set to the
|
||||
* number of output units corresponding to the transformation of
|
||||
* all the input units, even in case of a buffer overflow.
|
||||
* @param src The original source string
|
||||
* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
|
||||
* @param subchar The substitution character to use in place of an illegal input sequence,
|
||||
* or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
|
||||
* A substitution character can be any valid Unicode code point (up to U+10FFFF)
|
||||
* except for surrogate code points (U+D800..U+DFFF).
|
||||
* The recommended value is U+FFFD "REPLACEMENT CHARACTER".
|
||||
* @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
|
||||
* Set to 0 if no substitutions occur or subchar<0.
|
||||
* pNumSubstitutions can be NULL.
|
||||
* @param pErrorCode Pointer to a standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return The pointer to destination buffer.
|
||||
* @see u_strFromUTF8WithSub
|
||||
* @see u_strFromUTF8Lenient
|
||||
* @see u_strToJavaModifiedUTF8
|
||||
* @draft ICU 4.4
|
||||
*/
|
||||
U_DRAFT UChar* U_EXPORT2
|
||||
u_strFromJavaModifiedUTF8WithSub(
|
||||
UChar *dest,
|
||||
int32_t destCapacity,
|
||||
int32_t *pDestLength,
|
||||
const char *src,
|
||||
int32_t srcLength,
|
||||
UChar32 subchar, int32_t *pNumSubstitutions,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
#endif
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2001-2009, International Business Machines
|
||||
* Copyright (C) 2001-2010, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
@ -396,7 +396,6 @@ u_strFromUTF8WithSub(UChar *dest,
|
||||
int32_t srcLength,
|
||||
UChar32 subchar, int32_t *pNumSubstitutions,
|
||||
UErrorCode *pErrorCode){
|
||||
|
||||
UChar *pDest = dest;
|
||||
UChar *pDestLimit = dest+destCapacity;
|
||||
UChar32 ch;
|
||||
@ -599,12 +598,7 @@ u_strFromUTF8WithSub(UChar *dest,
|
||||
*(pDest++)=(UChar)ch;
|
||||
}else{
|
||||
*(pDest++)=UTF16_LEAD(ch);
|
||||
if(pDest<pDestLimit){
|
||||
*(pDest++)=UTF16_TRAIL(ch);
|
||||
}else{
|
||||
reqLength++;
|
||||
break;
|
||||
}
|
||||
*(pDest++)=UTF16_TRAIL(ch);
|
||||
}
|
||||
}
|
||||
} while(--count > 0);
|
||||
@ -659,7 +653,7 @@ u_strFromUTF8WithSub(UChar *dest,
|
||||
}
|
||||
}
|
||||
}
|
||||
/* donot fill the dest buffer just count the UChars needed */
|
||||
/* do not fill the dest buffer just count the UChars needed */
|
||||
while(pSrc < pSrcLimit){
|
||||
ch = *pSrc;
|
||||
if(ch <= 0x7f){
|
||||
@ -738,7 +732,6 @@ u_strFromUTF8Lenient(UChar *dest,
|
||||
const char *src,
|
||||
int32_t srcLength,
|
||||
UErrorCode *pErrorCode) {
|
||||
|
||||
UChar *pDest = dest;
|
||||
UChar32 ch;
|
||||
int32_t reqLength = 0;
|
||||
@ -977,7 +970,6 @@ u_strToUTF8WithSub(char *dest,
|
||||
int32_t srcLength,
|
||||
UChar32 subchar, int32_t *pNumSubstitutions,
|
||||
UErrorCode *pErrorCode){
|
||||
|
||||
int32_t reqLength=0;
|
||||
uint32_t ch=0,ch2=0;
|
||||
uint8_t *pDest = (uint8_t *)dest;
|
||||
@ -1006,7 +998,7 @@ u_strToUTF8WithSub(char *dest,
|
||||
++pSrc;
|
||||
if(ch <= 0x7f) {
|
||||
if(pDest<pDestLimit) {
|
||||
*pDest++ = (char)ch;
|
||||
*pDest++ = (uint8_t)ch;
|
||||
} else {
|
||||
reqLength = 1;
|
||||
break;
|
||||
@ -1100,7 +1092,7 @@ u_strToUTF8WithSub(char *dest,
|
||||
do {
|
||||
ch=*pSrc++;
|
||||
if(ch <= 0x7f) {
|
||||
*pDest++ = (char)ch;
|
||||
*pDest++ = (uint8_t)ch;
|
||||
} else if(ch <= 0x7ff) {
|
||||
*pDest++=(uint8_t)((ch>>6)|0xc0);
|
||||
*pDest++=(uint8_t)((ch&0x3f)|0x80);
|
||||
@ -1149,7 +1141,7 @@ u_strToUTF8WithSub(char *dest,
|
||||
ch=*pSrc++;
|
||||
if(ch <= 0x7f) {
|
||||
if(pDest<pDestLimit) {
|
||||
*pDest++ = (char)ch;
|
||||
*pDest++ = (uint8_t)ch;
|
||||
} else {
|
||||
reqLength = 1;
|
||||
break;
|
||||
@ -1229,9 +1221,8 @@ u_strToUTF8WithSub(char *dest,
|
||||
}
|
||||
|
||||
/* Terminate the buffer */
|
||||
u_terminateChars((char*)dest,destCapacity,reqLength,pErrorCode);
|
||||
|
||||
return (char*)dest;
|
||||
u_terminateChars(dest, destCapacity, reqLength, pErrorCode);
|
||||
return dest;
|
||||
}
|
||||
|
||||
U_CAPI char* U_EXPORT2
|
||||
@ -1247,3 +1238,397 @@ u_strToUTF8(char *dest,
|
||||
U_SENTINEL, NULL,
|
||||
pErrorCode);
|
||||
}
|
||||
|
||||
/*
 * Convert a Java Modified UTF-8 byte string into 16-bit Unicode code units.
 *
 * Only one-, two- and three-byte sequences are decoded; they are not checked
 * for shortest form, and three-byte sequences may yield surrogate code units.
 * Any other byte sequence is an error: if subchar<0 the function fails with
 * U_INVALID_CHAR_FOUND, otherwise subchar is written in its place and the
 * malformed sequence is skipped via utf8_nextCharSafeBodyPointer().
 *
 * dest/destCapacity     output buffer; dest may be NULL if destCapacity==0
 *                       (pre-flighting)
 * pDestLength           if not NULL, receives the full output length, even on
 *                       buffer overflow
 * src/srcLength         input bytes; srcLength==-1 means NUL-terminated
 * subchar               substitution code point, or negative for "no substitution"
 * pNumSubstitutions     if not NULL, receives the number of substitutions made
 * pErrorCode            in/out ICU error code
 *
 * Returns dest, or NULL if *pErrorCode was already a failure, an argument is
 * illegal, or an invalid sequence was found while subchar<0.
 */
U_CAPI UChar* U_EXPORT2
u_strFromJavaModifiedUTF8WithSub(
        UChar *dest,
        int32_t destCapacity,
        int32_t *pDestLength,
        const char *src,
        int32_t srcLength,
        UChar32 subchar, int32_t *pNumSubstitutions,
        UErrorCode *pErrorCode) {
    UChar *pDest = dest;
    UChar *pDestLimit;
    UChar32 ch;
    int32_t reqLength = 0;
    const uint8_t* pSrc = (const uint8_t*) src;
    const uint8_t *pSrcLimit;
    int32_t count;
    uint8_t t1, t2; /* trail bytes */
    int32_t numSubstitutions;

    /* args check */
    if(U_FAILURE(*pErrorCode)){
        return NULL;
    }
    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
        (dest==NULL && destCapacity!=0) || destCapacity<0 ||
        subchar > 0x10ffff || U_IS_SURROGATE(subchar)
    ) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    /* Form the limit pointer only after destCapacity is known to be non-negative. */
    pDestLimit = dest+destCapacity;

    if(pNumSubstitutions!=NULL) {
        *pNumSubstitutions=0;
    }
    numSubstitutions=0;

    if(srcLength < 0) {
        /*
         * Transform a NUL-terminated ASCII string.
         * Handle non-ASCII strings with slower code.
         */
        while(((ch = *pSrc) != 0) && ch <= 0x7f && (pDest < pDestLimit)) {
            *pDest++=(UChar)ch;
            ++pSrc;
        }
        if(ch == 0) {
            /* The whole string was ASCII and fit into dest. */
            reqLength=(int32_t)(pDest - dest);
            if(pDestLength) {
                *pDestLength = reqLength;
            }

            /* Terminate the buffer */
            u_terminateUChars(dest, destCapacity, reqLength, pErrorCode);
            return dest;
        }
        /* Length of the part that has not been converted yet. */
        srcLength = (int32_t)uprv_strlen((const char *)pSrc);
    }

    /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */
    pSrcLimit = pSrc + srcLength;
    for(;;) {
        count = (int32_t)(pDestLimit - pDest);
        srcLength = (int32_t)(pSrcLimit - pSrc);
        if(count >= srcLength && srcLength > 0 && *pSrc <= 0x7f) {
            /* fast ASCII loop: dest has room for every remaining source byte */
            const uint8_t *prevSrc = pSrc;
            int32_t delta;
            while(pSrc < pSrcLimit && (ch = *pSrc) <= 0x7f) {
                *pDest++=(UChar)ch;
                ++pSrc;
            }
            delta = (int32_t)(pSrc - prevSrc);
            count -= delta;
            srcLength -= delta;
        }
        /*
         * Each iteration of the inner loop progresses by at most 3 UTF-8
         * bytes and one UChar.
         */
        srcLength /= 3;
        if(count > srcLength) {
            count = srcLength; /* min(remaining dest, remaining src/3) */
        }
        if(count < 3) {
            /*
             * Too much overhead if we get near the end of the string,
             * continue with the next loop.
             */
            break;
        }
        do {
            ch = *pSrc;
            if(ch <= 0x7f){
                *pDest++=(UChar)ch;
                ++pSrc;
            } else {
                if(ch >= 0xe0) {
                    if( /* handle U+0000..U+FFFF inline */
                        ch <= 0xef &&
                        (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f &&
                        (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f
                    ) {
                        /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
                        *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
                        pSrc += 3;
                        continue;
                    }
                } else {
                    if( /* handle U+0000..U+07FF inline */
                        ch >= 0xc0 &&
                        (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f
                    ) {
                        *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
                        pSrc += 2;
                        continue;
                    }
                }

                if(subchar < 0) {
                    *pErrorCode = U_INVALID_CHAR_FOUND;
                    return NULL;
                } else if(subchar > 0xffff && --count == 0) {
                    /*
                     * We need to write two UChars, adjusted count for that,
                     * and ran out of space.
                     */
                    break;
                } else {
                    /* function call for error cases */
                    ++pSrc; /* continue after the lead byte */
                    utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
                    ++numSubstitutions;
                    if(subchar<=0xFFFF) {
                        *(pDest++)=(UChar)subchar;
                    } else {
                        *(pDest++)=U16_LEAD(subchar);
                        *(pDest++)=U16_TRAIL(subchar);
                    }
                    /*
                     * The skip above may have advanced pSrc by more than the
                     * three bytes budgeted per iteration, so the remaining
                     * count no longer guarantees that pSrc[1] and pSrc[2] are
                     * in bounds. Leave the unchecked loop and let the outer
                     * loop recompute the budget from the actual pointers.
                     */
                    break;
                }
            }
        } while(--count > 0);
    }

    /* Slower loop: every read and write is checked against its limit. */
    while((pSrc<pSrcLimit) && (pDest<pDestLimit)) {
        ch = *pSrc;
        if(ch <= 0x7f){
            *pDest++=(UChar)ch;
            ++pSrc;
        } else {
            if(ch >= 0xe0) {
                if( /* handle U+0000..U+FFFF inline */
                    ch <= 0xef &&
                    ((pSrcLimit - pSrc) >= 3) &&
                    (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f &&
                    (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f
                ) {
                    /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
                    *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
                    pSrc += 3;
                    continue;
                }
            } else {
                if( /* handle U+0000..U+07FF inline */
                    ch >= 0xc0 &&
                    ((pSrcLimit - pSrc) >= 2) &&
                    (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f
                ) {
                    *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
                    pSrc += 2;
                    continue;
                }
            }

            if(subchar < 0) {
                *pErrorCode = U_INVALID_CHAR_FOUND;
                return NULL;
            } else {
                /* function call for error cases */
                ++pSrc; /* continue after the lead byte */
                utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
                ++numSubstitutions;
                if(subchar<=0xFFFF) {
                    *(pDest++)=(UChar)subchar;
                } else {
                    *(pDest++)=U16_LEAD(subchar);
                    if(pDest<pDestLimit) {
                        *(pDest++)=U16_TRAIL(subchar);
                    } else {
                        /* No room for the trail surrogate: count it and switch to pre-flighting. */
                        reqLength++;
                        break;
                    }
                }
            }
        }
    }

    /* do not fill the dest buffer just count the UChars needed */
    while(pSrc < pSrcLimit){
        ch = *pSrc;
        if(ch <= 0x7f) {
            reqLength++;
            ++pSrc;
        } else {
            if(ch >= 0xe0) {
                if( /* handle U+0000..U+FFFF inline */
                    ch <= 0xef &&
                    ((pSrcLimit - pSrc) >= 3) &&
                    (uint8_t)(pSrc[1] - 0x80) <= 0x3f &&
                    (uint8_t)(pSrc[2] - 0x80) <= 0x3f
                ) {
                    reqLength++;
                    pSrc += 3;
                    continue;
                }
            } else {
                if( /* handle U+0000..U+07FF inline */
                    ch >= 0xc0 &&
                    ((pSrcLimit - pSrc) >= 2) &&
                    (uint8_t)(pSrc[1] - 0x80) <= 0x3f
                ) {
                    reqLength++;
                    pSrc += 2;
                    continue;
                }
            }

            if(subchar < 0) {
                *pErrorCode = U_INVALID_CHAR_FOUND;
                return NULL;
            } else {
                /* function call for error cases */
                ++pSrc; /* continue after the lead byte */
                utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
                ++numSubstitutions;
                /*
                 * The substitution character is what would be written here,
                 * so count its UTF-16 length (ch is still the lead byte).
                 */
                reqLength+=U16_LENGTH(subchar);
            }
        }
    }

    if(pNumSubstitutions!=NULL) {
        *pNumSubstitutions=numSubstitutions;
    }

    /* Units actually written plus units only counted. */
    reqLength+=(int32_t)(pDest - dest);
    if(pDestLength) {
        *pDestLength = reqLength;
    }

    /* Terminate the buffer */
    u_terminateUChars(dest, destCapacity, reqLength, pErrorCode);
    return dest;
}
|
||||
|
||||
U_CAPI char* U_EXPORT2
|
||||
u_strToJavaModifiedUTF8(
|
||||
char *dest,
|
||||
int32_t destCapacity,
|
||||
int32_t *pDestLength,
|
||||
const UChar *src,
|
||||
int32_t srcLength,
|
||||
UErrorCode *pErrorCode) {
|
||||
int32_t reqLength=0;
|
||||
uint32_t ch=0,ch2=0;
|
||||
uint8_t *pDest = (uint8_t *)dest;
|
||||
uint8_t *pDestLimit = pDest + destCapacity;
|
||||
const UChar *pSrcLimit;
|
||||
int32_t count;
|
||||
|
||||
/* args check */
|
||||
if(U_FAILURE(*pErrorCode)){
|
||||
return NULL;
|
||||
}
|
||||
if( (src==NULL && srcLength!=0) || srcLength < -1 ||
|
||||
(dest==NULL && destCapacity!=0) || destCapacity<0
|
||||
) {
|
||||
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if(srcLength==-1) {
|
||||
/* Convert NUL-terminated ASCII, then find the string length. */
|
||||
while((ch=*src)<=0x7f && ch != 0 && pDest<pDestLimit) {
|
||||
*pDest++ = (uint8_t)ch;
|
||||
++src;
|
||||
}
|
||||
if(ch == 0) {
|
||||
reqLength=(int32_t)(pDest - (uint8_t *)dest);
|
||||
if(pDestLength) {
|
||||
*pDestLength = reqLength;
|
||||
}
|
||||
|
||||
/* Terminate the buffer */
|
||||
u_terminateChars(dest, destCapacity, reqLength, pErrorCode);
|
||||
return dest;
|
||||
}
|
||||
srcLength = u_strlen(src);
|
||||
}
|
||||
|
||||
/* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */
|
||||
pSrcLimit = src+srcLength;
|
||||
for(;;) {
|
||||
count = (int32_t)(pDestLimit - pDest);
|
||||
srcLength = (int32_t)(pSrcLimit - src);
|
||||
if(count >= srcLength && srcLength > 0 && *src <= 0x7f) {
|
||||
/* fast ASCII loop */
|
||||
const UChar *prevSrc = src;
|
||||
int32_t delta;
|
||||
while(src < pSrcLimit && (ch = *src) <= 0x7f && ch != 0) {
|
||||
*pDest++=(uint8_t)ch;
|
||||
++src;
|
||||
}
|
||||
delta = (int32_t)(src - prevSrc);
|
||||
count -= delta;
|
||||
srcLength -= delta;
|
||||
}
|
||||
/*
|
||||
* Each iteration of the inner loop progresses by at most 3 UTF-8
|
||||
* bytes and one UChar.
|
||||
*/
|
||||
count /= 3;
|
||||
if(count > srcLength) {
|
||||
count = srcLength; /* min(remaining dest/3, remaining src) */
|
||||
}
|
||||
if(count < 3) {
|
||||
/*
|
||||
* Too much overhead if we get near the end of the string,
|
||||
* continue with the next loop.
|
||||
*/
|
||||
break;
|
||||
}
|
||||
do {
|
||||
ch=*src++;
|
||||
if(ch <= 0x7f && ch != 0) {
|
||||
*pDest++ = (uint8_t)ch;
|
||||
} else if(ch <= 0x7ff) {
|
||||
*pDest++=(uint8_t)((ch>>6)|0xc0);
|
||||
*pDest++=(uint8_t)((ch&0x3f)|0x80);
|
||||
} else {
|
||||
*pDest++=(uint8_t)((ch>>12)|0xe0);
|
||||
*pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
|
||||
*pDest++=(uint8_t)((ch&0x3f)|0x80);
|
||||
}
|
||||
} while(--count > 0);
|
||||
}
|
||||
|
||||
while(src<pSrcLimit) {
|
||||
ch=*src++;
|
||||
if(ch <= 0x7f && ch != 0) {
|
||||
if(pDest<pDestLimit) {
|
||||
*pDest++ = (uint8_t)ch;
|
||||
} else {
|
||||
reqLength = 1;
|
||||
break;
|
||||
}
|
||||
} else if(ch <= 0x7ff) {
|
||||
if((pDestLimit - pDest) >= 2) {
|
||||
*pDest++=(uint8_t)((ch>>6)|0xc0);
|
||||
*pDest++=(uint8_t)((ch&0x3f)|0x80);
|
||||
} else {
|
||||
reqLength = 2;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if((pDestLimit - pDest) >= 3) {
|
||||
*pDest++=(uint8_t)((ch>>12)|0xe0);
|
||||
*pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
|
||||
*pDest++=(uint8_t)((ch&0x3f)|0x80);
|
||||
} else {
|
||||
reqLength = 3;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
while(src<pSrcLimit) {
|
||||
ch=*src++;
|
||||
if(ch <= 0x7f && ch != 0) {
|
||||
++reqLength;
|
||||
} else if(ch<=0x7ff) {
|
||||
reqLength+=2;
|
||||
} else {
|
||||
reqLength+=3;
|
||||
}
|
||||
}
|
||||
|
||||
reqLength+=(int32_t)(pDest - (uint8_t *)dest);
|
||||
if(pDestLength){
|
||||
*pDestLength = reqLength;
|
||||
}
|
||||
|
||||
/* Terminate the buffer */
|
||||
u_terminateChars(dest, destCapacity, reqLength, pErrorCode);
|
||||
return dest;
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 2001-2009, International Business Machines Corporation and
|
||||
* Copyright (c) 2001-2010, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
/********************************************************************************
|
||||
@ -17,6 +17,7 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/ures.h"
|
||||
@ -40,6 +41,8 @@ static void Test_FromUTF8Lenient(void);
|
||||
static void Test_UChar_WCHART_API(void);
|
||||
static void Test_widestrs(void);
|
||||
static void Test_WCHART_LongString(void);
|
||||
static void Test_strToJavaModifiedUTF8(void);
|
||||
static void Test_strFromJavaModifiedUTF8(void);
|
||||
|
||||
void
|
||||
addUCharTransformTest(TestNode** root)
|
||||
@ -56,6 +59,8 @@ addUCharTransformTest(TestNode** root)
|
||||
#if !UCONFIG_NO_FILE_IO
|
||||
addTest(root, &Test_WCHART_LongString, "custrtrn/Test_WCHART_LongString");
|
||||
#endif
|
||||
addTest(root, &Test_strToJavaModifiedUTF8, "custrtrn/Test_strToJavaModifiedUTF8");
|
||||
addTest(root, &Test_strFromJavaModifiedUTF8, "custrtrn/Test_strFromJavaModifiedUTF8");
|
||||
}
|
||||
|
||||
static const UChar32 src32[]={
|
||||
@ -1473,3 +1478,480 @@ Test_WCHART_LongString(){
|
||||
#endif
|
||||
}
|
||||
|
||||
static void Test_strToJavaModifiedUTF8() {
|
||||
static const UChar src[]={
|
||||
0x61, 0x62, 0x63, 0xe1, 0xe2, 0xe3,
|
||||
0xe01, 0xe02, 0xe03, 0xe001, 0xe002, 0xe003,
|
||||
0xd800, 0xdc00, 0xdc00, 0xd800, 0,
|
||||
0xdbff, 0xdfff,
|
||||
0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0xed, 0xe0e, 0x6f
|
||||
};
|
||||
static const uint8_t expected[]={
|
||||
0x61, 0x62, 0x63, 0xc3, 0xa1, 0xc3, 0xa2, 0xc3, 0xa3,
|
||||
0xe0, 0xb8, 0x81, 0xe0, 0xb8, 0x82, 0xe0, 0xb8, 0x83,
|
||||
0xee, 0x80, 0x81, 0xee, 0x80, 0x82, 0xee, 0x80, 0x83,
|
||||
0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xa0, 0x80, 0xc0, 0x80,
|
||||
0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
|
||||
0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0xc3, 0xad, 0xe0, 0xb8, 0x8e, 0x6f
|
||||
};
|
||||
static const UChar shortSrc[]={
|
||||
0xe01, 0xe1, 0x61
|
||||
};
|
||||
static const uint8_t shortExpected[]={
|
||||
0xe0, 0xb8, 0x81, 0xc3, 0xa1, 0x61
|
||||
};
|
||||
static const UChar asciiNul[]={
|
||||
0x61, 0x62, 0x63, 0
|
||||
};
|
||||
static const uint8_t asciiNulExpected[]={
|
||||
0x61, 0x62, 0x63
|
||||
};
|
||||
char dest[200];
|
||||
char *p;
|
||||
int32_t length, expectedTerminatedLength;
|
||||
UErrorCode errorCode;
|
||||
|
||||
expectedTerminatedLength=(int32_t)(strstr((const char *)expected, "\xc0\x80")-
|
||||
(const char *)expected);
|
||||
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=-5;
|
||||
p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length,
|
||||
src, LENGTHOF(src), &errorCode);
|
||||
if( U_FAILURE(errorCode) || p!=dest ||
|
||||
length!=LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
|
||||
dest[length]!=0
|
||||
) {
|
||||
log_err("u_strToJavaModifiedUTF8(normal) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
memset(dest, 0xff, sizeof(dest));
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=-5;
|
||||
p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), NULL,
|
||||
src, LENGTHOF(src), &errorCode);
|
||||
if( U_FAILURE(errorCode) || p!=dest ||
|
||||
0!=memcmp(dest, expected, LENGTHOF(expected)) ||
|
||||
dest[LENGTHOF(expected)]!=0
|
||||
) {
|
||||
log_err("u_strToJavaModifiedUTF8(normal, pLength=NULL) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
memset(dest, 0xff, sizeof(dest));
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=-5;
|
||||
p=u_strToJavaModifiedUTF8(dest, LENGTHOF(expected), &length,
|
||||
src, LENGTHOF(src), &errorCode);
|
||||
if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || p!=dest ||
|
||||
length!=LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
|
||||
dest[length]!=(char)0xff
|
||||
) {
|
||||
log_err("u_strToJavaModifiedUTF8(tight) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
memset(dest, 0xff, sizeof(dest));
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=-5;
|
||||
p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length, src, -1, &errorCode);
|
||||
if( U_FAILURE(errorCode) || p!=dest ||
|
||||
length!=expectedTerminatedLength || 0!=memcmp(dest, expected, length) ||
|
||||
dest[length]!=0
|
||||
) {
|
||||
log_err("u_strToJavaModifiedUTF8(NUL-terminated) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
memset(dest, 0xff, sizeof(dest));
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=-5;
|
||||
p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), NULL, src, -1, &errorCode);
|
||||
if( U_FAILURE(errorCode) || p!=dest ||
|
||||
0!=memcmp(dest, expected, expectedTerminatedLength) ||
|
||||
dest[expectedTerminatedLength]!=0
|
||||
) {
|
||||
log_err("u_strToJavaModifiedUTF8(NUL-terminated, pLength=NULL) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
memset(dest, 0xff, sizeof(dest));
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=-5;
|
||||
p=u_strToJavaModifiedUTF8(dest, LENGTHOF(expected)/2, &length,
|
||||
src, LENGTHOF(src), &errorCode);
|
||||
if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
|
||||
length!=LENGTHOF(expected) || dest[LENGTHOF(expected)/2]!=(char)0xff
|
||||
) {
|
||||
log_err("u_strToJavaModifiedUTF8(overflow) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
memset(dest, 0xff, sizeof(dest));
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=-5;
|
||||
p=u_strToJavaModifiedUTF8(NULL, 0, &length,
|
||||
src, LENGTHOF(src), &errorCode);
|
||||
if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
|
||||
length!=LENGTHOF(expected) || dest[0]!=(char)0xff
|
||||
) {
|
||||
log_err("u_strToJavaModifiedUTF8(pure preflighting) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
memset(dest, 0xff, sizeof(dest));
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=-5;
|
||||
p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length,
|
||||
shortSrc, LENGTHOF(shortSrc), &errorCode);
|
||||
if( U_FAILURE(errorCode) || p!=dest ||
|
||||
length!=LENGTHOF(shortExpected) || 0!=memcmp(dest, shortExpected, length) ||
|
||||
dest[length]!=0
|
||||
) {
|
||||
log_err("u_strToJavaModifiedUTF8(short) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
memset(dest, 0xff, sizeof(dest));
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=-5;
|
||||
p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length,
|
||||
asciiNul, -1, &errorCode);
|
||||
if( U_FAILURE(errorCode) || p!=dest ||
|
||||
length!=LENGTHOF(asciiNulExpected) || 0!=memcmp(dest, asciiNulExpected, length) ||
|
||||
dest[length]!=0
|
||||
) {
|
||||
log_err("u_strToJavaModifiedUTF8(asciiNul) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
memset(dest, 0xff, sizeof(dest));
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=-5;
|
||||
p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length,
|
||||
NULL, 0, &errorCode);
|
||||
if( U_FAILURE(errorCode) || p!=dest ||
|
||||
length!=0 || dest[0]!=0
|
||||
) {
|
||||
log_err("u_strToJavaModifiedUTF8(empty) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
|
||||
/* illegal arguments */
|
||||
memset(dest, 0xff, sizeof(dest));
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=-5;
|
||||
p=u_strToJavaModifiedUTF8(NULL, sizeof(dest), &length,
|
||||
src, LENGTHOF(src), &errorCode);
|
||||
if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) {
|
||||
log_err("u_strToJavaModifiedUTF8(dest=NULL) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
memset(dest, 0xff, sizeof(dest));
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=-5;
|
||||
p=u_strToJavaModifiedUTF8(dest, -1, &length,
|
||||
src, LENGTHOF(src), &errorCode);
|
||||
if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) {
|
||||
log_err("u_strToJavaModifiedUTF8(destCapacity<0) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
memset(dest, 0xff, sizeof(dest));
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=-5;
|
||||
p=u_strToJavaModifiedUTF8(dest, sizeof(dest), &length,
|
||||
NULL, LENGTHOF(src), &errorCode);
|
||||
if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) {
|
||||
log_err("u_strToJavaModifiedUTF8(src=NULL) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
memset(dest, 0xff, sizeof(dest));
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=-5;
|
||||
p=u_strToJavaModifiedUTF8(dest, sizeof(dest), &length,
|
||||
NULL, -1, &errorCode);
|
||||
if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) {
|
||||
log_err("u_strToJavaModifiedUTF8(src=NULL, srcLength<0) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
}
|
||||
|
||||
static void Test_strFromJavaModifiedUTF8() {
|
||||
static const uint8_t src[]={
|
||||
0x61, 0x62, 0x63, 0xc3, 0xa1, 0xc3, 0xa2, 0xc3, 0xa3,
|
||||
0xe0, 0xb8, 0x81, 0xe0, 0xb8, 0x82, 0xe0, 0xb8, 0x83,
|
||||
0xee, 0x80, 0x81, 0xee, 0x80, 0x82, 0xee, 0x80, 0x83,
|
||||
0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xa0, 0x80, 0,
|
||||
0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
|
||||
0x81, 0xc0, 0xe0, 0xb8, 0xf0, 0x90, 0x80, 0x80, /* invalid sequences */
|
||||
0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b,
|
||||
0xe0, 0x81, 0xac, 0xe0, 0x83, 0xad, /* non-shortest forms are allowed */
|
||||
0xe0, 0xb8, 0x8e, 0x6f
|
||||
};
|
||||
static const UChar expected[]={
|
||||
0x61, 0x62, 0x63, 0xe1, 0xe2, 0xe3,
|
||||
0xe01, 0xe02, 0xe03, 0xe001, 0xe002, 0xe003,
|
||||
0xd800, 0xdc00, 0xdc00, 0xd800, 0,
|
||||
0xdbff, 0xdfff,
|
||||
0xfffd, 0xfffd, 0xfffd, 0xfffd,
|
||||
0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b,
|
||||
0x6c, 0xed,
|
||||
0xe0e, 0x6f
|
||||
};
|
||||
static const uint8_t shortSrc[]={
|
||||
0xe0, 0xb8, 0x81, 0xc3, 0xa1, 0x61
|
||||
};
|
||||
static const UChar shortExpected[]={
|
||||
0xe01, 0xe1, 0x61
|
||||
};
|
||||
static const uint8_t asciiNul[]={
|
||||
0x61, 0x62, 0x63, 0
|
||||
};
|
||||
static const UChar asciiNulExpected[]={
|
||||
0x61, 0x62, 0x63
|
||||
};
|
||||
static const uint8_t invalid[]={
|
||||
0x81, 0xc0, 0xe0, 0xb8, 0xf0, 0x90, 0x80, 0x80
|
||||
};
|
||||
static const UChar invalidExpectedFFFD[]={
|
||||
0xfffd, 0xfffd, 0xfffd, 0xfffd
|
||||
};
|
||||
static const UChar invalidExpected50000[]={
|
||||
0xd900, 0xdc00, 0xd900, 0xdc00, 0xd900, 0xdc00, 0xd900, 0xdc00
|
||||
};
|
||||
UChar dest[200];
|
||||
UChar *p;
|
||||
int32_t length, expectedTerminatedLength;
|
||||
int32_t numSubstitutions;
|
||||
UErrorCode errorCode;
|
||||
|
||||
expectedTerminatedLength=(int32_t)(u_strchr(expected, 0)-expected);
|
||||
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=numSubstitutions=-5;
|
||||
p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
|
||||
(const char *)src, LENGTHOF(src),
|
||||
0xfffd, &numSubstitutions, &errorCode);
|
||||
if( U_FAILURE(errorCode) || p!=dest ||
|
||||
length!=LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
|
||||
dest[length]!=0 ||
|
||||
numSubstitutions!=LENGTHOF(invalidExpectedFFFD)
|
||||
) {
|
||||
log_err("u_strFromJavaModifiedUTF8WithSub(normal) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
memset(dest, 0xff, sizeof(dest));
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=numSubstitutions=-5;
|
||||
p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), NULL,
|
||||
(const char *)src, LENGTHOF(src),
|
||||
0xfffd, &numSubstitutions, &errorCode);
|
||||
if( U_FAILURE(errorCode) || p!=dest ||
|
||||
0!=memcmp(dest, expected, LENGTHOF(expected)) ||
|
||||
dest[LENGTHOF(expected)]!=0 ||
|
||||
numSubstitutions!=LENGTHOF(invalidExpectedFFFD)
|
||||
) {
|
||||
log_err("u_strFromJavaModifiedUTF8WithSub(normal, pLength=NULL) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
memset(dest, 0xff, sizeof(dest));
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=numSubstitutions=-5;
|
||||
p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
|
||||
(const char *)src, LENGTHOF(src),
|
||||
0xfffd, NULL, &errorCode);
|
||||
if( U_FAILURE(errorCode) || p!=dest ||
|
||||
length!=LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
|
||||
dest[length]!=0
|
||||
) {
|
||||
log_err("u_strFromJavaModifiedUTF8WithSub(normal, pNumSubstitutions=NULL) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
memset(dest, 0xff, sizeof(dest));
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=numSubstitutions=-5;
|
||||
p=u_strFromJavaModifiedUTF8WithSub(dest, LENGTHOF(expected), &length,
|
||||
(const char *)src, LENGTHOF(src),
|
||||
0xfffd, &numSubstitutions, &errorCode);
|
||||
if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || p!=dest ||
|
||||
length!=LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
|
||||
dest[length]!=0xffff ||
|
||||
numSubstitutions!=LENGTHOF(invalidExpectedFFFD)
|
||||
) {
|
||||
log_err("u_strFromJavaModifiedUTF8WithSub(tight) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
memset(dest, 0xff, sizeof(dest));
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=numSubstitutions=-5;
|
||||
p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
|
||||
(const char *)src, -1,
|
||||
0xfffd, &numSubstitutions, &errorCode);
|
||||
if( U_FAILURE(errorCode) || p!=dest ||
|
||||
length!=expectedTerminatedLength || 0!=memcmp(dest, expected, length) ||
|
||||
dest[length]!=0 ||
|
||||
numSubstitutions!=0
|
||||
) {
|
||||
log_err("u_strFromJavaModifiedUTF8WithSub(NUL-terminated) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
memset(dest, 0xff, sizeof(dest));
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=numSubstitutions=-5;
|
||||
p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), NULL,
|
||||
(const char *)src, -1,
|
||||
0xfffd, &numSubstitutions, &errorCode);
|
||||
if( U_FAILURE(errorCode) || p!=dest ||
|
||||
0!=memcmp(dest, expected, expectedTerminatedLength) ||
|
||||
dest[expectedTerminatedLength]!=0 ||
|
||||
numSubstitutions!=0
|
||||
) {
|
||||
log_err("u_strFromJavaModifiedUTF8WithSub(NUL-terminated, pLength=NULL) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
memset(dest, 0xff, sizeof(dest));
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=numSubstitutions=-5;
|
||||
p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
|
||||
(const char *)src, -1,
|
||||
0xfffd, NULL, &errorCode);
|
||||
if( U_FAILURE(errorCode) || p!=dest ||
|
||||
length!=expectedTerminatedLength || 0!=memcmp(dest, expected, length) ||
|
||||
dest[length]!=0
|
||||
) {
|
||||
log_err("u_strFromJavaModifiedUTF8WithSub(NUL-terminated, pNumSubstitutions=NULL) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
memset(dest, 0xff, sizeof(dest));
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=numSubstitutions=-5;
|
||||
p=u_strFromJavaModifiedUTF8WithSub(dest, LENGTHOF(expected)/2, &length,
|
||||
(const char *)src, LENGTHOF(src),
|
||||
0xfffd, &numSubstitutions, &errorCode);
|
||||
if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
|
||||
length!=LENGTHOF(expected) || dest[LENGTHOF(expected)/2]!=0xffff
|
||||
) {
|
||||
log_err("u_strFromJavaModifiedUTF8WithSub(overflow) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
memset(dest, 0xff, sizeof(dest));
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=numSubstitutions=-5;
|
||||
p=u_strFromJavaModifiedUTF8WithSub(NULL, 0, &length,
|
||||
(const char *)src, LENGTHOF(src),
|
||||
0xfffd, &numSubstitutions, &errorCode);
|
||||
if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
|
||||
length!=LENGTHOF(expected) || dest[0]!=0xffff
|
||||
) {
|
||||
log_err("u_strFromJavaModifiedUTF8WithSub(pure preflighting) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
memset(dest, 0xff, sizeof(dest));
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=numSubstitutions=-5;
|
||||
p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
|
||||
(const char *)shortSrc, LENGTHOF(shortSrc),
|
||||
0xfffd, &numSubstitutions, &errorCode);
|
||||
if( U_FAILURE(errorCode) || p!=dest ||
|
||||
length!=LENGTHOF(shortExpected) || 0!=memcmp(dest, shortExpected, length) ||
|
||||
dest[length]!=0 ||
|
||||
numSubstitutions!=0
|
||||
) {
|
||||
log_err("u_strFromJavaModifiedUTF8WithSub(short) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
memset(dest, 0xff, sizeof(dest));
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=numSubstitutions=-5;
|
||||
p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
|
||||
(const char *)asciiNul, -1,
|
||||
0xfffd, &numSubstitutions, &errorCode);
|
||||
if( U_FAILURE(errorCode) || p!=dest ||
|
||||
length!=LENGTHOF(asciiNulExpected) || 0!=memcmp(dest, asciiNulExpected, length) ||
|
||||
dest[length]!=0 ||
|
||||
numSubstitutions!=0
|
||||
) {
|
||||
log_err("u_strFromJavaModifiedUTF8WithSub(asciiNul) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
memset(dest, 0xff, sizeof(dest));
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=numSubstitutions=-5;
|
||||
p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
|
||||
NULL, 0, 0xfffd, &numSubstitutions, &errorCode);
|
||||
if( U_FAILURE(errorCode) || p!=dest ||
|
||||
length!=0 || dest[0]!=0 ||
|
||||
numSubstitutions!=0
|
||||
) {
|
||||
log_err("u_strFromJavaModifiedUTF8WithSub(empty) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
memset(dest, 0xff, sizeof(dest));
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=numSubstitutions=-5;
|
||||
p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
|
||||
(const char *)invalid, LENGTHOF(invalid),
|
||||
0xfffd, &numSubstitutions, &errorCode);
|
||||
if( U_FAILURE(errorCode) || p!=dest ||
|
||||
length!=LENGTHOF(invalidExpectedFFFD) || 0!=memcmp(dest, invalidExpectedFFFD, length) ||
|
||||
dest[length]!=0 ||
|
||||
numSubstitutions!=LENGTHOF(invalidExpectedFFFD)
|
||||
) {
|
||||
log_err("u_strFromJavaModifiedUTF8WithSub(invalid->fffd) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
memset(dest, 0xff, sizeof(dest));
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=numSubstitutions=-5;
|
||||
p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
|
||||
(const char *)invalid, LENGTHOF(invalid),
|
||||
0x50000, &numSubstitutions, &errorCode);
|
||||
if( U_FAILURE(errorCode) || p!=dest ||
|
||||
length!=LENGTHOF(invalidExpected50000) || 0!=memcmp(dest, invalidExpected50000, length) ||
|
||||
dest[length]!=0 ||
|
||||
numSubstitutions!=LENGTHOF(invalidExpectedFFFD) /* not ...50000 */
|
||||
) {
|
||||
log_err("u_strFromJavaModifiedUTF8WithSub(invalid->50000) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
memset(dest, 0xff, sizeof(dest));
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=numSubstitutions=-5;
|
||||
p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
|
||||
(const char *)invalid, LENGTHOF(invalid),
|
||||
U_SENTINEL, &numSubstitutions, &errorCode);
|
||||
if(errorCode!=U_INVALID_CHAR_FOUND || dest[0]!=0xffff || numSubstitutions!=0) {
|
||||
log_err("u_strFromJavaModifiedUTF8WithSub(invalid->error) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
memset(dest, 0xff, sizeof(dest));
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=numSubstitutions=-5;
|
||||
p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
|
||||
(const char *)src, LENGTHOF(src),
|
||||
U_SENTINEL, &numSubstitutions, &errorCode);
|
||||
if( errorCode!=U_INVALID_CHAR_FOUND ||
|
||||
length>=LENGTHOF(expected) || dest[LENGTHOF(expected)-1]!=0xffff ||
|
||||
numSubstitutions!=0
|
||||
) {
|
||||
log_err("u_strFromJavaModifiedUTF8WithSub(normal->error) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
|
||||
/* illegal arguments */
|
||||
memset(dest, 0xff, sizeof(dest));
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=numSubstitutions=-5;
|
||||
p=u_strFromJavaModifiedUTF8WithSub(NULL, sizeof(dest), &length,
|
||||
(const char *)src, LENGTHOF(src),
|
||||
0xfffd, &numSubstitutions, &errorCode);
|
||||
if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
|
||||
log_err("u_strFromJavaModifiedUTF8WithSub(dest=NULL) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
memset(dest, 0xff, sizeof(dest));
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=numSubstitutions=-5;
|
||||
p=u_strFromJavaModifiedUTF8WithSub(dest, -1, &length,
|
||||
(const char *)src, LENGTHOF(src),
|
||||
0xfffd, &numSubstitutions, &errorCode);
|
||||
if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
|
||||
log_err("u_strFromJavaModifiedUTF8WithSub(destCapacity<0) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
memset(dest, 0xff, sizeof(dest));
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=numSubstitutions=-5;
|
||||
p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length,
|
||||
NULL, LENGTHOF(src),
|
||||
0xfffd, &numSubstitutions, &errorCode);
|
||||
if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
|
||||
log_err("u_strFromJavaModifiedUTF8WithSub(src=NULL) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
memset(dest, 0xff, sizeof(dest));
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=numSubstitutions=-5;
|
||||
p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length,
|
||||
NULL, -1, 0xfffd, &numSubstitutions, &errorCode);
|
||||
if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
|
||||
log_err("u_strFromJavaModifiedUTF8WithSub(src=NULL, srcLength<0) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
memset(dest, 0xff, sizeof(dest));
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=numSubstitutions=-5;
|
||||
p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length,
|
||||
(const char *)src, LENGTHOF(src),
|
||||
0x110000, &numSubstitutions, &errorCode);
|
||||
if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
|
||||
log_err("u_strFromJavaModifiedUTF8WithSub(subchar=U_SENTINEL) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
memset(dest, 0xff, sizeof(dest));
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=numSubstitutions=-5;
|
||||
p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length,
|
||||
(const char *)src, LENGTHOF(src),
|
||||
0xdfff, &numSubstitutions, &errorCode);
|
||||
if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
|
||||
log_err("u_strFromJavaModifiedUTF8WithSub(subchar is surrogate) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user