ICU-5230 faster u_strToUTF8() and u_strFromUTF8() functions, added ...WithSub() variants, added u_strFromUTF8Lenient()

X-SVN-Rev: 19723
This commit is contained in:
Markus Scherer 2006-06-15 19:22:04 +00:00
parent 154314acd1
commit 1d687cde69
3 changed files with 1397 additions and 86 deletions

View File

@ -1230,6 +1230,8 @@ u_strFromWCS(UChar *dest,
* which must not indicate a failure before the function call.
* @return The pointer to destination buffer.
* @stable ICU 2.0
* @see u_strToUTF8WithSub
* @see u_strFromUTF8
*/
U_STABLE char* U_EXPORT2
u_strToUTF8(char *dest,
@ -1257,6 +1259,8 @@ u_strToUTF8(char *dest,
* which must not indicate a failure before the function call.
* @return The pointer to destination buffer.
* @stable ICU 2.0
* @see u_strFromUTF8WithSub
* @see u_strFromUTF8Lenient
*/
U_STABLE UChar* U_EXPORT2
u_strFromUTF8(UChar *dest,
@ -1266,6 +1270,148 @@ u_strFromUTF8(UChar *dest,
int32_t srcLength,
UErrorCode *pErrorCode);
/**
* Converts a sequence of UChars (UTF-16) to UTF-8 bytes.
* Same as u_strToUTF8() except for the additional subchar which is output for
* illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
* With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF8().
*
* @param dest A buffer for the result string. The result will be zero-terminated if
* the buffer is large enough.
* @param destCapacity The size of the buffer (number of chars). If it is 0, then
* dest may be NULL and the function will only return the length of the
* result without writing any of the result string (pre-flighting).
* @param pDestLength A pointer to receive the number of units written to the destination. If
* pDestLength!=NULL then *pDestLength is always set to the
* number of output units corresponding to the transformation of
* all the input units, even in case of a buffer overflow.
* @param src The original source string
* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
* @param subchar The substitution character to use in place of an illegal input sequence,
* or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
* A substitution character can be any valid Unicode code point (up to U+10FFFF)
* except for surrogate code points (U+D800..U+DFFF).
* The recommended value is U+FFFD "REPLACEMENT CHARACTER".
* @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
* Set to 0 if no substitutions occur or subchar<0.
* pNumSubstitutions can be NULL.
* @param pErrorCode Pointer to a standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return The pointer to destination buffer.
* @see u_strToUTF8
* @see u_strFromUTF8WithSub
* @draft ICU 3.6
*/
U_DRAFT char* U_EXPORT2
u_strToUTF8WithSub(char *dest,
int32_t destCapacity,
int32_t *pDestLength,
const UChar *src,
int32_t srcLength,
UChar32 subchar, int32_t *pNumSubstitutions,
UErrorCode *pErrorCode);
/**
* Converts a sequence of UTF-8 bytes to UChars (UTF-16).
* Same as u_strFromUTF8() except for the additional subchar which is output for
* illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
* With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF8().
*
* @param dest A buffer for the result string. The result will be zero-terminated if
* the buffer is large enough.
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
* dest may be NULL and the function will only return the length of the
* result without writing any of the result string (pre-flighting).
* @param pDestLength A pointer to receive the number of units written to the destination. If
* pDestLength!=NULL then *pDestLength is always set to the
* number of output units corresponding to the transformation of
* all the input units, even in case of a buffer overflow.
* @param src The original source string
* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
* @param subchar The substitution character to use in place of an illegal input sequence,
* or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
* A substitution character can be any valid Unicode code point (up to U+10FFFF)
* except for surrogate code points (U+D800..U+DFFF).
* The recommended value is U+FFFD "REPLACEMENT CHARACTER".
* @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
* Set to 0 if no substitutions occur or subchar<0.
* pNumSubstitutions can be NULL.
* @param pErrorCode Pointer to a standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return The pointer to destination buffer.
* @see u_strFromUTF8
* @see u_strFromUTF8Lenient
* @see u_strToUTF8WithSub
* @draft ICU 3.6
*/
U_DRAFT UChar* U_EXPORT2
u_strFromUTF8WithSub(UChar *dest,
int32_t destCapacity,
int32_t *pDestLength,
const char *src,
int32_t srcLength,
UChar32 subchar, int32_t *pNumSubstitutions,
UErrorCode *pErrorCode);
/**
* Converts a sequence of UTF-8 bytes to UChars (UTF-16).
* Same as u_strFromUTF8() except that this function is designed to be very fast,
* which it achieves by being lenient about malformed UTF-8 sequences.
* This function is intended for use in environments where UTF-8 text is
* expected to be well-formed.
*
* Its semantics are:
* - Well-formed UTF-8 text is correctly converted to well-formed UTF-16 text.
* - The function will not read beyond the input string, nor write beyond
* the destCapacity.
* - Malformed UTF-8 results in "garbage" 16-bit Unicode strings which may not
* be well-formed UTF-16.
* The function will resynchronize to valid code point boundaries
* within a small number of code points after an illegal sequence.
* - Non-shortest forms are not detected and will result in "spoofing" output.
*
* For further performance improvement, if srcLength is given (>=0),
* then it must be destCapacity>=srcLength.
*
* @param dest A buffer for the result string. The result will be zero-terminated if
* the buffer is large enough.
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
* dest may be NULL and the function will only return the length of the
* result without writing any of the result string (pre-flighting).
* Unlike for other ICU functions, if srcLength>=0 then it
* must be destCapacity>=srcLength.
* @param pDestLength A pointer to receive the number of units written to the destination. If
* pDestLength!=NULL then *pDestLength is always set to the
* number of output units corresponding to the transformation of
* all the input units, even in case of a buffer overflow.
* Unlike for other ICU functions, if srcLength>=0 but
* destCapacity<srcLength, then *pDestLength will be set to srcLength
* (and U_BUFFER_OVERFLOW_ERROR will be set)
* regardless of the actual result length.
* @param src The original source string
* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
* @param pErrorCode Pointer to a standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return The pointer to destination buffer.
* @see u_strFromUTF8
* @see u_strFromUTF8WithSub
* @see u_strToUTF8WithSub
* @draft ICU 3.6
*/
U_CAPI UChar * U_EXPORT2
u_strFromUTF8Lenient(UChar *dest,
int32_t destCapacity,
int32_t *pDestLength,
const char *src,
int32_t srcLength,
UErrorCode *pErrorCode);
/**
* Converts a sequence of UChars (UTF-16) to UTF32 units.
*

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 2001-2005, International Business Machines Corporation and
* Copyright (c) 2001-2006, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/********************************************************************************
@ -22,6 +22,8 @@
#include "unicode/ures.h"
#include "ustr_imp.h"
#include "cintltst.h"
#include "cmemory.h"
#include "cstring.h"
#include "cwchar.h"
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
@ -30,6 +32,7 @@ void addUCharTransformTest(TestNode** root);
static void Test_UChar_UTF32_API(void);
static void Test_UChar_UTF8_API(void);
static void Test_FromUTF8Lenient(void);
static void Test_UChar_WCHART_API(void);
static void Test_widestrs(void);
static void Test_WCHART_LongString(void);
@ -39,6 +42,7 @@ addUCharTransformTest(TestNode** root)
{
addTest(root, &Test_UChar_UTF32_API, "custrtrn/Test_UChar_UTF32_API");
addTest(root, &Test_UChar_UTF8_API, "custrtrn/Test_UChar_UTF8_API");
addTest(root, &Test_FromUTF8Lenient, "custrtrn/Test_FromUTF8Lenient");
addTest(root, &Test_UChar_WCHART_API, "custrtrn/Test_UChar_WCHART_API");
addTest(root, &Test_widestrs, "custrtrn/Test_widestrs");
addTest(root, &Test_WCHART_LongString, "custrtrn/Test_WCHART_LongString");
@ -270,16 +274,21 @@ static void Test_UChar_UTF8_API(void){
int32_t u8DestLen =0;
UBool failed = FALSE;
int i= 0;
int32_t numSubstitutions;
{
/* preflight */
u8Temp[0] = 0x12;
u_strToUTF8(u8Target,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err);
if(err == U_BUFFER_OVERFLOW_ERROR){
if(err == U_BUFFER_OVERFLOW_ERROR && u8Temp[0] == 0x12){
err = U_ZERO_ERROR;
u8Target = (char*) malloc (sizeof(uint8_t) * (u8DestLen+1));
u8TargetLength = u8DestLen;
u8Target[u8TargetLength] = 0xfe;
u8DestLen = -1;
u_strToUTF8(u8Target,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err);
if(U_FAILURE(err)){
if(U_FAILURE(err) || u8DestLen != u8TargetLength || u8Target[u8TargetLength] != (char)0xfe){
log_err("u_strToUTF8 failed after preflight. Error: %s\n", u_errorName(err));
return;
}
@ -308,16 +317,19 @@ static void Test_UChar_UTF8_API(void){
u8SrcLen = u8DestLen;
/* preflight */
uTemp[0] = 0x1234;
u_strFromUTF8(uTarget,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err);
if(err == U_BUFFER_OVERFLOW_ERROR){
if(err == U_BUFFER_OVERFLOW_ERROR && uTemp[0] == 0x1234){
err = U_ZERO_ERROR;
uTarget = (UChar*) malloc( sizeof(UChar) * (uDestLen+1));
uTargetLength = uDestLen;
uTarget[uTargetLength] = 0xfff0;
uDestLen = -1;
u_strFromUTF8(uTarget,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err);
}
else {
log_err("Should have gotten U_BUFFER_OVERFLOW_ERROR");
log_err("error: u_strFromUTF8(preflight) should have gotten U_BUFFER_OVERFLOW_ERROR\n");
}
/*for(i=0; i< uDestLen; i++){
printf("0x%04X, ",uTarget[i]);
@ -325,7 +337,10 @@ static void Test_UChar_UTF8_API(void){
printf("\n");
}
}*/
if(U_FAILURE(err) || uDestLen != uTargetLength || uTarget[uTargetLength] != 0xfff0) {
failed = TRUE;
}
for(i=0; i< uSrcLen; i++){
if(uTarget[i] != src16[i]){
log_verbose("u_strFromUTF8() failed expected: \\u%04X got: \\u%04X at index: %i \n", src16[i] ,uTarget[i],i);
@ -333,7 +348,7 @@ static void Test_UChar_UTF8_API(void){
}
}
if(failed){
log_err("u_strToUTF8() failed \n");
log_err("error: u_strFromUTF8(after preflighting) failed\n");
}
free(u8Target);
@ -414,10 +429,14 @@ static void Test_UChar_UTF8_API(void){
{
static const UChar
withLead16[]={ 0x1800, 0xd89a, 0x0061 },
withTrail16[]={ 0x1800, 0xdcba, 0x0061, 0 };
withTrail16[]={ 0x1800, 0xdcba, 0x0061, 0 },
withTrail16SubFFFD[]={ 0x1800, 0xfffd, 0x0061, 0 }, /* sub==U+FFFD */
withTrail16Sub50005[]={ 0x1800, 0xd900, 0xdc05, 0x0061, 0 }; /* sub==U+50005 */
static const uint8_t
withLead8[]={ 0xe1, 0xa0, 0x80, 0xed, 0xa2, 0x9a, 0x61 },
withTrail8[]={ 0xe1, 0xa0, 0x80, 0xed, 0xb2, 0xba, 0x61 };
withTrail8[]={ 0xe1, 0xa0, 0x80, 0xed, 0xb2, 0xba, 0x61, 0 },
withTrail8Sub1A[]={ 0xe1, 0xa0, 0x80, 0x1a, 0x61, 0 }, /* sub==U+001A */
withTrail8SubFFFD[]={ 0xe1, 0xa0, 0x80, 0xef, 0xbf, 0xbd, 0x61, 0 }; /* sub==U+FFFD */
UChar out16[10];
char out8[10];
@ -429,8 +448,384 @@ static void Test_UChar_UTF8_API(void){
) {
log_err("error: u_strTo/FromUTF8(string with single surrogate) fails to report error\n");
}
/* test error handling with substitution characters */
/* from UTF-8 with length */
err=U_ZERO_ERROR;
numSubstitutions=-1;
out16[0]=0x55aa;
uDestLen=0;
u_strFromUTF8WithSub(out16, LENGTHOF(out16), &uDestLen,
(const char *)withTrail8, uprv_strlen((const char *)withTrail8),
0x50005, &numSubstitutions,
&err);
if(U_FAILURE(err) || uDestLen!=u_strlen(withTrail16Sub50005) ||
0!=u_memcmp(withTrail16Sub50005, out16, uDestLen+1) ||
numSubstitutions!=1) {
log_err("error: u_strFromUTF8WithSub(length) failed\n");
}
/* from UTF-8 with NUL termination */
err=U_ZERO_ERROR;
numSubstitutions=-1;
out16[0]=0x55aa;
uDestLen=0;
u_strFromUTF8WithSub(out16, LENGTHOF(out16), &uDestLen,
(const char *)withTrail8, -1,
0xfffd, &numSubstitutions,
&err);
if(U_FAILURE(err) || uDestLen!=u_strlen(withTrail16SubFFFD) ||
0!=u_memcmp(withTrail16SubFFFD, out16, uDestLen+1) ||
numSubstitutions!=1) {
log_err("error: u_strFromUTF8WithSub(NUL termination) failed\n");
}
/* preflight from UTF-8 with NUL termination */
err=U_ZERO_ERROR;
numSubstitutions=-1;
out16[0]=0x55aa;
uDestLen=0;
u_strFromUTF8WithSub(out16, 1, &uDestLen,
(const char *)withTrail8, -1,
0x50005, &numSubstitutions,
&err);
if(err!=U_BUFFER_OVERFLOW_ERROR || uDestLen!=u_strlen(withTrail16Sub50005) || numSubstitutions!=1) {
log_err("error: u_strFromUTF8WithSub(preflight/NUL termination) failed\n");
}
/* to UTF-8 with length */
err=U_ZERO_ERROR;
numSubstitutions=-1;
out8[0]=0xf5;
u8DestLen=0;
u_strToUTF8WithSub(out8, LENGTHOF(out8), &u8DestLen,
withTrail16, u_strlen(withTrail16),
0xfffd, &numSubstitutions,
&err);
if(U_FAILURE(err) || u8DestLen!=uprv_strlen((const char *)withTrail8SubFFFD) ||
0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen+1) ||
numSubstitutions!=1) {
log_err("error: u_strToUTF8WithSub(length) failed\n");
}
/* to UTF-8 with NUL termination */
err=U_ZERO_ERROR;
numSubstitutions=-1;
out8[0]=0xf5;
u8DestLen=0;
u_strToUTF8WithSub(out8, LENGTHOF(out8), &u8DestLen,
withTrail16, -1,
0x1a, &numSubstitutions,
&err);
if(U_FAILURE(err) || u8DestLen!=uprv_strlen((const char *)withTrail8Sub1A) ||
0!=uprv_memcmp((const char *)withTrail8Sub1A, out8, u8DestLen+1) ||
numSubstitutions!=1) {
log_err("error: u_strToUTF8WithSub(NUL termination) failed\n");
}
/* preflight to UTF-8 with NUL termination */
err=U_ZERO_ERROR;
numSubstitutions=-1;
out8[0]=0xf5;
u8DestLen=0;
u_strToUTF8WithSub(out8, 1, &u8DestLen,
withTrail16, -1,
0xfffd, &numSubstitutions,
&err);
if(err!=U_BUFFER_OVERFLOW_ERROR || u8DestLen!=uprv_strlen((const char *)withTrail8SubFFFD) ||
numSubstitutions!=1) {
log_err("error: u_strToUTF8WithSub(preflight/NUL termination) failed\n");
}
/* test that numSubstitutions==0 if there are no substitutions */
/* from UTF-8 with length (just first 3 bytes which are valid) */
err=U_ZERO_ERROR;
numSubstitutions=-1;
out16[0]=0x55aa;
uDestLen=0;
u_strFromUTF8WithSub(out16, LENGTHOF(out16), &uDestLen,
(const char *)withTrail8, 3,
0x50005, &numSubstitutions,
&err);
if(U_FAILURE(err) || uDestLen!=1 ||
0!=u_memcmp(withTrail16Sub50005, out16, uDestLen) ||
numSubstitutions!=0) {
log_err("error: u_strFromUTF8WithSub(no subs) failed\n");
}
/* to UTF-8 with length (just first UChar which is valid) */
err=U_ZERO_ERROR;
numSubstitutions=-1;
out8[0]=0xf5;
u8DestLen=0;
u_strToUTF8WithSub(out8, LENGTHOF(out8), &u8DestLen,
withTrail16, 1,
0xfffd, &numSubstitutions,
&err);
if(U_FAILURE(err) || u8DestLen!=3 ||
0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen) ||
numSubstitutions!=0) {
log_err("error: u_strToUTF8WithSub(no subs) failed\n");
}
/* test that numSubstitutions==0 if subchar==U_SENTINEL (no subchar) */
/* from UTF-8 with length (just first 3 bytes which are valid) */
err=U_ZERO_ERROR;
numSubstitutions=-1;
out16[0]=0x55aa;
uDestLen=0;
u_strFromUTF8WithSub(out16, LENGTHOF(out16), &uDestLen,
(const char *)withTrail8, 3,
U_SENTINEL, &numSubstitutions,
&err);
if(U_FAILURE(err) || uDestLen!=1 ||
0!=u_memcmp(withTrail16Sub50005, out16, uDestLen) ||
numSubstitutions!=0) {
log_err("error: u_strFromUTF8WithSub(no subchar) failed\n");
}
/* to UTF-8 with length (just first UChar which is valid) */
err=U_ZERO_ERROR;
numSubstitutions=-1;
out8[0]=0xf5;
u8DestLen=0;
u_strToUTF8WithSub(out8, LENGTHOF(out8), &u8DestLen,
withTrail16, 1,
U_SENTINEL, &numSubstitutions,
&err);
if(U_FAILURE(err) || u8DestLen!=3 ||
0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen) ||
numSubstitutions!=0) {
log_err("error: u_strToUTF8WithSub(no subchar) failed\n");
}
}
}
/* compare if two strings are equal, but match 0xfffd in the second string with anything in the first */
static UBool
equalAnyFFFD(const UChar *s, const UChar *t, int32_t length) {
UChar c1, c2;
while(length>0) {
c1=*s++;
c2=*t++;
if(c1!=c2 && c2!=0xfffd) {
return FALSE;
}
--length;
}
return TRUE;
}
/* test u_strFromUTF8Lenient() */
static void
Test_FromUTF8Lenient(void) {
/*
* Multiple input strings, each NUL-terminated.
* Terminate with a string starting with 0xff.
*/
static const uint8_t bytes[]={
/* well-formed UTF-8 */
0x61, 0xc3, 0x9f, 0xe0, 0xa0, 0x80, 0xf0, 0xa0, 0x80, 0x80,
0x62, 0xc3, 0xa0, 0xe0, 0xa0, 0x81, 0xf0, 0xa0, 0x80, 0x81, 0,
/* various malformed sequences */
0xc3, 0xc3, 0x9f, 0xc3, 0xa0, 0xe0, 0x80, 0x8a, 0xf0, 0x41, 0x42, 0x43, 0,
/* truncated input */
0xc3, 0,
0xe0, 0,
0xe0, 0xa0, 0,
0xf0, 0,
0xf0, 0x90, 0,
0xf0, 0x90, 0x80, 0,
/* empty string */
0,
/* finish */
0xff, 0
};
/* Multiple output strings, each NUL-terminated. 0xfffd matches anything. */
static const UChar uchars[]={
0x61, 0xdf, 0x800, 0xd840, 0xdc00,
0x62, 0xe0, 0x801, 0xd840, 0xdc01, 0,
0xfffd, 0x9f, 0xe0, 0xa, 0xfffd, 0xfffd, 0,
0xfffd, 0,
0xfffd, 0,
0xfffd, 0,
0xfffd, 0,
0xfffd, 0,
0xfffd, 0,
0,
0
};
UChar dest[64];
const char *pb;
const UChar *pu, *pDest;
int32_t srcLength, destLength0, destLength;
int number;
UErrorCode errorCode;
/* verify checking for some illegal arguments */
dest[0]=0x1234;
destLength=-1;
errorCode=U_ZERO_ERROR;
pDest=u_strFromUTF8Lenient(dest, 1, &destLength, NULL, -1, &errorCode);
if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0x1234) {
log_err("u_strFromUTF8Lenient(src=NULL) failed\n");
}
dest[0]=0x1234;
destLength=-1;
errorCode=U_ZERO_ERROR;
pDest=u_strFromUTF8Lenient(NULL, 1, &destLength, (const char *)bytes, -1, &errorCode);
if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
log_err("u_strFromUTF8Lenient(dest=NULL[1]) failed\n");
}
dest[0]=0x1234;
destLength=-1;
errorCode=U_MEMORY_ALLOCATION_ERROR;
pDest=u_strFromUTF8Lenient(dest, 1, &destLength, (const char *)bytes, -1, &errorCode);
if(errorCode!=U_MEMORY_ALLOCATION_ERROR || dest[0]!=0x1234) {
log_err("u_strFromUTF8Lenient(U_MEMORY_ALLOCATION_ERROR) failed\n");
}
dest[0]=0x1234;
destLength=-1;
errorCode=U_MEMORY_ALLOCATION_ERROR;
pDest=u_strFromUTF8Lenient(dest, 1, &destLength, (const char *)bytes, -1, NULL);
if(dest[0]!=0x1234) {
log_err("u_strFromUTF8Lenient(pErrorCode=NULL) failed\n");
}
/* test normal behavior */
number=0; /* string number for log_err() */
for(pb=(const char *)bytes, pu=uchars;
*pb!=(char)0xff;
pb+=srcLength+1, pu+=destLength0+1, ++number
) {
srcLength=uprv_strlen(pb);
destLength0=u_strlen(pu);
/* preflighting with NUL-termination */
dest[0]=0x1234;
destLength=-1;
errorCode=U_ZERO_ERROR;
pDest=u_strFromUTF8Lenient(NULL, 0, &destLength, pb, -1, &errorCode);
if (errorCode!= (destLength0==0 ? U_STRING_NOT_TERMINATED_WARNING : U_BUFFER_OVERFLOW_ERROR) ||
pDest!=NULL || dest[0]!=0x1234 || destLength!=destLength0
) {
log_err("u_strFromUTF8Lenient(%d preflighting with NUL-termination) failed\n", number);
}
/* preflighting/some capacity with NUL-termination */
if(srcLength>0) {
dest[destLength0-1]=0x1234;
destLength=-1;
errorCode=U_ZERO_ERROR;
pDest=u_strFromUTF8Lenient(dest, destLength0-1, &destLength, pb, -1, &errorCode);
if (errorCode!=U_BUFFER_OVERFLOW_ERROR ||
dest[destLength0-1]!=0x1234 || destLength!=destLength0
) {
log_err("u_strFromUTF8Lenient(%d preflighting/some capacity with NUL-termination) failed\n", number);
}
}
/* conversion with NUL-termination, much capacity */
dest[0]=dest[destLength0]=0x1234;
destLength=-1;
errorCode=U_ZERO_ERROR;
pDest=u_strFromUTF8Lenient(dest, LENGTHOF(dest), &destLength, pb, -1, &errorCode);
if (errorCode!=U_ZERO_ERROR ||
pDest!=dest || dest[destLength0]!=0 ||
destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
) {
log_err("u_strFromUTF8Lenient(%d conversion with NUL-termination, much capacity) failed\n", number);
}
/* conversion with NUL-termination, exact capacity */
dest[0]=dest[destLength0]=0x1234;
destLength=-1;
errorCode=U_ZERO_ERROR;
pDest=u_strFromUTF8Lenient(dest, destLength0, &destLength, pb, -1, &errorCode);
if (errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
pDest!=dest || dest[destLength0]!=0x1234 ||
destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
) {
log_err("u_strFromUTF8Lenient(%d conversion with NUL-termination, exact capacity) failed\n", number);
}
/* preflighting with length */
dest[0]=0x1234;
destLength=-1;
errorCode=U_ZERO_ERROR;
pDest=u_strFromUTF8Lenient(NULL, 0, &destLength, pb, srcLength, &errorCode);
if (errorCode!= (destLength0==0 ? U_STRING_NOT_TERMINATED_WARNING : U_BUFFER_OVERFLOW_ERROR) ||
pDest!=NULL || dest[0]!=0x1234 || destLength!=srcLength
) {
log_err("u_strFromUTF8Lenient(%d preflighting with length) failed\n", number);
}
/* preflighting/some capacity with length */
if(srcLength>0) {
dest[srcLength-1]=0x1234;
destLength=-1;
errorCode=U_ZERO_ERROR;
pDest=u_strFromUTF8Lenient(dest, srcLength-1, &destLength, pb, srcLength, &errorCode);
if (errorCode!=U_BUFFER_OVERFLOW_ERROR ||
dest[srcLength-1]!=0x1234 || destLength!=srcLength
) {
log_err("u_strFromUTF8Lenient(%d preflighting/some capacity with length) failed\n", number);
}
}
/* conversion with length, much capacity */
dest[0]=dest[destLength0]=0x1234;
destLength=-1;
errorCode=U_ZERO_ERROR;
pDest=u_strFromUTF8Lenient(dest, LENGTHOF(dest), &destLength, pb, srcLength, &errorCode);
if (errorCode!=U_ZERO_ERROR ||
pDest!=dest || dest[destLength0]!=0 ||
destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
) {
log_err("u_strFromUTF8Lenient(%d conversion with length, much capacity) failed\n", number);
}
/* conversion with length, srcLength capacity */
dest[0]=dest[srcLength]=dest[destLength0]=0x1234;
destLength=-1;
errorCode=U_ZERO_ERROR;
pDest=u_strFromUTF8Lenient(dest, srcLength, &destLength, pb, srcLength, &errorCode);
if(srcLength==destLength0) {
if (errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
pDest!=dest || dest[destLength0]!=0x1234 ||
destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
) {
log_err("u_strFromUTF8Lenient(%d conversion with length, srcLength capacity/not terminated) failed\n", number);
}
} else {
if (errorCode!=U_ZERO_ERROR ||
pDest!=dest || dest[destLength0]!=0 ||
destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
) {
log_err("u_strFromUTF8Lenient(%d conversion with length, srcLength capacity/terminated) failed\n", number);
}
}
}
}
static const uint16_t src16j[] = {
0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,