ICU-2144 add ucnv_convertEx()
X-SVN-Rev: 11589
This commit is contained in:
parent
e76f98cf2d
commit
4f2c865b52
@ -82,7 +82,7 @@ static void UCNV_DEBUG_CNV(UConverter *c, int line)
|
|||||||
|
|
||||||
|
|
||||||
/* size of intermediate and preflighting buffers in ucnv_convert() */
|
/* size of intermediate and preflighting buffers in ucnv_convert() */
|
||||||
#define CHUNK_SIZE 5*1024
|
#define CHUNK_SIZE 1024
|
||||||
|
|
||||||
typedef struct UAmbiguousConverter {
|
typedef struct UAmbiguousConverter {
|
||||||
const char *name;
|
const char *name;
|
||||||
@ -809,6 +809,11 @@ ucnv_fromUnicode (UConverter * _this,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(!flush && *source == sourceLimit) {
|
||||||
|
/* the overflow buffer is emptied and there is no new input: we are done */
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
args.converter = _this;
|
args.converter = _this;
|
||||||
args.flush = flush;
|
args.flush = flush;
|
||||||
args.offsets = offsets;
|
args.offsets = offsets;
|
||||||
@ -903,6 +908,11 @@ ucnv_toUnicode (UConverter * _this,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(!flush && *source == sourceLimit) {
|
||||||
|
/* the overflow buffer is emptied and there is no new input: we are done */
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
args.converter = _this;
|
args.converter = _this;
|
||||||
args.flush = flush;
|
args.flush = flush;
|
||||||
args.offsets = offsets;
|
args.offsets = offsets;
|
||||||
@ -1116,6 +1126,120 @@ ucnv_getNextUChar(UConverter * converter,
|
|||||||
return ch;
|
return ch;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
U_CAPI void U_EXPORT2
|
||||||
|
ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
|
||||||
|
char **target, const char *targetLimit,
|
||||||
|
const char **source, const char *sourceLimit,
|
||||||
|
UChar *pivotStart, UChar **pivotSource,
|
||||||
|
UChar **pivotTarget, const UChar *pivotLimit,
|
||||||
|
UBool reset, UBool flush,
|
||||||
|
UErrorCode *pErrorCode) {
|
||||||
|
UChar pivotBuffer[CHUNK_SIZE];
|
||||||
|
UChar *myPivotSource, *myPivotTarget;
|
||||||
|
|
||||||
|
/* error checking */
|
||||||
|
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( targetCnv==NULL || sourceCnv==NULL ||
|
||||||
|
source==NULL || *source==NULL ||
|
||||||
|
target==NULL || *target==NULL || targetLimit==NULL
|
||||||
|
) {
|
||||||
|
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(pivotStart==NULL) {
|
||||||
|
/* use the stack pivot buffer */
|
||||||
|
pivotStart=myPivotSource=myPivotTarget=pivotBuffer;
|
||||||
|
pivotSource=&myPivotSource;
|
||||||
|
pivotTarget=&myPivotTarget;
|
||||||
|
pivotLimit=pivotBuffer+CHUNK_SIZE;
|
||||||
|
} else if( pivotStart>=pivotLimit ||
|
||||||
|
pivotSource==NULL || *pivotSource==NULL ||
|
||||||
|
pivotTarget==NULL || *pivotTarget==NULL ||
|
||||||
|
pivotLimit==NULL
|
||||||
|
) {
|
||||||
|
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(sourceLimit==NULL) {
|
||||||
|
/* get limit of single-byte-NUL-terminated source string */
|
||||||
|
sourceLimit=uprv_strchr(*source, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
if(reset) {
|
||||||
|
ucnv_resetToUnicode(sourceCnv);
|
||||||
|
ucnv_resetFromUnicode(targetCnv);
|
||||||
|
*pivotTarget=*pivotSource=pivotStart;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* conversion loop */
|
||||||
|
for(;;) {
|
||||||
|
if(reset) {
|
||||||
|
/*
|
||||||
|
* if we did a reset in this function, we know that there is nothing
|
||||||
|
* to convert to the target yet, so we save a function call
|
||||||
|
*/
|
||||||
|
reset=FALSE;
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* convert to the target first in case the pivot is filled at entry
|
||||||
|
* or the targetCnv has some output bytes in its state
|
||||||
|
*/
|
||||||
|
ucnv_fromUnicode(targetCnv,
|
||||||
|
target, targetLimit,
|
||||||
|
pivotSource, *pivotTarget,
|
||||||
|
NULL,
|
||||||
|
(UBool)(flush && *source==sourceLimit),
|
||||||
|
pErrorCode);
|
||||||
|
if(U_FAILURE(*pErrorCode)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ucnv_fromUnicode() must have consumed the pivot contents since it returned with U_SUCCESS() */
|
||||||
|
*pivotSource=*pivotTarget=pivotStart;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* convert from the source to the pivot */
|
||||||
|
ucnv_toUnicode(sourceCnv,
|
||||||
|
pivotTarget, pivotLimit,
|
||||||
|
source, sourceLimit,
|
||||||
|
NULL,
|
||||||
|
flush,
|
||||||
|
pErrorCode);
|
||||||
|
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
|
||||||
|
/* pivot overflow: continue with the conversion loop */
|
||||||
|
*pErrorCode=U_ZERO_ERROR;
|
||||||
|
} else if(U_FAILURE(*pErrorCode) || *pivotTarget==pivotStart) {
|
||||||
|
/* conversion error, or there was nothing left to convert */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
/* else ucnv_toUnicode() wrote into the pivot buffer: continue */
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The conversion loop is exited when one of the following is true:
|
||||||
|
* - the entire source text has been converted successfully to the target buffer
|
||||||
|
* - a target buffer overflow occurred
|
||||||
|
* - a conversion error occurred
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* terminate the target buffer if possible */
|
||||||
|
if(flush && U_SUCCESS(*pErrorCode)) {
|
||||||
|
if(*target!=targetLimit) {
|
||||||
|
**target=0;
|
||||||
|
if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {
|
||||||
|
*pErrorCode=U_ZERO_ERROR;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
*pErrorCode=U_STRING_NOT_TERMINATED_WARNING;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
U_CAPI int32_t U_EXPORT2
|
U_CAPI int32_t U_EXPORT2
|
||||||
ucnv_convert(const char *toConverterName, const char *fromConverterName,
|
ucnv_convert(const char *toConverterName, const char *fromConverterName,
|
||||||
char *target, int32_t targetSize,
|
char *target, int32_t targetSize,
|
||||||
@ -1166,40 +1290,14 @@ ucnv_convert(const char *toConverterName, const char *fromConverterName,
|
|||||||
|
|
||||||
if(targetSize>0) {
|
if(targetSize>0) {
|
||||||
/* perform real conversion */
|
/* perform real conversion */
|
||||||
|
|
||||||
/*
|
|
||||||
* loops until the input buffer is completely consumed
|
|
||||||
* or an error is encountered;
|
|
||||||
* first we convert from inConverter codepage to Unicode
|
|
||||||
* then from Unicode to outConverter codepage
|
|
||||||
*/
|
|
||||||
targetLimit=target+targetSize;
|
targetLimit=target+targetSize;
|
||||||
do {
|
ucnv_convertEx(outConverter, inConverter,
|
||||||
pivot=pivotBuffer;
|
&myTarget, targetLimit,
|
||||||
ucnv_toUnicode(inConverter,
|
|
||||||
&pivot, pivotBuffer+CHUNK_SIZE,
|
|
||||||
&source, sourceLimit,
|
&source, sourceLimit,
|
||||||
NULL,
|
pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
|
||||||
|
FALSE,
|
||||||
TRUE,
|
TRUE,
|
||||||
pErrorCode);
|
pErrorCode);
|
||||||
|
|
||||||
/* U_BUFFER_OVERFLOW_ERROR only means that the pivot buffer is full */
|
|
||||||
if(U_SUCCESS(*pErrorCode) || *pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
|
|
||||||
*pErrorCode=U_ZERO_ERROR;
|
|
||||||
pivot2=pivotBuffer;
|
|
||||||
ucnv_fromUnicode(outConverter,
|
|
||||||
&myTarget, targetLimit,
|
|
||||||
(const UChar **)&pivot2, pivot,
|
|
||||||
NULL,
|
|
||||||
(UBool)(source==sourceLimit),
|
|
||||||
pErrorCode);
|
|
||||||
/*
|
|
||||||
* If this overflows the real target, then we must stop
|
|
||||||
* converting and preflight with the loop below.
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
} while(U_SUCCESS(*pErrorCode) && source!=sourceLimit);
|
|
||||||
|
|
||||||
targetCapacity=myTarget-target;
|
targetCapacity=myTarget-target;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1214,53 +1312,32 @@ ucnv_convert(const char *toConverterName, const char *fromConverterName,
|
|||||||
|
|
||||||
targetLimit=targetBuffer+CHUNK_SIZE;
|
targetLimit=targetBuffer+CHUNK_SIZE;
|
||||||
do {
|
do {
|
||||||
/* since the pivot buffer may still contain some characters, start with emptying it */
|
|
||||||
*pErrorCode=U_ZERO_ERROR;
|
*pErrorCode=U_ZERO_ERROR;
|
||||||
while(pivot2!=pivot && U_SUCCESS(*pErrorCode)) {
|
|
||||||
myTarget=targetBuffer;
|
myTarget=targetBuffer;
|
||||||
ucnv_fromUnicode(outConverter,
|
ucnv_convertEx(outConverter, inConverter,
|
||||||
&myTarget, targetLimit,
|
&myTarget, targetLimit,
|
||||||
(const UChar **)&pivot2, pivot,
|
&source, sourceLimit,
|
||||||
NULL,
|
pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
|
||||||
(UBool)(source==sourceLimit),
|
FALSE,
|
||||||
|
TRUE,
|
||||||
pErrorCode);
|
pErrorCode);
|
||||||
targetCapacity+=(myTarget-targetBuffer);
|
targetCapacity+=(myTarget-targetBuffer);
|
||||||
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
|
} while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
|
||||||
*pErrorCode=U_ZERO_ERROR;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if(U_FAILURE(*pErrorCode)) {
|
if(U_SUCCESS(*pErrorCode)) {
|
||||||
/* an error occurred: done */
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if(source==sourceLimit) {
|
|
||||||
/*
|
/*
|
||||||
* source is consumed:
|
* done with preflighting, set the buffer overflow error as
|
||||||
* done, and set the buffer overflow error as
|
|
||||||
* the result for the entire function
|
* the result for the entire function
|
||||||
*/
|
*/
|
||||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* now convert from the source into the pivot buffer again */
|
|
||||||
pivot=pivot2=pivotBuffer;
|
|
||||||
ucnv_toUnicode(inConverter,
|
|
||||||
&pivot, pivotBuffer+CHUNK_SIZE,
|
|
||||||
&source, sourceLimit,
|
|
||||||
NULL,
|
|
||||||
TRUE,
|
|
||||||
pErrorCode);
|
|
||||||
}
|
|
||||||
while(U_SUCCESS(*pErrorCode) || *pErrorCode==U_BUFFER_OVERFLOW_ERROR);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ucnv_close (inConverter);
|
ucnv_close (inConverter);
|
||||||
ucnv_close (outConverter);
|
ucnv_close (outConverter);
|
||||||
|
|
||||||
return u_terminateChars(target, targetSize, targetCapacity, pErrorCode);
|
/* no need to call u_terminateChars() because ucnv_convertEx() took care of that */
|
||||||
|
return targetCapacity;
|
||||||
}
|
}
|
||||||
|
|
||||||
U_CAPI UConverterType U_EXPORT2
|
U_CAPI UConverterType U_EXPORT2
|
||||||
|
@ -1060,6 +1060,141 @@ ucnv_getNextUChar(UConverter * converter,
|
|||||||
const char * sourceLimit,
|
const char * sourceLimit,
|
||||||
UErrorCode * err);
|
UErrorCode * err);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert from one external charset to another using two existing UConverters.
|
||||||
|
* Internally, two conversions - ucnv_toUnicode() and ucnv_fromUnicode() -
|
||||||
|
* are used, "pivoting" through 16-bit Unicode.
|
||||||
|
*
|
||||||
|
* There is a similar function, ucnv_convert(),
|
||||||
|
* which has the following limitations:
|
||||||
|
* - it takes charset names, not converter objects, so that
|
||||||
|
* - two converters are opened for each call
|
||||||
|
* - only single-string conversion is possible, not streaming operation
|
||||||
|
* - it does not provide enough information to find out,
|
||||||
|
* in case of failure, whether the toUnicode or
|
||||||
|
* the fromUnicode conversion failed
|
||||||
|
*
|
||||||
|
* By contrast, ucnv_convertEx()
|
||||||
|
* - takes UConverter parameters instead of charset names
|
||||||
|
* - fully exposes the pivot buffer for complete error handling
|
||||||
|
*
|
||||||
|
* ucnv_convertEx() also provides further convenience:
|
||||||
|
* - an option to reset the converters at the beginning
|
||||||
|
* (if reset==TRUE, see parameters;
|
||||||
|
* also sets *pivotTarget=*pivotSource=pivotStart)
|
||||||
|
* - allow NUL-terminated input
|
||||||
|
* (only a single NUL byte, will not work for charsets with multi-byte NULs)
|
||||||
|
* (if sourceLimit==NULL, see parameters)
|
||||||
|
* - terminate with a NUL on output
|
||||||
|
* (only a single NUL byte, not useful for charsets with multi-byte NULs),
|
||||||
|
* or set U_STRING_NOT_TERMINATED_WARNING if the output exactly fills
|
||||||
|
* the target buffer
|
||||||
|
* - the pivot buffer can be provided internally;
|
||||||
|
* in this case, the caller will not be able to get details about where an
|
||||||
|
* error occurred
|
||||||
|
* (if pivotStart==NULL, see below)
|
||||||
|
*
|
||||||
|
* The function returns when one of the following is true:
|
||||||
|
* - the entire source text has been converted successfully to the target buffer
|
||||||
|
* - a target buffer overflow occurred (U_BUFFER_OVERFLOW_ERROR)
|
||||||
|
* - a conversion error occurred
|
||||||
|
* (other U_FAILURE(), see description of pErrorCode)
|
||||||
|
*
|
||||||
|
* Limitation compared to the direct use of
|
||||||
|
* ucnv_fromUnicode() and ucnv_toUnicode():
|
||||||
|
* ucnv_convertEx() does not provide offset information.
|
||||||
|
*
|
||||||
|
* Limitation compared to ucnv_fromUChars() and ucnv_toUChars():
|
||||||
|
* ucnv_convertEx() does not support preflighting directly.
|
||||||
|
*
|
||||||
|
* Sample code for converting a single string from
|
||||||
|
* one external charset to UTF-8, ignoring the location of errors:
|
||||||
|
*
|
||||||
|
* \code
|
||||||
|
* int32_t
|
||||||
|
* myToUTF8(UConverter *cnv,
|
||||||
|
* const char *s, int32_t length,
|
||||||
|
* char *u8, int32_t capacity,
|
||||||
|
* UErrorCode *pErrorCode) {
|
||||||
|
* UConverter *utf8Cnv;
|
||||||
|
* char *target;
|
||||||
|
*
|
||||||
|
* if(U_FAILURE(*pErrorCode)) {
|
||||||
|
* return 0;
|
||||||
|
* }
|
||||||
|
*
|
||||||
|
* utf8Cnv=myGetCachedUTF8Converter(pErrorCode);
|
||||||
|
* if(U_FAILURE(*pErrorCode)) {
|
||||||
|
* return 0;
|
||||||
|
* }
|
||||||
|
*
|
||||||
|
* target=u8;
|
||||||
|
* ucnv_convertEx(utf8Cnv, cnv,
|
||||||
|
* &target, u8+capacity,
|
||||||
|
* &s, length>=0 ? s+length : NULL,
|
||||||
|
* NULL, NULL, NULL, NULL,
|
||||||
|
* TRUE, TRUE,
|
||||||
|
* pErrorCode);
|
||||||
|
*
|
||||||
|
* myReleaseCachedUTF8Converter(utf8Cnv);
|
||||||
|
*
|
||||||
|
* // return the output string length, but without preflighting
|
||||||
|
* return (int32_t)(target-u8);
|
||||||
|
* }
|
||||||
|
* \endcode
|
||||||
|
*
|
||||||
|
* @param targetCnv Output converter, used to convert from the UTF-16 pivot
|
||||||
|
* to the target using ucnv_fromUnicode().
|
||||||
|
* @param sourceCnv Input converter, used to convert from the source to
|
||||||
|
* the UTF-16 pivot using ucnv_toUnicode().
|
||||||
|
* @param target I/O parameter, same as for ucnv_fromUnicode().
|
||||||
|
* Input: *target points to the beginning of the target buffer.
|
||||||
|
* Output: *target points to the first unit after the last char written.
|
||||||
|
* @param targetLimit Pointer to the first unit after the target buffer.
|
||||||
|
* @param source I/O parameter, same as for ucnv_toUnicode().
|
||||||
|
* Input: *source points to the beginning of the source buffer.
|
||||||
|
* Output: *source points to the first unit after the last char read.
|
||||||
|
* @param sourceLimit Pointer to the first unit after the source buffer.
|
||||||
|
* @param pivotStart Pointer to the UTF-16 pivot buffer. If pivotStart==NULL,
|
||||||
|
* then an internal buffer is used and the other pivot
|
||||||
|
* arguments are ignored and can be NULL as well.
|
||||||
|
* @param pivotSource I/O parameter, same as source in ucnv_fromUnicode() for
|
||||||
|
* conversion from the pivot buffer to the target buffer.
|
||||||
|
* @param pivotTarget I/O parameter, same as target in ucnv_toUnicode() for
|
||||||
|
* conversion from the source buffer to the pivot buffer.
|
||||||
|
* It must be pivotStart<=*pivotSource<=*pivotTarget<=pivotLimit
|
||||||
|
* and pivotStart<pivotLimit (unless pivotStart==NULL).
|
||||||
|
* @param pivotLimit Pointer to the first unit after the pivot buffer.
|
||||||
|
* @param reset If TRUE, then ucnv_resetToUnicode(sourceCnv) and
|
||||||
|
* ucnv_resetFromUnicode(targetCnv) are called, and the
|
||||||
|
* pivot pointers are reset (*pivotTarget=*pivotSource=pivotStart).
|
||||||
|
* @param flush If true, indicates the end of the input.
|
||||||
|
* Passed directly to ucnv_toUnicode(), and carried over to
|
||||||
|
* ucnv_fromUnicode() when the source is empty as well.
|
||||||
|
* @param pErrorCode ICU error code in/out parameter.
|
||||||
|
* Must fulfill U_SUCCESS before the function call.
|
||||||
|
* U_BUFFER_OVERFLOW_ERROR always refers to the target buffer
|
||||||
|
* because overflows into the pivot buffer are handled internally.
|
||||||
|
* Other conversion errors are from the source-to-pivot
|
||||||
|
* conversion if *pivotSource==pivotStart, otherwise from
|
||||||
|
* the pivot-to-target conversion.
|
||||||
|
*
|
||||||
|
* @see ucnv_convert
|
||||||
|
* @see ucnv_fromUnicode
|
||||||
|
* @see ucnv_toUnicode
|
||||||
|
* @see ucnv_fromUChars
|
||||||
|
* @see ucnv_toUChars
|
||||||
|
* @draft ICU 2.6
|
||||||
|
*/
|
||||||
|
U_CAPI void U_EXPORT2
|
||||||
|
ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
|
||||||
|
char **target, const char *targetLimit,
|
||||||
|
const char **source, const char *sourceLimit,
|
||||||
|
UChar *pivotStart, UChar **pivotSource,
|
||||||
|
UChar **pivotTarget, const UChar *pivotLimit,
|
||||||
|
UBool reset, UBool flush,
|
||||||
|
UErrorCode *pErrorCode);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Will convert a sequence of bytes from one codepage to another.
|
* Will convert a sequence of bytes from one codepage to another.
|
||||||
* This is <STRONG>NOT AN EFFICIENT</STRONG> way to transcode.
|
* This is <STRONG>NOT AN EFFICIENT</STRONG> way to transcode.
|
||||||
@ -1075,6 +1210,8 @@ ucnv_getNextUChar(UConverter * converter,
|
|||||||
* @param err error status.
|
* @param err error status.
|
||||||
* <code>U_BUFFER_OVERFLOW_ERROR</code> will be set if the target is full and there is still input left in the source.
|
* <code>U_BUFFER_OVERFLOW_ERROR</code> will be set if the target is full and there is still input left in the source.
|
||||||
* @return will be filled in with the number of bytes needed in target
|
* @return will be filled in with the number of bytes needed in target
|
||||||
|
*
|
||||||
|
* @see ucnv_convertEx
|
||||||
* @see ucnv_fromUnicode
|
* @see ucnv_fromUnicode
|
||||||
* @see ucnv_toUnicode
|
* @see ucnv_toUnicode
|
||||||
* @see ucnv_fromUChars
|
* @see ucnv_fromUChars
|
||||||
|
Loading…
Reference in New Issue
Block a user