ICU-2144 add ucnv_convertEx()
X-SVN-Rev: 11589
This commit is contained in:
parent
e76f98cf2d
commit
4f2c865b52
@ -82,7 +82,7 @@ static void UCNV_DEBUG_CNV(UConverter *c, int line)
|
||||
|
||||
|
||||
/* size of intermediate and preflighting buffers in ucnv_convert() */
|
||||
#define CHUNK_SIZE 5*1024
|
||||
#define CHUNK_SIZE 1024
|
||||
|
||||
typedef struct UAmbiguousConverter {
|
||||
const char *name;
|
||||
@ -809,6 +809,11 @@ ucnv_fromUnicode (UConverter * _this,
|
||||
return;
|
||||
}
|
||||
|
||||
if(!flush && *source == sourceLimit) {
|
||||
/* the overflow buffer is emptied and there is no new input: we are done */
|
||||
return;
|
||||
}
|
||||
|
||||
args.converter = _this;
|
||||
args.flush = flush;
|
||||
args.offsets = offsets;
|
||||
@ -903,6 +908,11 @@ ucnv_toUnicode (UConverter * _this,
|
||||
return;
|
||||
}
|
||||
|
||||
if(!flush && *source == sourceLimit) {
|
||||
/* the overflow buffer is emptied and there is no new input: we are done */
|
||||
return;
|
||||
}
|
||||
|
||||
args.converter = _this;
|
||||
args.flush = flush;
|
||||
args.offsets = offsets;
|
||||
@ -1116,6 +1126,120 @@ ucnv_getNextUChar(UConverter * converter,
|
||||
return ch;
|
||||
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
|
||||
char **target, const char *targetLimit,
|
||||
const char **source, const char *sourceLimit,
|
||||
UChar *pivotStart, UChar **pivotSource,
|
||||
UChar **pivotTarget, const UChar *pivotLimit,
|
||||
UBool reset, UBool flush,
|
||||
UErrorCode *pErrorCode) {
|
||||
UChar pivotBuffer[CHUNK_SIZE];
|
||||
UChar *myPivotSource, *myPivotTarget;
|
||||
|
||||
/* error checking */
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if( targetCnv==NULL || sourceCnv==NULL ||
|
||||
source==NULL || *source==NULL ||
|
||||
target==NULL || *target==NULL || targetLimit==NULL
|
||||
) {
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
if(pivotStart==NULL) {
|
||||
/* use the stack pivot buffer */
|
||||
pivotStart=myPivotSource=myPivotTarget=pivotBuffer;
|
||||
pivotSource=&myPivotSource;
|
||||
pivotTarget=&myPivotTarget;
|
||||
pivotLimit=pivotBuffer+CHUNK_SIZE;
|
||||
} else if( pivotStart>=pivotLimit ||
|
||||
pivotSource==NULL || *pivotSource==NULL ||
|
||||
pivotTarget==NULL || *pivotTarget==NULL ||
|
||||
pivotLimit==NULL
|
||||
) {
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
if(sourceLimit==NULL) {
|
||||
/* get limit of single-byte-NUL-terminated source string */
|
||||
sourceLimit=uprv_strchr(*source, 0);
|
||||
}
|
||||
|
||||
if(reset) {
|
||||
ucnv_resetToUnicode(sourceCnv);
|
||||
ucnv_resetFromUnicode(targetCnv);
|
||||
*pivotTarget=*pivotSource=pivotStart;
|
||||
}
|
||||
|
||||
/* conversion loop */
|
||||
for(;;) {
|
||||
if(reset) {
|
||||
/*
|
||||
* if we did a reset in this function, we know that there is nothing
|
||||
* to convert to the target yet, so we save a function call
|
||||
*/
|
||||
reset=FALSE;
|
||||
} else {
|
||||
/*
|
||||
* convert to the target first in case the pivot is filled at entry
|
||||
* or the targetCnv has some output bytes in its state
|
||||
*/
|
||||
ucnv_fromUnicode(targetCnv,
|
||||
target, targetLimit,
|
||||
pivotSource, *pivotTarget,
|
||||
NULL,
|
||||
(UBool)(flush && *source==sourceLimit),
|
||||
pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
break;
|
||||
}
|
||||
|
||||
/* ucnv_fromUnicode() must have consumed the pivot contents since it returned with U_SUCCESS() */
|
||||
*pivotSource=*pivotTarget=pivotStart;
|
||||
}
|
||||
|
||||
/* convert from the source to the pivot */
|
||||
ucnv_toUnicode(sourceCnv,
|
||||
pivotTarget, pivotLimit,
|
||||
source, sourceLimit,
|
||||
NULL,
|
||||
flush,
|
||||
pErrorCode);
|
||||
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
|
||||
/* pivot overflow: continue with the conversion loop */
|
||||
*pErrorCode=U_ZERO_ERROR;
|
||||
} else if(U_FAILURE(*pErrorCode) || *pivotTarget==pivotStart) {
|
||||
/* conversion error, or there was nothing left to convert */
|
||||
break;
|
||||
}
|
||||
/* else ucnv_toUnicode() wrote into the pivot buffer: continue */
|
||||
}
|
||||
|
||||
/*
|
||||
* The conversion loop is exited when one of the following is true:
|
||||
* - the entire source text has been converted successfully to the target buffer
|
||||
* - a target buffer overflow occurred
|
||||
* - a conversion error occurred
|
||||
*/
|
||||
|
||||
/* terminate the target buffer if possible */
|
||||
if(flush && U_SUCCESS(*pErrorCode)) {
|
||||
if(*target!=targetLimit) {
|
||||
**target=0;
|
||||
if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {
|
||||
*pErrorCode=U_ZERO_ERROR;
|
||||
}
|
||||
} else {
|
||||
*pErrorCode=U_STRING_NOT_TERMINATED_WARNING;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucnv_convert(const char *toConverterName, const char *fromConverterName,
|
||||
char *target, int32_t targetSize,
|
||||
@ -1166,40 +1290,14 @@ ucnv_convert(const char *toConverterName, const char *fromConverterName,
|
||||
|
||||
if(targetSize>0) {
|
||||
/* perform real conversion */
|
||||
|
||||
/*
|
||||
* loops until the input buffer is completely consumed
|
||||
* or an error is encountered;
|
||||
* first we convert from inConverter codepage to Unicode
|
||||
* then from Unicode to outConverter codepage
|
||||
*/
|
||||
targetLimit=target+targetSize;
|
||||
do {
|
||||
pivot=pivotBuffer;
|
||||
ucnv_toUnicode(inConverter,
|
||||
&pivot, pivotBuffer+CHUNK_SIZE,
|
||||
ucnv_convertEx(outConverter, inConverter,
|
||||
&myTarget, targetLimit,
|
||||
&source, sourceLimit,
|
||||
NULL,
|
||||
pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
|
||||
FALSE,
|
||||
TRUE,
|
||||
pErrorCode);
|
||||
|
||||
/* U_BUFFER_OVERFLOW_ERROR only means that the pivot buffer is full */
|
||||
if(U_SUCCESS(*pErrorCode) || *pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
|
||||
*pErrorCode=U_ZERO_ERROR;
|
||||
pivot2=pivotBuffer;
|
||||
ucnv_fromUnicode(outConverter,
|
||||
&myTarget, targetLimit,
|
||||
(const UChar **)&pivot2, pivot,
|
||||
NULL,
|
||||
(UBool)(source==sourceLimit),
|
||||
pErrorCode);
|
||||
/*
|
||||
* If this overflows the real target, then we must stop
|
||||
* converting and preflight with the loop below.
|
||||
*/
|
||||
}
|
||||
} while(U_SUCCESS(*pErrorCode) && source!=sourceLimit);
|
||||
|
||||
targetCapacity=myTarget-target;
|
||||
}
|
||||
|
||||
@ -1214,53 +1312,32 @@ ucnv_convert(const char *toConverterName, const char *fromConverterName,
|
||||
|
||||
targetLimit=targetBuffer+CHUNK_SIZE;
|
||||
do {
|
||||
/* since the pivot buffer may still contain some characters, start with emptying it */
|
||||
*pErrorCode=U_ZERO_ERROR;
|
||||
while(pivot2!=pivot && U_SUCCESS(*pErrorCode)) {
|
||||
myTarget=targetBuffer;
|
||||
ucnv_fromUnicode(outConverter,
|
||||
ucnv_convertEx(outConverter, inConverter,
|
||||
&myTarget, targetLimit,
|
||||
(const UChar **)&pivot2, pivot,
|
||||
NULL,
|
||||
(UBool)(source==sourceLimit),
|
||||
&source, sourceLimit,
|
||||
pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
|
||||
FALSE,
|
||||
TRUE,
|
||||
pErrorCode);
|
||||
targetCapacity+=(myTarget-targetBuffer);
|
||||
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
|
||||
*pErrorCode=U_ZERO_ERROR;
|
||||
}
|
||||
}
|
||||
} while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
|
||||
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
/* an error occurred: done */
|
||||
break;
|
||||
}
|
||||
|
||||
if(source==sourceLimit) {
|
||||
if(U_SUCCESS(*pErrorCode)) {
|
||||
/*
|
||||
* source is consumed:
|
||||
* done, and set the buffer overflow error as
|
||||
* done with preflighting, set the buffer overflow error as
|
||||
* the result for the entire function
|
||||
*/
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
break;
|
||||
}
|
||||
|
||||
/* now convert from the source into the pivot buffer again */
|
||||
pivot=pivot2=pivotBuffer;
|
||||
ucnv_toUnicode(inConverter,
|
||||
&pivot, pivotBuffer+CHUNK_SIZE,
|
||||
&source, sourceLimit,
|
||||
NULL,
|
||||
TRUE,
|
||||
pErrorCode);
|
||||
}
|
||||
while(U_SUCCESS(*pErrorCode) || *pErrorCode==U_BUFFER_OVERFLOW_ERROR);
|
||||
}
|
||||
|
||||
ucnv_close (inConverter);
|
||||
ucnv_close (outConverter);
|
||||
|
||||
return u_terminateChars(target, targetSize, targetCapacity, pErrorCode);
|
||||
/* no need to call u_terminateChars() because ucnv_convertEx() took care of that */
|
||||
return targetCapacity;
|
||||
}
|
||||
|
||||
U_CAPI UConverterType U_EXPORT2
|
||||
|
@ -1060,6 +1060,141 @@ ucnv_getNextUChar(UConverter * converter,
|
||||
const char * sourceLimit,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
* Convert from one external charset to another using two existing UConverters.
|
||||
* Internally, two conversions - ucnv_toUnicode() and ucnv_fromUnicode() -
|
||||
* are used, "pivoting" through 16-bit Unicode.
|
||||
*
|
||||
* There is a similar function, ucnv_convert(),
|
||||
* which has the following limitations:
|
||||
* - it takes charset names, not converter objects, so that
|
||||
* - two converters are opened for each call
|
||||
* - only single-string conversion is possible, not streaming operation
|
||||
* - it does not provide enough information to find out,
|
||||
* in case of failure, whether the toUnicode or
|
||||
* the fromUnicode conversion failed
|
||||
*
|
||||
* By contrast, ucnv_convertEx()
|
||||
* - takes UConverter parameters instead of charset names
|
||||
* - fully exposes the pivot buffer for complete error handling
|
||||
*
|
||||
* ucnv_convertEx() also provides further convenience:
|
||||
* - an option to reset the converters at the beginning
|
||||
* (if reset==TRUE, see parameters;
|
||||
* also sets *pivotTarget=*pivotSource=pivotStart)
|
||||
* - allow NUL-terminated input
|
||||
* (only a single NUL byte, will not work for charsets with multi-byte NULs)
|
||||
* (if sourceLimit==NULL, see parameters)
|
||||
* - terminate with a NUL on output
|
||||
* (only a single NUL byte, not useful for charsets with multi-byte NULs),
|
||||
* or set U_STRING_NOT_TERMINATED_WARNING if the output exactly fills
|
||||
* the target buffer
|
||||
* - the pivot buffer can be provided internally;
|
||||
* in this case, the caller will not be able to get details about where an
|
||||
* error occurred
|
||||
* (if pivotStart==NULL, see below)
|
||||
*
|
||||
* The function returns when one of the following is true:
|
||||
* - the entire source text has been converted successfully to the target buffer
|
||||
* - a target buffer overflow occurred (U_BUFFER_OVERFLOW_ERROR)
|
||||
* - a conversion error occurred
|
||||
* (other U_FAILURE(), see description of pErrorCode)
|
||||
*
|
||||
* Limitation compared to the direct use of
|
||||
* ucnv_fromUnicode() and ucnv_toUnicode():
|
||||
* ucnv_convertEx() does not provide offset information.
|
||||
*
|
||||
* Limitation compared to ucnv_fromUChars() and ucnv_toUChars():
|
||||
* ucnv_convertEx() does not support preflighting directly.
|
||||
*
|
||||
* Sample code for converting a single string from
|
||||
* one external charset to UTF-8, ignoring the location of errors:
|
||||
*
|
||||
* \code
|
||||
* int32_t
|
||||
* myToUTF8(UConverter *cnv,
|
||||
* const char *s, int32_t length,
|
||||
* char *u8, int32_t capacity,
|
||||
* UErrorCode *pErrorCode) {
|
||||
* UConverter *utf8Cnv;
|
||||
* char *target;
|
||||
*
|
||||
* if(U_FAILURE(*pErrorCode)) {
|
||||
* return 0;
|
||||
* }
|
||||
*
|
||||
* utf8Cnv=myGetCachedUTF8Converter(pErrorCode);
|
||||
* if(U_FAILURE(*pErrorCode)) {
|
||||
* return 0;
|
||||
* }
|
||||
*
|
||||
* target=u8;
|
||||
* ucnv_convertEx(utf8Cnv, cnv,
|
||||
* &target, u8+capacity,
|
||||
* &s, length>=0 ? s+length : NULL,
|
||||
* NULL, NULL, NULL, NULL,
|
||||
* TRUE, TRUE,
|
||||
* pErrorCode);
|
||||
*
|
||||
* myReleaseCachedUTF8Converter(utf8Cnv);
|
||||
*
|
||||
* // return the output string length, but without preflighting
|
||||
* return (int32_t)(target-u8);
|
||||
* }
|
||||
* \endcode
|
||||
*
|
||||
* @param targetCnv Output converter, used to convert from the UTF-16 pivot
|
||||
* to the target using ucnv_fromUnicode().
|
||||
* @param sourceCnv Input converter, used to convert from the source to
|
||||
* the UTF-16 pivot using ucnv_toUnicode().
|
||||
* @param target I/O parameter, same as for ucnv_fromUChars().
|
||||
* Input: *target points to the beginning of the target buffer.
|
||||
* Output: *target points to the first unit after the last char written.
|
||||
* @param targetLimit Pointer to the first unit after the target buffer.
|
||||
* @param source I/O parameter, same as for ucnv_toUChars().
|
||||
* Input: *source points to the beginning of the source buffer.
|
||||
* Output: *source points to the first unit after the last char read.
|
||||
* @param sourceLimit Pointer to the first unit after the source buffer.
|
||||
* @param pivotStart Pointer to the UTF-16 pivot buffer. If pivotStart==NULL,
|
||||
* then an internal buffer is used and the other pivot
|
||||
* arguments are ignored and can be NULL as well.
|
||||
* @param pivotSource I/O parameter, same as source in ucnv_fromUChars() for
|
||||
* conversion from the pivot buffer to the target buffer.
|
||||
* @param pivotTarget I/O parameter, same as target in ucnv_toUChars() for
|
||||
* conversion from the source buffer to the pivot buffer.
|
||||
* It must be pivotStart<=*pivotSource<=*pivotTarget<=pivotLimit
|
||||
* and pivotStart<pivotLimit (unless pivotStart==NULL).
|
||||
* @param pivotLimit Pointer to the first unit after the pivot buffer.
|
||||
* @param reset If TRUE, then ucnv_resetToUnicode(sourceCnv) and
|
||||
* ucnv_resetFromUnicode(targetCnv) are called, and the
|
||||
* pivot pointers are reset (*pivotTarget=*pivotSource=pivotStart).
|
||||
* @param flush If true, indicates the end of the input.
|
||||
* Passed directly to ucnv_toUnicode(), and carried over to
|
||||
* ucnv_fromUnicode() when the source is empty as well.
|
||||
* @param pErrorCode ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* U_BUFFER_OVERFLOW_ERROR always refers to the target buffer
|
||||
* because overflows into the pivot buffer are handled internally.
|
||||
* Other conversion errors are from the source-to-pivot
|
||||
* conversion if *pivotSource==pivotStart, otherwise from
|
||||
* the pivot-to-target conversion.
|
||||
*
|
||||
* @see ucnv_convert
|
||||
* @see ucnv_fromUnicode
|
||||
* @see ucnv_toUnicode
|
||||
* @see ucnv_fromUChars
|
||||
* @see ucnv_toUChars
|
||||
* @draft ICU 2.6
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
|
||||
char **target, const char *targetLimit,
|
||||
const char **source, const char *sourceLimit,
|
||||
UChar *pivotStart, UChar **pivotSource,
|
||||
UChar **pivotTarget, const UChar *pivotLimit,
|
||||
UBool reset, UBool flush,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Will convert a sequence of bytes from one codepage to another.
|
||||
* This is <STRONG>NOT AN EFFICIENT</STRONG> way to transcode.
|
||||
@ -1075,6 +1210,8 @@ ucnv_getNextUChar(UConverter * converter,
|
||||
* @param err error status.
|
||||
* <code>U_BUFFER_OVERFLOW_ERROR</code> will be set if the target is full and there is still input left in the source.
|
||||
* @return will be filled in with the number of bytes needed in target
|
||||
*
|
||||
* @see ucnv_convertEx
|
||||
* @see ucnv_fromUnicode
|
||||
* @see ucnv_toUnicode
|
||||
* @see ucnv_fromUChars
|
||||
|
Loading…
Reference in New Issue
Block a user