ICU-2144 add ucnv_convertEx()

X-SVN-Rev: 11589
2003-04-18 23:11:39 +00:00 · 2003-04-18 23:11:39 +00:00 · 4f2c865b52
commit 4f2c865b52
parent e76f98cf2d
2 changed files with 286 additions and 72 deletions
--- a/icu4c/source/common/ucnv.c
+++ b/icu4c/source/common/ucnv.c
@ -82,7 +82,7 @@ static void UCNV_DEBUG_CNV(UConverter *c, int line)
 /* size of intermediate and preflighting buffers in ucnv_convert() */
-#define CHUNK_SIZE 5*1024
+#define CHUNK_SIZE 1024
 typedef struct UAmbiguousConverter {
    const char *name;
@ -808,7 +808,12 @@ ucnv_fromUnicode (UConverter * _this,
        if (U_FAILURE (*err))
            return;
    }
-    
+
    if(!flush && *source == sourceLimit) {
        /* the overflow buffer is emptied and there is no new input: we are done */
        return;
    }
    args.converter = _this;
    args.flush = flush;
    args.offsets = offsets;
@ -903,6 +908,11 @@ ucnv_toUnicode (UConverter * _this,
            return;
    }
    if(!flush && *source == sourceLimit) {
        /* the overflow buffer is emptied and there is no new input: we are done */
        return;
    }
    args.converter = _this;
    args.flush = flush;
    args.offsets = offsets;
@ -1116,6 +1126,120 @@ ucnv_getNextUChar(UConverter * converter,
    return ch;
 }
 U_CAPI void U_EXPORT2
 ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
               char **target, const char *targetLimit,
               const char **source, const char *sourceLimit,
               UChar *pivotStart, UChar **pivotSource,
               UChar **pivotTarget, const UChar *pivotLimit,
               UBool reset, UBool flush,
               UErrorCode *pErrorCode) {
    UChar pivotBuffer[CHUNK_SIZE];
    UChar *myPivotSource, *myPivotTarget;
    /* error checking */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }
    if( targetCnv==NULL || sourceCnv==NULL ||
        source==NULL || *source==NULL ||
        target==NULL || *target==NULL || targetLimit==NULL
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    if(pivotStart==NULL) {
        /* use the stack pivot buffer */
        pivotStart=myPivotSource=myPivotTarget=pivotBuffer;
        pivotSource=&myPivotSource;
        pivotTarget=&myPivotTarget;
        pivotLimit=pivotBuffer+CHUNK_SIZE;
    } else if(  pivotStart>=pivotLimit ||
                pivotSource==NULL || *pivotSource==NULL ||
                pivotTarget==NULL || *pivotTarget==NULL ||
                pivotLimit==NULL
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    if(sourceLimit==NULL) {
        /* get limit of single-byte-NUL-terminated source string */
        sourceLimit=uprv_strchr(*source, 0);
    }
    if(reset) {
        ucnv_resetToUnicode(sourceCnv);
        ucnv_resetFromUnicode(targetCnv);
        *pivotTarget=*pivotSource=pivotStart;
    }
    /* conversion loop */
    for(;;) {
        if(reset) {
            /*
             * if we did a reset in this function, we know that there is nothing
             * to convert to the target yet, so we save a function call
             */
            reset=FALSE;
        } else {
            /*
             * convert to the target first in case the pivot is filled at entry
             * or the targetCnv has some output bytes in its state
             */
            ucnv_fromUnicode(targetCnv,
                             target, targetLimit,
                             pivotSource, *pivotTarget,
                             NULL,
                             (UBool)(flush && *source==sourceLimit),
                             pErrorCode);
            if(U_FAILURE(*pErrorCode)) {
                break;
            }
            /* ucnv_fromUnicode() must have consumed the pivot contents since it returned with U_SUCCESS() */
            *pivotSource=*pivotTarget=pivotStart;
        }
        /* convert from the source to the pivot */
        ucnv_toUnicode(sourceCnv,
                       pivotTarget, pivotLimit,
                       source, sourceLimit,
                       NULL,
                       flush,
                       pErrorCode);
        if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
            /* pivot overflow: continue with the conversion loop */
            *pErrorCode=U_ZERO_ERROR;
        } else if(U_FAILURE(*pErrorCode) || *pivotTarget==pivotStart) {
            /* conversion error, or there was nothing left to convert */
            break;
        }
        /* else ucnv_toUnicode() wrote into the pivot buffer: continue */
    }
    /*
     * The conversion loop is exited when one of the following is true:
     * - the entire source text has been converted successfully to the target buffer
     * - a target buffer overflow occurred
     * - a conversion error occurred
     */
    /* terminate the target buffer if possible */
    if(flush && U_SUCCESS(*pErrorCode)) {
        if(*target!=targetLimit) {
            **target=0;
            if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {
                *pErrorCode=U_ZERO_ERROR;
            }
        } else {
            *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;
        }
    }
 }
 U_CAPI int32_t U_EXPORT2
 ucnv_convert(const char *toConverterName, const char *fromConverterName,
             char *target, int32_t targetSize,
@ -1166,40 +1290,14 @@ ucnv_convert(const char *toConverterName, const char *fromConverterName,
    if(targetSize>0) {
        /* perform real conversion */
        /*
         * loops until the input buffer is completely consumed
         * or an error is encountered;
         * first we convert from inConverter codepage to Unicode
         * then from Unicode to outConverter codepage
         */
        targetLimit=target+targetSize;
-        do {
+        ucnv_convertEx(outConverter, inConverter,
-            pivot=pivotBuffer;
+                       &myTarget, targetLimit,
-            ucnv_toUnicode(inConverter,
+                       &source, sourceLimit,
-                           &pivot, pivotBuffer+CHUNK_SIZE,
+                       pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
-                           &source, sourceLimit,
+                       FALSE,
-                           NULL,
+                       TRUE,
-                           TRUE,
+                       pErrorCode);
                           pErrorCode);
            /* U_BUFFER_OVERFLOW_ERROR only means that the pivot buffer is full */
            if(U_SUCCESS(*pErrorCode) || *pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
                *pErrorCode=U_ZERO_ERROR;
                pivot2=pivotBuffer;
                ucnv_fromUnicode(outConverter,
                                 &myTarget, targetLimit,
                                 (const UChar **)&pivot2, pivot,
                                 NULL,
                                 (UBool)(source==sourceLimit),
                                 pErrorCode);
                /*
                 * If this overflows the real target, then we must stop
                 * converting and preflight with the loop below.
                 */
            }
        } while(U_SUCCESS(*pErrorCode) && source!=sourceLimit);
        targetCapacity=myTarget-target;
    }
@ -1214,53 +1312,32 @@ ucnv_convert(const char *toConverterName, const char *fromConverterName,
        targetLimit=targetBuffer+CHUNK_SIZE;
        do {
            /* since the pivot buffer may still contain some characters, start with emptying it */
            *pErrorCode=U_ZERO_ERROR;
-            while(pivot2!=pivot && U_SUCCESS(*pErrorCode)) {
+            myTarget=targetBuffer;
-                myTarget=targetBuffer;
+            ucnv_convertEx(outConverter, inConverter,
-                ucnv_fromUnicode(outConverter,
+                           &myTarget, targetLimit,
                                 &myTarget, targetLimit,
                                 (const UChar **)&pivot2, pivot,
                                 NULL,
                                 (UBool)(source==sourceLimit),
                                 pErrorCode);
                targetCapacity+=(myTarget-targetBuffer);
                if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
                    *pErrorCode=U_ZERO_ERROR;
                }
            }
            if(U_FAILURE(*pErrorCode)) {
                /* an error occurred: done */
                break;
            }
            if(source==sourceLimit) {
                /*
                 * source is consumed:
                 * done, and set the buffer overflow error as
                 * the result for the entire function
                 */
                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                break;
            }
            /* now convert from the source into the pivot buffer again */
            pivot=pivot2=pivotBuffer;
            ucnv_toUnicode(inConverter,
                           &pivot, pivotBuffer+CHUNK_SIZE,
                           &source, sourceLimit,
-                           NULL,
+                           pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
                           FALSE,
                           TRUE,
                           pErrorCode);
            targetCapacity+=(myTarget-targetBuffer);
        } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
        if(U_SUCCESS(*pErrorCode)) {
            /*
             * done with preflighting, set the buffer overflow error as
             * the result for the entire function
             */
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
        }
        while(U_SUCCESS(*pErrorCode) || *pErrorCode==U_BUFFER_OVERFLOW_ERROR);
    }
    ucnv_close (inConverter);
    ucnv_close (outConverter);
-    return u_terminateChars(target, targetSize, targetCapacity, pErrorCode);
+    /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */
    return targetCapacity;
 }
 U_CAPI UConverterType  U_EXPORT2
--- a/icu4c/source/common/unicode/ucnv.h
+++ b/icu4c/source/common/unicode/ucnv.h
@ -1060,6 +1060,141 @@ ucnv_getNextUChar(UConverter * converter,
                  const char * sourceLimit,
                  UErrorCode * err);
 /**
 * Convert from one external charset to another using two existing UConverters.
 * Internally, two conversions - ucnv_toUnicode() and ucnv_fromUnicode() -
 * are used, "pivoting" through 16-bit Unicode.
 *
 * There is a similar function, ucnv_convert(),
 * which has the following limitations:
 * - it takes charset names, not converter objects, so that
 *   - two converters are opened for each call
 *   - only single-string conversion is possible, not streaming operation
 * - it does not provide enough information to find out,
 *   in case of failure, whether the toUnicode or
 *   the fromUnicode conversion failed
 *
 * By contrast, ucnv_convertEx()
 * - takes UConverter parameters instead of charset names
 * - fully exposes the pivot buffer for complete error handling
 *
 * ucnv_convertEx() also provides further convenience:
 * - an option to reset the converters at the beginning
 *   (if reset==TRUE, see parameters;
 *    also sets *pivotTarget=*pivotSource=pivotStart)
 * - allow NUL-terminated input
 *   (only a single NUL byte, will not work for charsets with multi-byte NULs)
 *   (if sourceLimit==NULL, see parameters)
 * - terminate with a NUL on output
 *   (only a single NUL byte, not useful for charsets with multi-byte NULs),
 *   or set U_STRING_NOT_TERMINATED_WARNING if the output exactly fills
 *   the target buffer
 * - the pivot buffer can be provided internally;
 *   in this case, the caller will not be able to get details about where an
 *   error occurred
 *   (if pivotStart==NULL, see below)
 *
 * The function returns when one of the following is true:
 * - the entire source text has been converted successfully to the target buffer
 * - a target buffer overflow occurred (U_BUFFER_OVERFLOW_ERROR)
 * - a conversion error occurred
 *   (other U_FAILURE(), see description of pErrorCode)
 *
 * Limitation compared to the direct use of
 * ucnv_fromUnicode() and ucnv_toUnicode():
 * ucnv_convertEx() does not provide offset information.
 *
 * Limitation compared to ucnv_fromUChars() and ucnv_toUChars():
 * ucnv_convertEx() does not support preflighting directly.
 *
 * Sample code for converting a single string from
 * one external charset to UTF-8, ignoring the location of errors:
 *
 * \code
 * int32_t
 * myToUTF8(UConverter *cnv,
 *          const char *s, int32_t length,
 *          char *u8, int32_t capacity,
 *          UErrorCode *pErrorCode) {
 *     UConverter *utf8Cnv;
 *     char *target;
 *
 *     if(U_FAILURE(*pErrorCode)) {
 *         return 0;
 *     }
 *
 *     utf8Cnv=myGetCachedUTF8Converter(pErrorCode);
 *     if(U_FAILURE(*pErrorCode)) {
 *         return 0;
 *     }
 *
 *     target=u8;
 *     ucnv_convertEx(cnv, utf8Cnv,
 *                    &target, u8+capacity,
 *                    &s, length>=0 ? s+length : NULL,
 *                    NULL, NULL, NULL, NULL,
 *                    TRUE, TRUE,
 *                    pErrorCode);
 * 
 *     myReleaseCachedUTF8Converter(utf8Cnv);
 *
 *     // return the output string length, but without preflighting
 *     return (int32_t)(target-u8);
 * }
 * \endcode
 *
 * @param targetCnv     Output converter, used to convert from the UTF-16 pivot
 *                      to the target using ucnv_fromUnicode().
 * @param sourceCnv     Input converter, used to convert from the source to
 *                      the UTF-16 pivot using ucnv_toUnicode().
 * @param target        I/O parameter, same as for ucnv_fromUChars().
 *                      Input: *target points to the beginning of the target buffer.
 *                      Output: *target points to the first unit after the last char written.
 * @param targetLimit   Pointer to the first unit after the target buffer.
 * @param source        I/O parameter, same as for ucnv_toUChars().
 *                      Input: *source points to the beginning of the source buffer.
 *                      Output: *source points to the first unit after the last char read.
 * @param sourceLimit   Pointer to the first unit after the source buffer.
 * @param pivotStart    Pointer to the UTF-16 pivot buffer. If pivotStart==NULL,
 *                      then an internal buffer is used and the other pivot
 *                      arguments are ignored and can be NULL as well.
 * @param pivotSource   I/O parameter, same as source in ucnv_fromUChars() for
 *                      conversion from the pivot buffer to the target buffer.
 * @param pivotTarget   I/O parameter, same as target in ucnv_toUChars() for
 *                      conversion from the source buffer to the pivot buffer.
 *                      It must be pivotStart<=*pivotSource<=*pivotTarget<=pivotLimit
 *                      and pivotStart<pivotLimit (unless pivotStart==NULL).
 * @param pivotLimit    Pointer to the first unit after the pivot buffer.
 * @param reset         If TRUE, then ucnv_resetToUnicode(sourceCnv) and
 *                      ucnv_resetFromUnicode(targetCnv) are called, and the
 *                      pivot pointers are reset (*pivotTarget=*pivotSource=pivotStart).
 * @param flush         If true, indicates the end of the input.
 *                      Passed directly to ucnv_toUnicode(), and carried over to
 *                      ucnv_fromUnicode() when the source is empty as well.
 * @param pErrorCode    ICU error code in/out parameter.
 *                      Must fulfill U_SUCCESS before the function call.
 *                      U_BUFFER_OVERFLOW_ERROR always refers to the target buffer
 *                      because overflows into the pivot buffer are handled internally.
 *                      Other conversion errors are from the source-to-pivot
 *                      conversion if *pivotSource==pivotStart, otherwise from
 *                      the pivot-to-target conversion.
 *
 * @see ucnv_convert
 * @see ucnv_fromUnicode
 * @see ucnv_toUnicode
 * @see ucnv_fromUChars
 * @see ucnv_toUChars
 * @draft ICU 2.6
 */
 U_CAPI void U_EXPORT2
 ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
               char **target, const char *targetLimit,
               const char **source, const char *sourceLimit,
               UChar *pivotStart, UChar **pivotSource,
               UChar **pivotTarget, const UChar *pivotLimit,
               UBool reset, UBool flush,
               UErrorCode *pErrorCode);
 /**
 * Will convert a sequence of bytes from one codepage to another.
 * This is <STRONG>NOT AN EFFICIENT</STRONG> way to transcode.
@ -1075,6 +1210,8 @@ ucnv_getNextUChar(UConverter * converter,
 * @param err error status. 
 * <code>U_BUFFER_OVERFLOW_ERROR</code> will be set if the target is full and there is still input left in the source.
 * @return  will be filled in with the number of bytes needed in target
 *
 * @see ucnv_convertEx
 * @see ucnv_fromUnicode
 * @see ucnv_toUnicode
 * @see ucnv_fromUChars