/*
******************************************************************************
*
*   Copyright (C) 1998-2013, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*
*  ucnv.c:
*  Implements APIs for the ICU's codeset conversion library;
*  mostly calls through internal functions;
*  created by Bertrand A. Damiba
*
* Modification History:
*
*   Date        Name        Description
*   04/04/99    helena      Fixed internal header inclusion.
*   05/09/00    helena      Added implementation to handle fallback mappings.
*   06/20/2000  helena      OS/400 port changes; mostly typecast.
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_CONVERSION

#include "unicode/ustring.h"
#include "unicode/ucnv.h"
#include "unicode/ucnv_err.h"
#include "unicode/uset.h"
#include "unicode/utf.h"
#include "unicode/utf16.h"
#include "putilimp.h"
#include "cmemory.h"
#include "cstring.h"
#include "uassert.h"
#include "utracimp.h"
#include "ustr_imp.h"
#include "ucnv_imp.h"
#include "ucnv_cnv.h"
#include "ucnv_bld.h"

/* size of intermediate and preflighting buffers in ucnv_convert() */
#define CHUNK_SIZE 1024

/*
 * One entry of the ambiguous-converter table: a converter name paired with
 * a UChar.
 * NOTE(review): variant5c is presumably the character that byte 0x5c maps to
 * in that codepage (U+00A5 or U+20A9 in the table below) - confirm against
 * the code that consumes this table.
 */
typedef struct UAmbiguousConverter {
    const char *name;
    const UChar variant5c;
} UAmbiguousConverter;

static const UAmbiguousConverter ambiguousConverters[]={
    { "ibm-897_P100-1995", 0xa5 },
    { "ibm-942_P120-1999", 0xa5 },
    { "ibm-943_P130-1999", 0xa5 },
    { "ibm-946_P100-1995", 0xa5 },
    { "ibm-33722_P120-1999", 0xa5 },
    { "ibm-1041_P100-1995", 0xa5 },
    /*{ "ibm-54191_P100-2006", 0xa5 },*/
    /*{ "ibm-62383_P100-2007", 0xa5 },*/
    /*{ "ibm-891_P100-1995", 0x20a9 },*/
    { "ibm-944_P100-1995", 0x20a9 },
    { "ibm-949_P110-1999", 0x20a9 },
    { "ibm-1363_P110-1997", 0x20a9 },
    { "ISO_2022,locale=ko,version=0", 0x20a9 },
    { "ibm-1088_P100-1995", 0x20a9 }
};

/*
 * Opens a converter by name by delegating to ucnv_createConverter().
 * Returns NULL without calling through when err is NULL or already
 * holds a failure code.
 */
U_CAPI UConverter* U_EXPORT2
ucnv_open (const char *name,
           UErrorCode * err)
{
    if (err != NULL && U_SUCCESS (*err)) {
        return ucnv_createConverter(NULL, name, err);
    }
    return NULL;
}

/* Opens a converter from a package; a plain pass-through to
 * ucnv_createConverterFromPackage(). */
U_CAPI UConverter* U_EXPORT2
ucnv_openPackage (const char *packageName,
                  const char *converterName,
                  UErrorCode * err)
{
    return ucnv_createConverterFromPackage(packageName, converterName, err);
}

/*
 * Same as ucnv_open() but takes the converter name as a UChar string.
 * The name is copied into a char buffer with u_austrcpy(); a name of
 * UCNV_MAX_CONVERTER_NAME_LENGTH or more UChars is rejected with
 * U_ILLEGAL_ARGUMENT_ERROR. A NULL name is forwarded as NULL.
 */
U_CAPI UConverter* U_EXPORT2
ucnv_openU (const UChar * name,
            UErrorCode * err)
{
    char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH];

    if (err == NULL || U_FAILURE(*err)) {
        return NULL;
    }
    if (name != NULL) {
        if (u_strlen(name) < UCNV_MAX_CONVERTER_NAME_LENGTH) {
            return ucnv_open(u_austrcpy(asciiName, name), err);
        }
        /* the name would not fit into asciiName */
        *err = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }
    return ucnv_open (NULL, err);
}

/*
 * Copy the name prefix that corresponds to a UConverterPlatform value.
 * @param platformString An output buffer
 * @param pltfrm An enum representing a platform
 * @return the length of the copied string ("ibm-" for UCNV_IBM,
 *         the empty string for everything else).
 */
static int32_t
ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm)
{
    if (pltfrm == UCNV_IBM) {
        uprv_strcpy(platformString, "ibm-");
        return 4;
    }

    /* default to empty string */
    *platformString = 0;
    return 0;
}

/*
 * Builds the converter name "<platform prefix><codepage in decimal>" and
 * opens it through ucnv_createConverter().
 */
U_CAPI UConverter*   U_EXPORT2
ucnv_openCCSID (int32_t codepage,
                UConverterPlatform platform,
                UErrorCode * err)
{
    char myName[UCNV_MAX_CONVERTER_NAME_LENGTH];
    int32_t prefixLength;

    if (err == NULL || U_FAILURE (*err)) {
        return NULL;
    }

    /* ucnv_copyPlatformString writes "ibm-" or an empty prefix */
    prefixLength = ucnv_copyPlatformString(myName, platform);
    /* append the codepage number, base 10, right after the prefix */
    T_CString_integerToString(myName + prefixLength, codepage, 10);

    return ucnv_createConverter(NULL, myName, err);
}

/* Creating a temporary stack-based object that can be used in one thread,
and created from a converter that is shared across threads.
*/ U_CAPI UConverter* U_EXPORT2 ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status) { UConverter *localConverter, *allocatedConverter; int32_t stackBufferSize; int32_t bufferSizeNeeded; char *stackBufferChars = (char *)stackBuffer; UErrorCode cbErr; UConverterToUnicodeArgs toUArgs = { sizeof(UConverterToUnicodeArgs), TRUE, NULL, NULL, NULL, NULL, NULL, NULL }; UConverterFromUnicodeArgs fromUArgs = { sizeof(UConverterFromUnicodeArgs), TRUE, NULL, NULL, NULL, NULL, NULL, NULL }; UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE); if (status == NULL || U_FAILURE(*status)){ UTRACE_EXIT_STATUS(status? *status: U_ILLEGAL_ARGUMENT_ERROR); return NULL; } if (cnv == NULL) { *status = U_ILLEGAL_ARGUMENT_ERROR; UTRACE_EXIT_STATUS(*status); return NULL; } UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p", ucnv_getName(cnv, status), cnv, stackBuffer); if (cnv->sharedData->impl->safeClone != NULL) { /* call the custom safeClone function for sizing */ bufferSizeNeeded = 0; cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status); if (U_FAILURE(*status)) { UTRACE_EXIT_STATUS(*status); return NULL; } } else { /* inherent sizing */ bufferSizeNeeded = sizeof(UConverter); } if (pBufferSize == NULL) { stackBufferSize = 1; pBufferSize = &stackBufferSize; } else { stackBufferSize = *pBufferSize; if (stackBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */ *pBufferSize = bufferSizeNeeded; UTRACE_EXIT_VALUE(bufferSizeNeeded); return NULL; } } /* Pointers on 64-bit platforms need to be aligned * on a 64-bit boundary in memory. 
*/ if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) { int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars); if(stackBufferSize > offsetUp) { stackBufferSize -= offsetUp; stackBufferChars += offsetUp; } else { /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */ stackBufferSize = 1; } } stackBuffer = (void *)stackBufferChars; /* Now, see if we must allocate any memory */ if (stackBufferSize < bufferSizeNeeded || stackBuffer == NULL) { /* allocate one here...*/ localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded); if(localConverter == NULL) { *status = U_MEMORY_ALLOCATION_ERROR; UTRACE_EXIT_STATUS(*status); return NULL; } *status = U_SAFECLONE_ALLOCATED_WARNING; /* record the fact that memory was allocated */ *pBufferSize = bufferSizeNeeded; } else { /* just use the stack buffer */ localConverter = (UConverter*) stackBuffer; allocatedConverter = NULL; } uprv_memset(localConverter, 0, bufferSizeNeeded); /* Copy initial state */ uprv_memcpy(localConverter, cnv, sizeof(UConverter)); localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE; /* copy the substitution string */ if (cnv->subChars == (uint8_t *)cnv->subUChars) { localConverter->subChars = (uint8_t *)localConverter->subUChars; } else { localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); if (localConverter->subChars == NULL) { uprv_free(allocatedConverter); UTRACE_EXIT_STATUS(*status); return NULL; } uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); } /* now either call the safeclone fcn or not */ if (cnv->sharedData->impl->safeClone != NULL) { /* call the custom safeClone function */ localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status); } if(localConverter==NULL || U_FAILURE(*status)) { if (allocatedConverter != NULL && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) { 
uprv_free(allocatedConverter->subChars); } uprv_free(allocatedConverter); UTRACE_EXIT_STATUS(*status); return NULL; } /* increment refcount of shared data if needed */ /* Checking whether it's an algorithic converter is okay in multithreaded applications because the value never changes. Don't check referenceCounter for any other value. */ if (cnv->sharedData->referenceCounter != ~0) { ucnv_incrementRefCount(cnv->sharedData); } if(localConverter == (UConverter*)stackBuffer) { /* we're using user provided data - set to not destroy */ localConverter->isCopyLocal = TRUE; } /* allow callback functions to handle any memory allocation */ toUArgs.converter = fromUArgs.converter = localConverter; cbErr = U_ZERO_ERROR; cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, NULL, 0, UCNV_CLONE, &cbErr); cbErr = U_ZERO_ERROR; cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr); UTRACE_EXIT_PTR_STATUS(localConverter, *status); return localConverter; } /*Decreases the reference counter in the shared immutable section of the object *and frees the mutable part*/ U_CAPI void U_EXPORT2 ucnv_close (UConverter * converter) { UErrorCode errorCode = U_ZERO_ERROR; UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE); if (converter == NULL) { UTRACE_EXIT(); return; } UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b", ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal); /* In order to speed up the close, only call the callbacks when they have been changed. This performance check will only work when the callbacks are set within a shared library or from user code that statically links this code. 
*/ /* first, notify the callback functions that the converter is closed */ if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) { UConverterToUnicodeArgs toUArgs = { sizeof(UConverterToUnicodeArgs), TRUE, NULL, NULL, NULL, NULL, NULL, NULL }; toUArgs.converter = converter; errorCode = U_ZERO_ERROR; converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode); } if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) { UConverterFromUnicodeArgs fromUArgs = { sizeof(UConverterFromUnicodeArgs), TRUE, NULL, NULL, NULL, NULL, NULL, NULL }; fromUArgs.converter = converter; errorCode = U_ZERO_ERROR; converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode); } if (converter->sharedData->impl->close != NULL) { converter->sharedData->impl->close(converter); } if (converter->subChars != (uint8_t *)converter->subUChars) { uprv_free(converter->subChars); } /* Checking whether it's an algorithic converter is okay in multithreaded applications because the value never changes. Don't check referenceCounter for any other value. 
*/ if (converter->sharedData->referenceCounter != ~0) { ucnv_unloadSharedDataIfReady(converter->sharedData); } if(!converter->isCopyLocal){ uprv_free(converter); } UTRACE_EXIT(); } /*returns a single Name from the list, will return NULL if out of bounds */ U_CAPI const char* U_EXPORT2 ucnv_getAvailableName (int32_t n) { if (0 <= n && n <= 0xffff) { UErrorCode err = U_ZERO_ERROR; const char *name = ucnv_bld_getAvailableConverter((uint16_t)n, &err); if (U_SUCCESS(err)) { return name; } } return NULL; } U_CAPI int32_t U_EXPORT2 ucnv_countAvailable () { UErrorCode err = U_ZERO_ERROR; return ucnv_bld_countAvailableConverters(&err); } U_CAPI void U_EXPORT2 ucnv_getSubstChars (const UConverter * converter, char *mySubChar, int8_t * len, UErrorCode * err) { if (U_FAILURE (*err)) return; if (converter->subCharLen <= 0) { /* Unicode string or empty string from ucnv_setSubstString(). */ *len = 0; return; } if (*len < converter->subCharLen) /*not enough space in subChars */ { *err = U_INDEX_OUTOFBOUNDS_ERROR; return; } uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen); /*fills in the subchars */ *len = converter->subCharLen; /*store # of bytes copied to buffer */ } U_CAPI void U_EXPORT2 ucnv_setSubstChars (UConverter * converter, const char *mySubChar, int8_t len, UErrorCode * err) { if (U_FAILURE (*err)) return; /*Makes sure that the subChar is within the codepages char length boundaries */ if ((len > converter->sharedData->staticData->maxBytesPerChar) || (len < converter->sharedData->staticData->minBytesPerChar)) { *err = U_ILLEGAL_ARGUMENT_ERROR; return; } uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */ converter->subCharLen = len; /*sets the new len */ /* * There is currently (2001Feb) no separate API to set/get subChar1. * In order to always have subChar written after it is explicitly set, * we set subChar1 to 0. 
*/ converter->subChar1 = 0; return; } U_CAPI void U_EXPORT2 ucnv_setSubstString(UConverter *cnv, const UChar *s, int32_t length, UErrorCode *err) { UAlignedMemory cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE / sizeof(UAlignedMemory) + 1]; char chars[UCNV_ERROR_BUFFER_LENGTH]; UConverter *clone; uint8_t *subChars; int32_t cloneSize, length8; /* Let the following functions check all arguments. */ cloneSize = sizeof(cloneBuffer); clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err); ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err); length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err); ucnv_close(clone); if (U_FAILURE(*err)) { return; } if (cnv->sharedData->impl->writeSub == NULL #if !UCONFIG_NO_LEGACY_CONVERSION || (cnv->sharedData->staticData->conversionType == UCNV_MBCS && ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL) #endif ) { /* The converter is not stateful. Store the charset bytes as a fixed string. */ subChars = (uint8_t *)chars; } else { /* * The converter has a non-default writeSub() function, indicating * that it is stateful. * Store the Unicode string for on-the-fly conversion for correct * state handling. */ if (length > UCNV_ERROR_BUFFER_LENGTH) { /* * Should not occur. The converter should output at least one byte * per UChar, which means that ucnv_fromUChars() should catch all * overflows. */ *err = U_BUFFER_OVERFLOW_ERROR; return; } subChars = (uint8_t *)s; if (length < 0) { length = u_strlen(s); } length8 = length * U_SIZEOF_UCHAR; } /* * For storing the substitution string, select either the small buffer inside * UConverter or allocate a subChars buffer. */ if (length8 > UCNV_MAX_SUBCHAR_LEN) { /* Use a separate buffer for the string. Outside UConverter to not make it too large. */ if (cnv->subChars == (uint8_t *)cnv->subUChars) { /* Allocate a new buffer for the string. 
*/ cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); if (cnv->subChars == NULL) { cnv->subChars = (uint8_t *)cnv->subUChars; *err = U_MEMORY_ALLOCATION_ERROR; return; } uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); } } /* Copy the substitution string into the UConverter or its subChars buffer. */ if (length8 == 0) { cnv->subCharLen = 0; } else { uprv_memcpy(cnv->subChars, subChars, length8); if (subChars == (uint8_t *)chars) { cnv->subCharLen = (int8_t)length8; } else /* subChars == s */ { cnv->subCharLen = (int8_t)-length; } } /* See comment in ucnv_setSubstChars(). */ cnv->subChar1 = 0; } /*resets the internal states of a converter *goal : have the same behaviour than a freshly created converter */ static void _reset(UConverter *converter, UConverterResetChoice choice, UBool callCallback) { if(converter == NULL) { return; } if(callCallback) { /* first, notify the callback functions that the converter is reset */ UErrorCode errorCode; if(choice<=UCNV_RESET_TO_UNICODE && converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) { UConverterToUnicodeArgs toUArgs = { sizeof(UConverterToUnicodeArgs), TRUE, NULL, NULL, NULL, NULL, NULL, NULL }; toUArgs.converter = converter; errorCode = U_ZERO_ERROR; converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode); } if(choice!=UCNV_RESET_TO_UNICODE && converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) { UConverterFromUnicodeArgs fromUArgs = { sizeof(UConverterFromUnicodeArgs), TRUE, NULL, NULL, NULL, NULL, NULL, NULL }; fromUArgs.converter = converter; errorCode = U_ZERO_ERROR; converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode); } } /* now reset the converter itself */ if(choice<=UCNV_RESET_TO_UNICODE) { converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus; converter->mode = 0; converter->toULength = 0; 
converter->invalidCharLength = converter->UCharErrorBufferLength = 0; converter->preToULength = 0; } if(choice!=UCNV_RESET_TO_UNICODE) { converter->fromUnicodeStatus = 0; converter->fromUChar32 = 0; converter->invalidUCharLength = converter->charErrorBufferLength = 0; converter->preFromUFirstCP = U_SENTINEL; converter->preFromULength = 0; } if (converter->sharedData->impl->reset != NULL) { /* call the custom reset function */ converter->sharedData->impl->reset(converter, choice); } } U_CAPI void U_EXPORT2 ucnv_reset(UConverter *converter) { _reset(converter, UCNV_RESET_BOTH, TRUE); } U_CAPI void U_EXPORT2 ucnv_resetToUnicode(UConverter *converter) { _reset(converter, UCNV_RESET_TO_UNICODE, TRUE); } U_CAPI void U_EXPORT2 ucnv_resetFromUnicode(UConverter *converter) { _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE); } U_CAPI int8_t U_EXPORT2 ucnv_getMaxCharSize (const UConverter * converter) { return converter->maxBytesPerUChar; } U_CAPI int8_t U_EXPORT2 ucnv_getMinCharSize (const UConverter * converter) { return converter->sharedData->staticData->minBytesPerChar; } U_CAPI const char* U_EXPORT2 ucnv_getName (const UConverter * converter, UErrorCode * err) { if (U_FAILURE (*err)) return NULL; if(converter->sharedData->impl->getName){ const char* temp= converter->sharedData->impl->getName(converter); if(temp) return temp; } return converter->sharedData->staticData->name; } U_CAPI int32_t U_EXPORT2 ucnv_getCCSID(const UConverter * converter, UErrorCode * err) { int32_t ccsid; if (U_FAILURE (*err)) return -1; ccsid = converter->sharedData->staticData->codepage; if (ccsid == 0) { /* Rare case. This is for cases like gb18030, which doesn't have an IBM canonical name, but does have an IBM alias. 
*/ const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err); if (U_SUCCESS(*err) && standardName) { const char *ccsidStr = uprv_strchr(standardName, '-'); if (ccsidStr) { ccsid = (int32_t)atol(ccsidStr+1); /* +1 to skip '-' */ } } } return ccsid; } U_CAPI UConverterPlatform U_EXPORT2 ucnv_getPlatform (const UConverter * converter, UErrorCode * err) { if (U_FAILURE (*err)) return UCNV_UNKNOWN; return (UConverterPlatform)converter->sharedData->staticData->platform; } U_CAPI void U_EXPORT2 ucnv_getToUCallBack (const UConverter * converter, UConverterToUCallback *action, const void **context) { *action = converter->fromCharErrorBehaviour; *context = converter->toUContext; } U_CAPI void U_EXPORT2 ucnv_getFromUCallBack (const UConverter * converter, UConverterFromUCallback *action, const void **context) { *action = converter->fromUCharErrorBehaviour; *context = converter->fromUContext; } U_CAPI void U_EXPORT2 ucnv_setToUCallBack (UConverter * converter, UConverterToUCallback newAction, const void* newContext, UConverterToUCallback *oldAction, const void** oldContext, UErrorCode * err) { if (U_FAILURE (*err)) return; if (oldAction) *oldAction = converter->fromCharErrorBehaviour; converter->fromCharErrorBehaviour = newAction; if (oldContext) *oldContext = converter->toUContext; converter->toUContext = newContext; } U_CAPI void U_EXPORT2 ucnv_setFromUCallBack (UConverter * converter, UConverterFromUCallback newAction, const void* newContext, UConverterFromUCallback *oldAction, const void** oldContext, UErrorCode * err) { if (U_FAILURE (*err)) return; if (oldAction) *oldAction = converter->fromUCharErrorBehaviour; converter->fromUCharErrorBehaviour = newAction; if (oldContext) *oldContext = converter->fromUContext; converter->fromUContext = newContext; } static void _updateOffsets(int32_t *offsets, int32_t length, int32_t sourceIndex, int32_t errorInputLength) { int32_t *limit; int32_t delta, offset; if(sourceIndex>=0) { /* * adjust each 
offset by adding the previous sourceIndex * minus the length of the input sequence that caused an * error, if any */ delta=sourceIndex-errorInputLength; } else { /* * set each offset to -1 because this conversion function * does not handle offsets */ delta=-1; } limit=offsets+length; if(delta==0) { /* most common case, nothing to do */ } else if(delta>0) { /* add the delta to each offset (but not if the offset is <0) */ while(offsets=0) { *offsets=offset+delta; } ++offsets; } } else /* delta<0 */ { /* * set each offset to -1 because this conversion function * does not handle offsets * or the error input sequence started in a previous buffer */ while(offsetsconverter; s=pArgs->source; t=pArgs->target; offsets=pArgs->offsets; /* get the converter implementation function */ sourceIndex=0; if(offsets==NULL) { fromUnicode=cnv->sharedData->impl->fromUnicode; } else { fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets; if(fromUnicode==NULL) { /* there is no WithOffsets implementation */ fromUnicode=cnv->sharedData->impl->fromUnicode; /* we will write -1 for each offset */ sourceIndex=-1; } } if(cnv->preFromULength>=0) { /* normal mode */ realSource=NULL; /* avoid compiler warnings - not otherwise necessary, and the values do not matter */ realSourceLimit=NULL; realFlush=FALSE; realSourceIndex=0; } else { /* * Previous m:n conversion stored source units from a partial match * and failed to consume all of them. * We need to "replay" them from a temporary buffer and convert them first. 
*/ realSource=pArgs->source; realSourceLimit=pArgs->sourceLimit; realFlush=pArgs->flush; realSourceIndex=sourceIndex; uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR); pArgs->source=replay; pArgs->sourceLimit=replay-cnv->preFromULength; pArgs->flush=FALSE; sourceIndex=-1; cnv->preFromULength=0; } /* * loop for conversion and error handling * * loop { * convert * loop { * update offsets * handle end of input * handle errors/call callback * } * } */ for(;;) { if(U_SUCCESS(*err)) { /* convert */ fromUnicode(pArgs, err); /* * set a flag for whether the converter * successfully processed the end of the input * * need not check cnv->preFromULength==0 because a replay (<0) will cause * sflush && pArgs->source==pArgs->sourceLimit && cnv->fromUChar32==0); } else { /* handle error from ucnv_convertEx() */ converterSawEndOfInput=FALSE; } /* no callback called yet for this iteration */ calledCallback=FALSE; /* no sourceIndex adjustment for conversion, only for callback output */ errorInputLength=0; /* * loop for offsets and error handling * * iterates at most 3 times: * 1. to clean up after the conversion function * 2. after the callback * 3. 
after the callback again if there was truncated input */ for(;;) { /* update offsets if we write any */ if(offsets!=NULL) { int32_t length=(int32_t)(pArgs->target-t); if(length>0) { _updateOffsets(offsets, length, sourceIndex, errorInputLength); /* * if a converter handles offsets and updates the offsets * pointer at the end, then pArgs->offset should not change * here; * however, some converters do not handle offsets at all * (sourceIndex<0) or may not update the offsets pointer */ pArgs->offsets=offsets+=length; } if(sourceIndex>=0) { sourceIndex+=(int32_t)(pArgs->source-s); } } if(cnv->preFromULength<0) { /* * switch the source to new replay units (cannot occur while replaying) * after offset handling and before end-of-input and callback handling */ if(realSource==NULL) { realSource=pArgs->source; realSourceLimit=pArgs->sourceLimit; realFlush=pArgs->flush; realSourceIndex=sourceIndex; uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR); pArgs->source=replay; pArgs->sourceLimit=replay-cnv->preFromULength; pArgs->flush=FALSE; if((sourceIndex+=cnv->preFromULength)<0) { sourceIndex=-1; } cnv->preFromULength=0; } else { /* see implementation note before _fromUnicodeWithCallback() */ U_ASSERT(realSource==NULL); *err=U_INTERNAL_PROGRAM_ERROR; } } /* update pointers */ s=pArgs->source; t=pArgs->target; if(U_SUCCESS(*err)) { if(ssourceLimit) { /* * continue with the conversion loop while there is still input left * (continue converting by breaking out of only the inner loop) */ break; } else if(realSource!=NULL) { /* switch back from replaying to the real source and continue */ pArgs->source=realSource; pArgs->sourceLimit=realSourceLimit; pArgs->flush=realFlush; sourceIndex=realSourceIndex; realSource=NULL; break; } else if(pArgs->flush && cnv->fromUChar32!=0) { /* * the entire input stream is consumed * and there is a partial, truncated input sequence left */ /* inject an error and continue with callback handling */ *err=U_TRUNCATED_CHAR_FOUND; 
calledCallback=FALSE; /* new error condition */ } else { /* input consumed */ if(pArgs->flush) { /* * return to the conversion loop once more if the flush * flag is set and the conversion function has not * successfully processed the end of the input yet * * (continue converting by breaking out of only the inner loop) */ if(!converterSawEndOfInput) { break; } /* reset the converter without calling the callback function */ _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE); } /* done successfully */ return; } } /* U_FAILURE(*err) */ { UErrorCode e; if( calledCallback || (e=*err)==U_BUFFER_OVERFLOW_ERROR || (e!=U_INVALID_CHAR_FOUND && e!=U_ILLEGAL_CHAR_FOUND && e!=U_TRUNCATED_CHAR_FOUND) ) { /* * the callback did not or cannot resolve the error: * set output pointers and return * * the check for buffer overflow is redundant but it is * a high-runner case and hopefully documents the intent * well * * if we were replaying, then the replay buffer must be * copied back into the UConverter * and the real arguments must be restored */ if(realSource!=NULL) { int32_t length; U_ASSERT(cnv->preFromULength==0); length=(int32_t)(pArgs->sourceLimit-pArgs->source); if(length>0) { uprv_memcpy(cnv->preFromU, pArgs->source, length*U_SIZEOF_UCHAR); cnv->preFromULength=(int8_t)-length; } pArgs->source=realSource; pArgs->sourceLimit=realSourceLimit; pArgs->flush=realFlush; } return; } } /* callback handling */ { UChar32 codePoint; /* get and write the code point */ codePoint=cnv->fromUChar32; errorInputLength=0; U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint); cnv->invalidUCharLength=(int8_t)errorInputLength; /* set the converter state to deal with the next character */ cnv->fromUChar32=0; /* call the callback function */ cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs, cnv->invalidUCharBuffer, errorInputLength, codePoint, *err==U_INVALID_CHAR_FOUND ? 
UCNV_UNASSIGNED : UCNV_ILLEGAL, err); } /* * loop back to the offset handling * * this flag will indicate after offset handling * that a callback was called; * if the callback did not resolve the error, then we return */ calledCallback=TRUE; } } } /* * Output the fromUnicode overflow buffer. * Call this function if(cnv->charErrorBufferLength>0). * @return TRUE if overflow */ static UBool ucnv_outputOverflowFromUnicode(UConverter *cnv, char **target, const char *targetLimit, int32_t **pOffsets, UErrorCode *err) { int32_t *offsets; char *overflow, *t; int32_t i, length; t=*target; if(pOffsets!=NULL) { offsets=*pOffsets; } else { offsets=NULL; } overflow=(char *)cnv->charErrorBuffer; length=cnv->charErrorBufferLength; i=0; while(icharErrorBufferLength=(int8_t)j; *target=t; if(offsets!=NULL) { *pOffsets=offsets; } *err=U_BUFFER_OVERFLOW_ERROR; return TRUE; } /* copy the overflow contents to the target */ *t++=overflow[i++]; if(offsets!=NULL) { *offsets++=-1; /* no source index available for old output */ } } /* the overflow buffer is completely copied to the target */ cnv->charErrorBufferLength=0; *target=t; if(offsets!=NULL) { *pOffsets=offsets; } return FALSE; } U_CAPI void U_EXPORT2 ucnv_fromUnicode(UConverter *cnv, char **target, const char *targetLimit, const UChar **source, const UChar *sourceLimit, int32_t *offsets, UBool flush, UErrorCode *err) { UConverterFromUnicodeArgs args; const UChar *s; char *t; /* check parameters */ if(err==NULL || U_FAILURE(*err)) { return; } if(cnv==NULL || target==NULL || source==NULL) { *err=U_ILLEGAL_ARGUMENT_ERROR; return; } s=*source; t=*target; if ((const void *)U_MAX_PTR(sourceLimit) == (const void *)sourceLimit) { /* Prevent code from going into an infinite loop in case we do hit this limit. The limit pointer is expected to be on a UChar * boundary. This also prevents the next argument check from failing. */ sourceLimit = (const UChar *)(((const char *)sourceLimit) - 1); } /* * All these conditions should never happen. 
* * 1) Make sure that the limits are >= to the address source or target * * 2) Make sure that the buffer sizes do not exceed the number range for * int32_t because some functions use the size (in units or bytes) * rather than comparing pointers, and because offsets are int32_t values. * * size_t is guaranteed to be unsigned and large enough for the job. * * Return with an error instead of adjusting the limits because we would * not be able to maintain the semantics that either the source must be * consumed or the target filled (unless an error occurs). * An adjustment would be targetLimit=t+0x7fffffff; for example. * * 3) Make sure that the user didn't incorrectly cast a UChar * pointer * to a char * pointer and provide an incomplete UChar code unit. */ if (sourceLimit(size_t)0x3fffffff && sourceLimit>s) || ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) || (((const char *)sourceLimit-(const char *)s) & 1) != 0) { *err=U_ILLEGAL_ARGUMENT_ERROR; return; } /* output the target overflow buffer */ if( cnv->charErrorBufferLength>0 && ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err) ) { /* U_BUFFER_OVERFLOW_ERROR */ return; } /* *target may have moved, therefore stop using t */ if(!flush && s==sourceLimit && cnv->preFromULength>=0) { /* the overflow buffer is emptied and there is no new input: we are done */ return; } /* * Do not simply return with a buffer overflow error if * !flush && t==targetLimit * because it is possible that the source will not generate any output. * For example, the skip callback may be called; * it does not output anything. 
*/

    /* prepare the converter arguments */
    args.converter=cnv;
    args.flush=flush;
    args.offsets=offsets;
    args.source=s;
    args.sourceLimit=sourceLimit;
    args.target=*target;
    args.targetLimit=targetLimit;
    args.size=sizeof(args);

    _fromUnicodeWithCallback(&args, err);

    *source=args.source;
    *target=args.target;
}

/* ucnv_toUnicode() --------------------------------------------------------- */

/*
 * Internal to-Unicode driver shared by ucnv_toUnicode(), the
 * getNextUChar code further down and ucnv_convertEx().
 *
 * Selects the converter implementation function (the WithOffsets variant
 * when pArgs->offsets is set and one exists; otherwise the plain one, and
 * -1 is then written for each offset), and loops over:
 *   - calling the implementation function,
 *   - updating the offsets array via _updateOffsets(),
 *   - replaying bytes stored in cnv->preToU (cnv->preToULength<0) from the
 *     local replay[] buffer before continuing with the real source,
 *   - end-of-input / flush handling,
 *   - calling cnv->fromCharErrorBehaviour for the invalid / illegal /
 *     truncated / escape-sequence error codes listed below.
 *
 * @param pArgs Conversion arguments; source, target and offsets are
 *              advanced. While replaying, source/sourceLimit/flush
 *              temporarily describe replay[]; the real values are put back
 *              on the return paths visible below.
 * @param err   In/out error code. Callers in this file also invoke this
 *              function with an error already set so that it is routed
 *              through the callback handling.
 */
static void
_toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {
    UConverterToUnicode toUnicode;
    UConverter *cnv;
    const char *s;
    UChar *t;
    int32_t *offsets;
    int32_t sourceIndex;
    int32_t errorInputLength;
    UBool converterSawEndOfInput, calledCallback;

    /* variables for m:n conversion */
    char replay[UCNV_EXT_MAX_BYTES];
    const char *realSource, *realSourceLimit;
    int32_t realSourceIndex;
    UBool realFlush;

    cnv=pArgs->converter;
    s=pArgs->source;
    t=pArgs->target;
    offsets=pArgs->offsets;

    /* get the converter implementation function */
    sourceIndex=0;
    if(offsets==NULL) {
        toUnicode=cnv->sharedData->impl->toUnicode;
    } else {
        toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets;
        if(toUnicode==NULL) {
            /* there is no WithOffsets implementation */
            toUnicode=cnv->sharedData->impl->toUnicode;
            /* we will write -1 for each offset */
            sourceIndex=-1;
        }
    }

    if(cnv->preToULength>=0) {
        /* normal mode */
        realSource=NULL;

        /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
        realSourceLimit=NULL;
        realFlush=FALSE;
        realSourceIndex=0;
    } else {
        /*
         * Previous m:n conversion stored source units from a partial match
         * and failed to consume all of them.
         * We need to "replay" them from a temporary buffer and convert them first.
         */
        realSource=pArgs->source;
        realSourceLimit=pArgs->sourceLimit;
        realFlush=pArgs->flush;
        realSourceIndex=sourceIndex;

        /* preToULength is negative here, so its negation is the byte count */
        uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
        pArgs->source=replay;
        pArgs->sourceLimit=replay-cnv->preToULength;
        pArgs->flush=FALSE;
        sourceIndex=-1;

        cnv->preToULength=0;
    }

    /*
     * loop for conversion and error handling
     *
     * loop {
     *   convert
     *   loop {
     *     update offsets
     *     handle end of input
     *     handle errors/call callback
     *   }
     * }
     */
    for(;;) {
        if(U_SUCCESS(*err)) {
            /* convert */
            toUnicode(pArgs, err);

            /*
             * NOTE(review): text appears to be missing inside the next
             * comment. It runs on into what looks like the tail of an
             * expression and an "} else {", so as the file stands the
             * only visible assignment to converterSawEndOfInput is the
             * FALSE below, on the success path. Presumably the success
             * path originally computed the flag and the FALSE belonged to
             * an else branch - confirm against the upstream file.
             */
            /*
             * set a flag for whether the converter
             * successfully processed the end of the input
             *
             * need not check cnv->preToULength==0 because a replay (<0) will cause
             * sflush && pArgs->source==pArgs->sourceLimit && cnv->toULength==0); } else { /* handle error from getNextUChar() or ucnv_convertEx() */
            converterSawEndOfInput=FALSE;
        }

        /* no callback called yet for this iteration */
        calledCallback=FALSE;

        /* no sourceIndex adjustment for conversion, only for callback output */
        errorInputLength=0;

        /*
         * loop for offsets and error handling
         *
         * iterates at most 3 times:
         * 1. to clean up after the conversion function
         * 2. after the callback
         * 3. after the callback again if there was truncated input
         */
        for(;;) {
            /* update offsets if we write any */
            if(offsets!=NULL) {
                int32_t length=(int32_t)(pArgs->target-t);
                if(length>0) {
                    _updateOffsets(offsets, length, sourceIndex, errorInputLength);

                    /*
                     * if a converter handles offsets and updates the offsets
                     * pointer at the end, then pArgs->offset should not change
                     * here;
                     * however, some converters do not handle offsets at all
                     * (sourceIndex<0) or may not update the offsets pointer
                     */
                    pArgs->offsets=offsets+=length;
                }

                if(sourceIndex>=0) {
                    sourceIndex+=(int32_t)(pArgs->source-s);
                }
            }

            if(cnv->preToULength<0) {
                /*
                 * switch the source to new replay units (cannot occur while replaying)
                 * after offset handling and before end-of-input and callback handling
                 */
                if(realSource==NULL) {
                    realSource=pArgs->source;
                    realSourceLimit=pArgs->sourceLimit;
                    realFlush=pArgs->flush;
                    realSourceIndex=sourceIndex;

                    uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
                    pArgs->source=replay;
                    pArgs->sourceLimit=replay-cnv->preToULength;
                    pArgs->flush=FALSE;
                    /* back up the index by the replayed length; clamp to "unknown" */
                    if((sourceIndex+=cnv->preToULength)<0) {
                        sourceIndex=-1;
                    }

                    cnv->preToULength=0;
                } else {
                    /* see implementation note before _fromUnicodeWithCallback() */
                    U_ASSERT(realSource==NULL);
                    *err=U_INTERNAL_PROGRAM_ERROR;
                }
            }

            /* update pointers */
            s=pArgs->source;
            t=pArgs->target;

            if(U_SUCCESS(*err)) {
                /*
                 * NOTE(review): "ssourceLimit" is not declared in this
                 * function; the condition looks like a fused remnant of a
                 * comparison between s and the source limit in pArgs -
                 * confirm against the upstream file.
                 */
                if(ssourceLimit) {
                    /*
                     * continue with the conversion loop while there is still input left
                     * (continue converting by breaking out of only the inner loop)
                     */
                    break;
                } else if(realSource!=NULL) {
                    /* switch back from replaying to the real source and continue */
                    pArgs->source=realSource;
                    pArgs->sourceLimit=realSourceLimit;
                    pArgs->flush=realFlush;
                    sourceIndex=realSourceIndex;

                    realSource=NULL;
                    break;
                } else if(pArgs->flush && cnv->toULength>0) {
                    /*
                     * the entire input stream is consumed
                     * and there is a partial, truncated input sequence left
                     */

                    /* inject an error and continue with callback handling */
                    *err=U_TRUNCATED_CHAR_FOUND;
                    calledCallback=FALSE; /* new error condition */
                } else {
                    /* input consumed */
                    if(pArgs->flush) {
                        /*
                         * return to the conversion loop once more if the flush
                         * flag is set and the conversion function has not
                         * successfully processed the end of the input yet
                         *
                         * (continue converting by breaking out of only the inner loop)
                         */
                        if(!converterSawEndOfInput) {
                            break;
                        }

                        /* reset the converter without calling the callback function */
                        _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
                    }

                    /* done successfully */
                    return;
                }
            }

            /* U_FAILURE(*err) */
            {
                UErrorCode e;

                if( calledCallback ||
                    (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
                    (e!=U_INVALID_CHAR_FOUND &&
                     e!=U_ILLEGAL_CHAR_FOUND &&
                     e!=U_TRUNCATED_CHAR_FOUND &&
                     e!=U_ILLEGAL_ESCAPE_SEQUENCE &&
                     e!=U_UNSUPPORTED_ESCAPE_SEQUENCE)
                ) {
                    /*
                     * the callback did not or cannot resolve the error:
                     * set output pointers and return
                     *
                     * the check for buffer overflow is redundant but it is
                     * a high-runner case and hopefully documents the intent
                     * well
                     *
                     * if we were replaying, then the replay buffer must be
                     * copied back into the UConverter
                     * and the real arguments must be restored
                     */
                    if(realSource!=NULL) {
                        int32_t length;

                        U_ASSERT(cnv->preToULength==0);

                        length=(int32_t)(pArgs->sourceLimit-pArgs->source);
                        if(length>0) {
                            uprv_memcpy(cnv->preToU, pArgs->source, length);
                            /* stored negated: a negative length marks bytes to replay */
                            cnv->preToULength=(int8_t)-length;
                        }

                        pArgs->source=realSource;
                        pArgs->sourceLimit=realSourceLimit;
                        pArgs->flush=realFlush;
                    }

                    return;
                }
            }

            /* copy toUBytes[] to invalidCharBuffer[] */
            errorInputLength=cnv->invalidCharLength=cnv->toULength;
            if(errorInputLength>0) {
                uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength);
            }

            /* set the converter state to deal with the next character */
            cnv->toULength=0;

            /* call the callback function */
            if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) {
                cnv->toUCallbackReason = UCNV_UNASSIGNED;
            }
            cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs,
                cnv->invalidCharBuffer, errorInputLength,
                cnv->toUCallbackReason,
                err);
            cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */

            /*
             * loop back to the offset handling
             *
             * this flag will indicate after offset handling
             * that a callback was called;
             * if the callback did not resolve the error, then we return
             */
            calledCallback=TRUE;
        }
    }
}

/*
 * Output the toUnicode overflow buffer.
 * Call this function if(cnv->UCharErrorBufferLength>0).
 *
 * Copies UChars from cnv->UCharErrorBuffer to *target and, when pOffsets
 * is not NULL, writes -1 for each copied unit into the offsets array.
 * *target (and *pOffsets) are updated on both return paths visible below.
 *
 * @param cnv         Converter whose UCharErrorBuffer is drained.
 * @param target      In/out target pointer.
 * @param targetLimit End of the target buffer.
 * @param pOffsets    In/out offsets pointer; may be NULL.
 * @param err         Set to U_BUFFER_OVERFLOW_ERROR on the TRUE path.
 * @return TRUE if overflow
 */
static UBool
ucnv_outputOverflowToUnicode(UConverter *cnv,
                             UChar **target, const UChar *targetLimit,
                             int32_t **pOffsets,
                             UErrorCode *err) {
    int32_t *offsets;
    UChar *overflow, *t;
    int32_t i, length;

    t=*target;
    if(pOffsets!=NULL) {
        offsets=*pOffsets;
    } else {
        offsets=NULL;
    }

    overflow=cnv->UCharErrorBuffer;
    length=cnv->UCharErrorBufferLength;
    i=0;
    /*
     * NOTE(review): the loop header below is damaged: the loop condition,
     * the target-full test and the declaration of j are not present, and
     * "iUCharErrorBufferLength" is not a declared identifier. Presumably
     * the missing code moved the unwritten rest of the overflow buffer to
     * its beginning before storing the new length - confirm against the
     * upstream file.
     */
    while(iUCharErrorBufferLength=(int8_t)j;

            *target=t;
            if(offsets!=NULL) {
                *pOffsets=offsets;
            }
            *err=U_BUFFER_OVERFLOW_ERROR;
            return TRUE;
        }

        /* copy the overflow contents to the target */
        *t++=overflow[i++];
        if(offsets!=NULL) {
            *offsets++=-1; /* no source index available for old output */
        }
    }

    /* the overflow buffer is completely copied to the target */
    cnv->UCharErrorBufferLength=0;
    *target=t;
    if(offsets!=NULL) {
        *pOffsets=offsets;
    }
    return FALSE;
}

/*
 * Public streaming conversion from codepage bytes to UChars.
 *
 * Order of work visible below:
 *   1. return immediately if err is NULL or already a failure;
 *   2. reject NULL cnv/target/source and inconsistent limits with
 *      U_ILLEGAL_ARGUMENT_ERROR;
 *   3. drain cnv->UCharErrorBuffer into the target first;
 *   4. return if there is no new input, no replay pending and !flush;
 *   5. otherwise fill a UConverterToUnicodeArgs and call
 *      _toUnicodeWithCallback(), then write back *source and *target.
 *
 * @param offsets May be NULL; otherwise receives one int32_t per output
 *                UChar (-1 for output taken from the overflow buffer).
 */
U_CAPI void U_EXPORT2
ucnv_toUnicode(UConverter *cnv,
               UChar **target, const UChar *targetLimit,
               const char **source, const char *sourceLimit,
               int32_t *offsets,
               UBool flush,
               UErrorCode *err) {
    UConverterToUnicodeArgs args;
    const char *s;
    UChar *t;

    /* check parameters */
    if(err==NULL || U_FAILURE(*err)) {
        return;
    }

    if(cnv==NULL || target==NULL || source==NULL) {
        *err=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    s=*source;
    t=*target;

    if ((const void *)U_MAX_PTR(targetLimit) == (const void *)targetLimit) {
        /*
        Prevent code from going into an infinite loop in case we do hit this
        limit. The limit pointer is expected to be on a UChar * boundary.
        This also prevents the next argument check from failing.
        */
        targetLimit = (const UChar *)(((const char *)targetLimit) - 1);
    }

    /*
     * All these conditions should never happen.
     *
     * 1) Make sure that the limits are >= to the address source or target
     *
     * 2) Make sure that the buffer sizes do not exceed the number range for
     * int32_t because some functions use the size (in units or bytes)
     * rather than comparing pointers, and because offsets are int32_t values.
     *
     * size_t is guaranteed to be unsigned and large enough for the job.
     *
     * Return with an error instead of adjusting the limits because we would
     * not be able to maintain the semantics that either the source must be
     * consumed or the target filled (unless an error occurs).
     * An adjustment would be sourceLimit=t+0x7fffffff; for example.
     *
     * 3) Make sure that the user didn't incorrectly cast a UChar * pointer
     * to a char * pointer and provide an incomplete UChar code unit.
     */
    /*
     * NOTE(review): the first part of this condition is damaged: the
     * checks described under 1) above and the start of the source size
     * check are not present, and the parentheses no longer balance.
     * Confirm against the upstream file.
     */
    if (sourceLimit(size_t)0x7fffffff && sourceLimit>s) ||
        ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t) ||
        (((const char *)targetLimit-(const char *)t) & 1) != 0
    ) {
        *err=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    /* output the target overflow buffer */
    if( cnv->UCharErrorBufferLength>0 &&
        ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err)
    ) {
        /* U_BUFFER_OVERFLOW_ERROR */
        return;
    }
    /* *target may have moved, therefore stop using t */

    if(!flush && s==sourceLimit && cnv->preToULength>=0) {
        /* the overflow buffer is emptied and there is no new input: we are done */
        return;
    }

    /*
     * Do not simply return with a buffer overflow error if
     * !flush && t==targetLimit
     * because it is possible that the source will not generate any output.
     * For example, the skip callback may be called;
     * it does not output anything.
     */

    /* prepare the converter arguments */
    args.converter=cnv;
    args.flush=flush;
    args.offsets=offsets;
    args.source=s;
    args.sourceLimit=sourceLimit;
    args.target=*target;
    args.targetLimit=targetLimit;
    args.size=sizeof(args);

    _toUnicodeWithCallback(&args, err);

    *source=args.source;
    *target=args.target;
}

/* ucnv_to/fromUChars() ----------------------------------------------------- */

/*
 * Convert a UChar string to codepage bytes in one call.
 *
 * The visible part validates the arguments (U_ILLEGAL_ARGUMENT_ERROR for a
 * NULL cnv, negative destCapacity, NULL dest with non-zero capacity,
 * srcLength below -1, or NULL src with non-zero length), resets the
 * from-Unicode state of cnv, and treats srcLength==-1 as "NUL-terminated"
 * by calling u_strlen().
 *
 * NOTE(review): from the first "pin the destination limit" test onwards
 * this function is damaged. The rest of its body, what looks like the
 * whole of a to-UChars sibling apart from the end of its argument check,
 * and the header, declarations and first checks of a get-next-UChar
 * function are not present. What follows down to "return c;" is their
 * fused remainder: identifiers such as destLimit0, err, source,
 * sourceLimit, s, c, i, length, buffer, args, delta, itoULength and
 * args.sourceUCharErrorBufferLength are not declared anywhere in the
 * visible text, and three opening braces are never closed. The layout
 * below follows the literal brace nesting of the damaged text. Restore
 * this region from the upstream file before relying on it.
 */
U_CAPI int32_t U_EXPORT2
ucnv_fromUChars(UConverter *cnv,
                char *dest, int32_t destCapacity,
                const UChar *src, int32_t srcLength,
                UErrorCode *pErrorCode) {
    const UChar *srcLimit;
    char *originalDest, *destLimit;
    int32_t destLength;

    /* check arguments */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    if( cnv==NULL ||
        destCapacity<0 || (destCapacity>0 && dest==NULL) ||
        srcLength<-1 || (srcLength!=0 && src==NULL)
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* initialize */
    ucnv_resetFromUnicode(cnv);
    originalDest=dest;
    if(srcLength==-1) {
        srcLength=u_strlen(src);
    }
    if(srcLength>0) {
        srcLimit=src+srcLength;
        destLimit=dest+destCapacity;

        /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
        if(destLimit0 && dest==NULL) ||
            srcLength<-1 || (srcLength!=0 && src==NULL))
        {
            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
            return 0;
        }

        /* initialize */
        ucnv_resetToUnicode(cnv);
        originalDest=dest;
        if(srcLength==-1) {
            srcLength=(int32_t)uprv_strlen(src);
        }
        if(srcLength>0) {
            srcLimit=src+srcLength;
            destLimit=dest+destCapacity;

            /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
            if(destLimit(size_t)0x7fffffff && sourceLimit>s)) {
                *err=U_ILLEGAL_ARGUMENT_ERROR;
                return 0xffff;
            }

            c=U_SENTINEL;

            /* flush the target overflow buffer */
            if(cnv->UCharErrorBufferLength>0) {
                UChar *overflow;

                overflow=cnv->UCharErrorBuffer;
                i=0;
                length=cnv->UCharErrorBufferLength;
                U16_NEXT(overflow, i, length, c);

                /* move the remaining overflow contents up to the beginning */
                if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) {
                    uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i,
                                 cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
                }

                /* NOTE(review): condition damaged ("itoULength"); text is missing here */
                if(!U16_IS_LEAD(c) || itoULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) {
                    c=cnv->sharedData->impl->getNextUChar(&args, err);
                    *source=s=args.source;
                    if(*err==U_INDEX_OUTOFBOUNDS_ERROR) {
                        /* reset the converter without calling the callback function */
                        _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
                        return 0xffff; /* no output */
                    } else if(U_SUCCESS(*err) && c>=0) {
                        return c;
                    /*
                     * else fall through to use _toUnicode() because
                     *   UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all
                     *   U_FAILURE: call _toUnicode() for callback handling (do not output c)
                     */
                    }
                }

                /* convert to one UChar in buffer[0], or handle getNextUChar() errors */
                _toUnicodeWithCallback(&args, err);

                /* an overflow only means more output is waiting in the overflow buffer */
                if(*err==U_BUFFER_OVERFLOW_ERROR) {
                    *err=U_ZERO_ERROR;
                }

                i=0;
                length=(int32_t)(args.target-buffer);
            } else {
                /* write the lead surrogate from the overflow buffer */
                buffer[0]=(UChar)c;
                args.target=buffer+1;
                i=0;
                length=1;
            }

            /* buffer contents starts at i and ends before length */

            if(U_FAILURE(*err)) {
                c=0xffff; /* no output */
            } else if(length==0) {
                /* no input or only state changes */
                *err=U_INDEX_OUTOFBOUNDS_ERROR;
                /* no need to reset explicitly because _toUnicodeWithCallback() did it */
                c=0xffff; /* no output */
            } else {
                c=buffer[0];
                i=1;
                if(!U16_IS_LEAD(c)) {
                    /* consume c=buffer[0], done */
                } else {
                    /* got a lead surrogate, see if a trail surrogate follows */
                    UChar c2;

                    if(cnv->UCharErrorBufferLength>0) {
                        /* got overflow output from the conversion */
                        if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) {
                            /* got a trail surrogate, too */
                            c=U16_GET_SUPPLEMENTARY(c, c2);

                            /* move the remaining overflow contents up to the beginning */
                            if((--cnv->UCharErrorBufferLength)>0) {
                                uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1,
                                             cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
                            }
                        } else {
                            /* c is an unpaired lead surrogate, just return it */
                        }
                    /* NOTE(review): condition damaged; text is missing between "args.source" and "UCharErrorBufferLength" */
                    } else if(args.sourceUCharErrorBufferLength)>0) {
                        uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer,
                                     length*U_SIZEOF_UCHAR);
                    }
                    cnv->UCharErrorBufferLength=(int8_t)(length+delta);

                    cnv->UCharErrorBuffer[0]=buffer[i++];
                    if(delta>1) {
                        cnv->UCharErrorBuffer[1]=buffer[i];
                    }
                }

                *source=args.source;
                return c;
            }

/* ucnv_convert() and siblings ---------------------------------------------- */

/*
 * Convert from sourceCnv's codepage to targetCnv's codepage through a
 * UChar pivot buffer, with optional streaming.
 *
 * Behaviour visible below:
 *   - returns immediately if pErrorCode is NULL or already a failure;
 *   - U_ILLEGAL_ARGUMENT_ERROR for NULL converters, NULL source/target
 *     (or what they point to), NULL targetLimit, or inconsistent limits;
 *   - pivotStart==NULL selects the local pivotBuffer[CHUNK_SIZE]; this is
 *     only accepted together with flush, otherwise the pivot arguments
 *     must all be non-NULL with pivotStart before pivotLimit;
 *   - sourceLimit==NULL means the source is NUL-terminated;
 *   - reset resets both converters and the pivot pointers; without reset
 *     a pending targetCnv overflow buffer is written out first;
 *   - if one side is UCNV_UTF8 and the other implementation provides
 *     fromUTF8/toUTF8, that direct function is used where possible and
 *     the pivot is limited to 32 UChars;
 *   - on exit *source and *target are updated; with flush and success the
 *     target is NUL-terminated if there is room, otherwise
 *     U_STRING_NOT_TERMINATED_WARNING is set.
 */
U_CAPI void U_EXPORT2
ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
               char **target, const char *targetLimit,
               const char **source, const char *sourceLimit,
               UChar *pivotStart, UChar **pivotSource,
               UChar **pivotTarget, const UChar *pivotLimit,
               UBool reset, UBool flush,
               UErrorCode *pErrorCode) {
    UChar pivotBuffer[CHUNK_SIZE];
    const UChar *myPivotSource;
    UChar *myPivotTarget;
    const char *s;
    char *t;

    UConverterToUnicodeArgs toUArgs;
    UConverterFromUnicodeArgs fromUArgs;
    UConverterConvert convert;

    /* error checking */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    if( targetCnv==NULL || sourceCnv==NULL ||
        source==NULL || *source==NULL ||
        target==NULL || *target==NULL || targetLimit==NULL
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    s=*source;
    t=*target;
    /*
     * NOTE(review): the first part of this condition is damaged: text is
     * missing after the second "sourceLimit" and the parentheses no
     * longer balance. Confirm against the upstream file.
     */
    if((sourceLimit!=NULL && sourceLimit(size_t)0x7fffffff && sourceLimit>s)) ||
        ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t)
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    if(pivotStart==NULL) {
        if(!flush) {
            /* streaming conversion requires an explicit pivot buffer */
            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
            return;
        }

        /* use the stack pivot buffer */
        myPivotSource=myPivotTarget=pivotStart=pivotBuffer;
        pivotSource=(UChar **)&myPivotSource;
        pivotTarget=&myPivotTarget;
        pivotLimit=pivotBuffer+CHUNK_SIZE;
    } else if( pivotStart>=pivotLimit ||
               pivotSource==NULL || *pivotSource==NULL ||
               pivotTarget==NULL || *pivotTarget==NULL ||
               pivotLimit==NULL
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    if(sourceLimit==NULL) {
        /* get limit of single-byte-NUL-terminated source string */
        sourceLimit=uprv_strchr(*source, 0);
    }

    if(reset) {
        ucnv_resetToUnicode(sourceCnv);
        ucnv_resetFromUnicode(targetCnv);
        *pivotSource=*pivotTarget=pivotStart;
    } else if(targetCnv->charErrorBufferLength>0) {
        /* output the targetCnv overflow buffer */
        if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, NULL, pErrorCode)) {
            /* U_BUFFER_OVERFLOW_ERROR */
            return;
        }
        /* *target has moved, therefore stop using t */

        if( !flush &&
            targetCnv->preFromULength>=0 && *pivotSource==*pivotTarget &&
            sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 && s==sourceLimit
        ) {
            /* the fromUnicode overflow buffer is emptied and there is no new input: we are done */
            return;
        }
    }

    /* Is direct-UTF-8 conversion available? */
    if( sourceCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
        targetCnv->sharedData->impl->fromUTF8!=NULL
    ) {
        convert=targetCnv->sharedData->impl->fromUTF8;
    } else if( targetCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
               sourceCnv->sharedData->impl->toUTF8!=NULL
    ) {
        convert=sourceCnv->sharedData->impl->toUTF8;
    } else {
        convert=NULL;
    }

    /*
     * If direct-UTF-8 conversion is available, then we use a smaller
     * pivot buffer for error handling and partial matches
     * so that we quickly return to direct conversion.
     *
     * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH.
     *
     * We could reduce the pivot buffer size further, at the cost of
     * buffer overflows from callbacks.
     * The pivot buffer should not be smaller than the maximum number of
     * fromUnicode extension table input UChars
     * (for m:n conversion, see
     * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS])
     * or 2 for surrogate pairs.
     *
     * Too small a buffer can cause thrashing between pivoting and direct
     * conversion, with function call overhead outweighing the benefits
     * of direct conversion.
     */
    if(convert!=NULL && (pivotLimit-pivotStart)>32) {
        pivotLimit=pivotStart+32;
    }

    /* prepare the converter arguments */
    fromUArgs.converter=targetCnv;
    fromUArgs.flush=FALSE;
    fromUArgs.offsets=NULL;
    fromUArgs.target=*target;
    fromUArgs.targetLimit=targetLimit;
    fromUArgs.size=sizeof(fromUArgs);

    toUArgs.converter=sourceCnv;
    toUArgs.flush=flush;
    toUArgs.offsets=NULL;
    toUArgs.source=s;
    toUArgs.sourceLimit=sourceLimit;
    toUArgs.targetLimit=pivotLimit;
    toUArgs.size=sizeof(toUArgs);

    /*
     * TODO: Consider separating this function into two functions,
     * extracting exactly the conversion loop,
     * for readability and to reduce the set of visible variables.
     *
     * Otherwise stop using s and t from here on.
     */
    s=t=NULL;

    /*
     * conversion loop
     *
     * The sequence of steps in the loop may appear backward,
     * but the principle is simple:
     * In the chain of
     *   source - sourceCnv overflow - pivot - targetCnv overflow - target
     * empty out later buffers before refilling them from earlier ones.
     *
     * The targetCnv overflow buffer is flushed out only once before the loop.
     */
    for(;;) {
        /*
         * if(pivot not empty or error or replay or flush fromUnicode) {
         *   fromUnicode(pivot -> target);
         * }
         *
         * For pivoting conversion; and for direct conversion for
         * error callback handling and flushing the replay buffer.
         */
        if( *pivotSource<*pivotTarget ||
            U_FAILURE(*pErrorCode) ||
            targetCnv->preFromULength<0 ||
            fromUArgs.flush
        ) {
            fromUArgs.source=*pivotSource;
            fromUArgs.sourceLimit=*pivotTarget;
            _fromUnicodeWithCallback(&fromUArgs, pErrorCode);
            if(U_FAILURE(*pErrorCode)) {
                /* target overflow, or conversion error */
                *pivotSource=(UChar *)fromUArgs.source;
                break;
            }

            /*
             * _fromUnicodeWithCallback() must have consumed the pivot contents
             * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS()
             */
        }

        /* The pivot buffer is empty; reset it so we start at pivotStart. */
        *pivotSource=*pivotTarget=pivotStart;

        /*
         * if(sourceCnv overflow buffer not empty) {
         *     move(sourceCnv overflow buffer -> pivot);
         *     continue;
         * }
         */
        /* output the sourceCnv overflow buffer */
        if(sourceCnv->UCharErrorBufferLength>0) {
            if(ucnv_outputOverflowToUnicode(sourceCnv, pivotTarget, pivotLimit, NULL, pErrorCode)) {
                /* U_BUFFER_OVERFLOW_ERROR */
                *pErrorCode=U_ZERO_ERROR;
            }
            continue;
        }

        /*
         * check for end of input and break if done
         *
         * Checking both flush and fromUArgs.flush ensures that the converters
         * have been called with the flush flag set if the ucnv_convertEx()
         * caller set it.
         */
        if( toUArgs.source==sourceLimit &&
            sourceCnv->preToULength>=0 && sourceCnv->toULength==0 &&
            (!flush || fromUArgs.flush)
        ) {
            /* done successfully */
            break;
        }

        /*
         * use direct conversion if available
         * but not if continuing a partial match
         * or flushing the toUnicode replay buffer
         */
        if(convert!=NULL && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULength==0) {
            if(*pErrorCode==U_USING_DEFAULT_WARNING) {
                /* remove a warning that may be set by this function */
                *pErrorCode=U_ZERO_ERROR;
            }
            convert(&fromUArgs, &toUArgs, pErrorCode);
            if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
                break;
            } else if(U_FAILURE(*pErrorCode)) {
                if(sourceCnv->toULength>0) {
                    /*
                     * Fall through to calling _toUnicodeWithCallback()
                     * for callback handling.
                     *
                     * The pivot buffer will be reset with
                     *   *pivotSource=*pivotTarget=pivotStart;
                     * which indicates a toUnicode error to the caller
                     * (*pivotSource==pivotStart shows no pivot UChars consumed).
                     */
                } else {
                    /*
                     * Indicate a fromUnicode error to the caller
                     * (*pivotSource>pivotStart shows some pivot UChars consumed).
                     */
                    *pivotSource=*pivotTarget=pivotStart+1;
                    /*
                     * Loop around to calling _fromUnicodeWithCallbacks()
                     * for callback handling.
                     */
                    continue;
                }
            } else if(*pErrorCode==U_USING_DEFAULT_WARNING) {
                /*
                 * No error, but the implementation requested to temporarily
                 * fall back to pivoting.
                 */
                *pErrorCode=U_ZERO_ERROR;
            /*
             * The following else branches are almost identical to the end-of-input
             * handling in _toUnicodeWithCallback().
             * Avoid calling it just for the end of input.
             */
            } else if(flush && sourceCnv->toULength>0) { /* flush==toUArgs.flush */
                /*
                 * the entire input stream is consumed
                 * and there is a partial, truncated input sequence left
                 */

                /* inject an error and continue with callback handling */
                *pErrorCode=U_TRUNCATED_CHAR_FOUND;
            } else {
                /* input consumed */
                if(flush) {
                    /* reset the converters without calling the callback functions */
                    _reset(sourceCnv, UCNV_RESET_TO_UNICODE, FALSE);
                    _reset(targetCnv, UCNV_RESET_FROM_UNICODE, FALSE);
                }

                /* done successfully */
                break;
            }
        }

        /*
         * toUnicode(source -> pivot);
         *
         * For pivoting conversion; and for direct conversion for
         * error callback handling, continuing partial matches
         * and flushing the replay buffer.
         *
         * The pivot buffer is empty and reset.
         */
        toUArgs.target=pivotStart; /* ==*pivotTarget */
        /* toUArgs.targetLimit=pivotLimit; already set before the loop */
        _toUnicodeWithCallback(&toUArgs, pErrorCode);
        *pivotTarget=toUArgs.target;
        if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
            /* pivot overflow: continue with the conversion loop */
            *pErrorCode=U_ZERO_ERROR;
        } else if(U_FAILURE(*pErrorCode) || (!flush && *pivotTarget==pivotStart)) {
            /* conversion error, or there was nothing left to convert */
            break;
        }
        /*
         * else:
         * _toUnicodeWithCallback() wrote into the pivot buffer,
         * continue with fromUnicode conversion.
         *
         * Set the fromUnicode flush flag if we flush and if toUnicode has
         * processed the end of the input.
         */
        if( flush && toUArgs.source==sourceLimit &&
            sourceCnv->preToULength>=0 &&
            sourceCnv->UCharErrorBufferLength==0
        ) {
            fromUArgs.flush=TRUE;
        }
    }

    /*
     * The conversion loop is exited when one of the following is true:
     * - the entire source text has been converted successfully to the target buffer
     * - a target buffer overflow occurred
     * - a conversion error occurred
     */

    *source=toUArgs.source;
    *target=fromUArgs.target;

    /* terminate the target buffer if possible */
    if(flush && U_SUCCESS(*pErrorCode)) {
        if(*target!=targetLimit) {
            **target=0;
            if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {
                *pErrorCode=U_ZERO_ERROR;
            }
        } else {
            *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;
        }
    }
}

/*
 * internal implementation of ucnv_convert() etc. with preflighting
 *
 * Converts source (sourceLength bytes, or NUL-terminated if negative)
 * from inConverter to outConverter with ucnv_convertEx(), passing
 * reset=FALSE and flush=TRUE. If the target overflows, or targetCapacity
 * is 0, conversion continues into a local scratch buffer purely to count
 * the remaining output.
 *
 * @return The output length; when preflighting this is the full length
 *         required, as passed through u_terminateChars().
 */
static int32_t
ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter,
                     char *target, int32_t targetCapacity,
                     const char *source, int32_t sourceLength,
                     UErrorCode *pErrorCode) {
    UChar pivotBuffer[CHUNK_SIZE];
    UChar *pivot, *pivot2;

    char *myTarget;
    const char *sourceLimit;
    const char *targetLimit;
    int32_t targetLength=0;

    /* set up */
    if(sourceLength<0) {
        sourceLimit=uprv_strchr(source, 0);
    } else {
        sourceLimit=source+sourceLength;
    }

    /* if there is no input data, we're done */
    if(source==sourceLimit) {
        return u_terminateChars(target, targetCapacity, 0, pErrorCode);
    }

    pivot=pivot2=pivotBuffer;
    myTarget=target;
    targetLength=0;

    if(targetCapacity>0) {
        /* perform real conversion */
        targetLimit=target+targetCapacity;
        ucnv_convertEx(outConverter, inConverter,
                       &myTarget, targetLimit,
                       &source, sourceLimit,
                       pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
                       FALSE,
                       TRUE,
                       pErrorCode);
        targetLength=(int32_t)(myTarget-target);
    }

    /*
     * If the output buffer is exhausted (or we are only "preflighting"), we need to stop writing
     * to it but continue the conversion in order to store in targetCapacity
     * the number of bytes that was required.
     */
    if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || targetCapacity==0)
    {
        char targetBuffer[CHUNK_SIZE];

        targetLimit=targetBuffer+CHUNK_SIZE;
        do {
            /* each pass overwrites the scratch buffer; only the length is kept */
            *pErrorCode=U_ZERO_ERROR;
            myTarget=targetBuffer;
            ucnv_convertEx(outConverter, inConverter,
                           &myTarget, targetLimit,
                           &source, sourceLimit,
                           pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
                           FALSE,
                           TRUE,
                           pErrorCode);
            targetLength+=(int32_t)(myTarget-targetBuffer);
        } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);

        /* done with preflighting, set warnings and errors as appropriate */
        return u_terminateChars(target, targetCapacity, targetLength, pErrorCode);
    }

    /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */
    return targetLength;
}

/*
 * Convert a byte string between two codepages given by converter name.
 *
 * Creates both converters in local (stack) UConverter objects, converts
 * with ucnv_internalConvert() and closes both converters again.
 *
 * @param sourceLength Number of source bytes, or -1 for NUL-terminated.
 * @return The output length, or 0 on argument or converter-creation errors
 *         (U_ILLEGAL_ARGUMENT_ERROR for NULL source, sourceLength below -1,
 *         negative targetCapacity, or NULL target with non-zero capacity).
 */
U_CAPI int32_t U_EXPORT2
ucnv_convert(const char *toConverterName, const char *fromConverterName,
             char *target, int32_t targetCapacity,
             const char *source, int32_t sourceLength,
             UErrorCode *pErrorCode) {
    UConverter in, out; /* stack-allocated */
    UConverter *inConverter, *outConverter;
    int32_t targetLength;

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    if( source==NULL || sourceLength<-1 ||
        targetCapacity<0 || (targetCapacity>0 && target==NULL)
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* if there is no input data, we're done */
    if(sourceLength==0 || (sourceLength<0 && *source==0)) {
        return u_terminateChars(target, targetCapacity, 0, pErrorCode);
    }

    /* create the converters */
    inConverter=ucnv_createConverter(&in, fromConverterName, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }

    outConverter=ucnv_createConverter(&out, toConverterName, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        /* the input converter was already opened successfully */
        ucnv_close(inConverter);
        return 0;
    }

    targetLength=ucnv_internalConvert(outConverter, inConverter,
                                      target, targetCapacity,
                                      source, sourceLength,
                                      pErrorCode);

    ucnv_close(inConverter);
    ucnv_close(outConverter);

    return targetLength;
}

/*
 * @internal
 *
 * Shared implementation of ucnv_toAlgorithmic() and ucnv_fromAlgorithmic():
 * converts between cnv and an algorithmic converter of type
 * algorithmicType that is created in a local (stack) UConverter object.
 *
 * @param convertToAlgorithmic TRUE:  cnv is the source and is reset for
 *                                    to-Unicode use;
 *                             FALSE: cnv is the target and is reset for
 *                                    from-Unicode use.
 * @return The output length from ucnv_internalConvert(), or 0 on errors.
 */
static int32_t
ucnv_convertAlgorithmic(UBool convertToAlgorithmic,
                        UConverterType algorithmicType,
                        UConverter *cnv,
                        char *target, int32_t targetCapacity,
                        const char *source, int32_t sourceLength,
                        UErrorCode *pErrorCode) {
    UConverter algoConverterStatic; /* stack-allocated */
    UConverter *algoConverter, *to, *from;
    int32_t targetLength;

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    if( cnv==NULL || source==NULL || sourceLength<-1 ||
        targetCapacity<0 || (targetCapacity>0 && target==NULL)
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* if there is no input data, we're done */
    if(sourceLength==0 || (sourceLength<0 && *source==0)) {
        return u_terminateChars(target, targetCapacity, 0, pErrorCode);
    }

    /* create the algorithmic converter */
    algoConverter=ucnv_createAlgorithmicConverter(&algoConverterStatic, algorithmicType,
                                                  "", 0, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* reset the other converter */
    if(convertToAlgorithmic) {
        /* cnv->Unicode->algo */
        ucnv_resetToUnicode(cnv);
        to=algoConverter;
        from=cnv;
    } else {
        /* algo->Unicode->cnv */
        ucnv_resetFromUnicode(cnv);
        from=algoConverter;
        to=cnv;
    }

    targetLength=ucnv_internalConvert(to, from,
                                      target, targetCapacity,
                                      source, sourceLength,
                                      pErrorCode);

    ucnv_close(algoConverter);

    return targetLength;
}

/*
 * Convert from cnv's codepage to the algorithmic encoding algorithmicType.
 * Thin wrapper around ucnv_convertAlgorithmic() with
 * convertToAlgorithmic=TRUE.
 */
U_CAPI int32_t U_EXPORT2
ucnv_toAlgorithmic(UConverterType algorithmicType,
                   UConverter *cnv,
                   char *target, int32_t targetCapacity,
                   const char *source, int32_t sourceLength,
                   UErrorCode *pErrorCode) {
    return ucnv_convertAlgorithmic(TRUE, algorithmicType, cnv,
                                   target, targetCapacity,
                                   source, sourceLength,
                                   pErrorCode);
}

/*
 * Convert from the algorithmic encoding algorithmicType to cnv's codepage.
 * Thin wrapper around ucnv_convertAlgorithmic() with
 * convertToAlgorithmic=FALSE.
 */
U_CAPI int32_t U_EXPORT2
ucnv_fromAlgorithmic(UConverter *cnv,
                     UConverterType algorithmicType,
                     char *target, int32_t targetCapacity,
                     const char *source, int32_t sourceLength,
                     UErrorCode *pErrorCode) {
    return ucnv_convertAlgorithmic(FALSE, algorithmicType, cnv,
                                   target, targetCapacity,
                                   source, sourceLength,
                                   pErrorCode);
}

/*
 * Return the converter type recorded in the converter's static data.
 * When legacy conversion is compiled in and the type is UCNV_MBCS, the
 * answer is delegated to ucnv_MBCSGetType().
 * The converter pointer is dereferenced without a NULL check.
 */
U_CAPI UConverterType  U_EXPORT2
ucnv_getType(const UConverter* converter)
{
    int8_t type = converter->sharedData->staticData->conversionType;
#if !UCONFIG_NO_LEGACY_CONVERSION
    if(type == UCNV_MBCS) {
        return ucnv_MBCSGetType(converter);
    }
#endif
    return (UConverterType)type;
}

/*
 * Fill starters[] through the converter implementation's getStarters
 * function. Sets U_ILLEGAL_ARGUMENT_ERROR when the implementation does not
 * provide one. Does nothing if err is NULL or already a failure; the
 * converter pointer itself is not NULL-checked.
 */
U_CAPI void  U_EXPORT2
ucnv_getStarters(const UConverter* converter,
                 UBool starters[256],
                 UErrorCode* err)
{
    if (err == NULL || U_FAILURE(*err)) {
        return;
    }

    if(converter->sharedData->impl->getStarters != NULL) {
        converter->sharedData->impl->getStarters(converter, starters, err);
    } else {
        *err = U_ILLEGAL_ARGUMENT_ERROR;
    }
}

/*
 * Look up cnv's name (from ucnv_getName()) in ambiguousConverters[].
 *
 * @return Pointer to the matching table entry, or NULL if cnv is NULL, the
 *         name cannot be obtained, or no entry has exactly that name.
 */
static const UAmbiguousConverter *ucnv_getAmbiguous(const UConverter *cnv)
{
    UErrorCode errorCode;
    const char *name;
    int32_t i;

    if(cnv==NULL) {
        return NULL;
    }

    errorCode=U_ZERO_ERROR;
    name=ucnv_getName(cnv, &errorCode);
    if(U_FAILURE(errorCode)) {
        return NULL;
    }

    for(i=0; i<(int32_t)(sizeof(ambiguousConverters)/sizeof(UAmbiguousConverter)); ++i)
    {
        if(0==uprv_strcmp(name, ambiguousConverters[i].name))
        {
            return ambiguousConverters+i;
        }
    }

    return NULL;
}

/*
 * Operates on source[0..sourceLength) for converters listed in
 * ambiguousConverters[]; returns without touching the string for NULL
 * arguments, a non-positive length, or a converter that is not listed.
 *
 * NOTE(review): the loop below is damaged. Its condition and body, and
 * whatever followed this function up to an assignment to a useFallback
 * member, are not present; "iuseFallback" and "usesFallback" are not
 * declared in the visible text. Restore from the upstream file.
 */
U_CAPI void  U_EXPORT2
ucnv_fixFileSeparator(const UConverter *cnv,
                      UChar* source,
                      int32_t sourceLength) {
    const UAmbiguousConverter *a;
    int32_t i;
    UChar variant5c;

    if(cnv==NULL || source==NULL || sourceLength<=0 || (a=ucnv_getAmbiguous(cnv))==NULL)
    {
        return;
    }

    variant5c=a->variant5c;
    for(i=0; iuseFallback = usesFallback;
}

/*
 * Return the converter's useFallback flag.
 * cnv is dereferenced without a NULL check.
 */
U_CAPI UBool  U_EXPORT2
ucnv_usesFallback(const UConverter *cnv)
{
    return cnv->useFallback;
}

/*
 * Copy the bytes of the last invalid input sequence into errBytes.
 *
 * @param len In: capacity of errBytes. Out: number of bytes stored in the
 *            converter's invalidCharBuffer (may be 0, in which case
 *            nothing is copied).
 * @param err U_ILLEGAL_ARGUMENT_ERROR for NULL len/errBytes/converter;
 *            U_INDEX_OUTOFBOUNDS_ERROR if *len is too small.
 */
U_CAPI void  U_EXPORT2
ucnv_getInvalidChars (const UConverter * converter,
                      char *errBytes,
                      int8_t * len,
                      UErrorCode * err)
{
    if (err == NULL || U_FAILURE(*err))
    {
        return;
    }
    if (len == NULL || errBytes == NULL || converter == NULL)
    {
        *err = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    if (*len < converter->invalidCharLength)
    {
        *err = U_INDEX_OUTOFBOUNDS_ERROR;
        return;
    }
    if ((*len = converter->invalidCharLength) > 0)
    {
        uprv_memcpy (errBytes, converter->invalidCharBuffer, *len);
    }
}

/*
 * Copy the UChars of the last invalid input sequence into errChars.
 *
 * @param len In: capacity of errChars in UChars. Out: number of UChars
 *            stored in the converter's invalidUCharBuffer (may be 0, in
 *            which case nothing is copied).
 * @param err U_ILLEGAL_ARGUMENT_ERROR for NULL len/errChars/converter;
 *            U_INDEX_OUTOFBOUNDS_ERROR if *len is too small.
 */
U_CAPI void  U_EXPORT2
ucnv_getInvalidUChars (const UConverter * converter,
                       UChar *errChars,
                       int8_t * len,
                       UErrorCode * err)
{
    if (err == NULL || U_FAILURE(*err))
    {
        return;
    }
    if (len == NULL || errChars == NULL || converter == NULL)
    {
        *err = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    if (*len < converter->invalidUCharLength)
    {
        *err = U_INDEX_OUTOFBOUNDS_ERROR;
        return;
    }
    if ((*len = converter->invalidUCharLength) > 0)
    {
        /* the length is in UChars, the copy size is in bytes */
        uprv_memcpy (errChars, converter->invalidUCharBuffer, sizeof(UChar) * (*len));
    }
}

#define SIG_MAX_LEN 5

U_CAPI const char* U_EXPORT2
ucnv_detectUnicodeSignature( const char* source,
                             int32_t sourceLength,
                             int32_t* signatureLength,
                             UErrorCode* pErrorCode) {
    int32_t dummy;

    /*
     * NOTE(review): the comment that starts on the next line is never
     * closed before the end-of-file comment. As the file stands, the rest
     * of this function, the tail of a pending-count function that returns
     * values derived from preFromUFirstCP / preFromULength / fromUChar32,
     * all of ucnv_toUCountPending() and ucnv_isFixedWidth(), and the final
     * #endif are lexically comment text, so the opening
     * "#if !UCONFIG_NO_CONVERSION" has no live counterpart. Text is
     * missing after "if we read". Restore from the upstream file.
     * Also worth checking once restored: ucnv_isFixedWidth() evaluates
     * U_FAILURE(*status) without first testing status for NULL, unlike
     * ucnv_toUCountPending() just before it.
     */
    /* initial 0xa5 bytes: make sure that if we read preFromUFirstCP >= 0){
        return U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength ;
    }else if(cnv->preFromULength < 0){
        return -cnv->preFromULength ;
    }else if(cnv->fromUChar32 > 0){
        return 1;
    }
    return 0;

}

U_CAPI int32_t U_EXPORT2
ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status){

    if(status == NULL || U_FAILURE(*status)){
        return -1;
    }
    if(cnv == NULL){
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return -1;
    }

    if(cnv->preToULength > 0){
        return cnv->preToULength ;
    }else if(cnv->preToULength < 0){
        return -cnv->preToULength;
    }else if(cnv->toULength > 0){
        return cnv->toULength;
    }
    return 0;
}

U_CAPI UBool U_EXPORT2
ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status){
    if (U_FAILURE(*status)) {
        return FALSE;
    }

    if (cnv == NULL) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return FALSE;
    }

    switch (ucnv_getType(cnv)) {
        case UCNV_SBCS:
        case UCNV_DBCS:
        case UCNV_UTF32_BigEndian:
        case UCNV_UTF32_LittleEndian:
        case UCNV_UTF32:
        case UCNV_US_ASCII:
            return TRUE;
        default:
            return FALSE;
    }
}
#endif

/*
 * Hey, Emacs, please set the following:
 *
 * Local Variables:
 * indent-tabs-mode: nil
 * End:
 *
 */