1999-08-16 21:50:52 +00:00
|
|
|
/*
|
2000-01-13 23:54:23 +00:00
|
|
|
*******************************************************************************
|
|
|
|
*
|
2000-02-05 00:19:15 +00:00
|
|
|
* Copyright (C) 2000, International Business Machines
|
2000-01-13 23:54:23 +00:00
|
|
|
* Corporation and others. All Rights Reserved.
|
|
|
|
*
|
|
|
|
*******************************************************************************
|
|
|
|
*
|
|
|
|
* uconv_cnv.c:
|
|
|
|
* Implements all the low level conversion functions
|
|
|
|
* T_UnicodeConverter_{to,from}Unicode_$ConversionType
|
|
|
|
*
|
2000-06-27 20:47:56 +00:00
|
|
|
* Change history:
|
|
|
|
*
|
|
|
|
* 06/29/2000 helena Major rewrite of the callback APIs.
|
2000-01-13 23:54:23 +00:00
|
|
|
*/
|
1999-08-16 21:50:52 +00:00
|
|
|
|
1999-12-28 23:39:02 +00:00
|
|
|
#include "unicode/utypes.h"
|
|
|
|
#include "unicode/ucnv_err.h"
|
1999-08-16 21:50:52 +00:00
|
|
|
#include "ucnv_cnv.h"
|
1999-12-28 23:39:02 +00:00
|
|
|
#include "unicode/ucnv.h"
|
1999-08-16 21:50:52 +00:00
|
|
|
#include "cmemory.h"
|
|
|
|
|
|
|
|
/*Empties the internal unicode output buffer */
|
|
|
|
void flushInternalUnicodeBuffer (UConverter * _this,
|
2000-01-19 01:27:30 +00:00
|
|
|
UChar * myTarget,
|
|
|
|
int32_t * myTargetIndex,
|
|
|
|
int32_t targetLength,
|
|
|
|
int32_t** offsets,
|
|
|
|
UErrorCode * err)
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
|
|
|
int32_t myUCharErrorBufferLength = _this->UCharErrorBufferLength;
|
|
|
|
|
|
|
|
if (myUCharErrorBufferLength <= targetLength)
|
|
|
|
{
|
|
|
|
/*we have enough space
|
|
|
|
*So we just copy the whole Error Buffer in to the output stream*/
|
1999-12-28 23:39:02 +00:00
|
|
|
uprv_memcpy (myTarget,
|
2000-01-19 01:27:30 +00:00
|
|
|
_this->UCharErrorBuffer,
|
|
|
|
sizeof (UChar) * myUCharErrorBufferLength);
|
1999-08-16 21:50:52 +00:00
|
|
|
if (offsets)
|
2000-01-19 01:27:30 +00:00
|
|
|
{
|
|
|
|
int32_t i=0;
|
|
|
|
for (i=0; i<myUCharErrorBufferLength;i++) (*offsets)[i] = -1;
|
|
|
|
*offsets += myUCharErrorBufferLength;
|
|
|
|
}
|
1999-08-16 21:50:52 +00:00
|
|
|
*myTargetIndex += myUCharErrorBufferLength;
|
|
|
|
_this->UCharErrorBufferLength = 0;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* We don't have enough space so we copy as much as we can
|
|
|
|
* on the output stream and update the object
|
|
|
|
* by updating the internal buffer*/
|
1999-12-28 23:39:02 +00:00
|
|
|
uprv_memcpy (myTarget, _this->UCharErrorBuffer, sizeof (UChar) * targetLength);
|
1999-08-16 21:50:52 +00:00
|
|
|
if (offsets)
|
2000-01-19 01:27:30 +00:00
|
|
|
{
|
|
|
|
int32_t i=0;
|
|
|
|
for (i=0; i< targetLength;i++) (*offsets)[i] = -1;
|
|
|
|
*offsets += targetLength;
|
|
|
|
}
|
1999-12-28 23:39:02 +00:00
|
|
|
uprv_memmove (_this->UCharErrorBuffer,
|
2000-01-19 01:27:30 +00:00
|
|
|
_this->UCharErrorBuffer + targetLength,
|
|
|
|
sizeof (UChar) * (myUCharErrorBufferLength - targetLength));
|
1999-08-16 21:50:52 +00:00
|
|
|
_this->UCharErrorBufferLength -= (int8_t) targetLength;
|
|
|
|
*myTargetIndex = targetLength;
|
2000-08-11 19:23:50 +00:00
|
|
|
*err = U_BUFFER_OVERFLOW_ERROR;
|
1999-08-16 21:50:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*Empties the internal codepage output buffer */
|
|
|
|
void flushInternalCharBuffer (UConverter * _this,
|
2000-01-19 01:27:30 +00:00
|
|
|
char *myTarget,
|
|
|
|
int32_t * myTargetIndex,
|
|
|
|
int32_t targetLength,
|
|
|
|
int32_t** offsets,
|
|
|
|
UErrorCode * err)
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
|
|
|
int32_t myCharErrorBufferLength = _this->charErrorBufferLength;
|
|
|
|
|
|
|
|
/*we have enough space */
|
|
|
|
if (myCharErrorBufferLength <= targetLength)
|
|
|
|
{
|
1999-12-28 23:39:02 +00:00
|
|
|
uprv_memcpy (myTarget, _this->charErrorBuffer, myCharErrorBufferLength);
|
1999-08-16 21:50:52 +00:00
|
|
|
if (offsets)
|
2000-01-19 01:27:30 +00:00
|
|
|
{
|
|
|
|
int32_t i=0;
|
|
|
|
for (i=0; i<myCharErrorBufferLength;i++) (*offsets)[i] = -1;
|
|
|
|
*offsets += myCharErrorBufferLength;
|
|
|
|
}
|
1999-08-16 21:50:52 +00:00
|
|
|
|
|
|
|
*myTargetIndex += myCharErrorBufferLength;
|
|
|
|
_this->charErrorBufferLength = 0;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
/* We don't have enough space so we copy as much as we can
|
|
|
|
* on the output stream and update the object*/
|
|
|
|
{
|
1999-12-28 23:39:02 +00:00
|
|
|
uprv_memcpy (myTarget, _this->charErrorBuffer, targetLength);
|
1999-08-16 21:50:52 +00:00
|
|
|
if (offsets)
|
2000-01-19 01:27:30 +00:00
|
|
|
{
|
|
|
|
int32_t i=0;
|
|
|
|
for (i=0; i< targetLength;i++) (*offsets)[i] = -1;
|
|
|
|
*offsets += targetLength;
|
|
|
|
}
|
1999-12-28 23:39:02 +00:00
|
|
|
uprv_memmove (_this->charErrorBuffer,
|
2000-01-19 01:27:30 +00:00
|
|
|
_this->charErrorBuffer + targetLength,
|
|
|
|
(myCharErrorBufferLength - targetLength));
|
1999-08-16 21:50:52 +00:00
|
|
|
_this->charErrorBufferLength -= (int8_t) targetLength;
|
|
|
|
*myTargetIndex = targetLength;
|
2000-08-11 19:23:50 +00:00
|
|
|
*err = U_BUFFER_OVERFLOW_ERROR;
|
1999-08-16 21:50:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
2000-07-17 21:30:13 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* This function is useful for implementations of getNextUChar().
|
|
|
|
* After a call to a callback function or to toUnicode(), an output buffer
|
|
|
|
* begins with a Unicode code point that needs to be returned as UChar32,
|
|
|
|
* and all following code units must be prepended to the - potentially
|
|
|
|
* prefilled - overflow buffer in the UConverter.
|
|
|
|
* The buffer should be at least of capacity UTF_MAX_CHAR_LENGTH so that a
|
|
|
|
* complete UChar32's UChars fit into it.
|
|
|
|
*
|
|
|
|
* @param cnv The converter that will get remaining UChars copied to its overflow area.
|
|
|
|
* @param buffer An array of UChars that was passed into a callback function
|
|
|
|
* or a toUnicode() function.
|
|
|
|
* @param length The number of code units (UChars) that are actually in the buffer.
|
|
|
|
* This must be >0.
|
|
|
|
* @return The code point from the first UChars in the buffer.
|
|
|
|
*/
|
|
|
|
U_CFUNC UChar32
|
|
|
|
ucnv_getUChar32KeepOverflow(UConverter *cnv, const UChar *buffer, int32_t length) {
|
|
|
|
UChar32 c;
|
|
|
|
int32_t i;
|
|
|
|
|
|
|
|
if(length<=0) {
|
|
|
|
return 0xffff;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* get the first code point in the buffer */
|
|
|
|
i=0;
|
2000-12-12 22:11:08 +00:00
|
|
|
UTF_NEXT_CHAR_SAFE(buffer, i, length, c, FALSE);
|
2000-07-17 21:30:13 +00:00
|
|
|
if(i<length) {
|
|
|
|
/* there are UChars left in the buffer that need to go into the overflow buffer */
|
|
|
|
UChar *overflow=cnv->UCharErrorBuffer;
|
|
|
|
int32_t j=cnv->UCharErrorBufferLength;
|
|
|
|
|
|
|
|
if(j>0) {
|
|
|
|
/* move the overflow buffer contents to make room for the extra UChars */
|
|
|
|
int32_t k;
|
|
|
|
|
|
|
|
cnv->UCharErrorBufferLength=(int8_t)(k=(length-i)+j);
|
|
|
|
do {
|
|
|
|
overflow[--k]=overflow[--j];
|
|
|
|
} while(j>0);
|
|
|
|
} else {
|
|
|
|
cnv->UCharErrorBufferLength=(int8_t)(length-i);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* copy the remaining UChars to the beginning of the overflow buffer */
|
|
|
|
do {
|
|
|
|
overflow[j++]=buffer[i++];
|
|
|
|
} while(i<length);
|
|
|
|
}
|
|
|
|
return c;
|
|
|
|
}
|
2000-11-07 22:37:01 +00:00
|
|
|
|
|
|
|
/* update target offsets after a callback call */
|
|
|
|
U_CFUNC int32_t *
|
|
|
|
ucnv_updateCallbackOffsets(int32_t *offsets, int32_t length, int32_t sourceIndex) {
|
|
|
|
if(offsets!=NULL) {
|
|
|
|
if(sourceIndex>=0) {
|
|
|
|
/* add the sourceIndex to the relative offsets that the callback wrote */
|
|
|
|
while(length>0) {
|
|
|
|
*offsets+=sourceIndex;
|
|
|
|
++offsets;
|
|
|
|
--length;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* sourceIndex==-1, set -1 offsets */
|
|
|
|
while(length>0) {
|
|
|
|
*offsets=-1;
|
|
|
|
++offsets;
|
|
|
|
--length;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return offsets;
|
|
|
|
} else {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
}
|
2000-12-19 00:29:27 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* This is a simple implementation of ucnv_getNextUChar() that uses the
|
|
|
|
* converter's toUnicode() function. See ucnv_cnv.h for details.
|
|
|
|
*/
|
|
|
|
U_CFUNC UChar32
|
|
|
|
ucnv_getNextUCharFromToUImpl(UConverterToUnicodeArgs *pArgs,
|
2000-12-19 01:21:32 +00:00
|
|
|
T_ToUnicodeFunction toU,
|
|
|
|
UBool collectPairs,
|
|
|
|
UErrorCode *pErrorCode) {
|
2000-12-19 00:29:27 +00:00
|
|
|
UChar buffer[UTF_MAX_CHAR_LENGTH];
|
|
|
|
const char *realLimit=pArgs->sourceLimit;
|
|
|
|
|
|
|
|
pArgs->target=buffer;
|
|
|
|
pArgs->targetLimit=buffer+UTF_MAX_CHAR_LENGTH;
|
|
|
|
|
|
|
|
while(pArgs->source<realLimit) {
|
|
|
|
/* feed in one byte at a time to make sure to get only one character out */
|
|
|
|
pArgs->sourceLimit=pArgs->source+1;
|
|
|
|
pArgs->flush= (UBool)(pArgs->sourceLimit==realLimit);
|
|
|
|
|
2000-12-19 01:21:32 +00:00
|
|
|
/* convert this byte and check the result */
|
2000-12-19 00:29:27 +00:00
|
|
|
toU(pArgs, pErrorCode);
|
|
|
|
if(U_SUCCESS(*pErrorCode)) {
|
|
|
|
int32_t length=pArgs->target-buffer;
|
|
|
|
|
2000-12-19 01:21:32 +00:00
|
|
|
/* this test is UTF-16 specific */
|
2000-12-19 00:29:27 +00:00
|
|
|
if(/* some output and
|
2000-12-19 01:21:32 +00:00
|
|
|
(source consumed or don't collect surrogate pairs or not a surrogate or a surrogate pair) */
|
2000-12-19 00:29:27 +00:00
|
|
|
length>0 &&
|
|
|
|
(pArgs->flush || !collectPairs || !UTF_IS_FIRST_SURROGATE(buffer[0]) || length==2)
|
|
|
|
) {
|
|
|
|
return ucnv_getUChar32KeepOverflow(pArgs->converter, buffer, length);
|
|
|
|
}
|
2000-12-19 01:21:32 +00:00
|
|
|
/* else continue with the loop */
|
|
|
|
} else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
|
2000-12-19 00:29:27 +00:00
|
|
|
*pErrorCode=U_ZERO_ERROR;
|
|
|
|
return ucnv_getUChar32KeepOverflow(pArgs->converter, buffer, UTF_MAX_CHAR_LENGTH);
|
|
|
|
} else {
|
2000-12-19 01:21:32 +00:00
|
|
|
/* U_FAILURE() */
|
2000-12-19 00:29:27 +00:00
|
|
|
return 0xffff;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* no output because of empty input or only state changes and skipping callbacks */
|
|
|
|
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
|
|
|
return 0xffff;
|
|
|
|
}
|