69ba12f77c
X-SVN-Rev: 1410
488 lines
14 KiB
C++
488 lines
14 KiB
C++
/*
|
|
**********************************************************************
|
|
* Copyright (C) 1998-1999, International Business Machines
|
|
* Corporation and others. All Rights Reserved.
|
|
**********************************************************************
|
|
*/
|
|
/* C++ wrappers for the ICUs Codeset Conversion Routines*/
|
|
|
|
class Locale;
|
|
class UnicodeString;
|
|
class Mutex;
|
|
|
|
#include "unicode/utypes.h"
|
|
#include "unicode/resbund.h"
|
|
#include "cmemory.h"
|
|
#include "mutex.h"
|
|
extern "C" {
|
|
#include "ucnv_io.h"
|
|
#include "unicode/ucnv_bld.h"
|
|
#include "unicode/ucnv.h"
|
|
}
|
|
#include "unicode/convert.h"
|
|
|
|
/* list of converter and alias names */
|
|
const char **UnicodeConverterCPP::availableConverterNames=NULL;
|
|
int32_t UnicodeConverterCPP::availableConverterNamesCount=0;
|
|
|
|
UnicodeConverterCPP::UnicodeConverterCPP()
|
|
{
|
|
UErrorCode err = U_ZERO_ERROR;
|
|
myUnicodeConverter = ucnv_open(NULL, &err);
|
|
}
|
|
UnicodeConverterCPP::UnicodeConverterCPP(const char* name, UErrorCode& err)
|
|
{
|
|
myUnicodeConverter = ucnv_open(name, &err);
|
|
}
|
|
|
|
UnicodeConverterCPP::UnicodeConverterCPP(const UnicodeString& name, UErrorCode& err)
|
|
{
|
|
char myName[UCNV_MAX_CONVERTER_NAME_LENGTH];
|
|
int i;
|
|
name.extract(0, i = name.length(), myName);
|
|
myName[i]='\0';
|
|
myUnicodeConverter = ucnv_open(myName, &err);
|
|
}
|
|
|
|
|
|
UnicodeConverterCPP::UnicodeConverterCPP(int32_t codepageNumber,
|
|
UConverterPlatform platform,
|
|
UErrorCode& err)
|
|
{
|
|
myUnicodeConverter = ucnv_openCCSID(codepageNumber,
|
|
platform,
|
|
&err);
|
|
}
|
|
|
|
UnicodeConverterCPP& UnicodeConverterCPP::operator=(const UnicodeConverterCPP& that)
|
|
{
|
|
{
|
|
/*Decrements the overwritten converter's ref count
|
|
*Increments the assigner converter's ref count
|
|
*/
|
|
Mutex updateReferenceCounters;
|
|
if (myUnicodeConverter->sharedData->referenceCounter != 0 && myUnicodeConverter->sharedData->referenceCounter != (uint32_t) ~0) {
|
|
myUnicodeConverter->sharedData->referenceCounter--;
|
|
}
|
|
if (that.myUnicodeConverter->sharedData->referenceCounter != (uint32_t) ~0) {
|
|
that.myUnicodeConverter->sharedData->referenceCounter++;
|
|
}
|
|
}
|
|
|
|
*myUnicodeConverter = *(that.myUnicodeConverter);
|
|
return *this;
|
|
}
|
|
|
|
UBool UnicodeConverterCPP::operator==(const UnicodeConverterCPP& that) const
|
|
{
|
|
if ((myUnicodeConverter->sharedData == that.myUnicodeConverter->sharedData) &&
|
|
(myUnicodeConverter->fromCharErrorBehaviour == that.myUnicodeConverter->fromCharErrorBehaviour) &&
|
|
(myUnicodeConverter->toUnicodeStatus == that.myUnicodeConverter->toUnicodeStatus) &&
|
|
(myUnicodeConverter->subCharLen == that.myUnicodeConverter->subCharLen) &&
|
|
(uprv_memcmp(myUnicodeConverter->subChar, that.myUnicodeConverter->subChar, myUnicodeConverter->subCharLen) == 0) &&
|
|
(myUnicodeConverter->UCharErrorBufferLength == that.myUnicodeConverter->UCharErrorBufferLength) &&
|
|
(myUnicodeConverter->charErrorBufferLength == that.myUnicodeConverter->charErrorBufferLength) &&
|
|
(uprv_memcmp(myUnicodeConverter->UCharErrorBuffer, that.myUnicodeConverter->UCharErrorBuffer, myUnicodeConverter->UCharErrorBufferLength) == 0) &&
|
|
(uprv_memcmp(myUnicodeConverter->charErrorBuffer, that.myUnicodeConverter->charErrorBuffer, myUnicodeConverter->charErrorBufferLength) == 0) &&
|
|
(myUnicodeConverter->fromUCharErrorBehaviour == that.myUnicodeConverter->fromUCharErrorBehaviour))
|
|
return TRUE;
|
|
else return FALSE;
|
|
}
|
|
|
|
UBool UnicodeConverterCPP::operator!=(const UnicodeConverterCPP& that) const
|
|
{
|
|
return !(*this == that);
|
|
}
|
|
|
|
UnicodeConverterCPP::UnicodeConverterCPP(const UnicodeConverterCPP& that)
|
|
{
|
|
/*increments the referenceCounter to let the static table know
|
|
*it has one more client
|
|
*/
|
|
myUnicodeConverter = new UConverter;
|
|
{
|
|
Mutex updateReferenceCounter;
|
|
if (that.myUnicodeConverter->sharedData->referenceCounter != (uint32_t) ~0) {
|
|
that.myUnicodeConverter->sharedData->referenceCounter++;
|
|
}
|
|
}
|
|
*myUnicodeConverter = *(that.myUnicodeConverter);
|
|
}
|
|
|
|
|
|
UnicodeConverterCPP::~UnicodeConverterCPP()
|
|
{
|
|
ucnv_close(myUnicodeConverter);
|
|
}
|
|
|
|
void
|
|
UnicodeConverterCPP::fromUnicodeString(char* target,
|
|
int32_t& targetSize,
|
|
const UnicodeString& source,
|
|
UErrorCode& err) const
|
|
{
|
|
const UChar* mySource = NULL;
|
|
int32_t mySourceLength = 0;
|
|
UConverter myConverter;
|
|
char *myTarget = NULL;
|
|
|
|
if (U_FAILURE(err)) return;
|
|
|
|
if ((myUnicodeConverter == NULL) || source.isBogus() || (targetSize <= 0))
|
|
{
|
|
err = U_ILLEGAL_ARGUMENT_ERROR;
|
|
return;
|
|
}
|
|
|
|
/*makes a local copy of the UnicodeConverter*/
|
|
myConverter = *myUnicodeConverter;
|
|
|
|
/*Removes all state info on the UnicodeConverter*/
|
|
ucnv_reset(&myConverter);
|
|
|
|
|
|
mySourceLength = source.length();
|
|
mySource = source.getArrayStart();
|
|
myTarget = target;
|
|
ucnv_fromUnicode(&myConverter,
|
|
&myTarget,
|
|
target + targetSize,
|
|
&mySource,
|
|
mySource + mySourceLength,
|
|
NULL,
|
|
TRUE,
|
|
&err);
|
|
targetSize = myTarget - target;
|
|
|
|
return;
|
|
}
|
|
|
|
void
|
|
UnicodeConverterCPP::toUnicodeString(UnicodeString& target,
|
|
const char* source,
|
|
int32_t sourceSize,
|
|
UErrorCode& err) const
|
|
{
|
|
const char* mySource = source;
|
|
const char* mySourceLimit = source + sourceSize;
|
|
UChar* myTargetUChars = NULL;
|
|
UChar* myTargetUCharsAlias = NULL;
|
|
int32_t myTargetUCharsLength = 0;
|
|
UConverter myConverter;
|
|
|
|
if (U_FAILURE(err)) return;
|
|
if ((myUnicodeConverter == NULL) || target.isBogus() || (sourceSize <= 0))
|
|
{
|
|
err = U_ILLEGAL_ARGUMENT_ERROR;
|
|
return;
|
|
}
|
|
|
|
/*makes a local bitwise copy of the UnicodeConverter*/
|
|
myConverter = *myUnicodeConverter;
|
|
|
|
/*Removes all state info on the UnicodeConverter*/
|
|
ucnv_reset(&myConverter);
|
|
/*Allocates the theoritically (Not counting added bytes from the error functions) max buffer
|
|
*on a "normal" call, only one iteration will be necessary.
|
|
*/
|
|
myTargetUChars =
|
|
(UChar*)uprv_malloc(sizeof(UChar)*(myTargetUCharsLength = (sourceSize/(int32_t)getMinBytesPerChar())));
|
|
|
|
if (myTargetUChars == NULL)
|
|
{
|
|
err = U_MEMORY_ALLOCATION_ERROR;
|
|
return;
|
|
}
|
|
/*renders the target clean*/
|
|
target.remove();
|
|
|
|
/*Will loop until (re-use the same buffer) until no more memory is requested
|
|
*or an error (other than INDEX_OUTOF_BOUNDS) is encountered
|
|
*/
|
|
do
|
|
{
|
|
err = U_ZERO_ERROR;
|
|
myTargetUCharsAlias = myTargetUChars;
|
|
ucnv_toUnicode(&myConverter,
|
|
&myTargetUCharsAlias,
|
|
myTargetUChars + myTargetUCharsLength,
|
|
&mySource,
|
|
mySourceLimit,
|
|
NULL,
|
|
TRUE,
|
|
&err);
|
|
|
|
/*appends what we got thus far to the UnicodeString*/
|
|
target.replace((UTextOffset)target.length(),
|
|
myTargetUCharsAlias - myTargetUChars,
|
|
myTargetUChars,
|
|
myTargetUCharsAlias - myTargetUChars);
|
|
/*Checks for the integrity of target (UnicodeString) as it adds data to it*/
|
|
if (target.isBogus()) err = U_MEMORY_ALLOCATION_ERROR;
|
|
} while (err == U_INDEX_OUTOFBOUNDS_ERROR);
|
|
|
|
|
|
uprv_free(myTargetUChars);
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
|
|
void
|
|
UnicodeConverterCPP::fromUnicode(char*& target,
|
|
const char* targetLimit,
|
|
const UChar*& source,
|
|
const UChar* sourceLimit,
|
|
int32_t *offsets,
|
|
UBool flush,
|
|
UErrorCode& err)
|
|
{
|
|
ucnv_fromUnicode(myUnicodeConverter,
|
|
&target,
|
|
targetLimit,
|
|
&source,
|
|
sourceLimit,
|
|
offsets,
|
|
flush,
|
|
&err);
|
|
}
|
|
|
|
|
|
|
|
void
|
|
UnicodeConverterCPP::toUnicode(UChar*& target,
|
|
const UChar* targetLimit,
|
|
const char*& source,
|
|
const char* sourceLimit,
|
|
int32_t* offsets,
|
|
UBool flush,
|
|
UErrorCode& err)
|
|
{
|
|
ucnv_toUnicode(myUnicodeConverter,
|
|
&target,
|
|
targetLimit,
|
|
&source,
|
|
sourceLimit,
|
|
offsets,
|
|
flush,
|
|
&err);
|
|
}
|
|
|
|
const char*
|
|
UnicodeConverterCPP::getName(UErrorCode& err) const
|
|
{
|
|
return ucnv_getName(myUnicodeConverter, &err);
|
|
}
|
|
|
|
int8_t
|
|
UnicodeConverterCPP::getMaxBytesPerChar() const
|
|
{
|
|
return ucnv_getMaxCharSize(myUnicodeConverter);
|
|
}
|
|
|
|
int8_t
|
|
UnicodeConverterCPP::getMinBytesPerChar() const
|
|
{
|
|
return ucnv_getMinCharSize(myUnicodeConverter);
|
|
}
|
|
|
|
void
|
|
UnicodeConverterCPP::getSubstitutionChars(char* subChars,
|
|
int8_t& len,
|
|
UErrorCode& err) const
|
|
{
|
|
ucnv_getSubstChars(myUnicodeConverter,
|
|
subChars,
|
|
&len,
|
|
&err);
|
|
}
|
|
|
|
void
|
|
UnicodeConverterCPP::setSubstitutionChars(const char* subChars,
|
|
int8_t len,
|
|
UErrorCode& err)
|
|
{
|
|
ucnv_setSubstChars(myUnicodeConverter,
|
|
subChars,
|
|
len,
|
|
&err);
|
|
}
|
|
|
|
|
|
void
|
|
UnicodeConverterCPP::resetState()
|
|
{
|
|
ucnv_reset(myUnicodeConverter);
|
|
}
|
|
|
|
|
|
int32_t
|
|
UnicodeConverterCPP::getCodepage(UErrorCode& err) const
|
|
{
|
|
return ucnv_getCCSID(myUnicodeConverter, &err);
|
|
}
|
|
|
|
UConverterToUCallback
|
|
UnicodeConverterCPP::getMissingCharAction() const
|
|
{
|
|
return ucnv_getToUCallBack(myUnicodeConverter);
|
|
}
|
|
|
|
UConverterFromUCallback
|
|
UnicodeConverterCPP::getMissingUnicodeAction() const
|
|
{
|
|
return ucnv_getFromUCallBack(myUnicodeConverter);
|
|
}
|
|
|
|
|
|
void
|
|
UnicodeConverterCPP::setMissingCharAction(UConverterToUCallback action,
|
|
UErrorCode& err)
|
|
{
|
|
ucnv_setToUCallBack(myUnicodeConverter, action, &err);
|
|
}
|
|
|
|
void
|
|
UnicodeConverterCPP::setMissingUnicodeAction(UConverterFromUCallback action,
|
|
UErrorCode& err)
|
|
{
|
|
ucnv_setFromUCallBack(myUnicodeConverter, action, &err);
|
|
}
|
|
|
|
|
|
void
|
|
UnicodeConverterCPP::getDisplayName(const Locale& displayLocale,
|
|
UnicodeString& displayName) const
|
|
{
|
|
|
|
UErrorCode err = U_ZERO_ERROR;
|
|
UChar name[UCNV_MAX_CONVERTER_NAME_LENGTH];
|
|
int32_t length = ucnv_getDisplayName(myUnicodeConverter, displayLocale.getName(), name, sizeof(name), &err);
|
|
if (U_SUCCESS(err))
|
|
{
|
|
displayName.replace(0, 0x7fffffff, name, length);
|
|
}
|
|
|
|
else
|
|
{
|
|
/*Error While creating the resource bundle use the internal name instead*/
|
|
displayName.remove();
|
|
displayName = getName(err); /*Get the raw ASCII name*/
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
UConverterPlatform
|
|
UnicodeConverterCPP::getCodepagePlatform(UErrorCode &err) const
|
|
{
|
|
return ucnv_getPlatform(myUnicodeConverter, &err);
|
|
}
|
|
|
|
UConverterType UnicodeConverterCPP::getType() const
|
|
{
|
|
return ucnv_getType(myUnicodeConverter);
|
|
}
|
|
|
|
void UnicodeConverterCPP::getStarters(UBool starters[256],
|
|
UErrorCode& err) const
|
|
{
|
|
ucnv_getStarters(myUnicodeConverter,
|
|
starters,
|
|
&err);
|
|
return;
|
|
}
|
|
|
|
const char* const*
|
|
UnicodeConverterCPP::getAvailableNames(int32_t& num, UErrorCode& err)
|
|
{
|
|
if(U_FAILURE(err)) {
|
|
num = 0;
|
|
return NULL;
|
|
}
|
|
if (availableConverterNames==NULL) {
|
|
int32_t count = ucnv_io_countAvailableConverters(&err);
|
|
if (count > 0) {
|
|
const char **names = new const char *[count];
|
|
if (names != NULL) {
|
|
ucnv_io_fillAvailableConverters(names, &err);
|
|
|
|
/* in the mutex block, set the data for this process */
|
|
umtx_lock(0);
|
|
if (availableConverterNames == NULL) {
|
|
availableConverterNamesCount = count;
|
|
availableConverterNames = names;
|
|
names = 0;
|
|
}
|
|
umtx_unlock(0);
|
|
|
|
/* if a different thread set it first, then delete the extra data */
|
|
if (names != 0) {
|
|
delete [] names;
|
|
}
|
|
} else {
|
|
num = 0;
|
|
err = U_MEMORY_ALLOCATION_ERROR;
|
|
return NULL;
|
|
}
|
|
}
|
|
}
|
|
num = availableConverterNamesCount;
|
|
return availableConverterNames;
|
|
}
|
|
|
|
int32_t UnicodeConverterCPP::flushCache()
|
|
{
|
|
return ucnv_flushCache();
|
|
}
|
|
|
|
/* HSYS: To be cleaned up. The usage of UChar* and UnicodeString in
|
|
the C++ APIs need to be revisited. */
|
|
void UnicodeConverterCPP::fixFileSeparator(UnicodeString& source) const
|
|
{
|
|
int32_t i = 0;
|
|
int32_t index = 0;
|
|
int32_t ccsid = 0;
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
if (source.length() == 0)
|
|
{
|
|
return;
|
|
}
|
|
ccsid = getCodepage(status);
|
|
if (U_FAILURE(status))
|
|
{
|
|
return;
|
|
}
|
|
for (i = 0; i < UCNV_MAX_AMBIGUOUSCCSIDS; i++) {
|
|
if (ccsid == UCNV_AMBIGUOUSCONVERTERS[i].ccsid)
|
|
{
|
|
index = i;
|
|
break;
|
|
}
|
|
}
|
|
if (index != -1)
|
|
{
|
|
for (i = 0; i < source.length(); i++)
|
|
{
|
|
if (source[i] == UCNV_AMBIGUOUSCONVERTERS[index].mismapped)
|
|
{
|
|
source[i] = UCNV_AMBIGUOUSCONVERTERS[index].replacement;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
UBool UnicodeConverterCPP::isAmbiguous(void) const
|
|
{
|
|
return ucnv_isAmbiguous(myUnicodeConverter);
|
|
}
|
|
|
|
|
|
|
|
|
|
|