scuffed-code/icu4c/source/common/convert.cpp

482 lines
13 KiB
C++
Raw Normal View History

/*
**********************************************************************
* Copyright (C) 1998-1999, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
1999-08-16 21:50:52 +00:00
/* C++ wrappers for the ICUs Codeset Conversion Routines*/
class Locale;
class UnicodeString;
class Mutex;
#include "unicode/utypes.h"
#include "unicode/resbund.h"
1999-08-16 21:50:52 +00:00
#include "cmemory.h"
#include "mutex.h"
extern "C" {
#include "ucnv_io.h"
#include "unicode/ucnv_bld.h"
#include "unicode/ucnv.h"
1999-08-16 21:50:52 +00:00
}
#include "unicode/convert.h"
1999-08-16 21:50:52 +00:00
/* list of converter and alias names */
const char **UnicodeConverterCPP::availableConverterNames=NULL;
int32_t UnicodeConverterCPP::availableConverterNamesCount=0;
1999-08-16 21:50:52 +00:00
UnicodeConverterCPP::UnicodeConverterCPP()
{
UErrorCode err = U_ZERO_ERROR;
1999-08-16 21:50:52 +00:00
myUnicodeConverter = ucnv_open(NULL, &err);
}
UnicodeConverterCPP::UnicodeConverterCPP(const char* name, UErrorCode& err)
{
myUnicodeConverter = ucnv_open(name, &err);
}
UnicodeConverterCPP::UnicodeConverterCPP(const UnicodeString& name, UErrorCode& err)
{
char myName[UCNV_MAX_CONVERTER_NAME_LENGTH];
1999-08-16 21:50:52 +00:00
int i;
name.extract(0, i = name.length(), myName);
1999-08-16 21:50:52 +00:00
myName[i]='\0';
myUnicodeConverter = ucnv_open(myName, &err);
}
UnicodeConverterCPP::UnicodeConverterCPP(int32_t codepageNumber,
UConverterPlatform platform,
1999-08-16 21:50:52 +00:00
UErrorCode& err)
{
myUnicodeConverter = ucnv_openCCSID(codepageNumber,
platform,
&err);
}
UnicodeConverterCPP& UnicodeConverterCPP::operator=(const UnicodeConverterCPP& that)
{
{
/*Decrements the overwritten converter's ref count
*Increments the assigner converter's ref count
*/
Mutex updateReferenceCounters;
myUnicodeConverter->sharedData->referenceCounter--;
that.myUnicodeConverter->sharedData->referenceCounter++;
}
*myUnicodeConverter = *(that.myUnicodeConverter);
return *this;
}
bool_t UnicodeConverterCPP::operator==(const UnicodeConverterCPP& that) const
{
if ((myUnicodeConverter->sharedData == that.myUnicodeConverter->sharedData) &&
(myUnicodeConverter->fromCharErrorBehaviour == that.myUnicodeConverter->fromCharErrorBehaviour) &&
(myUnicodeConverter->toUnicodeStatus == that.myUnicodeConverter->toUnicodeStatus) &&
(myUnicodeConverter->subCharLen == that.myUnicodeConverter->subCharLen) &&
(uprv_memcmp(myUnicodeConverter->subChar, that.myUnicodeConverter->subChar, myUnicodeConverter->subCharLen) == 0) &&
1999-08-16 21:50:52 +00:00
(myUnicodeConverter->UCharErrorBufferLength == that.myUnicodeConverter->UCharErrorBufferLength) &&
(myUnicodeConverter->charErrorBufferLength == that.myUnicodeConverter->charErrorBufferLength) &&
(uprv_memcmp(myUnicodeConverter->UCharErrorBuffer, that.myUnicodeConverter->UCharErrorBuffer, myUnicodeConverter->UCharErrorBufferLength) == 0) &&
(uprv_memcmp(myUnicodeConverter->charErrorBuffer, that.myUnicodeConverter->charErrorBuffer, myUnicodeConverter->charErrorBufferLength) == 0) &&
1999-08-16 21:50:52 +00:00
(myUnicodeConverter->fromUCharErrorBehaviour == that.myUnicodeConverter->fromUCharErrorBehaviour))
return TRUE;
else return FALSE;
}
bool_t UnicodeConverterCPP::operator!=(const UnicodeConverterCPP& that) const
{
return !(*this == that);
}
UnicodeConverterCPP::UnicodeConverterCPP(const UnicodeConverterCPP& that)
{
/*increments the referenceCounter to let the static table know
*it has one more client
*/
myUnicodeConverter = new UConverter;
{
Mutex updateReferenceCounter;
that.myUnicodeConverter->sharedData->referenceCounter++;
}
*myUnicodeConverter = *(that.myUnicodeConverter);
}
UnicodeConverterCPP::~UnicodeConverterCPP()
{
ucnv_close(myUnicodeConverter);
}
void
UnicodeConverterCPP::fromUnicodeString(char* target,
int32_t& targetSize,
const UnicodeString& source,
UErrorCode& err) const
{
const UChar* mySource = NULL;
int32_t mySourceLength = 0;
UConverter myConverter;
char *myTarget = NULL;
if (U_FAILURE(err)) return;
1999-08-16 21:50:52 +00:00
if ((myUnicodeConverter == NULL) || source.isBogus() || (targetSize <= 0))
{
err = U_ILLEGAL_ARGUMENT_ERROR;
1999-08-16 21:50:52 +00:00
return;
}
/*makes a local copy of the UnicodeConverter*/
myConverter = *myUnicodeConverter;
/*Removes all state info on the UnicodeConverter*/
ucnv_reset(&myConverter);
mySourceLength = source.length();
1999-08-16 21:50:52 +00:00
mySource = source.getUChars();
myTarget = target;
ucnv_fromUnicode(&myConverter,
&myTarget,
target + targetSize,
&mySource,
mySource + mySourceLength,
NULL,
TRUE,
&err);
targetSize = myTarget - target;
return;
}
void
UnicodeConverterCPP::toUnicodeString(UnicodeString& target,
const char* source,
int32_t sourceSize,
UErrorCode& err) const
{
const char* mySource = source;
const char* mySourceLimit = source + sourceSize;
UChar* myTargetUChars = NULL;
UChar* myTargetUCharsAlias = NULL;
int32_t myTargetUCharsLength = 0;
UConverter myConverter;
if (U_FAILURE(err)) return;
1999-08-16 21:50:52 +00:00
if ((myUnicodeConverter == NULL) || target.isBogus() || (sourceSize <= 0))
{
err = U_ILLEGAL_ARGUMENT_ERROR;
1999-08-16 21:50:52 +00:00
return;
}
/*makes a local bitwise copy of the UnicodeConverter*/
myConverter = *myUnicodeConverter;
/*Removes all state info on the UnicodeConverter*/
ucnv_reset(&myConverter);
/*Allocates the theoritically (Not counting added bytes from the error functions) max buffer
*on a "normal" call, only one iteration will be necessary.
*/
myTargetUChars =
(UChar*)uprv_malloc(sizeof(UChar)*(myTargetUCharsLength = (sourceSize/(int32_t)getMinBytesPerChar())));
1999-08-16 21:50:52 +00:00
if (myTargetUChars == NULL)
{
err = U_MEMORY_ALLOCATION_ERROR;
1999-08-16 21:50:52 +00:00
return;
}
/*renders the target clean*/
target.remove();
/*Will loop until (re-use the same buffer) until no more memory is requested
*or an error (other than INDEX_OUTOF_BOUNDS) is encountered
*/
do
{
err = U_ZERO_ERROR;
1999-08-16 21:50:52 +00:00
myTargetUCharsAlias = myTargetUChars;
ucnv_toUnicode(&myConverter,
&myTargetUCharsAlias,
myTargetUChars + myTargetUCharsLength,
&mySource,
mySourceLimit,
NULL,
TRUE,
&err);
/*appends what we got thus far to the UnicodeString*/
target.replace((UTextOffset)target.length(),
1999-08-16 21:50:52 +00:00
myTargetUCharsAlias - myTargetUChars,
myTargetUChars,
myTargetUCharsAlias - myTargetUChars);
/*Checks for the integrity of target (UnicodeString) as it adds data to it*/
if (target.isBogus()) err = U_MEMORY_ALLOCATION_ERROR;
} while (err == U_INDEX_OUTOFBOUNDS_ERROR);
1999-08-16 21:50:52 +00:00
uprv_free(myTargetUChars);
1999-08-16 21:50:52 +00:00
return;
}
void
UnicodeConverterCPP::fromUnicode(char*& target,
const char* targetLimit,
const UChar*& source,
const UChar* sourceLimit,
int32_t *offsets,
bool_t flush,
UErrorCode& err)
{
ucnv_fromUnicode(myUnicodeConverter,
&target,
targetLimit,
&source,
sourceLimit,
offsets,
flush,
&err);
}
void
UnicodeConverterCPP::toUnicode(UChar*& target,
const UChar* targetLimit,
const char*& source,
const char* sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode& err)
{
ucnv_toUnicode(myUnicodeConverter,
&target,
targetLimit,
&source,
sourceLimit,
offsets,
flush,
&err);
}
const char*
UnicodeConverterCPP::getName(UErrorCode& err) const
{
return ucnv_getName(myUnicodeConverter, &err);
}
int8_t
UnicodeConverterCPP::getMaxBytesPerChar() const
{
return ucnv_getMaxCharSize(myUnicodeConverter);
}
int8_t
UnicodeConverterCPP::getMinBytesPerChar() const
{
return ucnv_getMinCharSize(myUnicodeConverter);
}
void
UnicodeConverterCPP::getSubstitutionChars(char* subChars,
int8_t& len,
UErrorCode& err) const
{
ucnv_getSubstChars(myUnicodeConverter,
subChars,
&len,
&err);
}
void
UnicodeConverterCPP::setSubstitutionChars(const char* subChars,
int8_t len,
UErrorCode& err)
{
ucnv_setSubstChars(myUnicodeConverter,
subChars,
len,
&err);
}
void
UnicodeConverterCPP::resetState()
{
ucnv_reset(myUnicodeConverter);
}
int32_t
UnicodeConverterCPP::getCodepage(UErrorCode& err) const
{
return ucnv_getCCSID(myUnicodeConverter, &err);
}
UConverterToUCallback
1999-08-16 21:50:52 +00:00
UnicodeConverterCPP::getMissingCharAction() const
{
return ucnv_getToUCallBack(myUnicodeConverter);
}
UConverterFromUCallback
1999-08-16 21:50:52 +00:00
UnicodeConverterCPP::getMissingUnicodeAction() const
{
return ucnv_getFromUCallBack(myUnicodeConverter);
}
void
UnicodeConverterCPP::setMissingCharAction(UConverterToUCallback action,
1999-08-16 21:50:52 +00:00
UErrorCode& err)
{
ucnv_setToUCallBack(myUnicodeConverter, action, &err);
}
void
UnicodeConverterCPP::setMissingUnicodeAction(UConverterFromUCallback action,
1999-08-16 21:50:52 +00:00
UErrorCode& err)
{
ucnv_setFromUCallBack(myUnicodeConverter, action, &err);
}
void
UnicodeConverterCPP::getDisplayName(const Locale& displayLocale,
UnicodeString& displayName) const
{
UErrorCode err = U_ZERO_ERROR;
UChar name[UCNV_MAX_CONVERTER_NAME_LENGTH];
int32_t length = ucnv_getDisplayName(myUnicodeConverter, displayLocale.getName(), name, sizeof(name), &err);
if (U_SUCCESS(err))
1999-08-16 21:50:52 +00:00
{
displayName.replace(0, 0x7fffffff, name, length);
1999-08-16 21:50:52 +00:00
}
else
1999-08-16 21:50:52 +00:00
{
/*Error While creating the resource bundle use the internal name instead*/
displayName.remove();
displayName = getName(err); /*Get the raw ASCII name*/
}
return;
}
UConverterPlatform
1999-08-16 21:50:52 +00:00
UnicodeConverterCPP::getCodepagePlatform(UErrorCode &err) const
{
return ucnv_getPlatform(myUnicodeConverter, &err);
}
UConverterType UnicodeConverterCPP::getType() const
1999-08-16 21:50:52 +00:00
{
return ucnv_getType(myUnicodeConverter);
}
void UnicodeConverterCPP::getStarters(bool_t starters[256],
UErrorCode& err) const
{
ucnv_getStarters(myUnicodeConverter,
starters,
&err);
return;
}
const char* const*
UnicodeConverterCPP::getAvailableNames(int32_t& num, UErrorCode& err)
{
if(U_FAILURE(err)) {
num = 0;
return NULL;
}
if (availableConverterNames==NULL) {
int32_t count = ucnv_io_countAvailableConverters(&err);
if (count > 0) {
const char **names = new const char *[count];
if (names != NULL) {
ucnv_io_fillAvailableConverters(names, &err);
/* in the mutex block, set the data for this process */
umtx_lock(0);
if (availableConverterNames == NULL) {
availableConverterNamesCount = count;
availableConverterNames = names;
names = 0;
}
umtx_unlock(0);
/* if a different thread set it first, then delete the extra data */
if (names != 0) {
delete [] names;
}
} else {
num = 0;
err = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
}
}
num = availableConverterNamesCount;
return availableConverterNames;
1999-08-16 21:50:52 +00:00
}
int32_t UnicodeConverterCPP::flushCache()
{
return ucnv_flushCache();
}
/* HSYS: To be cleaned up. The usage of UChar* and UnicodeString in
the C++ APIs need to be revisited. */
void UnicodeConverterCPP::fixFileSeparator(UnicodeString& source) const
{
int32_t i = 0;
int32_t index = 0;
int32_t ccsid = 0;
UErrorCode status = U_ZERO_ERROR;
if (source.length() == 0)
{
return;
}
ccsid = getCodepage(status);
if (U_FAILURE(status))
{
return;
}
for (i = 0; i < UCNV_MAX_AMBIGUOUSCCSIDS; i++) {
if (ccsid == UCNV_AMBIGUOUSCONVERTERS[i].ccsid)
{
index = i;
break;
}
}
if (index != -1)
{
for (i = 0; i < source.length(); i++)
{
if (source[i] == UCNV_AMBIGUOUSCONVERTERS[index].mismapped)
{
source[i] = UCNV_AMBIGUOUSCONVERTERS[index].replacement;
}
}
}
}
bool_t UnicodeConverterCPP::isAmbiguous(void) const
{
return ucnv_isAmbiguous(myUnicodeConverter);
}