ICU-216 Added fixFileSeparator and isAmbiguous in UnicodeConverterCPP class and ucnv_xxx interface.

X-SVN-Rev: 497
This commit is contained in:
Helena Chapman 2000-01-08 00:51:44 +00:00
parent 8624681ac4
commit 62bfc59330
5 changed files with 150 additions and 2 deletions

View File

@ -433,3 +433,44 @@ int32_t UnicodeConverterCPP::flushCache()
{
return ucnv_flushCache();
}
/* HSYS: To be cleaned up. The usage of UChar* and UnicodeString in
the C++ APIs needs to be revisited. */
/**
 * Rewrites, in place, the ambiguously mapped character of this converter's
 * code page (for example the yen or won sign) to its replacement (backslash),
 * using the UCNV_AMBIGUOUSCONVERTERS table.
 *
 * The string is left untouched when it is empty, when the code page cannot be
 * retrieved, or when the code page is not listed in the table.
 *
 * @param source the string to fix; modified in place
 */
void UnicodeConverterCPP::fixFileSeparator(UnicodeString& source) const
{
    int32_t i = 0;
    /* -1 means "code page not listed". This used to start at 0, which made the
       check below always succeed and applied table entry 0 to every converter. */
    int32_t index = -1;
    int32_t ccsid = 0;
    UErrorCode status = U_ZERO_ERROR;

    if (source.length() == 0)
    {
        return;
    }

    ccsid = getCodepage(status);
    if (U_FAILURE(status))
    {
        return;
    }

    /* Find the table row describing this converter's code page, if any. */
    for (i = 0; i < UCNV_MAX_AMBIGUOUSCCSIDS; i++)
    {
        if (ccsid == UCNV_AMBIGUOUSCONVERTERS[i].ccsid)
        {
            index = i;
            break;
        }
    }

    if (index == -1)
    {
        /* Not an ambiguous code page: nothing to fix. */
        return;
    }

    for (i = 0; i < source.length(); i++)
    {
        if (source[i] == UCNV_AMBIGUOUSCONVERTERS[index].mismapped)
        {
            source[i] = UCNV_AMBIGUOUSCONVERTERS[index].replacement;
        }
    }
}
/**
 * Reports whether the wrapped converter is flagged as ambiguous by
 * ucnv_isAmbiguous(); this method only forwards the call.
 *
 * @return the value returned by ucnv_isAmbiguous() for the wrapped converter
 */
bool_t UnicodeConverterCPP::isAmbiguous(void) const
{
    const bool_t ambiguous = ucnv_isAmbiguous(myUnicodeConverter);
    return ambiguous;
}

View File

@ -38,6 +38,9 @@
/* Parenthesized so the macro expands as a single value inside larger
   expressions (e.g. n / CHUNK_SIZE or n % CHUNK_SIZE). */
#define CHUNK_SIZE (5*1024)
/* Internal function : begin */
/* Looks up the converter's CCSID in UCNV_AMBIGUOUSCONVERTERS and returns the
   index of the matching row, or -1 when cnv is NULL, the CCSID cannot be
   retrieved, or the CCSID is not listed. */
static int32_t ucnv_getAmbiguousCCSID (const UConverter* cnv);
/* Internal function : end */
typedef void (*T_ToUnicodeFunction) (UConverter *,
UChar **,
@ -130,7 +133,6 @@ static T_GetNextUCharFunction GET_NEXT_UChar_FUNCTIONS[UCNV_NUMBER_OF_SUPPORTED_
T_UConverter_getNextUChar_ISO_2022
};
void flushInternalUnicodeBuffer (UConverter * _this,
UChar * myTarget,
int32_t * myTargetIndex,
@ -512,7 +514,6 @@ UConverterFromUCallback ucnv_setFromUCallBack (UConverter * converter,
return myReturn;
}
#include <stdio.h>
void ucnv_fromUnicode (UConverter * _this,
char **target,
const char *targetLimit,
@ -1154,3 +1155,53 @@ void ucnv_getStarters(const UConverter* converter,
uprv_memcpy(starters, converter->sharedData->table->mbcs.starters, 256*sizeof(bool_t));
return;
}
/*
 * Finds the row of UCNV_AMBIGUOUSCONVERTERS whose ccsid equals the CCSID of
 * the given converter.
 *
 * Returns the row index, or -1 when cnv is NULL, when ucnv_getCCSID reports
 * a failure, or when no row matches.
 */
int32_t ucnv_getAmbiguousCCSID(const UConverter *cnv)
{
    int32_t found = -1;

    if (cnv != NULL)
    {
        UErrorCode errorCode = U_ZERO_ERROR;
        int32_t converterCCSID = ucnv_getCCSID(cnv, &errorCode);

        if (!U_FAILURE(errorCode))
        {
            int32_t row = 0;

            /* Stop at the first matching row. */
            while (row < UCNV_MAX_AMBIGUOUSCCSIDS && found == -1)
            {
                if (UCNV_AMBIGUOUSCONVERTERS[row].ccsid == converterCCSID)
                {
                    found = row;
                }
                ++row;
            }
        }
    }

    return found;
}
/*
 * Replaces, in place, every occurrence of the converter's mismapped character
 * in source[0..sourceLength) with the replacement character taken from the
 * UCNV_AMBIGUOUSCONVERTERS table.
 *
 * Does nothing when source or cnv is NULL, or when the converter's CCSID is
 * not listed in the table.
 */
void ucnv_fixFileSeparator(const UConverter *cnv,
                           UChar* source,
                           int32_t sourceLength)
{
    int32_t row;
    int32_t pos;
    UChar wrongChar;
    UChar rightChar;

    if (source == NULL || cnv == NULL)
    {
        return;
    }

    row = ucnv_getAmbiguousCCSID(cnv);
    if (row == -1)
    {
        return;
    }

    wrongChar = UCNV_AMBIGUOUSCONVERTERS[row].mismapped;
    rightChar = UCNV_AMBIGUOUSCONVERTERS[row].replacement;

    for (pos = 0; pos < sourceLength; ++pos)
    {
        if (source[pos] == wrongChar)
        {
            source[pos] = rightChar;
        }
    }
}
/*
 * Returns TRUE when the converter's CCSID is listed in the
 * UCNV_AMBIGUOUSCONVERTERS table, FALSE otherwise (including the cases in
 * which ucnv_getAmbiguousCCSID returns -1 for a NULL or failing converter).
 */
bool_t ucnv_isAmbiguous(const UConverter *cnv)
{
    if (ucnv_getAmbiguousCCSID(cnv) == -1)
    {
        return FALSE;
    }
    return TRUE;
}

View File

@ -321,5 +321,24 @@ static const char* const* getAvailableNames(int32_t& num,
* @return the number of cached converters successfully deleted
*/
static int32_t flushCache(void);
/**
* Fixes the backslash character mismapping. For example, in SJIS, the backslash
* character in the ASCII portion is also used to represent the yen currency sign.
* When mapping from Unicode character 0x005C, it's unclear whether to map the
* character back to yen or backslash in SJIS. This function takes the input
* string and replaces, in place, every occurrence of the code page's mismapped
* character (the yen sign 0x00A5 or the won sign 0x20A9, depending on the code
* page) with the backslash 0x005C. This is necessary when the user tries to
* open a file with the input buffer on Windows.
* Intended for converters for which isAmbiguous() returns TRUE.
* @param source the input string to be fixed; modified in place
*/
void fixFileSeparator(UnicodeString& source) const;
/**
* Determines if the converter contains ambiguous mappings of the same
* character or not. The answer is obtained by forwarding to ucnv_isAmbiguous()
* on the underlying converter.
* @return TRUE if the converter contains ambiguous mapping of the same
* character, FALSE otherwise.
*/
bool_t isAmbiguous(void) const;
};
#endif

View File

@ -600,6 +600,26 @@ U_CAPI const char * U_EXPORT2 ucnv_getDefaultName (void);
*/
U_CAPI void U_EXPORT2 ucnv_setDefaultName (const char *name);
/**
* Fixes the backslash character mismapping. For example, in SJIS, the backslash
* character in the ASCII portion is also used to represent the yen currency sign.
* When mapping from Unicode character 0x005C, it's unclear whether to map the
* character back to yen or backslash in SJIS. This function takes the input
* buffer and replaces, in place, every occurrence of the code page's mismapped
* character (the yen sign 0x00A5 or the won sign 0x20A9, depending on the code
* page) with the backslash 0x005C. This is necessary when the user tries to
* open a file with the input buffer on Windows.
* The buffer is left unchanged when cnv or source is NULL, or when the
* converter is not one for which ucnv_isAmbiguous() returns TRUE.
* @param cnv the converter whose code page selects the character to replace
* @param source the input buffer to be fixed; modified in place
* @param sourceLen the length of the input buffer, in UChar units
*/
U_CAPI void U_EXPORT2 ucnv_fixFileSeparator(const UConverter *cnv, UChar* source, int32_t sourceLen);
/**
* Determines if the converter contains ambiguous mappings of the same
* character or not.
* @param cnv the converter to be tested; a NULL converter yields FALSE
* @return TRUE if the converter contains ambiguous mapping of the same
* character, FALSE otherwise.
*/
U_CAPI bool_t U_EXPORT2 ucnv_isAmbiguous(const UConverter *cnv);
#endif
/*_UCNV*/

View File

@ -24,6 +24,7 @@
/* NOTE(review): presumably the maximum length of a substitution character - confirm against its users. */
#define UCNV_MAX_SUBCHAR_LEN 4
/* NOTE(review): presumably the capacity of the converter's error buffers - confirm against its users. */
#define UCNV_ERROR_BUFFER_LENGTH 20
/* Number of rows in the UCNV_AMBIGUOUSCONVERTERS table; keep it in sync when rows are added or removed. */
#define UCNV_MAX_AMBIGUOUSCCSIDS 5
#ifndef UCMP16_H
typedef struct _CompactShortArray CompactShortArray;
@ -66,6 +67,22 @@ typedef enum {
UCNV_GB = 11
} UConverterType;
/* One row of the ambiguous-converter table. */
typedef struct
{
    int32_t ccsid;       /* CCSID compared against the converter's CCSID */
    UChar   mismapped;   /* character that is searched for in the buffer */
    UChar   replacement; /* character written in place of `mismapped` */
} UAmbiguousConverter;

/*
 * Code pages with an ambiguous backslash mapping. In every row the mismapped
 * character (0x00A5 yen sign or 0x20A9 won sign) is replaced by 0x005C
 * (backslash).
 */
static const UAmbiguousConverter UCNV_AMBIGUOUSCONVERTERS[UCNV_MAX_AMBIGUOUSCCSIDS] =
{
    {  943, 0x00A5, 0x005C },
    {  949, 0x20A9, 0x005C },
    { 1361, 0x20A9, 0x005C },
    {  942, 0x00A5, 0x005C },
    { 1363, 0x20A9, 0x005C }
};
typedef enum {
UCNV_UNKNOWN = -1,
UCNV_IBM = 0