ICU-216 Added fixFileSeparator and isAmbiguous in UnicodeConverterCPP class and ucnv_xxx interface.
X-SVN-Rev: 497
This commit is contained in:
parent
8624681ac4
commit
62bfc59330
@ -433,3 +433,44 @@ int32_t UnicodeConverterCPP::flushCache()
|
||||
{
|
||||
return ucnv_flushCache();
|
||||
}
|
||||
|
||||
/* HSYS: To be cleaned up. The usage of UChar* and UnicodeString in
the C++ APIs needs to be revisited. */
|
||||
/**
 * Replaces, in place, every occurrence of this converter's mismapped
 * character with its replacement character, as listed in
 * UCNV_AMBIGUOUSCONVERTERS for the converter's codepage (CCSID).
 *
 * The string is left untouched when it is empty, when the codepage cannot
 * be obtained, or when the codepage is not listed in the table.
 *
 * @param source the string to be fixed; modified in place
 */
void UnicodeConverterCPP::fixFileSeparator(UnicodeString& source) const
{
    int32_t i = 0;
    /* -1 means "codepage not found in the table". This must not start at 0:
       0 is a valid table slot, and starting there made every converter that
       is NOT in the table be treated as if it were the first entry. */
    int32_t index = -1;
    int32_t ccsid = 0;
    UErrorCode status = U_ZERO_ERROR;

    if (source.length() == 0)
    {
        return;
    }

    ccsid = getCodepage(status);
    if (U_FAILURE(status))
    {
        return;
    }

    /* Look up the table slot whose CCSID matches this converter. */
    for (i = 0; i < UCNV_MAX_AMBIGUOUSCCSIDS; i++)
    {
        if (ccsid == UCNV_AMBIGUOUSCONVERTERS[i].ccsid)
        {
            index = i;
            break;
        }
    }

    if (index == -1)
    {
        /* Not an ambiguous converter: nothing to fix. */
        return;
    }

    for (i = 0; i < source.length(); i++)
    {
        if (source[i] == UCNV_AMBIGUOUSCONVERTERS[index].mismapped)
        {
            source[i] = UCNV_AMBIGUOUSCONVERTERS[index].replacement;
        }
    }
}
|
||||
|
||||
/**
 * Reports whether the wrapped converter is ambiguous by forwarding
 * myUnicodeConverter to ucnv_isAmbiguous().
 *
 * @return the value returned by ucnv_isAmbiguous() for this converter
 */
bool_t UnicodeConverterCPP::isAmbiguous(void) const
{
    const bool_t ambiguous = ucnv_isAmbiguous(myUnicodeConverter);
    return ambiguous;
}
|
@ -38,6 +38,9 @@
|
||||
|
||||
#define CHUNK_SIZE 5*1024
|
||||
|
||||
/* Internal function : begin */
|
||||
static int32_t ucnv_getAmbiguousCCSID (const UConverter* cnv);
|
||||
/* Internal function : end */
|
||||
|
||||
typedef void (*T_ToUnicodeFunction) (UConverter *,
|
||||
UChar **,
|
||||
@ -130,7 +133,6 @@ static T_GetNextUCharFunction GET_NEXT_UChar_FUNCTIONS[UCNV_NUMBER_OF_SUPPORTED_
|
||||
T_UConverter_getNextUChar_ISO_2022
|
||||
};
|
||||
|
||||
|
||||
void flushInternalUnicodeBuffer (UConverter * _this,
|
||||
UChar * myTarget,
|
||||
int32_t * myTargetIndex,
|
||||
@ -512,7 +514,6 @@ UConverterFromUCallback ucnv_setFromUCallBack (UConverter * converter,
|
||||
|
||||
return myReturn;
|
||||
}
|
||||
#include <stdio.h>
|
||||
void ucnv_fromUnicode (UConverter * _this,
|
||||
char **target,
|
||||
const char *targetLimit,
|
||||
@ -1154,3 +1155,53 @@ void ucnv_getStarters(const UConverter* converter,
|
||||
uprv_memcpy(starters, converter->sharedData->table->mbcs.starters, 256*sizeof(bool_t));
|
||||
return;
|
||||
}
|
||||
|
||||
int32_t ucnv_getAmbiguousCCSID(const UConverter *cnv)
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
int32_t i = 0;
|
||||
int32_t ccsid = 0;
|
||||
if (cnv == NULL)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
ccsid = ucnv_getCCSID(cnv, &status);
|
||||
if (U_FAILURE(status))
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
for (i = 0; i < UCNV_MAX_AMBIGUOUSCCSIDS; i++) {
|
||||
if (ccsid == UCNV_AMBIGUOUSCONVERTERS[i].ccsid)
|
||||
{
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
void ucnv_fixFileSeparator(const UConverter *cnv,
|
||||
UChar* source,
|
||||
int32_t sourceLength)
|
||||
{
|
||||
int32_t i = 0;
|
||||
int32_t index = 0;
|
||||
if ((source == NULL) || (cnv == NULL))
|
||||
{
|
||||
return;
|
||||
}
|
||||
if ((index = ucnv_getAmbiguousCCSID(cnv)) != -1)
|
||||
{
|
||||
for (i = 0; i < sourceLength; i++)
|
||||
{
|
||||
if (source[i] == UCNV_AMBIGUOUSCONVERTERS[index].mismapped)
|
||||
{
|
||||
source[i] = UCNV_AMBIGUOUSCONVERTERS[index].replacement;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool_t ucnv_isAmbiguous(const UConverter *cnv)
|
||||
{
|
||||
return (ucnv_getAmbiguousCCSID(cnv) == -1 ? FALSE : TRUE);
|
||||
}
|
||||
|
@ -321,5 +321,24 @@ static const char* const* getAvailableNames(int32_t& num,
|
||||
* @return the number of cached converters successfully deleted
|
||||
*/
|
||||
static int32_t flushCache(void);
|
||||
/**
|
||||
* Fixes the backslash character mismapping. For example, in SJIS, the backslash
|
||||
* character in the ASCII portion is also used to represent the yen currency sign.
|
||||
* When mapping from Unicode character 0x005C, it's unclear whether to map the
|
||||
* character back to yen or backslash in SJIS. This function will take the input
|
||||
* buffer and replace all the yen sign characters with backslash. This is necessary
|
||||
* when the user tries to open a file with the input buffer on Windows.
|
||||
* @param source the input buffer to be fixed
|
||||
*/
|
||||
void fixFileSeparator(UnicodeString& source) const;
|
||||
|
||||
/**
|
||||
* Determines if the converter contains ambiguous mappings of the same
|
||||
* character or not.
|
||||
* @return TRUE if the converter contains ambiguous mapping of the same
|
||||
* character, FALSE otherwise.
|
||||
*/
|
||||
bool_t isAmbiguous(void) const;
|
||||
|
||||
};
|
||||
#endif
|
||||
|
@ -600,6 +600,26 @@ U_CAPI const char * U_EXPORT2 ucnv_getDefaultName (void);
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 ucnv_setDefaultName (const char *name);
|
||||
|
||||
/**
|
||||
* Fixes the backslash character mismapping. For example, in SJIS, the backslash
|
||||
* character in the ASCII portion is also used to represent the yen currency sign.
|
||||
* When mapping from Unicode character 0x005C, it's unclear whether to map the
|
||||
* character back to yen or backslash in SJIS. This function will take the input
|
||||
* buffer and replace all the yen sign characters with backslash. This is necessary
|
||||
* when the user tries to open a file with the input buffer on Windows.
|
||||
* @param cnv the converter whose CCSID determines which character is replaced
* @param source the input buffer to be fixed
* @param sourceLen the length of the input buffer, in UChars
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 ucnv_fixFileSeparator(const UConverter *cnv, UChar* source, int32_t sourceLen);
|
||||
|
||||
/**
|
||||
* Determines if the converter contains ambiguous mappings of the same
|
||||
* character or not.
* @param cnv the converter to be tested
|
||||
* @return TRUE if the converter contains ambiguous mapping of the same
|
||||
* character, FALSE otherwise.
|
||||
*/
|
||||
U_CAPI bool_t U_EXPORT2 ucnv_isAmbiguous(const UConverter *cnv);
|
||||
|
||||
|
||||
#endif
|
||||
/*_UCNV*/
|
||||
|
@ -24,6 +24,7 @@
|
||||
|
||||
#define UCNV_MAX_SUBCHAR_LEN 4
|
||||
#define UCNV_ERROR_BUFFER_LENGTH 20
|
||||
#define UCNV_MAX_AMBIGUOUSCCSIDS 5
|
||||
|
||||
#ifndef UCMP16_H
|
||||
typedef struct _CompactShortArray CompactShortArray;
|
||||
@ -66,6 +67,22 @@ typedef enum {
|
||||
UCNV_GB = 11
|
||||
} UConverterType;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int32_t ccsid;
|
||||
UChar mismapped;
|
||||
UChar replacement;
|
||||
} UAmbiguousConverter;
|
||||
|
||||
/*
 * Table of converters with an ambiguous mapping, one row per CCSID:
 *   { ccsid, mismapped, replacement }
 * U+00A5 is YEN SIGN, U+20A9 is WON SIGN, U+005C is REVERSE SOLIDUS
 * (backslash).
 */
static const UAmbiguousConverter UCNV_AMBIGUOUSCONVERTERS[UCNV_MAX_AMBIGUOUSCCSIDS] =
{
    {  943, 0x00A5, 0x005C },
    {  949, 0x20A9, 0x005C },
    { 1361, 0x20A9, 0x005C },
    {  942, 0x00A5, 0x005C },
    { 1363, 0x20A9, 0x005C }
};
|
||||
|
||||
typedef enum {
|
||||
UCNV_UNKNOWN = -1,
|
||||
UCNV_IBM = 0
|
||||
|
Loading…
Reference in New Issue
Block a user