ICU-7138 Merge branch to trunk, adding support for alternative SI/SO EBCDIC codepages.

X-SVN-Rev: 27717
This commit is contained in:
Michael Ow 2010-02-27 03:39:15 +00:00
parent 0bef6f34d0
commit 5df76eb00a
8 changed files with 163 additions and 28 deletions

View File

@ -1,7 +1,7 @@
/*
******************************************************************************
*
* Copyright (C) 2000-2009, International Business Machines
* Copyright (C) 2000-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
@ -398,6 +398,76 @@ gb18030Ranges[13][4]={
/* bit flag for UConverter.options indicating GB 18030 special handling */
#define _MBCS_OPTION_GB18030 0x8000
/* bit flags for UConverter.options indicating KEIS, JEF, JIPS special handling */
/*
 * Each flag selects an alternative SI/SO byte sequence in place of the
 * default UCNV_SI/UCNV_SO; getSISOBytes() tests them in the order
 * KEIS, JEF, JIPS.
 */
#define _MBCS_OPTION_KEIS 0x01000
#define _MBCS_OPTION_JEF 0x02000
#define _MBCS_OPTION_JIPS 0x04000
/* KEIS: two-byte shift sequences (SO = 0x0A 0x42, SI = 0x0A 0x41) */
#define KEIS_SO_CHAR_1 0x0A
#define KEIS_SO_CHAR_2 0x42
#define KEIS_SI_CHAR_1 0x0A
#define KEIS_SI_CHAR_2 0x41
/* JEF: single-byte shift codes */
#define JEF_SO_CHAR 0x28
#define JEF_SI_CHAR 0x29
/* JIPS: two-byte shift sequences (SO = 0x1A 0x70, SI = 0x1A 0x71) */
#define JIPS_SO_CHAR_1 0x1A
#define JIPS_SO_CHAR_2 0x70
#define JIPS_SI_CHAR_1 0x1A
#define JIPS_SI_CHAR_2 0x71
/*
 * Selector for getSISOBytes(): which of the two shift sequences to produce.
 * SI is emitted when the output changes from double-byte mode back to
 * single-byte mode; SO is emitted when it changes from single-byte mode
 * to double-byte mode.
 */
typedef enum SISO_Option {
    SI, /* shift-in sequence */
    SO  /* shift-out sequence */
} SISO_Option;
/*
 * Stores the shift-in or shift-out byte sequence for a converter into value
 * and returns how many bytes were stored.
 *
 * option     SI or SO: which sequence is requested.
 * cnvOption  converter option bits; the _MBCS_OPTION_KEIS, _MBCS_OPTION_JEF
 *            and _MBCS_OPTION_JIPS flags are tested in that order and the
 *            first one set wins. If none is set, UCNV_SI/UCNV_SO is used.
 * value      output buffer; must have room for at least 2 bytes.
 *
 * Returns 1 or 2 for a valid option. For any other option value nothing is
 * written and 0 is returned.
 */
static int32_t getSISOBytes(SISO_Option option, uint32_t cnvOption, uint8_t *value) {
    int wantShiftIn;

    if (option == SI) {
        wantShiftIn = 1;
    } else if (option == SO) {
        wantShiftIn = 0;
    } else {
        /* Should never happen. */
        return 0;
    }

    if ((cnvOption & _MBCS_OPTION_KEIS) != 0) {
        value[0] = wantShiftIn ? KEIS_SI_CHAR_1 : KEIS_SO_CHAR_1;
        value[1] = wantShiftIn ? KEIS_SI_CHAR_2 : KEIS_SO_CHAR_2;
        return 2;
    }

    if ((cnvOption & _MBCS_OPTION_JEF) != 0) {
        value[0] = wantShiftIn ? JEF_SI_CHAR : JEF_SO_CHAR;
        return 1;
    }

    if ((cnvOption & _MBCS_OPTION_JIPS) != 0) {
        value[0] = wantShiftIn ? JIPS_SI_CHAR_1 : JIPS_SO_CHAR_1;
        value[1] = wantShiftIn ? JIPS_SI_CHAR_2 : JIPS_SO_CHAR_2;
        return 2;
    }

    /* No alternative scheme selected: use the standard single-byte codes. */
    value[0] = wantShiftIn ? UCNV_SI : UCNV_SO;
    return 1;
}
/* Miscellaneous ------------------------------------------------------------ */
/**
@ -1724,6 +1794,12 @@ ucnv_MBCSOpen(UConverter *cnv,
/* set a flag for GB 18030 mode, which changes the callback behavior */
cnv->options|=_MBCS_OPTION_GB18030;
}
} else if(uprv_strstr(pArgs->name, "KEIS")!=NULL) {
cnv->options|=_MBCS_OPTION_KEIS;
} else if(uprv_strstr(pArgs->name, "JEF")!=NULL) {
cnv->options|=_MBCS_OPTION_JEF;
} else if(uprv_strstr(pArgs->name, "JIPS")!=NULL) {
cnv->options|=_MBCS_OPTION_JIPS;
}
/* fix maxBytesPerUChar depending on outputType and options etc. */
@ -3859,6 +3935,7 @@ ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
uint32_t stage2Entry;
uint32_t asciiRoundtrips;
uint32_t value;
uint8_t si_value[2], so_value[2], si_value_length, so_value_length;
int32_t length, prevLength;
uint8_t unicodeMask;
@ -3930,6 +4007,10 @@ ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
sourceIndex= c==0 ? 0 : -1;
nextSourceIndex=0;
/* Get the SI/SO character for the converter */
si_value_length = getSISOBytes(SI, cnv->options, si_value);
so_value_length = getSISOBytes(SO, cnv->options, so_value);
/* conversion loop */
/*
* This is another piece of ugly code:
@ -4019,8 +4100,14 @@ ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
length=1;
} else {
/* change from double-byte mode to single-byte */
value|=(uint32_t)UCNV_SI<<8;
length=2;
if (si_value_length == 1) {
value|=si_value[0]<<8;
length = 2;
} else if (si_value_length == 2) {
value|=si_value[1]<<8;
value|=si_value[0]<<16;
length = 3;
}
prevLength=1;
}
} else {
@ -4028,8 +4115,14 @@ ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
length=2;
} else {
/* change from single-byte mode to double-byte */
value|=(uint32_t)UCNV_SO<<16;
length=3;
if (so_value_length == 1) {
value|=so_value[0]<<16;
length = 3;
} else if (so_value_length == 2) {
value|=so_value[1]<<16;
value|=so_value[0]<<24;
length = 4;
}
prevLength=2;
}
}
@ -4239,8 +4332,14 @@ getTrail:
length=1;
} else {
/* change from double-byte mode to single-byte */
value|=(uint32_t)UCNV_SI<<8;
length=2;
if (si_value_length == 1) {
value|=si_value[0]<<8;
length = 2;
} else if (si_value_length == 2) {
value|=si_value[1]<<8;
value|=si_value[0]<<16;
length = 3;
}
prevLength=1;
}
} else {
@ -4248,8 +4347,14 @@ getTrail:
length=2;
} else {
/* change from single-byte mode to double-byte */
value|=(uint32_t)UCNV_SO<<16;
length=3;
if (so_value_length == 1) {
value|=so_value[0]<<16;
length = 3;
} else if (so_value_length == 2) {
value|=so_value[1]<<16;
value|=so_value[0]<<24;
length = 4;
}
prevLength=2;
}
}
@ -4502,15 +4607,27 @@ unassigned:
) {
/* EBCDIC_STATEFUL ending with DBCS: emit an SI to return the output stream to SBCS */
if(targetCapacity>0) {
*target++=(uint8_t)UCNV_SI;
*target++=(uint8_t)si_value[0];
if (si_value_length == 2) {
if (targetCapacity<2) {
cnv->charErrorBuffer[0]=(uint8_t)si_value[1];
cnv->charErrorBufferLength=1;
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
} else {
*target++=(uint8_t)si_value[1];
}
}
if(offsets!=NULL) {
/* set the last source character's index (sourceIndex points at sourceLimit now) */
*offsets++=prevSourceIndex;
}
} else {
/* target is full */
cnv->charErrorBuffer[0]=(char)UCNV_SI;
cnv->charErrorBufferLength=1;
cnv->charErrorBuffer[0]=(uint8_t)si_value[0];
if (si_value_length == 2) {
cnv->charErrorBuffer[1]=(uint8_t)si_value[1];
}
cnv->charErrorBufferLength=si_value_length;
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
}
prevLength=1; /* we switched into SBCS */

View File

@ -258,6 +258,10 @@ ALL_UCM_SOURCE=ibm-37_P100-1995.ucm ibm-1047_P100-1995.ucm $(UCM_SOURCE_CORE) $(
UCM_FILES = $(ALL_UCM_SOURCE:%=$(SRCDATADIR)/%)
CNV_FILES = $(ALL_UCM_SOURCE:%.ucm=$(BUILDDIR)/%.cnv)
CNV_FILES_SHORT = $(ALL_UCM_SOURCE:%.ucm=%.cnv)
UCM_SOURCE_SPECIAL=$(UCM_SOURCE_EBCDIC_IGNORE_SISO)
UCM_FILES_SPECIAL=$(UCM_SOURCE_SPECIAL:%=$(UCMSRCDIR)/%)
CNV_FILES_SPECIAL=$(UCM_SOURCE_SPECIAL:%.ucm=$(BUILDDIR)/%.cnv)
CNV_FILES_SHORT_SPECIAL=$(UCM_SOURCE_SPECIAL:%.ucm=%.cnv)
## RES files
-include $(LOCSRCDIR)/resfiles.mk
@ -396,13 +400,13 @@ SPREP_FILES = $(ALL_SPREP_SOURCE:%.txt=$(BUILDDIR)/%.spp)
SPREP_FILES_SHORT = $(ALL_SPREP_SOURCE:%.txt=%.spp)
## All generated files
ALL_FILES = $(DAT_FILES) $(CNV_FILES) $(BRK_FILES) $(CTD_FILES) $(RES_FILES) $(CURR_FILES) $(LANG_FILES) $(REGION_FILES) $(ZONE_FILES) $(COLLATION_FILES) $(BRK_RES_FILES) $(RBNF_FILES) $(TRANSLIT_FILES) $(SPREP_FILES) $(CFU_FILES)
ALL_FILES = $(DAT_FILES) $(CNV_FILES) $(CNV_FILES_SPECIAL) $(BRK_FILES) $(CTD_FILES) $(RES_FILES) $(INDEX_RES_FILE) $(CURR_FILES) $(LANG_FILES) $(REGION_FILES) $(ZONE_FILES) $(COLLATION_FILES) $(BRK_RES_FILES) $(RBNF_FILES) $(TRANSLIT_FILES) $(SPREP_FILES) $(CFU_FILES)
ALL_INDEX_SRC_FILES = $(PKGDATA_LIST) $(INDEX_FILE) $(CURR_INDEX_FILE) $(LANG_INDEX_FILE) $(REGION_INDEX_FILE) $(ZONE_INDEX_FILE) $(COLLATION_INDEX_FILE) $(BRK_RES_INDEX_FILE) $(RBNF_INDEX_FILE)
# a list to use in the .lst files (package-relative)
COLL_FILES_LIST=$(COLLATION_FILES_SHORT) $(COLLATION_INDEX_RES_SHORT)
BRK_FILES_LIST=$(BRK_FILES_SHORT) $(CTD_FILES_SHORT) $(BRK_RES_FILES_SHORT) $(BRK_RES_INDEX_RES_SHORT)
LOCALE_FILES_LIST= $(RES_FILES_SHORT) $(LANG_FILES_SHORT) $(REGION_FILES_SHORT) $(ZONE_FILES_SHORT)
MISC_FILES_LIST=$(DAT_FILES_SHORT) $(CNV_FILES_SHORT) $(CURR_FILES_SHORT) $(RBNF_FILES_SHORT) $(RBNF_INDEX_RES_SHORT) $(TRANSLIT_FILES_SHORT) $(SPREP_FILES_SHORT) $(CFU_FILES_SHORT)
MISC_FILES_LIST=$(DAT_FILES_SHORT) $(CNV_FILES_SHORT) $(CNV_FILES_SHORT_SPECIAL) $(CURR_FILES_SHORT) $(RBNF_FILES_SHORT) $(RBNF_INDEX_RES_SHORT) $(TRANSLIT_FILES_SHORT) $(SPREP_FILES_SHORT) $(CFU_FILES_SHORT)
UNI_CORE_DATA=uprops.icu ucase.icu ubidi.icu
UNI_CORE_TARGET_DATA=$(UNI_CORE_DATA:%=$(BUILDDIR)/%)
@ -543,6 +547,8 @@ $(CFU_FILES): $(ALL_CFU_SOURCE) $(TOOLBINDIR)/gencfu$(EXEEXT) $(DAT_FILES)
#################################################### CNV
# CNV FILES
$(CNV_FILES_SPECIAL) : $(UCM_FILES_SPECIAL) $(TOOLBINDIR)/makeconv$(TOOLEXEEXT)
$(INVOKE) $(TOOLBINDIR)/makeconv --ignore-siso-check -c -d $(BUILDDIR) $(UCMSRCDIR)/$(@F:%.cnv=%.ucm)
$(BUILDDIR)/%.cnv: $(UCMSRCDIR)/%.ucm $(TOOLBINDIR)/makeconv$(TOOLEXEEXT)
$(INVOKE) $(TOOLBINDIR)/makeconv -c -d $(BUILDDIR) $(UCMSRCDIR)/$(<F)

View File

@ -1,4 +1,4 @@
# Copyright (c) 1999-2007, International Business Machines Corporation and
# Copyright (c) 1999-2010, International Business Machines Corporation and
# others. All Rights Reserved.
# A list of EBCDIC UCM's to build
# ibm-37 and ibm-1047 are already mentioned in makedata.mak and Makefile.in
@ -25,3 +25,5 @@ ibm-297_P100-1995.ucm ibm-420_X120-1999.ucm ibm-424_P100-1995.ucm\
ibm-4517_P100-2005.ucm ibm-4899_P100-1998.ucm ibm-4971_P100-1999.ucm\
ibm-500_P100-1995.ucm ibm-5123_P100-1999.ucm ibm-803_P100-1999.ucm\
ibm-8482_P100-1999.ucm ibm-9067_X100-2005.ucm ibm-16684_P110-2003.ucm
UCM_SOURCE_EBCDIC_IGNORE_SISO =

View File

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2000-2009, International Business Machines
* Copyright (C) 2000-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -687,7 +687,7 @@ MBCSAddFromUnicode(MBCSData *mbcsData,
maxCharLength=mbcsData->ucm->states.maxCharLength;
if( mbcsData->ucm->states.outputType==MBCS_OUTPUT_2_SISO &&
(*bytes==0xe || *bytes==0xf)
(!IGNORE_SISO_CHECK && (*bytes==0xe || *bytes==0xf))
) {
fprintf(stderr, "error: illegal mapping to SI or SO for SI/SO codepage: U+%04x<->0x%s\n",
(int)c, printBytes(buffer, bytes, length));

View File

@ -79,6 +79,7 @@ extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPP
*/
UBool VERBOSE = FALSE;
UBool SMALL = FALSE;
UBool IGNORE_SISO_CHECK = FALSE;
static void
createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);
@ -174,6 +175,7 @@ enum {
OPT_DESTDIR,
OPT_VERBOSE,
OPT_SMALL,
OPT_IGNORE_SISO_CHECK,
OPT_COUNT
};
@ -184,7 +186,8 @@ static UOption options[]={
UOPTION_VERSION,
UOPTION_DESTDIR,
UOPTION_VERBOSE,
{ "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }
{ "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
{ "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }
};
int main(int argc, char* argv[])
@ -236,7 +239,8 @@ int main(int argc, char* argv[])
"\t --small Generate smaller .cnv files. They will be\n"
"\t significantly smaller but may not be compatible with\n"
"\t older versions of ICU and will require heap memory\n"
"\t allocation when loaded.\n");
"\t allocation when loaded.\n"
"\t --ignore-siso-check Use SI/SO other than 0xf/0xe.\n");
return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
}
@ -253,6 +257,10 @@ int main(int argc, char* argv[])
VERBOSE = options[OPT_VERBOSE].doesOccur;
SMALL = options[OPT_SMALL].doesOccur;
if (options[OPT_IGNORE_SISO_CHECK].doesOccur) {
IGNORE_SISO_CHECK = TRUE;
}
if (destdir != NULL && *destdir != 0) {
uprv_strcpy(outFileName, destdir);
destdirlen = uprv_strlen(destdir);
@ -579,7 +587,7 @@ readFile(ConvData *data, const char* converterName,
if(data->ucm->baseName[0]==0) {
dataIsBase=TRUE;
baseStates=&data->ucm->states;
ucm_processStates(baseStates);
ucm_processStates(baseStates, IGNORE_SISO_CHECK);
} else {
dataIsBase=FALSE;
baseStates=NULL;
@ -782,7 +790,7 @@ createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCod
fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
*pErrorCode=U_INVALID_TABLE_FORMAT;
} else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
} else if(1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n");
*pErrorCode=U_INVALID_TABLE_FORMAT;

View File

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2000-2007, International Business Machines
* Copyright (C) 2000-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -25,6 +25,7 @@
/* exports from makeconv.c */
U_CFUNC UBool VERBOSE;
U_CFUNC UBool SMALL;
U_CFUNC UBool IGNORE_SISO_CHECK;
/* converter table type for writing */
enum {

View File

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 2003-2009, International Business Machines
* Copyright (C) 2003-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: ucm.h
@ -237,7 +237,7 @@ U_CAPI void U_EXPORT2
ucm_addState(UCMStates *states, const char *s);
U_CAPI void U_EXPORT2
ucm_processStates(UCMStates *states);
ucm_processStates(UCMStates *states, UBool ignoreSISOCheck);
U_CAPI int32_t U_EXPORT2
ucm_countChars(UCMStates *states,

View File

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2003-2005, International Business Machines
* Copyright (C) 2003-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -421,7 +421,7 @@ sumUpStates(UCMStates *states) {
}
U_CAPI void U_EXPORT2
ucm_processStates(UCMStates *states) {
ucm_processStates(UCMStates *states, UBool ignoreSISOCheck) {
int32_t entry, state, cell, count;
if(states->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
@ -557,10 +557,11 @@ ucm_processStates(UCMStates *states) {
exit(U_INVALID_TABLE_FORMAT);
}
/* are the SI/SO all in the right places? */
if( states->stateTable[0][0xe]==MBCS_ENTRY_FINAL(1, MBCS_STATE_CHANGE_ONLY, 0) &&
if( ignoreSISOCheck ||
(states->stateTable[0][0xe]==MBCS_ENTRY_FINAL(1, MBCS_STATE_CHANGE_ONLY, 0) &&
states->stateTable[0][0xf]==MBCS_ENTRY_FINAL(0, MBCS_STATE_CHANGE_ONLY, 0) &&
states->stateTable[1][0xe]==MBCS_ENTRY_FINAL(1, MBCS_STATE_CHANGE_ONLY, 0) &&
states->stateTable[1][0xf]==MBCS_ENTRY_FINAL(0, MBCS_STATE_CHANGE_ONLY, 0)
states->stateTable[1][0xf]==MBCS_ENTRY_FINAL(0, MBCS_STATE_CHANGE_ONLY, 0))
) {
states->outputType=MBCS_OUTPUT_2_SISO;
} else {