diff --git a/icu4c/source/common/ucnv.c b/icu4c/source/common/ucnv.c index 3308c6c7bb..8e5073c2e3 100644 --- a/icu4c/source/common/ucnv.c +++ b/icu4c/source/common/ucnv.c @@ -1,7 +1,7 @@ /* ****************************************************************************** * -* Copyright (C) 1998-2011, International Business Machines +* Copyright (C) 1998-2012, International Business Machines * Corporation and others. All Rights Reserved. * ****************************************************************************** @@ -2851,14 +2851,12 @@ ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status) return -1; } - if(cnv->preFromULength > 0){ + if(cnv->preFromUFirstCP >= 0){ return U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength ; }else if(cnv->preFromULength < 0){ return -cnv->preFromULength ; }else if(cnv->fromUChar32 > 0){ return 1; - }else if(cnv->preFromUFirstCP >0){ - return U16_LENGTH(cnv->preFromUFirstCP); } return 0; diff --git a/icu4c/source/common/ucnvmbcs.c b/icu4c/source/common/ucnvmbcs.c index 159b5e5ca5..f3d83a3625 100644 --- a/icu4c/source/common/ucnvmbcs.c +++ b/icu4c/source/common/ucnvmbcs.c @@ -5122,6 +5122,7 @@ moreBytes: * but then exit the loop because the extension match would * have consumed the source. */ + *pErrorCode=U_USING_DEFAULT_WARNING; break; } else { /* a mapping was written to the target, continue */ @@ -5142,7 +5143,9 @@ moreBytes: * to stop before a truncated sequence. * If so, then collect the truncated sequence now. */ - if(U_SUCCESS(*pErrorCode) && source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) { + if(U_SUCCESS(*pErrorCode) && + cnv->preFromUFirstCP<0 && + source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) { c=utf8->toUBytes[0]=b=*source++; toULength=1; toULimit=utf8_countTrailBytes[b]+1; @@ -5428,6 +5431,7 @@ unassigned: * but then exit the loop because the extension match would * have consumed the source. */ + *pErrorCode=U_USING_DEFAULT_WARNING; break; } else { /* a mapping was written to the target, continue */ @@ -5449,7 +5453,9 @@ unassigned: * to stop before a truncated sequence. * If so, then collect the truncated sequence now. */ - if(U_SUCCESS(*pErrorCode) && source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) { + if(U_SUCCESS(*pErrorCode) && + cnv->preFromUFirstCP<0 && + source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) { c=utf8->toUBytes[0]=b=*source++; toULength=1; toULimit=utf8_countTrailBytes[b]+1; diff --git a/icu4c/source/test/testdata/Makefile.in b/icu4c/source/test/testdata/Makefile.in index 3baada9692..df5a82c0d7 100644 --- a/icu4c/source/test/testdata/Makefile.in +++ b/icu4c/source/test/testdata/Makefile.in @@ -1,6 +1,6 @@ #****************************************************************************** # -# Copyright (C) 1998-2011, International Business Machines +# Copyright (C) 1998-2012, International Business Machines # Corporation and others. All Rights Reserved. # #****************************************************************************** @@ -129,7 +129,7 @@ TESTDT=$(TESTDATA) TEST_DAT_FILES=$(TESTBUILDDIR)/test.icu TEST_SPP_FILES=$(TESTBUILDDIR)/nfscsi.spp $(TESTBUILDDIR)/nfscss.spp $(TESTBUILDDIR)/nfscis.spp $(TESTBUILDDIR)/nfsmxs.spp $(TESTBUILDDIR)/nfsmxp.spp -TEST_UCM_SOURCE= test1.ucm test1bmp.ucm test3.ucm test4.ucm test4x.ucm test5.ucm ibm9027.ucm +TEST_UCM_SOURCE= test1.ucm test1bmp.ucm test2.ucm test3.ucm test4.ucm test4x.ucm test5.ucm ibm9027.ucm TEST_UCM_FILES=$(TEST_UCM_SOURCE:%=$(TESTSRCDATADIR)/data/%) TEST_CNV_FILES=$(TEST_UCM_SOURCE:%.ucm=$(TESTBUILDDIR)/%.cnv) diff --git a/icu4c/source/test/testdata/conversion.txt b/icu4c/source/test/testdata/conversion.txt index f1c4cfed73..e9881a000f 100644 --- a/icu4c/source/test/testdata/conversion.txt +++ b/icu4c/source/test/testdata/conversion.txt @@ -1808,6 +1808,21 @@ conversion:table(nofallback) { :intvector{ 0,0,0,0,0,1,2,2,2,3,3,3,3,3 }, :int{1}, :int{0}, "", "0", "" } + // Bug #9601 direct-from-UTF-8 m:n Unicode:charset conversion. + { + "*test1bmp", + "uv", + :bin{ 08 }, + :intvector{ 0 }, + :int{1}, :int{0}, "", "?", "" + } + { + "*test2", + "\U00101234\U00050005", + :bin{ 0700010e05 }, + :intvector{ 0,0,0,0,0 }, + :int{1}, :int{0}, "", "?", "" + } } } diff --git a/icu4c/source/test/testdata/test2.ucm b/icu4c/source/test/testdata/test2.ucm new file mode 100644 index 0000000000..3b950e1cd9 --- /dev/null +++ b/icu4c/source/test/testdata/test2.ucm @@ -0,0 +1,59 @@ +# ******************************************************************************* +# * Copyright (C) 2012, International Business Machines +# * Corporation and others. All Rights Reserved. +# ******************************************************************************* +# +# test2.ucm +# +# Test file for MBCS conversion with two-byte codepage data. (DBCS) +# Also contains extension mappings (m:n). + + "test2" + 2 + 1 + "MBCS" + \x1A + 0, 1:1, 5-9, 1a, ff + a-f.p + +CHARMAP + +# fromUnicode result is zero byte from other than U+0000 + \x00 |0 + +# nothing special + \x05 |0 + +# extensions + \x05+\x01\x0d |0 + \x05+\x01\x0e |3 + \x05+\xff |3 + +# toUnicode result is fallback direct + \x06 |3 + +# toUnicode result is direct non-BMP code point + \x07 |0 + \x08 |3 + +# extensions +++ \x07+\x00+\x01\x0f+\x09 |0 ++ \x07+\x00+\x01\x0e+\x05 |0 ++ \x07+\x00+\x01\x0f+\x06 |0 ++ \x07+\x00+\x01\x0f |1 + +#unassigned \x09 + +# extensions where the first code point is unassigned, for replay testing +# \x09+\x09 |0 + \x05+\x01\x0c |0 + +# toUnicode result is surrogate pair: test real pair, single unit, unassigned + \x01\x0a |0 + \x01\x0b |0 +#unassigned \x01\x0c + \x01\x0d |3 + \x01\x0e |3 +#unassigned \x01\x0f + +END CHARMAP diff --git a/icu4c/source/test/testdata/testdata.mak b/icu4c/source/test/testdata/testdata.mak index 6d3786faef..d38a19ff99 100644 --- a/icu4c/source/test/testdata/testdata.mak +++ b/icu4c/source/test/testdata/testdata.mak @@ -1,5 +1,5 @@ #********************************************************************** -#* Copyright (C) 1999-2010, International Business Machines Corporation +#* Copyright (C) 1999-2012, International Business Machines Corporation #* and others. All Rights Reserved. #********************************************************************** # @@ -28,7 +28,7 @@ ALL : "$(TESTDATAOUT)\testdata.dat" TEST_RES_FILES = $(TEST_RES_SOURCE:.txt=.res) -"$(TESTDATAOUT)\testdata.dat" : $(TEST_RES_FILES) "$(TESTDATABLD)\casing.res" "$(TESTDATABLD)\conversion.res" "$(TESTDATABLD)\icuio.res" "$(TESTDATABLD)\mc.res" "$(TESTDATABLD)\structLocale.res" "$(TESTDATABLD)\root.res" "$(TESTDATABLD)\sh.res" "$(TESTDATABLD)\sh_YU.res" "$(TESTDATABLD)\te.res" "$(TESTDATABLD)\te_IN.res" "$(TESTDATABLD)\te_IN_REVISED.res" "$(TESTDATABLD)\testaliases.res" "$(TESTDATABLD)\testtypes.res" "$(TESTDATABLD)\testempty.res" "$(TESTDATABLD)\iscii.res" "$(TESTDATABLD)\idna_rules.res" "$(TESTDATABLD)\DataDrivenCollationTest.res" "$(TESTDATABLD)\test.icu" "$(TESTDATABLD)\testtable32.res" "$(TESTDATABLD)\test1.cnv" "$(TESTDATABLD)\test1bmp.cnv" "$(TESTDATABLD)\test3.cnv" "$(TESTDATABLD)\test4.cnv" "$(TESTDATABLD)\test4x.cnv" "$(TESTDATABLD)\test5.cnv" "$(TESTDATABLD)\ibm9027.cnv" "$(TESTDATABLD)\nfscsi.spp" "$(TESTDATABLD)\nfscss.spp" "$(TESTDATABLD)\nfscis.spp" "$(TESTDATABLD)\nfsmxs.spp" "$(TESTDATABLD)\nfsmxp.spp" "$(TESTDATABLD)\testnorm.nrm" +"$(TESTDATAOUT)\testdata.dat" : $(TEST_RES_FILES) "$(TESTDATABLD)\casing.res" "$(TESTDATABLD)\conversion.res" "$(TESTDATABLD)\icuio.res" "$(TESTDATABLD)\mc.res" "$(TESTDATABLD)\structLocale.res" "$(TESTDATABLD)\root.res" "$(TESTDATABLD)\sh.res" "$(TESTDATABLD)\sh_YU.res" "$(TESTDATABLD)\te.res" "$(TESTDATABLD)\te_IN.res" "$(TESTDATABLD)\te_IN_REVISED.res" "$(TESTDATABLD)\testaliases.res" "$(TESTDATABLD)\testtypes.res" "$(TESTDATABLD)\testempty.res" "$(TESTDATABLD)\iscii.res" "$(TESTDATABLD)\idna_rules.res" "$(TESTDATABLD)\DataDrivenCollationTest.res" "$(TESTDATABLD)\test.icu" "$(TESTDATABLD)\testtable32.res" "$(TESTDATABLD)\test1.cnv" "$(TESTDATABLD)\test1bmp.cnv" "$(TESTDATABLD)\test2.cnv" "$(TESTDATABLD)\test3.cnv" "$(TESTDATABLD)\test4.cnv" "$(TESTDATABLD)\test4x.cnv" "$(TESTDATABLD)\test5.cnv" "$(TESTDATABLD)\ibm9027.cnv" "$(TESTDATABLD)\nfscsi.spp" "$(TESTDATABLD)\nfscss.spp" "$(TESTDATABLD)\nfscis.spp" "$(TESTDATABLD)\nfsmxs.spp" "$(TESTDATABLD)\nfsmxp.spp" "$(TESTDATABLD)\testnorm.nrm" @echo Building test data @copy "$(TESTDATABLD)\te.res" "$(TESTDATAOUT)\$(TESTDT)\nam.typ" @copy "$(TESTDATA)\old_l_testtypes.res" "$(TESTDATABLD)" @@ -55,6 +55,7 @@ iscii.res test.icu test1.cnv test1bmp.cnv +test2.cnv test3.cnv test4.cnv test4x.cnv @@ -133,6 +134,10 @@ $(TEST_RES_FILES:.res =.res @echo Building $@ @"$(ICUTOOLS)\makeconv\$(CFG)\makeconv" --small -d"$(TESTDATABLD)" $** +"$(TESTDATABLD)\test2.cnv": "$(TESTDATA)\test2.ucm" + @echo Building $@ + @"$(ICUTOOLS)\makeconv\$(CFG)\makeconv" --small -d"$(TESTDATABLD)" $** + "$(TESTDATABLD)\test3.cnv": "$(TESTDATA)\test3.ucm" @echo Building $@ @"$(ICUTOOLS)\makeconv\$(CFG)\makeconv" --small -d"$(TESTDATABLD)" $**