2000-01-15 02:00:06 +00:00
/********************************************************************
2001-04-18 19:31:05 +00:00
* COPYRIGHT :
2001-03-21 19:46:49 +00:00
* Copyright ( c ) 1997 - 2001 , International Business Machines Corporation and
2000-01-15 02:00:06 +00:00
* others . All Rights Reserved .
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/********************************************************************************
1999-08-16 21:50:52 +00:00
*
* File CCONVTST . C
*
* Modification History :
2001-04-18 19:31:05 +00:00
* Name Description
1999-08-16 21:50:52 +00:00
* Steven R . Loomis 7 / 8 / 1999 Adding input buffer test
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*/
# include <stdio.h>
2000-07-21 23:28:07 +00:00
# include "cstring.h"
1999-12-28 23:57:50 +00:00
# include "unicode/uloc.h"
# include "unicode/ucnv.h"
# include "unicode/ucnv_err.h"
1999-08-16 21:50:52 +00:00
# include "cintltst.h"
1999-12-28 23:57:50 +00:00
# include "unicode/utypes.h"
# include "unicode/ustring.h"
2001-11-05 23:17:51 +00:00
# include "unicode/ucol.h"
1999-08-16 21:50:52 +00:00
2000-06-22 01:18:30 +00:00
static void TestNextUChar ( UConverter * cnv , const char * source , const char * limit , const uint32_t results [ ] , const char * message ) ;
2000-06-22 23:46:02 +00:00
static void TestNextUCharError ( UConverter * cnv , const char * source , const char * limit , UErrorCode expected , const char * message ) ;
2001-11-13 04:28:44 +00:00
static void TestJitterbug981 ( void ) ;
static void TestJitterbug1293 ( void ) ;
2000-11-21 04:05:39 +00:00
static void TestNewConvertWithBufferSizes ( int32_t osize , int32_t isize ) ;
static void TestConverterTypesAndStarters ( void ) ;
static void TestAmbiguous ( void ) ;
2002-01-08 01:05:57 +00:00
static void TestSignatureDetection ( ) ;
2001-01-09 22:57:47 +00:00
static void TestUTF7 ( void ) ;
2000-11-21 04:05:39 +00:00
static void TestUTF8 ( void ) ;
static void TestUTF16BE ( void ) ;
static void TestUTF16LE ( void ) ;
2000-12-20 00:02:15 +00:00
static void TestUTF32BE ( void ) ;
static void TestUTF32LE ( void ) ;
2000-11-21 04:05:39 +00:00
static void TestLATIN1 ( void ) ;
static void TestSBCS ( void ) ;
static void TestDBCS ( void ) ;
static void TestMBCS ( void ) ;
static void TestISO_2022 ( void ) ;
static void TestISO_2022_JP ( void ) ;
static void TestISO_2022_JP_1 ( void ) ;
static void TestISO_2022_JP_2 ( void ) ;
static void TestISO_2022_KR ( void ) ;
2001-02-24 02:52:14 +00:00
static void TestISO_2022_KR_1 ( void ) ;
2000-11-21 04:05:39 +00:00
static void TestISO_2022_CN ( void ) ;
static void TestISO_2022_CN_EXT ( void ) ;
2001-02-23 04:40:39 +00:00
static void TestJIS ( void ) ;
2000-11-21 04:05:39 +00:00
static void TestHZ ( void ) ;
2001-03-02 23:55:49 +00:00
static void TestSCSU ( void ) ;
2000-11-21 04:05:39 +00:00
static void TestEBCDIC_STATEFUL ( void ) ;
static void TestGB18030 ( void ) ;
static void TestLMBCS ( void ) ;
static void TestJitterbug255 ( void ) ;
2001-03-16 23:03:31 +00:00
static void TestJitterbug792 ( void ) ;
2000-11-21 04:05:39 +00:00
static void TestEBCDICUS4XML ( void ) ;
2001-05-11 02:30:47 +00:00
static void TestJitterbug915 ( void ) ;
2001-07-14 02:29:21 +00:00
static void TestISCII ( void ) ;
static void TestConv ( const uint16_t in [ ] ,
int len ,
const char * conv ,
const char * lang ,
char byteArr [ ] ,
int byteArrLen ) ;
2001-05-31 23:30:09 +00:00
void addTestNewConvert ( TestNode * * root ) ;
1999-08-16 21:50:52 +00:00
/* Element count of the scratch output/offset arrays used by the conversion
   helpers, and the default value of both chunk sizes below. */
#define NEW_MAX_BUFFER 999

/* Maximum number of input units / output units handed to the converter per
   call; set by TestNewConvertWithBufferSizes(). */
static int32_t gInBufferSize = NEW_MAX_BUFFER;
static int32_t gOutBufferSize = NEW_MAX_BUFFER;

/* Description of the conversion currently under test, filled in by
   setNuConvTestName() and appended to log messages. */
static char gNuConvTestName[1024];

/* Smaller of x and y. Both arguments are fully parenthesized so that
   expressions with low-precedence operators expand correctly; each argument
   may still be evaluated twice, so do not pass expressions with side effects. */
#define nct_min(x,y) (((x) < (y)) ? (x) : (y))
2000-11-21 04:05:39 +00:00
/* Writes the first len bytes of a to the verbose log as hex values wrapped in braces. */
static void printSeq(const unsigned char *a, int len)
{
    int idx;

    log_verbose(" { ");
    for (idx = 0; idx < len; idx++) {
        log_verbose(" 0x%02x ", a[idx]);
    }
    log_verbose(" } \n ");
}
2000-11-21 04:05:39 +00:00
/* Writes the first len UChars of a to the verbose log as 4-digit hex values wrapped in braces. */
static void printUSeq(const UChar *a, int len)
{
    int idx;

    log_verbose(" {U+ ");
    for (idx = 0; idx < len; idx++) {
        log_verbose(" 0x%04x ", a[idx]);
    }
    log_verbose(" } \n ");
}
2000-11-21 04:05:39 +00:00
/* Same output as printSeq(), but written directly to stderr instead of the verbose log. */
static void printSeqErr(const unsigned char *a, int len)
{
    int idx;

    fprintf(stderr, " { ");
    for (idx = 0; idx < len; idx++) {
        fprintf(stderr, " 0x%02x ", a[idx]);
    }
    fprintf(stderr, " } \n ");
}
2000-11-21 04:05:39 +00:00
2000-12-08 01:13:38 +00:00
/* Same output as printUSeq(), but written directly to stderr instead of the verbose log. */
static void printUSeqErr(const UChar *a, int len)
{
    int idx;

    fprintf(stderr, " {U+ ");
    for (idx = 0; idx < len; idx++) {
        fprintf(stderr, " 0x%04x ", a[idx]);
    }
    fprintf(stderr, " } \n ");
}
2000-11-21 04:05:39 +00:00
2001-04-18 19:31:05 +00:00
static void
2000-06-22 01:18:30 +00:00
TestNextUChar ( UConverter * cnv , const char * source , const char * limit , const uint32_t results [ ] , const char * message )
{
const char * s0 ;
2000-06-29 01:48:34 +00:00
const char * s = ( char * ) source ;
2000-06-22 01:18:30 +00:00
const uint32_t * r = results ;
UErrorCode errorCode = U_ZERO_ERROR ;
uint32_t c ;
1999-08-16 21:50:52 +00:00
2000-06-22 01:18:30 +00:00
while ( s < limit ) {
s0 = s ;
c = ucnv_getNextUChar ( cnv , & s , limit , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " %s ucnv_getNextUChar() failed: %s \n " , message , u_errorName ( errorCode ) ) ;
break ;
} else if ( ( uint32_t ) ( s - s0 ) ! = * r | | c ! = ( UChar32 ) * ( r + 1 ) ) {
log_err ( " %s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes. \n " ,
message , c , ( s - s0 ) , * ( r + 1 ) , * r ) ;
break ;
}
r + = 2 ;
}
}
2000-11-21 04:05:39 +00:00
2001-04-18 19:31:05 +00:00
static void
2000-06-22 23:46:02 +00:00
TestNextUCharError ( UConverter * cnv , const char * source , const char * limit , UErrorCode expected , const char * message )
{
2000-06-29 01:48:34 +00:00
const char * s = ( char * ) source ;
2000-06-22 23:46:02 +00:00
UErrorCode errorCode = U_ZERO_ERROR ;
uint32_t c ;
c = ucnv_getNextUChar ( cnv , & s , limit , & errorCode ) ;
if ( errorCode ! = expected ) {
log_err ( " FAIL: Expected:%s when %s-----Got:%s \n " , myErrorName ( expected ) , message , myErrorName ( errorCode ) ) ;
}
2000-07-13 00:28:06 +00:00
if ( c ! = 0xFFFD & & c ! = 0xffff ) {
log_err ( " FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx \n " , message , c ) ;
2000-06-22 23:46:02 +00:00
}
2001-04-18 19:31:05 +00:00
}
2000-11-21 04:05:39 +00:00
static void TestInBufSizes ( void )
1999-08-16 21:50:52 +00:00
{
TestNewConvertWithBufferSizes ( NEW_MAX_BUFFER , 1 ) ;
2000-06-29 02:53:29 +00:00
# if 1
1999-08-16 21:50:52 +00:00
TestNewConvertWithBufferSizes ( NEW_MAX_BUFFER , 2 ) ;
TestNewConvertWithBufferSizes ( NEW_MAX_BUFFER , 3 ) ;
TestNewConvertWithBufferSizes ( NEW_MAX_BUFFER , 4 ) ;
TestNewConvertWithBufferSizes ( NEW_MAX_BUFFER , 5 ) ;
TestNewConvertWithBufferSizes ( NEW_MAX_BUFFER , 6 ) ;
TestNewConvertWithBufferSizes ( 1 , 1 ) ;
TestNewConvertWithBufferSizes ( 2 , 3 ) ;
TestNewConvertWithBufferSizes ( 3 , 2 ) ;
2000-06-29 02:53:29 +00:00
# endif
1999-08-16 21:50:52 +00:00
}
2000-11-21 04:05:39 +00:00
static void TestOutBufSizes ( void )
1999-08-16 21:50:52 +00:00
{
2000-06-29 02:53:29 +00:00
# if 1
1999-08-16 21:50:52 +00:00
TestNewConvertWithBufferSizes ( NEW_MAX_BUFFER , NEW_MAX_BUFFER ) ;
TestNewConvertWithBufferSizes ( 1 , NEW_MAX_BUFFER ) ;
TestNewConvertWithBufferSizes ( 2 , NEW_MAX_BUFFER ) ;
TestNewConvertWithBufferSizes ( 3 , NEW_MAX_BUFFER ) ;
TestNewConvertWithBufferSizes ( 4 , NEW_MAX_BUFFER ) ;
TestNewConvertWithBufferSizes ( 5 , NEW_MAX_BUFFER ) ;
2001-04-18 19:31:05 +00:00
2000-06-29 02:53:29 +00:00
# endif
1999-08-16 21:50:52 +00:00
}
/* Registers test function fn under a path built from its own name, so the
   path and the function cannot drift apart. */
#define ADD_NUCNV_TEST(fn) addTest(root, &fn, " tsconv/nucnvtst/" #fn " ")

/*
 * Registers every converter test of this file under `root`.
 * The registration order below is the original one and is kept as is.
 */
void addTestNewConvert(TestNode **root)
{
    ADD_NUCNV_TEST(TestInBufSizes);
    ADD_NUCNV_TEST(TestOutBufSizes);
    ADD_NUCNV_TEST(TestConverterTypesAndStarters);
    ADD_NUCNV_TEST(TestAmbiguous);
    ADD_NUCNV_TEST(TestSignatureDetection);
    ADD_NUCNV_TEST(TestUTF7);
    ADD_NUCNV_TEST(TestUTF8);
    ADD_NUCNV_TEST(TestUTF16BE);
    ADD_NUCNV_TEST(TestUTF16LE);
    ADD_NUCNV_TEST(TestUTF32BE);
    ADD_NUCNV_TEST(TestUTF32LE);
    ADD_NUCNV_TEST(TestLATIN1);
    ADD_NUCNV_TEST(TestSBCS);
    ADD_NUCNV_TEST(TestDBCS);
    ADD_NUCNV_TEST(TestMBCS);
    ADD_NUCNV_TEST(TestISO_2022);
    ADD_NUCNV_TEST(TestISO_2022_JP);
    ADD_NUCNV_TEST(TestJIS);
    ADD_NUCNV_TEST(TestISO_2022_JP_1);
    ADD_NUCNV_TEST(TestISO_2022_JP_2);
    ADD_NUCNV_TEST(TestISO_2022_KR);
    ADD_NUCNV_TEST(TestISO_2022_KR_1);
    ADD_NUCNV_TEST(TestISO_2022_CN);
    ADD_NUCNV_TEST(TestISO_2022_CN_EXT);
    ADD_NUCNV_TEST(TestJitterbug915);
    ADD_NUCNV_TEST(TestHZ);
    ADD_NUCNV_TEST(TestSCSU);
    ADD_NUCNV_TEST(TestEBCDIC_STATEFUL);
    ADD_NUCNV_TEST(TestGB18030);
    ADD_NUCNV_TEST(TestLMBCS);
    ADD_NUCNV_TEST(TestJitterbug255);
    ADD_NUCNV_TEST(TestJitterbug792);
    ADD_NUCNV_TEST(TestEBCDICUS4XML);
    ADD_NUCNV_TEST(TestISCII);
    ADD_NUCNV_TEST(TestJitterbug981);
    ADD_NUCNV_TEST(TestJitterbug1293);
}

#undef ADD_NUCNV_TEST
2001-04-18 19:31:05 +00:00
/* Note that this test already makes use of statics, so it's not really
multithread safe .
1999-08-16 21:50:52 +00:00
This convenience function lets us make the error messages actually useful .
*/
2000-11-21 04:05:39 +00:00
static void setNuConvTestName ( const char * codepage , const char * direction )
1999-08-16 21:50:52 +00:00
{
sprintf ( gNuConvTestName , " [Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d] " ,
2000-02-05 00:01:54 +00:00
codepage ,
direction ,
gInBufferSize ,
gOutBufferSize ) ;
1999-08-16 21:50:52 +00:00
}
2001-01-26 03:05:11 +00:00
/* Note: This function uses global variables and it will not do offset
checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
2001-04-18 19:31:05 +00:00
static UBool testConvertFromU ( const UChar * source , int sourceLen , const uint8_t * expect , int expectLen ,
2001-02-23 04:40:39 +00:00
const char * codepage , const int32_t * expectOffsets , UBool useFallback )
1999-08-16 21:50:52 +00:00
{
2000-02-05 00:01:54 +00:00
UErrorCode status = U_ZERO_ERROR ;
UConverter * conv = 0 ;
2000-08-11 03:35:25 +00:00
uint8_t junkout [ NEW_MAX_BUFFER ] ; /* FIX */
2000-02-05 00:01:54 +00:00
int32_t junokout [ NEW_MAX_BUFFER ] ; /* FIX */
2000-08-11 03:35:25 +00:00
uint8_t * p ;
2000-02-05 00:01:54 +00:00
const UChar * src ;
2000-08-11 03:35:25 +00:00
uint8_t * end ;
uint8_t * targ ;
2000-02-05 00:01:54 +00:00
int32_t * offs ;
int i ;
int32_t realBufferSize ;
2000-08-11 03:35:25 +00:00
uint8_t * realBufferEnd ;
2000-02-05 00:01:54 +00:00
const UChar * realSourceEnd ;
const UChar * sourceLimit ;
2000-05-18 22:08:39 +00:00
UBool checkOffsets = TRUE ;
UBool doFlush ;
2000-02-05 00:01:54 +00:00
for ( i = 0 ; i < NEW_MAX_BUFFER ; i + + )
2000-08-11 03:35:25 +00:00
junkout [ i ] = 0xF0 ;
2000-02-05 00:01:54 +00:00
for ( i = 0 ; i < NEW_MAX_BUFFER ; i + + )
junokout [ i ] = 0xFF ;
setNuConvTestName ( codepage , " FROM " ) ;
log_verbose ( " \n ========= %s \n " , gNuConvTestName ) ;
conv = ucnv_open ( codepage , & status ) ;
if ( U_FAILURE ( status ) )
{
2001-04-18 19:31:05 +00:00
log_err ( " Couldn't open converter %s \n " , codepage ) ;
2000-02-05 00:01:54 +00:00
return FALSE ;
}
2001-02-23 04:40:39 +00:00
if ( useFallback ) {
ucnv_setFallback ( conv , useFallback ) ;
}
2000-02-05 00:01:54 +00:00
log_verbose ( " Converter opened.. \n " ) ;
src = source ;
targ = junkout ;
offs = junokout ;
realBufferSize = ( sizeof ( junkout ) / sizeof ( junkout [ 0 ] ) ) ;
realBufferEnd = junkout + realBufferSize ;
realSourceEnd = source + sourceLen ;
2001-01-26 03:05:11 +00:00
if ( gOutBufferSize ! = realBufferSize | | gInBufferSize ! = NEW_MAX_BUFFER )
2000-08-11 16:33:09 +00:00
checkOffsets = FALSE ;
2001-04-18 19:31:05 +00:00
2000-02-05 00:01:54 +00:00
do
2000-08-11 16:33:09 +00:00
{
2000-02-05 00:01:54 +00:00
end = nct_min ( targ + gOutBufferSize , realBufferEnd ) ;
sourceLimit = nct_min ( src + gInBufferSize , realSourceEnd ) ;
1999-08-16 21:50:52 +00:00
2000-08-11 03:35:25 +00:00
doFlush = ( UBool ) ( sourceLimit = = realSourceEnd ) ;
1999-08-16 21:50:52 +00:00
2000-02-05 00:01:54 +00:00
if ( targ = = realBufferEnd )
{
log_err ( " Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s " , targ , gNuConvTestName ) ;
return FALSE ;
}
log_verbose ( " calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s \n " , src , sourceLimit , targ , end , doFlush ? " TRUE " : " FALSE " ) ;
2001-04-18 19:31:05 +00:00
1999-08-16 21:50:52 +00:00
2000-02-05 00:01:54 +00:00
status = U_ZERO_ERROR ;
2001-04-18 19:31:05 +00:00
2000-02-05 00:01:54 +00:00
ucnv_fromUnicode ( conv ,
2000-08-11 03:35:25 +00:00
( char * * ) & targ ,
2000-08-16 22:30:12 +00:00
( const char * ) end ,
2000-02-05 00:01:54 +00:00
& src ,
sourceLimit ,
checkOffsets ? offs : NULL ,
doFlush , /* flush if we're at the end of the input data */
& status ) ;
2000-08-11 19:51:13 +00:00
} while ( ( status = = U_BUFFER_OVERFLOW_ERROR ) | | ( U_SUCCESS ( status ) & & sourceLimit < realSourceEnd ) ) ;
2000-02-05 00:01:54 +00:00
if ( U_FAILURE ( status ) )
2000-08-11 16:33:09 +00:00
{
2000-07-19 20:14:27 +00:00
log_err ( " Problem doing fromUnicode to %s, errcode %s %s \n " , codepage , myErrorName ( status ) , gNuConvTestName ) ;
2000-02-05 00:01:54 +00:00
return FALSE ;
2000-08-11 16:33:09 +00:00
}
2000-02-05 00:01:54 +00:00
log_verbose ( " \n Conversion done [%d uchars in -> %d chars out]. \n Result : " ,
sourceLen , targ - junkout ) ;
if ( VERBOSITY )
{
2000-08-11 16:33:09 +00:00
char junk [ 9999 ] ;
char offset_str [ 9999 ] ;
2000-12-08 01:13:38 +00:00
uint8_t * ptr ;
2000-08-11 03:35:25 +00:00
2000-02-05 00:01:54 +00:00
junk [ 0 ] = 0 ;
offset_str [ 0 ] = 0 ;
2000-12-08 01:13:38 +00:00
for ( ptr = junkout ; ptr < targ ; ptr + + )
2000-02-05 00:01:54 +00:00
{
2000-12-08 01:13:38 +00:00
sprintf ( junk + strlen ( junk ) , " 0x%02x, " , ( int ) ( 0xFF & * ptr ) ) ;
sprintf ( offset_str + strlen ( offset_str ) , " 0x%02x, " , ( int ) ( 0xFF & junokout [ ptr - junkout ] ) ) ;
2000-02-05 00:01:54 +00:00
}
2000-08-11 03:35:25 +00:00
2000-08-11 16:33:09 +00:00
log_verbose ( junk ) ;
2000-08-11 03:35:25 +00:00
printSeq ( ( const uint8_t * ) expect , expectLen ) ;
2000-02-05 00:01:54 +00:00
if ( checkOffsets )
2000-08-11 16:33:09 +00:00
{
2000-02-05 00:01:54 +00:00
log_verbose ( " \n Offsets: " ) ;
2000-08-11 16:33:09 +00:00
log_verbose ( offset_str ) ;
}
2000-02-05 00:01:54 +00:00
log_verbose ( " \n " ) ;
}
ucnv_close ( conv ) ;
if ( expectLen ! = targ - junkout )
{
log_err ( " Expected %d chars out, got %d %s \n " , expectLen , targ - junkout , gNuConvTestName ) ;
2000-06-22 01:18:30 +00:00
log_verbose ( " Expected %d chars out, got %d %s \n " , expectLen , targ - junkout , gNuConvTestName ) ;
2000-07-19 20:14:27 +00:00
printf ( " \n Got: " ) ;
2000-08-14 23:35:57 +00:00
printSeqErr ( ( const unsigned char * ) junkout , targ - junkout ) ;
2000-07-19 20:14:27 +00:00
printf ( " \n Expected: " ) ;
2000-08-14 23:35:57 +00:00
printSeqErr ( ( const unsigned char * ) expect , expectLen ) ;
2000-02-05 00:01:54 +00:00
return FALSE ;
}
if ( checkOffsets & & ( expectOffsets ! = 0 ) )
{
log_verbose ( " comparing %d offsets.. \n " , targ - junkout ) ;
2000-06-22 01:18:30 +00:00
if ( memcmp ( junokout , expectOffsets , ( targ - junkout ) * sizeof ( int32_t ) ) ) {
2001-01-09 03:33:07 +00:00
log_err ( " did not get the expected offsets. %s \n " , gNuConvTestName ) ;
2000-08-14 23:35:57 +00:00
printSeqErr ( ( const unsigned char * ) junkout , targ - junkout ) ;
2001-01-09 03:33:07 +00:00
log_err ( " \n " ) ;
log_err ( " Got : " ) ;
for ( p = junkout ; p < targ ; p + + ) {
log_err ( " %d, " , junokout [ p - junkout ] ) ;
}
log_err ( " \n " ) ;
log_err ( " Expected: " ) ;
for ( i = 0 ; i < ( targ - junkout ) ; i + + ) {
2000-08-11 03:35:25 +00:00
log_err ( " %d, " , expectOffsets [ i ] ) ;
2001-01-09 03:33:07 +00:00
}
log_err ( " \n " ) ;
2000-06-22 01:18:30 +00:00
}
2000-02-05 00:01:54 +00:00
}
log_verbose ( " comparing.. \n " ) ;
if ( ! memcmp ( junkout , expect , expectLen ) )
{
log_verbose ( " Matches! \n " ) ;
return TRUE ;
}
else
2000-08-11 03:35:25 +00:00
{
2000-02-05 00:01:54 +00:00
log_err ( " String does not match. %s \n " , gNuConvTestName ) ;
2000-07-13 00:28:06 +00:00
printUSeqErr ( source , sourceLen ) ;
2000-07-19 20:14:27 +00:00
printf ( " \n Got: " ) ;
2000-06-28 22:26:59 +00:00
printSeqErr ( ( const unsigned char * ) junkout , expectLen ) ;
2000-07-19 20:14:27 +00:00
printf ( " \n Expected: " ) ;
2000-06-28 22:26:59 +00:00
printSeqErr ( ( const unsigned char * ) expect , expectLen ) ;
2001-04-18 19:31:05 +00:00
2000-02-05 00:01:54 +00:00
return FALSE ;
}
1999-08-16 21:50:52 +00:00
}
2001-01-26 03:05:11 +00:00
/* Note: This function uses global variables and it will not do offset
checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
2001-04-18 19:31:05 +00:00
static UBool testConvertToU ( const uint8_t * source , int sourcelen , const UChar * expect , int expectlen ,
2001-02-23 04:40:39 +00:00
const char * codepage , const int32_t * expectOffsets , UBool useFallback )
1999-08-16 21:50:52 +00:00
{
2000-02-05 00:01:54 +00:00
UErrorCode status = U_ZERO_ERROR ;
UConverter * conv = 0 ;
UChar junkout [ NEW_MAX_BUFFER ] ; /* FIX */
int32_t junokout [ NEW_MAX_BUFFER ] ; /* FIX */
2000-08-11 03:35:25 +00:00
const uint8_t * src ;
const uint8_t * realSourceEnd ;
const uint8_t * srcLimit ;
2000-06-22 01:18:30 +00:00
UChar * p ;
2000-02-05 00:01:54 +00:00
UChar * targ ;
UChar * end ;
int32_t * offs ;
int i ;
2000-05-18 22:08:39 +00:00
UBool checkOffsets = TRUE ;
2001-04-18 19:31:05 +00:00
2000-02-05 00:01:54 +00:00
int32_t realBufferSize ;
UChar * realBufferEnd ;
2001-04-18 19:31:05 +00:00
2000-02-05 00:01:54 +00:00
for ( i = 0 ; i < NEW_MAX_BUFFER ; i + + )
junkout [ i ] = 0xFFFE ;
for ( i = 0 ; i < NEW_MAX_BUFFER ; i + + )
junokout [ i ] = - 1 ;
setNuConvTestName ( codepage , " TO " ) ;
log_verbose ( " \n ========= %s \n " , gNuConvTestName ) ;
conv = ucnv_open ( codepage , & status ) ;
if ( U_FAILURE ( status ) )
{
log_err ( " Couldn't open converter %s \n " , gNuConvTestName ) ;
return FALSE ;
}
2001-02-23 04:40:39 +00:00
if ( useFallback ) {
ucnv_setFallback ( conv , useFallback ) ;
}
2000-02-05 00:01:54 +00:00
log_verbose ( " Converter opened.. \n " ) ;
src = source ;
targ = junkout ;
offs = junokout ;
2001-04-18 19:31:05 +00:00
2000-02-05 00:01:54 +00:00
realBufferSize = ( sizeof ( junkout ) / sizeof ( junkout [ 0 ] ) ) ;
realBufferEnd = junkout + realBufferSize ;
realSourceEnd = src + sourcelen ;
2001-01-26 03:05:11 +00:00
if ( gOutBufferSize ! = realBufferSize | | gInBufferSize ! = NEW_MAX_BUFFER )
checkOffsets = FALSE ;
2000-02-05 00:01:54 +00:00
do
2001-01-26 03:05:11 +00:00
{
2000-02-05 00:01:54 +00:00
end = nct_min ( targ + gOutBufferSize , realBufferEnd ) ;
srcLimit = nct_min ( realSourceEnd , src + gInBufferSize ) ;
if ( targ = = realBufferEnd )
2001-01-26 03:05:11 +00:00
{
log_err ( " Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s " , targ , gNuConvTestName ) ;
return FALSE ;
}
2000-02-05 00:01:54 +00:00
log_verbose ( " calling toUnicode @ %08lx to %08lx \n " , targ , end ) ;
/* oldTarg = targ; */
status = U_ZERO_ERROR ;
ucnv_toUnicode ( conv ,
& targ ,
end ,
2000-08-11 03:35:25 +00:00
( const char * * ) & src ,
( const char * ) srcLimit ,
2000-02-05 00:01:54 +00:00
checkOffsets ? offs : NULL ,
2000-05-18 22:08:39 +00:00
( UBool ) ( srcLimit = = realSourceEnd ) , /* flush if we're at the end of hte source data */
2000-02-05 00:01:54 +00:00
& status ) ;
/* offs += (targ-oldTarg); */
2000-08-11 19:51:13 +00:00
} while ( ( status = = U_BUFFER_OVERFLOW_ERROR ) | | ( U_SUCCESS ( status ) & & ( srcLimit < realSourceEnd ) ) ) ; /* while we just need another buffer */
2000-02-05 00:01:54 +00:00
if ( U_FAILURE ( status ) )
{
2000-07-19 20:14:27 +00:00
log_err ( " Problem doing %s toUnicode, errcode %s %s \n " , codepage , myErrorName ( status ) , gNuConvTestName ) ;
2000-02-05 00:01:54 +00:00
return FALSE ;
}
log_verbose ( " \n Conversion done. %d bytes -> %d chars. \n Result : " ,
sourcelen , targ - junkout ) ;
if ( VERBOSITY )
{
char junk [ 9999 ] ;
char offset_str [ 9999 ] ;
2000-12-08 01:13:38 +00:00
UChar * ptr ;
2001-04-18 19:31:05 +00:00
2000-02-05 00:01:54 +00:00
junk [ 0 ] = 0 ;
offset_str [ 0 ] = 0 ;
2000-12-08 01:13:38 +00:00
for ( ptr = junkout ; ptr < targ ; ptr + + )
2000-02-05 00:01:54 +00:00
{
2000-12-08 01:13:38 +00:00
sprintf ( junk + strlen ( junk ) , " 0x%04x, " , ( 0xFFFF ) & ( unsigned int ) * ptr ) ;
sprintf ( offset_str + strlen ( offset_str ) , " 0x%04x, " , ( 0xFFFF ) & ( unsigned int ) junokout [ ptr - junkout ] ) ;
2000-02-05 00:01:54 +00:00
}
2001-04-18 19:31:05 +00:00
2000-02-05 00:01:54 +00:00
log_verbose ( junk ) ;
2000-08-15 18:05:12 +00:00
printUSeq ( expect , expectlen ) ;
2000-02-05 00:01:54 +00:00
if ( checkOffsets )
{
log_verbose ( " \n Offsets: " ) ;
log_verbose ( offset_str ) ;
}
log_verbose ( " \n " ) ;
}
ucnv_close ( conv ) ;
log_verbose ( " comparing %d uchars (%d bytes).. \n " , expectlen , expectlen * 2 ) ;
if ( checkOffsets & & ( expectOffsets ! = 0 ) )
{
2000-06-22 01:18:30 +00:00
if ( memcmp ( junokout , expectOffsets , ( targ - junkout ) * sizeof ( int32_t ) ) ) {
2001-01-09 03:33:07 +00:00
log_err ( " did not get the expected offsets. %s \n " , gNuConvTestName ) ;
log_err ( " Got: " ) ;
for ( p = junkout ; p < targ ; p + + ) {
log_err ( " %d, " , junokout [ p - junkout ] ) ;
}
log_err ( " \n " ) ;
log_err ( " Expected: " ) ;
for ( i = 0 ; i < ( targ - junkout ) ; i + + ) {
log_err ( " %d, " , expectOffsets [ i ] ) ;
}
log_err ( " \n " ) ;
log_err ( " output: " ) ;
for ( i = 0 ; i < ( targ - junkout ) ; i + + ) {
log_err ( " %X, " , junkout [ i ] ) ;
}
log_err ( " \n " ) ;
log_err ( " input: " ) ;
for ( i = 0 ; i < ( src - source ) ; i + + ) {
log_err ( " %X, " , ( unsigned char ) source [ i ] ) ;
}
log_err ( " \n " ) ;
2000-08-11 03:35:25 +00:00
}
2000-02-05 00:01:54 +00:00
}
if ( ! memcmp ( junkout , expect , expectlen * 2 ) )
{
log_verbose ( " Matches! \n " ) ;
return TRUE ;
}
else
2001-04-18 19:31:05 +00:00
{
2000-02-05 00:01:54 +00:00
log_err ( " String does not match. %s \n " , gNuConvTestName ) ;
2000-06-22 01:18:30 +00:00
log_verbose ( " String does not match. %s \n " , gNuConvTestName ) ;
2000-07-19 20:14:27 +00:00
printf ( " \n Got: " ) ;
2000-08-15 18:05:12 +00:00
printUSeqErr ( junkout , expectlen ) ;
2000-07-19 20:14:27 +00:00
printf ( " \n Expected: " ) ;
2001-04-18 19:31:05 +00:00
printUSeqErr ( expect , expectlen ) ;
2000-02-05 00:01:54 +00:00
return FALSE ;
}
1999-08-16 21:50:52 +00:00
}
2001-04-18 19:31:05 +00:00
static void TestNewConvertWithBufferSizes ( int32_t outsize , int32_t insize )
1999-08-16 21:50:52 +00:00
{
/** test chars #1 */
2000-02-05 00:01:54 +00:00
/* 1 2 3 1Han 2Han 3Han . */
2001-04-18 19:31:05 +00:00
UChar sampleText [ ] =
2000-06-22 01:18:30 +00:00
{ 0x0031 , 0x0032 , 0x0033 , 0x0000 , 0x4e00 , 0x4e8c , 0x4e09 , 0x002E } ;
1999-08-16 21:50:52 +00:00
2001-04-18 19:31:05 +00:00
const uint8_t expectedUTF8 [ ] =
2000-08-11 03:35:25 +00:00
{ 0x31 , 0x32 , 0x33 , 0x00 , 0xe4 , 0xb8 , 0x80 , 0xe4 , 0xba , 0x8c , 0xe4 , 0xb8 , 0x89 , 0x2E } ;
2001-04-18 19:31:05 +00:00
int32_t toUTF8Offs [ ] =
2000-08-11 03:35:25 +00:00
{ 0x00 , 0x01 , 0x02 , 0x03 , 0x04 , 0x04 , 0x04 , 0x05 , 0x05 , 0x05 , 0x06 , 0x06 , 0x06 , 0x07 } ;
2001-04-18 19:31:05 +00:00
int32_t fmUTF8Offs [ ] =
2000-06-22 01:18:30 +00:00
{ 0x0000 , 0x0001 , 0x0002 , 0x0003 , 0x0004 , 0x0007 , 0x000a , 0x000d } ;
2001-04-18 19:31:05 +00:00
2000-02-05 00:01:54 +00:00
/* Same as UTF8, but with ^[%B preceeding */
2001-04-18 19:31:05 +00:00
const uint8_t expectedISO2022 [ ] =
2000-08-11 03:35:25 +00:00
{ 0x1b , 0x25 , 0x42 , 0x31 , 0x32 , 0x33 , 0x00 , 0xe4 , 0xb8 , 0x80 , 0xe4 , 0xba , 0x8c , 0xe4 , 0xb8 , 0x89 , 0x2E } ;
2001-04-18 19:31:05 +00:00
int32_t toISO2022Offs [ ] =
{ - 1 , - 1 , - 1 , 0x00 , 0x01 , 0x02 , 0x03 , 0x04 , 0x04 ,
2000-08-11 03:35:25 +00:00
0x04 , 0x05 , 0x05 , 0x05 , 0x06 , 0x06 , 0x06 , 0x07 } ; /* right? */
2001-04-18 19:31:05 +00:00
int32_t fmISO2022Offs [ ] =
2000-06-22 01:18:30 +00:00
{ 0x0003 , 0x0004 , 0x0005 , 0x0006 , 0x0007 , 0x000a , 0x000d , 0x0010 } ; /* is this right? */
2001-04-18 19:31:05 +00:00
2000-06-22 01:18:30 +00:00
/* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
2001-04-18 19:31:05 +00:00
const uint8_t expectedIBM930 [ ] =
2000-08-11 03:35:25 +00:00
{ 0xF1 , 0xF2 , 0xF3 , 0x00 , 0x0E , 0x45 , 0x41 , 0x45 , 0x42 , 0x45 , 0x43 , 0x0F , 0x4B } ;
2001-04-18 19:31:05 +00:00
int32_t toIBM930Offs [ ] =
2000-08-11 03:35:25 +00:00
{ 0x00 , 0x01 , 0x02 , 0x03 , 0x04 , 0x04 , 0x04 , 0x05 , 0x05 , 0x06 , 0x06 , 0x07 , 0x07 , } ;
2001-04-18 19:31:05 +00:00
int32_t fmIBM930Offs [ ] =
2000-06-22 01:18:30 +00:00
{ 0x0000 , 0x0001 , 0x0002 , 0x0003 , 0x0005 , 0x0007 , 0x0009 , 0x000c } ;
2001-04-18 19:31:05 +00:00
2000-06-22 01:18:30 +00:00
/* 1 2 3 0 h1 h2 h3 . MBCS*/
2001-04-18 19:31:05 +00:00
const uint8_t expectedIBM943 [ ] =
2000-08-11 03:35:25 +00:00
{ 0x31 , 0x32 , 0x33 , 0x00 , 0x88 , 0xea , 0x93 , 0xf1 , 0x8e , 0x4f , 0x2e } ;
2001-04-18 19:31:05 +00:00
int32_t toIBM943Offs [ ] =
2000-08-11 03:35:25 +00:00
{ 0x00 , 0x01 , 0x02 , 0x03 , 0x04 , 0x04 , 0x05 , 0x05 , 0x06 , 0x06 , 0x07 } ;
2001-04-18 19:31:05 +00:00
int32_t fmIBM943Offs [ ] =
2000-06-22 01:18:30 +00:00
{ 0x0000 , 0x0001 , 0x0002 , 0x0003 , 0x0004 , 0x0006 , 0x0008 , 0x000a } ;
2001-04-18 19:31:05 +00:00
2000-06-22 01:18:30 +00:00
/* 1 2 3 0 h1 h2 h3 . DBCS*/
2001-04-18 19:31:05 +00:00
const uint8_t expectedIBM835 [ ] =
2000-08-11 03:35:25 +00:00
{ 0xfe , 0xfe , 0xfe , 0xfe , 0xfe , 0xfe , 0xfe , 0xfe , 0x4c , 0x41 , 0x4c , 0x48 , 0x4c , 0x55 , 0xfe , 0xfe } ;
2001-04-18 19:31:05 +00:00
int32_t toIBM835Offs [ ] =
2000-08-11 03:35:25 +00:00
{ 0x00 , 0x00 , 0x01 , 0x01 , 0x02 , 0x02 , 0x03 , 0x03 , 0x04 , 0x04 , 0x05 , 0x05 , 0x06 , 0x06 , 0x07 , 0x07 } ;
2001-04-18 19:31:05 +00:00
2000-06-22 01:18:30 +00:00
/* 1 2 3 0 <?> <?> <?> . SBCS*/
2001-04-18 19:31:05 +00:00
const uint8_t expectedIBM920 [ ] =
2000-08-11 03:35:25 +00:00
{ 0x31 , 0x32 , 0x33 , 0x00 , 0x1a , 0x1a , 0x1a , 0x2e } ;
2001-04-18 19:31:05 +00:00
int32_t toIBM920Offs [ ] =
2000-08-11 03:35:25 +00:00
{ 0x00 , 0x01 , 0x02 , 0x03 , 0x04 , 0x05 , 0x06 , 0x07 } ;
2001-04-18 19:31:05 +00:00
2000-06-22 01:18:30 +00:00
/* 1 2 3 0 <?> <?> <?> . SBCS*/
2001-04-18 19:31:05 +00:00
const uint8_t expectedISO88593 [ ] =
2000-08-11 03:35:25 +00:00
{ 0x31 , 0x32 , 0x33 , 0x00 , 0x1a , 0x1a , 0x1a , 0x2E } ;
2001-04-18 19:31:05 +00:00
int32_t toISO88593Offs [ ] =
2000-08-11 03:35:25 +00:00
{ 0x00 , 0x01 , 0x02 , 0x03 , 0x04 , 0x05 , 0x06 , 0x07 } ;
1999-08-16 21:50:52 +00:00
2000-06-22 01:18:30 +00:00
/* 1 2 3 0 <?> <?> <?> . LATIN_1*/
2001-04-18 19:31:05 +00:00
const uint8_t expectedLATIN1 [ ] =
2000-08-11 03:35:25 +00:00
{ 0x31 , 0x32 , 0x33 , 0x00 , 0x1a , 0x1a , 0x1a , 0x2E } ;
2001-04-18 19:31:05 +00:00
int32_t toLATIN1Offs [ ] =
2000-08-11 03:35:25 +00:00
{ 0x00 , 0x01 , 0x02 , 0x03 , 0x04 , 0x05 , 0x06 , 0x07 } ;
2001-04-18 19:31:05 +00:00
2000-02-05 00:01:54 +00:00
/* etc */
2001-04-18 19:31:05 +00:00
const uint8_t expectedUTF16BE [ ] =
2000-08-11 03:35:25 +00:00
{ 0x00 , 0x31 , 0x00 , 0x32 , 0x00 , 0x33 , 0x00 , 0x00 , 0x4e , 0x00 , 0x4e , 0x8c , 0x4e , 0x09 , 0x00 , 0x2e } ;
2001-04-18 19:31:05 +00:00
int32_t toUTF16BEOffs [ ] =
2000-08-11 03:35:25 +00:00
{ 0x00 , 0x00 , 0x01 , 0x01 , 0x02 , 0x02 , 0x03 , 0x03 , 0x04 , 0x04 , 0x05 , 0x05 , 0x06 , 0x06 , 0x07 , 0x07 } ;
2001-04-18 19:31:05 +00:00
int32_t fmUTF16BEOffs [ ] =
{ 0x0000 , 0x0002 , 0x0004 , 0x0006 , 0x0008 , 0x000a , 0x000c , 0x000e } ;
2000-11-16 17:20:03 +00:00
2001-04-18 19:31:05 +00:00
const uint8_t expectedUTF16LE [ ] =
2000-11-16 17:20:03 +00:00
{ 0x31 , 0x00 , 0x32 , 0x00 , 0x33 , 0x00 , 0x00 , 0x00 , 0x00 , 0x4e , 0x8c , 0x4e , 0x09 , 0x4e , 0x2e , 0x00 } ;
2001-04-18 19:31:05 +00:00
int32_t toUTF16LEOffs [ ] =
2000-11-16 17:20:03 +00:00
{ 0x00 , 0x00 , 0x01 , 0x01 , 0x02 , 0x02 , 0x03 , 0x03 , 0x04 , 0x04 , 0x05 , 0x05 , 0x06 , 0x06 , 0x07 , 0x07 } ;
2001-04-18 19:31:05 +00:00
int32_t fmUTF16LEOffs [ ] =
{ 0x0000 , 0x0002 , 0x0004 , 0x0006 , 0x0008 , 0x000a , 0x000c , 0x000e } ;
const uint8_t expectedUTF32BE [ ] =
2000-11-16 17:20:03 +00:00
{ 0x00 , 0x00 , 0x00 , 0x31 ,
0x00 , 0x00 , 0x00 , 0x32 ,
0x00 , 0x00 , 0x00 , 0x33 ,
0x00 , 0x00 , 0x00 , 0x00 ,
0x00 , 0x00 , 0x4e , 0x00 ,
0x00 , 0x00 , 0x4e , 0x8c ,
0x00 , 0x00 , 0x4e , 0x09 ,
0x00 , 0x00 , 0x00 , 0x2e } ;
2001-04-18 19:31:05 +00:00
int32_t toUTF32BEOffs [ ] =
2000-11-16 17:20:03 +00:00
{ 0x00 , 0x00 , 0x00 , 0x00 ,
0x01 , 0x01 , 0x01 , 0x01 ,
0x02 , 0x02 , 0x02 , 0x02 ,
0x03 , 0x03 , 0x03 , 0x03 ,
0x04 , 0x04 , 0x04 , 0x04 ,
0x05 , 0x05 , 0x05 , 0x05 ,
0x06 , 0x06 , 0x06 , 0x06 ,
0x07 , 0x07 , 0x07 , 0x07 ,
0x08 , 0x08 , 0x08 , 0x08 } ;
2001-04-18 19:31:05 +00:00
int32_t fmUTF32BEOffs [ ] =
{ 0x0000 , 0x0004 , 0x0008 , 0x000c , 0x0010 , 0x0014 , 0x0018 , 0x001c } ;
2000-11-16 17:20:03 +00:00
2001-04-18 19:31:05 +00:00
const uint8_t expectedUTF32LE [ ] =
2000-11-16 17:20:03 +00:00
{ 0x31 , 0x00 , 0x00 , 0x00 ,
0x32 , 0x00 , 0x00 , 0x00 ,
0x33 , 0x00 , 0x00 , 0x00 ,
0x00 , 0x00 , 0x00 , 0x00 ,
0x00 , 0x4e , 0x00 , 0x00 ,
0x8c , 0x4e , 0x00 , 0x00 ,
0x09 , 0x4e , 0x00 , 0x00 ,
0x2e , 0x00 , 0x00 , 0x00 } ;
2001-04-18 19:31:05 +00:00
int32_t toUTF32LEOffs [ ] =
2000-11-16 17:20:03 +00:00
{ 0x00 , 0x00 , 0x00 , 0x00 ,
0x01 , 0x01 , 0x01 , 0x01 ,
0x02 , 0x02 , 0x02 , 0x02 ,
0x03 , 0x03 , 0x03 , 0x03 ,
0x04 , 0x04 , 0x04 , 0x04 ,
0x05 , 0x05 , 0x05 , 0x05 ,
0x06 , 0x06 , 0x06 , 0x06 ,
0x07 , 0x07 , 0x07 , 0x07 ,
0x08 , 0x08 , 0x08 , 0x08 } ;
2001-04-18 19:31:05 +00:00
int32_t fmUTF32LEOffs [ ] =
{ 0x0000 , 0x0004 , 0x0008 , 0x000c , 0x0010 , 0x0014 , 0x0018 , 0x001c } ;
2000-11-16 17:20:03 +00:00
1999-08-16 21:50:52 +00:00
2000-06-28 17:01:52 +00:00
/** Test chars #2 **/
1999-08-16 21:50:52 +00:00
2000-02-05 00:01:54 +00:00
/* Sahha [health], slashed h's */
const UChar malteseUChars [ ] = { 0x0053 , 0x0061 , 0x0127 , 0x0127 , 0x0061 } ;
2000-08-11 03:35:25 +00:00
const uint8_t expectedMaltese913 [ ] = { 0x53 , 0x61 , 0xB1 , 0xB1 , 0x61 } ;
2000-06-22 01:18:30 +00:00
/* LMBCS */
const UChar LMBCSUChars [ ] = { 0x0027 , 0x010A , 0x0000 , 0x0127 , 0x2666 } ;
2000-08-11 03:35:25 +00:00
const uint8_t expectedLMBCS [ ] = { 0x27 , 0x06 , 0x04 , 0x00 , 0x01 , 0x73 , 0x01 , 0x04 } ;
int32_t toLMBCSOffs [ ] = { 0x00 , 0x01 , 0x01 , 0x02 , 0x03 , 0x03 , 0x04 , 0x04 } ;
2000-06-22 01:18:30 +00:00
int32_t fmLMBCSOffs [ ] = { 0x0000 , 0x0001 , 0x0003 , 0x0004 , 0x0006 } ;
2000-02-05 00:01:54 +00:00
/*********************************** START OF CODE finally *************/
1999-08-16 21:50:52 +00:00
gInBufferSize = insize ;
gOutBufferSize = outsize ;
log_verbose ( " \n \n \n Testing conversions with InputBufferSize = %d, OutputBufferSize = %d \n " , gInBufferSize , gOutBufferSize ) ;
2001-04-18 19:31:05 +00:00
2000-06-29 02:53:29 +00:00
# if 1
2000-06-22 01:18:30 +00:00
/*UTF-8*/
2000-02-05 00:01:54 +00:00
if ( ! testConvertFromU ( sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) ,
2001-02-23 04:40:39 +00:00
expectedUTF8 , sizeof ( expectedUTF8 ) , " UTF8 " , toUTF8Offs , FALSE ) )
2000-02-05 00:01:54 +00:00
log_err ( " u-> UTF8 did not match. \n " ) ;
2001-04-18 19:31:05 +00:00
2000-07-19 20:14:27 +00:00
log_verbose ( " Test surrogate behaviour for UTF8 \n " ) ;
{
const UChar testinput [ ] = { 0x20ac , 0xd801 , 0xdc01 , 0xdc01 , 0xd801 } ;
2001-04-18 19:31:05 +00:00
const uint8_t expectedUTF8test2 [ ] = { 0xe2 , 0x82 , 0xac ,
0xf0 , 0x90 , 0x90 , 0x81 ,
0xed , 0xb0 , 0x81 , 0xed , 0xa0 , 0x81
2000-07-19 20:14:27 +00:00
} ;
2000-07-20 20:08:40 +00:00
int32_t offsets [ ] = { 0 , 0 , 0 , 1 , 1 , 1 , 1 , 3 , 3 , 3 , 4 , 4 , 4 } ;
2000-07-19 20:14:27 +00:00
if ( ! testConvertFromU ( testinput , sizeof ( testinput ) / sizeof ( testinput [ 0 ] ) ,
2001-02-23 04:40:39 +00:00
expectedUTF8test2 , sizeof ( expectedUTF8test2 ) , " UTF8 " , offsets , FALSE ) )
2001-04-18 19:31:05 +00:00
log_err ( " u-> UTF8 did not match. \n " ) ;
2000-07-19 20:14:27 +00:00
}
2000-06-22 01:18:30 +00:00
/*ISO-2022*/
2000-08-15 00:07:33 +00:00
if ( ! testConvertFromU ( sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) ,
2001-02-23 04:40:39 +00:00
expectedISO2022 , sizeof ( expectedISO2022 ) , " ISO_2022 " , toISO2022Offs , FALSE ) )
2000-02-05 00:01:54 +00:00
log_err ( " u-> iso-2022 did not match. \n " ) ;
2000-06-22 01:18:30 +00:00
/*UTF16 LE*/
if ( ! testConvertFromU ( sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) ,
2001-02-23 04:40:39 +00:00
expectedUTF16LE , sizeof ( expectedUTF16LE ) , " utf-16le " , toUTF16LEOffs , FALSE ) )
2000-06-22 01:18:30 +00:00
log_err ( " u-> utf-16le did not match. \n " ) ;
/*UTF16 BE*/
if ( ! testConvertFromU ( sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) ,
2001-02-23 04:40:39 +00:00
expectedUTF16BE , sizeof ( expectedUTF16BE ) , " utf-16be " , toUTF16BEOffs , FALSE ) )
2000-06-22 01:18:30 +00:00
log_err ( " u-> utf-16be did not match. \n " ) ;
2000-11-16 17:20:03 +00:00
/*UTF32 LE*/
if ( ! testConvertFromU ( sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) ,
2001-02-23 04:40:39 +00:00
expectedUTF32LE , sizeof ( expectedUTF32LE ) , " utf-32le " , toUTF32LEOffs , FALSE ) )
2000-11-16 17:20:03 +00:00
log_err ( " u-> utf-32le did not match. \n " ) ;
/*UTF32 BE*/
if ( ! testConvertFromU ( sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) ,
2001-02-23 04:40:39 +00:00
expectedUTF32BE , sizeof ( expectedUTF32BE ) , " utf-32be " , toUTF32BEOffs , FALSE ) )
2000-11-16 17:20:03 +00:00
log_err ( " u-> utf-32be did not match. \n " ) ;
2000-06-22 01:18:30 +00:00
/*LATIN_1*/
if ( ! testConvertFromU ( sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) ,
2001-02-23 04:40:39 +00:00
expectedLATIN1 , sizeof ( expectedLATIN1 ) , " LATIN_1 " , toLATIN1Offs , FALSE ) )
2000-06-22 01:18:30 +00:00
log_err ( " u-> LATIN_1 did not match. \n " ) ;
/*EBCDIC_STATEFUL*/
2000-02-05 00:01:54 +00:00
if ( ! testConvertFromU ( sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) ,
2001-02-23 04:40:39 +00:00
expectedIBM930 , sizeof ( expectedIBM930 ) , " ibm-930 " , toIBM930Offs , FALSE ) )
2000-02-05 00:01:54 +00:00
log_err ( " u-> ibm-930 did not match. \n " ) ;
2000-06-28 17:01:52 +00:00
if ( ! testConvertFromU ( sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) ,
2001-02-23 04:40:39 +00:00
expectedISO88593 , sizeof ( expectedISO88593 ) , " iso-8859-3 " , toISO88593Offs , FALSE ) )
2000-06-28 17:01:52 +00:00
log_err ( " u-> iso-8859-3 did not match. \n " ) ;
2000-06-22 01:18:30 +00:00
/*MBCS*/
2000-06-28 17:01:52 +00:00
2000-02-05 00:01:54 +00:00
if ( ! testConvertFromU ( sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) ,
2001-02-23 04:40:39 +00:00
expectedIBM943 , sizeof ( expectedIBM943 ) , " ibm-943 " , toIBM943Offs , FALSE ) )
2000-02-05 00:01:54 +00:00
log_err ( " u-> ibm-943 [UCNV_MBCS] not match. \n " ) ;
2000-06-22 01:18:30 +00:00
/*DBCS*/
2000-02-05 00:01:54 +00:00
if ( ! testConvertFromU ( sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) ,
2001-02-23 04:40:39 +00:00
expectedIBM835 , sizeof ( expectedIBM835 ) , " ibm-835 " , toIBM835Offs , FALSE ) )
2000-06-26 22:16:32 +00:00
log_err ( " u-> ibm-835 [UCNV_DBCS] not match. \n " ) ;
2000-06-22 01:18:30 +00:00
/*SBCS*/
if ( ! testConvertFromU ( sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) ,
2001-02-23 04:40:39 +00:00
expectedIBM920 , sizeof ( expectedIBM920 ) , " ibm-920 " , toIBM920Offs , FALSE ) )
2000-06-22 01:18:30 +00:00
log_err ( " u-> ibm-920 [UCNV_SBCS] not match. \n " ) ;
/*SBCS*/
if ( ! testConvertFromU ( sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) ,
2001-02-23 04:40:39 +00:00
expectedISO88593 , sizeof ( expectedISO88593 ) , " iso-8859-3 " , toISO88593Offs , FALSE ) )
2000-06-22 01:18:30 +00:00
log_err ( " u-> iso-8859-3 did not match. \n " ) ;
1999-08-16 21:50:52 +00:00
2001-04-18 19:31:05 +00:00
1999-08-16 21:50:52 +00:00
/****/
2000-06-29 02:53:29 +00:00
# endif
1999-12-04 02:31:40 +00:00
2000-06-29 02:53:29 +00:00
# if 1
2000-06-22 01:18:30 +00:00
/*UTF-8*/
2000-02-05 00:01:54 +00:00
if ( ! testConvertToU ( expectedUTF8 , sizeof ( expectedUTF8 ) ,
2001-02-23 04:40:39 +00:00
sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) , " utf8 " , fmUTF8Offs , FALSE ) )
2000-02-05 00:01:54 +00:00
log_err ( " utf8 -> u did not match \n " ) ;
2000-06-22 01:18:30 +00:00
/*ISO-2022*/
2000-08-15 18:05:12 +00:00
if ( ! testConvertToU ( expectedISO2022 , sizeof ( expectedISO2022 ) ,
2001-02-23 04:40:39 +00:00
sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) , " ISO_2022 " , fmISO2022Offs , FALSE ) )
2000-07-14 20:40:05 +00:00
log_err ( " iso-2022 -> u did not match. \n " ) ;
2000-06-22 01:18:30 +00:00
/*UTF16 LE*/
if ( ! testConvertToU ( expectedUTF16LE , sizeof ( expectedUTF16LE ) ,
2001-02-23 04:40:39 +00:00
sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) , " utf-16le " , fmUTF16LEOffs , FALSE ) )
2000-07-14 20:40:05 +00:00
log_err ( " utf-16le -> u did not match. \n " ) ;
2000-06-22 01:18:30 +00:00
/*UTF16 BE*/
if ( ! testConvertToU ( expectedUTF16BE , sizeof ( expectedUTF16BE ) ,
2001-02-23 04:40:39 +00:00
sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) , " utf-16be " , fmUTF16BEOffs , FALSE ) )
2000-07-14 20:40:05 +00:00
log_err ( " utf-16be -> u did not match. \n " ) ;
2000-11-16 17:20:03 +00:00
/*UTF32 LE*/
if ( ! testConvertToU ( expectedUTF32LE , sizeof ( expectedUTF32LE ) ,
2001-02-23 04:40:39 +00:00
sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) , " utf-32le " , fmUTF32LEOffs , FALSE ) )
2000-11-16 17:20:03 +00:00
log_err ( " utf-32le -> u did not match. \n " ) ;
/*UTF32 BE*/
if ( ! testConvertToU ( expectedUTF32BE , sizeof ( expectedUTF32BE ) ,
2001-02-23 04:40:39 +00:00
sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) , " utf-32be " , fmUTF32BEOffs , FALSE ) )
2000-11-16 17:20:03 +00:00
log_err ( " utf-32be -> u did not match. \n " ) ;
2000-06-22 01:18:30 +00:00
/*EBCDIC_STATEFUL*/
2000-02-05 00:01:54 +00:00
if ( ! testConvertToU ( expectedIBM930 , sizeof ( expectedIBM930 ) ,
2001-02-23 04:40:39 +00:00
sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) , " ibm-930 " , fmIBM930Offs , FALSE ) )
2000-07-14 20:40:05 +00:00
log_err ( " ibm-930 -> u did not match. \n " ) ;
2000-06-22 01:18:30 +00:00
/*MBCS*/
2000-02-05 00:01:54 +00:00
if ( ! testConvertToU ( expectedIBM943 , sizeof ( expectedIBM943 ) ,
2001-02-23 04:40:39 +00:00
sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) , " ibm-943 " , fmIBM943Offs , FALSE ) )
2000-07-14 20:40:05 +00:00
log_err ( " ibm-943 -> u did not match. \n " ) ;
2000-06-28 17:01:52 +00:00
2000-11-16 17:20:03 +00:00
/* Try it again to make sure it still works */
2000-06-28 17:01:52 +00:00
if ( ! testConvertToU ( expectedUTF16LE , sizeof ( expectedUTF16LE ) ,
2001-02-23 04:40:39 +00:00
sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) , " utf-16le " , fmUTF16LEOffs , FALSE ) )
2000-07-14 20:40:05 +00:00
log_err ( " utf-16le -> u did not match. \n " ) ;
1999-12-04 02:31:40 +00:00
2000-02-05 00:01:54 +00:00
if ( ! testConvertToU ( expectedMaltese913 , sizeof ( expectedMaltese913 ) ,
2001-02-23 04:40:39 +00:00
malteseUChars , sizeof ( malteseUChars ) / sizeof ( malteseUChars [ 0 ] ) , " latin3 " , NULL , FALSE ) )
2000-02-05 00:01:54 +00:00
log_err ( " latin3[813] -> u did not match \n " ) ;
1999-12-04 02:31:40 +00:00
2000-02-05 00:01:54 +00:00
if ( ! testConvertFromU ( malteseUChars , sizeof ( malteseUChars ) / sizeof ( malteseUChars [ 0 ] ) ,
2001-02-23 04:40:39 +00:00
expectedMaltese913 , sizeof ( expectedMaltese913 ) , " iso-8859-3 " , NULL , FALSE ) )
2001-04-18 19:31:05 +00:00
log_err ( " u-> latin3[813] did not match. \n " ) ;
2000-06-22 01:18:30 +00:00
2000-06-28 17:01:52 +00:00
/*LMBCS*/
2000-06-22 01:18:30 +00:00
if ( ! testConvertFromU ( LMBCSUChars , sizeof ( LMBCSUChars ) / sizeof ( LMBCSUChars [ 0 ] ) ,
2001-02-23 04:40:39 +00:00
expectedLMBCS , sizeof ( expectedLMBCS ) , " LMBCS-1 " , toLMBCSOffs , FALSE ) )
2000-06-22 01:18:30 +00:00
log_err ( " u-> LMBCS-1 did not match. \n " ) ;
if ( ! testConvertToU ( expectedLMBCS , sizeof ( expectedLMBCS ) ,
2001-02-23 04:40:39 +00:00
LMBCSUChars , sizeof ( LMBCSUChars ) / sizeof ( LMBCSUChars [ 0 ] ) , " LMBCS-1 " , fmLMBCSOffs , FALSE ) )
2000-07-14 20:40:05 +00:00
log_err ( " LMBCS-1 -> u did not match. \n " ) ;
2000-08-15 18:05:12 +00:00
/*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm
which is test file for MBCS conversion with single - byte codepage data . */
{
2001-04-18 19:31:05 +00:00
2000-08-15 18:05:12 +00:00
/* MBCS with single byte codepage data test1.ucm*/
const UChar unicodeInput [ ] = { 0x20ac , 0x0005 , 0x0006 , 0xdbc4 , 0xde34 , 0x0003 } ;
const uint8_t expectedtest1 [ ] = { 0x00 , 0x05 , 0xff , 0x07 , 0xff , } ;
2001-04-18 19:31:05 +00:00
int32_t totest1Offs [ ] = { 0 , 1 , 2 , 3 , 5 , } ;
2000-08-15 18:05:12 +00:00
const uint8_t test1input [ ] = { 0x00 , 0x05 , 0x06 , 0x07 , 0x08 , 0x09 } ;
2000-11-21 01:19:11 +00:00
const UChar expectedUnicode [ ] = { 0x20ac , 0x0005 , 0x0006 , 0xdbc4 , 0xde34 , 0xfffd , 0xfffd } ;
2000-08-15 18:05:12 +00:00
int32_t fromtest1Offs [ ] = { 0 , 1 , 2 , 3 , 3 , 4 , 5 } ;
/*from Unicode*/
if ( ! testConvertFromU ( unicodeInput , sizeof ( unicodeInput ) / sizeof ( unicodeInput [ 0 ] ) ,
2001-02-23 04:40:39 +00:00
expectedtest1 , sizeof ( expectedtest1 ) , " test1 " , totest1Offs , FALSE ) )
2000-08-15 18:05:12 +00:00
log_err ( " u-> test1(MBCS conversion with single-byte) did not match. \n " ) ;
2001-04-18 19:31:05 +00:00
2000-08-15 18:05:12 +00:00
/*to Unicode*/
if ( ! testConvertToU ( test1input , sizeof ( test1input ) ,
2001-02-23 04:40:39 +00:00
expectedUnicode , sizeof ( expectedUnicode ) / sizeof ( expectedUnicode [ 0 ] ) , " test1 " , fromtest1Offs , FALSE ) )
2000-08-15 18:05:12 +00:00
log_err ( " test1(MBCS conversion with single-byte) -> u did not match. \n " ) ;
}
/*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm
which is test file for MBCS conversion with three - byte codepage data . */
{
2001-04-18 19:31:05 +00:00
2000-08-15 18:05:12 +00:00
/* MBCS with three byte codepage data test3.ucm*/
const UChar unicodeInput [ ] = { 0x20ac , 0x0005 , 0x0006 , 0x000b , 0xdbc4 , 0xde34 , 0xd84d , 0xdc56 , 0x000e } ;
const uint8_t expectedtest3 [ ] = { 0x00 , 0x05 , 0xff , 0x01 , 0x02 , 0x0b , 0x07 , 0x01 , 0x02 , 0x0a , 0xff , } ;
2001-04-18 19:31:05 +00:00
int32_t totest3Offs [ ] = { 0 , 1 , 2 , 3 , 3 , 3 , 4 , 6 , 6 , 6 , 8 } ;
2000-08-15 18:05:12 +00:00
const uint8_t test3input [ ] = { 0x00 , 0x05 , 0x06 , 0x01 , 0x02 , 0x0b , 0x07 , 0x01 , 0x02 , 0x0a , 0x01 , 0x02 , 0x0c , } ;
2000-11-21 01:19:11 +00:00
const UChar expectedUnicode [ ] = { 0x20ac , 0x0005 , 0x0006 , 0x000b , 0xdbc4 , 0xde34 , 0xd84d , 0xdc56 , 0xfffd } ;
2000-08-15 18:05:12 +00:00
int32_t fromtest3Offs [ ] = { 0 , 1 , 2 , 3 , 6 , 6 , 7 , 7 , 10 } ;
/*from Unicode*/
if ( ! testConvertFromU ( unicodeInput , sizeof ( unicodeInput ) / sizeof ( unicodeInput [ 0 ] ) ,
2001-02-23 04:40:39 +00:00
expectedtest3 , sizeof ( expectedtest3 ) , " test3 " , totest3Offs , FALSE ) )
2000-08-15 18:05:12 +00:00
log_err ( " u-> test3(MBCS conversion with three-byte) did not match. \n " ) ;
2001-04-18 19:31:05 +00:00
2000-08-15 18:05:12 +00:00
/*to Unicode*/
if ( ! testConvertToU ( test3input , sizeof ( test3input ) ,
2001-02-23 04:40:39 +00:00
expectedUnicode , sizeof ( expectedUnicode ) / sizeof ( expectedUnicode [ 0 ] ) , " test3 " , fromtest3Offs , FALSE ) )
2000-08-15 18:05:12 +00:00
log_err ( " test3(MBCS conversion with three-byte) -> u did not match. \n " ) ;
}
2001-04-18 19:31:05 +00:00
2000-08-15 18:05:12 +00:00
/*some more test to increase the code coverage in MBCS. Create an test converter from test4.ucm
which is test file for MBCS conversion with four - byte codepage data . */
{
2001-04-18 19:31:05 +00:00
2000-08-15 18:05:12 +00:00
/* MBCS with four byte codepage data test4.ucm*/
const UChar unicodeInput [ ] = { 0x20ac , 0x0005 , 0x0006 , 0x000b , 0xdbc4 , 0xde34 , 0xd84d , 0xdc56 , 0x000e } ;
const uint8_t expectedtest4 [ ] = { 0x00 , 0x05 , 0xff , 0x01 , 0x02 , 0x03 , 0x0b , 0x07 , 0x01 , 0x02 , 0x03 , 0x0a , 0xff , } ;
2001-04-18 19:31:05 +00:00
int32_t totest4Offs [ ] = { 0 , 1 , 2 , 3 , 3 , 3 , 3 , 4 , 6 , 6 , 6 , 6 , 8 , } ;
2000-08-15 18:05:12 +00:00
const uint8_t test4input [ ] = { 0x00 , 0x05 , 0x06 , 0x01 , 0x02 , 0x03 , 0x0b , 0x07 , 0x01 , 0x02 , 0x03 , 0x0a , 0x01 , 0x02 , 0x03 , 0x0c , } ;
2000-11-21 01:19:11 +00:00
const UChar expectedUnicode [ ] = { 0x20ac , 0x0005 , 0x0006 , 0x000b , 0xdbc4 , 0xde34 , 0xd84d , 0xdc56 , 0xfffd } ;
2000-08-15 18:05:12 +00:00
int32_t fromtest4Offs [ ] = { 0 , 1 , 2 , 3 , 7 , 7 , 8 , 8 , 12 , } ;
/*from Unicode*/
if ( ! testConvertFromU ( unicodeInput , sizeof ( unicodeInput ) / sizeof ( unicodeInput [ 0 ] ) ,
2001-02-23 04:40:39 +00:00
expectedtest4 , sizeof ( expectedtest4 ) , " test4 " , totest4Offs , FALSE ) )
2000-08-15 18:05:12 +00:00
log_err ( " u-> test4(MBCS conversion with four-byte) did not match. \n " ) ;
2001-04-18 19:31:05 +00:00
2000-08-15 18:05:12 +00:00
/*to Unicode*/
if ( ! testConvertToU ( test4input , sizeof ( test4input ) ,
2001-02-23 04:40:39 +00:00
expectedUnicode , sizeof ( expectedUnicode ) / sizeof ( expectedUnicode [ 0 ] ) , " test4 " , fromtest4Offs , FALSE ) )
2000-08-15 18:05:12 +00:00
log_err ( " test4(MBCS conversion with four-byte) -> u did not match. \n " ) ;
}
2001-01-09 03:33:07 +00:00
/* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
{
/* encode directly set D and set O */
static const uint8_t utf7 [ ] = {
/*
Hi Mom - + Jjo - - !
A + ImIDkQ .
+ -
+ ZeVnLIqe
*/
0x48 , 0x69 , 0x20 , 0x4d , 0x6f , 0x6d , 0x20 , 0x2d , 0x2b , 0x4a , 0x6a , 0x6f , 0x2d , 0x2d , 0x21 ,
0x41 , 0x2b , 0x49 , 0x6d , 0x49 , 0x44 , 0x6b , 0x51 , 0x2e ,
0x2b , 0x2d ,
0x2b , 0x5a , 0x65 , 0x56 , 0x6e , 0x4c , 0x49 , 0x71 , 0x65
} ;
static const UChar unicode [ ] = {
/*
Hi Mom - < WHITE SMILING FACE > - !
A < NOT IDENTICAL TO > < ALPHA > .
+
[ Japanese word " nihongo " ]
*/
0x48 , 0x69 , 0x20 , 0x4d , 0x6f , 0x6d , 0x20 , 0x2d , 0x263a , 0x2d , 0x21 ,
0x41 , 0x2262 , 0x0391 , 0x2e ,
0x2b ,
0x65e5 , 0x672c , 0x8a9e
} ;
static const int32_t toUnicodeOffsets [ ] = {
0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 9 , 13 , 14 ,
15 , 17 , 19 , 23 ,
24 ,
27 , 29 , 32
} ;
static const int32_t fromUnicodeOffsets [ ] = {
0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 8 , 8 , 8 , 9 , 9 , 10 ,
11 , 12 , 12 , 12 , 13 , 13 , 13 , 13 , 14 ,
15 , 15 ,
16 , 16 , 16 , 17 , 17 , 17 , 18 , 18 , 18
} ;
/* same but escaping set O (the exclamation mark) */
static const uint8_t utf7Restricted [ ] = {
/*
Hi Mom - + Jjo - - + ACE -
A + ImIDkQ .
+ -
+ ZeVnLIqe
*/
0x48 , 0x69 , 0x20 , 0x4d , 0x6f , 0x6d , 0x20 , 0x2d , 0x2b , 0x4a , 0x6a , 0x6f , 0x2d , 0x2d , 0x2b , 0x41 , 0x43 , 0x45 , 0x2d ,
0x41 , 0x2b , 0x49 , 0x6d , 0x49 , 0x44 , 0x6b , 0x51 , 0x2e ,
0x2b , 0x2d ,
0x2b , 0x5a , 0x65 , 0x56 , 0x6e , 0x4c , 0x49 , 0x71 , 0x65
} ;
static const int32_t toUnicodeOffsetsR [ ] = {
0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 9 , 13 , 15 ,
19 , 21 , 23 , 27 ,
28 ,
31 , 33 , 36
} ;
static const int32_t fromUnicodeOffsetsR [ ] = {
0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 8 , 8 , 8 , 9 , 9 , 10 , 10 , 10 , 10 , 11 ,
11 , 12 , 12 , 12 , 13 , 13 , 13 , 13 , 14 ,
15 , 15 ,
16 , 16 , 16 , 17 , 17 , 17 , 18 , 18 , 18
} ;
2001-02-23 04:40:39 +00:00
if ( ! testConvertFromU ( unicode , sizeof ( unicode ) / U_SIZEOF_UCHAR , utf7 , sizeof ( utf7 ) , " UTF-7 " , fromUnicodeOffsets , FALSE ) ) {
2001-01-09 03:33:07 +00:00
log_err ( " u-> UTF-7 did not match. \n " ) ;
}
2000-08-15 18:05:12 +00:00
2001-02-23 04:40:39 +00:00
if ( ! testConvertToU ( utf7 , sizeof ( utf7 ) , unicode , sizeof ( unicode ) / U_SIZEOF_UCHAR , " UTF-7 " , toUnicodeOffsets , FALSE ) ) {
2001-01-09 03:33:07 +00:00
log_err ( " UTF-7 -> u did not match. \n " ) ;
}
2000-08-15 18:05:12 +00:00
2001-02-23 04:40:39 +00:00
if ( ! testConvertFromU ( unicode , sizeof ( unicode ) / U_SIZEOF_UCHAR , utf7Restricted , sizeof ( utf7Restricted ) , " UTF-7,version=1 " , fromUnicodeOffsetsR , FALSE ) ) {
2001-01-09 03:33:07 +00:00
log_err ( " u-> UTF-7,version=1 did not match. \n " ) ;
}
2001-02-23 04:40:39 +00:00
if ( ! testConvertToU ( utf7Restricted , sizeof ( utf7Restricted ) , unicode , sizeof ( unicode ) / U_SIZEOF_UCHAR , " UTF-7,version=1 " , toUnicodeOffsetsR , FALSE ) ) {
2001-01-09 03:33:07 +00:00
log_err ( " UTF-7,version=1 -> u did not match. \n " ) ;
}
}
2001-01-26 03:05:11 +00:00
/* Test UTF-8 bad data handling*/
{
static const uint8_t utf8 [ ] = {
0x61 ,
0xf7 , 0xbf , 0xbf , 0xbf , /* > 10FFFF */
0x00 ,
0x62 ,
0xfb , 0xbf , 0xbf , 0xbf , 0xbf , /* > 10FFFF */
0xfb , 0xbf , 0xbf , 0xbf , 0xbf , /* > 10FFFF */
0xf4 , 0x8f , 0xbf , 0xbf , /* 10FFFF */
0xdf , 0xbf , /* 7ff */
0xbf , /* truncated tail */
0xf4 , 0x90 , 0x80 , 0x80 , /* 11FFFF */
0x02
} ;
static const uint16_t utf8Expected [ ] = {
0x0061 ,
0xfffd ,
0x0000 ,
0x0062 ,
0xfffd ,
0xfffd ,
0xdbff , 0xdfff ,
0x07ff ,
0xfffd ,
0xfffd ,
0x0002
} ;
static const int32_t utf8Offsets [ ] = {
0 , 1 , 5 , 6 , 7 , 12 , 17 , 17 , 21 , 23 , 24 , 28
} ;
if ( ! testConvertToU ( utf8 , sizeof ( utf8 ) ,
2001-02-23 04:40:39 +00:00
utf8Expected , sizeof ( utf8Expected ) / sizeof ( utf8Expected [ 0 ] ) , " utf-8 " , utf8Offsets , FALSE ) )
2001-01-26 03:05:11 +00:00
log_err ( " u-> utf-8 did not match. \n " ) ;
}
/* Test UTF-32BE bad data handling*/
{
static const uint8_t utf32 [ ] = {
0x00 , 0x00 , 0x00 , 0x61 ,
0x00 , 0x11 , 0x00 , 0x00 , /* 0x110000 out of range */
0x00 , 0x00 , 0x00 , 0x62 ,
0xff , 0xff , 0xff , 0xff , /* 0xffffffff out of range */
0x7f , 0xff , 0xff , 0xff , /* 0x7fffffff out of range */
0x00 , 0x00 , 0x01 , 0x62 ,
0x00 , 0x00 , 0x02 , 0x62
} ;
static const uint16_t utf32Expected [ ] = {
0x0061 ,
0xfffd , /* 0x110000 out of range */
0x0062 ,
0xfffd , /* 0xffffffff out of range */
0xfffd , /* 0x7fffffff out of range */
0x0162 ,
0x0262
} ;
static const int32_t utf32Offsets [ ] = {
0 , 4 , 8 , 12 , 16 , 20 , 24
} ;
if ( ! testConvertToU ( utf32 , sizeof ( utf32 ) ,
2001-02-23 04:40:39 +00:00
utf32Expected , sizeof ( utf32Expected ) / sizeof ( utf32Expected [ 0 ] ) , " utf-32be " , utf32Offsets , FALSE ) )
2001-01-26 03:05:11 +00:00
log_err ( " u-> utf-32be did not match. \n " ) ;
}
/* Test UTF-32LE bad data handling*/
{
static const uint8_t utf32 [ ] = {
0x61 , 0x00 , 0x00 , 0x00 ,
0x00 , 0x00 , 0x11 , 0x00 , /* 0x110000 out of range */
0x62 , 0x00 , 0x00 , 0x00 ,
0xff , 0xff , 0xff , 0xff , /* 0xffffffff out of range */
0xff , 0xff , 0xff , 0x7f , /* 0x7fffffff out of range */
0x62 , 0x01 , 0x00 , 0x00 ,
0x62 , 0x02 , 0x00 , 0x00 ,
} ;
static const uint16_t utf32Expected [ ] = {
0x0061 ,
0xfffd , /* 0x110000 out of range */
0x0062 ,
0xfffd , /* 0xffffffff out of range */
0xfffd , /* 0x7fffffff out of range */
0x0162 ,
0x0262
} ;
static const int32_t utf32Offsets [ ] = {
0 , 4 , 8 , 12 , 16 , 20 , 24
} ;
if ( ! testConvertToU ( utf32 , sizeof ( utf32 ) ,
2001-02-23 04:40:39 +00:00
utf32Expected , sizeof ( utf32Expected ) / sizeof ( utf32Expected [ 0 ] ) , " utf-32le " , utf32Offsets , FALSE ) )
2001-01-26 03:05:11 +00:00
log_err ( " u-> utf-32le did not match. \n " ) ;
}
2001-04-18 19:31:05 +00:00
}
1999-08-16 21:50:52 +00:00
2000-11-21 04:05:39 +00:00
static void TestConverterTypesAndStarters ( )
1999-08-16 21:50:52 +00:00
{
2000-02-05 00:01:54 +00:00
UConverter * myConverter [ 3 ] ;
UErrorCode err = U_ZERO_ERROR ;
2000-05-18 22:08:39 +00:00
UBool mystarters [ 256 ] ;
2001-04-18 19:31:05 +00:00
2000-08-11 03:35:25 +00:00
/* const UBool expectedKSCstarters[256] = {
2000-02-05 00:01:54 +00:00
FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE ,
FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE ,
FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE ,
FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE ,
FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE ,
FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE ,
FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE ,
FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE ,
FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE ,
FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE ,
FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE ,
FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE ,
FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE ,
FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE ,
FALSE , FALSE , FALSE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE ,
TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE ,
TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE ,
TRUE , TRUE , TRUE , FALSE , FALSE , TRUE , TRUE , TRUE , TRUE , TRUE ,
TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE ,
TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE ,
TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE ,
TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE ,
TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE ,
TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE ,
TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE ,
2000-08-11 03:35:25 +00:00
TRUE , TRUE , TRUE , TRUE , TRUE , TRUE } ; */
1999-08-16 21:50:52 +00:00
log_verbose ( " Testing KSC, ibm-930, ibm-878 for starters and their conversion types. " ) ;
2000-07-10 17:35:02 +00:00
myConverter [ 0 ] = ucnv_open ( " ksc " , & err ) ;
2000-10-27 00:10:17 +00:00
if ( U_FAILURE ( err ) ) {
2000-07-10 17:35:02 +00:00
log_err ( " Failed to create an ibm-ksc converter \n " ) ;
2000-10-27 00:10:17 +00:00
return ;
}
2000-02-05 00:01:54 +00:00
else
2001-01-26 03:05:11 +00:00
{
if ( ucnv_getType ( myConverter [ 0 ] ) ! = UCNV_MBCS )
log_err ( " ucnv_getType Failed for ibm-949 \n " ) ;
else
log_verbose ( " ucnv_getType ibm-949 ok \n " ) ;
2000-02-05 00:01:54 +00:00
if ( myConverter [ 0 ] ! = NULL )
2001-01-26 03:05:11 +00:00
ucnv_getStarters ( myConverter [ 0 ] , mystarters , & err ) ;
2000-02-05 00:01:54 +00:00
/*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
log_err ( " Failed ucnv_getStarters for ksc \n " ) ;
else
log_verbose ( " ucnv_getStarters ok \n " ) ; */
2001-04-18 19:31:05 +00:00
2001-01-26 03:05:11 +00:00
}
2000-02-05 00:01:54 +00:00
myConverter [ 1 ] = ucnv_open ( " ibm-930 " , & err ) ;
2000-10-27 00:10:17 +00:00
if ( U_FAILURE ( err ) ) {
2001-01-26 03:05:11 +00:00
log_err ( " Failed to create an ibm-930 converter \n " ) ;
return ;
2000-10-27 00:10:17 +00:00
}
2000-02-05 00:01:54 +00:00
else
2001-01-26 03:05:11 +00:00
{
if ( ucnv_getType ( myConverter [ 1 ] ) ! = UCNV_EBCDIC_STATEFUL )
log_err ( " ucnv_getType Failed for ibm-930 \n " ) ;
else
log_verbose ( " ucnv_getType ibm-930 ok \n " ) ;
}
2000-02-05 00:01:54 +00:00
myConverter [ 2 ] = ucnv_open ( " ibm-878 " , & err ) ;
2000-10-27 00:10:17 +00:00
if ( U_FAILURE ( err ) ) {
2000-02-05 00:01:54 +00:00
log_err ( " Failed to create an ibm-815 converter \n " ) ;
2000-10-27 00:10:17 +00:00
return ;
}
2000-02-05 00:01:54 +00:00
else
{
if ( ucnv_getType ( myConverter [ 2 ] ) ! = UCNV_SBCS ) log_err ( " ucnv_getType Failed for ibm-815 \n " ) ;
else log_verbose ( " ucnv_getType ibm-815 ok \n " ) ;
}
2001-04-18 19:31:05 +00:00
2000-02-05 00:01:54 +00:00
ucnv_close ( myConverter [ 0 ] ) ;
ucnv_close ( myConverter [ 1 ] ) ;
ucnv_close ( myConverter [ 2 ] ) ;
1999-08-16 21:50:52 +00:00
}
2000-11-21 04:05:39 +00:00
2001-03-16 20:48:10 +00:00
/*
 * Cross-checks ucnv_isAmbiguous() against what the converter actually does
 * with the byte 0x5c.
 *
 * The two bytes 0x61 0x5c are converted to Unicode. The test is silently
 * skipped when the conversion fails, when the first result is not U+0061,
 * or when the second result is U+FFFD. Otherwise "second result is not
 * U+005C" must be true exactly when ucnv_isAmbiguous() is true, and in that
 * case ucnv_fixFileSeparator() must turn the second result into U+005C.
 */
static void
TestAmbiguousConverter(UConverter *cnv) {
    static const char inBytes[2] = { 0x61, 0x5c };
    UChar outUnicode[20] = { 0, 0, 0, 0 };
    const char *source = inBytes;
    UChar *target = outUnicode;
    UErrorCode errorCode = U_ZERO_ERROR;
    UBool backslashChanged;
    UBool isAmbiguous;

    /* try to convert an 'a' and a US-ASCII backslash */
    ucnv_toUnicode(cnv, &target, outUnicode + 20, &source, inBytes + 2, NULL, TRUE, &errorCode);

    /*
     * Not applicable: general conversion failure (input may just not be
     * mappable), not an ASCII-family encoding, or 0x5c unassigned/illegal.
     */
    if (U_FAILURE(errorCode) || outUnicode[0] != 0x61 || outUnicode[1] == 0xfffd) {
        return;
    }

    isAmbiguous = ucnv_isAmbiguous(cnv);
    backslashChanged = (UBool)(outUnicode[1] != 0x5c);

    /* the observed mapping and the API answer must agree */
    if (backslashChanged != isAmbiguous) {
        log_err(" error: converter \" %s \" needs a backslash fix: %d but ucnv_isAmbiguous()==%d \n ",
                ucnv_getName(cnv, &errorCode), backslashChanged, isAmbiguous);
        return;
    }

    if (!backslashChanged) {
        /* nothing to fix */
        return;
    }

    /* needs fixup: apply it and verify that it worked */
    ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(target - outUnicode));
    if (outUnicode[1] != 0x5c) {
        log_err(" error: ucnv_fixFileSeparator(%s) failed \n ", ucnv_getName(cnv, &errorCode));
    }
}
2000-11-21 04:05:39 +00:00
static void TestAmbiguous ( )
2000-01-08 00:54:57 +00:00
{
UErrorCode status = U_ZERO_ERROR ;
2001-03-16 20:48:10 +00:00
UConverter * ascii_cnv = 0 , * sjis_cnv = 0 , * cnv ;
2000-03-22 01:57:16 +00:00
const char target [ ] = {
/* "\\usr\\local\\share\\data\\icutest.txt" */
0x5c , 0x75 , 0x73 , 0x72 ,
0x5c , 0x6c , 0x6f , 0x63 , 0x61 , 0x6c ,
0x5c , 0x73 , 0x68 , 0x61 , 0x72 , 0x65 ,
0x5c , 0x64 , 0x61 , 0x74 , 0x61 ,
0x5c , 0x69 , 0x63 , 0x75 , 0x74 , 0x65 , 0x73 , 0x74 , 0x2e , 0x74 , 0x78 , 0x74 ,
0
} ;
2001-09-28 22:22:59 +00:00
UChar asciiResult [ 200 ] , sjisResult [ 200 ] ;
2001-03-16 20:48:10 +00:00
int32_t asciiLength = 0 , sjisLength = 0 , i ;
const char * name ;
/* enumerate all converters */
status = U_ZERO_ERROR ;
for ( i = 0 ; ( name = ucnv_getAvailableName ( i ) ) ! = NULL ; + + i ) {
cnv = ucnv_open ( name , & status ) ;
if ( U_SUCCESS ( status ) ) {
TestAmbiguousConverter ( cnv ) ;
ucnv_close ( cnv ) ;
} else {
log_err ( " error: unable to open available converter \" %s \" \n " , name ) ;
status = U_ZERO_ERROR ;
}
}
2001-03-07 20:59:31 +00:00
sjis_cnv = ucnv_open ( " ibm-943 " , & status ) ;
2000-01-08 00:54:57 +00:00
if ( U_FAILURE ( status ) )
{
2000-02-05 00:01:54 +00:00
log_err ( " Failed to create a SJIS converter \n " ) ;
2000-01-08 00:54:57 +00:00
return ;
}
ascii_cnv = ucnv_open ( " LATIN-1 " , & status ) ;
if ( U_FAILURE ( status ) )
{
2000-02-05 00:01:54 +00:00
log_err ( " Failed to create a SJIS converter \n " ) ;
2000-01-08 00:54:57 +00:00
ucnv_close ( sjis_cnv ) ;
return ;
}
/* convert target from SJIS to Unicode */
2001-09-28 22:22:59 +00:00
sjisLength = ucnv_toUChars ( sjis_cnv , sjisResult , sizeof ( sjisResult ) / U_SIZEOF_UCHAR , target , strlen ( target ) , & status ) ;
2000-01-08 00:54:57 +00:00
if ( U_FAILURE ( status ) )
{
log_err ( " Failed to convert the SJIS string. \n " ) ;
ucnv_close ( sjis_cnv ) ;
ucnv_close ( ascii_cnv ) ;
return ;
}
/* convert target from Latin-1 to Unicode */
2001-09-28 22:22:59 +00:00
asciiLength = ucnv_toUChars ( ascii_cnv , asciiResult , sizeof ( asciiResult ) / U_SIZEOF_UCHAR , target , strlen ( target ) , & status ) ;
2000-01-08 00:54:57 +00:00
if ( U_FAILURE ( status ) )
{
log_err ( " Failed to convert the Latin-1 string. \n " ) ;
free ( sjisResult ) ;
ucnv_close ( sjis_cnv ) ;
ucnv_close ( ascii_cnv ) ;
return ;
2001-04-18 19:31:05 +00:00
}
2000-01-08 00:54:57 +00:00
if ( ! ucnv_isAmbiguous ( sjis_cnv ) )
{
log_err ( " SJIS converter should contain ambiguous character mappings. \n " ) ;
free ( sjisResult ) ;
free ( asciiResult ) ;
ucnv_close ( sjis_cnv ) ;
ucnv_close ( ascii_cnv ) ;
return ;
}
if ( u_strcmp ( sjisResult , asciiResult ) = = 0 )
{
log_err ( " File separators for SJIS don't need to be fixed. \n " ) ;
}
ucnv_fixFileSeparator ( sjis_cnv , sjisResult , sjisLength ) ;
if ( u_strcmp ( sjisResult , asciiResult ) ! = 0 )
{
log_err ( " Fixing file separator for SJIS failed. \n " ) ;
}
ucnv_close ( sjis_cnv ) ;
ucnv_close ( ascii_cnv ) ;
}
2001-04-18 19:31:05 +00:00
2002-01-08 01:05:57 +00:00
static void
TestSignatureDetection ( ) {
/* with null terminated strings */
{
char * data [ ] = {
" \xFE \xFF \x00 \x00 " , /* UTF-16BE */
" \xFF \xFE \x00 \x00 " , /* UTF-16LE */
" \xEF \xBB \xBF \x00 " , /* UTF-8 */
" \x0E \xFE \xFF \x00 " , /* SCSU */
" \xFE \xFF " , /* UTF-16BE */
" \xFF \xFE " , /* UTF-16LE */
" \xEF \xBB \xBF " , /* UTF-8 */
" \x0E \xFE \xFF " , /* SCSU */
" \xFE \xFF \x41 \x42 " , /* UTF-16BE */
" \xFF \xFE \x41 \x41 " , /* UTF-16LE */
" \xEF \xBB \xBF \x41 " , /* UTF-8 */
" \x0E \xFE \xFF \x41 " , /* SCSU */
} ;
char * expected [ ] = {
" UTF-16BE " ,
" UTF-16LE " ,
" UTF-8 " ,
" SCSU " ,
" UTF-16BE " ,
" UTF-16LE " ,
" UTF-8 " ,
" SCSU " ,
" UTF-16BE " ,
" UTF-16LE " ,
" UTF-8 " ,
" SCSU " ,
} ;
int32_t expectedLength [ ] = {
2 ,
2 ,
3 ,
3 ,
2 ,
2 ,
3 ,
3 ,
2 ,
2 ,
3 ,
3 ,
} ;
int i = 0 ;
UErrorCode err ;
int32_t signatureLength = - 1 ;
char * source = NULL ;
const char * enc = NULL ;
for ( ; i < sizeof ( data ) / sizeof ( char * ) ; i + + ) {
err = U_ZERO_ERROR ;
source = data [ i ] ;
enc = ucnv_detectUnicodeSignature ( source , - 1 , & signatureLength , & err ) ;
if ( U_FAILURE ( err ) ) {
log_err ( " ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s \n " , source , i , u_errorName ( err ) ) ;
continue ;
}
if ( enc = = NULL | | strcmp ( enc , expected [ i ] ) ! = 0 ) {
log_err ( " ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s \n " , source , i , expected [ i ] , enc ) ;
continue ;
}
if ( signatureLength ! = expectedLength [ i ] ) {
log_err ( " ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i \n " , source , i , signatureLength , expectedLength [ i ] ) ;
}
}
}
{
char * data [ ] = {
" \xFE \xFF \x00 " , /* UTF-16BE */
" \xFF \xFE \x00 " , /* UTF-16LE */
" \xEF \xBB \xBF \x00 " , /* UTF-8 */
" \x0E \xFE \xFF \x00 " , /* SCSU */
" \x00 \x00 \xFE \xFF " , /* UTF-32BE */
" \xFF \xFE \x00 \x00 " , /* UTF-32LE */
" \xFE \xFF " , /* UTF-16BE */
" \xFF \xFE " , /* UTF-16LE */
" \xEF \xBB \xBF " , /* UTF-8 */
" \x0E \xFE \xFF " , /* SCSU */
" \x00 \x00 \xFE \xFF " , /* UTF-32BE */
" \xFF \xFE \x00 \x00 " , /* UTF-32LE */
" \xFE \xFF \x41 \x42 " , /* UTF-16BE */
" \xFF \xFE \x41 \x41 " , /* UTF-16LE */
" \xEF \xBB \xBF \x41 " , /* UTF-8 */
" \x0E \xFE \xFF \x41 " , /* SCSU */
" \x00 \x00 \xFE \xFF \x41 " , /* UTF-32BE */
" \xFF \xFE \x00 \x00 \x42 " , /* UTF-32LE */
" \xFF \x41 \x42 " /* NULL */
} ;
int len [ ] = {
3 ,
3 ,
4 ,
4 ,
4 ,
4 ,
2 ,
2 ,
3 ,
3 ,
4 ,
4 ,
4 ,
4 ,
4 ,
4 ,
5 ,
5 ,
3
} ;
char * expected [ ] = {
" UTF-16BE " ,
" UTF-16LE " ,
" UTF-8 " ,
" SCSU " ,
" UTF-32BE " ,
" UTF-32LE " ,
" UTF-16BE " ,
" UTF-16LE " ,
" UTF-8 " ,
" SCSU " ,
" UTF-32BE " ,
" UTF-32LE " ,
" UTF-16BE " ,
" UTF-16LE " ,
" UTF-8 " ,
" SCSU " ,
" UTF-32BE " ,
" UTF-32LE " ,
NULL
} ;
int32_t expectedLength [ ] = {
2 ,
2 ,
3 ,
3 ,
4 ,
4 ,
2 ,
2 ,
3 ,
3 ,
4 ,
4 ,
2 ,
2 ,
3 ,
3 ,
4 ,
4 ,
0
} ;
int i = 0 ;
UErrorCode err ;
int32_t signatureLength = - 1 ;
int32_t sourceLength = - 1 ;
char * source = NULL ;
const char * enc = NULL ;
for ( ; i < sizeof ( data ) / sizeof ( char * ) ; i + + ) {
err = U_ZERO_ERROR ;
source = data [ i ] ;
sourceLength = len [ i ] ;
enc = ucnv_detectUnicodeSignature ( source , sourceLength , & signatureLength , & err ) ;
if ( U_FAILURE ( err ) ) {
log_err ( " ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s \n " , source , i , u_errorName ( err ) ) ;
continue ;
}
if ( enc = = NULL | | strcmp ( enc , expected [ i ] ) ! = 0 ) {
if ( expected [ i ] ! = NULL ) {
log_err ( " ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s \n " , source , i , expected [ i ] , enc ) ;
continue ;
}
}
if ( signatureLength ! = expectedLength [ i ] ) {
log_err ( " ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i \n " , source , i , signatureLength , expectedLength [ i ] ) ;
}
}
}
}
2001-01-09 22:57:47 +00:00
void
static TestUTF7 ( ) {
/* test input */
static const uint8_t in [ ] = {
/* H - +Jjo- - ! +- +2AHcAQ */
0x48 ,
0x2d ,
0x2b , 0x4a , 0x6a , 0x6f ,
0x2d , 0x2d ,
0x21 ,
0x2b , 0x2d ,
0x2b , 0x32 , 0x41 , 0x48 , 0x63 , 0x41 , 0x51
} ;
/* expected test results */
static const uint32_t results [ ] = {
/* number of bytes read, code point */
1 , 0x48 ,
1 , 0x2d ,
4 , 0x263a , /* <WHITE SMILING FACE> */
2 , 0x2d ,
1 , 0x21 ,
2 , 0x2b ,
7 , 0x10401
} ;
2001-11-07 01:03:53 +00:00
const char * cnvName ;
2001-01-09 22:57:47 +00:00
const char * source = ( const char * ) in , * limit = ( const char * ) in + sizeof ( in ) ;
UErrorCode errorCode = U_ZERO_ERROR ;
UConverter * cnv = ucnv_open ( " UTF-7 " , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Unable to open a UTF-7 converter: %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
TestNextUChar ( cnv , source , limit , results , " UTF-7 " ) ;
/* Test the condition when source >= sourceLimit */
TestNextUCharError ( cnv , source , source , U_INDEX_OUTOFBOUNDS_ERROR , " sourceLimit <= source " ) ;
2001-11-07 01:03:53 +00:00
cnvName = ucnv_getName ( cnv , & errorCode ) ;
if ( U_FAILURE ( errorCode ) | | uprv_strcmp ( cnvName , " UTF-7 " ) ! = 0 ) {
log_err ( " UTF-7 converter is called %s: %s \n " , cnvName , u_errorName ( errorCode ) ) ;
}
2001-01-09 22:57:47 +00:00
ucnv_close ( cnv ) ;
}
2000-06-02 00:04:34 +00:00
void
2000-11-21 04:05:39 +00:00
static TestUTF8 ( ) {
2000-06-02 00:04:34 +00:00
/* test input */
static const uint8_t in [ ] = {
2000-06-22 01:18:30 +00:00
0x61 ,
2001-10-12 18:54:09 +00:00
0xc2 , 0x80 ,
0xe0 , 0xa0 , 0x80 ,
0xf0 , 0x90 , 0x80 , 0x80 ,
2000-07-19 20:14:27 +00:00
0xf4 , 0x84 , 0x8c , 0xa1 ,
2000-12-20 00:02:15 +00:00
0xf0 , 0x90 , 0x90 , 0x81
2000-06-02 00:04:34 +00:00
} ;
/* expected test results */
static const uint32_t results [ ] = {
/* number of bytes read, code point */
1 , 0x61 ,
2001-10-12 18:54:09 +00:00
2 , 0x80 ,
3 , 0x800 ,
4 , 0x10000 ,
2000-07-19 20:14:27 +00:00
4 , 0x104321 ,
2000-12-20 00:02:15 +00:00
4 , 0x10401
2000-06-22 01:18:30 +00:00
} ;
2001-01-10 02:20:30 +00:00
/* error test input */
static const uint8_t in2 [ ] = {
0x61 ,
2001-10-12 18:54:09 +00:00
0xc0 , 0x80 , /* illegal non-shortest form */
0xe0 , 0x80 , 0x80 , /* illegal non-shortest form */
0xf0 , 0x80 , 0x80 , 0x80 , /* illegal non-shortest form */
2001-01-10 02:20:30 +00:00
0xc0 , 0xc0 , /* illegal trail byte */
0xf4 , 0x90 , 0x80 , 0x80 , /* 0x110000 out of range */
0xf8 , 0x80 , 0x80 , 0x80 , 0x80 , /* too long */
2001-10-12 18:54:09 +00:00
0xfe , /* illegal byte altogether */
2001-01-10 02:20:30 +00:00
0x62
} ;
/* expected error test results */
static const uint32_t results2 [ ] = {
/* number of bytes read, code point */
1 , 0x61 ,
2001-10-12 18:54:09 +00:00
22 , 0x62
2001-01-10 02:20:30 +00:00
} ;
UConverterToUCallback cb ;
2001-06-26 22:51:14 +00:00
const void * p ;
2001-01-10 02:20:30 +00:00
2000-06-22 01:18:30 +00:00
const char * source = ( const char * ) in , * limit = ( const char * ) in + sizeof ( in ) ;
UErrorCode errorCode = U_ZERO_ERROR ;
UConverter * cnv = ucnv_open ( " UTF-8 " , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Unable to open a UTF-8 converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-10-27 00:10:17 +00:00
return ;
2000-06-22 01:18:30 +00:00
}
TestNextUChar ( cnv , source , limit , results , " UTF-8 " ) ;
2000-12-20 00:02:15 +00:00
/* Test the condition when source >= sourceLimit */
TestNextUCharError ( cnv , source , source , U_INDEX_OUTOFBOUNDS_ERROR , " sourceLimit <= source " ) ;
2001-01-10 02:20:30 +00:00
/* test error behavior with a skip callback */
ucnv_setToUCallBack ( cnv , UCNV_TO_U_CALLBACK_SKIP , NULL , & cb , & p , & errorCode ) ;
2001-01-23 19:43:21 +00:00
source = ( const char * ) in2 ;
limit = ( const char * ) ( in2 + sizeof ( in2 ) ) ;
2001-01-10 02:20:30 +00:00
TestNextUChar ( cnv , source , limit , results2 , " UTF-8 " ) ;
2000-06-22 01:18:30 +00:00
ucnv_close ( cnv ) ;
}
2000-11-21 04:05:39 +00:00
2000-06-22 01:18:30 +00:00
void
2000-11-21 04:05:39 +00:00
static TestUTF16BE ( ) {
2000-06-22 01:18:30 +00:00
/* test input */
static const uint8_t in [ ] = {
2001-04-18 19:31:05 +00:00
0x00 , 0x61 ,
0x00 , 0xc0 ,
0x00 , 0x31 ,
0x00 , 0xf4 ,
2000-06-22 01:18:30 +00:00
0xce , 0xfe ,
2000-12-20 00:02:15 +00:00
0xd8 , 0x01 , 0xdc , 0x01
2000-06-02 00:04:34 +00:00
} ;
2000-06-22 01:18:30 +00:00
/* expected test results */
static const uint32_t results [ ] = {
/* number of bytes read, code point */
2 , 0x61 ,
2 , 0xc0 ,
2001-04-18 19:31:05 +00:00
2 , 0x31 ,
2000-06-22 01:18:30 +00:00
2 , 0xf4 ,
2000-07-18 18:04:50 +00:00
2 , 0xcefe ,
2000-12-20 00:02:15 +00:00
4 , 0x10401
2000-06-22 01:18:30 +00:00
} ;
2000-06-02 00:04:34 +00:00
2000-06-22 01:18:30 +00:00
const char * source = ( const char * ) in , * limit = ( const char * ) in + sizeof ( in ) ;
2000-06-02 00:04:34 +00:00
UErrorCode errorCode = U_ZERO_ERROR ;
2000-06-22 01:18:30 +00:00
UConverter * cnv = ucnv_open ( " utf-16be " , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Unable to open a UTF16-BE converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-10-27 00:10:17 +00:00
return ;
2000-06-22 01:18:30 +00:00
}
TestNextUChar ( cnv , source , limit , results , " UTF-16BE " ) ;
2000-12-20 00:02:15 +00:00
/* Test the condition when source >= sourceLimit */
TestNextUCharError ( cnv , source , source , U_INDEX_OUTOFBOUNDS_ERROR , " sourceLimit <= source " ) ;
2000-06-22 23:46:02 +00:00
/*Test for the condition where there is an invalid character*/
{
static const uint8_t source2 [ ] = { 0x61 } ;
TestNextUCharError ( cnv , ( const char * ) source2 , ( const char * ) source2 + sizeof ( source2 ) , U_TRUNCATED_CHAR_FOUND , " an invalid character " ) ;
}
/*Test for the condition where there is a surrogate pair*/
{
2000-07-18 18:04:50 +00:00
const uint8_t source2 [ ] = { 0xd8 , 0x01 } ;
2001-04-18 19:31:05 +00:00
TestNextUCharError ( cnv , ( const char * ) source2 , ( const char * ) source2 + sizeof ( source2 ) , U_TRUNCATED_CHAR_FOUND , " an truncated surrogate character " ) ;
2000-06-22 23:46:02 +00:00
}
2000-06-22 01:18:30 +00:00
ucnv_close ( cnv ) ;
}
2000-11-21 04:05:39 +00:00
static void
2000-06-22 01:18:30 +00:00
TestUTF16LE ( ) {
/* test input */
static const uint8_t in [ ] = {
0x61 , 0x00 ,
0x31 , 0x00 ,
2001-04-18 19:31:05 +00:00
0x4e , 0x2e ,
2000-06-22 01:18:30 +00:00
0x4e , 0x00 ,
2000-12-20 00:02:15 +00:00
0x01 , 0xd8 , 0x01 , 0xdc
2000-06-22 01:18:30 +00:00
} ;
2000-06-02 00:04:34 +00:00
2000-06-22 01:18:30 +00:00
/* expected test results */
static const uint32_t results [ ] = {
/* number of bytes read, code point */
2 , 0x61 ,
2 , 0x31 ,
2 , 0x2e4e ,
2 , 0x4e ,
2000-12-20 00:02:15 +00:00
4 , 0x10401
2000-06-22 01:18:30 +00:00
} ;
const char * source = ( const char * ) in , * limit = ( const char * ) in + sizeof ( in ) ;
UErrorCode errorCode = U_ZERO_ERROR ;
UConverter * cnv = ucnv_open ( " utf-16le " , & errorCode ) ;
2000-06-02 00:04:34 +00:00
if ( U_FAILURE ( errorCode ) ) {
2000-06-22 01:18:30 +00:00
log_err ( " Unable to open a UTF16-LE converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-10-27 00:10:17 +00:00
return ;
2000-06-02 00:04:34 +00:00
}
2000-06-22 01:18:30 +00:00
TestNextUChar ( cnv , source , limit , results , " UTF-16LE " ) ;
2000-12-20 00:02:15 +00:00
/* Test the condition when source >= sourceLimit */
TestNextUCharError ( cnv , source , source , U_INDEX_OUTOFBOUNDS_ERROR , " sourceLimit <= source " ) ;
2000-06-22 23:46:02 +00:00
/*Test for the condition where there is an invalid character*/
{
static const uint8_t source2 [ ] = { 0x61 } ;
TestNextUCharError ( cnv , ( const char * ) source2 , ( const char * ) source2 + sizeof ( source2 ) , U_TRUNCATED_CHAR_FOUND , " an invalid character " ) ;
}
2000-07-18 18:04:50 +00:00
/*Test for the condition where there is a surrogate character*/
{
static const uint8_t source2 [ ] = { 0x01 , 0xd8 } ;
TestNextUCharError ( cnv , ( const char * ) source2 , ( const char * ) source2 + sizeof ( source2 ) , U_TRUNCATED_CHAR_FOUND , " an truncated surrogate character " ) ;
}
2001-04-18 19:31:05 +00:00
2000-06-22 01:18:30 +00:00
ucnv_close ( cnv ) ;
}
2000-11-21 04:05:39 +00:00
2000-12-20 00:02:15 +00:00
static void
TestUTF32BE ( ) {
/* test input */
static const uint8_t in [ ] = {
0x00 , 0x00 , 0x00 , 0x61 ,
2001-01-25 20:14:08 +00:00
0x00 , 0x00 , 0xdc , 0x00 ,
2000-12-20 00:02:15 +00:00
0x00 , 0x00 , 0xd8 , 0x00 ,
0x00 , 0x00 , 0xdf , 0xff ,
0x00 , 0x00 , 0xff , 0xfd ,
2001-01-25 20:14:08 +00:00
0x00 , 0x10 , 0xab , 0xcd ,
0x00 , 0x10 , 0xff , 0xff
2000-12-20 00:02:15 +00:00
} ;
/* expected test results */
static const uint32_t results [ ] = {
/* number of bytes read, code point */
4 , 0x61 ,
2001-01-25 20:14:08 +00:00
4 , 0xdc00 ,
2000-12-20 00:02:15 +00:00
4 , 0xd800 ,
4 , 0xdfff ,
4 , 0xfffd ,
2001-01-25 20:14:08 +00:00
4 , 0x10abcd ,
4 , 0x10ffff
2000-12-20 00:02:15 +00:00
} ;
2001-01-10 02:20:30 +00:00
/* error test input */
static const uint8_t in2 [ ] = {
0x00 , 0x00 , 0x00 , 0x61 ,
0x00 , 0x11 , 0x00 , 0x00 , /* 0x110000 out of range */
2001-01-25 20:14:08 +00:00
0x00 , 0x00 , 0x00 , 0x62 ,
0xff , 0xff , 0xff , 0xff , /* 0xffffffff out of range */
2001-01-25 20:19:43 +00:00
0x7f , 0xff , 0xff , 0xff , /* 0x7fffffff out of range */
2001-01-25 20:14:08 +00:00
0x00 , 0x00 , 0x01 , 0x62 ,
0x00 , 0x00 , 0x02 , 0x62
2001-01-10 02:20:30 +00:00
} ;
/* expected error test results */
static const uint32_t results2 [ ] = {
/* number of bytes read, code point */
2001-01-25 20:19:43 +00:00
4 , 0x61 ,
8 , 0x62 ,
12 , 0x162 ,
4 , 0x262
2001-01-10 02:20:30 +00:00
} ;
UConverterToUCallback cb ;
2001-06-26 22:51:14 +00:00
const void * p ;
2001-01-10 02:20:30 +00:00
2000-12-20 00:02:15 +00:00
const char * source = ( const char * ) in , * limit = ( const char * ) in + sizeof ( in ) ;
UErrorCode errorCode = U_ZERO_ERROR ;
UConverter * cnv = ucnv_open ( " UTF-32BE " , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Unable to open a UTF-32BE converter: %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
TestNextUChar ( cnv , source , limit , results , " UTF-32BE " ) ;
/* Test the condition when source >= sourceLimit */
TestNextUCharError ( cnv , source , source , U_INDEX_OUTOFBOUNDS_ERROR , " sourceLimit <= source " ) ;
2001-01-10 02:20:30 +00:00
/* test error behavior with a skip callback */
ucnv_setToUCallBack ( cnv , UCNV_TO_U_CALLBACK_SKIP , NULL , & cb , & p , & errorCode ) ;
2001-01-23 19:43:21 +00:00
source = ( const char * ) in2 ;
limit = ( const char * ) ( in2 + sizeof ( in2 ) ) ;
2001-01-10 02:20:30 +00:00
TestNextUChar ( cnv , source , limit , results2 , " UTF-32BE " ) ;
2000-12-20 00:02:15 +00:00
ucnv_close ( cnv ) ;
}
static void
TestUTF32LE ( ) {
/* test input */
static const uint8_t in [ ] = {
0x61 , 0x00 , 0x00 , 0x00 ,
2001-01-25 20:14:08 +00:00
0x00 , 0xdc , 0x00 , 0x00 ,
2000-12-20 00:02:15 +00:00
0x00 , 0xd8 , 0x00 , 0x00 ,
0xff , 0xdf , 0x00 , 0x00 ,
0xfd , 0xff , 0x00 , 0x00 ,
2001-01-25 20:14:08 +00:00
0xcd , 0xab , 0x10 , 0x00 ,
0xff , 0xff , 0x10 , 0x00
2000-12-20 00:02:15 +00:00
} ;
/* expected test results */
static const uint32_t results [ ] = {
/* number of bytes read, code point */
4 , 0x61 ,
2001-01-25 20:14:08 +00:00
4 , 0xdc00 ,
2000-12-20 00:02:15 +00:00
4 , 0xd800 ,
4 , 0xdfff ,
4 , 0xfffd ,
2001-01-25 20:14:08 +00:00
4 , 0x10abcd ,
4 , 0x10ffff
2000-12-20 00:02:15 +00:00
} ;
2001-01-10 02:20:30 +00:00
/* error test input */
static const uint8_t in2 [ ] = {
0x61 , 0x00 , 0x00 , 0x00 ,
0x00 , 0x00 , 0x11 , 0x00 , /* 0x110000 out of range */
2001-01-25 20:14:08 +00:00
0x62 , 0x00 , 0x00 , 0x00 ,
0xff , 0xff , 0xff , 0xff , /* 0xffffffff out of range */
2001-01-25 20:19:43 +00:00
0xff , 0xff , 0xff , 0x7f , /* 0x7fffffff out of range */
2001-01-25 20:14:08 +00:00
0x62 , 0x01 , 0x00 , 0x00 ,
0x62 , 0x02 , 0x00 , 0x00 ,
2001-01-10 02:20:30 +00:00
} ;
/* expected error test results */
static const uint32_t results2 [ ] = {
/* number of bytes read, code point */
2001-01-25 20:19:43 +00:00
4 , 0x61 ,
8 , 0x62 ,
12 , 0x162 ,
4 , 0x262 ,
2001-01-10 02:20:30 +00:00
} ;
UConverterToUCallback cb ;
2001-06-26 22:51:14 +00:00
const void * p ;
2001-01-10 02:20:30 +00:00
2000-12-20 00:02:15 +00:00
const char * source = ( const char * ) in , * limit = ( const char * ) in + sizeof ( in ) ;
UErrorCode errorCode = U_ZERO_ERROR ;
UConverter * cnv = ucnv_open ( " UTF-32LE " , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Unable to open a UTF-32LE converter: %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
TestNextUChar ( cnv , source , limit , results , " UTF-32LE " ) ;
/* Test the condition when source >= sourceLimit */
TestNextUCharError ( cnv , source , source , U_INDEX_OUTOFBOUNDS_ERROR , " sourceLimit <= source " ) ;
2001-01-10 02:20:30 +00:00
/* test error behavior with a skip callback */
ucnv_setToUCallBack ( cnv , UCNV_TO_U_CALLBACK_SKIP , NULL , & cb , & p , & errorCode ) ;
2001-01-23 19:43:21 +00:00
source = ( const char * ) in2 ;
limit = ( const char * ) ( in2 + sizeof ( in2 ) ) ;
2001-01-10 02:20:30 +00:00
TestNextUChar ( cnv , source , limit , results2 , " UTF-32LE " ) ;
2000-12-20 00:02:15 +00:00
ucnv_close ( cnv ) ;
}
2000-11-21 04:05:39 +00:00
static void
2000-06-22 01:18:30 +00:00
TestLATIN1 ( ) {
/* test input */
2001-04-18 19:31:05 +00:00
static const uint8_t in [ ] = {
2000-06-22 01:18:30 +00:00
0x61 ,
0x31 ,
0x32 ,
2001-04-18 19:31:05 +00:00
0xc0 ,
2000-06-22 01:18:30 +00:00
0xf0 ,
2001-04-18 19:31:05 +00:00
0xf4 ,
2000-06-22 01:18:30 +00:00
} ;
2000-06-02 00:04:34 +00:00
2000-06-22 01:18:30 +00:00
/* expected test results */
static const uint32_t results [ ] = {
/* number of bytes read, code point */
1 , 0x61 ,
1 , 0x31 ,
1 , 0x32 ,
2000-06-26 22:16:32 +00:00
1 , 0xc0 ,
1 , 0xf0 ,
1 , 0xf4 ,
2000-06-22 01:18:30 +00:00
} ;
2001-07-14 02:29:21 +00:00
static const uint16_t in1 [ ] = {
0x08 , 0x00 , 0x1b , 0x4c , 0xea , 0x16 , 0xca , 0xd3 , 0x94 , 0x0f , 0x53 , 0xef , 0x61 , 0x1b , 0xe5 , 0x84 ,
0xc4 , 0x0f , 0x53 , 0xef , 0x61 , 0x1b , 0xe5 , 0x84 , 0xc4 , 0x16 , 0xca , 0xd3 , 0x94 , 0x08 , 0x02 , 0x0f ,
0x53 , 0x4a , 0x4e , 0x16 , 0x7d , 0x00 , 0x30 , 0x82 , 0x52 , 0x4d , 0x30 , 0x6b , 0x6d , 0x41 , 0x88 , 0x4c ,
0xe5 , 0x97 , 0x9f , 0x08 , 0x0c , 0x16 , 0xca , 0xd3 , 0x94 , 0x15 , 0xae , 0x0e , 0x6b , 0x4c , 0x08 , 0x0d ,
0x8c , 0xb4 , 0xa3 , 0x9f , 0xca , 0x99 , 0xcb , 0x8b , 0xc2 , 0x97 , 0xcc , 0xaa , 0x84 , 0x08 , 0x02 , 0x0e ,
0x7c , 0x73 , 0xe2 , 0x16 , 0xa3 , 0xb7 , 0xcb , 0x93 , 0xd3 , 0xb4 , 0xc5 , 0xdc , 0x9f , 0x0e , 0x79 , 0x3e ,
0x06 , 0xae , 0xb1 , 0x9d , 0x93 , 0xd3 , 0x08 , 0x0c , 0xbe , 0xa3 , 0x8f , 0x08 , 0x88 , 0xbe , 0xa3 , 0x8d ,
0xd3 , 0xa8 , 0xa3 , 0x97 , 0xc5 , 0x17 , 0x89 , 0x08 , 0x0d , 0x15 , 0xd2 , 0x08 , 0x01 , 0x93 , 0xc8 , 0xaa ,
0x8f , 0x0e , 0x61 , 0x1b , 0x99 , 0xcb , 0x0e , 0x4e , 0xba , 0x9f , 0xa1 , 0xae , 0x93 , 0xa8 , 0xa0 , 0x08 ,
0x02 , 0x08 , 0x0c , 0xe2 , 0x16 , 0xa3 , 0xb7 , 0xcb , 0x0f , 0x4f , 0xe1 , 0x80 , 0x05 , 0xec , 0x60 , 0x8d ,
0xea , 0x06 , 0xd3 , 0xe6 , 0x0f , 0x8a , 0x00 , 0x30 , 0x44 , 0x65 , 0xb9 , 0xe4 , 0xfe , 0xe7 , 0xc2 , 0x06 ,
0xcb , 0x82
} ;
static const uint8_t out1 [ ] = {
0x08 , 0x00 , 0x1b , 0x4c , 0xea , 0x16 , 0xca , 0xd3 , 0x94 , 0x0f , 0x53 , 0xef , 0x61 , 0x1b , 0xe5 , 0x84 ,
0xc4 , 0x0f , 0x53 , 0xef , 0x61 , 0x1b , 0xe5 , 0x84 , 0xc4 , 0x16 , 0xca , 0xd3 , 0x94 , 0x08 , 0x02 , 0x0f ,
0x53 , 0x4a , 0x4e , 0x16 , 0x7d , 0x00 , 0x30 , 0x82 , 0x52 , 0x4d , 0x30 , 0x6b , 0x6d , 0x41 , 0x88 , 0x4c ,
0xe5 , 0x97 , 0x9f , 0x08 , 0x0c , 0x16 , 0xca , 0xd3 , 0x94 , 0x15 , 0xae , 0x0e , 0x6b , 0x4c , 0x08 , 0x0d ,
0x8c , 0xb4 , 0xa3 , 0x9f , 0xca , 0x99 , 0xcb , 0x8b , 0xc2 , 0x97 , 0xcc , 0xaa , 0x84 , 0x08 , 0x02 , 0x0e ,
0x7c , 0x73 , 0xe2 , 0x16 , 0xa3 , 0xb7 , 0xcb , 0x93 , 0xd3 , 0xb4 , 0xc5 , 0xdc , 0x9f , 0x0e , 0x79 , 0x3e ,
0x06 , 0xae , 0xb1 , 0x9d , 0x93 , 0xd3 , 0x08 , 0x0c , 0xbe , 0xa3 , 0x8f , 0x08 , 0x88 , 0xbe , 0xa3 , 0x8d ,
0xd3 , 0xa8 , 0xa3 , 0x97 , 0xc5 , 0x17 , 0x89 , 0x08 , 0x0d , 0x15 , 0xd2 , 0x08 , 0x01 , 0x93 , 0xc8 , 0xaa ,
0x8f , 0x0e , 0x61 , 0x1b , 0x99 , 0xcb , 0x0e , 0x4e , 0xba , 0x9f , 0xa1 , 0xae , 0x93 , 0xa8 , 0xa0 , 0x08 ,
0x02 , 0x08 , 0x0c , 0xe2 , 0x16 , 0xa3 , 0xb7 , 0xcb , 0x0f , 0x4f , 0xe1 , 0x80 , 0x05 , 0xec , 0x60 , 0x8d ,
0xea , 0x06 , 0xd3 , 0xe6 , 0x0f , 0x8a , 0x00 , 0x30 , 0x44 , 0x65 , 0xb9 , 0xe4 , 0xfe , 0xe7 , 0xc2 , 0x06 ,
0xcb , 0x82
} ;
static const uint16_t in2 [ ] = {
0x1B , 0x24 , 0x29 , 0x47 , 0x0E , 0x23 , 0x21 , 0x23 , 0x22 , 0x23 ,
0x23 , 0x23 , 0x24 , 0x23 , 0x25 , 0x23 , 0x26 , 0x23 , 0x27 , 0x23 ,
0x28 , 0x23 , 0x29 , 0x23 , 0x2A , 0x23 , 0x2B , 0x0F , 0x2F , 0x2A ,
0x70 , 0x6C , 0x61 , 0x6E , 0x65 , 0x20 , 0x31 , 0x20 , 0x2A , 0x2F ,
0x0D , 0x0A , 0x1B , 0x24 , 0x2A , 0x48 , 0x1B , 0x4E , 0x22 , 0x21 ,
0x1B , 0x4E , 0x22 , 0x22 , 0x1B , 0x4E , 0x22 , 0x23 , 0x1B , 0x4E ,
0x22 , 0x24 , 0x1B , 0x4E , 0x22 , 0x25 , 0x0F , 0x2F , 0x2A , 0x70 ,
0x6C , 0x61 , 0x6E , 0x65 , 0x32 , 0x2A , 0x2F , 0x20 , 0x0D , 0x0A ,
0x1B , 0x24 , 0x2B , 0x49 , 0x1B , 0x4F , 0x22 , 0x44 , 0x1B , 0x4F ,
0x22 , 0x45 , 0x1B , 0x4F , 0x22 , 0x46 , 0x1B , 0x4F , 0x22 , 0x47 ,
0x1B , 0x4F , 0x22 , 0x48 , 0x0F , 0x2F , 0x2A , 0x70 , 0x6C , 0x61 ,
0x6E , 0x65 , 0x20 , 0x33 , 0x20 , 0x2A , 0x2F , 0x0D , 0x0A , 0x1B ,
0x24 , 0x2B , 0x4A , 0x1B , 0x4F , 0x21 , 0x44 , 0x1B , 0x4F , 0x21 ,
0x45 , 0x1B , 0x4F , 0x22 , 0x6A , 0x1B , 0x4F , 0x22 , 0x6B , 0x1B ,
0x4F , 0x22 , 0x6C , 0x0F , 0x2F , 0x2A , 0x70 , 0x6C , 0x61 , 0x6E ,
0x65 , 0x20 , 0x34 , 0x20 , 0x2A , 0x2F , 0x0D , 0x0A , 0x1B , 0x24 ,
0x2B , 0x4B , 0x1B , 0x4F , 0x21 , 0x74 , 0x1B , 0x4F , 0x22 , 0x50 ,
0x1B , 0x4F , 0x22 , 0x51 , 0x1B , 0x4F , 0x23 , 0x37 , 0x1B , 0x4F ,
0x22 , 0x5C , 0x0F , 0x2F , 0x2A , 0x70 , 0x6C , 0x61 , 0x6E ,
0x65 , 0x20 , 0x35 , 0x20 , 0x2A , 0x2F , 0x0D , 0x0A , 0x1B , 0x24 ,
0x2B , 0x4C , 0x1B , 0x4F , 0x21 , 0x23 , 0x1B , 0x4F , 0x22 , 0x2C ,
0x1B , 0x4F , 0x23 , 0x4E , 0x1B , 0x4F , 0x21 , 0x6E , 0x1B , 0x4F ,
0x23 , 0x71 , 0x0F , 0x2F , 0x2A , 0x70 , 0x6C , 0x61 , 0x6E , 0x65 ,
0x20 , 0x36 , 0x20 , 0x2A , 0x2F , 0x0D , 0x0A , 0x1B , 0x24 , 0x2B ,
0x4D , 0x1B , 0x4F , 0x22 , 0x71 , 0x1B , 0x4F , 0x21 , 0x4E , 0x1B ,
0x4F , 0x21 , 0x6A , 0x1B , 0x4F , 0x23 , 0x3A , 0x1B , 0x4F , 0x23 ,
0x6F , 0x0F , 0x2F , 0x2A , 0x70 , 0x6C , 0x61 , 0x6E , 0x65 , 0x20 ,
0x37 , 0x20 , 0x2A , 0x2F ,
} ;
static const unsigned char out2 [ ] = {
0x1B , 0x24 , 0x29 , 0x47 , 0x0E , 0x23 , 0x21 , 0x23 , 0x22 , 0x23 ,
0x23 , 0x23 , 0x24 , 0x23 , 0x25 , 0x23 , 0x26 , 0x23 , 0x27 , 0x23 ,
0x28 , 0x23 , 0x29 , 0x23 , 0x2A , 0x23 , 0x2B , 0x0F , 0x2F , 0x2A ,
0x70 , 0x6C , 0x61 , 0x6E , 0x65 , 0x20 , 0x31 , 0x20 , 0x2A , 0x2F ,
0x0D , 0x0A , 0x1B , 0x24 , 0x2A , 0x48 , 0x1B , 0x4E , 0x22 , 0x21 ,
0x1B , 0x4E , 0x22 , 0x22 , 0x1B , 0x4E , 0x22 , 0x23 , 0x1B , 0x4E ,
0x22 , 0x24 , 0x1B , 0x4E , 0x22 , 0x25 , 0x0F , 0x2F , 0x2A , 0x70 ,
0x6C , 0x61 , 0x6E , 0x65 , 0x32 , 0x2A , 0x2F , 0x20 , 0x0D , 0x0A ,
0x1B , 0x24 , 0x2B , 0x49 , 0x1B , 0x4F , 0x22 , 0x44 , 0x1B , 0x4F ,
0x22 , 0x45 , 0x1B , 0x4F , 0x22 , 0x46 , 0x1B , 0x4F , 0x22 , 0x47 ,
0x1B , 0x4F , 0x22 , 0x48 , 0x0F , 0x2F , 0x2A , 0x70 , 0x6C , 0x61 ,
0x6E , 0x65 , 0x20 , 0x33 , 0x20 , 0x2A , 0x2F , 0x0D , 0x0A , 0x1B ,
0x24 , 0x2B , 0x4A , 0x1B , 0x4F , 0x21 , 0x44 , 0x1B , 0x4F , 0x21 ,
0x45 , 0x1B , 0x4F , 0x22 , 0x6A , 0x1B , 0x4F , 0x22 , 0x6B , 0x1B ,
0x4F , 0x22 , 0x6C , 0x0F , 0x2F , 0x2A , 0x70 , 0x6C , 0x61 , 0x6E ,
0x65 , 0x20 , 0x34 , 0x20 , 0x2A , 0x2F , 0x0D , 0x0A , 0x1B , 0x24 ,
0x2B , 0x4B , 0x1B , 0x4F , 0x21 , 0x74 , 0x1B , 0x4F , 0x22 , 0x50 ,
0x1B , 0x4F , 0x22 , 0x51 , 0x1B , 0x4F , 0x23 , 0x37 , 0x1B , 0x4F ,
0x22 , 0x5C , 0x0F , 0x2F , 0x2A , 0x70 , 0x6C , 0x61 , 0x6E ,
0x65 , 0x20 , 0x35 , 0x20 , 0x2A , 0x2F , 0x0D , 0x0A , 0x1B , 0x24 ,
0x2B , 0x4C , 0x1B , 0x4F , 0x21 , 0x23 , 0x1B , 0x4F , 0x22 , 0x2C ,
0x1B , 0x4F , 0x23 , 0x4E , 0x1B , 0x4F , 0x21 , 0x6E , 0x1B , 0x4F ,
0x23 , 0x71 , 0x0F , 0x2F , 0x2A , 0x70 , 0x6C , 0x61 , 0x6E , 0x65 ,
0x20 , 0x36 , 0x20 , 0x2A , 0x2F , 0x0D , 0x0A , 0x1B , 0x24 , 0x2B ,
0x4D , 0x1B , 0x4F , 0x22 , 0x71 , 0x1B , 0x4F , 0x21 , 0x4E , 0x1B ,
0x4F , 0x21 , 0x6A , 0x1B , 0x4F , 0x23 , 0x3A , 0x1B , 0x4F , 0x23 ,
0x6F , 0x0F , 0x2F , 0x2A , 0x70 , 0x6C , 0x61 , 0x6E , 0x65 , 0x20 ,
0x37 , 0x20 , 0x2A , 0x2F ,
} ;
2000-06-22 01:18:30 +00:00
const char * source = ( const char * ) in ;
const char * limit = ( const char * ) in + sizeof ( in ) ;
UErrorCode errorCode = U_ZERO_ERROR ;
UConverter * cnv = ucnv_open ( " LATIN_1 " , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Unable to open a LATIN_1 converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-10-27 00:10:17 +00:00
return ;
2000-06-02 00:04:34 +00:00
}
2000-06-22 01:18:30 +00:00
TestNextUChar ( cnv , source , limit , results , " LATIN_1 " ) ;
2000-12-20 00:02:15 +00:00
/* Test the condition when source >= sourceLimit */
TestNextUCharError ( cnv , source , source , U_INDEX_OUTOFBOUNDS_ERROR , " sourceLimit <= source " ) ;
2001-08-03 16:08:41 +00:00
TestConv ( ( uint16_t * ) in1 , sizeof ( in1 ) / 2 , " LATIN_1 " , " LATIN-1 " , ( char * ) out1 , sizeof ( out1 ) ) ;
TestConv ( ( uint16_t * ) in2 , sizeof ( in2 ) / 2 , " ASCII " , " ASCII " , ( char * ) out2 , sizeof ( out2 ) ) ;
2001-07-14 02:29:21 +00:00
2000-06-22 01:18:30 +00:00
ucnv_close ( cnv ) ;
}
2000-11-21 04:05:39 +00:00
static void
2000-06-22 01:18:30 +00:00
TestSBCS ( ) {
/* test input */
static const uint8_t in [ ] = { 0x61 , 0xc0 , 0x80 , 0xe0 , 0xf0 , 0xf4 } ;
/* expected test results */
static const uint32_t results [ ] = {
/* number of bytes read, code point */
1 , 0x61 ,
1 , 0xbf ,
1 , 0xc4 ,
1 , 0x2021 ,
2000-10-27 23:55:56 +00:00
1 , 0xf8ff ,
2000-06-22 01:18:30 +00:00
1 , 0x00d9
} ;
2000-06-02 00:04:34 +00:00
2000-06-22 01:18:30 +00:00
const char * source = ( const char * ) in , * limit = ( const char * ) in + sizeof ( in ) ;
UErrorCode errorCode = U_ZERO_ERROR ;
UConverter * cnv = ucnv_open ( " ibm-1281 " , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Unable to open a SBCS(ibm-1281) converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-10-27 00:10:17 +00:00
return ;
2000-06-22 01:18:30 +00:00
}
TestNextUChar ( cnv , source , limit , results , " SBCS(ibm-1281) " ) ;
2000-12-20 00:02:15 +00:00
/* Test the condition when source >= sourceLimit */
TestNextUCharError ( cnv , source , source , U_INDEX_OUTOFBOUNDS_ERROR , " sourceLimit <= source " ) ;
2001-03-05 18:39:03 +00:00
/*Test for Illegal character */ /*
2000-06-22 23:46:02 +00:00
{
static const uint8_t input1 [ ] = { 0xA1 } ;
const char * illegalsource = ( const char * ) input1 ;
TestNextUCharError ( cnv , illegalsource , illegalsource + sizeof ( illegalsource ) , U_INVALID_CHAR_FOUND , " source has a illegal characte " ) ;
}
*/
2000-07-06 23:01:50 +00:00
ucnv_close ( cnv ) ;
2000-06-02 00:04:34 +00:00
}
2000-06-22 23:46:02 +00:00
2000-11-21 04:05:39 +00:00
static void
2000-06-22 01:18:30 +00:00
TestDBCS ( ) {
/* test input */
static const uint8_t in [ ] = {
0x44 , 0x6a ,
0xc4 , 0x9c ,
0x7a , 0x74 ,
0x46 , 0xab ,
0x42 , 0x5b ,
2001-04-18 19:31:05 +00:00
2000-06-22 01:18:30 +00:00
} ;
2000-06-02 00:04:34 +00:00
2000-06-22 01:18:30 +00:00
/* expected test results */
static const uint32_t results [ ] = {
/* number of bytes read, code point */
2 , 0x00a7 ,
2 , 0xe1d2 ,
2 , 0x6962 ,
2 , 0xf842 ,
2001-04-18 19:31:05 +00:00
2 , 0xffe5 ,
2000-06-22 01:18:30 +00:00
} ;
const char * source = ( const char * ) in , * limit = ( const char * ) in + sizeof ( in ) ;
UErrorCode errorCode = U_ZERO_ERROR ;
2001-04-18 19:31:05 +00:00
2000-06-22 01:18:30 +00:00
UConverter * cnv = ucnv_open ( " ibm-9027 " , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Unable to open a DBCS(ibm-9027) converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-10-27 00:10:17 +00:00
return ;
2000-06-22 01:18:30 +00:00
}
TestNextUChar ( cnv , source , limit , results , " DBCS(ibm-9027) " ) ;
2000-12-20 00:02:15 +00:00
/* Test the condition when source >= sourceLimit */
TestNextUCharError ( cnv , source , source , U_INDEX_OUTOFBOUNDS_ERROR , " sourceLimit <= source " ) ;
2000-06-22 23:46:02 +00:00
/*Test for the condition where we have a truncated char*/
{
static const uint8_t source1 [ ] = { 0xc4 } ;
TestNextUCharError ( cnv , ( const char * ) source1 , ( const char * ) source1 + sizeof ( source1 ) , U_TRUNCATED_CHAR_FOUND , " a character is truncated " ) ;
}
/*Test for the condition where there is an invalid character*/
{
static const uint8_t source2 [ ] = { 0x1a , 0x1b } ;
TestNextUCharError ( cnv , ( const char * ) source2 , ( const char * ) source2 + sizeof ( source2 ) , U_ZERO_ERROR , " an invalid character " ) ;
}
2000-06-22 01:18:30 +00:00
ucnv_close ( cnv ) ;
}
2000-11-21 04:05:39 +00:00
static void
2000-06-22 01:18:30 +00:00
TestMBCS ( ) {
/* test input */
static const uint8_t in [ ] = {
0x01 ,
0xa6 , 0xa3 ,
0x00 ,
0xa6 , 0xa1 ,
0x08 ,
0xc2 , 0x76 ,
2001-04-18 19:31:05 +00:00
0xc2 , 0x78 ,
2000-06-22 01:18:30 +00:00
} ;
/* expected test results */
static const uint32_t results [ ] = {
/* number of bytes read, code point */
1 , 0x0001 ,
2 , 0x250c ,
1 , 0x0000 ,
2 , 0x2500 ,
2001-04-18 19:31:05 +00:00
1 , 0x0008 ,
2000-06-22 01:18:30 +00:00
2 , 0xd60c ,
2000-07-19 20:14:27 +00:00
2 , 0xd60e ,
2000-06-22 01:18:30 +00:00
} ;
const char * source = ( const char * ) in , * limit = ( const char * ) in + sizeof ( in ) ;
UErrorCode errorCode = U_ZERO_ERROR ;
2001-04-18 19:31:05 +00:00
2000-06-22 01:18:30 +00:00
UConverter * cnv = ucnv_open ( " ibm-1363 " , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Unable to open a MBCS(ibm-1363) converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-10-27 00:10:17 +00:00
return ;
2000-06-22 01:18:30 +00:00
}
TestNextUChar ( cnv , source , limit , results , " MBCS(ibm-1363) " ) ;
2000-12-20 00:02:15 +00:00
/* Test the condition when source >= sourceLimit */
TestNextUCharError ( cnv , source , source , U_INDEX_OUTOFBOUNDS_ERROR , " sourceLimit <= source " ) ;
2000-06-22 23:46:02 +00:00
/*Test for the condition where we have a truncated char*/
{
static const uint8_t source1 [ ] = { 0xc4 } ;
TestNextUCharError ( cnv , ( const char * ) source1 , ( const char * ) source1 + sizeof ( source1 ) , U_TRUNCATED_CHAR_FOUND , " a character is truncated " ) ;
}
/*Test for the condition where there is an invalid character*/
{
static const uint8_t source2 [ ] = { 0xa1 , 0x01 } ;
TestNextUCharError ( cnv , ( const char * ) source2 , ( const char * ) source2 + sizeof ( source2 ) , U_ZERO_ERROR , " an invalid character " ) ;
}
2000-06-22 01:18:30 +00:00
ucnv_close ( cnv ) ;
2000-08-15 18:05:12 +00:00
2000-06-22 01:18:30 +00:00
}
2000-11-21 04:05:39 +00:00
static void
2000-06-22 01:18:30 +00:00
TestISO_2022 ( ) {
2000-01-19 19:00:53 +00:00
/* test input */
static const uint8_t in [ ] = {
2000-06-22 01:18:30 +00:00
0x1b , 0x25 , 0x42 , 0x31 ,
0x32 ,
2000-01-19 19:00:53 +00:00
0x61 ,
2001-10-12 18:54:09 +00:00
0xc2 , 0x80 ,
0xe0 , 0xa0 , 0x80 ,
0xf0 , 0x90 , 0x80 , 0x80
2000-01-19 19:00:53 +00:00
} ;
2000-08-15 00:07:33 +00:00
2000-01-19 19:00:53 +00:00
/* expected test results */
static const uint32_t results [ ] = {
/* number of bytes read, code point */
2000-06-22 01:18:30 +00:00
4 , 0x0031 ,
1 , 0x0032 ,
2000-01-19 19:00:53 +00:00
1 , 0x61 ,
2001-10-12 18:54:09 +00:00
2 , 0x80 ,
3 , 0x800 ,
4 , 0x10000 ,
2001-04-18 19:31:05 +00:00
2000-01-19 19:00:53 +00:00
} ;
2000-06-22 01:18:30 +00:00
const char * source = ( const char * ) in , * limit = ( const char * ) in + sizeof ( in ) ;
2000-01-19 19:00:53 +00:00
UErrorCode errorCode = U_ZERO_ERROR ;
2000-07-22 00:08:14 +00:00
UConverter * cnv ;
2000-08-15 00:07:33 +00:00
cnv = ucnv_open ( " ISO_2022 " , & errorCode ) ;
2000-01-19 19:00:53 +00:00
if ( U_FAILURE ( errorCode ) ) {
2000-06-22 01:18:30 +00:00
log_err ( " Unable to open a iso-2022 converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-07-21 23:58:01 +00:00
return ;
2000-01-19 19:00:53 +00:00
}
2000-08-15 00:07:33 +00:00
TestNextUChar ( cnv , source , limit , results , " ISO_2022 " ) ;
2000-06-22 23:46:02 +00:00
2000-12-20 00:02:15 +00:00
/* Test the condition when source >= sourceLimit */
2000-06-22 23:46:02 +00:00
TestNextUCharError ( cnv , source , source - 1 , U_ILLEGAL_ARGUMENT_ERROR , " sourceLimit < source " ) ;
2000-12-20 00:02:15 +00:00
TestNextUCharError ( cnv , source , source , U_INDEX_OUTOFBOUNDS_ERROR , " sourceLimit <= source " ) ;
2000-06-22 23:46:02 +00:00
/*Test for the condition where we have a truncated char*/
{
static const uint8_t source1 [ ] = { 0xc4 } ;
TestNextUCharError ( cnv , ( const char * ) source1 , ( const char * ) source1 + sizeof ( source1 ) , U_TRUNCATED_CHAR_FOUND , " a character is truncated " ) ;
}
/*Test for the condition where there is an invalid character*/
{
static const uint8_t source2 [ ] = { 0xa1 , 0x01 } ;
TestNextUCharError ( cnv , ( const char * ) source2 , ( const char * ) source2 + sizeof ( source2 ) , U_ZERO_ERROR , " an invalid character " ) ;
}
2000-06-22 01:18:30 +00:00
ucnv_close ( cnv ) ;
}
2000-11-21 04:05:39 +00:00
static void
2001-03-05 18:39:03 +00:00
TestSmallTargetBuffer ( const uint16_t * source , const UChar * sourceLimit , UConverter * cnv ) {
2000-10-20 02:47:14 +00:00
const UChar * uSource ;
2000-11-17 03:03:14 +00:00
const UChar * uSourceLimit ;
const char * cSource ;
const char * cSourceLimit ;
UChar * uTargetLimit = NULL ;
UChar * uTarget ;
char * cTarget ;
const char * cTargetLimit ;
2001-04-18 19:31:05 +00:00
char * cBuf ;
2000-11-17 03:03:14 +00:00
UChar * uBuf , * test ;
2000-10-20 02:47:14 +00:00
int32_t uBufSize = 120 ;
int len = 0 ;
2001-03-06 03:42:35 +00:00
int i = 2 ;
2000-10-20 02:47:14 +00:00
UErrorCode errorCode = U_ZERO_ERROR ;
2000-11-17 03:03:14 +00:00
uBuf = ( UChar * ) malloc ( uBufSize * sizeof ( UChar ) * 5 ) ;
cBuf = ( char * ) malloc ( uBufSize * sizeof ( char ) * 10 ) ;
2001-03-02 23:55:49 +00:00
ucnv_reset ( cnv ) ;
2000-10-28 01:08:25 +00:00
for ( ; - - i > 0 ; ) {
uSource = ( UChar * ) source ;
2000-11-17 03:03:14 +00:00
uSourceLimit = ( const UChar * ) sourceLimit ;
cTarget = cBuf ;
2000-10-28 01:08:25 +00:00
uTarget = uBuf ;
2000-11-17 03:03:14 +00:00
cSource = cBuf ;
2000-10-28 01:08:25 +00:00
cTargetLimit = cBuf ;
uTargetLimit = uBuf ;
2000-10-20 02:47:14 +00:00
2000-10-28 01:08:25 +00:00
do {
2001-04-18 19:31:05 +00:00
2000-11-17 03:03:14 +00:00
cTargetLimit = cTargetLimit + i ;
ucnv_fromUnicode ( cnv , & cTarget , cTargetLimit , & uSource , uSourceLimit , NULL , FALSE , & errorCode ) ;
2000-10-28 01:08:25 +00:00
if ( errorCode = = U_BUFFER_OVERFLOW_ERROR ) {
errorCode = U_ZERO_ERROR ;
continue ;
}
2000-11-17 03:03:14 +00:00
if ( U_FAILURE ( errorCode ) ) {
2000-10-28 01:08:25 +00:00
log_err ( " ucnv_toUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
2000-10-20 02:47:14 +00:00
return ;
2000-10-28 01:08:25 +00:00
}
} while ( uSource < uSourceLimit ) ;
cSourceLimit = cTarget ;
do {
uTargetLimit = uTargetLimit + i ;
2000-11-08 23:15:12 +00:00
ucnv_toUnicode ( cnv , & uTarget , uTargetLimit , & cSource , cSourceLimit , NULL , FALSE , & errorCode ) ;
2000-10-28 01:08:25 +00:00
if ( errorCode = = U_BUFFER_OVERFLOW_ERROR ) {
errorCode = U_ZERO_ERROR ;
continue ;
}
if ( U_FAILURE ( errorCode ) ) {
log_err ( " ucnv_toUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
} while ( cSource < cSourceLimit ) ;
uSource = source ;
test = uBuf ;
for ( len = 0 ; len < ( int ) ( source - sourceLimit ) ; len + + ) {
2000-11-17 03:03:14 +00:00
if ( uBuf [ len ] ! = uSource [ len ] ) {
log_err ( " Expected : \\ u%04X \t Got: \\ u%04X \n " , uSource [ len ] , ( int ) uBuf [ len ] ) ;
}
2000-10-20 02:47:14 +00:00
}
}
free ( uBuf ) ;
2000-11-17 03:03:14 +00:00
free ( cBuf ) ;
2000-10-20 02:47:14 +00:00
}
2000-12-19 02:29:23 +00:00
/* Test for Jitterbug 778 */
2001-03-05 18:39:03 +00:00
static void TestToAndFromUChars ( const uint16_t * source , const UChar * sourceLimit , UConverter * cnv ) {
2000-12-19 02:29:23 +00:00
const UChar * uSource ;
const UChar * uSourceLimit ;
const char * cSource ;
UChar * uTargetLimit = NULL ;
UChar * uTarget ;
char * cTarget ;
const char * cTargetLimit ;
2001-04-18 19:31:05 +00:00
char * cBuf ;
2000-12-19 02:29:23 +00:00
UChar * uBuf , * test ;
int32_t uBufSize = 120 ;
int numCharsInTarget = 0 ;
UErrorCode errorCode = U_ZERO_ERROR ;
uBuf = ( UChar * ) malloc ( uBufSize * sizeof ( UChar ) * 5 ) ;
cBuf = ( char * ) malloc ( uBufSize * sizeof ( char ) * 5 ) ;
uSource = source ;
uSourceLimit = sourceLimit ;
cTarget = cBuf ;
cTargetLimit = cBuf + uBufSize * 5 ;
uTarget = uBuf ;
uTargetLimit = uBuf + uBufSize * 5 ;
ucnv_reset ( cnv ) ;
numCharsInTarget = ucnv_fromUChars ( cnv , cTarget , ( cTargetLimit - cTarget ) , uSource , ( uSourceLimit - uSource ) , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " ucnv_fromUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
cSource = cBuf ;
test = uBuf ;
ucnv_toUChars ( cnv , uTarget , ( uTargetLimit - uTarget ) , cSource , numCharsInTarget , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
2001-09-28 22:22:59 +00:00
log_err ( " ucnv_toUChars conversion failed, reason %s \n " , u_errorName ( errorCode ) ) ;
2000-12-19 02:29:23 +00:00
return ;
}
uSource = source ;
while ( uSource < uSourceLimit ) {
if ( * test ! = * uSource ) {
2001-04-18 19:31:05 +00:00
2000-12-19 02:29:23 +00:00
log_err ( " Expected : \\ u%04X \t Got: \\ u%04X \n " , * uSource , ( int ) * test ) ;
}
uSource + + ;
test + + ;
}
2000-12-20 04:43:54 +00:00
free ( uBuf ) ;
free ( cBuf ) ;
2000-12-19 02:29:23 +00:00
}
2000-11-21 04:05:39 +00:00
2001-03-05 18:39:03 +00:00
/*
 * Round-trips the UTF-16 text [source, sourceLimit) through cnv while the
 * source limit is advanced by a single unit per call, in both directions;
 * the target buffers are given their full capacity from the start.
 *
 * source      - first UTF-16 code unit of the test text
 * sourceLimit - one past the last code unit of the test text
 * cnv         - open converter; it is reset before use and not closed here
 *
 * Mismatches and conversion failures are reported through log_err().
 */
static void TestSmallSourceBuffer(const uint16_t *source, const UChar *sourceLimit, UConverter *cnv) {
    const UChar *uSource;
    const UChar *uSourceLimit;
    const char *cSource;
    const char *cSourceLimit;
    UChar *uTargetLimit = NULL;
    UChar *uTarget;
    char *cTarget;
    const char *cTargetLimit;
    char *cBuf;
    UChar *uBuf;
    int32_t uBufSize = 120;
    int len = 0;
    int expectedLen;
    int producedLen;
    int i = 2;
    const UChar *temp = sourceLimit;
    UErrorCode errorCode = U_ZERO_ERROR;

    uBuf = (UChar *)malloc(uBufSize * sizeof(UChar) * 5);
    cBuf = (char *)malloc(uBufSize * sizeof(char) * 10);
    if (uBuf == NULL || cBuf == NULL) {
        log_err("TestSmallSourceBuffer: unable to allocate the conversion buffers\n");
        goto cleanup;
    }

    ucnv_reset(cnv);
    /* i starts at 2, so this body runs exactly once */
    for (; --i > 0; ) {
        uSource = (const UChar *)source;
        cTarget = cBuf;
        uTarget = uBuf;
        cSource = cBuf;
        cTargetLimit = cBuf;
        uTargetLimit = uBuf + uBufSize * 5;
        cTargetLimit = cTargetLimit + uBufSize * 10;
        uSourceLimit = uSource;

        /* UTF-16 -> bytes: expose one more UChar of input per call */
        do {
            uSourceLimit = uSourceLimit + 1;
            ucnv_fromUnicode(cnv, &cTarget, cTargetLimit, &uSource, uSourceLimit, NULL, FALSE, &errorCode);
            if (errorCode == U_BUFFER_OVERFLOW_ERROR) {
                errorCode = U_ZERO_ERROR;
                continue;
            }
            if (U_FAILURE(errorCode)) {
                log_err(" ucnv_fromUnicode conversion failed reason %s \n ", u_errorName(errorCode));
                goto cleanup;
            }
        } while (uSource < temp);

        /* bytes -> UTF-16: expose one more byte of input per call */
        cSourceLimit = cBuf;
        do {
            cSourceLimit = cSourceLimit + 1;
            ucnv_toUnicode(cnv, &uTarget, uTargetLimit, &cSource, cSourceLimit, NULL, FALSE, &errorCode);
            if (errorCode == U_BUFFER_OVERFLOW_ERROR) {
                errorCode = U_ZERO_ERROR;
                continue;
            }
            if (U_FAILURE(errorCode)) {
                log_err(" ucnv_toUnicode conversion failed reason %s \n ", u_errorName(errorCode));
                goto cleanup;
            }
        } while (cSource < cTarget);

        /*
         * Compare the round-tripped text with the input. The bound is the
         * input length, capped by the number of UChars actually written to
         * uBuf so that uninitialized buffer contents are never read.
         */
        uSource = (const UChar *)source;
        expectedLen = (int)(temp - uSource);
        producedLen = (int)(uTarget - uBuf);
        if (producedLen < expectedLen) {
            expectedLen = producedLen;
        }
        for (len = 0; len < expectedLen; len++) {
            if (uBuf[len] != uSource[len]) {
                log_err(" Expected : \\ u%04X \t Got: \\ u%04X \n ", uSource[len], (int)uBuf[len]);
            }
        }
    }

cleanup:
    /* free(NULL) is a no-op, so this is safe on the allocation-failure path */
    free(uBuf);
    free(cBuf);
}
2001-04-18 19:31:05 +00:00
/*
 * Reads [source, limit) one code point at a time with ucnv_getNextUChar()
 * and compares each code point with the next entry of results.
 *
 * cnv     - open converter; it is reset before use and not closed here
 * source  - first byte of the encoded text
 * limit   - one past the last byte of the encoded text
 * results - expected text as UTF-16 code units; an entry for which
 *           UTF_IS_FIRST_SURROGATE() holds is combined with the following
 *           unit into one expected code point
 * message - prefix for the diagnostics
 *
 * The loop stops at the first conversion failure. The caller must supply
 * enough entries in results to cover every code point decoded from the
 * input; no bound on results is checked here.
 */
static void
TestGetNextUChar2022(UConverter *cnv, const char *source, const char *limit,
                     const uint16_t results[], const char *message) {
    const char *s = source;
    const uint16_t *r = results;
    UErrorCode errorCode = U_ZERO_ERROR;
    uint32_t c, exC;

    ucnv_reset(cnv);
    while (s < limit) {
        c = ucnv_getNextUChar(cnv, &s, limit, &errorCode);
        if (U_FAILURE(errorCode)) {
            log_err(" %s ucnv_getNextUChar() failed: %s \n ", message, u_errorName(errorCode));
            break;
        }

        if (UTF_IS_FIRST_SURROGATE(*r)) {
            /* assemble the expected code point from two units of results */
            int i = 0, len = 2;
            UTF_NEXT_CHAR_SAFE(r, i, len, exC, FALSE);
            r++; /* skip the extra unit; the r++ below skips the other */
        } else {
            exC = *r;
        }

        if (c != exC) {
            /* report the assembled expected code point, not the raw unit r
             * currently points at (a trail surrogate after the branch above) */
            log_err(" %s ucnv_getNextUChar() Expected: \\ u%04X Got: \\ u%04X \n ",
                    message, (unsigned int)exC, (unsigned int)c);
        }
        r++;
    }
}
2000-11-17 03:03:14 +00:00
2001-05-04 00:17:03 +00:00
static int TestJitterbug930 ( const char * enc ) {
2001-08-25 01:06:41 +00:00
UErrorCode err = U_ZERO_ERROR ;
UConverter * converter ;
char out [ 80 ] ;
char * target = out ;
UChar in [ 4 ] ;
const UChar * source = in ;
int32_t off [ 80 ] ;
int32_t * offsets = off ;
int numOffWritten = 0 ;
UBool flush = 0 ;
converter = ucnv_open ( enc , & err ) ; /* "",&err);*/
in [ 0 ] = 0x41 ; /* 0x4E00;*/
in [ 1 ] = 0x4E01 ;
in [ 2 ] = 0x4E02 ;
in [ 3 ] = 0x4E03 ;
memset ( off , ' * ' , sizeof ( off ) ) ;
ucnv_fromUnicode ( converter ,
& target ,
target + 2 ,
& source ,
source + 3 ,
offsets ,
flush ,
& err ) ;
/* writes three bytes into the output buffer: 41 1B 24
* but offsets contains 0 1 1
2001-04-07 01:25:14 +00:00
*/
2001-08-25 01:06:41 +00:00
while ( * offsets < off [ 10 ] ) {
numOffWritten + + ;
offsets + + ;
}
log_verbose ( " Testing Jitterbug 930 for encoding %s " , enc ) ;
if ( numOffWritten ! = ( int ) ( target - out ) ) {
log_err ( " Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i " , enc , ( int ) ( target - out ) , numOffWritten ) ;
}
err = U_ZERO_ERROR ;
memset ( off , ' * ' , sizeof ( off ) ) ;
flush = 1 ;
offsets = off ;
ucnv_fromUnicode ( converter ,
& target ,
target + 4 ,
& source ,
source ,
offsets ,
flush ,
& err ) ;
numOffWritten = 0 ;
while ( * offsets < off [ 10 ] ) {
numOffWritten + + ;
if ( * offsets ! = - 1 ) {
log_err ( " Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i " , enc , - 1 , * offsets ) ;
}
offsets + + ;
}
2001-04-18 19:31:05 +00:00
2001-08-25 01:06:41 +00:00
/* writes 42 43 7A into output buffer,
* offsets contains - 1 - 1 - 1
*/
ucnv_close ( converter ) ;
2001-05-31 23:30:09 +00:00
return 0 ;
2001-04-07 01:25:14 +00:00
}
2000-11-21 04:05:39 +00:00
static void
2000-10-17 08:05:02 +00:00
TestHZ ( ) {
/* test input */
static const uint16_t in [ ] = {
0x3000 , 0x3001 , 0x3002 , 0x30FB , 0x02C9 , 0x02C7 , 0x00A8 , 0x3003 , 0x3005 , 0x2015 ,
2001-04-18 19:31:05 +00:00
0xFF5E , 0x2016 , 0x2026 , 0x007E , 0x997C , 0x70B3 , 0x75C5 , 0x5E76 , 0x73BB , 0x83E0 ,
0x64AD , 0x62E8 , 0x94B5 , 0x000A , 0x6CE2 , 0x535A , 0x52C3 , 0x640F , 0x94C2 , 0x7B94 ,
0x4F2F , 0x5E1B , 0x8236 , 0x000A , 0x8116 , 0x818A , 0x6E24 , 0x6CCA , 0x9A73 , 0x6355 ,
0x535C , 0x54FA , 0x8865 , 0x000A , 0x57E0 , 0x4E0D , 0x5E03 , 0x6B65 , 0x7C3F , 0x90E8 ,
0x6016 , 0x248F , 0x2490 , 0x000A , 0x2491 , 0x2492 , 0x2493 , 0x2494 , 0x2495 , 0x2496 ,
0x2497 , 0x2498 , 0x2499 , 0x000A , 0x249A , 0x249B , 0x2474 , 0x2475 , 0x2476 , 0x2477 ,
0x2478 , 0x2479 , 0x247A , 0x000A , 0x247B , 0x247C , 0x247D , 0x247E , 0x247F , 0x2480 ,
0x2481 , 0x2482 , 0x2483 , 0x000A , 0x0041 , 0x0043 , 0x0044 , 0x0045 , 0x0046 , 0x007E ,
0x0048 , 0x0049 , 0x004A , 0x000A , 0x004B , 0x004C , 0x004D , 0x004E , 0x004F , 0x0050 ,
0x0051 , 0x0052 , 0x0053 , 0x000A , 0x0054 , 0x0055 , 0x0056 , 0x0057 , 0x0058 , 0x0059 ,
0x005A , 0x005B , 0x005C , 0x000A
2000-10-17 08:05:02 +00:00
} ;
2000-11-17 03:03:14 +00:00
const UChar * uSource ;
const UChar * uSourceLimit ;
const char * cSource ;
const char * cSourceLimit ;
UChar * uTargetLimit = NULL ;
UChar * uTarget ;
char * cTarget ;
const char * cTargetLimit ;
2001-04-18 19:31:05 +00:00
char * cBuf ;
2000-11-17 03:03:14 +00:00
UChar * uBuf , * test ;
2000-10-17 08:05:02 +00:00
int32_t uBufSize = 120 ;
UErrorCode errorCode = U_ZERO_ERROR ;
UConverter * cnv ;
2000-11-17 03:03:14 +00:00
int32_t * offsets = ( int32_t * ) malloc ( uBufSize * sizeof ( int32_t ) * 5 ) ;
int32_t * myOff = offsets ;
2000-10-17 08:05:02 +00:00
cnv = ucnv_open ( " HZ " , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Unable to open HZ converter: %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
2000-11-17 03:03:14 +00:00
uBuf = ( UChar * ) malloc ( uBufSize * sizeof ( UChar ) * 5 ) ;
cBuf = ( char * ) malloc ( uBufSize * sizeof ( char ) * 5 ) ;
uSource = ( const UChar * ) & in [ 0 ] ;
uSourceLimit = ( const UChar * ) & in [ sizeof ( in ) / 2 ] ;
cTarget = cBuf ;
cTargetLimit = cBuf + uBufSize * 5 ;
uTarget = uBuf ;
uTargetLimit = uBuf + uBufSize * 5 ;
ucnv_fromUnicode ( cnv , & cTarget , cTargetLimit , & uSource , uSourceLimit , myOff , TRUE , & errorCode ) ;
2000-10-17 08:05:02 +00:00
if ( U_FAILURE ( errorCode ) ) {
log_err ( " ucnv_fromUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
cSource = cBuf ;
2000-11-17 03:03:14 +00:00
cSourceLimit = cTarget ;
test = uBuf ;
myOff = offsets ;
ucnv_toUnicode ( cnv , & uTarget , uTargetLimit , & cSource , cSourceLimit , myOff , TRUE , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
2000-10-17 08:05:02 +00:00
log_err ( " ucnv_toUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
uSource = ( const UChar * ) & in [ 0 ] ;
2000-11-17 03:03:14 +00:00
while ( uSource < uSourceLimit ) {
if ( * test ! = * uSource ) {
2001-04-18 19:31:05 +00:00
2000-11-17 03:03:14 +00:00
log_err ( " Expected : \\ u%04X \t Got: \\ u%04X \n " , * uSource , ( int ) * test ) ;
}
2000-12-08 01:13:38 +00:00
uSource + + ;
test + + ;
2000-11-17 03:03:14 +00:00
}
TestGetNextUChar2022 ( cnv , cBuf , cTarget , in , " HZ encoding " ) ;
2001-03-05 18:39:03 +00:00
TestSmallTargetBuffer ( & in [ 0 ] , ( const UChar * ) & in [ sizeof ( in ) / 2 ] , cnv ) ;
TestSmallSourceBuffer ( & in [ 0 ] , ( const UChar * ) & in [ sizeof ( in ) / 2 ] , cnv ) ;
TestToAndFromUChars ( & in [ 0 ] , ( const UChar * ) & in [ sizeof ( in ) / 2 ] , cnv ) ;
2001-04-07 01:25:14 +00:00
TestJitterbug930 ( " csISO2022JP " ) ;
2000-11-17 03:03:14 +00:00
ucnv_close ( cnv ) ;
free ( offsets ) ;
free ( uBuf ) ;
free ( cBuf ) ;
2000-09-21 00:35:06 +00:00
}
2001-07-14 02:29:21 +00:00
/*
 * Conversion test for the converter named " ISCII,version=0 ".
 * Holds two parallel tables -- UTF-16 input (in) and a byte sequence
 * (byteArr) -- and passes both to TestConv() under the label " hindi ".
 */
static void
TestISCII ( ) {
/* test input */
static const uint16_t in [ ] = {
/* test full range of Devanagari */
0x0901 , 0x0902 , 0x0903 , 0x0905 , 0x0906 , 0x0907 , 0x0908 , 0x0909 , 0x090A ,
0x090B , 0x090E , 0x090F , 0x0910 , 0x090D , 0x0912 , 0x0913 , 0x0914 , 0x0911 ,
0x0915 , 0x0916 , 0x0917 , 0x0918 , 0x0919 , 0x091A , 0x091B , 0x091C , 0x091D ,
0x091E , 0x091F , 0x0920 , 0x0921 , 0x0922 , 0x0923 , 0x0924 , 0x0925 , 0x0926 ,
0x0927 , 0x0928 , 0x0929 , 0x092A , 0x092B , 0x092C , 0x092D , 0x092E , 0x092F ,
0x095F , 0x0930 , 0x0931 , 0x0932 , 0x0933 , 0x0934 , 0x0935 , 0x0936 , 0x0937 ,
0x0938 , 0x0939 , 0x200D , 0x093E , 0x093F , 0x0940 , 0x0941 , 0x0942 , 0x0943 ,
0x0946 , 0x0947 , 0x0948 , 0x0945 , 0x094A , 0x094B , 0x094C , 0x0949 , 0x094D ,
0x0964 , 0x093C , 0x0966 , 0x0967 , 0x0968 , 0x0969 , 0x096A , 0x096B , 0x096C ,
0x096D , 0x096E , 0x096F ,
/* test Soft halant*/
0x0915 , 0x094d , 0x200D ,
/* test explicit halant */
0x0915 , 0x094d , 0x200c ,
/* test double danda */
0x965 ,
/* test ASCII */
0x1B , 0x24 , 0x29 , 0x47 , 0x0E , 0x23 , 0x21 , 0x23 , 0x22 , 0x23 ,
0x23 , 0x23 , 0x24 , 0x23 , 0x25 , 0x23 , 0x26 , 0x23 , 0x27 , 0x23 ,
0x28 , 0x23 , 0x29 , 0x23 , 0x2A , 0x23 , 0x2B , 0x0F , 0x2F , 0x2A ,
/* tests from Lotus */
0x0061 , 0x0915 , 0x000D , 0x000A , 0x0996 , 0x0043 ,
0x0930 , 0x094D , 0x200D ,
0x0901 , 0x000D , 0x000A , 0x0905 , 0x0985 , 0x0043 ,
0x0915 , 0x0921 , 0x002B , 0x095F ,
/* tamil range */
0x0B86 , 0xB87 , 0xB88 ,
/* telugu range */
0x0C05 , 0x0C02 , 0x0C03 , 0x0c31 ,
/* kannada range */
2001-07-24 18:10:05 +00:00
0x0C85 , 0xC82 , 0x0C83 ,
/* test Abbr sign and Anudatta */
0x0970 , 0x952 ,
2001-07-14 02:29:21 +00:00
} ;
/* Byte sequence paired with in[]; the section comments below mirror the
 * sections of in[]. TestConv() decodes these bytes and compares the
 * result with in[]. */
static const unsigned char byteArr [ ] = {
0xa1 , 0xa2 , 0xa3 , 0xa4 , 0xa5 , 0xa6 , 0xa7 , 0xa8 , 0xa9 ,
0xaa , 0xab , 0xac , 0xad , 0xae , 0xaf , 0xb0 , 0xb1 , 0xb2 ,
0xb3 , 0xb4 , 0xb5 , 0xb6 , 0xb7 , 0xb8 , 0xb9 , 0xba , 0xbb ,
0xbc , 0xbd , 0xbe , 0xbf , 0xc0 , 0xc1 , 0xc2 , 0xc3 , 0xc4 ,
0xc5 , 0xc6 , 0xc7 , 0xc8 , 0xc9 , 0xca , 0xcb , 0xcc , 0xcd ,
0xce , 0xcf , 0xd0 , 0xd1 , 0xd2 , 0xd3 , 0xd4 , 0xd5 , 0xd6 ,
0xd7 , 0xd8 , 0xd9 , 0xda , 0xdb , 0xdc , 0xdd , 0xde , 0xdf ,
0xe0 , 0xe1 , 0xe2 , 0xe3 , 0xe4 , 0xe5 , 0xe6 , 0xe7 , 0xe8 ,
0xea , 0xe9 , 0xf1 , 0xf2 , 0xf3 , 0xf4 , 0xf5 , 0xf6 , 0xf7 ,
0xf8 , 0xf9 , 0xfa ,
/* test soft halant */
0xb3 , 0xE8 , 0xE9 ,
/* test explicit halant */
0xb3 , 0xE8 , 0xE8 ,
/* test double danda */
0xea , 0xea ,
/* test ASCII */
0x1B , 0x24 , 0x29 , 0x47 , 0x0E , 0x23 , 0x21 , 0x23 , 0x22 , 0x23 ,
0x23 , 0x23 , 0x24 , 0x23 , 0x25 , 0x23 , 0x26 , 0x23 , 0x27 , 0x23 ,
0x28 , 0x23 , 0x29 , 0x23 , 0x2A , 0x23 , 0x2B , 0x0F , 0x2F , 0x2A ,
/* test ATR code */
/* tests from Lotus */
0x61 , 0xEF , 0x42 , 0xEF , 0x30 , 0xB3 , 0x0D , 0x0A , 0xEF , 0x43 , 0xB4 , 0x43 ,
0xEF , 0x42 , 0xCF , 0xE8 , 0xD9 ,
0xEF , 0x42 , 0xA1 , 0x0D , 0x0A , 0xEF , 0x42 , 0xA4 , 0xEF , 0x43 , 0xA4 , 0x43 ,
0xEF , 0x42 , 0xB3 , 0xBF , 0x2B , 0xEF , 0x42 , 0xCE ,
/* tamil range */
0xEF , 0x44 , 0xa5 , 0xa6 , 0xa7 ,
/* telugu range */
0xEF , 0x45 , 0xa4 , 0xa2 , 0xa3 , 0xd0 ,
/* kannada range */
2001-07-24 18:10:05 +00:00
0xEF , 0x48 , 0xa4 , 0xa2 , 0xa3 ,
/* anudatta and abbreviation sign */
0xEF , 0x42 , 0xF0 , 0xBF , 0xF0 , 0xB8
2001-07-14 02:29:21 +00:00
} ;
/* sizeof ( in ) / 2 is the element count of in[] (its elements are uint16_t).
 * The cast drops the const qualifier of byteArr because TestConv() takes a
 * plain char array parameter. */
TestConv ( in , ( sizeof ( in ) / 2 ) , " ISCII,version=0 " , " hindi " , ( char * ) byteArr , sizeof ( byteArr ) ) ;
}
2000-11-21 04:05:39 +00:00
static void
2000-10-28 01:08:25 +00:00
TestISO_2022_JP ( ) {
/* test input */
2001-04-18 19:31:05 +00:00
static const uint16_t in [ ] = {
2001-02-23 04:40:39 +00:00
0x0041 , /*0x00E9,*/ 0x3000 , 0x3001 , 0x3002 , 0x0020 , 0x000D , 0x000A ,
2001-04-18 19:31:05 +00:00
0x0043 , 0x0044 , 0x0045 , 0x0046 , 0x0047 , 0x0048 , 0x0049 , 0x004A , 0x000D , 0x000A ,
2000-10-28 01:08:25 +00:00
0x004B , 0x004C , 0x004D , 0x004E , 0x004F , 0x0050 , 0x0051 , 0x0052 , 0x000D , 0x000A ,
2001-04-18 19:31:05 +00:00
0x3005 , 0x3006 , 0x3007 , 0x30FC , 0x2015 , 0x2010 , 0xFF0F , 0x005C , 0x000D , 0x000A ,
0x301C , 0x2016 , 0x2026 , 0x2025 , 0x2018 , 0x2019 , 0x201C , 0x000D , 0x000A ,
2001-02-23 04:40:39 +00:00
0x201D , 0x3014 , 0x000D , 0x000A ,
2000-10-28 01:08:25 +00:00
0x0053 , 0x0054 , 0x0055 , 0x0056 , 0x0057 , 0x0058 , 0x0059 , 0x005A , 0x000D , 0x000A ,
0x0053 , 0x0054 , 0x0055 , 0x0056 , 0x0057 , 0x0058 , 0x0059 , 0x005A , 0x000D , 0x000A ,
2001-02-23 04:40:39 +00:00
} ;
2000-11-17 03:03:14 +00:00
const UChar * uSource ;
const UChar * uSourceLimit ;
const char * cSource ;
const char * cSourceLimit ;
UChar * uTargetLimit = NULL ;
UChar * uTarget ;
char * cTarget ;
const char * cTargetLimit ;
2001-04-18 19:31:05 +00:00
char * cBuf ;
2000-11-17 03:03:14 +00:00
UChar * uBuf , * test ;
2000-10-28 01:08:25 +00:00
int32_t uBufSize = 120 ;
UErrorCode errorCode = U_ZERO_ERROR ;
UConverter * cnv ;
2000-11-17 03:03:14 +00:00
int32_t * offsets = ( int32_t * ) malloc ( uBufSize * sizeof ( int32_t ) * 5 ) ;
int32_t * myOff = offsets ;
2001-03-13 00:07:23 +00:00
cnv = ucnv_open ( " ISO_2022_JP_1 " , & errorCode ) ;
2000-10-28 01:08:25 +00:00
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Unable to open a iso-2022 converter: %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
2000-10-20 02:47:14 +00:00
2000-11-17 03:03:14 +00:00
uBuf = ( UChar * ) malloc ( uBufSize * sizeof ( UChar ) * 5 ) ;
cBuf = ( char * ) malloc ( uBufSize * sizeof ( char ) * 5 ) ;
uSource = ( const UChar * ) & in [ 0 ] ;
uSourceLimit = ( const UChar * ) & in [ sizeof ( in ) / 2 ] ;
cTarget = cBuf ;
cTargetLimit = cBuf + uBufSize * 5 ;
uTarget = uBuf ;
uTargetLimit = uBuf + uBufSize * 5 ;
ucnv_fromUnicode ( cnv , & cTarget , cTargetLimit , & uSource , uSourceLimit , myOff , TRUE , & errorCode ) ;
2000-10-28 01:08:25 +00:00
if ( U_FAILURE ( errorCode ) ) {
log_err ( " ucnv_fromUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
cSource = cBuf ;
2000-11-17 03:03:14 +00:00
cSourceLimit = cTarget ;
test = uBuf ;
myOff = offsets ;
ucnv_toUnicode ( cnv , & uTarget , uTargetLimit , & cSource , cSourceLimit , myOff , TRUE , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
2000-10-28 01:08:25 +00:00
log_err ( " ucnv_toUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
2000-11-17 03:03:14 +00:00
2000-10-28 01:08:25 +00:00
uSource = ( const UChar * ) & in [ 0 ] ;
2000-11-17 03:03:14 +00:00
while ( uSource < uSourceLimit ) {
if ( * test ! = * uSource ) {
2001-04-18 19:31:05 +00:00
2000-11-17 03:03:14 +00:00
log_err ( " Expected : \\ u%04X \t Got: \\ u%04X \n " , * uSource , ( int ) * test ) ;
}
2000-12-08 01:13:38 +00:00
uSource + + ;
test + + ;
2000-11-17 03:03:14 +00:00
}
2001-03-05 18:39:03 +00:00
TestSmallTargetBuffer ( & in [ 0 ] , ( const UChar * ) & in [ sizeof ( in ) / 2 ] , cnv ) ;
TestSmallSourceBuffer ( & in [ 0 ] , ( const UChar * ) & in [ sizeof ( in ) / 2 ] , cnv ) ;
2001-04-18 19:31:05 +00:00
TestGetNextUChar2022 ( cnv , cBuf , cTarget , in , " ISO-2022-JP encoding " ) ;
2001-03-05 18:39:03 +00:00
TestToAndFromUChars ( & in [ 0 ] , ( const UChar * ) & in [ sizeof ( in ) / 2 ] , cnv ) ;
2001-04-07 01:25:14 +00:00
TestJitterbug930 ( " csISO2022JP " ) ;
2000-11-17 03:03:14 +00:00
ucnv_close ( cnv ) ;
free ( uBuf ) ;
free ( cBuf ) ;
free ( offsets ) ;
2000-10-28 01:08:25 +00:00
}
2000-11-17 03:03:14 +00:00
2001-07-14 02:29:21 +00:00
/*
 * Generic conversion test used by the per-encoding tests.
 *
 * Opens the converter named conv, converts in[0..len) to bytes and back with
 * offsets requested, and compares the result with the input. It then runs
 * the small-target-buffer, small-source-buffer and getNextUChar helpers on
 * the same data. When byteArr is supplied, that byte sequence is additionally
 * decoded (via getNextUChar and via ucnv_toUnicode) and compared with in.
 *
 * in         - expected text as UTF-16 code units
 * len        - number of code units in in
 * conv       - converter name passed to ucnv_open()
 * lang       - label used in diagnostics
 * byteArr    - optional encoded form of in; may be NULL
 * byteArrLen - number of bytes in byteArr; 0 disables the byteArr checks
 *
 * All failures are reported through log_err().
 */
static void TestConv(const uint16_t in[], int len, const char *conv, const char *lang, char byteArr[], int byteArrLen) {
    const UChar *uSource;
    const UChar *uSourceLimit;
    const char *cSource;
    const char *cSourceLimit;
    UChar *uTargetLimit = NULL;
    UChar *uTarget;
    char *cTarget;
    const char *cTargetLimit;
    char *cBuf = NULL;
    UChar *uBuf = NULL, *test;
    int32_t uBufSize = 120;
    UErrorCode errorCode = U_ZERO_ERROR;
    UConverter *cnv;
    int32_t *offsets = NULL;
    int32_t *myOff;

    cnv = ucnv_open(conv, &errorCode);
    if (U_FAILURE(errorCode)) {
        /* nothing has been allocated yet, so a plain return is leak-free */
        log_err(" Unable to open a %s converter: %s \n ", conv, u_errorName(errorCode));
        return;
    }

    offsets = (int32_t *)malloc(uBufSize * sizeof(int32_t) * 5);
    uBuf = (UChar *)malloc(uBufSize * sizeof(UChar) * 5);
    cBuf = (char *)malloc(uBufSize * sizeof(char) * 5);
    if (offsets == NULL || uBuf == NULL || cBuf == NULL) {
        log_err("TestConv: unable to allocate the conversion buffers\n");
        goto cleanup;
    }

    /* UTF-16 -> bytes, flushing, with offsets */
    myOff = offsets;
    uSource = (const UChar *)&in[0];
    uSourceLimit = uSource + len;
    cTarget = cBuf;
    cTargetLimit = cBuf + uBufSize * 5;
    uTarget = uBuf;
    uTargetLimit = uBuf + uBufSize * 5;
    ucnv_fromUnicode(cnv, &cTarget, cTargetLimit, &uSource, uSourceLimit, myOff, TRUE, &errorCode);
    if (U_FAILURE(errorCode)) {
        log_err(" ucnv_fromUnicode conversion failed reason %s \n ", u_errorName(errorCode));
        goto cleanup;
    }

    /* arguments follow the order of the placeholders: language, converter,
     * length; the pointer difference is narrowed to int to match %i */
    log_verbose(" length of compressed string for language %s using %s:%i \n ", lang, conv, (int)(cTarget - cBuf));

    /* bytes -> UTF-16, flushing; the offsets array is reused */
    cSource = cBuf;
    cSourceLimit = cTarget;
    test = uBuf;
    myOff = offsets;
    ucnv_toUnicode(cnv, &uTarget, uTargetLimit, &cSource, cSourceLimit, myOff, TRUE, &errorCode);
    if (U_FAILURE(errorCode)) {
        log_err(" ucnv_toUnicode conversion failed reason %s \n ", u_errorName(errorCode));
        goto cleanup;
    }

    /* the round-tripped text must equal the input */
    uSource = (const UChar *)&in[0];
    while (uSource < uSourceLimit) {
        if (*test != *uSource) {
            log_err(" Expected : \\ u%04X \t Got: \\ u%04X \n ", *uSource, (int)*test);
        }
        uSource++;
        test++;
    }

    TestSmallTargetBuffer(&in[0], (const UChar *)&in[len], cnv);
    TestSmallSourceBuffer(&in[0], (const UChar *)&in[len], cnv);
    TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv);

    if (byteArr && byteArrLen != 0) {
        TestGetNextUChar2022(cnv, byteArr, (byteArr + byteArrLen), in, lang);
        TestToAndFromUChars(&in[0], (const UChar *)&in[len], cnv);

        /* Decode the caller-supplied bytes. The target pointer is rewound to
         * the start of uBuf so that the comparison below examines this
         * conversion's output rather than the earlier round-trip result. */
        cSource = byteArr;
        cSourceLimit = cSource + byteArrLen;
        uTarget = uBuf;
        test = uBuf;
        myOff = offsets;
        ucnv_toUnicode(cnv, &uTarget, uTargetLimit, &cSource, cSourceLimit, myOff, TRUE, &errorCode);
        if (U_FAILURE(errorCode)) {
            log_err(" ucnv_toUnicode conversion failed reason %s \n ", u_errorName(errorCode));
            goto cleanup;
        }

        uSource = (const UChar *)&in[0];
        while (uSource < uSourceLimit) {
            if (*test != *uSource) {
                log_err(" Expected : \\ u%04X \t Got: \\ u%04X \n ", *uSource, (int)*test);
            }
            uSource++;
            test++;
        }
    }

cleanup:
    /* shared exit: releases the converter and buffers on error paths too */
    ucnv_close(cnv);
    free(uBuf);
    free(cBuf);
    free(offsets);
}
/*
 * Conversion tests for the " SCSU " converter. Each data set is a UTF-16
 * array, most of them paired with a byte array, and is passed to TestConv();
 * the final (monkey) data set is passed without a byte array.
 */
static void
TestSCSU ( ) {
2001-04-18 19:31:05 +00:00
2001-03-02 23:55:49 +00:00
/* German sample: UTF-16 text and its paired byte sequence */
uint16_t germanUTF16 [ ] = {
0x00d6 , 0x006c , 0x0020 , 0x0066 , 0x006c , 0x0069 , 0x0065 , 0x00df , 0x0074
} ;
uint8_t germanSCSU [ ] = {
0xd6 , 0x6c , 0x20 , 0x66 , 0x6c , 0x69 , 0x65 , 0xdf , 0x74
} ;
/* Russian sample */
uint16_t russianUTF16 [ ] = {
0x041c , 0x043e , 0x0441 , 0x043a , 0x0432 , 0x0430
} ;
uint8_t russianSCSU [ ] = {
0x12 , 0x9c , 0xbe , 0xc1 , 0xba , 0xb2 , 0xb0
} ;
/* Japanese sample */
uint16_t japaneseUTF16 [ ] = {
0x3000 , 0x266a , 0x30ea , 0x30f3 , 0x30b4 , 0x53ef , 0x611b ,
0x3044 , 0x3084 , 0x53ef , 0x611b , 0x3044 , 0x3084 , 0x30ea , 0x30f3 ,
0x30b4 , 0x3002 , 0x534a , 0x4e16 , 0x7d00 , 0x3082 , 0x524d , 0x306b ,
0x6d41 , 0x884c , 0x3057 , 0x305f , 0x300c , 0x30ea , 0x30f3 , 0x30b4 ,
0x306e , 0x6b4c , 0x300d , 0x304c , 0x3074 , 0x3063 , 0x305f , 0x308a ,
0x3059 , 0x308b , 0x304b , 0x3082 , 0x3057 , 0x308c , 0x306a , 0x3044 ,
0x3002 , 0x7c73 , 0x30a2 , 0x30c3 , 0x30d7 , 0x30eb , 0x30b3 , 0x30f3 ,
0x30d4 , 0x30e5 , 0x30fc , 0x30bf , 0x793e , 0x306e , 0x30d1 , 0x30bd ,
0x30b3 , 0x30f3 , 0x300c , 0x30de , 0x30c3 , 0x30af , 0xff08 , 0x30de ,
0x30c3 , 0x30ad , 0x30f3 , 0x30c8 , 0x30c3 , 0x30b7 , 0x30e5 , 0xff09 ,
0x300d , 0x3092 , 0x3001 , 0x3053 , 0x3088 , 0x306a , 0x304f , 0x611b ,
0x3059 , 0x308b , 0x4eba , 0x305f , 0x3061 , 0x306e , 0x3053 , 0x3068 ,
0x3060 , 0x3002 , 0x300c , 0x30a2 , 0x30c3 , 0x30d7 , 0x30eb , 0x4fe1 ,
0x8005 , 0x300d , 0x306a , 0x3093 , 0x3066 , 0x8a00 , 0x3044 , 0x65b9 ,
0x307e , 0x3067 , 0x3042 , 0x308b , 0x3002
} ;
2001-03-05 18:39:03 +00:00
/* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
it uses an SQn once where a longer look - ahead could have shown that SCn is more efficient */
2001-03-02 23:55:49 +00:00
uint8_t japaneseSCSU [ ] = {
0x08 , 0x00 , 0x1b , 0x4c , 0xea , 0x16 , 0xca , 0xd3 , 0x94 , 0x0f , 0x53 , 0xef , 0x61 , 0x1b , 0xe5 , 0x84 ,
0xc4 , 0x0f , 0x53 , 0xef , 0x61 , 0x1b , 0xe5 , 0x84 , 0xc4 , 0x16 , 0xca , 0xd3 , 0x94 , 0x08 , 0x02 , 0x0f ,
0x53 , 0x4a , 0x4e , 0x16 , 0x7d , 0x00 , 0x30 , 0x82 , 0x52 , 0x4d , 0x30 , 0x6b , 0x6d , 0x41 , 0x88 , 0x4c ,
0xe5 , 0x97 , 0x9f , 0x08 , 0x0c , 0x16 , 0xca , 0xd3 , 0x94 , 0x15 , 0xae , 0x0e , 0x6b , 0x4c , 0x08 , 0x0d ,
0x8c , 0xb4 , 0xa3 , 0x9f , 0xca , 0x99 , 0xcb , 0x8b , 0xc2 , 0x97 , 0xcc , 0xaa , 0x84 , 0x08 , 0x02 , 0x0e ,
0x7c , 0x73 , 0xe2 , 0x16 , 0xa3 , 0xb7 , 0xcb , 0x93 , 0xd3 , 0xb4 , 0xc5 , 0xdc , 0x9f , 0x0e , 0x79 , 0x3e ,
0x06 , 0xae , 0xb1 , 0x9d , 0x93 , 0xd3 , 0x08 , 0x0c , 0xbe , 0xa3 , 0x8f , 0x08 , 0x88 , 0xbe , 0xa3 , 0x8d ,
0xd3 , 0xa8 , 0xa3 , 0x97 , 0xc5 , 0x17 , 0x89 , 0x08 , 0x0d , 0x15 , 0xd2 , 0x08 , 0x01 , 0x93 , 0xc8 , 0xaa ,
0x8f , 0x0e , 0x61 , 0x1b , 0x99 , 0xcb , 0x0e , 0x4e , 0xba , 0x9f , 0xa1 , 0xae , 0x93 , 0xa8 , 0xa0 , 0x08 ,
0x02 , 0x08 , 0x0c , 0xe2 , 0x16 , 0xa3 , 0xb7 , 0xcb , 0x0f , 0x4f , 0xe1 , 0x80 , 0x05 , 0xec , 0x60 , 0x8d ,
0xea , 0x06 , 0xd3 , 0xe6 , 0x0f , 0x8a , 0x00 , 0x30 , 0x44 , 0x65 , 0xb9 , 0xe4 , 0xfe , 0xe7 , 0xc2 , 0x06 ,
0xcb , 0x82
} ;
/* "all features" sample; includes the surrogate pair 0xdbff 0xdfff twice */
uint16_t allFeaturesUTF16 [ ] = {
0x0041 , 0x00df , 0x0401 , 0x015f , 0x00df , 0x01df , 0xf000 , 0xdbff ,
2001-04-18 19:31:05 +00:00
0xdfff , 0x000d , 0x000a , 0x0041 , 0x00df , 0x0401 , 0x015f , 0x00df ,
2001-03-02 23:55:49 +00:00
0x01df , 0xf000 , 0xdbff , 0xdfff
} ;
/* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
* result here ( 34 B vs . 35 B )
*/
uint8_t allFeaturesSCSU [ ] = {
2001-04-18 19:31:05 +00:00
0x41 , 0xdf , 0x12 , 0x81 , 0x03 , 0x5f , 0x10 , 0xdf , 0x1b , 0x03 ,
2001-03-02 23:55:49 +00:00
0xdf , 0x1c , 0x88 , 0x80 , 0x0b , 0xbf , 0xff , 0xff , 0x0d , 0x0a ,
0x41 , 0x10 , 0xdf , 0x12 , 0x81 , 0x03 , 0x5f , 0x10 , 0xdf , 0x13 ,
2001-04-18 19:31:05 +00:00
0xdf , 0x14 , 0x80 , 0x15 , 0xff
2001-03-02 23:55:49 +00:00
} ;
2001-07-14 02:29:21 +00:00
/* Larger mixed sample without a paired byte array; it is only round-tripped
 * through the converter (see the last TestConv() call below). */
static const uint16_t monkeyIn [ ] = {
0x00A8 , 0x3003 , 0x3005 , 0x2015 , 0xFF5E , 0x2016 , 0x2026 , 0x2018 , 0x000D , 0x000A ,
0x2019 , 0x201C , 0x201D , 0x3014 , 0x3015 , 0x3008 , 0x3009 , 0x300A , 0x000D , 0x000A ,
0x300B , 0x300C , 0x300D , 0x300E , 0x300F , 0x3016 , 0x3017 , 0x3010 , 0x000D , 0x000A ,
0x3011 , 0x00B1 , 0x00D7 , 0x00F7 , 0x2236 , 0x2227 , 0x7FC1 , 0x8956 , 0x000D , 0x000A ,
0x9D2C , 0x9D0E , 0x9EC4 , 0x5CA1 , 0x6C96 , 0x837B , 0x5104 , 0x5C4B , 0x000D , 0x000A ,
0x61B6 , 0x81C6 , 0x6876 , 0x7261 , 0x4E59 , 0x4FFA , 0x5378 , 0x57F7 , 0x000D , 0x000A ,
0x57F4 , 0x57F9 , 0x57FA , 0x57FC , 0x5800 , 0x5802 , 0x5805 , 0x5806 , 0x000D , 0x000A ,
0x580A , 0x581E , 0x6BB5 , 0x6BB7 , 0x6BBA , 0x6BBC , 0x9CE2 , 0x977C , 0x000D , 0x000A ,
0x6BBF , 0x6BC1 , 0x6BC5 , 0x6BC6 , 0x6BCB , 0x6BCD , 0x6BCF , 0x6BD2 , 0x000D , 0x000A ,
0x6BD3 , 0x6BD4 , 0x6BD6 , 0x6BD7 , 0x6BD8 , 0x6BDB , 0x6BEB , 0x6BEC , 0x000D , 0x000A ,
0x6C05 , 0x6C08 , 0x6C0F , 0x6C11 , 0x6C13 , 0x6C23 , 0x6C34 , 0x0041 , 0x000D , 0x000A ,
0x0043 , 0x0044 , 0x0045 , 0x0046 , 0x0047 , 0x0048 , 0x0049 , 0x004A , 0x000D , 0x000A ,
0x004B , 0x004C , 0x004D , 0x004E , 0x004F , 0x0050 , 0x0051 , 0x0052 , 0x000D , 0x000A ,
0x0053 , 0x0054 , 0x0055 , 0x0056 , 0x0057 , 0x0058 , 0x0059 , 0x005A , 0x000D , 0x000A ,
0x005B , 0x9792 , 0x9CCC , 0x9CCD , 0x9CCE , 0x9CCF , 0x9CD0 , 0x9CD3 , 0x000D , 0x000A ,
0x9CD4 , 0x9CD5 , 0x9CD7 , 0x9CD8 , 0x9CD9 , 0x9CDC , 0x9CDD , 0x9CDF , 0x000D , 0x000A ,
0x9785 , 0x9791 , 0x00BD , 0x0390 , 0x0385 , 0x0386 , 0x0388 , 0x0389 , 0x000D , 0x000A ,
0x038E , 0x038F , 0x0390 , 0x0391 , 0x0392 , 0x0393 , 0x0394 , 0x0395 , 0x000D , 0x000A ,
0x0396 , 0x0397 , 0x0398 , 0x0399 , 0x039A , 0x038A , 0x038C , 0x039C , 0x000D , 0x000A ,
/* test non-BMP code points */
0xD869 , 0xDE99 , 0xD869 , 0xDE9C , 0xD869 , 0xDE9D , 0xD869 , 0xDE9E , 0xD869 , 0xDE9F ,
0xD869 , 0xDEA0 , 0xD869 , 0xDEA5 , 0xD869 , 0xDEA6 , 0xD869 , 0xDEA7 , 0xD869 , 0xDEA8 ,
0xD869 , 0xDEAB , 0xD869 , 0xDEAC , 0xD869 , 0xDEAD , 0xD869 , 0xDEAE , 0xD869 , 0xDEAF ,
0xD869 , 0xDEB0 , 0xD869 , 0xDEB1 , 0xD869 , 0xDEB3 , 0xD869 , 0xDEB5 , 0xD869 , 0xDEB6 ,
0xD869 , 0xDEB7 , 0xD869 , 0xDEB8 , 0xD869 , 0xDEB9 , 0xD869 , 0xDEBA , 0xD869 , 0xDEBB ,
0xD869 , 0xDEBC , 0xD869 , 0xDEBD , 0xD869 , 0xDEBE , 0xD869 , 0xDEBF , 0xD869 , 0xDEC0 ,
0xD869 , 0xDEC1 , 0xD869 , 0xDEC2 , 0xD869 , 0xDEC3 , 0xD869 , 0xDEC4 , 0xD869 , 0xDEC8 ,
0xD869 , 0xDECA , 0xD869 , 0xDECB , 0xD869 , 0xDECD , 0xD869 , 0xDECE , 0xD869 , 0xDECF ,
0xD869 , 0xDED0 , 0xD869 , 0xDED1 , 0xD869 , 0xDED2 , 0xD869 , 0xDED3 , 0xD869 , 0xDED4 ,
0xD869 , 0xDED5 ,
0x4DB3 , 0x4DB4 , 0x4DB5 , 0x4E00 , 0x4E00 , 0x4E01 , 0x4E02 , 0x4E03 , 0x000D , 0x000A ,
0x0392 , 0x0393 , 0x0394 , 0x0395 , 0x0396 , 0x0397 , 0x33E0 , 0x33E6 , 0x000D , 0x000A ,
0x4E05 , 0x4E07 , 0x4E04 , 0x4E08 , 0x4E08 , 0x4E09 , 0x4E0A , 0x4E0B , 0x000D , 0x000A ,
0x4E0C , 0x0021 , 0x0022 , 0x0023 , 0x0024 , 0xFF40 , 0xFF41 , 0xFF42 , 0x000D , 0x000A ,
0xFF43 , 0xFF44 , 0xFF45 , 0xFF46 , 0xFF47 , 0xFF48 , 0xFF49 , 0xFF4A , 0x000D , 0x000A ,
} ;
/* Each call passes the element count of the UTF-16 array ( sizeof / 2 ), the
 * converter name, a label for diagnostics, and the paired byte array with
 * its size.
 * NOTE(review): the "all features" call appears twice with identical
 * arguments -- confirm whether the repetition is deliberate. */
TestConv ( allFeaturesUTF16 , ( sizeof ( allFeaturesUTF16 ) / 2 ) , " SCSU " , " all features " , ( char * ) allFeaturesSCSU , sizeof ( allFeaturesSCSU ) ) ;
2001-03-05 18:39:03 +00:00
TestConv ( allFeaturesUTF16 , ( sizeof ( allFeaturesUTF16 ) / 2 ) , " SCSU " , " all features " , ( char * ) allFeaturesSCSU , sizeof ( allFeaturesSCSU ) ) ;
TestConv ( japaneseUTF16 , ( sizeof ( japaneseUTF16 ) / 2 ) , " SCSU " , " japaneese " , ( char * ) japaneseSCSU , sizeof ( japaneseSCSU ) ) ;
/* same Japanese data, this time with the converter name " SCSU,locale=ja " */
TestConv ( japaneseUTF16 , ( sizeof ( japaneseUTF16 ) / 2 ) , " SCSU,locale=ja " , " japaneese " , ( char * ) japaneseSCSU , sizeof ( japaneseSCSU ) ) ;
TestConv ( germanUTF16 , ( sizeof ( germanUTF16 ) / 2 ) , " SCSU " , " german " , ( char * ) germanSCSU , sizeof ( germanSCSU ) ) ;
TestConv ( russianUTF16 , ( sizeof ( russianUTF16 ) / 2 ) , " SCSU " , " russian " , ( char * ) russianSCSU , sizeof ( russianSCSU ) ) ;
2001-07-14 02:29:21 +00:00
/* NULL / 0: TestConv() skips its byte-array checks for this data set */
TestConv ( monkeyIn , ( sizeof ( monkeyIn ) / 2 ) , " SCSU " , " monkey " , NULL , 0 ) ;
2001-03-02 23:55:49 +00:00
}
2000-11-21 04:05:39 +00:00
static void
2000-10-28 01:08:25 +00:00
TestISO_2022_JP_1 ( ) {
/* test input */
static const uint16_t in [ ] = {
0x3000 , 0x3001 , 0x3002 , 0x0020 , 0xFF0E , 0x30FB , 0xFF1A , 0xFF1B , 0x000D , 0x000A ,
2001-04-18 19:31:05 +00:00
0x0043 , 0x0044 , 0x0045 , 0x0046 , 0x0047 , 0x0048 , 0x0049 , 0x004A , 0x000D , 0x000A ,
2000-10-28 01:08:25 +00:00
0x52C8 , 0x52CC , 0x52CF , 0x52D1 , 0x52D4 , 0x52D6 , 0x52DB , 0x52DC , 0x000D , 0x000A ,
0x004B , 0x004C , 0x004D , 0x004E , 0x004F , 0x0050 , 0x0051 , 0x0052 , 0x000D , 0x000A ,
2001-04-18 19:31:05 +00:00
0x3005 , 0x3006 , 0x3007 , 0x30FC , 0x2015 , 0x2010 , 0xFF0F , 0x005C , 0x000D , 0x000A ,
0x301C , 0x2016 , 0x2026 , 0x2025 , 0x2018 , 0x2019 , 0x201C , 0x000D , 0x000A ,
2001-02-23 04:40:39 +00:00
0x201D , 0x000D , 0x000A ,
2000-10-28 01:08:25 +00:00
0x0053 , 0x0054 , 0x0055 , 0x0056 , 0x0057 , 0x0058 , 0x0059 , 0x005A , 0x000D , 0x000A ,
2001-04-18 19:31:05 +00:00
0x4F94 , 0x4F97 , 0x52BA , 0x52BB , 0x52BD , 0x52C0 , 0x52C4 , 0x52C6 , 0x000D , 0x000A ,
0x0053 , 0x0054 , 0x0055 , 0x0056 , 0x0057 , 0x0058 , 0x0059 , 0x005A , 0x000D , 0x000A ,
2000-10-28 01:08:25 +00:00
0x4F78 , 0x4F79 , 0x4F7A , 0x4F7D , 0x4F7E , 0x4F81 , 0x4F82 , 0x4F84 , 0x000D , 0x000A ,
0x4F85 , 0x4F89 , 0x4F8A , 0x4F8C , 0x4F8E , 0x4F90 , 0x4F92 , 0x4F93 , 0x000D , 0x000A ,
0x52E1 , 0x52E5 , 0x52E8 , 0x52E9 , 0x000D , 0x000A
} ;
2000-11-17 03:03:14 +00:00
const UChar * uSource ;
const UChar * uSourceLimit ;
const char * cSource ;
const char * cSourceLimit ;
UChar * uTargetLimit = NULL ;
UChar * uTarget ;
char * cTarget ;
const char * cTargetLimit ;
2001-04-18 19:31:05 +00:00
char * cBuf ;
2000-11-17 03:03:14 +00:00
UChar * uBuf , * test ;
2000-10-28 01:08:25 +00:00
int32_t uBufSize = 120 ;
UErrorCode errorCode = U_ZERO_ERROR ;
UConverter * cnv ;
2001-03-13 00:07:23 +00:00
cnv = ucnv_open ( " ISO_2022_JP_1 " , & errorCode ) ;
2000-10-28 01:08:25 +00:00
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Unable to open a iso-2022 converter: %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
2000-11-17 03:03:14 +00:00
uBuf = ( UChar * ) malloc ( uBufSize * sizeof ( UChar ) * 5 ) ;
cBuf = ( char * ) malloc ( uBufSize * sizeof ( char ) * 5 ) ;
uSource = ( const UChar * ) & in [ 0 ] ;
uSourceLimit = ( const UChar * ) & in [ sizeof ( in ) / 2 ] ;
cTarget = cBuf ;
cTargetLimit = cBuf + uBufSize * 5 ;
uTarget = uBuf ;
uTargetLimit = uBuf + uBufSize * 5 ;
ucnv_fromUnicode ( cnv , & cTarget , cTargetLimit , & uSource , uSourceLimit , NULL , TRUE , & errorCode ) ;
2000-10-28 01:08:25 +00:00
if ( U_FAILURE ( errorCode ) ) {
log_err ( " ucnv_fromUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
cSource = cBuf ;
2000-11-17 03:03:14 +00:00
cSourceLimit = cTarget ;
test = uBuf ;
ucnv_toUnicode ( cnv , & uTarget , uTargetLimit , & cSource , cSourceLimit , NULL , TRUE , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
2000-10-28 01:08:25 +00:00
log_err ( " ucnv_toUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
uSource = ( const UChar * ) & in [ 0 ] ;
2000-11-17 03:03:14 +00:00
while ( uSource < uSourceLimit ) {
if ( * test ! = * uSource ) {
2001-04-18 19:31:05 +00:00
2000-11-17 03:03:14 +00:00
log_err ( " Expected : \\ u%04X \t Got: \\ u%04X \n " , * uSource , ( int ) * test ) ;
}
2000-12-08 01:13:38 +00:00
uSource + + ;
test + + ;
2000-11-17 03:03:14 +00:00
}
2000-10-28 01:08:25 +00:00
/*ucnv_close(cnv);
cnv = ucnv_open ( " ISO_2022,locale=jp,version=1 " , & errorCode ) ; */
2001-03-05 18:39:03 +00:00
TestSmallTargetBuffer ( & in [ 0 ] , ( const UChar * ) & in [ sizeof ( in ) / 2 ] , cnv ) ;
TestSmallSourceBuffer ( & in [ 0 ] , ( const UChar * ) & in [ sizeof ( in ) / 2 ] , cnv ) ;
2000-11-17 03:03:14 +00:00
ucnv_close ( cnv ) ;
free ( uBuf ) ;
free ( cBuf ) ;
2000-10-28 01:08:25 +00:00
}
2000-11-21 04:05:39 +00:00
static void
2000-10-28 01:08:25 +00:00
TestISO_2022_JP_2 ( ) {
2000-08-15 00:07:33 +00:00
/* test input */
static const uint16_t in [ ] = {
2001-04-18 19:31:05 +00:00
0x00A8 , 0x3003 , 0x3005 , 0x2015 , 0xFF5E , 0x2016 , 0x2026 , 0x2018 , 0x000D , 0x000A ,
0x2019 , 0x201C , 0x201D , 0x3014 , 0x3015 , 0x3008 , 0x3009 , 0x300A , 0x000D , 0x000A ,
0x300B , 0x300C , 0x300D , 0x300E , 0x300F , 0x3016 , 0x3017 , 0x3010 , 0x000D , 0x000A ,
0x3011 , 0x00B1 , 0x00D7 , 0x00F7 , 0x2236 , 0x2227 , 0x7FC1 , 0x8956 , 0x000D , 0x000A ,
0x9D2C , 0x9D0E , 0x9EC4 , 0x5CA1 , 0x6C96 , 0x837B , 0x5104 , 0x5C4B , 0x000D , 0x000A ,
0x61B6 , 0x81C6 , 0x6876 , 0x7261 , 0x4E59 , 0x4FFA , 0x5378 , 0x57F7 , 0x000D , 0x000A ,
0x57F4 , 0x57F9 , 0x57FA , 0x57FC , 0x5800 , 0x5802 , 0x5805 , 0x5806 , 0x000D , 0x000A ,
0x580A , 0x581E , 0x6BB5 , 0x6BB7 , 0x6BBA , 0x6BBC , 0x9CE2 , 0x977C , 0x000D , 0x000A ,
0x6BBF , 0x6BC1 , 0x6BC5 , 0x6BC6 , 0x6BCB , 0x6BCD , 0x6BCF , 0x6BD2 , 0x000D , 0x000A ,
0x6BD3 , 0x6BD4 , 0x6BD6 , 0x6BD7 , 0x6BD8 , 0x6BDB , 0x6BEB , 0x6BEC , 0x000D , 0x000A ,
0x6C05 , 0x6C08 , 0x6C0F , 0x6C11 , 0x6C13 , 0x6C23 , 0x6C34 , 0x0041 , 0x000D , 0x000A ,
0x0043 , 0x0044 , 0x0045 , 0x0046 , 0x0047 , 0x0048 , 0x0049 , 0x004A , 0x000D , 0x000A ,
0x004B , 0x004C , 0x004D , 0x004E , 0x004F , 0x0050 , 0x0051 , 0x0052 , 0x000D , 0x000A ,
0x0053 , 0x0054 , 0x0055 , 0x0056 , 0x0057 , 0x0058 , 0x0059 , 0x005A , 0x000D , 0x000A ,
0x005B , 0x9792 , 0x9CCC , 0x9CCD , 0x9CCE , 0x9CCF , 0x9CD0 , 0x9CD3 , 0x000D , 0x000A ,
0x9CD4 , 0x9CD5 , 0x9CD7 , 0x9CD8 , 0x9CD9 , 0x9CDC , 0x9CDD , 0x9CDF , 0x000D , 0x000A ,
0x9785 , 0x9791 , 0x00BD , 0x0390 , 0x0385 , 0x0386 , 0x0388 , 0x0389 , 0x000D , 0x000A ,
0x038E , 0x038F , 0x0390 , 0x0391 , 0x0392 , 0x0393 , 0x0394 , 0x0395 , 0x000D , 0x000A ,
0x0396 , 0x0397 , 0x0398 , 0x0399 , 0x039A , 0x038A , 0x038C , 0x039C , 0x000D , 0x000A
2000-08-15 00:07:33 +00:00
} ;
2000-11-17 03:03:14 +00:00
const UChar * uSource ;
const UChar * uSourceLimit ;
const char * cSource ;
const char * cSourceLimit ;
UChar * uTargetLimit = NULL ;
UChar * uTarget ;
char * cTarget ;
const char * cTargetLimit ;
2001-04-18 19:31:05 +00:00
char * cBuf ;
2000-11-17 03:03:14 +00:00
UChar * uBuf , * test ;
2000-08-15 00:07:33 +00:00
int32_t uBufSize = 120 ;
UErrorCode errorCode = U_ZERO_ERROR ;
UConverter * cnv ;
2000-11-17 03:03:14 +00:00
int32_t * offsets = ( int32_t * ) malloc ( uBufSize * sizeof ( int32_t ) * 5 ) ;
int32_t * myOff = offsets ;
2001-03-13 00:07:23 +00:00
cnv = ucnv_open ( " ISO_2022_JP_2 " , & errorCode ) ;
2000-08-15 00:07:33 +00:00
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Unable to open a iso-2022 converter: %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
2000-11-17 03:03:14 +00:00
uBuf = ( UChar * ) malloc ( uBufSize * sizeof ( UChar ) * 5 ) ;
cBuf = ( char * ) malloc ( uBufSize * sizeof ( char ) * 5 ) ;
uSource = ( const UChar * ) & in [ 0 ] ;
uSourceLimit = ( const UChar * ) & in [ sizeof ( in ) / 2 ] ;
cTarget = cBuf ;
cTargetLimit = cBuf + uBufSize * 5 ;
uTarget = uBuf ;
uTargetLimit = uBuf + uBufSize * 5 ;
ucnv_fromUnicode ( cnv , & cTarget , cTargetLimit , & uSource , uSourceLimit , myOff , TRUE , & errorCode ) ;
2000-08-15 21:13:20 +00:00
if ( U_FAILURE ( errorCode ) ) {
log_err ( " ucnv_fromUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
cSource = cBuf ;
2000-11-17 03:03:14 +00:00
cSourceLimit = cTarget ;
2001-04-18 19:31:05 +00:00
test = uBuf ;
2000-11-17 03:03:14 +00:00
myOff = offsets ;
ucnv_toUnicode ( cnv , & uTarget , uTargetLimit , & cSource , cSourceLimit , myOff , TRUE , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
2000-08-22 00:04:27 +00:00
log_err ( " ucnv_toUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
2000-08-15 21:13:20 +00:00
return ;
}
2000-08-16 22:30:12 +00:00
uSource = ( const UChar * ) & in [ 0 ] ;
2000-11-17 03:03:14 +00:00
while ( uSource < uSourceLimit ) {
if ( * test ! = * uSource ) {
2001-04-18 19:31:05 +00:00
2000-11-17 03:03:14 +00:00
log_err ( " Expected : \\ u%04X \t Got: \\ u%04X \n " , * uSource , ( int ) * test ) ;
}
2000-12-08 01:13:38 +00:00
uSource + + ;
test + + ;
2000-11-17 03:03:14 +00:00
}
2001-03-05 18:39:03 +00:00
TestSmallTargetBuffer ( & in [ 0 ] , ( const UChar * ) & in [ sizeof ( in ) / 2 ] , cnv ) ;
TestSmallSourceBuffer ( & in [ 0 ] , ( const UChar * ) & in [ sizeof ( in ) / 2 ] , cnv ) ;
2001-04-18 19:31:05 +00:00
TestToAndFromUChars ( & in [ 0 ] , ( const UChar * ) & in [ sizeof ( in ) / 2 ] , cnv ) ;
2000-11-17 03:03:14 +00:00
ucnv_close ( cnv ) ;
free ( uBuf ) ;
free ( cBuf ) ;
free ( offsets ) ;
2000-08-22 00:04:27 +00:00
}
2000-11-21 04:05:39 +00:00
static void
2000-08-22 00:04:27 +00:00
TestISO_2022_KR ( ) {
/* test input */
static const uint16_t in [ ] = {
0x9F4B , 0x9F4E , 0x9F52 , 0x9F5F , 0x9F61 , 0x9F66 , 0x9F67 , 0x9F6A , 0x000A , 0x000D
, 0x9F6C , 0x9F77 , 0x9F8D , 0x9F90 , 0x9F95 , 0x9F9C , 0xAC00 , 0xAC01 , 0xAC02 , 0xAC04
, 0xAC07 , 0xAC08 , 0xAC09 , 0x0025 , 0x0026 , 0x0027 , 0x000A , 0x000D , 0x0028 , 0x0029
, 0x002A , 0x002B , 0x002C , 0x002D , 0x002E , 0x53C3 , 0x53C8 , 0x53C9 , 0x53CA , 0x53CB
, 0x53CD , 0x53D4 , 0x53D6 , 0x53D7 , 0x53DB , 0x000A , 0x000D , 0x53DF , 0x53E1 , 0x53E2
, 0x53E3 , 0x53E4 , 0x000A , 0x000D } ;
2000-11-17 03:03:14 +00:00
const UChar * uSource ;
const UChar * uSourceLimit ;
const char * cSource ;
const char * cSourceLimit ;
UChar * uTargetLimit = NULL ;
UChar * uTarget ;
char * cTarget ;
const char * cTargetLimit ;
2001-04-18 19:31:05 +00:00
char * cBuf ;
2000-11-17 03:03:14 +00:00
UChar * uBuf , * test ;
2000-08-22 00:04:27 +00:00
int32_t uBufSize = 120 ;
UErrorCode errorCode = U_ZERO_ERROR ;
UConverter * cnv ;
2000-11-17 03:03:14 +00:00
int32_t * offsets = ( int32_t * ) malloc ( uBufSize * sizeof ( int32_t ) * 5 ) ;
int32_t * myOff = offsets ;
2000-08-22 00:04:27 +00:00
cnv = ucnv_open ( " ISO_2022,locale=kr " , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Unable to open a iso-2022 converter: %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
2000-11-17 03:03:14 +00:00
uBuf = ( UChar * ) malloc ( uBufSize * sizeof ( UChar ) * 5 ) ;
cBuf = ( char * ) malloc ( uBufSize * sizeof ( char ) * 5 ) ;
uSource = ( const UChar * ) & in [ 0 ] ;
uSourceLimit = ( const UChar * ) & in [ sizeof ( in ) / 2 ] ;
cTarget = cBuf ;
cTargetLimit = cBuf + uBufSize * 5 ;
uTarget = uBuf ;
uTargetLimit = uBuf + uBufSize * 5 ;
ucnv_fromUnicode ( cnv , & cTarget , cTargetLimit , & uSource , uSourceLimit , myOff , TRUE , & errorCode ) ;
2000-08-22 00:04:27 +00:00
if ( U_FAILURE ( errorCode ) ) {
log_err ( " ucnv_fromUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
cSource = cBuf ;
2000-11-17 03:03:14 +00:00
cSourceLimit = cTarget ;
test = uBuf ;
myOff = offsets ;
ucnv_toUnicode ( cnv , & uTarget , uTargetLimit , & cSource , cSourceLimit , myOff , TRUE , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
2000-08-22 00:04:27 +00:00
log_err ( " ucnv_toUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
uSource = ( const UChar * ) & in [ 0 ] ;
2000-11-17 03:03:14 +00:00
while ( uSource < uSourceLimit ) {
if ( * test ! = * uSource ) {
log_err ( " Expected : \\ u%04X \t Got: \\ u%04X \n " , * uSource , * test ) ;
}
2000-12-08 01:13:38 +00:00
uSource + + ;
test + + ;
2000-11-17 03:03:14 +00:00
}
TestGetNextUChar2022 ( cnv , cBuf , cTarget , in , " ISO-2022-KR encoding " ) ;
2001-03-05 18:39:03 +00:00
TestSmallTargetBuffer ( & in [ 0 ] , ( const UChar * ) & in [ sizeof ( in ) / 2 ] , cnv ) ;
TestSmallSourceBuffer ( & in [ 0 ] , ( const UChar * ) & in [ sizeof ( in ) / 2 ] , cnv ) ;
TestToAndFromUChars ( & in [ 0 ] , ( const UChar * ) & in [ sizeof ( in ) / 2 ] , cnv ) ;
2001-04-07 01:25:14 +00:00
TestJitterbug930 ( " csISO2022KR " ) ;
2000-11-17 03:03:14 +00:00
ucnv_close ( cnv ) ;
free ( uBuf ) ;
free ( cBuf ) ;
free ( offsets ) ;
2000-09-19 02:38:40 +00:00
}
2001-04-07 01:25:14 +00:00
2001-02-24 02:52:14 +00:00
/*
 * Round-trip test for the converter opened under the name " ibm-25546 ".
 *
 * The fixed UTF-16 sample `in` is encoded with ucnv_fromUnicode(), decoded
 * again with ucnv_toUnicode() (both calls are given an offsets array), and
 * the decoded text is compared unit by unit against the input.  After a
 * ucnv_reset() the encoded bytes are passed to TestGetNextUChar2022(), and
 * the input to TestSmallTargetBuffer() and TestSmallSourceBuffer(); after a
 * second ucnv_reset() the input is passed to TestToAndFromUChars().
 *
 * Failures are reported through log_err(); the function returns nothing.
 * Every exit path releases the offsets array, and every path taken after
 * the converter has been opened also releases the converter and buffers.
 */
static void
TestISO_2022_KR_1() {
    /* test input */
    static const uint16_t in[] = {
        0x9F4B, 0x9F4E, 0x9F52, 0x9F5F, 0x9F61, 0x9F67, 0x9F6A, 0x000A, 0x000D,
        0x9F6C, 0x9F77, 0x9F8D, 0x9F90, 0x9F95, 0x9F9C, 0xAC00, 0xAC01, 0xAC04,
        0xAC07, 0xAC08, 0xAC09, 0x0025, 0x0026, 0x0027, 0x000A, 0x000D, 0x0028, 0x0029,
        0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x53C3, 0x53C8, 0x53C9, 0x53CA, 0x53CB,
        0x53CD, 0x53D4, 0x53D6, 0x53D7, 0x53DB, 0x000A, 0x000D, 0x53E1, 0x53E2,
        0x53E3, 0x53E4, 0x000A, 0x000D
    };
    const UChar *uSource;
    const UChar *uSourceLimit;
    const char *cSource;
    const char *cSourceLimit;
    UChar *uTargetLimit = NULL;
    UChar *uTarget;
    char *cTarget;
    const char *cTargetLimit;
    char *cBuf = NULL;
    UChar *uBuf = NULL, *test;
    int32_t uBufSize = 120;
    UErrorCode errorCode = U_ZERO_ERROR;
    UConverter *cnv;
    int32_t *offsets = (int32_t *)malloc(uBufSize * sizeof(int32_t) * 5);
    int32_t *myOff = offsets;

    cnv = ucnv_open(" ibm-25546 ", &errorCode);
    if (U_FAILURE(errorCode)) {
        log_err(" Unable to open a iso-2022 converter: %s \n ", u_errorName(errorCode));
        /* offsets was allocated before the open attempt and must not leak. */
        free(offsets);
        return;
    }

    uBuf = (UChar *)malloc(uBufSize * sizeof(UChar) * 5);
    cBuf = (char *)malloc(uBufSize * sizeof(char) * 5);
    if (uBuf == NULL || cBuf == NULL || offsets == NULL) {
        log_err("Unable to allocate the conversion buffers\n");
        goto cleanup;
    }

    uSource = (const UChar *)&in[0];
    uSourceLimit = (const UChar *)&in[sizeof(in) / sizeof(in[0])];
    cTarget = cBuf;
    cTargetLimit = cBuf + uBufSize * 5;
    uTarget = uBuf;
    uTargetLimit = uBuf + uBufSize * 5;

    /* Unicode -> bytes */
    ucnv_fromUnicode(cnv, &cTarget, cTargetLimit, &uSource, uSourceLimit, myOff, TRUE, &errorCode);
    if (U_FAILURE(errorCode)) {
        log_err(" ucnv_fromUnicode conversion failed reason %s \n ", u_errorName(errorCode));
        goto cleanup;
    }

    /* bytes -> Unicode; the offsets array is reused from its start */
    cSource = cBuf;
    cSourceLimit = cTarget;
    test = uBuf;
    myOff = offsets;
    ucnv_toUnicode(cnv, &uTarget, uTargetLimit, &cSource, cSourceLimit, myOff, TRUE, &errorCode);
    if (U_FAILURE(errorCode)) {
        log_err(" ucnv_toUnicode conversion failed reason %s \n ", u_errorName(errorCode));
        goto cleanup;
    }

    uSource = (const UChar *)&in[0];
    if ((uTarget - uBuf) != (uSourceLimit - uSource)) {
        log_err("Round trip produced %d UChars, expected %d\n",
                (int)(uTarget - uBuf), (int)(uSourceLimit - uSource));
    }
    /* Only compare units the converter actually wrote: uBuf is uninitialised beyond uTarget. */
    while (uSource < uSourceLimit && test < uTarget) {
        if (*test != *uSource) {
            log_err(" Expected : \\ u%04X \t Got: \\ u%04X \n ", *uSource, *test);
        }
        uSource++;
        test++;
    }

    ucnv_reset(cnv);
    /* cBuf..cTarget still holds the encoded form of `in`. */
    TestGetNextUChar2022(cnv, cBuf, cTarget, in, " ISO-2022-KR encoding ");
    TestSmallTargetBuffer(&in[0], (const UChar *)&in[sizeof(in) / sizeof(in[0])], cnv);
    TestSmallSourceBuffer(&in[0], (const UChar *)&in[sizeof(in) / sizeof(in[0])], cnv);
    ucnv_reset(cnv);
    TestToAndFromUChars(&in[0], (const UChar *)&in[sizeof(in) / sizeof(in[0])], cnv);

cleanup:
    ucnv_close(cnv);
    free(uBuf);
    free(cBuf);
    free(offsets);
}
2000-11-21 04:05:39 +00:00
2001-02-23 04:40:39 +00:00
static void
TestJIS ( ) {
/* From Unicode */
{
/* JIS Encoding */
UChar sampleTextJIS [ ] = {
0xFF81 , 0xFF82 ,
0x30EC , 0x30ED ,
0x30EE , 0x30EF ,
0xFF93 , 0xFF94 ,
0xFF95 , 0xFF96 ,
0xFF97 , 0xFF98
} ;
2001-04-18 19:31:05 +00:00
const uint8_t expectedISO2022JIS [ ] = {
0x1b , 0x24 , 0x42 ,
2001-02-23 04:40:39 +00:00
0x25 , 0x41 , 0x25 , 0x44 ,
0x25 , 0x6c , 0x25 , 0x6d ,
0x25 , 0x6e , 0x25 , 0x6F ,
0x25 , 0x62 , 0x25 , 0x64 ,
0x25 , 0x66 , 0x25 , 0x68 ,
0x25 , 0x69 , 0x25 , 0x6a
} ;
2001-04-18 19:31:05 +00:00
int32_t fmISO2022JISOffs [ ] = {
2001-02-23 04:40:39 +00:00
0 , 0 , 0 ,
0 , 0 , 1 , 1 ,
2 , 2 , 3 , 3 ,
4 , 4 , 5 , 5 ,
6 , 6 , 7 , 7 ,
8 , 8 , 9 , 9 ,
10 , 10 , 11 , 11
2001-04-18 19:31:05 +00:00
2001-02-23 04:40:39 +00:00
} ;
/* JIS7 Encoding */
2001-04-18 19:31:05 +00:00
const uint8_t expectedISO2022JIS7 [ ] = {
0x1b , 0x28 , 0x49 ,
2001-02-23 04:40:39 +00:00
0x41 , 0x42 ,
0x1b , 0x24 , 0x42 ,
0x25 , 0x6c , 0x25 , 0x6d ,
0x25 , 0x6e , 0x25 , 0x6F ,
0x1b , 0x28 , 0x49 ,
0x53 , 0x54 ,
0x55 , 0x56 ,
0x57 , 0x58
} ;
2001-04-18 19:31:05 +00:00
int32_t fmISO2022JIS7Offs [ ] = {
2001-02-23 04:40:39 +00:00
0 , 0 , 0 ,
0 , 1 ,
2 , 2 , 2 ,
2 , 2 , 3 , 3 ,
4 , 4 , 5 , 5 ,
6 , 6 , 6 ,
6 , 7 ,
8 , 9 ,
10 , 11
2001-04-18 19:31:05 +00:00
2001-02-23 04:40:39 +00:00
} ;
2001-04-18 19:31:05 +00:00
2001-02-23 04:40:39 +00:00
/* JIS8 Encoding */
2001-04-18 19:31:05 +00:00
const uint8_t expectedISO2022JIS8 [ ] = {
0x1b , 0x28 , 0x4A ,
2001-02-23 04:40:39 +00:00
0xC1 , 0xC2 ,
0x1b , 0x24 , 0x42 ,
0x25 , 0x6c , 0x25 , 0x6d ,
0x25 , 0x6e , 0x25 , 0x6F ,
0x1b , 0x28 , 0x4A ,
0xD3 , 0xD4 ,
0xD5 , 0xD6 ,
0xD7 , 0xD8
} ;
2001-04-18 19:31:05 +00:00
int32_t fmISO2022JIS8Offs [ ] = {
2001-02-23 04:40:39 +00:00
0 , 0 , 0 ,
0 , 1 ,
2 , 2 , 2 ,
2 , 2 , 3 , 3 ,
4 , 4 , 5 , 5 ,
6 , 6 , 6 ,
6 , 7 ,
8 , 9 ,
10 , 11
2001-04-18 19:31:05 +00:00
} ;
2001-02-23 04:40:39 +00:00
if ( ! testConvertFromU ( sampleTextJIS , sizeof ( sampleTextJIS ) / sizeof ( sampleTextJIS [ 0 ] ) ,
expectedISO2022JIS , sizeof ( expectedISO2022JIS ) , " JIS " , fmISO2022JISOffs , TRUE ) )
log_err ( " u->JIS did not match. \n " ) ;
if ( ! testConvertFromU ( sampleTextJIS , sizeof ( sampleTextJIS ) / sizeof ( sampleTextJIS [ 0 ] ) ,
expectedISO2022JIS7 , sizeof ( expectedISO2022JIS7 ) , " JIS7 " , fmISO2022JIS7Offs , FALSE ) )
log_err ( " u-> JIS7 did not match. \n " ) ;
if ( ! testConvertFromU ( sampleTextJIS , sizeof ( sampleTextJIS ) / sizeof ( sampleTextJIS [ 0 ] ) ,
expectedISO2022JIS8 , sizeof ( expectedISO2022JIS8 ) , " JIS8 " , fmISO2022JIS8Offs , FALSE ) )
log_err ( " u-> JIS8 did not match. \n " ) ;
}
/*To Unicode*/
{
const uint8_t sampleTextJIS [ ] = {
0x1b , 0x28 , 0x48 , 0x41 , 0x42 , /*jis-Roman*/
0x1b , 0x28 , 0x49 , 0x41 , 0x42 , /*Katakana Set*/
0x1b , 0x26 , 0x40 , 0x1b , 0x24 , 0x42 , 0x21 , 0x21 /*recognize and ignore <esc>&@*/
} ;
const uint16_t expectedISO2022JIS [ ] = {
0x0041 , 0x0042 ,
0xFF81 , 0xFF82 ,
0x3000
} ;
2001-04-18 19:31:05 +00:00
int32_t toISO2022JISOffs [ ] = {
2001-02-23 04:40:39 +00:00
3 , 4 ,
8 , 9 ,
16
} ;
2001-04-18 19:31:05 +00:00
2001-02-23 04:40:39 +00:00
const uint8_t sampleTextJIS7 [ ] = {
0x1b , 0x28 , 0x48 , 0x41 , 0x42 , /*JIS7-Roman*/
0x1b , 0x28 , 0x49 , 0x41 , 0x42 , /*Katakana Set*/
0x1b , 0x24 , 0x42 , 0x21 , 0x21 ,
0x0e , 0x41 , 0x42 , 0x0f , /*Test Katakana set with SI and SO */
0x21 , 0x22 ,
0x1b , 0x26 , 0x40 , 0x1b , 0x24 , 0x42 , 0x21 , 0x21 /*recognize and ignore <esc>&@*/
} ;
const uint16_t expectedISO2022JIS7 [ ] = {
0x0041 , 0x0042 ,
0xFF81 , 0xFF82 ,
0x3000 ,
0xFF81 , 0xFF82 ,
0x3001 ,
0x3000
} ;
2001-04-18 19:31:05 +00:00
int32_t toISO2022JIS7Offs [ ] = {
2001-02-23 04:40:39 +00:00
3 , 4 ,
8 , 9 ,
13 , 16 ,
17 ,
19 , 27
} ;
const uint8_t sampleTextJIS8 [ ] = {
0x1b , 0x28 , 0x48 , 0x41 , 0x42 , /*JIS8-Roman*/
0xa1 , 0xc8 , 0xd9 , /*Katakana Set*/
0x1b , 0x28 , 0x42 ,
0x41 , 0x42 ,
0xb1 , 0xc3 , /*Katakana Set*/
2001-04-18 19:31:05 +00:00
0x1b , 0x24 , 0x42 , 0x21 , 0x21
2001-02-23 04:40:39 +00:00
} ;
const uint16_t expectedISO2022JIS8 [ ] = {
0x0041 , 0x0042 ,
2001-04-18 19:31:05 +00:00
0xff61 , 0xff88 , 0xff99 ,
2001-02-23 04:40:39 +00:00
0x0041 , 0x0042 ,
0xff71 , 0xff83 ,
0x3000
} ;
2001-04-18 19:31:05 +00:00
int32_t toISO2022JIS8Offs [ ] = {
3 , 4 , 5 , 6 ,
7 , 11 , 12 , 13 ,
2001-02-23 04:40:39 +00:00
14 , 18 ,
2001-04-18 19:31:05 +00:00
} ;
2001-02-23 04:40:39 +00:00
2001-04-18 19:31:05 +00:00
if ( ! testConvertToU ( sampleTextJIS , sizeof ( sampleTextJIS ) , expectedISO2022JIS ,
2001-02-23 04:40:39 +00:00
sizeof ( expectedISO2022JIS ) / sizeof ( expectedISO2022JIS [ 0 ] ) , " JIS " , toISO2022JISOffs , TRUE ) )
log_err ( " JIS -> u did not match. \n " ) ;
2001-04-18 19:31:05 +00:00
if ( ! testConvertToU ( sampleTextJIS7 , sizeof ( sampleTextJIS7 ) , expectedISO2022JIS7 ,
2001-02-23 04:40:39 +00:00
sizeof ( expectedISO2022JIS7 ) / sizeof ( expectedISO2022JIS7 [ 0 ] ) , " JIS7 " , toISO2022JIS7Offs , TRUE ) )
log_err ( " JIS7 -> u did not match. \n " ) ;
2001-04-18 19:31:05 +00:00
if ( ! testConvertToU ( sampleTextJIS8 , sizeof ( sampleTextJIS8 ) , expectedISO2022JIS8 ,
2001-02-23 04:40:39 +00:00
sizeof ( expectedISO2022JIS8 ) / sizeof ( expectedISO2022JIS8 [ 0 ] ) , " JIS8 " , toISO2022JIS8Offs , TRUE ) )
log_err ( " JIS8 -> u did not match. \n " ) ;
}
}
2001-05-08 00:01:30 +00:00
2001-05-11 02:30:47 +00:00
static void TestJitterbug915 ( ) {
/* tests for roundtripping of the below sequence
2001-05-31 23:30:09 +00:00
\ x1b $ ) G \ x0E # ! # " ###$#%#&#'#(#)#*#+ / *plane 1 * /
2001-05-11 02:30:47 +00:00
\ x1b $ * H \ x1bN " ! \x1b N " " \x1b N " # \ x1bN " $ \x1b N " % / * plane 2 * /
\ x1b $ + I \ x1bO " D \x1b O " E \ x1bO " F \x1b O " G \ x1bO " H / *plane 3 * /
\ x1b $ + J \ x1bO ! D \ x1bO ! E \ x1bO " j \x1b O " k \ x1bO " l / *plane 4 * /
\ x1b $ + K \ x1bO ! t \ x1bO " P \x1b O " Q \ x1bO # 7 \ x1bO " \ / *plane 5 * /
\ x1b $ + L \ x1bO ! # \ x1bO " , \x1b O#N \x1b O!n \x1b O#q / *plane 6 * /
\ x1b $ + M \ x1bO " q \x1b O!N \x1b O!j \x1b O#: \x1b O#o / *plane 7 * /
*/
2001-05-31 23:30:09 +00:00
static char cSource [ ] = {
0x1B , 0x24 , 0x29 , 0x47 , 0x0E , 0x23 , 0x21 , 0x23 , 0x22 , 0x23 ,
0x23 , 0x23 , 0x24 , 0x23 , 0x25 , 0x23 , 0x26 , 0x23 , 0x27 , 0x23 ,
0x28 , 0x23 , 0x29 , 0x23 , 0x2A , 0x23 , 0x2B , 0x0F , 0x2F , 0x2A ,
0x70 , 0x6C , 0x61 , 0x6E , 0x65 , 0x20 , 0x31 , 0x20 , 0x2A , 0x2F ,
0x0D , 0x0A , 0x1B , 0x24 , 0x2A , 0x48 , 0x1B , 0x4E , 0x22 , 0x21 ,
0x1B , 0x4E , 0x22 , 0x22 , 0x1B , 0x4E , 0x22 , 0x23 , 0x1B , 0x4E ,
0x22 , 0x24 , 0x1B , 0x4E , 0x22 , 0x25 , 0x0F , 0x2F , 0x2A , 0x70 ,
0x6C , 0x61 , 0x6E , 0x65 , 0x32 , 0x2A , 0x2F , 0x20 , 0x0D , 0x0A ,
0x1B , 0x24 , 0x2B , 0x49 , 0x1B , 0x4F , 0x22 , 0x44 , 0x1B , 0x4F ,
0x22 , 0x45 , 0x1B , 0x4F , 0x22 , 0x46 , 0x1B , 0x4F , 0x22 , 0x47 ,
0x1B , 0x4F , 0x22 , 0x48 , 0x0F , 0x2F , 0x2A , 0x70 , 0x6C , 0x61 ,
0x6E , 0x65 , 0x20 , 0x33 , 0x20 , 0x2A , 0x2F , 0x0D , 0x0A , 0x1B ,
0x24 , 0x2B , 0x4A , 0x1B , 0x4F , 0x21 , 0x44 , 0x1B , 0x4F , 0x21 ,
0x45 , 0x1B , 0x4F , 0x22 , 0x6A , 0x1B , 0x4F , 0x22 , 0x6B , 0x1B ,
0x4F , 0x22 , 0x6C , 0x0F , 0x2F , 0x2A , 0x70 , 0x6C , 0x61 , 0x6E ,
0x65 , 0x20 , 0x34 , 0x20 , 0x2A , 0x2F , 0x0D , 0x0A , 0x1B , 0x24 ,
0x2B , 0x4B , 0x1B , 0x4F , 0x21 , 0x74 , 0x1B , 0x4F , 0x22 , 0x50 ,
0x1B , 0x4F , 0x22 , 0x51 , 0x1B , 0x4F , 0x23 , 0x37 , 0x1B , 0x4F ,
0x22 , 0x5C , 0x0F , 0x2F , 0x2A , 0x70 , 0x6C , 0x61 , 0x6E ,
0x65 , 0x20 , 0x35 , 0x20 , 0x2A , 0x2F , 0x0D , 0x0A , 0x1B , 0x24 ,
0x2B , 0x4C , 0x1B , 0x4F , 0x21 , 0x23 , 0x1B , 0x4F , 0x22 , 0x2C ,
0x1B , 0x4F , 0x23 , 0x4E , 0x1B , 0x4F , 0x21 , 0x6E , 0x1B , 0x4F ,
0x23 , 0x71 , 0x0F , 0x2F , 0x2A , 0x70 , 0x6C , 0x61 , 0x6E , 0x65 ,
0x20 , 0x36 , 0x20 , 0x2A , 0x2F , 0x0D , 0x0A , 0x1B , 0x24 , 0x2B ,
0x4D , 0x1B , 0x4F , 0x22 , 0x71 , 0x1B , 0x4F , 0x21 , 0x4E , 0x1B ,
0x4F , 0x21 , 0x6A , 0x1B , 0x4F , 0x23 , 0x3A , 0x1B , 0x4F , 0x23 ,
0x6F , 0x0F , 0x2F , 0x2A , 0x70 , 0x6C , 0x61 , 0x6E , 0x65 , 0x20 ,
0x37 , 0x20 , 0x2A , 0x2F ,
} ;
UChar uTarget [ 500 ] = { ' \0 ' } ;
UChar * utarget = uTarget ;
UChar * utargetLimit = uTarget + sizeof ( uTarget ) / 2 ;
char cTarget [ 500 ] = { ' \0 ' } ;
char * ctarget = cTarget ;
char * ctargetLimit = cTarget + sizeof ( cTarget ) ;
const char * csource = cSource ;
char * tempSrc = cSource ;
UErrorCode err = U_ZERO_ERROR ;
UConverter * conv = ucnv_open ( " ISO_2022_CN_EXT " , & err ) ;
if ( U_FAILURE ( err ) ) {
2001-05-11 02:30:47 +00:00
log_err ( " Unable to open a iso-2022 converter: %s \n " , u_errorName ( err ) ) ;
return ;
}
2001-05-31 23:30:09 +00:00
ucnv_toUnicode ( conv , & utarget , utargetLimit , & csource , csource + sizeof ( cSource ) , NULL , TRUE , & err ) ;
if ( U_FAILURE ( err ) ) {
2001-05-11 02:30:47 +00:00
log_err ( " iso-2022-CN to Unicode conversion failed: %s \n " , u_errorName ( err ) ) ;
return ;
}
2001-05-31 23:30:09 +00:00
utargetLimit = utarget ;
utarget = uTarget ;
ucnv_fromUnicode ( conv , & ctarget , ctargetLimit , ( const UChar * * ) & utarget , utargetLimit , NULL , TRUE , & err ) ;
if ( U_FAILURE ( err ) ) {
2001-05-11 02:30:47 +00:00
log_err ( " iso-2022-CN from Unicode conversion failed: %s \n " , u_errorName ( err ) ) ;
return ;
}
2001-05-31 23:30:09 +00:00
ctargetLimit = ctarget ;
ctarget = cTarget ;
while ( ctarget < ctargetLimit ) {
if ( * ( ctarget + + ) ! = * ( tempSrc + + ) ) {
2001-05-11 02:30:47 +00:00
log_err ( " Expected : \\ x%02X \t Got: \\ x%02X \n " , * ctarget , ( int ) * tempSrc ) ;
2001-05-31 23:30:09 +00:00
}
}
2001-05-11 02:30:47 +00:00
2001-08-25 01:06:41 +00:00
ucnv_close ( conv ) ;
2001-05-11 02:30:47 +00:00
}
2000-11-21 04:05:39 +00:00
static void
2000-10-28 01:08:25 +00:00
TestISO_2022_CN_EXT ( ) {
2000-09-19 02:38:40 +00:00
/* test input */
static const uint16_t in [ ] = {
2001-05-08 00:01:30 +00:00
/* test Non-BMP code points */
0xD869 , 0xDE99 , 0xD869 , 0xDE9C , 0xD869 , 0xDE9D , 0xD869 , 0xDE9E , 0xD869 , 0xDE9F ,
0xD869 , 0xDEA0 , 0xD869 , 0xDEA5 , 0xD869 , 0xDEA6 , 0xD869 , 0xDEA7 , 0xD869 , 0xDEA8 ,
0xD869 , 0xDEAB , 0xD869 , 0xDEAC , 0xD869 , 0xDEAD , 0xD869 , 0xDEAE , 0xD869 , 0xDEAF ,
0xD869 , 0xDEB0 , 0xD869 , 0xDEB1 , 0xD869 , 0xDEB3 , 0xD869 , 0xDEB5 , 0xD869 , 0xDEB6 ,
0xD869 , 0xDEB7 , 0xD869 , 0xDEB8 , 0xD869 , 0xDEB9 , 0xD869 , 0xDEBA , 0xD869 , 0xDEBB ,
0xD869 , 0xDEBC , 0xD869 , 0xDEBD , 0xD869 , 0xDEBE , 0xD869 , 0xDEBF , 0xD869 , 0xDEC0 ,
0xD869 , 0xDEC1 , 0xD869 , 0xDEC2 , 0xD869 , 0xDEC3 , 0xD869 , 0xDEC4 , 0xD869 , 0xDEC8 ,
0xD869 , 0xDECA , 0xD869 , 0xDECB , 0xD869 , 0xDECD , 0xD869 , 0xDECE , 0xD869 , 0xDECF ,
0xD869 , 0xDED0 , 0xD869 , 0xDED1 , 0xD869 , 0xDED2 , 0xD869 , 0xDED3 , 0xD869 , 0xDED4 ,
0xD869 , 0xDED5 ,
2000-09-21 00:35:06 +00:00
0x4DB3 , 0x4DB4 , 0x4DB5 , 0x4E00 , 0x4E00 , 0x4E01 , 0x4E02 , 0x4E03 , 0x000D , 0x000A ,
0x0392 , 0x0393 , 0x0394 , 0x0395 , 0x0396 , 0x0397 , 0x33E0 , 0x33E6 , 0x000D , 0x000A ,
0x4E05 , 0x4E07 , 0x4E04 , 0x4E08 , 0x4E08 , 0x4E09 , 0x4E0A , 0x4E0B , 0x000D , 0x000A ,
0x4E0C , 0x0021 , 0x0022 , 0x0023 , 0x0024 , 0xFF40 , 0xFF41 , 0xFF42 , 0x000D , 0x000A ,
0xFF43 , 0xFF44 , 0xFF45 , 0xFF46 , 0xFF47 , 0xFF48 , 0xFF49 , 0xFF4A , 0x000D , 0x000A ,
0xFF4B , 0xFF4C , 0xFF4D , 0xFF4E , 0xFF4F , 0x6332 , 0x63B0 , 0x643F , 0x000D , 0x000A ,
0x64D8 , 0x8004 , 0x6BEA , 0x6BF3 , 0x6BFD , 0x6BF5 , 0x6BF9 , 0x6C05 , 0x000D , 0x000A ,
2000-11-17 03:03:14 +00:00
0x0041 , 0x0042 , 0x0043 , 0x0044 , 0x0045 , 0x0046 , 0x0047 , 0x0048 , 0x000D , 0x000A ,
2000-09-21 00:35:06 +00:00
0x6C07 , 0x6C06 , 0x6C0D , 0x6C15 , 0x9CD9 , 0x9CDC , 0x9CDD , 0x9CDF , 0x000D , 0x000A ,
0x9CE2 , 0x977C , 0x9785 , 0x9791 , 0x9792 , 0x9794 , 0x97AF , 0x97AB , 0x000D , 0x000A ,
0x97A3 , 0x97B2 , 0x97B4 , 0x9AB1 , 0x9AB0 , 0x9AB7 , 0x9E58 , 0x9AB6 , 0x000D , 0x000A ,
0x9ABA , 0x9ABC , 0x9AC1 , 0x9AC0 , 0x9AC5 , 0x9AC2 , 0x9ACB , 0x9ACC , 0x000D , 0x000A ,
0x9AD1 , 0x9B45 , 0x9B43 , 0x9B47 , 0x9B49 , 0x9B48 , 0x9B4D , 0x9B51 , 0x000D , 0x000A ,
0x98E8 , 0x990D , 0x992E , 0x9955 , 0x9954 , 0x9ADF , 0x3443 , 0x3444 , 0x000D , 0x000A ,
0x3445 , 0x3449 , 0x344A , 0x344B , 0x60F2 , 0x60F3 , 0x60F4 , 0x60F5 , 0x000D , 0x000A ,
0x60F6 , 0x60F7 , 0x60F8 , 0x60F9 , 0x60FA , 0x60FB , 0x60FC , 0x60FD , 0x000D , 0x000A ,
0x60FE , 0x60FF , 0x6100 , 0x6101 , 0x6102 , 0x0041 , 0x0042 , 0x0043 , 0x000D , 0x000A ,
0x0044 , 0x0045 , 0x0046 , 0x0047 , 0x0048 , 0x0049 , 0x004A , 0x004B , 0x000D , 0x000A ,
2001-05-08 00:01:30 +00:00
2001-04-18 19:31:05 +00:00
0x33E7 , 0x33E8 , 0x33E9 , 0x33EA , 0x000D , 0x000A
2000-09-21 00:35:06 +00:00
2000-09-19 02:38:40 +00:00
} ;
2001-05-08 00:01:30 +00:00
2000-11-17 03:03:14 +00:00
const UChar * uSource ;
const UChar * uSourceLimit ;
const char * cSource ;
const char * cSourceLimit ;
UChar * uTargetLimit = NULL ;
UChar * uTarget ;
char * cTarget ;
const char * cTargetLimit ;
2001-04-18 19:31:05 +00:00
char * cBuf ;
2000-11-17 03:03:14 +00:00
UChar * uBuf , * test ;
2000-09-21 00:35:06 +00:00
int32_t uBufSize = 180 ;
2000-09-19 02:38:40 +00:00
UErrorCode errorCode = U_ZERO_ERROR ;
UConverter * cnv ;
2000-11-17 03:03:14 +00:00
int32_t * offsets = ( int32_t * ) malloc ( uBufSize * sizeof ( int32_t ) * 5 ) ;
int32_t * myOff = offsets ;
2000-10-28 01:08:25 +00:00
cnv = ucnv_open ( " ISO_2022,locale=cn,version=1 " , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Unable to open a iso-2022 converter: %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
2000-11-17 03:03:14 +00:00
uBuf = ( UChar * ) malloc ( uBufSize * sizeof ( UChar ) * 5 ) ;
cBuf = ( char * ) malloc ( uBufSize * sizeof ( char ) * 10 ) ;
uSource = ( const UChar * ) & in [ 0 ] ;
uSourceLimit = ( const UChar * ) & in [ sizeof ( in ) / 2 ] ;
cTarget = cBuf ;
cTargetLimit = cBuf + uBufSize * 5 ;
uTarget = uBuf ;
uTargetLimit = uBuf + uBufSize * 5 ;
ucnv_fromUnicode ( cnv , & cTarget , cTargetLimit , & uSource , uSourceLimit , myOff , TRUE , & errorCode ) ;
2000-10-28 01:08:25 +00:00
if ( U_FAILURE ( errorCode ) ) {
log_err ( " ucnv_fromUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
cSource = cBuf ;
2000-11-17 03:03:14 +00:00
cSourceLimit = cTarget ;
test = uBuf ;
myOff = offsets ;
ucnv_toUnicode ( cnv , & uTarget , uTargetLimit , & cSource , cSourceLimit , myOff , TRUE , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
2000-10-28 01:08:25 +00:00
log_err ( " ucnv_toUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
uSource = ( const UChar * ) & in [ 0 ] ;
2000-11-17 03:03:14 +00:00
while ( uSource < uSourceLimit ) {
if ( * test ! = * uSource ) {
log_err ( " Expected : \\ u%04X \t Got: \\ u%04X \n " , * uSource , ( int ) * test ) ;
}
2000-10-28 01:08:25 +00:00
else {
2000-11-17 03:03:14 +00:00
log_verbose ( " Got: \\ u%04X \n " , ( int ) * test ) ;
2000-10-28 01:08:25 +00:00
}
2000-12-08 01:13:38 +00:00
uSource + + ;
test + + ;
2000-11-17 03:03:14 +00:00
}
2001-03-05 18:39:03 +00:00
TestSmallTargetBuffer ( & in [ 0 ] , ( const UChar * ) & in [ sizeof ( in ) / 2 ] , cnv ) ;
2001-04-18 19:31:05 +00:00
TestSmallSourceBuffer ( & in [ 0 ] , ( const UChar * ) & in [ sizeof ( in ) / 2 ] , cnv ) ;
2000-11-17 03:03:14 +00:00
ucnv_close ( cnv ) ;
free ( uBuf ) ;
free ( cBuf ) ;
free ( offsets ) ;
2000-10-28 01:08:25 +00:00
}
2000-11-21 04:05:39 +00:00
static void
2000-10-28 01:08:25 +00:00
TestISO_2022_CN ( ) {
/* test input */
static const uint16_t in [ ] = {
2001-05-31 23:30:09 +00:00
/* jitterbug 951 */
0xFF2D , 0xFF49 , 0xFF58 , 0xFF45 , 0xFF44 , 0x0020 , 0xFF43 , 0xFF48 , 0xFF41 , 0xFF52 ,
0x0020 , 0xFF06 , 0x0020 , 0xFF11 , 0xFF12 , 0xFF13 , 0xFF14 , 0xFF15 , 0xFF16 , 0xFF17 ,
0xFF18 , 0xFF19 , 0xFF10 , 0x0020 , 0xFF4E , 0xFF55 , 0xFF4D , 0xFF42 , 0xFF45 , 0xFF52 ,
0x0020 , 0xFF54 , 0xFF45 , 0xFF53 , 0xFF54 , 0x0020 , 0xFF4C , 0xFF49 , 0xFF4E , 0xFF45 ,
0x0020 , 0x0045 , 0x004e , 0x0044 ,
/**/
2000-10-28 01:08:25 +00:00
0x4E00 , 0x4E00 , 0x4E01 , 0x4E03 , 0x60F6 , 0x60F7 , 0x60F8 , 0x60FB , 0x000D , 0x000A ,
0x0392 , 0x0393 , 0x0394 , 0x0395 , 0x0396 , 0x0397 , 0x60FB , 0x60FC , 0x000D , 0x000A ,
0x4E07 , 0x4E08 , 0x4E08 , 0x4E09 , 0x4E0A , 0x4E0B , 0x0042 , 0x0043 , 0x000D , 0x000A ,
0x4E0C , 0x0021 , 0x0022 , 0x0023 , 0x0024 , 0xFF40 , 0xFF41 , 0xFF42 , 0x000D , 0x000A ,
0xFF43 , 0xFF44 , 0xFF45 , 0xFF46 , 0xFF47 , 0xFF48 , 0xFF49 , 0xFF4A , 0x000D , 0x000A ,
0xFF4B , 0xFF4C , 0xFF4D , 0xFF4E , 0xFF4F , 0x6332 , 0x63B0 , 0x643F , 0x000D , 0x000A ,
0x64D8 , 0x8004 , 0x6BEA , 0x6BF3 , 0x6BFD , 0x6BF5 , 0x6BF9 , 0x6C05 , 0x000D , 0x000A ,
0x6C07 , 0x6C06 , 0x6C0D , 0x6C15 , 0x9CD9 , 0x9CDC , 0x9CDD , 0x9CDF , 0x000D , 0x000A ,
0x9CE2 , 0x977C , 0x9785 , 0x9791 , 0x9792 , 0x9794 , 0x97AF , 0x97AB , 0x000D , 0x000A ,
0x97A3 , 0x97B2 , 0x97B4 , 0x9AB1 , 0x9AB0 , 0x9AB7 , 0x9E58 , 0x9AB6 , 0x000D , 0x000A ,
0x9ABA , 0x9ABC , 0x9AC1 , 0x9AC0 , 0x9AC5 , 0x9AC2 , 0x9ACB , 0x9ACC , 0x000D , 0x000A ,
0x9AD1 , 0x9B45 , 0x9B43 , 0x9B47 , 0x9B49 , 0x9B48 , 0x9B4D , 0x9B51 , 0x000D , 0x000A ,
0x98E8 , 0x990D , 0x992E , 0x9955 , 0x9954 , 0x9ADF , 0x60FE , 0x60FF , 0x000D , 0x000A ,
0x60F2 , 0x60F3 , 0x60F4 , 0x60F5 , 0x000D , 0x000A , 0x60F9 , 0x60FA , 0x000D , 0x000A ,
0x6100 , 0x6101 , 0x0041 , 0x0044 , 0x0045 , 0x0046 , 0x0047 , 0x0048 , 0x000D , 0x000A ,
0x247D , 0x247E , 0x247F , 0x2480 , 0x2481 , 0x2482 , 0x2483 , 0x2484 , 0x2485 , 0x2486 ,
0x2487 , 0x2460 , 0x2461 , 0xFF20 , 0xFF21 , 0xFF22 , 0x0049 , 0x004A , 0x000D , 0x000A ,
} ;
2000-11-17 03:03:14 +00:00
const UChar * uSource ;
const UChar * uSourceLimit ;
const char * cSource ;
const char * cSourceLimit ;
UChar * uTargetLimit = NULL ;
UChar * uTarget ;
char * cTarget ;
const char * cTargetLimit ;
2001-04-18 19:31:05 +00:00
char * cBuf ;
2000-11-17 03:03:14 +00:00
UChar * uBuf , * test ;
2000-10-28 01:08:25 +00:00
int32_t uBufSize = 180 ;
UErrorCode errorCode = U_ZERO_ERROR ;
UConverter * cnv ;
2000-11-17 03:03:14 +00:00
int32_t * offsets = ( int32_t * ) malloc ( uBufSize * sizeof ( int32_t ) * 5 ) ;
int32_t * myOff = offsets ;
2000-10-28 01:08:25 +00:00
cnv = ucnv_open ( " ISO_2022,locale=cn,version=0 " , & errorCode ) ;
2000-09-19 02:38:40 +00:00
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Unable to open a iso-2022 converter: %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
2000-11-17 03:03:14 +00:00
uBuf = ( UChar * ) malloc ( uBufSize * sizeof ( UChar ) * 5 ) ;
cBuf = ( char * ) malloc ( uBufSize * sizeof ( char ) * 10 ) ;
uSource = ( const UChar * ) & in [ 0 ] ;
uSourceLimit = ( const UChar * ) & in [ sizeof ( in ) / 2 ] ;
cTarget = cBuf ;
cTargetLimit = cBuf + uBufSize * 5 ;
uTarget = uBuf ;
uTargetLimit = uBuf + uBufSize * 5 ;
ucnv_fromUnicode ( cnv , & cTarget , cTargetLimit , & uSource , uSourceLimit , myOff , TRUE , & errorCode ) ;
2000-09-19 02:38:40 +00:00
if ( U_FAILURE ( errorCode ) ) {
log_err ( " ucnv_fromUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
cSource = cBuf ;
2000-11-17 03:03:14 +00:00
cSourceLimit = cTarget ;
test = uBuf ;
myOff = offsets ;
ucnv_toUnicode ( cnv , & uTarget , uTargetLimit , & cSource , cSourceLimit , myOff , TRUE , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
2000-09-19 02:38:40 +00:00
log_err ( " ucnv_toUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
uSource = ( const UChar * ) & in [ 0 ] ;
2000-11-17 03:03:14 +00:00
while ( uSource < uSourceLimit ) {
if ( * test ! = * uSource ) {
log_err ( " Expected : \\ u%04X \t Got: \\ u%04X \n " , * uSource , ( int ) * test ) ;
}
2000-10-17 08:05:02 +00:00
else {
2000-11-17 03:03:14 +00:00
log_verbose ( " Got: \\ u%04X \n " , ( int ) * test ) ;
2000-10-17 08:05:02 +00:00
}
2000-12-08 01:13:38 +00:00
uSource + + ;
test + + ;
2000-11-17 03:03:14 +00:00
}
TestGetNextUChar2022 ( cnv , cBuf , cTarget , in , " ISO-2022-CN encoding " ) ;
2001-03-05 18:39:03 +00:00
TestSmallTargetBuffer ( & in [ 0 ] , ( const UChar * ) & in [ sizeof ( in ) / 2 ] , cnv ) ;
2001-04-18 19:31:05 +00:00
TestSmallSourceBuffer ( & in [ 0 ] , ( const UChar * ) & in [ sizeof ( in ) / 2 ] , cnv ) ;
2001-03-05 18:39:03 +00:00
TestToAndFromUChars ( & in [ 0 ] , ( const UChar * ) & in [ sizeof ( in ) / 2 ] , cnv ) ;
2001-04-07 01:25:14 +00:00
TestJitterbug930 ( " csISO2022CN " ) ;
2000-11-17 03:03:14 +00:00
ucnv_close ( cnv ) ;
free ( uBuf ) ;
free ( cBuf ) ;
free ( offsets ) ;
2000-08-15 00:07:33 +00:00
}
2000-11-21 04:05:39 +00:00
static void
2000-06-22 01:18:30 +00:00
TestEBCDIC_STATEFUL ( ) {
/* test input */
static const uint8_t in [ ] = {
0x61 ,
0x1a ,
0x0f , 0x4b ,
0x42 ,
2001-04-18 19:31:05 +00:00
0x40 ,
2000-06-22 01:18:30 +00:00
0x36 ,
} ;
2000-01-19 19:00:53 +00:00
2000-06-22 01:18:30 +00:00
/* expected test results */
static const uint32_t results [ ] = {
/* number of bytes read, code point */
1 , 0x002f ,
1 , 0x0092 ,
2 , 0x002e ,
1 , 0xff62 ,
2001-04-18 19:31:05 +00:00
1 , 0x0020 ,
2000-06-22 01:18:30 +00:00
1 , 0x0096 ,
2001-04-18 19:31:05 +00:00
2000-06-22 01:18:30 +00:00
} ;
2000-08-12 04:27:59 +00:00
static const uint8_t in2 [ ] = {
0x0f ,
0xa1 ,
0x01
} ;
/* expected test results */
static const uint32_t results2 [ ] = {
/* number of bytes read, code point */
2 , 0x203E ,
1 , 0x0001 ,
} ;
2000-01-19 19:00:53 +00:00
2000-06-22 01:18:30 +00:00
const char * source = ( const char * ) in , * limit = ( const char * ) in + sizeof ( in ) ;
UErrorCode errorCode = U_ZERO_ERROR ;
UConverter * cnv = ucnv_open ( " ibm-930 " , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-10-27 00:10:17 +00:00
return ;
2000-06-22 01:18:30 +00:00
}
TestNextUChar ( cnv , source , limit , results , " EBCDIC_STATEFUL(ibm-930) " ) ;
2000-08-12 04:27:59 +00:00
ucnv_reset ( cnv ) ;
2000-12-20 00:02:15 +00:00
/* Test the condition when source >= sourceLimit */
TestNextUCharError ( cnv , source , source , U_INDEX_OUTOFBOUNDS_ERROR , " sourceLimit <= source " ) ;
2000-08-12 04:27:59 +00:00
ucnv_reset ( cnv ) ;
2000-06-22 23:46:02 +00:00
/*Test for the condition where source > sourcelimit after consuming the shift chracter */
{
static const uint8_t source1 [ ] = { 0x0f } ;
TestNextUCharError ( cnv , ( const char * ) source1 , ( const char * ) source1 + sizeof ( source1 ) , U_INDEX_OUTOFBOUNDS_ERROR , " a character is truncated " ) ;
}
/*Test for the condition where there is an invalid character*/
2000-08-12 04:27:59 +00:00
ucnv_reset ( cnv ) ;
2000-06-22 23:46:02 +00:00
{
2000-08-12 04:27:59 +00:00
static const uint8_t source2 [ ] = { 0x0e , 0x7F , 0xFF } ;
TestNextUCharError ( cnv , ( const char * ) source2 , ( const char * ) source2 + sizeof ( source2 ) , U_ZERO_ERROR , " an invalid character [EBCDIC STATEFUL] " ) ;
2000-06-22 23:46:02 +00:00
}
2000-08-12 04:27:59 +00:00
ucnv_reset ( cnv ) ;
2000-08-14 17:47:43 +00:00
source = ( const char * ) in2 ;
limit = ( const char * ) in2 + sizeof ( in2 ) ;
2000-08-12 04:27:59 +00:00
TestNextUChar ( cnv , source , limit , results2 , " EBCDIC_STATEFUL(ibm-930),seq#2 " ) ;
2000-01-19 19:00:53 +00:00
ucnv_close ( cnv ) ;
2000-08-12 04:27:59 +00:00
2000-01-19 19:00:53 +00:00
}
2000-10-26 00:18:34 +00:00
2000-11-21 04:05:39 +00:00
static void
2000-10-26 00:18:34 +00:00
TestGB18030 ( ) {
/* test input */
static const uint8_t in [ ] = {
0x24 ,
0x7f ,
2000-11-30 22:15:07 +00:00
0x81 , 0x30 , 0x81 , 0x30 ,
2000-10-26 00:18:34 +00:00
0xa8 , 0xbf ,
2000-11-30 22:15:07 +00:00
0xa2 , 0xe3 ,
2000-10-26 00:18:34 +00:00
0xd2 , 0xbb ,
2000-11-30 22:15:07 +00:00
0x82 , 0x35 , 0x8f , 0x33 ,
0x84 , 0x31 , 0xa4 , 0x39 ,
2000-10-26 00:18:34 +00:00
0x90 , 0x30 , 0x81 , 0x30 ,
2000-10-26 20:09:17 +00:00
0xe3 , 0x32 , 0x9a , 0x35
#if 0
/*
* Feature removed markus 2000 - oct - 26
* Only some codepages must match surrogate pairs into supplementary code points -
* see javadoc for ucnv_getNextUChar ( ) and implementation notes in ucnvmbcs . c .
* GB 18030 provides direct encodings for supplementary code points , therefore
* it must not combine two single - encoded surrogates into one code point .
*/
2000-10-26 00:18:34 +00:00
0x83 , 0x36 , 0xc8 , 0x30 , 0x83 , 0x37 , 0xb0 , 0x34 /* separately encoded surrogates */
2000-10-26 20:09:17 +00:00
# endif
2000-10-26 00:18:34 +00:00
} ;
/* expected test results */
static const uint32_t results [ ] = {
/* number of bytes read, code point */
1 , 0x24 ,
1 , 0x7f ,
4 , 0x80 ,
2 , 0x1f9 ,
2000-11-30 22:15:07 +00:00
2 , 0x20ac ,
2000-10-26 00:18:34 +00:00
2 , 0x4e00 ,
4 , 0x9fa6 ,
4 , 0xffff ,
4 , 0x10000 ,
2000-10-26 20:09:17 +00:00
4 , 0x10ffff
#if 0
/* Feature removed. See comment above. */
2000-10-26 00:18:34 +00:00
8 , 0x10000
2000-10-26 20:09:17 +00:00
# endif
2000-10-26 00:18:34 +00:00
} ;
2000-11-21 04:05:39 +00:00
/* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
2000-10-26 00:18:34 +00:00
UErrorCode errorCode = U_ZERO_ERROR ;
UConverter * cnv = ucnv_open ( " gb18030 " , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Unable to open a gb18030 converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-10-27 00:10:17 +00:00
return ;
2000-10-26 00:18:34 +00:00
}
TestNextUChar ( cnv , ( const char * ) in , ( const char * ) in + sizeof ( in ) , results , " gb18030 " ) ;
ucnv_close ( cnv ) ;
}
2000-11-21 04:05:39 +00:00
static void
2000-03-31 16:53:09 +00:00
TestLMBCS ( ) {
2000-06-28 17:01:52 +00:00
/* LMBCS-1 string */
2000-05-22 21:24:26 +00:00
static const uint8_t pszLMBCS [ ] = {
2000-03-31 16:53:09 +00:00
0x61 ,
0x01 , 0x29 ,
0x81 ,
0xA0 ,
0x0F , 0x27 ,
0x0F , 0x91 ,
0x14 , 0x0a , 0x74 ,
2001-04-18 19:31:05 +00:00
0x14 , 0xF6 , 0x02 ,
2000-05-22 21:24:26 +00:00
0x14 , 0xd8 , 0x4d , 0x14 , 0xdc , 0x56 , /* UTF-16 surrogate pair */
2000-04-13 17:27:35 +00:00
0x10 , 0x88 , 0xA0 ,
2000-03-31 16:53:09 +00:00
} ;
2000-06-28 17:01:52 +00:00
/* Unicode UChar32 equivalents */
static const UChar32 pszUnicode32 [ ] = {
/* code point */
0x00000061 ,
0x00002013 ,
0x000000FC ,
0x000000E1 ,
0x00000007 ,
0x00000091 ,
0x00000a74 ,
0x00000200 ,
0x00023456 , /* code point for surrogate pair */
0x00005516
} ;
/* Unicode UChar equivalents */
static const UChar pszUnicode [ ] = {
2000-05-22 21:24:26 +00:00
/* code point */
0x0061 ,
0x2013 ,
0x00FC ,
0x00E1 ,
0x0007 ,
0x0091 ,
0x0a74 ,
0x0200 ,
2000-06-28 17:01:52 +00:00
0xD84D , /* low surrogate */
0xDC56 , /* high surrogate */
2000-05-22 21:24:26 +00:00
0x5516
} ;
/* expected test results */
2000-06-28 17:01:52 +00:00
static const int offsets32 [ ] = {
/* number of bytes read, code point */
2001-04-18 19:31:05 +00:00
0 ,
1 ,
3 ,
4 ,
5 ,
7 ,
9 ,
12 ,
15 ,
2000-06-28 17:01:52 +00:00
21 ,
24
} ;
/* expected test results */
static const int offsets [ ] = {
2000-03-31 16:53:09 +00:00
/* number of bytes read, code point */
2001-04-18 19:31:05 +00:00
0 ,
1 ,
3 ,
4 ,
5 ,
7 ,
9 ,
12 ,
15 ,
2000-06-28 17:01:52 +00:00
18 ,
2001-04-18 19:31:05 +00:00
21 ,
24
2000-03-31 16:53:09 +00:00
} ;
2000-05-22 21:24:26 +00:00
2001-04-18 19:31:05 +00:00
UConverter * cnv ;
2000-03-31 16:53:09 +00:00
2000-06-28 17:01:52 +00:00
# define NAME_LMBCS_1 "LMBCS-1"
# define NAME_LMBCS_2 "LMBCS-2"
2000-03-31 16:53:09 +00:00
2000-06-28 17:01:52 +00:00
/* Some basic open/close/property tests on some LMBCS converters */
{
char expected_subchars [ ] = { 0x3F } ; /* ANSI Question Mark */
char new_subchars [ ] = { 0x7F } ; /* subst char used by SmartSuite..*/
char get_subchars [ 1 ] ;
const char * get_name ;
UConverter * cnv1 ;
UConverter * cnv2 ;
int8_t len = sizeof ( get_subchars ) ;
UErrorCode errorCode = U_ZERO_ERROR ;
/* Open */
cnv1 = ucnv_open ( NAME_LMBCS_1 , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Unable to open a LMBCS-1 converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-10-27 00:10:17 +00:00
return ;
2000-06-28 17:01:52 +00:00
}
cnv2 = ucnv_open ( NAME_LMBCS_2 , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Unable to open a LMBCS-2 converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-10-27 00:10:17 +00:00
return ;
2000-06-28 17:01:52 +00:00
}
/* Name */
get_name = ucnv_getName ( cnv1 , & errorCode ) ;
if ( strcmp ( NAME_LMBCS_1 , get_name ) ) {
log_err ( " Unexpected converter name: %s \n " , get_name ) ;
}
get_name = ucnv_getName ( cnv2 , & errorCode ) ;
if ( strcmp ( NAME_LMBCS_2 , get_name ) ) {
log_err ( " Unexpected converter name: %s \n " , get_name ) ;
}
/* substitution chars */
ucnv_getSubstChars ( cnv1 , get_subchars , & len , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Failure on get subst chars: %s \n " , u_errorName ( errorCode ) ) ;
}
if ( len ! = 1 ) {
log_err ( " Unexpected length of sub chars \n " ) ;
}
if ( get_subchars [ 0 ] ! = expected_subchars [ 0 ] ) {
log_err ( " Unexpected value of sub chars \n " ) ;
}
ucnv_setSubstChars ( cnv2 , new_subchars , len , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Failure on set subst chars: %s \n " , u_errorName ( errorCode ) ) ;
}
ucnv_getSubstChars ( cnv2 , get_subchars , & len , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Failure on get subst chars: %s \n " , u_errorName ( errorCode ) ) ;
}
if ( len ! = 1 ) {
log_err ( " Unexpected length of sub chars \n " ) ;
}
if ( get_subchars [ 0 ] ! = new_subchars [ 0 ] ) {
log_err ( " Unexpected value of sub chars \n " ) ;
}
2000-07-06 23:01:50 +00:00
ucnv_close ( cnv1 ) ;
ucnv_close ( cnv2 ) ;
2000-03-31 16:53:09 +00:00
}
2000-06-28 17:01:52 +00:00
/* LMBCS to Unicode - offsets */
2000-03-31 16:53:09 +00:00
{
2000-06-28 17:01:52 +00:00
UErrorCode errorCode = U_ZERO_ERROR ;
2001-04-18 19:31:05 +00:00
2000-08-11 03:35:25 +00:00
const uint8_t * pSource = pszLMBCS ;
const uint8_t * sourceLimit = pszLMBCS + sizeof ( pszLMBCS ) ;
2001-04-18 19:31:05 +00:00
2001-08-25 01:06:41 +00:00
UChar Out [ sizeof ( pszUnicode ) + 1 ] ;
2000-06-28 17:01:52 +00:00
UChar * pOut = Out ;
2001-08-25 01:06:41 +00:00
UChar * OutLimit = Out + sizeof ( pszUnicode ) / sizeof ( UChar ) ;
2000-06-28 17:01:52 +00:00
2000-07-28 03:18:30 +00:00
int32_t off [ sizeof ( offsets ) ] ;
2000-06-28 17:01:52 +00:00
2001-04-18 19:31:05 +00:00
/* last 'offset' in expected results is just the final size.
2000-06-28 17:01:52 +00:00
( Makes other tests easier ) . Compensate here : */
off [ ( sizeof ( offsets ) / sizeof ( offsets [ 0 ] ) ) - 1 ] = sizeof ( pszLMBCS ) ;
2000-03-31 16:53:09 +00:00
2000-05-22 21:24:26 +00:00
2000-06-28 17:01:52 +00:00
cnv = ucnv_open ( " lmbcs " , & errorCode ) ; /* use generic name for LMBCS-1 */
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Unable to open a LMBCS converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-10-27 00:10:17 +00:00
return ;
2000-03-31 16:53:09 +00:00
}
2000-08-11 03:35:25 +00:00
2000-06-28 17:01:52 +00:00
ucnv_toUnicode ( cnv ,
2000-08-11 03:35:25 +00:00
& pOut ,
OutLimit ,
( const char * * ) & pSource ,
2001-04-18 19:31:05 +00:00
( const char * ) sourceLimit ,
2000-08-11 03:35:25 +00:00
off ,
TRUE ,
& errorCode ) ;
2000-06-28 17:01:52 +00:00
if ( memcmp ( off , offsets , sizeof ( offsets ) ) )
{
log_err ( " LMBCS->Uni: Calculated offsets do not match expected results \n " ) ;
}
if ( memcmp ( Out , pszUnicode , sizeof ( pszUnicode ) ) )
{
log_err ( " LMBCS->Uni: Calculated codepoints do not match expected results \n " ) ;
}
ucnv_close ( cnv ) ;
2000-03-31 16:53:09 +00:00
}
2000-06-28 17:01:52 +00:00
{
/* LMBCS to Unicode - getNextUChar */
const char * sourceStart ;
const char * source = ( const char * ) pszLMBCS ;
const char * limit = ( const char * ) pszLMBCS + sizeof ( pszLMBCS ) ;
2000-07-28 04:09:39 +00:00
const UChar32 * results = pszUnicode32 ;
2000-06-28 17:01:52 +00:00
const int * off = offsets32 ;
2000-05-22 21:24:26 +00:00
2000-06-28 17:01:52 +00:00
UErrorCode errorCode = U_ZERO_ERROR ;
uint32_t uniChar ;
cnv = ucnv_open ( " LMBCS-1 " , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Unable to open a LMBCS-1 converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-10-27 00:10:17 +00:00
return ;
2000-06-28 17:01:52 +00:00
}
else
{
while ( source < limit ) {
sourceStart = source ;
uniChar = ucnv_getNextUChar ( cnv , & source , source + ( off [ 1 ] - off [ 0 ] ) , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " LMBCS-1 ucnv_getNextUChar() failed: %s \n " , u_errorName ( errorCode ) ) ;
break ;
} else if ( source - sourceStart ! = off [ 1 ] - off [ 0 ] | | uniChar ! = * results ) {
log_err ( " LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes. \n " ,
uniChar , ( source - sourceStart ) , * results , * off ) ;
break ;
}
results + + ;
off + + ;
}
}
ucnv_close ( cnv ) ;
}
{ /* test locale & optimization group operations: Unicode to LMBCS */
2001-04-18 19:31:05 +00:00
2000-06-28 17:01:52 +00:00
UErrorCode errorCode = U_ZERO_ERROR ;
2000-05-22 21:24:26 +00:00
UConverter * cnv16he = ucnv_open ( " LMBCS-16,locale=he " , & errorCode ) ;
2000-06-28 17:01:52 +00:00
UConverter * cnv16jp = ucnv_open ( " LMBCS-16,locale=ja_JP " , & errorCode ) ;
UConverter * cnv01us = ucnv_open ( " LMBCS-1,locale=us_EN " , & errorCode ) ;
2000-05-22 21:24:26 +00:00
UChar uniString [ ] = { 0x0192 } ; /* Latin Small letter f with hook */
2000-07-28 04:09:39 +00:00
const UChar * pUniOut = uniString ;
UChar * pUniIn = uniString ;
2000-08-11 03:35:25 +00:00
uint8_t lmbcsString [ 4 ] ;
const uint8_t * pLMBCSOut = lmbcsString ;
uint8_t * pLMBCSIn = lmbcsString ;
2000-05-22 21:24:26 +00:00
2000-06-28 17:01:52 +00:00
/* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
2001-04-18 19:31:05 +00:00
ucnv_fromUnicode ( cnv16he ,
( char * * ) & pLMBCSIn , ( const char * ) ( pLMBCSIn + sizeof ( lmbcsString ) / sizeof ( lmbcsString [ 0 ] ) ) ,
& pUniOut , pUniOut + sizeof ( uniString ) / sizeof ( uniString [ 0 ] ) ,
2000-05-22 21:24:26 +00:00
NULL , 1 , & errorCode ) ;
2000-08-11 03:35:25 +00:00
if ( lmbcsString [ 0 ] ! = 0x3 | | lmbcsString [ 1 ] ! = 0x83 )
2000-05-22 21:24:26 +00:00
{
2000-06-28 17:01:52 +00:00
log_err ( " LMBCS-16,locale=he gives unexpected translation \n " ) ;
2000-05-22 21:24:26 +00:00
}
2000-08-11 03:35:25 +00:00
pLMBCSIn = lmbcsString ;
2000-07-28 04:09:39 +00:00
pUniOut = uniString ;
2001-04-18 19:31:05 +00:00
ucnv_fromUnicode ( cnv01us ,
( char * * ) & pLMBCSIn , ( const char * ) ( lmbcsString + sizeof ( lmbcsString ) / sizeof ( lmbcsString [ 0 ] ) ) ,
2000-07-28 04:09:39 +00:00
& pUniOut , pUniOut + sizeof ( uniString ) / sizeof ( uniString [ 0 ] ) ,
2000-05-22 21:24:26 +00:00
NULL , 1 , & errorCode ) ;
2001-04-18 19:31:05 +00:00
2000-08-11 03:35:25 +00:00
if ( lmbcsString [ 0 ] ! = 0x9F )
2000-05-22 21:24:26 +00:00
{
2000-06-28 17:01:52 +00:00
log_err ( " LMBCS-1,locale=US gives unexpected translation \n " ) ;
}
/* single byte char from mbcs char set */
2000-08-11 03:35:25 +00:00
lmbcsString [ 0 ] = 0xAE ; /* 1/2 width katakana letter small Yo */
2000-07-28 04:09:39 +00:00
pLMBCSOut = lmbcsString ;
pUniIn = uniString ;
2001-04-18 19:31:05 +00:00
ucnv_toUnicode ( cnv16jp ,
2000-07-28 04:09:39 +00:00
& pUniIn , pUniIn + 1 ,
2001-04-18 19:31:05 +00:00
( const char * * ) & pLMBCSOut , ( const char * ) ( pLMBCSOut + 1 ) ,
2000-06-28 17:01:52 +00:00
NULL , 1 , & errorCode ) ;
2000-07-28 04:09:39 +00:00
if ( U_FAILURE ( errorCode ) | | pLMBCSOut ! = lmbcsString + 1 | | pUniIn ! = uniString + 1 | | uniString [ 0 ] ! = 0xFF6E )
2000-06-28 17:01:52 +00:00
{
log_err ( " Unexpected results from LMBCS-16 single byte char \n " ) ;
}
/* convert to group 1: should be 3 bytes */
2000-07-28 04:09:39 +00:00
pLMBCSIn = lmbcsString ;
pUniOut = uniString ;
2001-04-18 19:31:05 +00:00
ucnv_fromUnicode ( cnv01us ,
( char * * ) & pLMBCSIn , ( const char * ) ( pLMBCSIn + 3 ) ,
2000-07-28 04:09:39 +00:00
& pUniOut , pUniOut + 1 ,
2000-06-28 17:01:52 +00:00
NULL , 1 , & errorCode ) ;
2001-04-18 19:31:05 +00:00
if ( U_FAILURE ( errorCode ) | | pLMBCSIn ! = lmbcsString + 3 | | pUniOut ! = uniString + 1
2000-08-11 03:35:25 +00:00
| | lmbcsString [ 0 ] ! = 0x10 | | lmbcsString [ 1 ] ! = 0x10 | | lmbcsString [ 2 ] ! = 0xAE )
2000-06-28 17:01:52 +00:00
{
log_err ( " Unexpected results to LMBCS-1 single byte mbcs char \n " ) ;
}
2000-07-28 04:09:39 +00:00
pLMBCSOut = lmbcsString ;
pUniIn = uniString ;
2001-04-18 19:31:05 +00:00
ucnv_toUnicode ( cnv01us ,
2000-07-28 04:09:39 +00:00
& pUniIn , pUniIn + 1 ,
2001-04-18 19:31:05 +00:00
( const char * * ) & pLMBCSOut , ( const char * ) ( pLMBCSOut + 3 ) ,
2000-06-28 17:01:52 +00:00
NULL , 1 , & errorCode ) ;
2000-07-28 04:09:39 +00:00
if ( U_FAILURE ( errorCode ) | | pLMBCSOut ! = lmbcsString + 3 | | pUniIn ! = uniString + 1 | | uniString [ 0 ] ! = 0xFF6E )
2000-06-28 17:01:52 +00:00
{
log_err ( " Unexpected results from LMBCS-1 single byte mbcs char \n " ) ;
}
2000-07-28 04:09:39 +00:00
pLMBCSIn = lmbcsString ;
pUniOut = uniString ;
2001-04-18 19:31:05 +00:00
ucnv_fromUnicode ( cnv16jp ,
( char * * ) & pLMBCSIn , ( const char * ) ( pLMBCSIn + 1 ) ,
2000-07-28 04:09:39 +00:00
& pUniOut , pUniOut + 1 ,
2000-06-28 17:01:52 +00:00
NULL , 1 , & errorCode ) ;
2000-08-11 03:35:25 +00:00
if ( U_FAILURE ( errorCode ) | | pLMBCSIn ! = lmbcsString + 1 | | pUniOut ! = uniString + 1 | | lmbcsString [ 0 ] ! = 0xAE )
2000-06-28 17:01:52 +00:00
{
log_err ( " Unexpected results to LMBCS-16 single byte mbcs char \n " ) ;
2000-05-22 21:24:26 +00:00
}
2000-06-28 17:01:52 +00:00
ucnv_close ( cnv16he ) ;
ucnv_close ( cnv16jp ) ;
ucnv_close ( cnv01us ) ;
}
{
/* Small source buffer testing, LMBCS -> Unicode */
UErrorCode errorCode = U_ZERO_ERROR ;
2001-04-18 19:31:05 +00:00
2000-08-11 03:35:25 +00:00
const uint8_t * pSource = pszLMBCS ;
const uint8_t * sourceLimit = pszLMBCS + sizeof ( pszLMBCS ) ;
2000-06-28 17:01:52 +00:00
int codepointCount = 0 ;
2001-08-25 01:06:41 +00:00
UChar Out [ sizeof ( pszUnicode ) + 1 ] ;
2000-06-28 17:01:52 +00:00
UChar * pOut = Out ;
2001-08-25 01:06:41 +00:00
UChar * OutLimit = Out + sizeof ( pszUnicode ) / sizeof ( UChar ) ;
2000-06-28 17:01:52 +00:00
cnv = ucnv_open ( NAME_LMBCS_1 , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Unable to open a LMBCS-1 converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-10-27 00:10:17 +00:00
return ;
2000-06-28 17:01:52 +00:00
}
while ( ( pSource < sourceLimit ) & & U_SUCCESS ( errorCode ) )
{
2001-04-18 19:31:05 +00:00
2000-06-28 17:01:52 +00:00
ucnv_toUnicode ( cnv ,
2000-08-11 03:35:25 +00:00
& pOut ,
OutLimit ,
( const char * * ) & pSource ,
( const char * ) ( pSource + 1 ) , /* claim that this is a 1- byte buffer */
NULL ,
FALSE , /* FALSE means there might be more chars in the next buffer */
& errorCode ) ;
2000-06-28 17:01:52 +00:00
if ( U_SUCCESS ( errorCode ) )
2000-08-11 03:35:25 +00:00
{
if ( ( pSource - ( const uint8_t * ) pszLMBCS ) = = offsets [ codepointCount + 1 ] )
2000-06-28 17:01:52 +00:00
{
/* we are on to the next code point: check value */
2001-04-18 19:31:05 +00:00
2000-06-28 17:01:52 +00:00
if ( Out [ 0 ] ! = pszUnicode [ codepointCount ] ) {
log_err ( " LMBCS->Uni result %lx should have been %lx \n " ,
Out [ 0 ] , pszUnicode [ codepointCount ] ) ;
}
2001-04-18 19:31:05 +00:00
2000-06-28 17:01:52 +00:00
pOut = Out ; /* reset for accumulating next code point */
codepointCount + + ;
}
}
else
{
log_err ( " Unexpected Error on toUnicode: %s \n " , u_errorName ( errorCode ) ) ;
}
}
{
/* limits & surrogate error testing */
2000-08-11 03:35:25 +00:00
uint8_t LIn [ sizeof ( pszLMBCS ) ] ;
const uint8_t * pLIn = LIn ;
2001-04-18 19:31:05 +00:00
2000-06-28 17:01:52 +00:00
char LOut [ sizeof ( pszLMBCS ) ] ;
char * pLOut = LOut ;
2001-04-18 19:31:05 +00:00
2000-06-28 17:01:52 +00:00
UChar UOut [ sizeof ( pszUnicode ) ] ;
UChar * pUOut = UOut ;
2001-04-18 19:31:05 +00:00
2000-06-28 17:01:52 +00:00
UChar UIn [ sizeof ( pszUnicode ) ] ;
const UChar * pUIn = UIn ;
2001-04-18 19:31:05 +00:00
2000-08-01 03:25:35 +00:00
int32_t off [ sizeof ( offsets ) ] ;
2000-06-28 17:01:52 +00:00
UChar32 uniChar ;
2001-02-01 01:55:53 +00:00
errorCode = U_ZERO_ERROR ;
/* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
2000-06-28 17:01:52 +00:00
ucnv_fromUnicode ( cnv , & pLOut , pLOut + 1 , & pUIn , pUIn - 1 , off , FALSE , & errorCode ) ;
if ( errorCode ! = U_ILLEGAL_ARGUMENT_ERROR )
{
log_err ( " Unexpected Error on negative source request to ucnv_fromUnicode: %s \n " , u_errorName ( errorCode ) ) ;
}
errorCode = U_ZERO_ERROR ;
2000-08-11 03:35:25 +00:00
ucnv_toUnicode ( cnv , & pUOut , pUOut + 1 , ( const char * * ) & pLIn , ( const char * ) ( pLIn - 1 ) , off , FALSE , & errorCode ) ;
2000-06-28 17:01:52 +00:00
if ( errorCode ! = U_ILLEGAL_ARGUMENT_ERROR )
{
log_err ( " Unexpected Error on negative source request to ucnv_toUnicode: %s \n " , u_errorName ( errorCode ) ) ;
}
errorCode = U_ZERO_ERROR ;
2001-04-18 19:31:05 +00:00
2000-08-11 03:35:25 +00:00
uniChar = ucnv_getNextUChar ( cnv , ( const char * * ) & pLIn , ( const char * ) ( pLIn - 1 ) , & errorCode ) ;
2000-06-28 17:01:52 +00:00
if ( errorCode ! = U_ILLEGAL_ARGUMENT_ERROR )
{
log_err ( " Unexpected Error on negative source request to ucnv_getNextUChar: %s \n " , u_errorName ( errorCode ) ) ;
}
errorCode = U_ZERO_ERROR ;
/* 0 byte source request - no error, no pointer movement */
2000-08-11 03:35:25 +00:00
ucnv_toUnicode ( cnv , & pUOut , pUOut + 1 , ( const char * * ) & pLIn , ( const char * ) pLIn , off , FALSE , & errorCode ) ;
2000-06-28 17:01:52 +00:00
ucnv_fromUnicode ( cnv , & pLOut , pLOut + 1 , & pUIn , pUIn , off , FALSE , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " 0 byte source request: unexpected error: %s \n " , u_errorName ( errorCode ) ) ;
}
if ( ( pUOut ! = UOut ) | | ( pUIn ! = UIn ) | | ( pLOut ! = LOut ) | | ( pLIn ! = LIn ) )
{
log_err ( " Unexpected pointer move in 0 byte source request \n " ) ;
}
2000-07-13 23:55:33 +00:00
/*0 byte source request - GetNextUChar : error & value == fffe or ffff */
2000-08-11 03:35:25 +00:00
uniChar = ucnv_getNextUChar ( cnv , ( const char * * ) & pLIn , ( const char * ) pLIn , & errorCode ) ;
2000-06-28 17:01:52 +00:00
if ( errorCode ! = U_ILLEGAL_ARGUMENT_ERROR )
{
log_err ( " Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s \n " , u_errorName ( errorCode ) ) ;
}
2000-07-13 23:55:33 +00:00
if ( ( ( uint32_t ) uniChar - 0xfffe ) > 1 ) /* not 0xfffe<=uniChar<=0xffff */
2000-06-28 17:01:52 +00:00
{
log_err ( " Unexpected value on 0-byte source request to ucnv_getnextUChar \n " ) ;
}
2000-08-14 23:35:57 +00:00
errorCode = U_ZERO_ERROR ;
2000-06-28 17:01:52 +00:00
2000-08-11 19:51:13 +00:00
/* running out of target room : U_BUFFER_OVERFLOW_ERROR */
2000-06-28 17:01:52 +00:00
pUIn = pszUnicode ;
2001-08-25 01:06:41 +00:00
ucnv_fromUnicode ( cnv , & pLOut , pLOut + offsets [ 4 ] , & pUIn , pUIn + sizeof ( pszUnicode ) / sizeof ( UChar ) , off , FALSE , & errorCode ) ;
2000-08-11 19:51:13 +00:00
if ( errorCode ! = U_BUFFER_OVERFLOW_ERROR | | pLOut ! = LOut + offsets [ 4 ] | | pUIn ! = pszUnicode + 4 )
2000-06-28 17:01:52 +00:00
{
log_err ( " Unexpected results on out of target room to ucnv_fromUnicode \n " ) ;
}
2000-08-14 23:35:57 +00:00
errorCode = U_ZERO_ERROR ;
2000-06-28 17:01:52 +00:00
pLIn = pszLMBCS ;
2000-08-11 03:35:25 +00:00
ucnv_toUnicode ( cnv , & pUOut , pUOut + 4 , ( const char * * ) & pLIn , ( const char * ) ( pLIn + sizeof ( pszLMBCS ) ) , off , FALSE , & errorCode ) ;
2000-08-11 19:51:13 +00:00
if ( errorCode ! = U_BUFFER_OVERFLOW_ERROR | | pUOut ! = UOut + 4 | | pLIn ! = ( const uint8_t * ) pszLMBCS + offsets [ 4 ] )
2000-06-28 17:01:52 +00:00
{
log_err ( " Unexpected results on out of target room to ucnv_toUnicode \n " ) ;
}
2001-04-18 19:31:05 +00:00
2000-06-28 17:01:52 +00:00
/* unpaired or chopped LMBCS surrogates */
/* OK high surrogate, Low surrogate is chopped */
2001-04-18 19:31:05 +00:00
LIn [ 0 ] = 0x14 ;
LIn [ 1 ] = 0xD8 ;
LIn [ 2 ] = 0x01 ;
LIn [ 3 ] = 0x14 ;
LIn [ 4 ] = 0xDC ;
2000-06-28 17:01:52 +00:00
pLIn = LIn ;
2000-08-14 23:35:57 +00:00
errorCode = U_ZERO_ERROR ;
2000-06-28 17:01:52 +00:00
pUOut = UOut ;
2001-04-18 19:31:05 +00:00
2001-08-31 02:21:34 +00:00
ucnv_toUnicode ( cnv , & pUOut , pUOut + sizeof ( UOut ) / sizeof ( UChar ) , ( const char * * ) & pLIn , ( const char * ) ( pLIn + 5 ) , off , TRUE , & errorCode ) ;
2001-02-26 19:29:14 +00:00
if ( UOut [ 0 ] ! = 0xD801 | | errorCode ! = U_TRUNCATED_CHAR_FOUND | | pUOut ! = UOut + 1 | | pLIn ! = LIn + 5 )
2000-06-28 17:01:52 +00:00
{
log_err ( " Unexpected results on chopped low surrogate \n " ) ;
}
2001-04-18 19:31:05 +00:00
2000-06-28 17:01:52 +00:00
/* chopped at surrogate boundary */
2001-04-18 19:31:05 +00:00
LIn [ 0 ] = 0x14 ;
LIn [ 1 ] = 0xD8 ;
LIn [ 2 ] = 0x01 ;
2000-06-28 17:01:52 +00:00
pLIn = LIn ;
2000-08-14 23:35:57 +00:00
errorCode = U_ZERO_ERROR ;
2000-06-28 17:01:52 +00:00
pUOut = UOut ;
2001-08-31 02:21:34 +00:00
ucnv_toUnicode ( cnv , & pUOut , pUOut + sizeof ( UOut ) / sizeof ( UChar ) , ( const char * * ) & pLIn , ( const char * ) ( pLIn + 3 ) , off , TRUE , & errorCode ) ;
2000-06-28 17:01:52 +00:00
if ( UOut [ 0 ] ! = 0xD801 | | U_FAILURE ( errorCode ) | | pUOut ! = UOut + 1 | | pLIn ! = LIn + 3 )
{
log_err ( " Unexpected results on chopped at surrogate boundary \n " ) ;
}
/* unpaired surrogate plus valid Unichar */
2001-04-18 19:31:05 +00:00
LIn [ 0 ] = 0x14 ;
LIn [ 1 ] = 0xD8 ;
LIn [ 2 ] = 0x01 ;
LIn [ 3 ] = 0x14 ;
LIn [ 4 ] = 0xC9 ;
2000-08-11 03:35:25 +00:00
LIn [ 5 ] = 0xD0 ;
2000-06-28 17:01:52 +00:00
pLIn = LIn ;
2000-08-14 23:35:57 +00:00
errorCode = U_ZERO_ERROR ;
2000-06-28 17:01:52 +00:00
pUOut = UOut ;
2001-08-31 02:21:34 +00:00
ucnv_toUnicode ( cnv , & pUOut , pUOut + sizeof ( UOut ) / sizeof ( UChar ) , ( const char * * ) & pLIn , ( const char * ) ( pLIn + 6 ) , off , TRUE , & errorCode ) ;
2000-06-28 17:01:52 +00:00
if ( UOut [ 0 ] ! = 0xD801 | | UOut [ 1 ] ! = 0xC9D0 | | U_FAILURE ( errorCode ) | | pUOut ! = UOut + 2 | | pLIn ! = LIn + 6 )
{
log_err ( " Unexpected results after unpaired surrogate plus valid Unichar \n " ) ;
}
/* unpaired surrogate plus chopped Unichar */
2001-04-18 19:31:05 +00:00
LIn [ 0 ] = 0x14 ;
LIn [ 1 ] = 0xD8 ;
LIn [ 2 ] = 0x01 ;
LIn [ 3 ] = 0x14 ;
LIn [ 4 ] = 0xC9 ;
2000-06-28 17:01:52 +00:00
pLIn = LIn ;
2000-08-14 23:35:57 +00:00
errorCode = U_ZERO_ERROR ;
2000-06-28 17:01:52 +00:00
pUOut = UOut ;
2001-08-31 02:21:34 +00:00
ucnv_toUnicode ( cnv , & pUOut , pUOut + sizeof ( UOut ) / sizeof ( UChar ) , ( const char * * ) & pLIn , ( const char * ) ( pLIn + 5 ) , off , TRUE , & errorCode ) ;
2001-02-26 19:29:14 +00:00
if ( UOut [ 0 ] ! = 0xD801 | | errorCode ! = U_TRUNCATED_CHAR_FOUND | | pUOut ! = UOut + 1 | | pLIn ! = LIn + 5 )
2000-06-28 17:01:52 +00:00
{
log_err ( " Unexpected results after unpaired surrogate plus chopped Unichar \n " ) ;
}
/* unpaired surrogate plus valid non-Unichar */
2001-04-18 19:31:05 +00:00
LIn [ 0 ] = 0x14 ;
LIn [ 1 ] = 0xD8 ;
LIn [ 2 ] = 0x01 ;
LIn [ 3 ] = 0x0F ;
LIn [ 4 ] = 0x3B ;
2000-06-28 17:01:52 +00:00
pLIn = LIn ;
2000-08-14 23:35:57 +00:00
errorCode = U_ZERO_ERROR ;
2000-06-28 17:01:52 +00:00
pUOut = UOut ;
2001-08-31 02:21:34 +00:00
ucnv_toUnicode ( cnv , & pUOut , pUOut + sizeof ( UOut ) / sizeof ( UChar ) , ( const char * * ) & pLIn , ( const char * ) ( pLIn + 5 ) , off , TRUE , & errorCode ) ;
2000-06-28 17:01:52 +00:00
if ( UOut [ 0 ] ! = 0xD801 | | UOut [ 1 ] ! = 0x1B | | U_FAILURE ( errorCode ) | | pUOut ! = UOut + 2 | | pLIn ! = LIn + 5 )
{
log_err ( " Unexpected results after unpaired surrogate plus valid non-Unichar \n " ) ;
}
/* unpaired surrogate plus chopped non-Unichar */
2001-04-18 19:31:05 +00:00
LIn [ 0 ] = 0x14 ;
LIn [ 1 ] = 0xD8 ;
LIn [ 2 ] = 0x01 ;
LIn [ 3 ] = 0x0F ;
2000-06-28 17:01:52 +00:00
pLIn = LIn ;
2000-08-14 23:35:57 +00:00
errorCode = U_ZERO_ERROR ;
2000-06-28 17:01:52 +00:00
pUOut = UOut ;
2001-08-31 02:21:34 +00:00
ucnv_toUnicode ( cnv , & pUOut , pUOut + sizeof ( UOut ) / sizeof ( UChar ) , ( const char * * ) & pLIn , ( const char * ) ( pLIn + 4 ) , off , TRUE , & errorCode ) ;
2000-06-28 17:01:52 +00:00
2001-02-26 19:29:14 +00:00
if ( UOut [ 0 ] ! = 0xD801 | | errorCode ! = U_TRUNCATED_CHAR_FOUND | | pUOut ! = UOut + 1 | | pLIn ! = LIn + 4 )
2000-06-28 17:01:52 +00:00
{
log_err ( " Unexpected results after unpaired surrogate plus chopped non-Unichar \n " ) ;
}
}
2000-05-22 21:24:26 +00:00
}
2000-06-28 17:01:52 +00:00
ucnv_close ( cnv ) ; /* final cleanup */
2000-03-31 16:53:09 +00:00
}
2000-11-21 04:05:39 +00:00
static void TestJitterbug255 ( )
2000-02-05 00:01:54 +00:00
{
2000-08-11 16:33:09 +00:00
const uint8_t testBytes [ ] = { 0x95 , 0xcf , 0x8a , 0xb7 , 0x0d , 0x0a , 0x00 } ;
2000-08-11 03:35:25 +00:00
const uint8_t * testBuffer = testBytes ;
2000-08-11 16:33:09 +00:00
const uint8_t * testEnd = testBytes + sizeof ( testBytes ) ;
2000-02-05 00:01:54 +00:00
UErrorCode status = U_ZERO_ERROR ;
2000-04-13 17:27:35 +00:00
UChar32 result ;
2000-02-05 00:01:54 +00:00
UConverter * cnv = 0 ;
2001-04-18 19:31:05 +00:00
cnv = ucnv_open ( " shift-jis " , & status ) ;
2000-02-05 00:01:54 +00:00
if ( U_FAILURE ( status ) | | cnv = = 0 ) {
log_err ( " Failed to open the converter for SJIS. \n " ) ;
return ;
}
while ( testBuffer ! = testEnd )
{
2000-08-11 03:35:25 +00:00
result = ucnv_getNextUChar ( cnv , ( const char * * ) & testBuffer , ( const char * ) testEnd , & status ) ;
2000-02-05 00:01:54 +00:00
if ( U_FAILURE ( status ) )
{
log_err ( " Failed to convert the next UChar for SJIS. \n " ) ;
break ;
}
}
ucnv_close ( cnv ) ;
}
2000-04-18 21:57:47 +00:00
2001-03-16 23:03:31 +00:00
static void TestJitterbug792 ( )
{
# define U_NUM_792_CONVERTERS 3
# define U_MAX_792_TEST_SIZE 21
2001-04-18 19:31:05 +00:00
/* FOR ICU 1.8 we have patched the UCM files.
This test is to make sure there are no accidental regressions to the old mappings
2001-03-16 23:03:31 +00:00
Some day the patch may be unnecessary , after the IBM repository catches up .
*/
const char * ConverterNames [ U_NUM_792_CONVERTERS ] =
{
2001-04-18 19:31:05 +00:00
" ibm-5351 " ,
" ibm-5352 " ,
2001-03-16 23:03:31 +00:00
" ibm-5353 "
} ;
2001-04-18 19:31:05 +00:00
const uint16_t inChars [ U_NUM_792_CONVERTERS ] [ U_MAX_792_TEST_SIZE ] =
2001-03-16 23:03:31 +00:00
{
{ 0x00A1 , 0x00D7 , 0x00B8 , 0x00F7 , 0x00BF , 0x05F3 , 0x05F4 , 0x000 } ,
{ 0x0679 , 0xFB66 , 0xFB68 , 0x0688 , 0xFB88 , 0x06A9 , 0xFB8E , 0xFB90 , 0x0691 , 0xFB8C , 0x06BA , 0xFB9E , 0x06BE , 0xFBAA , 0xFBAC , 0x06C1 , 0xFBA6 , 0xFBA8 , 0x06D2 , 0xFBAE , 0x000 } ,
{ 0x00A8 , 0x02C7 , 0x00B8 , 0x00AF , 0x02DB , 0x00B4 , 0x02D9 , 0x000 }
} ;
const uint16_t * pInChars ;
2001-04-18 19:31:05 +00:00
2001-03-16 23:03:31 +00:00
const uint8_t outBytes [ U_NUM_792_CONVERTERS ] [ U_MAX_792_TEST_SIZE ] =
{
{ 0xA1 , 0xAA , 0xB8 , 0xBA , 0xBF , 0xD7 , 0xD8 , 0x00 } ,
{ 0x8A , 0x8A , 0x8A , 0x8F , 0x8F , 0x98 , 0x98 , 0x98 , 0x9A , 0x9A , 0x9F , 0x9F , 0xAA , 0xAA , 0xAA , 0xC0 , 0xC0 , 0xC0 , 0xFF , 0xFF , 0x00 } ,
{ 0x8D , 0x8E , 0x8F , 0x9D , 0x9E , 0xB4 , 0xFF , 0x00 }
} ;
2001-03-22 03:36:53 +00:00
char outBuffer [ U_MAX_792_TEST_SIZE ] ;
2001-03-16 23:03:31 +00:00
UErrorCode status = U_ZERO_ERROR ;
2001-03-22 03:36:53 +00:00
char * pOutBuffer ;
2001-03-16 23:03:31 +00:00
UConverter * cnv = 0 ;
int i ;
for ( i = 0 ; i < U_NUM_792_CONVERTERS ; i + + )
{
2001-04-18 19:31:05 +00:00
cnv = ucnv_open ( ConverterNames [ i ] , & status ) ;
2001-03-16 23:03:31 +00:00
if ( U_FAILURE ( status ) | | cnv = = 0 ) {
log_err ( " Failed to open the converter for %s \n " , ConverterNames [ i ] ) ;
return ;
}
ucnv_setFallback ( cnv , TRUE ) ;
pOutBuffer = outBuffer ;
pInChars = inChars [ i ] ;
2001-04-18 19:31:05 +00:00
ucnv_fromUnicode ( cnv ,
& pOutBuffer , outBuffer + sizeof ( outBuffer ) ,
( const UChar * * ) & pInChars , pInChars + u_strlen ( pInChars ) + 1 ,
2001-03-16 23:03:31 +00:00
NULL , TRUE , & status ) ;
2001-04-18 19:31:05 +00:00
2001-03-16 23:03:31 +00:00
if ( U_FAILURE ( status ) ) {
log_err ( " Failed to convert correctly for %s \n " , ConverterNames [ i ] ) ;
}
2001-03-22 03:36:53 +00:00
if ( strcmp ( outBuffer , ( const char * ) outBytes [ i ] ) ) {
2001-03-16 23:03:31 +00:00
log_err ( " Failed to correctly convert buffer for %s \n " , ConverterNames [ i ] ) ;
}
ucnv_close ( cnv ) ;
}
}
2000-11-21 04:05:39 +00:00
static void TestEBCDICUS4XML ( )
2000-04-18 21:57:47 +00:00
{
UChar unicodes_x [ ] = { 0x0000 , 0x0000 , 0x0000 , 0x0000 } ;
2001-09-01 04:23:43 +00:00
static const UChar toUnicodeMaps_x [ ] = { 0x000A , 0x000A , 0x000D , 0x0000 } ;
static const char fromUnicodeMaps_x [ ] = { 0x25 , 0x25 , 0x0D , 0x00 } ;
static const char newLines_x [ ] = { 0x25 , 0x15 , 0x0D , 0x00 } ;
2000-04-18 21:57:47 +00:00
char target_x [ ] = { 0x00 , 0x00 , 0x00 , 0x00 } ;
UChar * unicodes = unicodes_x ;
const UChar * toUnicodeMaps = toUnicodeMaps_x ;
char * target = target_x ;
const char * fromUnicodeMaps = fromUnicodeMaps_x , * newLines = newLines_x ;
UErrorCode status = U_ZERO_ERROR ;
UConverter * cnv = 0 ;
cnv = ucnv_open ( " ebcdic-xml-us " , & status ) ;
if ( U_FAILURE ( status ) | | cnv = = 0 ) {
log_err ( " Failed to open the converter for EBCDIC-XML-US. \n " ) ;
2001-09-01 04:23:43 +00:00
return ;
2000-04-18 21:57:47 +00:00
}
ucnv_toUnicode ( cnv , & unicodes , unicodes + 3 , ( const char * * ) & newLines , newLines + 3 , NULL , TRUE , & status ) ;
if ( U_FAILURE ( status ) | | memcmp ( unicodes_x , toUnicodeMaps , sizeof ( UChar ) * 3 ) ! = 0 ) {
2001-09-01 04:23:43 +00:00
log_err ( " To Unicode conversion failed in EBCDICUS4XML test. %s \n " ,
u_errorName ( status ) ) ;
printUSeqErr ( unicodes_x , 3 ) ;
printUSeqErr ( toUnicodeMaps , 3 ) ;
2000-04-18 21:57:47 +00:00
}
2001-09-01 04:23:43 +00:00
status = U_ZERO_ERROR ;
2000-04-18 21:57:47 +00:00
ucnv_fromUnicode ( cnv , & target , target + 3 , ( const UChar * * ) & toUnicodeMaps , toUnicodeMaps + 3 , NULL , TRUE , & status ) ;
if ( U_FAILURE ( status ) | | memcmp ( target_x , fromUnicodeMaps , sizeof ( char ) * 3 ) ! = 0 ) {
2001-09-01 04:23:43 +00:00
log_err ( " From Unicode conversion failed in EBCDICUS4XML test. %s \n " ,
u_errorName ( status ) ) ;
2001-09-18 21:02:14 +00:00
printSeqErr ( ( const unsigned char * ) target_x , 3 ) ;
printSeqErr ( ( const unsigned char * ) fromUnicodeMaps , 3 ) ;
2000-04-18 21:57:47 +00:00
}
ucnv_close ( cnv ) ;
}
2001-11-05 23:17:51 +00:00
static void TestJitterbug981 ( ) {
const UChar * rules ;
int32_t rules_length , target_cap , bytes_needed ;
UErrorCode status = U_ZERO_ERROR ;
UConverter * utf8cnv ;
UCollator * myCollator ;
char buff [ 50000 ] ;
int numNeeded = 0 ;
utf8cnv = ucnv_open ( " utf8 " , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err ( " Could not open UTF-8 converter. Error: %s " , u_errorName ( status ) ) ;
}
myCollator = ucol_open ( " zh " , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err ( " Could not open collator for zh locale. Error: %s " , u_errorName ( status ) ) ;
}
rules = ucol_getRules ( myCollator , & rules_length ) ;
target_cap = 0 ;
do {
ucnv_reset ( utf8cnv ) ;
status = U_ZERO_ERROR ;
bytes_needed = ucnv_fromUChars ( utf8cnv , buff , target_cap ,
rules , rules_length , & status ) ;
target_cap = ( bytes_needed > target_cap ) ? bytes_needed : target_cap + 1 ;
if ( numNeeded ! = 0 & & numNeeded ! = bytes_needed ) {
log_err ( " ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes " ) ;
}
numNeeded = bytes_needed ;
} while ( status = = U_BUFFER_OVERFLOW_ERROR ) ;
ucol_close ( myCollator ) ;
2001-11-06 00:45:10 +00:00
ucnv_close ( utf8cnv ) ;
2001-11-05 23:17:51 +00:00
}
2000-06-22 01:18:30 +00:00
2001-11-06 00:45:10 +00:00
static void TestJitterbug1293 ( ) {
UChar src [ ] = { 0x30DE , 0x30A4 , 0x5E83 , 0x544A , 0x30BF , 0x30A4 , 0x30D7 , 0x000 } ;
char target [ 256 ] ;
UErrorCode status = U_ZERO_ERROR ;
UConverter * conv = NULL ;
int32_t target_cap , bytes_needed , numNeeded = 0 ;
conv = ucnv_open ( " shift-jis " , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err ( " Could not open Shift-Jis converter. Error: %s " , u_errorName ( status ) ) ;
return ;
}
do {
target_cap = 0 ;
bytes_needed = ucnv_fromUChars ( conv , target , 256 , src , u_strlen ( src ) , & status ) ;
target_cap = ( bytes_needed > target_cap ) ? bytes_needed : target_cap + 1 ;
if ( numNeeded ! = 0 & & numNeeded ! = bytes_needed ) {
log_err ( " ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes " ) ;
}
numNeeded = bytes_needed ;
} while ( status = = U_BUFFER_OVERFLOW_ERROR ) ;
if ( U_FAILURE ( status ) ) {
log_err ( " An error occured in ucnv_fromUChars. Error: %s " , u_errorName ( status ) ) ;
return ;
}
ucnv_close ( conv ) ;
}
2000-06-29 02:53:29 +00:00
# endif