2000-01-15 02:00:06 +00:00
/********************************************************************
2001-04-18 19:31:05 +00:00
* COPYRIGHT :
2011-01-19 03:30:52 +00:00
* Copyright ( c ) 1997 - 2011 , International Business Machines Corporation and
2000-01-15 02:00:06 +00:00
* others . All Rights Reserved .
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2006-05-26 04:44:31 +00:00
/*******************************************************************************
1999-08-16 21:50:52 +00:00
*
* File CCONVTST . C
*
* Modification History :
2001-04-18 19:31:05 +00:00
* Name Description
1999-08-16 21:50:52 +00:00
* Steven R . Loomis 7 / 8 / 1999 Adding input buffer test
2006-05-26 04:44:31 +00:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
1999-08-16 21:50:52 +00:00
*/
# include <stdio.h>
2000-07-21 23:28:07 +00:00
# include "cstring.h"
1999-12-28 23:57:50 +00:00
# include "unicode/uloc.h"
# include "unicode/ucnv.h"
# include "unicode/ucnv_err.h"
2008-08-07 21:22:54 +00:00
# include "unicode/ucnv_cb.h"
1999-08-16 21:50:52 +00:00
# include "cintltst.h"
1999-12-28 23:57:50 +00:00
# include "unicode/utypes.h"
# include "unicode/ustring.h"
2001-11-05 23:17:51 +00:00
# include "unicode/ucol.h"
2002-02-07 21:36:52 +00:00
# include "cmemory.h"
2009-08-26 01:02:40 +00:00
# include "nucnvtst.h"
1999-08-16 21:50:52 +00:00
2003-08-01 14:30:29 +00:00
static void TestNextUChar ( UConverter * cnv , const char * source , const char * limit , const int32_t results [ ] , const char * message ) ;
2000-06-22 23:46:02 +00:00
static void TestNextUCharError ( UConverter * cnv , const char * source , const char * limit , UErrorCode expected , const char * message ) ;
2002-09-20 17:54:45 +00:00
# if !UCONFIG_NO_COLLATION
2001-11-13 04:28:44 +00:00
static void TestJitterbug981 ( void ) ;
2002-09-20 17:54:45 +00:00
# endif
2001-11-13 04:28:44 +00:00
static void TestJitterbug1293 ( void ) ;
2000-11-21 04:05:39 +00:00
static void TestNewConvertWithBufferSizes ( int32_t osize , int32_t isize ) ;
static void TestConverterTypesAndStarters ( void ) ;
static void TestAmbiguous ( void ) ;
2002-02-08 04:53:41 +00:00
static void TestSignatureDetection ( void ) ;
2001-01-09 22:57:47 +00:00
static void TestUTF7 ( void ) ;
2002-11-07 21:02:24 +00:00
static void TestIMAP ( void ) ;
2000-11-21 04:05:39 +00:00
static void TestUTF8 ( void ) ;
2002-07-02 22:52:30 +00:00
static void TestCESU8 ( void ) ;
2002-06-11 17:59:45 +00:00
static void TestUTF16 ( void ) ;
2000-11-21 04:05:39 +00:00
static void TestUTF16BE ( void ) ;
static void TestUTF16LE ( void ) ;
2002-06-11 17:59:45 +00:00
static void TestUTF32 ( void ) ;
2000-12-20 00:02:15 +00:00
static void TestUTF32BE ( void ) ;
static void TestUTF32LE ( void ) ;
2000-11-21 04:05:39 +00:00
static void TestLATIN1 ( void ) ;
2006-07-28 22:58:29 +00:00
# if !UCONFIG_NO_LEGACY_CONVERSION
2000-11-21 04:05:39 +00:00
static void TestSBCS ( void ) ;
static void TestDBCS ( void ) ;
static void TestMBCS ( void ) ;
2009-08-26 01:02:40 +00:00
# if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
static void TestICCRunout ( void ) ;
# endif
2006-07-28 22:58:29 +00:00
2003-12-03 22:53:14 +00:00
# ifdef U_ENABLE_GENERIC_ISO_2022
2000-11-21 04:05:39 +00:00
static void TestISO_2022 ( void ) ;
2003-12-03 22:53:14 +00:00
# endif
2006-07-28 22:58:29 +00:00
2000-11-21 04:05:39 +00:00
static void TestISO_2022_JP ( void ) ;
static void TestISO_2022_JP_1 ( void ) ;
static void TestISO_2022_JP_2 ( void ) ;
static void TestISO_2022_KR ( void ) ;
2001-02-24 02:52:14 +00:00
static void TestISO_2022_KR_1 ( void ) ;
2000-11-21 04:05:39 +00:00
static void TestISO_2022_CN ( void ) ;
2010-05-25 22:17:12 +00:00
#if 0
/*
* ICU 4.4 ( ticket # 7314 ) removes mappings for CNS 11643 planes 3. .7
*/
2000-11-21 04:05:39 +00:00
static void TestISO_2022_CN_EXT ( void ) ;
2010-05-25 22:17:12 +00:00
# endif
2001-02-23 04:40:39 +00:00
static void TestJIS ( void ) ;
2000-11-21 04:05:39 +00:00
static void TestHZ ( void ) ;
2006-07-28 22:58:29 +00:00
# endif
2001-03-02 23:55:49 +00:00
static void TestSCSU ( void ) ;
2006-07-28 22:58:29 +00:00
# if !UCONFIG_NO_LEGACY_CONVERSION
2000-11-21 04:05:39 +00:00
static void TestEBCDIC_STATEFUL ( void ) ;
static void TestGB18030 ( void ) ;
static void TestLMBCS ( void ) ;
static void TestJitterbug255 ( void ) ;
static void TestEBCDICUS4XML ( void ) ;
2010-05-25 22:17:12 +00:00
#if 0
/*
* ICU 4.4 ( ticket # 7314 ) removes mappings for CNS 11643 planes 3. .7
*/
2001-05-11 02:30:47 +00:00
static void TestJitterbug915 ( void ) ;
2010-05-25 22:17:12 +00:00
# endif
2001-07-14 02:29:21 +00:00
static void TestISCII ( void ) ;
2006-07-28 22:58:29 +00:00
static void TestCoverageMBCS ( void ) ;
static void TestJitterbug2346 ( void ) ;
static void TestJitterbug2411 ( void ) ;
2006-08-15 23:21:39 +00:00
static void TestJB5275 ( void ) ;
static void TestJB5275_1 ( void ) ;
2008-03-12 23:22:07 +00:00
static void TestJitterbug6175 ( void ) ;
2011-02-23 22:21:58 +00:00
static void TestIsFixedWidth ( void ) ;
2006-07-28 22:58:29 +00:00
# endif
2009-08-26 01:02:40 +00:00
static void TestInBufSizes ( void ) ;
2006-07-28 22:58:29 +00:00
static void TestRoundTrippingAllUTF ( void ) ;
2001-07-14 02:29:21 +00:00
static void TestConv ( const uint16_t in [ ] ,
2002-07-29 21:04:18 +00:00
int len ,
const char * conv ,
const char * lang ,
2001-07-14 02:29:21 +00:00
char byteArr [ ] ,
int byteArrLen ) ;
2001-05-31 23:30:09 +00:00
2002-07-17 02:41:04 +00:00
/* open a converter, using test data if it begins with '@' */
static UConverter * my_ucnv_open ( const char * cnv , UErrorCode * err ) ;
2001-05-31 23:30:09 +00:00
1999-08-16 21:50:52 +00:00
/* Number of elements in the scratch output/offset buffers of the conversion helpers. */
#define NEW_MAX_BUFFER 999

/* Per-call input and output chunk sizes used by the conversion helpers in this file. */
static int32_t gInBufferSize = NEW_MAX_BUFFER;
static int32_t gOutBufferSize = NEW_MAX_BUFFER;

/* Label of the conversion currently under test; filled in by setNuConvTestName(). */
static char gNuConvTestName[1024];

/*
 * Smaller of two values (works for integers and for pointers into the same array).
 * Every argument and the whole expansion are parenthesized so that operands
 * containing low-precedence operators are compared as written.
 * Note: each argument may be evaluated twice, so avoid side effects.
 */
#define nct_min(x,y) (((x) < (y)) ? (x) : (y))
2002-07-17 02:41:04 +00:00
/*
 * Open a converter by name.
 * A name that begins with '@' is opened from the test data package
 * (the '@' itself is skipped); any other name, including NULL, is passed
 * straight to ucnv_open().
 *
 * @param cnv  converter name, optionally prefixed with '@'
 * @param err  in/out ICU error code
 * @return     the value returned by ucnv_openPackage()/ucnv_open()
 */
static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err)
{
    /* Compare against the single character '@'; a padded multi-character
       constant can never equal one char and would disable this branch. */
    if (cnv != NULL && cnv[0] == '@') {
        return ucnv_openPackage(loadTestData(err), cnv + 1, err);
    }
    return ucnv_open(cnv, err);
}
2000-11-21 04:05:39 +00:00
/* Write len bytes of a to the verbose log as hex values between braces. */
static void printSeq(const unsigned char *a, int len)
{
    int idx;

    log_verbose(" { ");
    for (idx = 0; idx < len; ++idx) {
        log_verbose(" 0x%02x ", a[idx]);
    }
    log_verbose(" } \n ");
}
2000-11-21 04:05:39 +00:00
/* Write len UChars of a to the verbose log as 4-digit hex values between braces. */
static void printUSeq(const UChar *a, int len)
{
    int idx;

    log_verbose(" {U+ ");
    for (idx = 0; idx < len; ++idx) {
        log_verbose(" 0x%04x ", a[idx]);
    }
    log_verbose(" } \n ");
}
2000-11-21 04:05:39 +00:00
/* Write len bytes of a to stderr as hex values between braces. */
static void printSeqErr(const unsigned char *a, int len)
{
    int idx;

    fprintf(stderr, " { ");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, " 0x%02x ", a[idx]);
    }
    fprintf(stderr, " } \n ");
}
2000-11-21 04:05:39 +00:00
2000-12-08 01:13:38 +00:00
/* Write len UChars of a to stderr as 4-digit hex values between braces. */
static void printUSeqErr(const UChar *a, int len)
{
    int idx;

    fprintf(stderr, " {U+ ");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, " 0x%04x ", a[idx]);
    }
    fprintf(stderr, " } \n ");
}
2000-11-21 04:05:39 +00:00
2001-04-18 19:31:05 +00:00
static void
2003-08-01 14:30:29 +00:00
TestNextUChar ( UConverter * cnv , const char * source , const char * limit , const int32_t results [ ] , const char * message )
2000-06-22 01:18:30 +00:00
{
const char * s0 ;
2000-06-29 01:48:34 +00:00
const char * s = ( char * ) source ;
2003-08-01 14:30:29 +00:00
const int32_t * r = results ;
2000-06-22 01:18:30 +00:00
UErrorCode errorCode = U_ZERO_ERROR ;
2003-08-01 14:30:29 +00:00
UChar32 c ;
1999-08-16 21:50:52 +00:00
2000-06-22 01:18:30 +00:00
while ( s < limit ) {
s0 = s ;
c = ucnv_getNextUChar ( cnv , & s , limit , & errorCode ) ;
2002-11-07 21:02:24 +00:00
if ( errorCode = = U_INDEX_OUTOFBOUNDS_ERROR ) {
break ; /* no more significant input */
} else if ( U_FAILURE ( errorCode ) ) {
2000-06-22 01:18:30 +00:00
log_err ( " %s ucnv_getNextUChar() failed: %s \n " , message , u_errorName ( errorCode ) ) ;
break ;
2003-07-22 04:20:13 +00:00
} else if (
/* test the expected number of input bytes only if >=0 */
2003-08-01 14:30:29 +00:00
( * r > = 0 & & ( int32_t ) ( s - s0 ) ! = * r ) | |
2003-07-22 04:20:13 +00:00
c ! = * ( r + 1 )
) {
2000-06-22 01:18:30 +00:00
log_err ( " %s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes. \n " ,
message , c , ( s - s0 ) , * ( r + 1 ) , * r ) ;
break ;
}
r + = 2 ;
}
}
2000-11-21 04:05:39 +00:00
2001-04-18 19:31:05 +00:00
static void
2000-06-22 23:46:02 +00:00
TestNextUCharError ( UConverter * cnv , const char * source , const char * limit , UErrorCode expected , const char * message )
{
2000-06-29 01:48:34 +00:00
const char * s = ( char * ) source ;
2000-06-22 23:46:02 +00:00
UErrorCode errorCode = U_ZERO_ERROR ;
uint32_t c ;
c = ucnv_getNextUChar ( cnv , & s , limit , & errorCode ) ;
if ( errorCode ! = expected ) {
log_err ( " FAIL: Expected:%s when %s-----Got:%s \n " , myErrorName ( expected ) , message , myErrorName ( errorCode ) ) ;
}
2000-07-13 00:28:06 +00:00
if ( c ! = 0xFFFD & & c ! = 0xffff ) {
log_err ( " FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx \n " , message , c ) ;
2000-06-22 23:46:02 +00:00
}
2001-04-18 19:31:05 +00:00
}
2000-11-21 04:05:39 +00:00
static void TestInBufSizes ( void )
1999-08-16 21:50:52 +00:00
{
TestNewConvertWithBufferSizes ( NEW_MAX_BUFFER , 1 ) ;
2000-06-29 02:53:29 +00:00
# if 1
1999-08-16 21:50:52 +00:00
TestNewConvertWithBufferSizes ( NEW_MAX_BUFFER , 2 ) ;
TestNewConvertWithBufferSizes ( NEW_MAX_BUFFER , 3 ) ;
TestNewConvertWithBufferSizes ( NEW_MAX_BUFFER , 4 ) ;
TestNewConvertWithBufferSizes ( NEW_MAX_BUFFER , 5 ) ;
TestNewConvertWithBufferSizes ( NEW_MAX_BUFFER , 6 ) ;
TestNewConvertWithBufferSizes ( 1 , 1 ) ;
TestNewConvertWithBufferSizes ( 2 , 3 ) ;
TestNewConvertWithBufferSizes ( 3 , 2 ) ;
2000-06-29 02:53:29 +00:00
# endif
1999-08-16 21:50:52 +00:00
}
2000-11-21 04:05:39 +00:00
static void TestOutBufSizes ( void )
1999-08-16 21:50:52 +00:00
{
2000-06-29 02:53:29 +00:00
# if 1
1999-08-16 21:50:52 +00:00
TestNewConvertWithBufferSizes ( NEW_MAX_BUFFER , NEW_MAX_BUFFER ) ;
TestNewConvertWithBufferSizes ( 1 , NEW_MAX_BUFFER ) ;
TestNewConvertWithBufferSizes ( 2 , NEW_MAX_BUFFER ) ;
TestNewConvertWithBufferSizes ( 3 , NEW_MAX_BUFFER ) ;
TestNewConvertWithBufferSizes ( 4 , NEW_MAX_BUFFER ) ;
TestNewConvertWithBufferSizes ( 5 , NEW_MAX_BUFFER ) ;
2001-04-18 19:31:05 +00:00
2000-06-29 02:53:29 +00:00
# endif
1999-08-16 21:50:52 +00:00
}
/*
 * Register every converter test of this file under the given test tree root.
 * Tests are added in a fixed order; groups are compiled in or out by the
 * UCONFIG_* / U_ENABLE_GENERIC_ISO_2022 switches.
 */
void addTestNewConvert(TestNode **root)
{
#if !UCONFIG_NO_FILE_IO
    addTest(root, TestInBufSizes, " tsconv/nucnvtst/TestInBufSizes ");
    addTest(root, TestOutBufSizes, " tsconv/nucnvtst/TestOutBufSizes ");
#endif
    addTest(root, TestConverterTypesAndStarters, " tsconv/nucnvtst/TestConverterTypesAndStarters ");
    addTest(root, TestAmbiguous, " tsconv/nucnvtst/TestAmbiguous ");
    addTest(root, TestSignatureDetection, " tsconv/nucnvtst/TestSignatureDetection ");
    addTest(root, TestUTF7, " tsconv/nucnvtst/TestUTF7 ");
    addTest(root, TestIMAP, " tsconv/nucnvtst/TestIMAP ");
    addTest(root, TestUTF8, " tsconv/nucnvtst/TestUTF8 ");

    /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
    addTest(root, TestCESU8, " tsconv/nucnvtst/TestCESU8 ");
    addTest(root, TestUTF16, " tsconv/nucnvtst/TestUTF16 ");
    addTest(root, TestUTF16BE, " tsconv/nucnvtst/TestUTF16BE ");
    addTest(root, TestUTF16LE, " tsconv/nucnvtst/TestUTF16LE ");
    addTest(root, TestUTF32, " tsconv/nucnvtst/TestUTF32 ");
    addTest(root, TestUTF32BE, " tsconv/nucnvtst/TestUTF32BE ");
    addTest(root, TestUTF32LE, " tsconv/nucnvtst/TestUTF32LE ");

#if !UCONFIG_NO_LEGACY_CONVERSION
    addTest(root, TestLMBCS, " tsconv/nucnvtst/TestLMBCS ");
#endif

    addTest(root, TestLATIN1, " tsconv/nucnvtst/TestLATIN1 ");

#if !UCONFIG_NO_LEGACY_CONVERSION
    addTest(root, TestSBCS, " tsconv/nucnvtst/TestSBCS ");
#  if !UCONFIG_NO_FILE_IO
    addTest(root, TestDBCS, " tsconv/nucnvtst/TestDBCS ");
    addTest(root, TestICCRunout, " tsconv/nucnvtst/TestICCRunout ");
#  endif
    addTest(root, TestMBCS, " tsconv/nucnvtst/TestMBCS ");

#  ifdef U_ENABLE_GENERIC_ISO_2022
    addTest(root, TestISO_2022, " tsconv/nucnvtst/TestISO_2022 ");
#  endif

    addTest(root, TestISO_2022_JP, " tsconv/nucnvtst/TestISO_2022_JP ");
    addTest(root, TestJIS, " tsconv/nucnvtst/TestJIS ");
    addTest(root, TestISO_2022_JP_1, " tsconv/nucnvtst/TestISO_2022_JP_1 ");
    addTest(root, TestISO_2022_JP_2, " tsconv/nucnvtst/TestISO_2022_JP_2 ");
    addTest(root, TestISO_2022_KR, " tsconv/nucnvtst/TestISO_2022_KR ");
    addTest(root, TestISO_2022_KR_1, " tsconv/nucnvtst/TestISO_2022_KR_1 ");
    addTest(root, TestISO_2022_CN, " tsconv/nucnvtst/TestISO_2022_CN ");
    /*
     * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7,
     * so TestISO_2022_CN_EXT and TestJitterbug915 are not registered.
     */
    addTest(root, TestHZ, " tsconv/nucnvtst/TestHZ ");
#endif

    addTest(root, TestSCSU, " tsconv/nucnvtst/TestSCSU ");

#if !UCONFIG_NO_LEGACY_CONVERSION
    addTest(root, TestEBCDIC_STATEFUL, " tsconv/nucnvtst/TestEBCDIC_STATEFUL ");
    addTest(root, TestGB18030, " tsconv/nucnvtst/TestGB18030 ");
    addTest(root, TestJitterbug255, " tsconv/nucnvtst/TestJitterbug255 ");
    addTest(root, TestEBCDICUS4XML, " tsconv/nucnvtst/TestEBCDICUS4XML ");
    addTest(root, TestISCII, " tsconv/nucnvtst/TestISCII ");
    addTest(root, TestJB5275, " tsconv/nucnvtst/TestJB5275 ");
    addTest(root, TestJB5275_1, " tsconv/nucnvtst/TestJB5275_1 ");
#  if !UCONFIG_NO_COLLATION
    addTest(root, TestJitterbug981, " tsconv/nucnvtst/TestJitterbug981 ");
#  endif

    addTest(root, TestJitterbug1293, " tsconv/nucnvtst/TestJitterbug1293 ");
#endif

#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
    addTest(root, TestCoverageMBCS, " tsconv/nucnvtst/TestCoverageMBCS ");
#endif

    addTest(root, TestRoundTrippingAllUTF, " tsconv/nucnvtst/TestRoundTrippingAllUTF ");

#if !UCONFIG_NO_LEGACY_CONVERSION
    addTest(root, TestJitterbug2346, " tsconv/nucnvtst/TestJitterbug2346 ");
    addTest(root, TestJitterbug2411, " tsconv/nucnvtst/TestJitterbug2411 ");
    addTest(root, TestJitterbug6175, " tsconv/nucnvtst/TestJitterbug6175 ");

    addTest(root, TestIsFixedWidth, " tsconv/nucnvtst/TestIsFixedWidth ");
#endif
}
2001-04-18 19:31:05 +00:00
/* Note that this test already makes use of statics, so it's not really
multithread safe .
1999-08-16 21:50:52 +00:00
This convenience function lets us make the error messages actually useful .
*/
2000-11-21 04:05:39 +00:00
static void setNuConvTestName ( const char * codepage , const char * direction )
1999-08-16 21:50:52 +00:00
{
2004-05-19 06:28:40 +00:00
sprintf ( gNuConvTestName , " [Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d] " ,
codepage ,
direction ,
( int ) gInBufferSize ,
( int ) gOutBufferSize ) ;
1999-08-16 21:50:52 +00:00
}
2002-09-20 19:07:19 +00:00
/* Outcome of one testConvertFromU()/testConvertToU() run. */
typedef enum {
    TC_OK       = 0, /* test was OK */
    TC_MISMATCH = 1, /* Match failed - err was printed */
    TC_FAIL     = 2  /* Test failed, don't print an err because it was already printed. */
} ETestConvertResult;
2001-01-26 03:05:11 +00:00
/* Note: This function uses global variables and it will not do offset
checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
2002-09-20 19:07:19 +00:00
static ETestConvertResult testConvertFromU ( const UChar * source , int sourceLen , const uint8_t * expect , int expectLen ,
2001-02-23 04:40:39 +00:00
const char * codepage , const int32_t * expectOffsets , UBool useFallback )
1999-08-16 21:50:52 +00:00
{
2000-02-05 00:01:54 +00:00
UErrorCode status = U_ZERO_ERROR ;
UConverter * conv = 0 ;
2006-05-26 04:44:31 +00:00
char junkout [ NEW_MAX_BUFFER ] ; /* FIX */
2000-02-05 00:01:54 +00:00
int32_t junokout [ NEW_MAX_BUFFER ] ; /* FIX */
2006-05-26 04:44:31 +00:00
char * p ;
2000-02-05 00:01:54 +00:00
const UChar * src ;
2006-05-26 04:44:31 +00:00
char * end ;
char * targ ;
2000-02-05 00:01:54 +00:00
int32_t * offs ;
int i ;
int32_t realBufferSize ;
2006-05-26 04:44:31 +00:00
char * realBufferEnd ;
2000-02-05 00:01:54 +00:00
const UChar * realSourceEnd ;
const UChar * sourceLimit ;
2000-05-18 22:08:39 +00:00
UBool checkOffsets = TRUE ;
UBool doFlush ;
2000-02-05 00:01:54 +00:00
for ( i = 0 ; i < NEW_MAX_BUFFER ; i + + )
2006-05-26 04:44:31 +00:00
junkout [ i ] = ( char ) 0xF0 ;
2000-02-05 00:01:54 +00:00
for ( i = 0 ; i < NEW_MAX_BUFFER ; i + + )
junokout [ i ] = 0xFF ;
setNuConvTestName ( codepage , " FROM " ) ;
log_verbose ( " \n ========= %s \n " , gNuConvTestName ) ;
2002-07-17 02:41:04 +00:00
conv = my_ucnv_open ( codepage , & status ) ;
2002-07-29 21:04:18 +00:00
2000-02-05 00:01:54 +00:00
if ( U_FAILURE ( status ) )
{
2002-09-20 19:07:19 +00:00
log_data_err ( " Couldn't open converter %s \n " , codepage ) ;
return TC_FAIL ;
2000-02-05 00:01:54 +00:00
}
2001-02-23 04:40:39 +00:00
if ( useFallback ) {
ucnv_setFallback ( conv , useFallback ) ;
}
2000-02-05 00:01:54 +00:00
log_verbose ( " Converter opened.. \n " ) ;
src = source ;
targ = junkout ;
offs = junokout ;
realBufferSize = ( sizeof ( junkout ) / sizeof ( junkout [ 0 ] ) ) ;
realBufferEnd = junkout + realBufferSize ;
realSourceEnd = source + sourceLen ;
2001-01-26 03:05:11 +00:00
if ( gOutBufferSize ! = realBufferSize | | gInBufferSize ! = NEW_MAX_BUFFER )
2000-08-11 16:33:09 +00:00
checkOffsets = FALSE ;
2001-04-18 19:31:05 +00:00
2000-02-05 00:01:54 +00:00
do
2000-08-11 16:33:09 +00:00
{
2002-09-20 19:07:19 +00:00
end = nct_min ( targ + gOutBufferSize , realBufferEnd ) ;
sourceLimit = nct_min ( src + gInBufferSize , realSourceEnd ) ;
doFlush = ( UBool ) ( sourceLimit = = realSourceEnd ) ;
if ( targ = = realBufferEnd ) {
2000-02-05 00:01:54 +00:00
log_err ( " Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s " , targ , gNuConvTestName ) ;
2002-09-20 19:07:19 +00:00
return TC_FAIL ;
}
log_verbose ( " calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s \n " , src , sourceLimit , targ , end , doFlush ? " TRUE " : " FALSE " ) ;
2001-04-18 19:31:05 +00:00
1999-08-16 21:50:52 +00:00
2002-09-20 19:07:19 +00:00
status = U_ZERO_ERROR ;
ucnv_fromUnicode ( conv ,
2006-05-26 04:44:31 +00:00
& targ ,
end ,
2002-09-20 19:07:19 +00:00
& src ,
sourceLimit ,
checkOffsets ? offs : NULL ,
doFlush , /* flush if we're at the end of the input data */
& status ) ;
2000-08-11 19:51:13 +00:00
} while ( ( status = = U_BUFFER_OVERFLOW_ERROR ) | | ( U_SUCCESS ( status ) & & sourceLimit < realSourceEnd ) ) ;
2002-09-20 19:07:19 +00:00
if ( U_FAILURE ( status ) ) {
log_err ( " Problem doing fromUnicode to %s, errcode %s %s \n " , codepage , myErrorName ( status ) , gNuConvTestName ) ;
return TC_FAIL ;
2000-08-11 16:33:09 +00:00
}
2000-02-05 00:01:54 +00:00
log_verbose ( " \n Conversion done [%d uchars in -> %d chars out]. \n Result : " ,
2002-09-20 19:07:19 +00:00
sourceLen , targ - junkout ) ;
2010-04-07 16:18:38 +00:00
if ( getTestOption ( VERBOSITY_OPTION ) )
2000-02-05 00:01:54 +00:00
{
2002-09-20 19:07:19 +00:00
char junk [ 9999 ] ;
char offset_str [ 9999 ] ;
2006-05-26 04:44:31 +00:00
char * ptr ;
2002-09-20 19:07:19 +00:00
junk [ 0 ] = 0 ;
offset_str [ 0 ] = 0 ;
for ( ptr = junkout ; ptr < targ ; ptr + + ) {
sprintf ( junk + strlen ( junk ) , " 0x%02x, " , ( int ) ( 0xFF & * ptr ) ) ;
sprintf ( offset_str + strlen ( offset_str ) , " 0x%02x, " , ( int ) ( 0xFF & junokout [ ptr - junkout ] ) ) ;
}
log_verbose ( junk ) ;
printSeq ( ( const uint8_t * ) expect , expectLen ) ;
if ( checkOffsets ) {
log_verbose ( " \n Offsets: " ) ;
log_verbose ( offset_str ) ;
}
log_verbose ( " \n " ) ;
2000-02-05 00:01:54 +00:00
}
ucnv_close ( conv ) ;
2002-09-20 19:07:19 +00:00
if ( expectLen ! = targ - junkout ) {
log_err ( " Expected %d chars out, got %d %s \n " , expectLen , targ - junkout , gNuConvTestName ) ;
log_verbose ( " Expected %d chars out, got %d %s \n " , expectLen , targ - junkout , gNuConvTestName ) ;
printf ( " \n Got: " ) ;
2004-12-08 23:02:08 +00:00
printSeqErr ( ( const unsigned char * ) junkout , ( int32_t ) ( targ - junkout ) ) ;
2002-09-20 19:07:19 +00:00
printf ( " \n Expected: " ) ;
printSeqErr ( ( const unsigned char * ) expect , expectLen ) ;
return TC_MISMATCH ;
}
if ( checkOffsets & & ( expectOffsets ! = 0 ) ) {
log_verbose ( " comparing %d offsets.. \n " , targ - junkout ) ;
if ( memcmp ( junokout , expectOffsets , ( targ - junkout ) * sizeof ( int32_t ) ) ) {
log_err ( " did not get the expected offsets. %s \n " , gNuConvTestName ) ;
2004-12-08 23:02:08 +00:00
printSeqErr ( ( const unsigned char * ) junkout , ( int32_t ) ( targ - junkout ) ) ;
2002-09-20 19:07:19 +00:00
log_err ( " \n " ) ;
log_err ( " Got : " ) ;
for ( p = junkout ; p < targ ; p + + ) {
2002-11-07 21:02:24 +00:00
log_err ( " %d, " , junokout [ p - junkout ] ) ;
2000-06-22 01:18:30 +00:00
}
2002-09-20 19:07:19 +00:00
log_err ( " \n " ) ;
log_err ( " Expected: " ) ;
for ( i = 0 ; i < ( targ - junkout ) ; i + + ) {
log_err ( " %d, " , expectOffsets [ i ] ) ;
}
log_err ( " \n " ) ;
}
2000-02-05 00:01:54 +00:00
}
log_verbose ( " comparing.. \n " ) ;
2002-09-20 19:07:19 +00:00
if ( ! memcmp ( junkout , expect , expectLen ) ) {
log_verbose ( " Matches! \n " ) ;
return TC_OK ;
} else {
log_err ( " String does not match u->%s \n " , gNuConvTestName ) ;
printUSeqErr ( source , sourceLen ) ;
printf ( " \n Got: " ) ;
printSeqErr ( ( const unsigned char * ) junkout , expectLen ) ;
printf ( " \n Expected: " ) ;
printSeqErr ( ( const unsigned char * ) expect , expectLen ) ;
return TC_MISMATCH ;
2000-02-05 00:01:54 +00:00
}
1999-08-16 21:50:52 +00:00
}
2001-01-26 03:05:11 +00:00
/* Note: This function uses global variables and it will not do offset
checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
2002-09-20 19:07:19 +00:00
static ETestConvertResult testConvertToU ( const uint8_t * source , int sourcelen , const UChar * expect , int expectlen ,
const char * codepage , const int32_t * expectOffsets , UBool useFallback )
1999-08-16 21:50:52 +00:00
{
2000-02-05 00:01:54 +00:00
UErrorCode status = U_ZERO_ERROR ;
UConverter * conv = 0 ;
UChar junkout [ NEW_MAX_BUFFER ] ; /* FIX */
int32_t junokout [ NEW_MAX_BUFFER ] ; /* FIX */
2006-05-26 04:44:31 +00:00
const char * src ;
const char * realSourceEnd ;
const char * srcLimit ;
2000-06-22 01:18:30 +00:00
UChar * p ;
2000-02-05 00:01:54 +00:00
UChar * targ ;
UChar * end ;
int32_t * offs ;
int i ;
2000-05-18 22:08:39 +00:00
UBool checkOffsets = TRUE ;
2001-04-18 19:31:05 +00:00
2000-02-05 00:01:54 +00:00
int32_t realBufferSize ;
UChar * realBufferEnd ;
2001-04-18 19:31:05 +00:00
2000-02-05 00:01:54 +00:00
for ( i = 0 ; i < NEW_MAX_BUFFER ; i + + )
junkout [ i ] = 0xFFFE ;
for ( i = 0 ; i < NEW_MAX_BUFFER ; i + + )
junokout [ i ] = - 1 ;
setNuConvTestName ( codepage , " TO " ) ;
log_verbose ( " \n ========= %s \n " , gNuConvTestName ) ;
2002-07-17 02:41:04 +00:00
conv = my_ucnv_open ( codepage , & status ) ;
2000-02-05 00:01:54 +00:00
if ( U_FAILURE ( status ) )
{
2002-09-20 19:07:19 +00:00
log_data_err ( " Couldn't open converter %s \n " , gNuConvTestName ) ;
return TC_FAIL ;
2000-02-05 00:01:54 +00:00
}
2001-02-23 04:40:39 +00:00
if ( useFallback ) {
ucnv_setFallback ( conv , useFallback ) ;
}
2000-02-05 00:01:54 +00:00
log_verbose ( " Converter opened.. \n " ) ;
2006-05-26 04:44:31 +00:00
src = ( const char * ) source ;
2000-02-05 00:01:54 +00:00
targ = junkout ;
offs = junokout ;
2001-04-18 19:31:05 +00:00
2000-02-05 00:01:54 +00:00
realBufferSize = ( sizeof ( junkout ) / sizeof ( junkout [ 0 ] ) ) ;
realBufferEnd = junkout + realBufferSize ;
realSourceEnd = src + sourcelen ;
2001-01-26 03:05:11 +00:00
if ( gOutBufferSize ! = realBufferSize | | gInBufferSize ! = NEW_MAX_BUFFER )
checkOffsets = FALSE ;
2000-02-05 00:01:54 +00:00
do
2001-01-26 03:05:11 +00:00
{
2000-02-05 00:01:54 +00:00
end = nct_min ( targ + gOutBufferSize , realBufferEnd ) ;
srcLimit = nct_min ( realSourceEnd , src + gInBufferSize ) ;
if ( targ = = realBufferEnd )
2001-01-26 03:05:11 +00:00
{
log_err ( " Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s " , targ , gNuConvTestName ) ;
2002-09-20 19:07:19 +00:00
return TC_FAIL ;
2001-01-26 03:05:11 +00:00
}
2000-02-05 00:01:54 +00:00
log_verbose ( " calling toUnicode @ %08lx to %08lx \n " , targ , end ) ;
/* oldTarg = targ; */
status = U_ZERO_ERROR ;
ucnv_toUnicode ( conv ,
& targ ,
end ,
2006-05-26 04:44:31 +00:00
& src ,
srcLimit ,
2000-02-05 00:01:54 +00:00
checkOffsets ? offs : NULL ,
2000-05-18 22:08:39 +00:00
( UBool ) ( srcLimit = = realSourceEnd ) , /* flush if we're at the end of hte source data */
2000-02-05 00:01:54 +00:00
& status ) ;
/* offs += (targ-oldTarg); */
2000-08-11 19:51:13 +00:00
} while ( ( status = = U_BUFFER_OVERFLOW_ERROR ) | | ( U_SUCCESS ( status ) & & ( srcLimit < realSourceEnd ) ) ) ; /* while we just need another buffer */
2000-02-05 00:01:54 +00:00
if ( U_FAILURE ( status ) )
{
2000-07-19 20:14:27 +00:00
log_err ( " Problem doing %s toUnicode, errcode %s %s \n " , codepage , myErrorName ( status ) , gNuConvTestName ) ;
2002-09-20 19:07:19 +00:00
return TC_FAIL ;
2000-02-05 00:01:54 +00:00
}
log_verbose ( " \n Conversion done. %d bytes -> %d chars. \n Result : " ,
sourcelen , targ - junkout ) ;
2010-04-07 16:18:38 +00:00
if ( getTestOption ( VERBOSITY_OPTION ) )
2000-02-05 00:01:54 +00:00
{
char junk [ 9999 ] ;
char offset_str [ 9999 ] ;
2000-12-08 01:13:38 +00:00
UChar * ptr ;
2001-04-18 19:31:05 +00:00
2000-02-05 00:01:54 +00:00
junk [ 0 ] = 0 ;
offset_str [ 0 ] = 0 ;
2000-12-08 01:13:38 +00:00
for ( ptr = junkout ; ptr < targ ; ptr + + )
2000-02-05 00:01:54 +00:00
{
2000-12-08 01:13:38 +00:00
sprintf ( junk + strlen ( junk ) , " 0x%04x, " , ( 0xFFFF ) & ( unsigned int ) * ptr ) ;
sprintf ( offset_str + strlen ( offset_str ) , " 0x%04x, " , ( 0xFFFF ) & ( unsigned int ) junokout [ ptr - junkout ] ) ;
2000-02-05 00:01:54 +00:00
}
2001-04-18 19:31:05 +00:00
2000-02-05 00:01:54 +00:00
log_verbose ( junk ) ;
2000-08-15 18:05:12 +00:00
printUSeq ( expect , expectlen ) ;
2000-02-05 00:01:54 +00:00
if ( checkOffsets )
{
log_verbose ( " \n Offsets: " ) ;
log_verbose ( offset_str ) ;
}
log_verbose ( " \n " ) ;
}
ucnv_close ( conv ) ;
log_verbose ( " comparing %d uchars (%d bytes).. \n " , expectlen , expectlen * 2 ) ;
if ( checkOffsets & & ( expectOffsets ! = 0 ) )
{
2000-06-22 01:18:30 +00:00
if ( memcmp ( junokout , expectOffsets , ( targ - junkout ) * sizeof ( int32_t ) ) ) {
2001-01-09 03:33:07 +00:00
log_err ( " did not get the expected offsets. %s \n " , gNuConvTestName ) ;
log_err ( " Got: " ) ;
for ( p = junkout ; p < targ ; p + + ) {
log_err ( " %d, " , junokout [ p - junkout ] ) ;
}
log_err ( " \n " ) ;
log_err ( " Expected: " ) ;
for ( i = 0 ; i < ( targ - junkout ) ; i + + ) {
log_err ( " %d, " , expectOffsets [ i ] ) ;
}
log_err ( " \n " ) ;
log_err ( " output: " ) ;
for ( i = 0 ; i < ( targ - junkout ) ; i + + ) {
log_err ( " %X, " , junkout [ i ] ) ;
}
log_err ( " \n " ) ;
log_err ( " input: " ) ;
2006-05-26 04:44:31 +00:00
for ( i = 0 ; i < ( src - ( const char * ) source ) ; i + + ) {
2001-01-09 03:33:07 +00:00
log_err ( " %X, " , ( unsigned char ) source [ i ] ) ;
}
log_err ( " \n " ) ;
2000-08-11 03:35:25 +00:00
}
2000-02-05 00:01:54 +00:00
}
if ( ! memcmp ( junkout , expect , expectlen * 2 ) )
{
log_verbose ( " Matches! \n " ) ;
2002-09-20 19:07:19 +00:00
return TC_OK ;
2000-02-05 00:01:54 +00:00
}
else
2001-04-18 19:31:05 +00:00
{
2000-02-05 00:01:54 +00:00
log_err ( " String does not match. %s \n " , gNuConvTestName ) ;
2000-06-22 01:18:30 +00:00
log_verbose ( " String does not match. %s \n " , gNuConvTestName ) ;
2000-07-19 20:14:27 +00:00
printf ( " \n Got: " ) ;
2000-08-15 18:05:12 +00:00
printUSeqErr ( junkout , expectlen ) ;
2000-07-19 20:14:27 +00:00
printf ( " \n Expected: " ) ;
2001-04-18 19:31:05 +00:00
printUSeqErr ( expect , expectlen ) ;
2002-09-20 19:07:19 +00:00
return TC_MISMATCH ;
2000-02-05 00:01:54 +00:00
}
1999-08-16 21:50:52 +00:00
}
2001-04-18 19:31:05 +00:00
static void TestNewConvertWithBufferSizes ( int32_t outsize , int32_t insize )
1999-08-16 21:50:52 +00:00
{
/** test chars #1 */
2000-02-05 00:01:54 +00:00
/* 1 2 3 1Han 2Han 3Han . */
2004-12-23 21:03:30 +00:00
static const UChar sampleText [ ] =
2008-07-16 21:12:16 +00:00
{ 0x0031 , 0x0032 , 0x0033 , 0x0000 , 0x4e00 , 0x4e8c , 0x4e09 , 0x002E , 0xD840 , 0xDC21 } ;
static const UChar sampleTextRoundTripUnmappable [ ] =
{ 0x0031 , 0x0032 , 0x0033 , 0x0000 , 0x4e00 , 0x4e8c , 0x4e09 , 0x002E , 0xfffd } ;
1999-08-16 21:50:52 +00:00
2001-04-18 19:31:05 +00:00
2004-12-23 21:03:30 +00:00
static const uint8_t expectedUTF8 [ ] =
2008-07-16 21:12:16 +00:00
{ 0x31 , 0x32 , 0x33 , 0x00 , 0xe4 , 0xb8 , 0x80 , 0xe4 , 0xba , 0x8c , 0xe4 , 0xb8 , 0x89 , 0x2E , 0xf0 , 0xa0 , 0x80 , 0xa1 } ;
2004-12-23 21:03:30 +00:00
static const int32_t toUTF8Offs [ ] =
2008-07-16 21:12:16 +00:00
{ 0x00 , 0x01 , 0x02 , 0x03 , 0x04 , 0x04 , 0x04 , 0x05 , 0x05 , 0x05 , 0x06 , 0x06 , 0x06 , 0x07 , 0x08 , 0x08 , 0x08 , 0x08 } ;
2004-12-23 21:03:30 +00:00
static const int32_t fmUTF8Offs [ ] =
2008-07-16 21:12:16 +00:00
{ 0x0000 , 0x0001 , 0x0002 , 0x0003 , 0x0004 , 0x0007 , 0x000a , 0x000d , 0x000e , 0x000e } ;
2001-04-18 19:31:05 +00:00
2004-01-06 21:24:53 +00:00
# ifdef U_ENABLE_GENERIC_ISO_2022
2000-02-05 00:01:54 +00:00
/* Same as UTF8, but with ^[%B preceeding */
2004-12-23 21:03:30 +00:00
static const const uint8_t expectedISO2022 [ ] =
2000-08-11 03:35:25 +00:00
{ 0x1b , 0x25 , 0x42 , 0x31 , 0x32 , 0x33 , 0x00 , 0xe4 , 0xb8 , 0x80 , 0xe4 , 0xba , 0x8c , 0xe4 , 0xb8 , 0x89 , 0x2E } ;
2004-12-23 21:03:30 +00:00
static const int32_t toISO2022Offs [ ] =
2001-04-18 19:31:05 +00:00
{ - 1 , - 1 , - 1 , 0x00 , 0x01 , 0x02 , 0x03 , 0x04 , 0x04 ,
2000-08-11 03:35:25 +00:00
0x04 , 0x05 , 0x05 , 0x05 , 0x06 , 0x06 , 0x06 , 0x07 } ; /* right? */
2004-12-23 21:03:30 +00:00
static const int32_t fmISO2022Offs [ ] =
2000-06-22 01:18:30 +00:00
{ 0x0003 , 0x0004 , 0x0005 , 0x0006 , 0x0007 , 0x000a , 0x000d , 0x0010 } ; /* is this right? */
2004-01-06 21:24:53 +00:00
# endif
2001-04-18 19:31:05 +00:00
2000-06-22 01:18:30 +00:00
/* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
2004-12-23 21:03:30 +00:00
static const uint8_t expectedIBM930 [ ] =
2008-07-16 21:12:16 +00:00
{ 0xF1 , 0xF2 , 0xF3 , 0x00 , 0x0E , 0x45 , 0x41 , 0x45 , 0x42 , 0x45 , 0x43 , 0x0F , 0x4B , 0x0e , 0xfe , 0xfe , 0x0f } ;
2004-12-23 21:03:30 +00:00
static const int32_t toIBM930Offs [ ] =
2008-07-16 21:12:16 +00:00
{ 0x00 , 0x01 , 0x02 , 0x03 , 0x04 , 0x04 , 0x04 , 0x05 , 0x05 , 0x06 , 0x06 , 0x07 , 0x07 , 0x08 , 0x08 , 0x08 , - 1 } ;
2004-12-23 21:03:30 +00:00
static const int32_t fmIBM930Offs [ ] =
2008-07-16 21:12:16 +00:00
{ 0x0000 , 0x0001 , 0x0002 , 0x0003 , 0x0005 , 0x0007 , 0x0009 , 0x000c , 0x000e } ;
2001-04-18 19:31:05 +00:00
2000-06-22 01:18:30 +00:00
/* 1 2 3 0 h1 h2 h3 . MBCS*/
2004-12-23 21:03:30 +00:00
static const uint8_t expectedIBM943 [ ] =
2008-07-16 21:12:16 +00:00
{ 0x31 , 0x32 , 0x33 , 0x00 , 0x88 , 0xea , 0x93 , 0xf1 , 0x8e , 0x4f , 0x2e , 0xfc , 0xfc } ;
2004-12-23 21:03:30 +00:00
static const int32_t toIBM943Offs [ ] =
2008-07-16 21:12:16 +00:00
{ 0x00 , 0x01 , 0x02 , 0x03 , 0x04 , 0x04 , 0x05 , 0x05 , 0x06 , 0x06 , 0x07 , 0x08 , 0x08 } ;
2004-12-23 21:03:30 +00:00
static const int32_t fmIBM943Offs [ ] =
2008-07-16 21:12:16 +00:00
{ 0x0000 , 0x0001 , 0x0002 , 0x0003 , 0x0004 , 0x0006 , 0x0008 , 0x000a , 0x000b } ;
2001-04-18 19:31:05 +00:00
2000-06-22 01:18:30 +00:00
/* 1 2 3 0 h1 h2 h3 . DBCS*/
2004-12-23 21:03:30 +00:00
static const uint8_t expectedIBM9027 [ ] =
2008-07-16 21:12:16 +00:00
{ 0xfe , 0xfe , 0xfe , 0xfe , 0xfe , 0xfe , 0xfe , 0xfe , 0x4c , 0x41 , 0x4c , 0x48 , 0x4c , 0x55 , 0xfe , 0xfe , 0xfe , 0xfe } ;
2004-12-23 21:03:30 +00:00
static const int32_t toIBM9027Offs [ ] =
2008-07-16 21:12:16 +00:00
{ 0x00 , 0x00 , 0x01 , 0x01 , 0x02 , 0x02 , 0x03 , 0x03 , 0x04 , 0x04 , 0x05 , 0x05 , 0x06 , 0x06 , 0x07 , 0x07 , 0x08 , 0x08 } ;
2001-04-18 19:31:05 +00:00
2000-06-22 01:18:30 +00:00
/* 1 2 3 0 <?> <?> <?> . SBCS*/
2004-12-23 21:03:30 +00:00
static const uint8_t expectedIBM920 [ ] =
2008-07-16 21:12:16 +00:00
{ 0x31 , 0x32 , 0x33 , 0x00 , 0x1a , 0x1a , 0x1a , 0x2e , 0x1a } ;
2004-12-23 21:03:30 +00:00
static const int32_t toIBM920Offs [ ] =
2008-07-16 21:12:16 +00:00
{ 0x00 , 0x01 , 0x02 , 0x03 , 0x04 , 0x05 , 0x06 , 0x07 , 0x08 } ;
2001-04-18 19:31:05 +00:00
2000-06-22 01:18:30 +00:00
/* 1 2 3 0 <?> <?> <?> . SBCS*/
2004-12-23 21:03:30 +00:00
static const uint8_t expectedISO88593 [ ] =
2008-07-16 21:12:16 +00:00
{ 0x31 , 0x32 , 0x33 , 0x00 , 0x1a , 0x1a , 0x1a , 0x2E , 0x1a } ;
2004-12-23 21:03:30 +00:00
static const int32_t toISO88593Offs [ ] =
2008-07-16 21:12:16 +00:00
{ 0x00 , 0x01 , 0x02 , 0x03 , 0x04 , 0x05 , 0x06 , 0x07 , 0x08 } ;
1999-08-16 21:50:52 +00:00
2004-12-23 21:03:30 +00:00
/* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/
static const uint8_t expectedLATIN1 [ ] =
2008-07-16 21:12:16 +00:00
{ 0x31 , 0x32 , 0x33 , 0x00 , 0x1a , 0x1a , 0x1a , 0x2E , 0x1a } ;
2004-12-23 21:03:30 +00:00
static const int32_t toLATIN1Offs [ ] =
2008-07-16 21:12:16 +00:00
{ 0x00 , 0x01 , 0x02 , 0x03 , 0x04 , 0x05 , 0x06 , 0x07 , 0x08 } ;
2001-04-18 19:31:05 +00:00
2000-02-05 00:01:54 +00:00
/* etc */
2004-12-23 21:03:30 +00:00
static const uint8_t expectedUTF16BE [ ] =
2008-07-16 21:12:16 +00:00
{ 0x00 , 0x31 , 0x00 , 0x32 , 0x00 , 0x33 , 0x00 , 0x00 , 0x4e , 0x00 , 0x4e , 0x8c , 0x4e , 0x09 , 0x00 , 0x2e , 0xd8 , 0x40 , 0xdc , 0x21 } ;
2004-12-23 21:03:30 +00:00
static const int32_t toUTF16BEOffs [ ] =
2008-07-16 21:12:16 +00:00
{ 0x00 , 0x00 , 0x01 , 0x01 , 0x02 , 0x02 , 0x03 , 0x03 , 0x04 , 0x04 , 0x05 , 0x05 , 0x06 , 0x06 , 0x07 , 0x07 , 0x08 , 0x08 , 0x08 , 0x08 } ;
2004-12-23 21:03:30 +00:00
static const int32_t fmUTF16BEOffs [ ] =
2008-07-16 21:12:16 +00:00
{ 0x0000 , 0x0002 , 0x0004 , 0x0006 , 0x0008 , 0x000a , 0x000c , 0x000e , 0x0010 , 0x0010 } ;
2000-11-16 17:20:03 +00:00
2004-12-23 21:03:30 +00:00
static const uint8_t expectedUTF16LE [ ] =
2008-07-16 21:12:16 +00:00
{ 0x31 , 0x00 , 0x32 , 0x00 , 0x33 , 0x00 , 0x00 , 0x00 , 0x00 , 0x4e , 0x8c , 0x4e , 0x09 , 0x4e , 0x2e , 0x00 , 0x40 , 0xd8 , 0x21 , 0xdc } ;
2004-12-23 21:03:30 +00:00
static const int32_t toUTF16LEOffs [ ] =
2008-07-16 21:12:16 +00:00
{ 0x00 , 0x00 , 0x01 , 0x01 , 0x02 , 0x02 , 0x03 , 0x03 , 0x04 , 0x04 , 0x05 , 0x05 , 0x06 , 0x06 , 0x07 , 0x07 , 0x08 , 0x08 , 0x08 , 0x08 } ;
2004-12-23 21:03:30 +00:00
static const int32_t fmUTF16LEOffs [ ] =
2008-07-16 21:12:16 +00:00
{ 0x0000 , 0x0002 , 0x0004 , 0x0006 , 0x0008 , 0x000a , 0x000c , 0x000e , 0x0010 , 0x0010 } ;
2001-04-18 19:31:05 +00:00
2004-12-23 21:03:30 +00:00
static const uint8_t expectedUTF32BE [ ] =
2000-11-16 17:20:03 +00:00
{ 0x00 , 0x00 , 0x00 , 0x31 ,
0x00 , 0x00 , 0x00 , 0x32 ,
0x00 , 0x00 , 0x00 , 0x33 ,
0x00 , 0x00 , 0x00 , 0x00 ,
0x00 , 0x00 , 0x4e , 0x00 ,
0x00 , 0x00 , 0x4e , 0x8c ,
0x00 , 0x00 , 0x4e , 0x09 ,
2008-07-16 21:12:16 +00:00
0x00 , 0x00 , 0x00 , 0x2e ,
0x00 , 0x02 , 0x00 , 0x21 } ;
2004-12-23 21:03:30 +00:00
static const int32_t toUTF32BEOffs [ ] =
2000-11-16 17:20:03 +00:00
{ 0x00 , 0x00 , 0x00 , 0x00 ,
0x01 , 0x01 , 0x01 , 0x01 ,
0x02 , 0x02 , 0x02 , 0x02 ,
0x03 , 0x03 , 0x03 , 0x03 ,
0x04 , 0x04 , 0x04 , 0x04 ,
0x05 , 0x05 , 0x05 , 0x05 ,
0x06 , 0x06 , 0x06 , 0x06 ,
0x07 , 0x07 , 0x07 , 0x07 ,
2008-07-16 21:12:16 +00:00
0x08 , 0x08 , 0x08 , 0x08 ,
2000-11-16 17:20:03 +00:00
0x08 , 0x08 , 0x08 , 0x08 } ;
2004-12-23 21:03:30 +00:00
static const int32_t fmUTF32BEOffs [ ] =
2008-07-16 21:12:16 +00:00
{ 0x0000 , 0x0004 , 0x0008 , 0x000c , 0x0010 , 0x0014 , 0x0018 , 0x001c , 0x0020 , 0x0020 } ;
2000-11-16 17:20:03 +00:00
2004-12-23 21:03:30 +00:00
static const uint8_t expectedUTF32LE [ ] =
2000-11-16 17:20:03 +00:00
{ 0x31 , 0x00 , 0x00 , 0x00 ,
0x32 , 0x00 , 0x00 , 0x00 ,
0x33 , 0x00 , 0x00 , 0x00 ,
0x00 , 0x00 , 0x00 , 0x00 ,
0x00 , 0x4e , 0x00 , 0x00 ,
0x8c , 0x4e , 0x00 , 0x00 ,
0x09 , 0x4e , 0x00 , 0x00 ,
2008-07-16 21:12:16 +00:00
0x2e , 0x00 , 0x00 , 0x00 ,
0x21 , 0x00 , 0x02 , 0x00 } ;
2004-12-23 21:03:30 +00:00
static const int32_t toUTF32LEOffs [ ] =
2000-11-16 17:20:03 +00:00
{ 0x00 , 0x00 , 0x00 , 0x00 ,
0x01 , 0x01 , 0x01 , 0x01 ,
0x02 , 0x02 , 0x02 , 0x02 ,
0x03 , 0x03 , 0x03 , 0x03 ,
0x04 , 0x04 , 0x04 , 0x04 ,
0x05 , 0x05 , 0x05 , 0x05 ,
0x06 , 0x06 , 0x06 , 0x06 ,
0x07 , 0x07 , 0x07 , 0x07 ,
2008-07-16 21:12:16 +00:00
0x08 , 0x08 , 0x08 , 0x08 ,
2000-11-16 17:20:03 +00:00
0x08 , 0x08 , 0x08 , 0x08 } ;
2004-12-23 21:03:30 +00:00
static const int32_t fmUTF32LEOffs [ ] =
2008-07-16 21:12:16 +00:00
{ 0x0000 , 0x0004 , 0x0008 , 0x000c , 0x0010 , 0x0014 , 0x0018 , 0x001c , 0x0020 , 0x0020 } ;
2001-04-18 19:31:05 +00:00
2000-11-16 17:20:03 +00:00
1999-08-16 21:50:52 +00:00
2000-06-28 17:01:52 +00:00
/** Test chars #2 **/
1999-08-16 21:50:52 +00:00
2000-02-05 00:01:54 +00:00
/* Sahha [health], slashed h's */
2004-12-23 21:03:30 +00:00
static const UChar malteseUChars [ ] = { 0x0053 , 0x0061 , 0x0127 , 0x0127 , 0x0061 } ;
static const uint8_t expectedMaltese913 [ ] = { 0x53 , 0x61 , 0xB1 , 0xB1 , 0x61 } ;
2000-06-22 01:18:30 +00:00
/* LMBCS */
2004-12-23 21:03:30 +00:00
static const UChar LMBCSUChars [ ] = { 0x0027 , 0x010A , 0x0000 , 0x0127 , 0x2666 , 0x0220 } ;
static const uint8_t expectedLMBCS [ ] = { 0x27 , 0x06 , 0x04 , 0x00 , 0x01 , 0x73 , 0x01 , 0x04 , 0x14 , 0x02 , 0x20 } ;
static const int32_t toLMBCSOffs [ ] = { 0x00 , 0x01 , 0x01 , 0x02 , 0x03 , 0x03 , 0x04 , 0x04 , 0x05 , 0x05 , 0x05 } ;
static const int32_t fmLMBCSOffs [ ] = { 0x0000 , 0x0001 , 0x0003 , 0x0004 , 0x0006 , 0x0008 } ;
2000-02-05 00:01:54 +00:00
/*********************************** START OF CODE finally *************/
1999-08-16 21:50:52 +00:00
2004-12-23 21:03:30 +00:00
gInBufferSize = insize ;
gOutBufferSize = outsize ;
1999-08-16 21:50:52 +00:00
2004-12-23 21:03:30 +00:00
log_verbose ( " \n \n \n Testing conversions with InputBufferSize = %d, OutputBufferSize = %d \n " , gInBufferSize , gOutBufferSize ) ;
1999-08-16 21:50:52 +00:00
2001-04-18 19:31:05 +00:00
2000-06-22 01:18:30 +00:00
/*UTF-8*/
2002-12-13 04:05:50 +00:00
testConvertFromU ( sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) ,
expectedUTF8 , sizeof ( expectedUTF8 ) , " UTF8 " , toUTF8Offs , FALSE ) ;
2001-04-18 19:31:05 +00:00
2000-07-19 20:14:27 +00:00
log_verbose ( " Test surrogate behaviour for UTF8 \n " ) ;
{
2004-12-23 21:03:30 +00:00
static const UChar testinput [ ] = { 0x20ac , 0xd801 , 0xdc01 , 0xdc01 } ;
static const uint8_t expectedUTF8test2 [ ] = { 0xe2 , 0x82 , 0xac ,
2001-04-18 19:31:05 +00:00
0xf0 , 0x90 , 0x90 , 0x81 ,
2002-07-02 00:51:16 +00:00
0xef , 0xbf , 0xbd
2000-07-19 20:14:27 +00:00
} ;
2004-12-23 21:03:30 +00:00
static const int32_t offsets [ ] = { 0 , 0 , 0 , 1 , 1 , 1 , 1 , 3 , 3 , 3 } ;
2002-09-20 19:07:19 +00:00
testConvertFromU ( testinput , sizeof ( testinput ) / sizeof ( testinput [ 0 ] ) ,
expectedUTF8test2 , sizeof ( expectedUTF8test2 ) , " UTF8 " , offsets , FALSE ) ;
2000-07-19 20:14:27 +00:00
}
2006-07-28 22:58:29 +00:00
# if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
2000-06-22 01:18:30 +00:00
/*ISO-2022*/
2002-12-13 04:05:50 +00:00
testConvertFromU ( sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) ,
expectedISO2022 , sizeof ( expectedISO2022 ) , " ISO_2022 " , toISO2022Offs , FALSE ) ;
2003-12-03 22:53:14 +00:00
# endif
2006-07-28 22:58:29 +00:00
2000-06-22 01:18:30 +00:00
/*UTF16 LE*/
2002-12-13 04:05:50 +00:00
testConvertFromU ( sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) ,
expectedUTF16LE , sizeof ( expectedUTF16LE ) , " utf-16le " , toUTF16LEOffs , FALSE ) ;
2000-06-22 01:18:30 +00:00
/*UTF16 BE*/
2002-12-13 04:05:50 +00:00
testConvertFromU ( sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) ,
expectedUTF16BE , sizeof ( expectedUTF16BE ) , " utf-16be " , toUTF16BEOffs , FALSE ) ;
2000-11-16 17:20:03 +00:00
/*UTF32 LE*/
2002-12-13 04:05:50 +00:00
testConvertFromU ( sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) ,
expectedUTF32LE , sizeof ( expectedUTF32LE ) , " utf-32le " , toUTF32LEOffs , FALSE ) ;
2000-11-16 17:20:03 +00:00
/*UTF32 BE*/
2002-12-13 04:05:50 +00:00
testConvertFromU ( sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) ,
expectedUTF32BE , sizeof ( expectedUTF32BE ) , " utf-32be " , toUTF32BEOffs , FALSE ) ;
2006-07-28 22:58:29 +00:00
2000-06-22 01:18:30 +00:00
/*LATIN_1*/
2002-12-13 04:05:50 +00:00
testConvertFromU ( sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) ,
expectedLATIN1 , sizeof ( expectedLATIN1 ) , " LATIN_1 " , toLATIN1Offs , FALSE ) ;
2006-07-28 22:58:29 +00:00
# if !UCONFIG_NO_LEGACY_CONVERSION
2000-06-22 01:18:30 +00:00
/*EBCDIC_STATEFUL*/
2002-12-13 04:05:50 +00:00
testConvertFromU ( sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) ,
expectedIBM930 , sizeof ( expectedIBM930 ) , " ibm-930 " , toIBM930Offs , FALSE ) ;
2000-06-28 17:01:52 +00:00
2002-12-13 04:05:50 +00:00
testConvertFromU ( sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) ,
expectedISO88593 , sizeof ( expectedISO88593 ) , " iso-8859-3 " , toISO88593Offs , FALSE ) ;
2000-06-28 17:01:52 +00:00
2000-06-22 01:18:30 +00:00
/*MBCS*/
2000-06-28 17:01:52 +00:00
2002-12-13 04:05:50 +00:00
testConvertFromU ( sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) ,
expectedIBM943 , sizeof ( expectedIBM943 ) , " ibm-943 " , toIBM943Offs , FALSE ) ;
2000-06-22 01:18:30 +00:00
/*DBCS*/
2002-12-13 04:05:50 +00:00
testConvertFromU ( sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) ,
2003-04-25 00:24:50 +00:00
expectedIBM9027 , sizeof ( expectedIBM9027 ) , " @ibm9027 " , toIBM9027Offs , FALSE ) ;
2000-06-22 01:18:30 +00:00
/*SBCS*/
2002-12-13 04:05:50 +00:00
testConvertFromU ( sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) ,
expectedIBM920 , sizeof ( expectedIBM920 ) , " ibm-920 " , toIBM920Offs , FALSE ) ;
2000-06-22 01:18:30 +00:00
/*SBCS*/
2002-12-13 04:05:50 +00:00
testConvertFromU ( sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) ,
expectedISO88593 , sizeof ( expectedISO88593 ) , " iso-8859-3 " , toISO88593Offs , FALSE ) ;
2006-07-28 22:58:29 +00:00
# endif
1999-08-16 21:50:52 +00:00
2001-04-18 19:31:05 +00:00
1999-08-16 21:50:52 +00:00
/****/
1999-12-04 02:31:40 +00:00
2000-06-22 01:18:30 +00:00
/*UTF-8*/
2002-12-13 04:05:50 +00:00
testConvertToU ( expectedUTF8 , sizeof ( expectedUTF8 ) ,
sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) , " utf8 " , fmUTF8Offs , FALSE ) ;
2006-07-28 22:58:29 +00:00
# if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
2000-06-22 01:18:30 +00:00
/*ISO-2022*/
2002-12-13 04:05:50 +00:00
testConvertToU ( expectedISO2022 , sizeof ( expectedISO2022 ) ,
sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) , " ISO_2022 " , fmISO2022Offs , FALSE ) ;
2003-12-03 22:53:14 +00:00
# endif
2006-07-28 22:58:29 +00:00
2000-06-22 01:18:30 +00:00
/*UTF16 LE*/
2002-12-13 04:05:50 +00:00
testConvertToU ( expectedUTF16LE , sizeof ( expectedUTF16LE ) ,
sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) , " utf-16le " , fmUTF16LEOffs , FALSE ) ;
2000-06-22 01:18:30 +00:00
/*UTF16 BE*/
2002-12-13 04:05:50 +00:00
testConvertToU ( expectedUTF16BE , sizeof ( expectedUTF16BE ) ,
sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) , " utf-16be " , fmUTF16BEOffs , FALSE ) ;
2000-11-16 17:20:03 +00:00
/*UTF32 LE*/
2002-12-13 04:05:50 +00:00
testConvertToU ( expectedUTF32LE , sizeof ( expectedUTF32LE ) ,
sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) , " utf-32le " , fmUTF32LEOffs , FALSE ) ;
2000-11-16 17:20:03 +00:00
/*UTF32 BE*/
2002-12-13 04:05:50 +00:00
testConvertToU ( expectedUTF32BE , sizeof ( expectedUTF32BE ) ,
sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) , " utf-32be " , fmUTF32BEOffs , FALSE ) ;
2006-07-28 22:58:29 +00:00
# if !UCONFIG_NO_LEGACY_CONVERSION
2000-06-22 01:18:30 +00:00
/*EBCDIC_STATEFUL*/
2008-07-16 21:12:16 +00:00
testConvertToU ( expectedIBM930 , sizeof ( expectedIBM930 ) , sampleTextRoundTripUnmappable ,
sizeof ( sampleTextRoundTripUnmappable ) / sizeof ( sampleTextRoundTripUnmappable [ 0 ] ) , " ibm-930 " , fmIBM930Offs , FALSE ) ;
2000-06-22 01:18:30 +00:00
/*MBCS*/
2008-07-16 21:12:16 +00:00
testConvertToU ( expectedIBM943 , sizeof ( expectedIBM943 ) , sampleTextRoundTripUnmappable ,
sizeof ( sampleTextRoundTripUnmappable ) / sizeof ( sampleTextRoundTripUnmappable [ 0 ] ) , " ibm-943 " , fmIBM943Offs , FALSE ) ;
2006-07-28 22:58:29 +00:00
# endif
2000-06-28 17:01:52 +00:00
2000-11-16 17:20:03 +00:00
/* Try it again to make sure it still works */
2002-12-13 04:05:50 +00:00
testConvertToU ( expectedUTF16LE , sizeof ( expectedUTF16LE ) ,
sampleText , sizeof ( sampleText ) / sizeof ( sampleText [ 0 ] ) , " utf-16le " , fmUTF16LEOffs , FALSE ) ;
1999-12-04 02:31:40 +00:00
2006-07-28 22:58:29 +00:00
# if !UCONFIG_NO_LEGACY_CONVERSION
2002-12-13 04:05:50 +00:00
testConvertToU ( expectedMaltese913 , sizeof ( expectedMaltese913 ) ,
malteseUChars , sizeof ( malteseUChars ) / sizeof ( malteseUChars [ 0 ] ) , " latin3 " , NULL , FALSE ) ;
1999-12-04 02:31:40 +00:00
2002-12-13 04:05:50 +00:00
testConvertFromU ( malteseUChars , sizeof ( malteseUChars ) / sizeof ( malteseUChars [ 0 ] ) ,
expectedMaltese913 , sizeof ( expectedMaltese913 ) , " iso-8859-3 " , NULL , FALSE ) ;
2000-06-22 01:18:30 +00:00
2002-12-13 04:05:50 +00:00
/*LMBCS*/
testConvertFromU ( LMBCSUChars , sizeof ( LMBCSUChars ) / sizeof ( LMBCSUChars [ 0 ] ) ,
expectedLMBCS , sizeof ( expectedLMBCS ) , " LMBCS-1 " , toLMBCSOffs , FALSE ) ;
testConvertToU ( expectedLMBCS , sizeof ( expectedLMBCS ) ,
LMBCSUChars , sizeof ( LMBCSUChars ) / sizeof ( LMBCSUChars [ 0 ] ) , " LMBCS-1 " , fmLMBCSOffs , FALSE ) ;
2006-07-28 22:58:29 +00:00
# endif
2000-08-15 18:05:12 +00:00
2001-01-09 03:33:07 +00:00
/* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
{
/* encode directly set D and set O */
static const uint8_t utf7 [ ] = {
/*
Hi Mom - + Jjo - - !
A + ImIDkQ .
+ -
+ ZeVnLIqe
*/
0x48 , 0x69 , 0x20 , 0x4d , 0x6f , 0x6d , 0x20 , 0x2d , 0x2b , 0x4a , 0x6a , 0x6f , 0x2d , 0x2d , 0x21 ,
0x41 , 0x2b , 0x49 , 0x6d , 0x49 , 0x44 , 0x6b , 0x51 , 0x2e ,
0x2b , 0x2d ,
0x2b , 0x5a , 0x65 , 0x56 , 0x6e , 0x4c , 0x49 , 0x71 , 0x65
} ;
static const UChar unicode [ ] = {
/*
Hi Mom - < WHITE SMILING FACE > - !
A < NOT IDENTICAL TO > < ALPHA > .
+
[ Japanese word " nihongo " ]
*/
0x48 , 0x69 , 0x20 , 0x4d , 0x6f , 0x6d , 0x20 , 0x2d , 0x263a , 0x2d , 0x21 ,
0x41 , 0x2262 , 0x0391 , 0x2e ,
0x2b ,
0x65e5 , 0x672c , 0x8a9e
} ;
static const int32_t toUnicodeOffsets [ ] = {
0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 9 , 13 , 14 ,
15 , 17 , 19 , 23 ,
24 ,
27 , 29 , 32
} ;
static const int32_t fromUnicodeOffsets [ ] = {
2002-11-07 21:02:24 +00:00
0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 8 , 8 , 8 , 8 , 9 , 10 ,
2001-01-09 03:33:07 +00:00
11 , 12 , 12 , 12 , 13 , 13 , 13 , 13 , 14 ,
15 , 15 ,
16 , 16 , 16 , 17 , 17 , 17 , 18 , 18 , 18
} ;
/* same but escaping set O (the exclamation mark) */
static const uint8_t utf7Restricted [ ] = {
/*
Hi Mom - + Jjo - - + ACE -
A + ImIDkQ .
+ -
+ ZeVnLIqe
*/
0x48 , 0x69 , 0x20 , 0x4d , 0x6f , 0x6d , 0x20 , 0x2d , 0x2b , 0x4a , 0x6a , 0x6f , 0x2d , 0x2d , 0x2b , 0x41 , 0x43 , 0x45 , 0x2d ,
0x41 , 0x2b , 0x49 , 0x6d , 0x49 , 0x44 , 0x6b , 0x51 , 0x2e ,
0x2b , 0x2d ,
0x2b , 0x5a , 0x65 , 0x56 , 0x6e , 0x4c , 0x49 , 0x71 , 0x65
} ;
static const int32_t toUnicodeOffsetsR [ ] = {
0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 9 , 13 , 15 ,
19 , 21 , 23 , 27 ,
28 ,
31 , 33 , 36
} ;
static const int32_t fromUnicodeOffsetsR [ ] = {
2002-11-07 21:02:24 +00:00
0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 8 , 8 , 8 , 8 , 9 , 10 , 10 , 10 , 10 , 10 ,
2001-01-09 03:33:07 +00:00
11 , 12 , 12 , 12 , 13 , 13 , 13 , 13 , 14 ,
15 , 15 ,
16 , 16 , 16 , 17 , 17 , 17 , 18 , 18 , 18
} ;
2002-11-07 21:02:24 +00:00
testConvertFromU ( unicode , sizeof ( unicode ) / U_SIZEOF_UCHAR , utf7 , sizeof ( utf7 ) , " UTF-7 " , fromUnicodeOffsets , FALSE ) ;
2000-08-15 18:05:12 +00:00
2002-11-07 21:02:24 +00:00
testConvertToU ( utf7 , sizeof ( utf7 ) , unicode , sizeof ( unicode ) / U_SIZEOF_UCHAR , " UTF-7 " , toUnicodeOffsets , FALSE ) ;
2000-08-15 18:05:12 +00:00
2002-11-07 21:02:24 +00:00
testConvertFromU ( unicode , sizeof ( unicode ) / U_SIZEOF_UCHAR , utf7Restricted , sizeof ( utf7Restricted ) , " UTF-7,version=1 " , fromUnicodeOffsetsR , FALSE ) ;
2001-01-09 03:33:07 +00:00
2002-11-07 21:02:24 +00:00
testConvertToU ( utf7Restricted , sizeof ( utf7Restricted ) , unicode , sizeof ( unicode ) / U_SIZEOF_UCHAR , " UTF-7,version=1 " , toUnicodeOffsetsR , FALSE ) ;
}
/*
* IMAP - mailbox - name examples are mostly from http : //www.imc.org/rfc2152,
* modified according to RFC 2060 ,
* and supplemented with the one example in RFC 2060 itself .
*/
{
static const uint8_t imap [ ] = {
/* Hi Mom -&Jjo--!
A & ImIDkQ - .
& -
& ZeVnLIqe -
\
~ peter
/ mail
/ & ZeVnLIqe -
/ & U , BTFw -
*/
0x48 , 0x69 , 0x20 , 0x4d , 0x6f , 0x6d , 0x20 , 0x2d , 0x26 , 0x4a , 0x6a , 0x6f , 0x2d , 0x2d , 0x21 ,
0x41 , 0x26 , 0x49 , 0x6d , 0x49 , 0x44 , 0x6b , 0x51 , 0x2d , 0x2e ,
0x26 , 0x2d ,
0x26 , 0x5a , 0x65 , 0x56 , 0x6e , 0x4c , 0x49 , 0x71 , 0x65 , 0x2d ,
0x5c ,
0x7e , 0x70 , 0x65 , 0x74 , 0x65 , 0x72 ,
0x2f , 0x6d , 0x61 , 0x69 , 0x6c ,
0x2f , 0x26 , 0x5a , 0x65 , 0x56 , 0x6e , 0x4c , 0x49 , 0x71 , 0x65 , 0x2d ,
0x2f , 0x26 , 0x55 , 0x2c , 0x42 , 0x54 , 0x46 , 0x77 , 0x2d
} ;
static const UChar unicode [ ] = {
/* Hi Mom -<WHITE SMILING FACE>-!
A < NOT IDENTICAL TO > < ALPHA > .
&
[ Japanese word " nihongo " ]
\
~ peter
/ mail
/ < 65e5 , 672 c , 8 a9e >
/ < 53f 0 , 5317 >
*/
0x48 , 0x69 , 0x20 , 0x4d , 0x6f , 0x6d , 0x20 , 0x2d , 0x263a , 0x2d , 0x21 ,
0x41 , 0x2262 , 0x0391 , 0x2e ,
0x26 ,
0x65e5 , 0x672c , 0x8a9e ,
0x5c ,
0x7e , 0x70 , 0x65 , 0x74 , 0x65 , 0x72 ,
0x2f , 0x6d , 0x61 , 0x69 , 0x6c ,
0x2f , 0x65e5 , 0x672c , 0x8a9e ,
0x2f , 0x53f0 , 0x5317
} ;
static const int32_t toUnicodeOffsets [ ] = {
0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 9 , 13 , 14 ,
15 , 17 , 19 , 24 ,
25 ,
28 , 30 , 33 ,
37 ,
38 , 39 , 40 , 41 , 42 , 43 ,
44 , 45 , 46 , 47 , 48 ,
49 , 51 , 53 , 56 ,
60 , 62 , 64
} ;
static const int32_t fromUnicodeOffsets [ ] = {
0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 8 , 8 , 8 , 8 , 9 , 10 ,
11 , 12 , 12 , 12 , 13 , 13 , 13 , 13 , 13 , 14 ,
15 , 15 ,
16 , 16 , 16 , 17 , 17 , 17 , 18 , 18 , 18 , 18 ,
19 ,
20 , 21 , 22 , 23 , 24 , 25 ,
26 , 27 , 28 , 29 , 30 ,
31 , 32 , 32 , 32 , 33 , 33 , 33 , 34 , 34 , 34 , 34 ,
35 , 36 , 36 , 36 , 37 , 37 , 37 , 37 , 37
} ;
testConvertFromU ( unicode , sizeof ( unicode ) / U_SIZEOF_UCHAR , imap , sizeof ( imap ) , " IMAP-mailbox-name " , fromUnicodeOffsets , FALSE ) ;
testConvertToU ( imap , sizeof ( imap ) , unicode , sizeof ( unicode ) / U_SIZEOF_UCHAR , " IMAP-mailbox-name " , toUnicodeOffsets , FALSE ) ;
2001-01-09 03:33:07 +00:00
}
2001-01-26 03:05:11 +00:00
/* Test UTF-8 bad data handling*/
{
static const uint8_t utf8 [ ] = {
0x61 ,
0xf7 , 0xbf , 0xbf , 0xbf , /* > 10FFFF */
0x00 ,
0x62 ,
0xfb , 0xbf , 0xbf , 0xbf , 0xbf , /* > 10FFFF */
0xfb , 0xbf , 0xbf , 0xbf , 0xbf , /* > 10FFFF */
0xf4 , 0x8f , 0xbf , 0xbf , /* 10FFFF */
0xdf , 0xbf , /* 7ff */
0xbf , /* truncated tail */
0xf4 , 0x90 , 0x80 , 0x80 , /* 11FFFF */
0x02
} ;
static const uint16_t utf8Expected [ ] = {
0x0061 ,
0xfffd ,
0x0000 ,
0x0062 ,
0xfffd ,
0xfffd ,
0xdbff , 0xdfff ,
0x07ff ,
0xfffd ,
0xfffd ,
0x0002
} ;
static const int32_t utf8Offsets [ ] = {
0 , 1 , 5 , 6 , 7 , 12 , 17 , 17 , 21 , 23 , 24 , 28
} ;
2002-09-20 19:07:19 +00:00
testConvertToU ( utf8 , sizeof ( utf8 ) ,
utf8Expected , sizeof ( utf8Expected ) / sizeof ( utf8Expected [ 0 ] ) , " utf-8 " , utf8Offsets , FALSE ) ;
2001-01-26 03:05:11 +00:00
}
/* Test UTF-32BE bad data handling*/
{
static const uint8_t utf32 [ ] = {
0x00 , 0x00 , 0x00 , 0x61 ,
0x00 , 0x11 , 0x00 , 0x00 , /* 0x110000 out of range */
2002-08-23 17:24:45 +00:00
0x00 , 0x10 , 0xff , 0xff , /* 0x10FFFF in range */
2001-01-26 03:05:11 +00:00
0x00 , 0x00 , 0x00 , 0x62 ,
0xff , 0xff , 0xff , 0xff , /* 0xffffffff out of range */
0x7f , 0xff , 0xff , 0xff , /* 0x7fffffff out of range */
0x00 , 0x00 , 0x01 , 0x62 ,
0x00 , 0x00 , 0x02 , 0x62
} ;
static const uint16_t utf32Expected [ ] = {
0x0061 ,
0xfffd , /* 0x110000 out of range */
2002-08-23 17:24:45 +00:00
0xDBFF , /* 0x10FFFF in range */
0xDFFF ,
2001-01-26 03:05:11 +00:00
0x0062 ,
0xfffd , /* 0xffffffff out of range */
0xfffd , /* 0x7fffffff out of range */
0x0162 ,
0x0262
} ;
static const int32_t utf32Offsets [ ] = {
2002-08-23 17:24:45 +00:00
0 , 4 , 8 , 8 , 12 , 16 , 20 , 24 , 28
2001-01-26 03:05:11 +00:00
} ;
2004-12-23 21:03:30 +00:00
static const uint8_t utf32ExpectedBack [ ] = {
0x00 , 0x00 , 0x00 , 0x61 ,
0x00 , 0x00 , 0xff , 0xfd , /* 0x110000 out of range */
0x00 , 0x10 , 0xff , 0xff , /* 0x10FFFF in range */
0x00 , 0x00 , 0x00 , 0x62 ,
0x00 , 0x00 , 0xff , 0xfd , /* 0xffffffff out of range */
0x00 , 0x00 , 0xff , 0xfd , /* 0x7fffffff out of range */
0x00 , 0x00 , 0x01 , 0x62 ,
0x00 , 0x00 , 0x02 , 0x62
} ;
static const int32_t utf32OffsetsBack [ ] = {
0 , 0 , 0 , 0 ,
1 , 1 , 1 , 1 ,
2 , 2 , 2 , 2 ,
4 , 4 , 4 , 4 ,
5 , 5 , 5 , 5 ,
6 , 6 , 6 , 6 ,
7 , 7 , 7 , 7 ,
8 , 8 , 8 , 8
} ;
2002-09-20 19:07:19 +00:00
testConvertToU ( utf32 , sizeof ( utf32 ) ,
utf32Expected , sizeof ( utf32Expected ) / sizeof ( utf32Expected [ 0 ] ) , " utf-32be " , utf32Offsets , FALSE ) ;
2004-12-23 21:03:30 +00:00
testConvertFromU ( utf32Expected , sizeof ( utf32Expected ) / sizeof ( utf32Expected [ 0 ] ) ,
utf32ExpectedBack , sizeof ( utf32ExpectedBack ) , " utf-32be " , utf32OffsetsBack , FALSE ) ;
2001-01-26 03:05:11 +00:00
}
/* Test UTF-32LE bad data handling*/
{
static const uint8_t utf32 [ ] = {
0x61 , 0x00 , 0x00 , 0x00 ,
0x00 , 0x00 , 0x11 , 0x00 , /* 0x110000 out of range */
2002-08-23 17:24:45 +00:00
0xff , 0xff , 0x10 , 0x00 , /* 0x10FFFF in range */
2001-01-26 03:05:11 +00:00
0x62 , 0x00 , 0x00 , 0x00 ,
0xff , 0xff , 0xff , 0xff , /* 0xffffffff out of range */
0xff , 0xff , 0xff , 0x7f , /* 0x7fffffff out of range */
0x62 , 0x01 , 0x00 , 0x00 ,
0x62 , 0x02 , 0x00 , 0x00 ,
} ;
static const uint16_t utf32Expected [ ] = {
0x0061 ,
0xfffd , /* 0x110000 out of range */
2002-08-23 17:24:45 +00:00
0xDBFF , /* 0x10FFFF in range */
0xDFFF ,
2001-01-26 03:05:11 +00:00
0x0062 ,
0xfffd , /* 0xffffffff out of range */
0xfffd , /* 0x7fffffff out of range */
0x0162 ,
0x0262
} ;
static const int32_t utf32Offsets [ ] = {
2002-08-23 17:24:45 +00:00
0 , 4 , 8 , 8 , 12 , 16 , 20 , 24 , 28
2001-01-26 03:05:11 +00:00
} ;
2004-12-23 21:03:30 +00:00
static const uint8_t utf32ExpectedBack [ ] = {
0x61 , 0x00 , 0x00 , 0x00 ,
0xfd , 0xff , 0x00 , 0x00 , /* 0x110000 out of range */
0xff , 0xff , 0x10 , 0x00 , /* 0x10FFFF in range */
0x62 , 0x00 , 0x00 , 0x00 ,
0xfd , 0xff , 0x00 , 0x00 , /* 0xffffffff out of range */
0xfd , 0xff , 0x00 , 0x00 , /* 0x7fffffff out of range */
0x62 , 0x01 , 0x00 , 0x00 ,
0x62 , 0x02 , 0x00 , 0x00
} ;
static const int32_t utf32OffsetsBack [ ] = {
0 , 0 , 0 , 0 ,
1 , 1 , 1 , 1 ,
2 , 2 , 2 , 2 ,
4 , 4 , 4 , 4 ,
5 , 5 , 5 , 5 ,
6 , 6 , 6 , 6 ,
7 , 7 , 7 , 7 ,
8 , 8 , 8 , 8
} ;
2002-12-13 04:05:50 +00:00
testConvertToU ( utf32 , sizeof ( utf32 ) ,
utf32Expected , sizeof ( utf32Expected ) / sizeof ( utf32Expected [ 0 ] ) , " utf-32le " , utf32Offsets , FALSE ) ;
2004-12-23 21:03:30 +00:00
testConvertFromU ( utf32Expected , sizeof ( utf32Expected ) / sizeof ( utf32Expected [ 0 ] ) ,
utf32ExpectedBack , sizeof ( utf32ExpectedBack ) , " utf-32le " , utf32OffsetsBack , FALSE ) ;
2001-01-26 03:05:11 +00:00
}
2001-04-18 19:31:05 +00:00
}
2002-01-21 21:10:18 +00:00
static void TestCoverageMBCS ( ) {
2002-07-17 02:41:04 +00:00
#if 0
2002-08-28 15:55:37 +00:00
UErrorCode status = U_ZERO_ERROR ;
2002-08-22 20:48:21 +00:00
const char * directory = loadTestData ( & status ) ;
2002-07-25 18:32:04 +00:00
char * tdpath = NULL ;
2002-07-29 21:04:18 +00:00
char * saveDirectory = ( char * ) malloc ( sizeof ( char ) * ( strlen ( u_getDataDirectory ( ) ) + 1 ) ) ;
2002-07-25 18:32:04 +00:00
int len = strlen ( directory ) ;
char * index = NULL ;
2002-07-29 21:04:18 +00:00
tdpath = ( char * ) malloc ( sizeof ( char ) * ( len * 2 ) ) ;
2002-02-07 21:36:52 +00:00
uprv_strcpy ( saveDirectory , u_getDataDirectory ( ) ) ;
2002-07-29 21:04:18 +00:00
log_verbose ( " Retrieved data directory %s \n " , saveDirectory ) ;
uprv_strcpy ( tdpath , directory ) ;
index = strrchr ( tdpath , ( char ) U_FILE_SEP_CHAR ) ;
2002-02-07 21:36:52 +00:00
if ( ( unsigned int ) ( index - tdpath ) ! = ( strlen ( tdpath ) - 1 ) ) {
* ( index + 1 ) = 0 ;
2002-01-29 04:01:49 +00:00
}
2002-02-07 21:36:52 +00:00
u_setDataDirectory ( tdpath ) ;
log_verbose ( " ICU data directory is set to: %s \n " , tdpath ) ;
2002-07-17 02:41:04 +00:00
# endif
2002-01-21 21:10:18 +00:00
/*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm
which is test file for MBCS conversion with single - byte codepage data . */
{
/* MBCS with single byte codepage data test1.ucm*/
const UChar unicodeInput [ ] = { 0x20ac , 0x0005 , 0x0006 , 0xdbc4 , 0xde34 , 0x0003 } ;
const uint8_t expectedtest1 [ ] = { 0x00 , 0x05 , 0xff , 0x07 , 0xff , } ;
int32_t totest1Offs [ ] = { 0 , 1 , 2 , 3 , 5 , } ;
/*from Unicode*/
2002-12-13 04:05:50 +00:00
testConvertFromU ( unicodeInput , sizeof ( unicodeInput ) / sizeof ( unicodeInput [ 0 ] ) ,
expectedtest1 , sizeof ( expectedtest1 ) , " @test1 " , totest1Offs , FALSE ) ;
2002-01-21 21:10:18 +00:00
}
/*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm
which is test file for MBCS conversion with three - byte codepage data . */
{
/* MBCS with three byte codepage data test3.ucm*/
const UChar unicodeInput [ ] = { 0x20ac , 0x0005 , 0x0006 , 0x000b , 0xdbc4 , 0xde34 , 0xd84d , 0xdc56 , 0x000e } ;
const uint8_t expectedtest3 [ ] = { 0x00 , 0x05 , 0xff , 0x01 , 0x02 , 0x0b , 0x07 , 0x01 , 0x02 , 0x0a , 0xff , } ;
int32_t totest3Offs [ ] = { 0 , 1 , 2 , 3 , 3 , 3 , 4 , 6 , 6 , 6 , 8 } ;
const uint8_t test3input [ ] = { 0x00 , 0x05 , 0x06 , 0x01 , 0x02 , 0x0b , 0x07 , 0x01 , 0x02 , 0x0a , 0x01 , 0x02 , 0x0c , } ;
const UChar expectedUnicode [ ] = { 0x20ac , 0x0005 , 0x0006 , 0x000b , 0xdbc4 , 0xde34 , 0xd84d , 0xdc56 , 0xfffd } ;
int32_t fromtest3Offs [ ] = { 0 , 1 , 2 , 3 , 6 , 6 , 7 , 7 , 10 } ;
/*from Unicode*/
2002-12-13 04:05:50 +00:00
testConvertFromU ( unicodeInput , sizeof ( unicodeInput ) / sizeof ( unicodeInput [ 0 ] ) ,
expectedtest3 , sizeof ( expectedtest3 ) , " @test3 " , totest3Offs , FALSE ) ;
2002-01-21 21:10:18 +00:00
/*to Unicode*/
2002-12-13 04:05:50 +00:00
testConvertToU ( test3input , sizeof ( test3input ) ,
expectedUnicode , sizeof ( expectedUnicode ) / sizeof ( expectedUnicode [ 0 ] ) , " @test3 " , fromtest3Offs , FALSE ) ;
2002-01-21 21:10:18 +00:00
}
/*some more test to increase the code coverage in MBCS. Create an test converter from test4.ucm
which is test file for MBCS conversion with four - byte codepage data . */
{
/* MBCS with three byte codepage data test4.ucm*/
2002-03-28 16:51:17 +00:00
static const UChar unicodeInput [ ] = { 0x20ac , 0x0005 , 0x0006 , 0x000b , 0xdbc4 , 0xde34 , 0xd84d , 0xdc56 , 0x000e } ;
static const uint8_t expectedtest4 [ ] = { 0x00 , 0x05 , 0xff , 0x01 , 0x02 , 0x03 , 0x0b , 0x07 , 0x01 , 0x02 , 0x03 , 0x0a , 0xff , } ;
static const int32_t totest4Offs [ ] = { 0 , 1 , 2 , 3 , 3 , 3 , 3 , 4 , 6 , 6 , 6 , 6 , 8 , } ;
2002-01-21 21:10:18 +00:00
2002-03-28 16:51:17 +00:00
static const uint8_t test4input [ ] = { 0x00 , 0x05 , 0x06 , 0x01 , 0x02 , 0x03 , 0x0b , 0x07 , 0x01 , 0x02 , 0x03 , 0x0a , 0x01 , 0x02 , 0x03 , 0x0c , } ;
static const UChar expectedUnicode [ ] = { 0x20ac , 0x0005 , 0x0006 , 0x000b , 0xdbc4 , 0xde34 , 0xd84d , 0xdc56 , 0xfffd } ;
static const int32_t fromtest4Offs [ ] = { 0 , 1 , 2 , 3 , 7 , 7 , 8 , 8 , 12 , } ;
2002-01-21 21:10:18 +00:00
/*from Unicode*/
2002-12-13 04:05:50 +00:00
testConvertFromU ( unicodeInput , sizeof ( unicodeInput ) / sizeof ( unicodeInput [ 0 ] ) ,
expectedtest4 , sizeof ( expectedtest4 ) , " @test4 " , totest4Offs , FALSE ) ;
2002-01-21 21:10:18 +00:00
/*to Unicode*/
2002-12-13 04:05:50 +00:00
testConvertToU ( test4input , sizeof ( test4input ) ,
expectedUnicode , sizeof ( expectedUnicode ) / sizeof ( expectedUnicode [ 0 ] ) , " @test4 " , fromtest4Offs , FALSE ) ;
2002-01-21 21:10:18 +00:00
}
2002-07-17 02:41:04 +00:00
#if 0
2002-07-29 21:04:18 +00:00
free ( tdpath ) ;
2002-01-21 21:10:18 +00:00
/* restore the original data directory */
2002-02-07 21:36:52 +00:00
log_verbose ( " Setting the data directory to %s \n " , saveDirectory ) ;
2002-01-21 21:10:18 +00:00
u_setDataDirectory ( saveDirectory ) ;
2002-07-29 21:04:18 +00:00
free ( saveDirectory ) ;
2002-07-17 02:41:04 +00:00
# endif
2002-01-21 21:10:18 +00:00
}
2002-03-28 16:51:17 +00:00
/*
 * Opens the converter named convName through my_ucnv_open() and checks that
 * ucnv_getType() reports convType for it.
 *
 * convName  converter name handed to my_ucnv_open()
 * convType  converter type that ucnv_getType() is expected to return
 *
 * If the converter cannot be opened, this is reported via log_data_err() and
 * the type check is skipped.
 */
static void TestConverterType(const char *convName, UConverterType convType) {
    UConverter *myConverter;
    UConverterType actualType;
    UErrorCode err = U_ZERO_ERROR;

    myConverter = my_ucnv_open(convName, &err);
    if (U_FAILURE(err)) {
        log_data_err(" Failed to create an %s converter \n ", convName);
        return;
    }

    actualType = ucnv_getType(myConverter);
    if (actualType != convType) {
        /* The message says "Got", so print the value that was actually
           returned rather than the expected one. */
        log_err(" ucnv_getType Failed for %s. Got enum value 0x%X \n ",
                convName, (unsigned int)actualType);
    } else {
        log_verbose(" ucnv_getType %s ok \n ", convName);
    }

    ucnv_close(myConverter);
}
1999-08-16 21:50:52 +00:00
2000-11-21 04:05:39 +00:00
static void TestConverterTypesAndStarters ( )
1999-08-16 21:50:52 +00:00
{
2006-07-28 22:58:29 +00:00
# if !UCONFIG_NO_LEGACY_CONVERSION
2002-03-27 23:25:18 +00:00
UConverter * myConverter ;
2000-02-05 00:01:54 +00:00
UErrorCode err = U_ZERO_ERROR ;
2000-05-18 22:08:39 +00:00
UBool mystarters [ 256 ] ;
2001-04-18 19:31:05 +00:00
2000-08-11 03:35:25 +00:00
/* const UBool expectedKSCstarters[256] = {
2000-02-05 00:01:54 +00:00
FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE ,
FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE ,
FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE ,
FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE ,
FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE ,
FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE ,
FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE ,
FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE ,
FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE ,
FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE ,
FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE ,
FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE ,
FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE ,
FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE , FALSE ,
FALSE , FALSE , FALSE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE ,
TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE ,
TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE ,
TRUE , TRUE , TRUE , FALSE , FALSE , TRUE , TRUE , TRUE , TRUE , TRUE ,
TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE ,
TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE ,
TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE ,
TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE ,
TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE ,
TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE ,
TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE , TRUE ,
2000-08-11 03:35:25 +00:00
TRUE , TRUE , TRUE , TRUE , TRUE , TRUE } ; */
1999-08-16 21:50:52 +00:00
2002-03-27 23:25:18 +00:00
log_verbose ( " Testing KSC, ibm-930, ibm-878 for starters and their conversion types. " ) ;
1999-08-16 21:50:52 +00:00
2002-03-27 23:25:18 +00:00
myConverter = ucnv_open ( " ksc " , & err ) ;
2000-10-27 00:10:17 +00:00
if ( U_FAILURE ( err ) ) {
2002-09-20 19:07:19 +00:00
log_data_err ( " Failed to create an ibm-ksc converter \n " ) ;
2000-10-27 00:10:17 +00:00
return ;
}
2000-02-05 00:01:54 +00:00
else
2001-01-26 03:05:11 +00:00
{
2002-03-27 23:25:18 +00:00
if ( ucnv_getType ( myConverter ) ! = UCNV_MBCS )
2001-01-26 03:05:11 +00:00
log_err ( " ucnv_getType Failed for ibm-949 \n " ) ;
else
log_verbose ( " ucnv_getType ibm-949 ok \n " ) ;
2000-02-05 00:01:54 +00:00
2002-03-27 23:25:18 +00:00
if ( myConverter ! = NULL )
ucnv_getStarters ( myConverter , mystarters , & err ) ;
2000-02-05 00:01:54 +00:00
/*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
log_err ( " Failed ucnv_getStarters for ksc \n " ) ;
else
log_verbose ( " ucnv_getStarters ok \n " ) ; */
2001-04-18 19:31:05 +00:00
2001-01-26 03:05:11 +00:00
}
2002-03-27 23:25:18 +00:00
ucnv_close ( myConverter ) ;
TestConverterType ( " ibm-930 " , UCNV_EBCDIC_STATEFUL ) ;
TestConverterType ( " ibm-878 " , UCNV_SBCS ) ;
2006-07-28 22:58:29 +00:00
# endif
2002-03-27 23:25:18 +00:00
TestConverterType ( " iso-8859-1 " , UCNV_LATIN_1 ) ;
2006-07-28 22:58:29 +00:00
2002-03-27 23:25:18 +00:00
TestConverterType ( " ibm-1208 " , UCNV_UTF8 ) ;
2006-07-28 22:58:29 +00:00
2002-03-27 23:25:18 +00:00
TestConverterType ( " utf-8 " , UCNV_UTF8 ) ;
TestConverterType ( " UTF-16BE " , UCNV_UTF16_BigEndian ) ;
TestConverterType ( " UTF-16LE " , UCNV_UTF16_LittleEndian ) ;
TestConverterType ( " UTF-32BE " , UCNV_UTF32_BigEndian ) ;
TestConverterType ( " UTF-32LE " , UCNV_UTF32_LittleEndian ) ;
2006-07-28 22:58:29 +00:00
# if !UCONFIG_NO_LEGACY_CONVERSION
# if defined(U_ENABLE_GENERIC_ISO_2022)
2002-03-27 23:25:18 +00:00
TestConverterType ( " iso-2022 " , UCNV_ISO_2022 ) ;
2003-12-03 22:53:14 +00:00
# endif
2006-07-28 22:58:29 +00:00
2002-03-27 23:25:18 +00:00
TestConverterType ( " hz " , UCNV_HZ ) ;
2006-07-28 22:58:29 +00:00
# endif
2002-03-27 23:25:18 +00:00
TestConverterType ( " scsu " , UCNV_SCSU ) ;
2006-07-28 22:58:29 +00:00
# if !UCONFIG_NO_LEGACY_CONVERSION
2002-03-27 23:25:18 +00:00
TestConverterType ( " x-iscii-de " , UCNV_ISCII ) ;
2006-07-28 22:58:29 +00:00
# endif
2002-03-27 23:25:18 +00:00
TestConverterType ( " ascii " , UCNV_US_ASCII ) ;
TestConverterType ( " utf-7 " , UCNV_UTF7 ) ;
2002-11-07 21:02:24 +00:00
TestConverterType ( " IMAP-mailbox-name " , UCNV_IMAP_MAILBOX ) ;
2002-05-25 00:30:31 +00:00
TestConverterType ( " bocu-1 " , UCNV_BOCU1 ) ;
1999-08-16 21:50:52 +00:00
}
2000-11-21 04:05:39 +00:00
2001-03-16 20:48:10 +00:00
/*
 * Checks that ucnv_isAmbiguous() agrees with what the converter does to the
 * byte 0x5c, and that ucnv_fixFileSeparator() turns the result into U+005C
 * when a fix is needed.
 *
 * cnv  an open converter; it is used for conversion but not closed here
 *
 * The function returns silently when the conversion fails or when the
 * converter does not map 0x61/0x5B to the same code points (the test is not
 * applicable then).
 */
static void
TestAmbiguousConverter(UConverter *cnv) {
    /* an 'a', a square bracket and a US-ASCII backslash */
    static const char asciiProbe[3] = { 0x61, 0x5B, 0x5c };
    UChar converted[20] = { 0, 0, 0, 0 };
    const char *src = asciiProbe;
    UChar *dst = converted;
    UErrorCode status = U_ZERO_ERROR;
    UBool reportedAmbiguous;
    UBool needsFix;

    ucnv_toUnicode(cnv, &dst, converted + 20, &src, asciiProbe + 3, NULL, TRUE, &status);
    if (U_FAILURE(status)) {
        /* general failures are not this test's concern; the input may
           simply not be mappable */
        return;
    }

    /*
     * Not a close ASCII-family encoding, or 0x5c is unassigned/illegal:
     * the test does not apply. Partially ASCII-based encodings such as the
     * ISO-7 and GSM series of codepages are ignored this way.
     */
    if (converted[0] != 0x61) {
        return;
    }
    if (converted[1] != 0x5B) {
        return;
    }
    if (converted[2] == 0xfffd) {
        return;
    }

    /* converted[2] != 0x5c must be exactly what ucnv_isAmbiguous() reports */
    needsFix = (UBool)(converted[2] != 0x5c);
    reportedAmbiguous = ucnv_isAmbiguous(cnv);
    if (needsFix != reportedAmbiguous) {
        log_err(" error: converter \" %s \" needs a backslash fix: %d but ucnv_isAmbiguous()==%d \n ",
                ucnv_getName(cnv, &status), needsFix, reportedAmbiguous);
        return;
    }

    if (!needsFix) {
        return;
    }

    /* apply the fix over everything that was converted and re-check */
    ucnv_fixFileSeparator(cnv, converted, (int32_t)(dst - converted));
    if (converted[2] != 0x5c) {
        log_err(" error: ucnv_fixFileSeparator(%s) failed \n ", ucnv_getName(cnv, &status));
    }
}
2000-11-21 04:05:39 +00:00
static void TestAmbiguous ( )
2000-01-08 00:54:57 +00:00
{
UErrorCode status = U_ZERO_ERROR ;
2001-03-16 20:48:10 +00:00
UConverter * ascii_cnv = 0 , * sjis_cnv = 0 , * cnv ;
2004-12-23 21:03:30 +00:00
static const char target [ ] = {
2000-03-22 01:57:16 +00:00
/* "\\usr\\local\\share\\data\\icutest.txt" */
0x5c , 0x75 , 0x73 , 0x72 ,
0x5c , 0x6c , 0x6f , 0x63 , 0x61 , 0x6c ,
0x5c , 0x73 , 0x68 , 0x61 , 0x72 , 0x65 ,
0x5c , 0x64 , 0x61 , 0x74 , 0x61 ,
0x5c , 0x69 , 0x63 , 0x75 , 0x74 , 0x65 , 0x73 , 0x74 , 0x2e , 0x74 , 0x78 , 0x74 ,
0
} ;
2001-09-28 22:22:59 +00:00
UChar asciiResult [ 200 ] , sjisResult [ 200 ] ;
2009-08-26 01:02:40 +00:00
int32_t /*asciiLength = 0,*/ sjisLength = 0 , i ;
2001-03-16 20:48:10 +00:00
const char * name ;
/* enumerate all converters */
status = U_ZERO_ERROR ;
for ( i = 0 ; ( name = ucnv_getAvailableName ( i ) ) ! = NULL ; + + i ) {
cnv = ucnv_open ( name , & status ) ;
if ( U_SUCCESS ( status ) ) {
TestAmbiguousConverter ( cnv ) ;
ucnv_close ( cnv ) ;
} else {
log_err ( " error: unable to open available converter \" %s \" \n " , name ) ;
status = U_ZERO_ERROR ;
}
}
2006-07-28 22:58:29 +00:00
# if !UCONFIG_NO_LEGACY_CONVERSION
2001-03-07 20:59:31 +00:00
sjis_cnv = ucnv_open ( " ibm-943 " , & status ) ;
2000-01-08 00:54:57 +00:00
if ( U_FAILURE ( status ) )
{
2002-09-20 19:07:19 +00:00
log_data_err ( " Failed to create a SJIS converter \n " ) ;
2000-01-08 00:54:57 +00:00
return ;
}
ascii_cnv = ucnv_open ( " LATIN-1 " , & status ) ;
if ( U_FAILURE ( status ) )
{
2002-09-20 19:07:19 +00:00
log_data_err ( " Failed to create a LATIN-1 converter \n " ) ;
2000-01-08 00:54:57 +00:00
ucnv_close ( sjis_cnv ) ;
return ;
}
/* convert target from SJIS to Unicode */
2004-12-08 23:02:08 +00:00
sjisLength = ucnv_toUChars ( sjis_cnv , sjisResult , sizeof ( sjisResult ) / U_SIZEOF_UCHAR , target , ( int32_t ) strlen ( target ) , & status ) ;
2000-01-08 00:54:57 +00:00
if ( U_FAILURE ( status ) )
{
log_err ( " Failed to convert the SJIS string. \n " ) ;
ucnv_close ( sjis_cnv ) ;
ucnv_close ( ascii_cnv ) ;
return ;
}
/* convert target from Latin-1 to Unicode */
2009-08-26 01:02:40 +00:00
/*asciiLength =*/ ucnv_toUChars ( ascii_cnv , asciiResult , sizeof ( asciiResult ) / U_SIZEOF_UCHAR , target , ( int32_t ) strlen ( target ) , & status ) ;
2000-01-08 00:54:57 +00:00
if ( U_FAILURE ( status ) )
{
log_err ( " Failed to convert the Latin-1 string. \n " ) ;
ucnv_close ( sjis_cnv ) ;
ucnv_close ( ascii_cnv ) ;
return ;
2001-04-18 19:31:05 +00:00
}
2000-01-08 00:54:57 +00:00
if ( ! ucnv_isAmbiguous ( sjis_cnv ) )
{
log_err ( " SJIS converter should contain ambiguous character mappings. \n " ) ;
ucnv_close ( sjis_cnv ) ;
ucnv_close ( ascii_cnv ) ;
return ;
}
if ( u_strcmp ( sjisResult , asciiResult ) = = 0 )
{
log_err ( " File separators for SJIS don't need to be fixed. \n " ) ;
}
ucnv_fixFileSeparator ( sjis_cnv , sjisResult , sjisLength ) ;
if ( u_strcmp ( sjisResult , asciiResult ) ! = 0 )
{
log_err ( " Fixing file separator for SJIS failed. \n " ) ;
}
ucnv_close ( sjis_cnv ) ;
ucnv_close ( ascii_cnv ) ;
2006-07-28 22:58:29 +00:00
# endif
2000-01-08 00:54:57 +00:00
}
2001-04-18 19:31:05 +00:00
2002-07-29 21:04:18 +00:00
static void
2002-01-08 01:05:57 +00:00
TestSignatureDetection ( ) {
/* with null terminated strings */
{
2002-07-29 21:04:18 +00:00
static const char * data [ ] = {
2002-01-08 01:05:57 +00:00
" \xFE \xFF \x00 \x00 " , /* UTF-16BE */
" \xFF \xFE \x00 \x00 " , /* UTF-16LE */
" \xEF \xBB \xBF \x00 " , /* UTF-8 */
" \x0E \xFE \xFF \x00 " , /* SCSU */
2002-07-29 21:04:18 +00:00
2002-01-08 01:05:57 +00:00
" \xFE \xFF " , /* UTF-16BE */
" \xFF \xFE " , /* UTF-16LE */
" \xEF \xBB \xBF " , /* UTF-8 */
" \x0E \xFE \xFF " , /* SCSU */
2002-07-29 21:04:18 +00:00
2002-01-08 01:05:57 +00:00
" \xFE \xFF \x41 \x42 " , /* UTF-16BE */
" \xFF \xFE \x41 \x41 " , /* UTF-16LE */
" \xEF \xBB \xBF \x41 " , /* UTF-8 */
" \x0E \xFE \xFF \x41 " , /* SCSU */
2002-11-08 01:28:14 +00:00
" \x2B \x2F \x76 \x38 \x2D " , /* UTF-7 */
" \x2B \x2F \x76 \x38 \x41 " , /* UTF-7 */
" \x2B \x2F \x76 \x39 \x41 " , /* UTF-7 */
" \x2B \x2F \x76 \x2B \x41 " , /* UTF-7 */
2004-07-16 21:48:33 +00:00
" \x2B \x2F \x76 \x2F \x41 " , /* UTF-7 */
" \xDD \x73 \x66 \x73 " /* UTF-EBCDIC */
2002-01-08 01:05:57 +00:00
} ;
2002-02-08 04:53:41 +00:00
static const char * expected [ ] = {
2002-01-08 01:05:57 +00:00
" UTF-16BE " ,
" UTF-16LE " ,
" UTF-8 " ,
" SCSU " ,
" UTF-16BE " ,
" UTF-16LE " ,
" UTF-8 " ,
" SCSU " ,
" UTF-16BE " ,
" UTF-16LE " ,
" UTF-8 " ,
" SCSU " ,
2002-11-08 01:28:14 +00:00
" UTF-7 " ,
" UTF-7 " ,
" UTF-7 " ,
" UTF-7 " ,
2004-07-16 21:48:33 +00:00
" UTF-7 " ,
" UTF-EBCDIC "
2002-01-08 01:05:57 +00:00
} ;
2002-02-08 04:53:41 +00:00
static const int32_t expectedLength [ ] = {
2002-01-08 01:05:57 +00:00
2 ,
2 ,
3 ,
3 ,
2 ,
2 ,
3 ,
3 ,
2 ,
2 ,
3 ,
3 ,
2002-11-08 01:28:14 +00:00
5 ,
4 ,
4 ,
4 ,
2004-07-16 21:48:33 +00:00
4 ,
2002-11-08 01:28:14 +00:00
4
2002-01-08 01:05:57 +00:00
} ;
int i = 0 ;
UErrorCode err ;
int32_t signatureLength = - 1 ;
2002-02-08 04:53:41 +00:00
const char * source = NULL ;
2002-01-08 01:05:57 +00:00
const char * enc = NULL ;
for ( ; i < sizeof ( data ) / sizeof ( char * ) ; i + + ) {
err = U_ZERO_ERROR ;
source = data [ i ] ;
enc = ucnv_detectUnicodeSignature ( source , - 1 , & signatureLength , & err ) ;
if ( U_FAILURE ( err ) ) {
log_err ( " ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s \n " , source , i , u_errorName ( err ) ) ;
continue ;
}
if ( enc = = NULL | | strcmp ( enc , expected [ i ] ) ! = 0 ) {
log_err ( " ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s \n " , source , i , expected [ i ] , enc ) ;
continue ;
}
if ( signatureLength ! = expectedLength [ i ] ) {
log_err ( " ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i \n " , source , i , signatureLength , expectedLength [ i ] ) ;
}
}
}
{
2002-07-29 21:04:18 +00:00
static const char * data [ ] = {
2002-01-08 01:05:57 +00:00
" \xFE \xFF \x00 " , /* UTF-16BE */
" \xFF \xFE \x00 " , /* UTF-16LE */
" \xEF \xBB \xBF \x00 " , /* UTF-8 */
" \x0E \xFE \xFF \x00 " , /* SCSU */
" \x00 \x00 \xFE \xFF " , /* UTF-32BE */
" \xFF \xFE \x00 \x00 " , /* UTF-32LE */
" \xFE \xFF " , /* UTF-16BE */
" \xFF \xFE " , /* UTF-16LE */
" \xEF \xBB \xBF " , /* UTF-8 */
" \x0E \xFE \xFF " , /* SCSU */
" \x00 \x00 \xFE \xFF " , /* UTF-32BE */
" \xFF \xFE \x00 \x00 " , /* UTF-32LE */
" \xFE \xFF \x41 \x42 " , /* UTF-16BE */
" \xFF \xFE \x41 \x41 " , /* UTF-16LE */
" \xEF \xBB \xBF \x41 " , /* UTF-8 */
" \x0E \xFE \xFF \x41 " , /* SCSU */
" \x00 \x00 \xFE \xFF \x41 " , /* UTF-32BE */
" \xFF \xFE \x00 \x00 \x42 " , /* UTF-32LE */
2002-05-25 00:30:31 +00:00
" \xFB \xEE \x28 " , /* BOCU-1 */
2002-01-08 01:05:57 +00:00
" \xFF \x41 \x42 " /* NULL */
} ;
2002-02-08 04:53:41 +00:00
static const int len [ ] = {
2002-01-08 01:05:57 +00:00
3 ,
3 ,
4 ,
4 ,
4 ,
4 ,
2 ,
2 ,
3 ,
3 ,
4 ,
4 ,
4 ,
4 ,
4 ,
4 ,
5 ,
5 ,
2002-05-25 00:30:31 +00:00
3 ,
2002-01-08 01:05:57 +00:00
3
} ;
2002-02-08 04:53:41 +00:00
static const char * expected [ ] = {
2002-01-08 01:05:57 +00:00
" UTF-16BE " ,
" UTF-16LE " ,
" UTF-8 " ,
" SCSU " ,
" UTF-32BE " ,
" UTF-32LE " ,
" UTF-16BE " ,
" UTF-16LE " ,
" UTF-8 " ,
" SCSU " ,
" UTF-32BE " ,
" UTF-32LE " ,
" UTF-16BE " ,
" UTF-16LE " ,
" UTF-8 " ,
" SCSU " ,
" UTF-32BE " ,
" UTF-32LE " ,
2002-05-25 00:30:31 +00:00
" BOCU-1 " ,
2002-01-08 01:05:57 +00:00
NULL
} ;
2002-02-08 04:53:41 +00:00
static const int32_t expectedLength [ ] = {
2002-01-08 01:05:57 +00:00
2 ,
2 ,
3 ,
3 ,
4 ,
4 ,
2 ,
2 ,
3 ,
3 ,
4 ,
4 ,
2 ,
2 ,
3 ,
3 ,
4 ,
4 ,
2002-05-25 00:30:31 +00:00
3 ,
2002-01-08 01:05:57 +00:00
0
} ;
int i = 0 ;
UErrorCode err ;
int32_t signatureLength = - 1 ;
int32_t sourceLength = - 1 ;
2002-02-08 04:53:41 +00:00
const char * source = NULL ;
2002-01-08 01:05:57 +00:00
const char * enc = NULL ;
for ( ; i < sizeof ( data ) / sizeof ( char * ) ; i + + ) {
err = U_ZERO_ERROR ;
source = data [ i ] ;
sourceLength = len [ i ] ;
enc = ucnv_detectUnicodeSignature ( source , sourceLength , & signatureLength , & err ) ;
if ( U_FAILURE ( err ) ) {
log_err ( " ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s \n " , source , i , u_errorName ( err ) ) ;
continue ;
}
if ( enc = = NULL | | strcmp ( enc , expected [ i ] ) ! = 0 ) {
if ( expected [ i ] ! = NULL ) {
log_err ( " ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s \n " , source , i , expected [ i ] , enc ) ;
continue ;
}
}
if ( signatureLength ! = expectedLength [ i ] ) {
log_err ( " ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i \n " , source , i , signatureLength , expectedLength [ i ] ) ;
}
}
}
}
2009-08-26 01:02:40 +00:00
static void TestUTF7 ( ) {
2001-01-09 22:57:47 +00:00
/* test input */
static const uint8_t in [ ] = {
/* H - +Jjo- - ! +- +2AHcAQ */
0x48 ,
0x2d ,
0x2b , 0x4a , 0x6a , 0x6f ,
0x2d , 0x2d ,
0x21 ,
0x2b , 0x2d ,
0x2b , 0x32 , 0x41 , 0x48 , 0x63 , 0x41 , 0x51
} ;
/* expected test results */
2003-08-01 14:30:29 +00:00
static const int32_t results [ ] = {
2001-01-09 22:57:47 +00:00
/* number of bytes read, code point */
1 , 0x48 ,
1 , 0x2d ,
4 , 0x263a , /* <WHITE SMILING FACE> */
2 , 0x2d ,
1 , 0x21 ,
2 , 0x2b ,
7 , 0x10401
} ;
2001-11-07 01:03:53 +00:00
const char * cnvName ;
2001-01-09 22:57:47 +00:00
const char * source = ( const char * ) in , * limit = ( const char * ) in + sizeof ( in ) ;
UErrorCode errorCode = U_ZERO_ERROR ;
UConverter * cnv = ucnv_open ( " UTF-7 " , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
2002-09-20 19:07:19 +00:00
log_err ( " Unable to open a UTF-7 converter: %s \n " , u_errorName ( errorCode ) ) ; /* sholdn't be a data err */
2001-01-09 22:57:47 +00:00
return ;
}
TestNextUChar ( cnv , source , limit , results , " UTF-7 " ) ;
/* Test the condition when source >= sourceLimit */
TestNextUCharError ( cnv , source , source , U_INDEX_OUTOFBOUNDS_ERROR , " sourceLimit <= source " ) ;
2001-11-07 01:03:53 +00:00
cnvName = ucnv_getName ( cnv , & errorCode ) ;
if ( U_FAILURE ( errorCode ) | | uprv_strcmp ( cnvName , " UTF-7 " ) ! = 0 ) {
log_err ( " UTF-7 converter is called %s: %s \n " , cnvName , u_errorName ( errorCode ) ) ;
}
2001-01-09 22:57:47 +00:00
ucnv_close ( cnv ) ;
}
2009-08-26 01:02:40 +00:00
static void TestIMAP ( ) {
2002-11-07 21:02:24 +00:00
/* test input */
static const uint8_t in [ ] = {
/* H - &Jjo- - ! &- &2AHcAQ- \ */
0x48 ,
0x2d ,
0x26 , 0x4a , 0x6a , 0x6f ,
0x2d , 0x2d ,
0x21 ,
0x26 , 0x2d ,
0x26 , 0x32 , 0x41 , 0x48 , 0x63 , 0x41 , 0x51 , 0x2d
} ;
/* expected test results */
2003-08-01 14:30:29 +00:00
static const int32_t results [ ] = {
2002-11-07 21:02:24 +00:00
/* number of bytes read, code point */
1 , 0x48 ,
1 , 0x2d ,
4 , 0x263a , /* <WHITE SMILING FACE> */
2 , 0x2d ,
1 , 0x21 ,
2 , 0x26 ,
7 , 0x10401
} ;
const char * cnvName ;
const char * source = ( const char * ) in , * limit = ( const char * ) in + sizeof ( in ) ;
UErrorCode errorCode = U_ZERO_ERROR ;
UConverter * cnv = ucnv_open ( " IMAP-mailbox-name " , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Unable to open a IMAP-mailbox-name converter: %s \n " , u_errorName ( errorCode ) ) ; /* sholdn't be a data err */
return ;
}
TestNextUChar ( cnv , source , limit , results , " IMAP-mailbox-name " ) ;
/* Test the condition when source >= sourceLimit */
TestNextUCharError ( cnv , source , source , U_INDEX_OUTOFBOUNDS_ERROR , " sourceLimit <= source " ) ;
cnvName = ucnv_getName ( cnv , & errorCode ) ;
if ( U_FAILURE ( errorCode ) | | uprv_strcmp ( cnvName , " IMAP-mailbox-name " ) ! = 0 ) {
log_err ( " IMAP-mailbox-name converter is called %s: %s \n " , cnvName , u_errorName ( errorCode ) ) ;
}
ucnv_close ( cnv ) ;
}
2009-08-26 01:02:40 +00:00
static void TestUTF8 ( ) {
2000-06-02 00:04:34 +00:00
/* test input */
static const uint8_t in [ ] = {
2000-06-22 01:18:30 +00:00
0x61 ,
2001-10-12 18:54:09 +00:00
0xc2 , 0x80 ,
0xe0 , 0xa0 , 0x80 ,
0xf0 , 0x90 , 0x80 , 0x80 ,
2000-07-19 20:14:27 +00:00
0xf4 , 0x84 , 0x8c , 0xa1 ,
2000-12-20 00:02:15 +00:00
0xf0 , 0x90 , 0x90 , 0x81
2000-06-02 00:04:34 +00:00
} ;
/* expected test results */
2003-08-01 14:30:29 +00:00
static const int32_t results [ ] = {
2000-06-02 00:04:34 +00:00
/* number of bytes read, code point */
1 , 0x61 ,
2001-10-12 18:54:09 +00:00
2 , 0x80 ,
3 , 0x800 ,
4 , 0x10000 ,
2000-07-19 20:14:27 +00:00
4 , 0x104321 ,
2000-12-20 00:02:15 +00:00
4 , 0x10401
2000-06-22 01:18:30 +00:00
} ;
2001-01-10 02:20:30 +00:00
/* error test input */
static const uint8_t in2 [ ] = {
0x61 ,
2001-10-12 18:54:09 +00:00
0xc0 , 0x80 , /* illegal non-shortest form */
0xe0 , 0x80 , 0x80 , /* illegal non-shortest form */
0xf0 , 0x80 , 0x80 , 0x80 , /* illegal non-shortest form */
2001-01-10 02:20:30 +00:00
0xc0 , 0xc0 , /* illegal trail byte */
0xf4 , 0x90 , 0x80 , 0x80 , /* 0x110000 out of range */
0xf8 , 0x80 , 0x80 , 0x80 , 0x80 , /* too long */
2001-10-12 18:54:09 +00:00
0xfe , /* illegal byte altogether */
2001-01-10 02:20:30 +00:00
0x62
} ;
/* expected error test results */
2003-08-01 14:30:29 +00:00
static const int32_t results2 [ ] = {
2001-01-10 02:20:30 +00:00
/* number of bytes read, code point */
1 , 0x61 ,
2001-10-12 18:54:09 +00:00
22 , 0x62
2001-01-10 02:20:30 +00:00
} ;
UConverterToUCallback cb ;
2001-06-26 22:51:14 +00:00
const void * p ;
2001-01-10 02:20:30 +00:00
2000-06-22 01:18:30 +00:00
const char * source = ( const char * ) in , * limit = ( const char * ) in + sizeof ( in ) ;
UErrorCode errorCode = U_ZERO_ERROR ;
UConverter * cnv = ucnv_open ( " UTF-8 " , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Unable to open a UTF-8 converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-10-27 00:10:17 +00:00
return ;
2000-06-22 01:18:30 +00:00
}
TestNextUChar ( cnv , source , limit , results , " UTF-8 " ) ;
2000-12-20 00:02:15 +00:00
/* Test the condition when source >= sourceLimit */
TestNextUCharError ( cnv , source , source , U_INDEX_OUTOFBOUNDS_ERROR , " sourceLimit <= source " ) ;
2001-01-10 02:20:30 +00:00
/* test error behavior with a skip callback */
ucnv_setToUCallBack ( cnv , UCNV_TO_U_CALLBACK_SKIP , NULL , & cb , & p , & errorCode ) ;
2001-01-23 19:43:21 +00:00
source = ( const char * ) in2 ;
limit = ( const char * ) ( in2 + sizeof ( in2 ) ) ;
2001-01-10 02:20:30 +00:00
TestNextUChar ( cnv , source , limit , results2 , " UTF-8 " ) ;
2000-06-22 01:18:30 +00:00
ucnv_close ( cnv ) ;
}
2000-11-21 04:05:39 +00:00
2009-08-26 01:02:40 +00:00
/*
 * CESU-8 converter test: well-formed input (including surrogates encoded as
 * three-byte sequences) is checked with TestNextUChar(), the
 * source >= sourceLimit case with TestNextUCharError(), and a second input
 * containing ill-formed sequences is checked with the skip callback
 * installed.
 */
static void TestCESU8() {
    /* input accepted by the converter */
    static const uint8_t wellFormed[] = {
        0x61,
        0xc2, 0x80,
        0xe0, 0xa0, 0x80,
        0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
        0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
        0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
        0xef, 0xbf, 0xbc
    };
    /* pairs of (number of bytes read, code point) for wellFormed */
    static const int32_t wellFormedResults[] = {
        1, 0x61,
        2, 0x80,
        3, 0x800,
        6, 0x10000,
        3, 0xdc01,
        -1, 0xd802,   /* may read 3 or 6 bytes */
        -1, 0x10ffff, /* may read 0 or 3 bytes */
        3, 0xfffc
    };
    /* 'a', then ill-formed sequences, then 'b' */
    static const uint8_t illFormed[] = {
        0x61,
        0xc0, 0x80,                   /* illegal non-shortest form */
        0xe0, 0x80, 0x80,             /* illegal non-shortest form */
        0xf0, 0x80, 0x80, 0x80,       /* illegal non-shortest form */
        0xc0, 0xc0,                   /* illegal trail byte */
        0xf0, 0x90, 0x80, 0x80,       /* illegal 4-byte supplementary code point */
        0xf4, 0x84, 0x8c, 0xa1,       /* illegal 4-byte supplementary code point */
        0xf0, 0x90, 0x90, 0x81,       /* illegal 4-byte supplementary code point */
        0xf4, 0x90, 0x80, 0x80,       /* 0x110000 out of range */
        0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
        0xfe,                         /* illegal byte altogether */
        0x62
    };
    /* pairs of (number of bytes read, code point) for illFormed:
       the 33 ill-formed bytes are counted together with the final 'b' */
    static const int32_t illFormedResults[] = {
        1, 0x61,
        34, 0x62
    };

    UConverterToUCallback previousAction;
    const void *previousContext;

    const char *source = (const char *)wellFormed;
    const char *limit = (const char *)wellFormed + sizeof(wellFormed);
    UErrorCode errorCode = U_ZERO_ERROR;
    UConverter *cnv = ucnv_open(" CESU-8 ", &errorCode);

    if (U_FAILURE(errorCode)) {
        log_err(" Unable to open a CESU-8 converter: %s \n ", u_errorName(errorCode));
        return;
    }

    TestNextUChar(cnv, source, limit, wellFormedResults, " CESU-8 ");

    /* source >= sourceLimit must be reported as an index error */
    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, " sourceLimit <= source ");

    /* error behavior with a skip callback */
    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &previousAction, &previousContext, &errorCode);
    source = (const char *)illFormed;
    limit = (const char *)(illFormed + sizeof(illFormed));
    TestNextUChar(cnv, source, limit, illFormedResults, " CESU-8 ");

    ucnv_close(cnv);
}
2009-08-26 01:02:40 +00:00
/*
 * Test of the "UTF-16" converter with three inputs that differ in their
 * first two bytes (FE FF, FF FE, FE FE). Each input is run through
 * TestNextUChar() after resetting the converter, followed by the
 * source >= sourceLimit error case.
 */
static void TestUTF16() {
    /* input starting with FE FF */
    static const uint8_t bigEndianMarked[] = {
        0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
    };
    /* input starting with FF FE */
    static const uint8_t littleEndianMarked[] = {
        0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
    };
    /* input starting with FE FE */
    static const uint8_t unmarked[] = {
        0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
    };

    /* expected pairs of (number of bytes read, code point) per input */
    static const int32_t bigEndianResults[] = {
        4, 0x4e00,
        2, 0xfeff
    };
    static const int32_t littleEndianResults[] = {
        4, 0x004e,
        2, 0xfffe
    };
    static const int32_t unmarkedResults[] = {
        2, 0xfefe,
        2, 0x4e00,
        2, 0xfeff,
        4, 0x20001
    };

    const char *source;
    const char *limit;
    UErrorCode errorCode = U_ZERO_ERROR;
    UConverter *cnv = ucnv_open(" UTF-16 ", &errorCode);

    if (U_FAILURE(errorCode)) {
        log_err(" Unable to open a UTF-16 converter: %s \n ", u_errorName(errorCode));
        return;
    }

    source = (const char *)bigEndianMarked;
    limit = (const char *)bigEndianMarked + sizeof(bigEndianMarked);
    TestNextUChar(cnv, source, limit, bigEndianResults, " UTF-16 ");

    source = (const char *)littleEndianMarked;
    limit = (const char *)littleEndianMarked + sizeof(littleEndianMarked);
    ucnv_resetToUnicode(cnv);
    TestNextUChar(cnv, source, limit, littleEndianResults, " UTF-16 ");

    source = (const char *)unmarked;
    limit = (const char *)unmarked + sizeof(unmarked);
    ucnv_resetToUnicode(cnv);
    TestNextUChar(cnv, source, limit, unmarkedResults, " UTF-16 ");

    /* source >= sourceLimit must be reported as an index error */
    ucnv_resetToUnicode(cnv);
    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, " sourceLimit <= source ");

    ucnv_close(cnv);
}
2009-08-26 01:02:40 +00:00
static void TestUTF16BE ( ) {
2000-06-22 01:18:30 +00:00
/* test input */
static const uint8_t in [ ] = {
2001-04-18 19:31:05 +00:00
0x00 , 0x61 ,
0x00 , 0xc0 ,
0x00 , 0x31 ,
0x00 , 0xf4 ,
2000-06-22 01:18:30 +00:00
0xce , 0xfe ,
2000-12-20 00:02:15 +00:00
0xd8 , 0x01 , 0xdc , 0x01
2000-06-02 00:04:34 +00:00
} ;
2000-06-22 01:18:30 +00:00
/* expected test results */
2003-08-01 14:30:29 +00:00
static const int32_t results [ ] = {
2000-06-22 01:18:30 +00:00
/* number of bytes read, code point */
2 , 0x61 ,
2 , 0xc0 ,
2001-04-18 19:31:05 +00:00
2 , 0x31 ,
2000-06-22 01:18:30 +00:00
2 , 0xf4 ,
2000-07-18 18:04:50 +00:00
2 , 0xcefe ,
2000-12-20 00:02:15 +00:00
4 , 0x10401
2000-06-22 01:18:30 +00:00
} ;
2000-06-02 00:04:34 +00:00
2000-06-22 01:18:30 +00:00
const char * source = ( const char * ) in , * limit = ( const char * ) in + sizeof ( in ) ;
2000-06-02 00:04:34 +00:00
UErrorCode errorCode = U_ZERO_ERROR ;
2000-06-22 01:18:30 +00:00
UConverter * cnv = ucnv_open ( " utf-16be " , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Unable to open a UTF16-BE converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-10-27 00:10:17 +00:00
return ;
2000-06-22 01:18:30 +00:00
}
TestNextUChar ( cnv , source , limit , results , " UTF-16BE " ) ;
2000-12-20 00:02:15 +00:00
/* Test the condition when source >= sourceLimit */
TestNextUCharError ( cnv , source , source , U_INDEX_OUTOFBOUNDS_ERROR , " sourceLimit <= source " ) ;
2000-06-22 23:46:02 +00:00
/*Test for the condition where there is an invalid character*/
{
static const uint8_t source2 [ ] = { 0x61 } ;
2003-08-01 14:30:29 +00:00
ucnv_setToUCallBack ( cnv , UCNV_TO_U_CALLBACK_STOP , NULL , NULL , NULL , & errorCode ) ;
2000-06-22 23:46:02 +00:00
TestNextUCharError ( cnv , ( const char * ) source2 , ( const char * ) source2 + sizeof ( source2 ) , U_TRUNCATED_CHAR_FOUND , " an invalid character " ) ;
}
2003-08-01 14:30:29 +00:00
#if 0
/*
* Test disabled because currently the UTF - 16 BE / LE converters are supposed
* to not set errors for unpaired surrogates .
* This may change with
* Jitterbug 1838 - forbid converting surrogate code points in UTF - 16 / 32
*/
2000-06-22 23:46:02 +00:00
/*Test for the condition where there is a surrogate pair*/
{
2000-07-18 18:04:50 +00:00
const uint8_t source2 [ ] = { 0xd8 , 0x01 } ;
2001-04-18 19:31:05 +00:00
TestNextUCharError ( cnv , ( const char * ) source2 , ( const char * ) source2 + sizeof ( source2 ) , U_TRUNCATED_CHAR_FOUND , " an truncated surrogate character " ) ;
2000-06-22 23:46:02 +00:00
}
2003-08-01 14:30:29 +00:00
# endif
2000-06-22 01:18:30 +00:00
ucnv_close ( cnv ) ;
}
2000-11-21 04:05:39 +00:00
static void
2000-06-22 01:18:30 +00:00
TestUTF16LE ( ) {
/* test input */
static const uint8_t in [ ] = {
0x61 , 0x00 ,
0x31 , 0x00 ,
2001-04-18 19:31:05 +00:00
0x4e , 0x2e ,
2000-06-22 01:18:30 +00:00
0x4e , 0x00 ,
2000-12-20 00:02:15 +00:00
0x01 , 0xd8 , 0x01 , 0xdc
2000-06-22 01:18:30 +00:00
} ;
2000-06-02 00:04:34 +00:00
2000-06-22 01:18:30 +00:00
/* expected test results */
2003-08-01 14:30:29 +00:00
static const int32_t results [ ] = {
2000-06-22 01:18:30 +00:00
/* number of bytes read, code point */
2 , 0x61 ,
2 , 0x31 ,
2 , 0x2e4e ,
2 , 0x4e ,
2000-12-20 00:02:15 +00:00
4 , 0x10401
2000-06-22 01:18:30 +00:00
} ;
const char * source = ( const char * ) in , * limit = ( const char * ) in + sizeof ( in ) ;
UErrorCode errorCode = U_ZERO_ERROR ;
UConverter * cnv = ucnv_open ( " utf-16le " , & errorCode ) ;
2000-06-02 00:04:34 +00:00
if ( U_FAILURE ( errorCode ) ) {
2000-06-22 01:18:30 +00:00
log_err ( " Unable to open a UTF16-LE converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-10-27 00:10:17 +00:00
return ;
2000-06-02 00:04:34 +00:00
}
2000-06-22 01:18:30 +00:00
TestNextUChar ( cnv , source , limit , results , " UTF-16LE " ) ;
2000-12-20 00:02:15 +00:00
/* Test the condition when source >= sourceLimit */
TestNextUCharError ( cnv , source , source , U_INDEX_OUTOFBOUNDS_ERROR , " sourceLimit <= source " ) ;
2000-06-22 23:46:02 +00:00
/*Test for the condition where there is an invalid character*/
{
static const uint8_t source2 [ ] = { 0x61 } ;
2003-08-01 14:30:29 +00:00
ucnv_setToUCallBack ( cnv , UCNV_TO_U_CALLBACK_STOP , NULL , NULL , NULL , & errorCode ) ;
2000-06-22 23:46:02 +00:00
TestNextUCharError ( cnv , ( const char * ) source2 , ( const char * ) source2 + sizeof ( source2 ) , U_TRUNCATED_CHAR_FOUND , " an invalid character " ) ;
}
2003-08-01 14:30:29 +00:00
#if 0
/*
* Test disabled because currently the UTF - 16 BE / LE converters are supposed
* to not set errors for unpaired surrogates .
* This may change with
* Jitterbug 1838 - forbid converting surrogate code points in UTF - 16 / 32
*/
2000-07-18 18:04:50 +00:00
/*Test for the condition where there is a surrogate character*/
{
static const uint8_t source2 [ ] = { 0x01 , 0xd8 } ;
TestNextUCharError ( cnv , ( const char * ) source2 , ( const char * ) source2 + sizeof ( source2 ) , U_TRUNCATED_CHAR_FOUND , " an truncated surrogate character " ) ;
}
2003-08-01 14:30:29 +00:00
# endif
2001-04-18 19:31:05 +00:00
2000-06-22 01:18:30 +00:00
ucnv_close ( cnv ) ;
}
2000-11-21 04:05:39 +00:00
2009-08-26 01:02:40 +00:00
/*
 * Runs TestNextUChar() over three byte sequences on one converter opened as
 * " UTF-32 ", resetting the to-Unicode state before the second and third
 * sequence, and finally checks the empty-input (source == sourceLimit) case.
 *
 * Each expectation table is a flat list of pairs:
 *     number of bytes read, code point
 */
static void TestUTF32() {
    /* sequence that begins with 00 00 FE FF */
    static const uint8_t bytesFirst[] = {
        0x00, 0x00, 0xfe, 0xff,
        0x00, 0x10, 0x0f, 0x00,
        0x00, 0x00, 0xfe, 0xff
    };
    /* sequence that begins with FF FE 00 00 */
    static const uint8_t bytesSecond[] = {
        0xff, 0xfe, 0x00, 0x00,
        0x00, 0x10, 0x0f, 0x00,
        0xfe, 0xff, 0x00, 0x00
    };
    /* sequence that begins with 00 00 FE FE and ends in two surrogate values */
    static const uint8_t bytesThird[] = {
        0x00, 0x00, 0xfe, 0xfe,
        0x00, 0x10, 0x0f, 0x00,
        0x00, 0x00, 0xd8, 0x40,
        0x00, 0x00, 0xdc, 0x01
    };

    static const int32_t expectedFirst[] = {
        8, 0x100f00,
        4, 0xfeff
    };
    static const int32_t expectedSecond[] = {
        8, 0x0f1000,
        4, 0xfffe
    };
    static const int32_t expectedThird[] = {
        4, 0xfefe,
        4, 0x100f00,
        4, 0xfffd, /* unmatched surrogate */
        4, 0xfffd  /* unmatched surrogate */
    };

    UErrorCode errorCode = U_ZERO_ERROR;
    const char *source;
    const char *limit;
    UConverter *cnv = ucnv_open(" UTF-32 ", &errorCode);

    if (U_FAILURE(errorCode)) {
        log_err(" Unable to open a UTF-32 converter: %s \n ", u_errorName(errorCode));
        return;
    }

    source = (const char *)bytesFirst;
    limit = source + sizeof(bytesFirst);
    TestNextUChar(cnv, source, limit, expectedFirst, " UTF-32 ");

    source = (const char *)bytesSecond;
    limit = source + sizeof(bytesSecond);
    ucnv_resetToUnicode(cnv);
    TestNextUChar(cnv, source, limit, expectedSecond, " UTF-32 ");

    source = (const char *)bytesThird;
    limit = source + sizeof(bytesThird);
    ucnv_resetToUnicode(cnv);
    TestNextUChar(cnv, source, limit, expectedThird, " UTF-32 ");

    /* Empty input: source and sourceLimit are the same pointer. */
    ucnv_resetToUnicode(cnv);
    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, " sourceLimit <= source ");

    ucnv_close(cnv);
}
2000-12-20 00:02:15 +00:00
static void
TestUTF32BE ( ) {
/* test input */
static const uint8_t in [ ] = {
0x00 , 0x00 , 0x00 , 0x61 ,
2003-10-02 17:02:14 +00:00
0x00 , 0x00 , 0x30 , 0x61 ,
2001-01-25 20:14:08 +00:00
0x00 , 0x00 , 0xdc , 0x00 ,
2000-12-20 00:02:15 +00:00
0x00 , 0x00 , 0xd8 , 0x00 ,
0x00 , 0x00 , 0xdf , 0xff ,
2003-10-02 17:02:14 +00:00
0x00 , 0x00 , 0xff , 0xfe ,
2001-01-25 20:14:08 +00:00
0x00 , 0x10 , 0xab , 0xcd ,
0x00 , 0x10 , 0xff , 0xff
2000-12-20 00:02:15 +00:00
} ;
/* expected test results */
2003-08-01 14:30:29 +00:00
static const int32_t results [ ] = {
2000-12-20 00:02:15 +00:00
/* number of bytes read, code point */
4 , 0x61 ,
2003-10-02 17:02:14 +00:00
4 , 0x3061 ,
2000-12-20 00:02:15 +00:00
4 , 0xfffd ,
2003-10-02 17:02:14 +00:00
4 , 0xfffd ,
4 , 0xfffd ,
4 , 0xfffe ,
2001-01-25 20:14:08 +00:00
4 , 0x10abcd ,
4 , 0x10ffff
2000-12-20 00:02:15 +00:00
} ;
2001-01-10 02:20:30 +00:00
/* error test input */
static const uint8_t in2 [ ] = {
0x00 , 0x00 , 0x00 , 0x61 ,
0x00 , 0x11 , 0x00 , 0x00 , /* 0x110000 out of range */
2001-01-25 20:14:08 +00:00
0x00 , 0x00 , 0x00 , 0x62 ,
0xff , 0xff , 0xff , 0xff , /* 0xffffffff out of range */
2001-01-25 20:19:43 +00:00
0x7f , 0xff , 0xff , 0xff , /* 0x7fffffff out of range */
2001-01-25 20:14:08 +00:00
0x00 , 0x00 , 0x01 , 0x62 ,
0x00 , 0x00 , 0x02 , 0x62
2001-01-10 02:20:30 +00:00
} ;
/* expected error test results */
2003-08-01 14:30:29 +00:00
static const int32_t results2 [ ] = {
2001-01-10 02:20:30 +00:00
/* number of bytes read, code point */
2001-01-25 20:19:43 +00:00
4 , 0x61 ,
8 , 0x62 ,
12 , 0x162 ,
4 , 0x262
2001-01-10 02:20:30 +00:00
} ;
UConverterToUCallback cb ;
2001-06-26 22:51:14 +00:00
const void * p ;
2001-01-10 02:20:30 +00:00
2000-12-20 00:02:15 +00:00
const char * source = ( const char * ) in , * limit = ( const char * ) in + sizeof ( in ) ;
UErrorCode errorCode = U_ZERO_ERROR ;
UConverter * cnv = ucnv_open ( " UTF-32BE " , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Unable to open a UTF-32BE converter: %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
TestNextUChar ( cnv , source , limit , results , " UTF-32BE " ) ;
/* Test the condition when source >= sourceLimit */
TestNextUCharError ( cnv , source , source , U_INDEX_OUTOFBOUNDS_ERROR , " sourceLimit <= source " ) ;
2001-01-10 02:20:30 +00:00
/* test error behavior with a skip callback */
ucnv_setToUCallBack ( cnv , UCNV_TO_U_CALLBACK_SKIP , NULL , & cb , & p , & errorCode ) ;
2001-01-23 19:43:21 +00:00
source = ( const char * ) in2 ;
limit = ( const char * ) ( in2 + sizeof ( in2 ) ) ;
2001-01-10 02:20:30 +00:00
TestNextUChar ( cnv , source , limit , results2 , " UTF-32BE " ) ;
2000-12-20 00:02:15 +00:00
ucnv_close ( cnv ) ;
}
static void
TestUTF32LE ( ) {
/* test input */
static const uint8_t in [ ] = {
0x61 , 0x00 , 0x00 , 0x00 ,
2003-10-02 17:02:14 +00:00
0x61 , 0x30 , 0x00 , 0x00 ,
2001-01-25 20:14:08 +00:00
0x00 , 0xdc , 0x00 , 0x00 ,
2000-12-20 00:02:15 +00:00
0x00 , 0xd8 , 0x00 , 0x00 ,
0xff , 0xdf , 0x00 , 0x00 ,
2003-10-02 17:02:14 +00:00
0xfe , 0xff , 0x00 , 0x00 ,
2001-01-25 20:14:08 +00:00
0xcd , 0xab , 0x10 , 0x00 ,
0xff , 0xff , 0x10 , 0x00
2000-12-20 00:02:15 +00:00
} ;
/* expected test results */
2003-08-01 14:30:29 +00:00
static const int32_t results [ ] = {
2000-12-20 00:02:15 +00:00
/* number of bytes read, code point */
4 , 0x61 ,
2003-10-02 17:02:14 +00:00
4 , 0x3061 ,
4 , 0xfffd ,
4 , 0xfffd ,
2000-12-20 00:02:15 +00:00
4 , 0xfffd ,
2003-10-02 17:02:14 +00:00
4 , 0xfffe ,
2001-01-25 20:14:08 +00:00
4 , 0x10abcd ,
4 , 0x10ffff
2000-12-20 00:02:15 +00:00
} ;
2001-01-10 02:20:30 +00:00
/* error test input */
static const uint8_t in2 [ ] = {
0x61 , 0x00 , 0x00 , 0x00 ,
0x00 , 0x00 , 0x11 , 0x00 , /* 0x110000 out of range */
2001-01-25 20:14:08 +00:00
0x62 , 0x00 , 0x00 , 0x00 ,
0xff , 0xff , 0xff , 0xff , /* 0xffffffff out of range */
2001-01-25 20:19:43 +00:00
0xff , 0xff , 0xff , 0x7f , /* 0x7fffffff out of range */
2001-01-25 20:14:08 +00:00
0x62 , 0x01 , 0x00 , 0x00 ,
0x62 , 0x02 , 0x00 , 0x00 ,
2001-01-10 02:20:30 +00:00
} ;
/* expected error test results */
2003-08-01 14:30:29 +00:00
static const int32_t results2 [ ] = {
2001-01-10 02:20:30 +00:00
/* number of bytes read, code point */
2001-01-25 20:19:43 +00:00
4 , 0x61 ,
8 , 0x62 ,
12 , 0x162 ,
4 , 0x262 ,
2001-01-10 02:20:30 +00:00
} ;
UConverterToUCallback cb ;
2001-06-26 22:51:14 +00:00
const void * p ;
2001-01-10 02:20:30 +00:00
2000-12-20 00:02:15 +00:00
const char * source = ( const char * ) in , * limit = ( const char * ) in + sizeof ( in ) ;
UErrorCode errorCode = U_ZERO_ERROR ;
UConverter * cnv = ucnv_open ( " UTF-32LE " , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Unable to open a UTF-32LE converter: %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
TestNextUChar ( cnv , source , limit , results , " UTF-32LE " ) ;
/* Test the condition when source >= sourceLimit */
TestNextUCharError ( cnv , source , source , U_INDEX_OUTOFBOUNDS_ERROR , " sourceLimit <= source " ) ;
2001-01-10 02:20:30 +00:00
/* test error behavior with a skip callback */
ucnv_setToUCallBack ( cnv , UCNV_TO_U_CALLBACK_SKIP , NULL , & cb , & p , & errorCode ) ;
2001-01-23 19:43:21 +00:00
source = ( const char * ) in2 ;
limit = ( const char * ) ( in2 + sizeof ( in2 ) ) ;
2001-01-10 02:20:30 +00:00
TestNextUChar ( cnv , source , limit , results2 , " UTF-32LE " ) ;
2000-12-20 00:02:15 +00:00
ucnv_close ( cnv ) ;
}
2000-11-21 04:05:39 +00:00
static void
2000-06-22 01:18:30 +00:00
TestLATIN1 ( ) {
/* test input */
2001-04-18 19:31:05 +00:00
static const uint8_t in [ ] = {
2000-06-22 01:18:30 +00:00
0x61 ,
0x31 ,
0x32 ,
2001-04-18 19:31:05 +00:00
0xc0 ,
2000-06-22 01:18:30 +00:00
0xf0 ,
2001-04-18 19:31:05 +00:00
0xf4 ,
2000-06-22 01:18:30 +00:00
} ;
2000-06-02 00:04:34 +00:00
2000-06-22 01:18:30 +00:00
/* expected test results */
2003-08-01 14:30:29 +00:00
static const int32_t results [ ] = {
2000-06-22 01:18:30 +00:00
/* number of bytes read, code point */
1 , 0x61 ,
1 , 0x31 ,
1 , 0x32 ,
2000-06-26 22:16:32 +00:00
1 , 0xc0 ,
1 , 0xf0 ,
1 , 0xf4 ,
2000-06-22 01:18:30 +00:00
} ;
2001-07-14 02:29:21 +00:00
static const uint16_t in1 [ ] = {
0x08 , 0x00 , 0x1b , 0x4c , 0xea , 0x16 , 0xca , 0xd3 , 0x94 , 0x0f , 0x53 , 0xef , 0x61 , 0x1b , 0xe5 , 0x84 ,
0xc4 , 0x0f , 0x53 , 0xef , 0x61 , 0x1b , 0xe5 , 0x84 , 0xc4 , 0x16 , 0xca , 0xd3 , 0x94 , 0x08 , 0x02 , 0x0f ,
0x53 , 0x4a , 0x4e , 0x16 , 0x7d , 0x00 , 0x30 , 0x82 , 0x52 , 0x4d , 0x30 , 0x6b , 0x6d , 0x41 , 0x88 , 0x4c ,
0xe5 , 0x97 , 0x9f , 0x08 , 0x0c , 0x16 , 0xca , 0xd3 , 0x94 , 0x15 , 0xae , 0x0e , 0x6b , 0x4c , 0x08 , 0x0d ,
0x8c , 0xb4 , 0xa3 , 0x9f , 0xca , 0x99 , 0xcb , 0x8b , 0xc2 , 0x97 , 0xcc , 0xaa , 0x84 , 0x08 , 0x02 , 0x0e ,
0x7c , 0x73 , 0xe2 , 0x16 , 0xa3 , 0xb7 , 0xcb , 0x93 , 0xd3 , 0xb4 , 0xc5 , 0xdc , 0x9f , 0x0e , 0x79 , 0x3e ,
0x06 , 0xae , 0xb1 , 0x9d , 0x93 , 0xd3 , 0x08 , 0x0c , 0xbe , 0xa3 , 0x8f , 0x08 , 0x88 , 0xbe , 0xa3 , 0x8d ,
0xd3 , 0xa8 , 0xa3 , 0x97 , 0xc5 , 0x17 , 0x89 , 0x08 , 0x0d , 0x15 , 0xd2 , 0x08 , 0x01 , 0x93 , 0xc8 , 0xaa ,
0x8f , 0x0e , 0x61 , 0x1b , 0x99 , 0xcb , 0x0e , 0x4e , 0xba , 0x9f , 0xa1 , 0xae , 0x93 , 0xa8 , 0xa0 , 0x08 ,
0x02 , 0x08 , 0x0c , 0xe2 , 0x16 , 0xa3 , 0xb7 , 0xcb , 0x0f , 0x4f , 0xe1 , 0x80 , 0x05 , 0xec , 0x60 , 0x8d ,
0xea , 0x06 , 0xd3 , 0xe6 , 0x0f , 0x8a , 0x00 , 0x30 , 0x44 , 0x65 , 0xb9 , 0xe4 , 0xfe , 0xe7 , 0xc2 , 0x06 ,
0xcb , 0x82
} ;
static const uint8_t out1 [ ] = {
0x08 , 0x00 , 0x1b , 0x4c , 0xea , 0x16 , 0xca , 0xd3 , 0x94 , 0x0f , 0x53 , 0xef , 0x61 , 0x1b , 0xe5 , 0x84 ,
0xc4 , 0x0f , 0x53 , 0xef , 0x61 , 0x1b , 0xe5 , 0x84 , 0xc4 , 0x16 , 0xca , 0xd3 , 0x94 , 0x08 , 0x02 , 0x0f ,
0x53 , 0x4a , 0x4e , 0x16 , 0x7d , 0x00 , 0x30 , 0x82 , 0x52 , 0x4d , 0x30 , 0x6b , 0x6d , 0x41 , 0x88 , 0x4c ,
0xe5 , 0x97 , 0x9f , 0x08 , 0x0c , 0x16 , 0xca , 0xd3 , 0x94 , 0x15 , 0xae , 0x0e , 0x6b , 0x4c , 0x08 , 0x0d ,
0x8c , 0xb4 , 0xa3 , 0x9f , 0xca , 0x99 , 0xcb , 0x8b , 0xc2 , 0x97 , 0xcc , 0xaa , 0x84 , 0x08 , 0x02 , 0x0e ,
0x7c , 0x73 , 0xe2 , 0x16 , 0xa3 , 0xb7 , 0xcb , 0x93 , 0xd3 , 0xb4 , 0xc5 , 0xdc , 0x9f , 0x0e , 0x79 , 0x3e ,
0x06 , 0xae , 0xb1 , 0x9d , 0x93 , 0xd3 , 0x08 , 0x0c , 0xbe , 0xa3 , 0x8f , 0x08 , 0x88 , 0xbe , 0xa3 , 0x8d ,
0xd3 , 0xa8 , 0xa3 , 0x97 , 0xc5 , 0x17 , 0x89 , 0x08 , 0x0d , 0x15 , 0xd2 , 0x08 , 0x01 , 0x93 , 0xc8 , 0xaa ,
0x8f , 0x0e , 0x61 , 0x1b , 0x99 , 0xcb , 0x0e , 0x4e , 0xba , 0x9f , 0xa1 , 0xae , 0x93 , 0xa8 , 0xa0 , 0x08 ,
0x02 , 0x08 , 0x0c , 0xe2 , 0x16 , 0xa3 , 0xb7 , 0xcb , 0x0f , 0x4f , 0xe1 , 0x80 , 0x05 , 0xec , 0x60 , 0x8d ,
0xea , 0x06 , 0xd3 , 0xe6 , 0x0f , 0x8a , 0x00 , 0x30 , 0x44 , 0x65 , 0xb9 , 0xe4 , 0xfe , 0xe7 , 0xc2 , 0x06 ,
0xcb , 0x82
} ;
static const uint16_t in2 [ ] = {
0x1B , 0x24 , 0x29 , 0x47 , 0x0E , 0x23 , 0x21 , 0x23 , 0x22 , 0x23 ,
0x23 , 0x23 , 0x24 , 0x23 , 0x25 , 0x23 , 0x26 , 0x23 , 0x27 , 0x23 ,
0x28 , 0x23 , 0x29 , 0x23 , 0x2A , 0x23 , 0x2B , 0x0F , 0x2F , 0x2A ,
0x70 , 0x6C , 0x61 , 0x6E , 0x65 , 0x20 , 0x31 , 0x20 , 0x2A , 0x2F ,
0x0D , 0x0A , 0x1B , 0x24 , 0x2A , 0x48 , 0x1B , 0x4E , 0x22 , 0x21 ,
0x1B , 0x4E , 0x22 , 0x22 , 0x1B , 0x4E , 0x22 , 0x23 , 0x1B , 0x4E ,
0x22 , 0x24 , 0x1B , 0x4E , 0x22 , 0x25 , 0x0F , 0x2F , 0x2A , 0x70 ,
0x6C , 0x61 , 0x6E , 0x65 , 0x32 , 0x2A , 0x2F , 0x20 , 0x0D , 0x0A ,
0x1B , 0x24 , 0x2B , 0x49 , 0x1B , 0x4F , 0x22 , 0x44 , 0x1B , 0x4F ,
0x22 , 0x45 , 0x1B , 0x4F , 0x22 , 0x46 , 0x1B , 0x4F , 0x22 , 0x47 ,
0x1B , 0x4F , 0x22 , 0x48 , 0x0F , 0x2F , 0x2A , 0x70 , 0x6C , 0x61 ,
0x6E , 0x65 , 0x20 , 0x33 , 0x20 , 0x2A , 0x2F , 0x0D , 0x0A , 0x1B ,
0x24 , 0x2B , 0x4A , 0x1B , 0x4F , 0x21 , 0x44 , 0x1B , 0x4F , 0x21 ,
0x45 , 0x1B , 0x4F , 0x22 , 0x6A , 0x1B , 0x4F , 0x22 , 0x6B , 0x1B ,
0x4F , 0x22 , 0x6C , 0x0F , 0x2F , 0x2A , 0x70 , 0x6C , 0x61 , 0x6E ,
0x65 , 0x20 , 0x34 , 0x20 , 0x2A , 0x2F , 0x0D , 0x0A , 0x1B , 0x24 ,
0x2B , 0x4B , 0x1B , 0x4F , 0x21 , 0x74 , 0x1B , 0x4F , 0x22 , 0x50 ,
0x1B , 0x4F , 0x22 , 0x51 , 0x1B , 0x4F , 0x23 , 0x37 , 0x1B , 0x4F ,
0x22 , 0x5C , 0x0F , 0x2F , 0x2A , 0x70 , 0x6C , 0x61 , 0x6E ,
0x65 , 0x20 , 0x35 , 0x20 , 0x2A , 0x2F , 0x0D , 0x0A , 0x1B , 0x24 ,
0x2B , 0x4C , 0x1B , 0x4F , 0x21 , 0x23 , 0x1B , 0x4F , 0x22 , 0x2C ,
0x1B , 0x4F , 0x23 , 0x4E , 0x1B , 0x4F , 0x21 , 0x6E , 0x1B , 0x4F ,
0x23 , 0x71 , 0x0F , 0x2F , 0x2A , 0x70 , 0x6C , 0x61 , 0x6E , 0x65 ,
0x20 , 0x36 , 0x20 , 0x2A , 0x2F , 0x0D , 0x0A , 0x1B , 0x24 , 0x2B ,
0x4D , 0x1B , 0x4F , 0x22 , 0x71 , 0x1B , 0x4F , 0x21 , 0x4E , 0x1B ,
0x4F , 0x21 , 0x6A , 0x1B , 0x4F , 0x23 , 0x3A , 0x1B , 0x4F , 0x23 ,
0x6F , 0x0F , 0x2F , 0x2A , 0x70 , 0x6C , 0x61 , 0x6E , 0x65 , 0x20 ,
0x37 , 0x20 , 0x2A , 0x2F ,
} ;
static const unsigned char out2 [ ] = {
0x1B , 0x24 , 0x29 , 0x47 , 0x0E , 0x23 , 0x21 , 0x23 , 0x22 , 0x23 ,
0x23 , 0x23 , 0x24 , 0x23 , 0x25 , 0x23 , 0x26 , 0x23 , 0x27 , 0x23 ,
0x28 , 0x23 , 0x29 , 0x23 , 0x2A , 0x23 , 0x2B , 0x0F , 0x2F , 0x2A ,
0x70 , 0x6C , 0x61 , 0x6E , 0x65 , 0x20 , 0x31 , 0x20 , 0x2A , 0x2F ,
0x0D , 0x0A , 0x1B , 0x24 , 0x2A , 0x48 , 0x1B , 0x4E , 0x22 , 0x21 ,
0x1B , 0x4E , 0x22 , 0x22 , 0x1B , 0x4E , 0x22 , 0x23 , 0x1B , 0x4E ,
0x22 , 0x24 , 0x1B , 0x4E , 0x22 , 0x25 , 0x0F , 0x2F , 0x2A , 0x70 ,
0x6C , 0x61 , 0x6E , 0x65 , 0x32 , 0x2A , 0x2F , 0x20 , 0x0D , 0x0A ,
0x1B , 0x24 , 0x2B , 0x49 , 0x1B , 0x4F , 0x22 , 0x44 , 0x1B , 0x4F ,
0x22 , 0x45 , 0x1B , 0x4F , 0x22 , 0x46 , 0x1B , 0x4F , 0x22 , 0x47 ,
0x1B , 0x4F , 0x22 , 0x48 , 0x0F , 0x2F , 0x2A , 0x70 , 0x6C , 0x61 ,
0x6E , 0x65 , 0x20 , 0x33 , 0x20 , 0x2A , 0x2F , 0x0D , 0x0A , 0x1B ,
0x24 , 0x2B , 0x4A , 0x1B , 0x4F , 0x21 , 0x44 , 0x1B , 0x4F , 0x21 ,
0x45 , 0x1B , 0x4F , 0x22 , 0x6A , 0x1B , 0x4F , 0x22 , 0x6B , 0x1B ,
0x4F , 0x22 , 0x6C , 0x0F , 0x2F , 0x2A , 0x70 , 0x6C , 0x61 , 0x6E ,
0x65 , 0x20 , 0x34 , 0x20 , 0x2A , 0x2F , 0x0D , 0x0A , 0x1B , 0x24 ,
0x2B , 0x4B , 0x1B , 0x4F , 0x21 , 0x74 , 0x1B , 0x4F , 0x22 , 0x50 ,
0x1B , 0x4F , 0x22 , 0x51 , 0x1B , 0x4F , 0x23 , 0x37 , 0x1B , 0x4F ,
0x22 , 0x5C , 0x0F , 0x2F , 0x2A , 0x70 , 0x6C , 0x61 , 0x6E ,
0x65 , 0x20 , 0x35 , 0x20 , 0x2A , 0x2F , 0x0D , 0x0A , 0x1B , 0x24 ,
0x2B , 0x4C , 0x1B , 0x4F , 0x21 , 0x23 , 0x1B , 0x4F , 0x22 , 0x2C ,
0x1B , 0x4F , 0x23 , 0x4E , 0x1B , 0x4F , 0x21 , 0x6E , 0x1B , 0x4F ,
0x23 , 0x71 , 0x0F , 0x2F , 0x2A , 0x70 , 0x6C , 0x61 , 0x6E , 0x65 ,
0x20 , 0x36 , 0x20 , 0x2A , 0x2F , 0x0D , 0x0A , 0x1B , 0x24 , 0x2B ,
0x4D , 0x1B , 0x4F , 0x22 , 0x71 , 0x1B , 0x4F , 0x21 , 0x4E , 0x1B ,
0x4F , 0x21 , 0x6A , 0x1B , 0x4F , 0x23 , 0x3A , 0x1B , 0x4F , 0x23 ,
0x6F , 0x0F , 0x2F , 0x2A , 0x70 , 0x6C , 0x61 , 0x6E , 0x65 , 0x20 ,
0x37 , 0x20 , 0x2A , 0x2F ,
} ;
2000-06-22 01:18:30 +00:00
const char * source = ( const char * ) in ;
const char * limit = ( const char * ) in + sizeof ( in ) ;
UErrorCode errorCode = U_ZERO_ERROR ;
UConverter * cnv = ucnv_open ( " LATIN_1 " , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
2009-06-09 21:28:13 +00:00
log_data_err ( " Unable to open a LATIN_1 converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-10-27 00:10:17 +00:00
return ;
2000-06-02 00:04:34 +00:00
}
2000-06-22 01:18:30 +00:00
TestNextUChar ( cnv , source , limit , results , " LATIN_1 " ) ;
2000-12-20 00:02:15 +00:00
/* Test the condition when source >= sourceLimit */
TestNextUCharError ( cnv , source , source , U_INDEX_OUTOFBOUNDS_ERROR , " sourceLimit <= source " ) ;
2001-08-03 16:08:41 +00:00
TestConv ( ( uint16_t * ) in1 , sizeof ( in1 ) / 2 , " LATIN_1 " , " LATIN-1 " , ( char * ) out1 , sizeof ( out1 ) ) ;
TestConv ( ( uint16_t * ) in2 , sizeof ( in2 ) / 2 , " ASCII " , " ASCII " , ( char * ) out2 , sizeof ( out2 ) ) ;
2001-07-14 02:29:21 +00:00
2000-06-22 01:18:30 +00:00
ucnv_close ( cnv ) ;
}
2000-11-21 04:05:39 +00:00
static void
2000-06-22 01:18:30 +00:00
TestSBCS ( ) {
/* test input */
static const uint8_t in [ ] = { 0x61 , 0xc0 , 0x80 , 0xe0 , 0xf0 , 0xf4 } ;
/* expected test results */
2003-08-01 14:30:29 +00:00
static const int32_t results [ ] = {
2000-06-22 01:18:30 +00:00
/* number of bytes read, code point */
1 , 0x61 ,
1 , 0xbf ,
1 , 0xc4 ,
1 , 0x2021 ,
2000-10-27 23:55:56 +00:00
1 , 0xf8ff ,
2000-06-22 01:18:30 +00:00
1 , 0x00d9
} ;
2000-06-02 00:04:34 +00:00
2000-06-22 01:18:30 +00:00
const char * source = ( const char * ) in , * limit = ( const char * ) in + sizeof ( in ) ;
UErrorCode errorCode = U_ZERO_ERROR ;
2003-12-03 18:29:33 +00:00
UConverter * cnv = ucnv_open ( " x-mac-turkish " , & errorCode ) ;
2000-06-22 01:18:30 +00:00
if ( U_FAILURE ( errorCode ) ) {
2003-12-03 18:29:33 +00:00
log_data_err ( " Unable to open a SBCS(x-mac-turkish) converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-10-27 00:10:17 +00:00
return ;
2000-06-22 01:18:30 +00:00
}
2003-12-03 18:29:33 +00:00
TestNextUChar ( cnv , source , limit , results , " SBCS(x-mac-turkish) " ) ;
2000-12-20 00:02:15 +00:00
/* Test the condition when source >= sourceLimit */
TestNextUCharError ( cnv , source , source , U_INDEX_OUTOFBOUNDS_ERROR , " sourceLimit <= source " ) ;
2001-03-05 18:39:03 +00:00
/*Test for Illegal character */ /*
2000-06-22 23:46:02 +00:00
{
static const uint8_t input1 [ ] = { 0xA1 } ;
const char * illegalsource = ( const char * ) input1 ;
TestNextUCharError ( cnv , illegalsource , illegalsource + sizeof ( illegalsource ) , U_INVALID_CHAR_FOUND , " source has a illegal characte " ) ;
}
*/
2000-07-06 23:01:50 +00:00
ucnv_close ( cnv ) ;
2000-06-02 00:04:34 +00:00
}
2000-06-22 23:46:02 +00:00
2000-11-21 04:05:39 +00:00
static void
2000-06-22 01:18:30 +00:00
TestDBCS ( ) {
/* test input */
static const uint8_t in [ ] = {
0x44 , 0x6a ,
0xc4 , 0x9c ,
0x7a , 0x74 ,
0x46 , 0xab ,
0x42 , 0x5b ,
2001-04-18 19:31:05 +00:00
2000-06-22 01:18:30 +00:00
} ;
2000-06-02 00:04:34 +00:00
2000-06-22 01:18:30 +00:00
/* expected test results */
2003-08-01 14:30:29 +00:00
static const int32_t results [ ] = {
2000-06-22 01:18:30 +00:00
/* number of bytes read, code point */
2 , 0x00a7 ,
2 , 0xe1d2 ,
2 , 0x6962 ,
2 , 0xf842 ,
2001-04-18 19:31:05 +00:00
2 , 0xffe5 ,
2000-06-22 01:18:30 +00:00
} ;
const char * source = ( const char * ) in , * limit = ( const char * ) in + sizeof ( in ) ;
UErrorCode errorCode = U_ZERO_ERROR ;
2001-04-18 19:31:05 +00:00
2003-04-25 00:24:50 +00:00
UConverter * cnv = my_ucnv_open ( " @ibm9027 " , & errorCode ) ;
2000-06-22 01:18:30 +00:00
if ( U_FAILURE ( errorCode ) ) {
2003-04-25 00:24:50 +00:00
log_data_err ( " Unable to open a DBCS(@ibm9027) converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-10-27 00:10:17 +00:00
return ;
2000-06-22 01:18:30 +00:00
}
2003-04-25 00:24:50 +00:00
TestNextUChar ( cnv , source , limit , results , " DBCS(@ibm9027) " ) ;
2000-12-20 00:02:15 +00:00
/* Test the condition when source >= sourceLimit */
TestNextUCharError ( cnv , source , source , U_INDEX_OUTOFBOUNDS_ERROR , " sourceLimit <= source " ) ;
2000-06-22 23:46:02 +00:00
/*Test for the condition where there is an invalid character*/
{
static const uint8_t source2 [ ] = { 0x1a , 0x1b } ;
TestNextUCharError ( cnv , ( const char * ) source2 , ( const char * ) source2 + sizeof ( source2 ) , U_ZERO_ERROR , " an invalid character " ) ;
}
2003-07-22 04:20:13 +00:00
/*Test for the condition where we have a truncated char*/
{
static const uint8_t source1 [ ] = { 0xc4 } ;
ucnv_setToUCallBack ( cnv , UCNV_TO_U_CALLBACK_STOP , NULL , NULL , NULL , & errorCode ) ;
TestNextUCharError ( cnv , ( const char * ) source1 , ( const char * ) source1 + sizeof ( source1 ) , U_TRUNCATED_CHAR_FOUND , " a character is truncated " ) ;
}
2000-06-22 01:18:30 +00:00
ucnv_close ( cnv ) ;
}
2000-11-21 04:05:39 +00:00
static void
2000-06-22 01:18:30 +00:00
TestMBCS ( ) {
/* test input */
static const uint8_t in [ ] = {
0x01 ,
0xa6 , 0xa3 ,
0x00 ,
0xa6 , 0xa1 ,
0x08 ,
0xc2 , 0x76 ,
2001-04-18 19:31:05 +00:00
0xc2 , 0x78 ,
2000-06-22 01:18:30 +00:00
} ;
/* expected test results */
2003-08-01 14:30:29 +00:00
static const int32_t results [ ] = {
2000-06-22 01:18:30 +00:00
/* number of bytes read, code point */
1 , 0x0001 ,
2 , 0x250c ,
1 , 0x0000 ,
2 , 0x2500 ,
2001-04-18 19:31:05 +00:00
1 , 0x0008 ,
2000-06-22 01:18:30 +00:00
2 , 0xd60c ,
2000-07-19 20:14:27 +00:00
2 , 0xd60e ,
2000-06-22 01:18:30 +00:00
} ;
const char * source = ( const char * ) in , * limit = ( const char * ) in + sizeof ( in ) ;
UErrorCode errorCode = U_ZERO_ERROR ;
2001-04-18 19:31:05 +00:00
2000-06-22 01:18:30 +00:00
UConverter * cnv = ucnv_open ( " ibm-1363 " , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
2002-09-20 19:07:19 +00:00
log_data_err ( " Unable to open a MBCS(ibm-1363) converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-10-27 00:10:17 +00:00
return ;
2000-06-22 01:18:30 +00:00
}
TestNextUChar ( cnv , source , limit , results , " MBCS(ibm-1363) " ) ;
2000-12-20 00:02:15 +00:00
/* Test the condition when source >= sourceLimit */
TestNextUCharError ( cnv , source , source , U_INDEX_OUTOFBOUNDS_ERROR , " sourceLimit <= source " ) ;
2000-06-22 23:46:02 +00:00
/*Test for the condition where there is an invalid character*/
{
2008-10-07 04:33:51 +00:00
static const uint8_t source2 [ ] = { 0xa1 , 0x80 } ;
2000-06-22 23:46:02 +00:00
TestNextUCharError ( cnv , ( const char * ) source2 , ( const char * ) source2 + sizeof ( source2 ) , U_ZERO_ERROR , " an invalid character " ) ;
}
2003-07-22 04:20:13 +00:00
/*Test for the condition where we have a truncated char*/
{
static const uint8_t source1 [ ] = { 0xc4 } ;
ucnv_setToUCallBack ( cnv , UCNV_TO_U_CALLBACK_STOP , NULL , NULL , NULL , & errorCode ) ;
TestNextUCharError ( cnv , ( const char * ) source1 , ( const char * ) source1 + sizeof ( source1 ) , U_TRUNCATED_CHAR_FOUND , " a character is truncated " ) ;
}
2000-06-22 01:18:30 +00:00
ucnv_close ( cnv ) ;
2000-08-15 18:05:12 +00:00
2000-06-22 01:18:30 +00:00
}
2000-11-21 04:05:39 +00:00
2009-08-26 01:02:40 +00:00
# if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
static void
TestICCRunout ( ) {
/* { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */
const char * cnvName = " ibm-1363 " ;
UErrorCode status = U_ZERO_ERROR ;
2010-07-12 18:03:29 +00:00
const char sourceData [ ] = { ( char ) 0xa2 , ( char ) 0xae , ( char ) 0xa2 } ;
/* UChar expectUData[] = { 0x00a1, 0x001a }; */
const char * source = sourceData ;
const char * sourceLim = sourceData + sizeof ( sourceData ) ;
2009-08-26 01:02:40 +00:00
UChar c1 , c2 , c3 ;
UConverter * cnv = ucnv_open ( cnvName , & status ) ;
if ( U_FAILURE ( status ) ) {
log_data_err ( " Unable to open %s converter: %s \n " , cnvName , u_errorName ( status ) ) ;
return ;
}
#if 0
2010-07-12 18:03:29 +00:00
{
UChar targetBuf [ 256 ] ;
UChar * target = targetBuf ;
UChar * targetLim = target + 256 ;
2009-08-26 01:02:40 +00:00
ucnv_toUnicode ( cnv , & target , targetLim , & source , sourceLim , NULL , TRUE , & status ) ;
log_info ( " After convert: target@%d, source@%d, status%s \n " ,
target - targetBuf , source - sourceData , u_errorName ( status ) ) ;
if ( U_FAILURE ( status ) ) {
log_err ( " Failed to convert: %s \n " , u_errorName ( status ) ) ;
} else {
}
2010-07-12 18:03:29 +00:00
}
2009-08-26 01:02:40 +00:00
# endif
c1 = ucnv_getNextUChar ( cnv , & source , sourceLim , & status ) ;
log_verbose ( " c1: U+%04X, source@%d, status %s \n " , c1 , source - sourceData , u_errorName ( status ) ) ;
c2 = ucnv_getNextUChar ( cnv , & source , sourceLim , & status ) ;
log_verbose ( " c2: U+%04X, source@%d, status %s \n " , c2 , source - sourceData , u_errorName ( status ) ) ;
c3 = ucnv_getNextUChar ( cnv , & source , sourceLim , & status ) ;
log_verbose ( " c3: U+%04X, source@%d, status %s \n " , c3 , source - sourceData , u_errorName ( status ) ) ;
if ( status = = U_INDEX_OUTOFBOUNDS_ERROR & & c3 = = 0xFFFF ) {
log_verbose ( " OK \n " ) ;
} else {
log_err ( " FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR \n " ) ;
}
ucnv_close ( cnv ) ;
}
# endif
2003-12-03 22:53:14 +00:00
# ifdef U_ENABLE_GENERIC_ISO_2022
2000-11-21 04:05:39 +00:00
static void
2000-06-22 01:18:30 +00:00
TestISO_2022 ( ) {
2000-01-19 19:00:53 +00:00
/* test input */
static const uint8_t in [ ] = {
2003-07-22 04:20:13 +00:00
0x1b , 0x25 , 0x42 ,
0x31 ,
2000-06-22 01:18:30 +00:00
0x32 ,
2000-01-19 19:00:53 +00:00
0x61 ,
2001-10-12 18:54:09 +00:00
0xc2 , 0x80 ,
0xe0 , 0xa0 , 0x80 ,
0xf0 , 0x90 , 0x80 , 0x80
2000-01-19 19:00:53 +00:00
} ;
2000-08-15 00:07:33 +00:00
2000-01-19 19:00:53 +00:00
/* expected test results */
2003-08-01 14:30:29 +00:00
static const int32_t results [ ] = {
2000-01-19 19:00:53 +00:00
/* number of bytes read, code point */
2003-08-01 14:30:29 +00:00
4 , 0x0031 , /* 4 bytes including the escape sequence */
2000-06-22 01:18:30 +00:00
1 , 0x0032 ,
2000-01-19 19:00:53 +00:00
1 , 0x61 ,
2001-10-12 18:54:09 +00:00
2 , 0x80 ,
3 , 0x800 ,
2003-07-22 04:20:13 +00:00
4 , 0x10000
2000-01-19 19:00:53 +00:00
} ;
2000-06-22 01:18:30 +00:00
const char * source = ( const char * ) in , * limit = ( const char * ) in + sizeof ( in ) ;
2000-01-19 19:00:53 +00:00
UErrorCode errorCode = U_ZERO_ERROR ;
2000-07-22 00:08:14 +00:00
UConverter * cnv ;
2000-08-15 00:07:33 +00:00
cnv = ucnv_open ( " ISO_2022 " , & errorCode ) ;
2000-01-19 19:00:53 +00:00
if ( U_FAILURE ( errorCode ) ) {
2002-09-20 19:07:19 +00:00
log_data_err ( " Unable to open a iso-2022 converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-07-21 23:58:01 +00:00
return ;
2000-01-19 19:00:53 +00:00
}
2000-08-15 00:07:33 +00:00
TestNextUChar ( cnv , source , limit , results , " ISO_2022 " ) ;
2000-06-22 23:46:02 +00:00
2000-12-20 00:02:15 +00:00
/* Test the condition when source >= sourceLimit */
2000-06-22 23:46:02 +00:00
TestNextUCharError ( cnv , source , source - 1 , U_ILLEGAL_ARGUMENT_ERROR , " sourceLimit < source " ) ;
2000-12-20 00:02:15 +00:00
TestNextUCharError ( cnv , source , source , U_INDEX_OUTOFBOUNDS_ERROR , " sourceLimit <= source " ) ;
2000-06-22 23:46:02 +00:00
/*Test for the condition where we have a truncated char*/
{
static const uint8_t source1 [ ] = { 0xc4 } ;
2003-08-01 14:30:29 +00:00
ucnv_setToUCallBack ( cnv , UCNV_TO_U_CALLBACK_STOP , NULL , NULL , NULL , & errorCode ) ;
2000-06-22 23:46:02 +00:00
TestNextUCharError ( cnv , ( const char * ) source1 , ( const char * ) source1 + sizeof ( source1 ) , U_TRUNCATED_CHAR_FOUND , " a character is truncated " ) ;
}
/*Test for the condition where there is an invalid character*/
{
static const uint8_t source2 [ ] = { 0xa1 , 0x01 } ;
2003-08-01 14:30:29 +00:00
TestNextUCharError ( cnv , ( const char * ) source2 , ( const char * ) source2 + sizeof ( source2 ) , U_ILLEGAL_CHAR_FOUND , " an invalid character " ) ;
2000-06-22 23:46:02 +00:00
}
2000-06-22 01:18:30 +00:00
ucnv_close ( cnv ) ;
}
2000-11-21 04:05:39 +00:00
2003-12-03 22:53:14 +00:00
# endif
2000-11-21 04:05:39 +00:00
static void
2001-03-05 18:39:03 +00:00
TestSmallTargetBuffer ( const uint16_t * source , const UChar * sourceLimit , UConverter * cnv ) {
2000-10-20 02:47:14 +00:00
const UChar * uSource ;
2000-11-17 03:03:14 +00:00
const UChar * uSourceLimit ;
const char * cSource ;
const char * cSourceLimit ;
UChar * uTargetLimit = NULL ;
UChar * uTarget ;
char * cTarget ;
const char * cTargetLimit ;
2001-04-18 19:31:05 +00:00
char * cBuf ;
2009-08-26 01:02:40 +00:00
UChar * uBuf ; /*,*test;*/
2000-10-20 02:47:14 +00:00
int32_t uBufSize = 120 ;
int len = 0 ;
2001-03-06 03:42:35 +00:00
int i = 2 ;
2000-10-20 02:47:14 +00:00
UErrorCode errorCode = U_ZERO_ERROR ;
2000-11-17 03:03:14 +00:00
uBuf = ( UChar * ) malloc ( uBufSize * sizeof ( UChar ) * 5 ) ;
cBuf = ( char * ) malloc ( uBufSize * sizeof ( char ) * 10 ) ;
2001-03-02 23:55:49 +00:00
ucnv_reset ( cnv ) ;
2000-10-28 01:08:25 +00:00
for ( ; - - i > 0 ; ) {
uSource = ( UChar * ) source ;
2000-11-17 03:03:14 +00:00
uSourceLimit = ( const UChar * ) sourceLimit ;
cTarget = cBuf ;
2000-10-28 01:08:25 +00:00
uTarget = uBuf ;
2000-11-17 03:03:14 +00:00
cSource = cBuf ;
2000-10-28 01:08:25 +00:00
cTargetLimit = cBuf ;
uTargetLimit = uBuf ;
2000-10-20 02:47:14 +00:00
2000-10-28 01:08:25 +00:00
do {
2001-04-18 19:31:05 +00:00
2000-11-17 03:03:14 +00:00
cTargetLimit = cTargetLimit + i ;
ucnv_fromUnicode ( cnv , & cTarget , cTargetLimit , & uSource , uSourceLimit , NULL , FALSE , & errorCode ) ;
2000-10-28 01:08:25 +00:00
if ( errorCode = = U_BUFFER_OVERFLOW_ERROR ) {
errorCode = U_ZERO_ERROR ;
continue ;
}
2000-11-17 03:03:14 +00:00
if ( U_FAILURE ( errorCode ) ) {
2000-10-28 01:08:25 +00:00
log_err ( " ucnv_toUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
2000-10-20 02:47:14 +00:00
return ;
2000-10-28 01:08:25 +00:00
}
} while ( uSource < uSourceLimit ) ;
cSourceLimit = cTarget ;
do {
uTargetLimit = uTargetLimit + i ;
2000-11-08 23:15:12 +00:00
ucnv_toUnicode ( cnv , & uTarget , uTargetLimit , & cSource , cSourceLimit , NULL , FALSE , & errorCode ) ;
2000-10-28 01:08:25 +00:00
if ( errorCode = = U_BUFFER_OVERFLOW_ERROR ) {
errorCode = U_ZERO_ERROR ;
continue ;
}
if ( U_FAILURE ( errorCode ) ) {
log_err ( " ucnv_toUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
} while ( cSource < cSourceLimit ) ;
uSource = source ;
2009-08-26 01:02:40 +00:00
/*test =uBuf;*/
2000-10-28 01:08:25 +00:00
for ( len = 0 ; len < ( int ) ( source - sourceLimit ) ; len + + ) {
2000-11-17 03:03:14 +00:00
if ( uBuf [ len ] ! = uSource [ len ] ) {
log_err ( " Expected : \\ u%04X \t Got: \\ u%04X \n " , uSource [ len ] , ( int ) uBuf [ len ] ) ;
}
2000-10-20 02:47:14 +00:00
}
}
free ( uBuf ) ;
2000-11-17 03:03:14 +00:00
free ( cBuf ) ;
2000-10-20 02:47:14 +00:00
}
2000-12-19 02:29:23 +00:00
/* Test for Jitterbug 778 */
2001-03-05 18:39:03 +00:00
static void TestToAndFromUChars ( const uint16_t * source , const UChar * sourceLimit , UConverter * cnv ) {
2000-12-19 02:29:23 +00:00
const UChar * uSource ;
const UChar * uSourceLimit ;
const char * cSource ;
UChar * uTargetLimit = NULL ;
UChar * uTarget ;
char * cTarget ;
const char * cTargetLimit ;
2001-04-18 19:31:05 +00:00
char * cBuf ;
2000-12-19 02:29:23 +00:00
UChar * uBuf , * test ;
int32_t uBufSize = 120 ;
int numCharsInTarget = 0 ;
UErrorCode errorCode = U_ZERO_ERROR ;
uBuf = ( UChar * ) malloc ( uBufSize * sizeof ( UChar ) * 5 ) ;
cBuf = ( char * ) malloc ( uBufSize * sizeof ( char ) * 5 ) ;
uSource = source ;
uSourceLimit = sourceLimit ;
cTarget = cBuf ;
cTargetLimit = cBuf + uBufSize * 5 ;
uTarget = uBuf ;
uTargetLimit = uBuf + uBufSize * 5 ;
ucnv_reset ( cnv ) ;
2004-12-08 23:02:08 +00:00
numCharsInTarget = ucnv_fromUChars ( cnv , cTarget , ( int32_t ) ( cTargetLimit - cTarget ) , uSource , ( int32_t ) ( uSourceLimit - uSource ) , & errorCode ) ;
2000-12-19 02:29:23 +00:00
if ( U_FAILURE ( errorCode ) ) {
log_err ( " ucnv_fromUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
cSource = cBuf ;
test = uBuf ;
2004-12-08 23:02:08 +00:00
ucnv_toUChars ( cnv , uTarget , ( int32_t ) ( uTargetLimit - uTarget ) , cSource , numCharsInTarget , & errorCode ) ;
2000-12-19 02:29:23 +00:00
if ( U_FAILURE ( errorCode ) ) {
2001-09-28 22:22:59 +00:00
log_err ( " ucnv_toUChars conversion failed, reason %s \n " , u_errorName ( errorCode ) ) ;
2000-12-19 02:29:23 +00:00
return ;
}
uSource = source ;
while ( uSource < uSourceLimit ) {
if ( * test ! = * uSource ) {
2001-04-18 19:31:05 +00:00
2000-12-19 02:29:23 +00:00
log_err ( " Expected : \\ u%04X \t Got: \\ u%04X \n " , * uSource , ( int ) * test ) ;
}
uSource + + ;
test + + ;
}
2000-12-20 04:43:54 +00:00
free ( uBuf ) ;
free ( cBuf ) ;
2000-12-19 02:29:23 +00:00
}
2000-11-21 04:05:39 +00:00
2001-03-05 18:39:03 +00:00
/*
 * Round-trips [source, sourceLimit) through cnv while feeding the converter
 * a source window that grows by a single unit per call, first with
 * ucnv_fromUnicode() and then with ucnv_toUnicode() on the bytes produced.
 * Neither direction is flushed, so the converter may legitimately hold back
 * trailing output; only the UChars that were actually produced are compared
 * against the input.
 *
 * source       first UTF-16 code unit of the input text
 * sourceLimit  one past the last code unit of the input text
 * cnv          open converter to exercise; it is reset but not closed here
 *
 * Failures are reported through log_err().
 */
static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit, UConverter* cnv) {
    const UChar* uSource;
    const UChar* uSourceLimit;
    const char* cSource;
    const char* cSourceLimit;
    UChar* uTargetLimit = NULL;
    UChar* uTarget;
    char* cTarget;
    const char* cTargetLimit;
    char* cBuf;
    UChar* uBuf;
    int32_t uBufSize = 120;
    int32_t len, produced, expected;
    int i = 2;
    const UChar* temp = sourceLimit;
    UErrorCode errorCode = U_ZERO_ERROR;

    uBuf = (UChar*)malloc(uBufSize * sizeof(UChar) * 5);
    cBuf = (char*)malloc(uBufSize * sizeof(char) * 10);
    if (uBuf == NULL || cBuf == NULL) {
        log_err("TestSmallSourceBuffer: memory allocation failed\n");
        goto cleanup;
    }

    ucnv_reset(cnv);
    /* i starts at 2, so this body executes exactly once. */
    for (; --i > 0; ) {
        uSource = (const UChar*)source;
        cTarget = cBuf;
        uTarget = uBuf;
        cSource = cBuf;
        cTargetLimit = cBuf + uBufSize * 10;
        uTargetLimit = uBuf + uBufSize * 5;
        uSourceLimit = uSource;

        /* Unicode -> bytes, widening the visible source by one unit per call. */
        do {
            if (uSourceLimit < sourceLimit) {
                uSourceLimit = uSourceLimit + 1;
            }
            ucnv_fromUnicode(cnv, &cTarget, cTargetLimit, &uSource, uSourceLimit, NULL, FALSE, &errorCode);
            if (errorCode == U_BUFFER_OVERFLOW_ERROR) {
                errorCode = U_ZERO_ERROR;
                continue;
            }
            if (U_FAILURE(errorCode)) {
                log_err(" ucnv_fromUnicode conversion failed reason %s \n ", u_errorName(errorCode));
                goto cleanup;
            }
        } while (uSource < temp);

        /* bytes -> Unicode, widening the visible source by one byte per call. */
        cSourceLimit = cBuf;
        do {
            if (cSourceLimit < cTarget) {
                cSourceLimit = cSourceLimit + 1;
            }
            ucnv_toUnicode(cnv, &uTarget, uTargetLimit, &cSource, cSourceLimit, NULL, FALSE, &errorCode);
            if (errorCode == U_BUFFER_OVERFLOW_ERROR) {
                errorCode = U_ZERO_ERROR;
                continue;
            }
            if (U_FAILURE(errorCode)) {
                log_err(" ucnv_toUnicode conversion failed reason %s \n ", u_errorName(errorCode));
                goto cleanup;
            }
        } while (cSource < cTarget);

        /*
         * Compare the round-tripped text with the input. The length is
         * sourceLimit - source (the reversed subtraction used previously was
         * negative, so nothing was ever compared), capped at the number of
         * UChars actually written because no flush was requested.
         */
        uSource = (const UChar*)source;
        expected = (int32_t)(sourceLimit - uSource);
        produced = (int32_t)(uTarget - uBuf);
        if (produced < expected) {
            expected = produced;
        }
        for (len = 0; len < expected; len++) {
            if (uBuf[len] != uSource[len]) {
                log_err(" Expected : \\ u%04X \t Got: \\ u%04X \n ", uSource[len], (int)uBuf[len]);
            }
        }
    }

cleanup:
    free(uBuf);
    free(cBuf);
}
2001-04-18 19:31:05 +00:00
/*
 * Decodes [source, limit) one code point at a time with ucnv_getNextUChar()
 * and compares each code point with the UTF-16 text in results[].
 *
 * cnv      open converter; it is reset before decoding starts
 * source   first byte of the encoded input
 * limit    one past the last byte of the encoded input
 * results  expected text as UTF-16 code units; a lead surrogate is combined
 *          with the following unit into one code point. The array length is
 *          not passed in, so the caller must supply enough units for the
 *          whole input.
 * message  prefix used in failure messages
 *
 * Decoding stops quietly on U_INDEX_OUTOFBOUNDS_ERROR and stops with a
 * log_err() on any other failure.
 */
static void
TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
                     const uint16_t results[], const char* message) {
    const char* s = source;
    const uint16_t* r = results;
    UErrorCode errorCode = U_ZERO_ERROR;
    uint32_t c, exC;

    ucnv_reset(cnv);
    while (s < limit) {
        c = ucnv_getNextUChar(cnv, &s, limit, &errorCode);
        if (errorCode == U_INDEX_OUTOFBOUNDS_ERROR) {
            break; /* no more significant input */
        }
        if (U_FAILURE(errorCode)) {
            log_err(" %s ucnv_getNextUChar() failed: %s \n ", message, u_errorName(errorCode));
            break;
        }

        if (UTF_IS_FIRST_SURROGATE(*r)) {
            int i = 0, len = 2;
            UTF_NEXT_CHAR_SAFE(r, i, len, exC, FALSE);
            r++; /* step over the lead unit; the trail unit is skipped below */
        } else {
            exC = *r;
        }
        if (c != exC) {
            /*
             * Report the expected code point itself. *r may already point at
             * the trail surrogate here, which is not the expected value.
             */
            log_err(" %s ucnv_getNextUChar() Expected: \\ u%04X Got: \\ u%04X \n ", message, exC, c);
        }
        r++;
    }
}
2000-11-17 03:03:14 +00:00
2001-05-04 00:17:03 +00:00
/*
 * Regression check for Jitterbug 930 on the converter named by enc.
 *
 * Step 1 converts three UChars into a two-byte target window and checks that
 * the number of offsets written equals the number of bytes written.
 * Step 2 calls ucnv_fromUnicode() again with an empty source and flush set,
 * and checks that every offset written by that call is -1.
 *
 * Failures are reported through log_err(); the function always returns 0.
 * NOTE(review): the status from my_ucnv_open() is not inspected before the
 * converter is used.
 */
static int TestJitterbug930 ( const char * enc ) {
2001-08-25 01:06:41 +00:00
UErrorCode err = U_ZERO_ERROR ;
UConverter * converter ;
char out [ 80 ] ;
char * target = out ;
UChar in [ 4 ] ;
const UChar * source = in ;
int32_t off [ 80 ] ;
int32_t * offsets = off ;
int numOffWritten = 0 ;
UBool flush = 0 ;
2002-07-17 02:41:04 +00:00
converter = my_ucnv_open ( enc , & err ) ;
2002-07-29 21:04:18 +00:00
2001-08-25 01:06:41 +00:00
in [ 0 ] = 0x41 ; /* 0x4E00;*/
in [ 1 ] = 0x4E01 ;
in [ 2 ] = 0x4E02 ;
in [ 3 ] = 0x4E03 ;
2002-07-29 21:04:18 +00:00
2001-08-25 01:06:41 +00:00
/* Pre-fill every byte of off[] with a sentinel so untouched entries can be recognised below. */
memset ( off , ' * ' , sizeof ( off ) ) ;
2002-07-29 21:04:18 +00:00
2001-08-25 01:06:41 +00:00
/* Step 1: no flush; the target limit is target + 2 and the source limit is source + 3. */
ucnv_fromUnicode ( converter ,
& target ,
target + 2 ,
& source ,
source + 3 ,
offsets ,
flush ,
& err ) ;
2002-07-29 21:04:18 +00:00
2001-08-25 01:06:41 +00:00
/* writes three bytes into the output buffer: 41 1B 24
* but offsets contains 0 1 1
2001-04-07 01:25:14 +00:00
*/
2001-08-25 01:06:41 +00:00
/* Count leading entries that compare below off[10]; off[10] is presumed to still hold the sentinel. */
while ( * offsets < off [ 10 ] ) {
numOffWritten + + ;
offsets + + ;
}
log_verbose ( " Testing Jitterbug 930 for encoding %s " , enc ) ;
/* One offset is expected per byte written to out[]. */
if ( numOffWritten ! = ( int ) ( target - out ) ) {
log_err ( " Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i " , enc , ( int ) ( target - out ) , numOffWritten ) ;
}
2002-07-29 21:04:18 +00:00
2001-08-25 01:06:41 +00:00
/* Clear whatever status step 1 left behind before calling the converter again. */
err = U_ZERO_ERROR ;
2002-07-29 21:04:18 +00:00
2001-08-25 01:06:41 +00:00
memset ( off , ' * ' , sizeof ( off ) ) ;
2002-07-29 21:04:18 +00:00
2001-08-25 01:06:41 +00:00
/* Step 2: flush with an empty source range (source limit == source), writing offsets from off[0] again. */
flush = 1 ;
offsets = off ;
ucnv_fromUnicode ( converter ,
& target ,
target + 4 ,
& source ,
source ,
offsets ,
flush ,
& err ) ;
numOffWritten = 0 ;
/* Every offset written by the flush-only call is expected to be -1. */
while ( * offsets < off [ 10 ] ) {
numOffWritten + + ;
if ( * offsets ! = - 1 ) {
log_err ( " Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i " , enc , - 1 , * offsets ) ;
}
offsets + + ;
}
2001-04-18 19:31:05 +00:00
2001-08-25 01:06:41 +00:00
/* writes 42 43 7A into output buffer,
* offsets contains - 1 - 1 - 1
*/
ucnv_close ( converter ) ;
2001-05-31 23:30:09 +00:00
return 0 ;
2001-04-07 01:25:14 +00:00
}
2000-11-21 04:05:39 +00:00
static void
2000-10-17 08:05:02 +00:00
TestHZ ( ) {
/* test input */
static const uint16_t in [ ] = {
2002-08-01 17:58:30 +00:00
0x3000 , 0x3001 , 0x3002 , 0x00B7 , 0x02C9 , 0x02C7 , 0x00A8 , 0x3003 , 0x3005 , 0x2014 ,
2001-04-18 19:31:05 +00:00
0xFF5E , 0x2016 , 0x2026 , 0x007E , 0x997C , 0x70B3 , 0x75C5 , 0x5E76 , 0x73BB , 0x83E0 ,
0x64AD , 0x62E8 , 0x94B5 , 0x000A , 0x6CE2 , 0x535A , 0x52C3 , 0x640F , 0x94C2 , 0x7B94 ,
0x4F2F , 0x5E1B , 0x8236 , 0x000A , 0x8116 , 0x818A , 0x6E24 , 0x6CCA , 0x9A73 , 0x6355 ,
0x535C , 0x54FA , 0x8865 , 0x000A , 0x57E0 , 0x4E0D , 0x5E03 , 0x6B65 , 0x7C3F , 0x90E8 ,
0x6016 , 0x248F , 0x2490 , 0x000A , 0x2491 , 0x2492 , 0x2493 , 0x2494 , 0x2495 , 0x2496 ,
0x2497 , 0x2498 , 0x2499 , 0x000A , 0x249A , 0x249B , 0x2474 , 0x2475 , 0x2476 , 0x2477 ,
0x2478 , 0x2479 , 0x247A , 0x000A , 0x247B , 0x247C , 0x247D , 0x247E , 0x247F , 0x2480 ,
0x2481 , 0x2482 , 0x2483 , 0x000A , 0x0041 , 0x0043 , 0x0044 , 0x0045 , 0x0046 , 0x007E ,
0x0048 , 0x0049 , 0x004A , 0x000A , 0x004B , 0x004C , 0x004D , 0x004E , 0x004F , 0x0050 ,
0x0051 , 0x0052 , 0x0053 , 0x000A , 0x0054 , 0x0055 , 0x0056 , 0x0057 , 0x0058 , 0x0059 ,
0x005A , 0x005B , 0x005C , 0x000A
2000-10-17 08:05:02 +00:00
} ;
2000-11-17 03:03:14 +00:00
const UChar * uSource ;
const UChar * uSourceLimit ;
const char * cSource ;
const char * cSourceLimit ;
UChar * uTargetLimit = NULL ;
UChar * uTarget ;
char * cTarget ;
const char * cTargetLimit ;
2001-04-18 19:31:05 +00:00
char * cBuf ;
2000-11-17 03:03:14 +00:00
UChar * uBuf , * test ;
2000-10-17 08:05:02 +00:00
int32_t uBufSize = 120 ;
UErrorCode errorCode = U_ZERO_ERROR ;
UConverter * cnv ;
2000-11-17 03:03:14 +00:00
int32_t * offsets = ( int32_t * ) malloc ( uBufSize * sizeof ( int32_t ) * 5 ) ;
int32_t * myOff = offsets ;
2000-10-17 08:05:02 +00:00
cnv = ucnv_open ( " HZ " , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
2002-09-20 21:55:31 +00:00
log_data_err ( " Unable to open HZ converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-10-17 08:05:02 +00:00
return ;
}
2000-11-17 03:03:14 +00:00
uBuf = ( UChar * ) malloc ( uBufSize * sizeof ( UChar ) * 5 ) ;
cBuf = ( char * ) malloc ( uBufSize * sizeof ( char ) * 5 ) ;
2005-11-08 22:44:37 +00:00
uSource = ( const UChar * ) in ;
uSourceLimit = ( const UChar * ) in + ( sizeof ( in ) / sizeof ( in [ 0 ] ) ) ;
2000-11-17 03:03:14 +00:00
cTarget = cBuf ;
cTargetLimit = cBuf + uBufSize * 5 ;
uTarget = uBuf ;
uTargetLimit = uBuf + uBufSize * 5 ;
ucnv_fromUnicode ( cnv , & cTarget , cTargetLimit , & uSource , uSourceLimit , myOff , TRUE , & errorCode ) ;
2000-10-17 08:05:02 +00:00
if ( U_FAILURE ( errorCode ) ) {
log_err ( " ucnv_fromUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
cSource = cBuf ;
2000-11-17 03:03:14 +00:00
cSourceLimit = cTarget ;
test = uBuf ;
myOff = offsets ;
ucnv_toUnicode ( cnv , & uTarget , uTargetLimit , & cSource , cSourceLimit , myOff , TRUE , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
2000-10-17 08:05:02 +00:00
log_err ( " ucnv_toUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
2005-11-08 22:44:37 +00:00
uSource = ( const UChar * ) in ;
2000-11-17 03:03:14 +00:00
while ( uSource < uSourceLimit ) {
if ( * test ! = * uSource ) {
2001-04-18 19:31:05 +00:00
2000-11-17 03:03:14 +00:00
log_err ( " Expected : \\ u%04X \t Got: \\ u%04X \n " , * uSource , ( int ) * test ) ;
}
2000-12-08 01:13:38 +00:00
uSource + + ;
test + + ;
2000-11-17 03:03:14 +00:00
}
TestGetNextUChar2022 ( cnv , cBuf , cTarget , in , " HZ encoding " ) ;
2005-11-08 22:44:37 +00:00
TestSmallTargetBuffer ( in , ( const UChar * ) in + ( sizeof ( in ) / sizeof ( in [ 0 ] ) ) , cnv ) ;
TestSmallSourceBuffer ( in , ( const UChar * ) in + ( sizeof ( in ) / sizeof ( in [ 0 ] ) ) , cnv ) ;
TestToAndFromUChars ( in , ( const UChar * ) in + ( sizeof ( in ) / sizeof ( in [ 0 ] ) ) , cnv ) ;
2001-04-07 01:25:14 +00:00
TestJitterbug930 ( " csISO2022JP " ) ;
2000-11-17 03:03:14 +00:00
ucnv_close ( cnv ) ;
free ( offsets ) ;
free ( uBuf ) ;
free ( cBuf ) ;
2000-09-21 00:35:06 +00:00
}
2002-07-29 21:04:18 +00:00
static void
2001-07-14 02:29:21 +00:00
TestISCII ( ) {
/* test input */
static const uint16_t in [ ] = {
/* test full range of Devanagari */
0x0901 , 0x0902 , 0x0903 , 0x0905 , 0x0906 , 0x0907 , 0x0908 , 0x0909 , 0x090A ,
0x090B , 0x090E , 0x090F , 0x0910 , 0x090D , 0x0912 , 0x0913 , 0x0914 , 0x0911 ,
0x0915 , 0x0916 , 0x0917 , 0x0918 , 0x0919 , 0x091A , 0x091B , 0x091C , 0x091D ,
0x091E , 0x091F , 0x0920 , 0x0921 , 0x0922 , 0x0923 , 0x0924 , 0x0925 , 0x0926 ,
0x0927 , 0x0928 , 0x0929 , 0x092A , 0x092B , 0x092C , 0x092D , 0x092E , 0x092F ,
0x095F , 0x0930 , 0x0931 , 0x0932 , 0x0933 , 0x0934 , 0x0935 , 0x0936 , 0x0937 ,
0x0938 , 0x0939 , 0x200D , 0x093E , 0x093F , 0x0940 , 0x0941 , 0x0942 , 0x0943 ,
0x0946 , 0x0947 , 0x0948 , 0x0945 , 0x094A , 0x094B , 0x094C , 0x0949 , 0x094D ,
2002-04-04 03:08:38 +00:00
0x093d , 0x0966 , 0x0967 , 0x0968 , 0x0969 , 0x096A , 0x096B , 0x096C ,
2001-07-14 02:29:21 +00:00
0x096D , 0x096E , 0x096F ,
/* test Soft halant*/
0x0915 , 0x094d , 0x200D ,
/* test explicit halant */
0x0915 , 0x094d , 0x200c ,
/* test double danda */
0x965 ,
/* test ASCII */
0x1B , 0x24 , 0x29 , 0x47 , 0x0E , 0x23 , 0x21 , 0x23 , 0x22 , 0x23 ,
0x23 , 0x23 , 0x24 , 0x23 , 0x25 , 0x23 , 0x26 , 0x23 , 0x27 , 0x23 ,
0x28 , 0x23 , 0x29 , 0x23 , 0x2A , 0x23 , 0x2B , 0x0F , 0x2F , 0x2A ,
/* tests from Lotus */
0x0061 , 0x0915 , 0x000D , 0x000A , 0x0996 , 0x0043 ,
0x0930 , 0x094D , 0x200D ,
0x0901 , 0x000D , 0x000A , 0x0905 , 0x0985 , 0x0043 ,
0x0915 , 0x0921 , 0x002B , 0x095F ,
/* tamil range */
0x0B86 , 0xB87 , 0xB88 ,
/* telugu range */
0x0C05 , 0x0C02 , 0x0C03 , 0x0c31 ,
/* kannada range */
2001-07-24 18:10:05 +00:00
0x0C85 , 0xC82 , 0x0C83 ,
/* test Abbr sign and Anudatta */
0x0970 , 0x952 ,
2002-04-04 03:08:38 +00:00
/* 0x0958,
0x0959 ,
0x095A ,
0x095B ,
0x095C ,
0x095D ,
0x095E ,
0x095F , */
0x0960 /* Vocallic RRI 0xAB, 0xE9*/ ,
0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */ ,
0x090C ,
0x0962 ,
0x0961 /* Vocallic LL 0xa6, 0xE9 */ ,
0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */ ,
0x0950 /* OM Symbol 0xa1, 0xE9,*/ ,
2002-07-29 21:04:18 +00:00
0x093D /* Avagraha 0xEA, 0xE9*/ ,
2002-04-04 03:08:38 +00:00
0x0958 ,
0x0959 ,
0x095A ,
0x095B ,
0x095C ,
0x095D ,
0x095E ,
2003-04-02 02:59:14 +00:00
0x0020 , 0x094D , 0x0930 , 0x0000 , 0x00A0
2001-07-14 02:29:21 +00:00
} ;
static const unsigned char byteArr [ ] = {
2002-06-22 00:20:43 +00:00
2001-07-14 02:29:21 +00:00
0xa1 , 0xa2 , 0xa3 , 0xa4 , 0xa5 , 0xa6 , 0xa7 , 0xa8 , 0xa9 ,
0xaa , 0xab , 0xac , 0xad , 0xae , 0xaf , 0xb0 , 0xb1 , 0xb2 ,
0xb3 , 0xb4 , 0xb5 , 0xb6 , 0xb7 , 0xb8 , 0xb9 , 0xba , 0xbb ,
0xbc , 0xbd , 0xbe , 0xbf , 0xc0 , 0xc1 , 0xc2 , 0xc3 , 0xc4 ,
0xc5 , 0xc6 , 0xc7 , 0xc8 , 0xc9 , 0xca , 0xcb , 0xcc , 0xcd ,
0xce , 0xcf , 0xd0 , 0xd1 , 0xd2 , 0xd3 , 0xd4 , 0xd5 , 0xd6 ,
0xd7 , 0xd8 , 0xd9 , 0xda , 0xdb , 0xdc , 0xdd , 0xde , 0xdf ,
0xe0 , 0xe1 , 0xe2 , 0xe3 , 0xe4 , 0xe5 , 0xe6 , 0xe7 , 0xe8 ,
0xea , 0xe9 , 0xf1 , 0xf2 , 0xf3 , 0xf4 , 0xf5 , 0xf6 , 0xf7 ,
0xf8 , 0xf9 , 0xfa ,
/* test soft halant */
0xb3 , 0xE8 , 0xE9 ,
/* test explicit halant */
0xb3 , 0xE8 , 0xE8 ,
/* test double danda */
0xea , 0xea ,
/* test ASCII */
0x1B , 0x24 , 0x29 , 0x47 , 0x0E , 0x23 , 0x21 , 0x23 , 0x22 , 0x23 ,
0x23 , 0x23 , 0x24 , 0x23 , 0x25 , 0x23 , 0x26 , 0x23 , 0x27 , 0x23 ,
0x28 , 0x23 , 0x29 , 0x23 , 0x2A , 0x23 , 0x2B , 0x0F , 0x2F , 0x2A ,
/* test ATR code */
/* tests from Lotus */
0x61 , 0xEF , 0x42 , 0xEF , 0x30 , 0xB3 , 0x0D , 0x0A , 0xEF , 0x43 , 0xB4 , 0x43 ,
0xEF , 0x42 , 0xCF , 0xE8 , 0xD9 ,
0xEF , 0x42 , 0xA1 , 0x0D , 0x0A , 0xEF , 0x42 , 0xA4 , 0xEF , 0x43 , 0xA4 , 0x43 ,
0xEF , 0x42 , 0xB3 , 0xBF , 0x2B , 0xEF , 0x42 , 0xCE ,
/* tamil range */
0xEF , 0x44 , 0xa5 , 0xa6 , 0xa7 ,
/* telugu range */
0xEF , 0x45 , 0xa4 , 0xa2 , 0xa3 , 0xd0 ,
/* kannada range */
2001-07-24 18:10:05 +00:00
0xEF , 0x48 , 0xa4 , 0xa2 , 0xa3 ,
/* anudatta and abbreviation sign */
2002-04-04 03:08:38 +00:00
0xEF , 0x42 , 0xF0 , 0xBF , 0xF0 , 0xB8 ,
2002-07-29 21:04:18 +00:00
2002-04-04 03:08:38 +00:00
0xAA , 0xE9 , /* RI + NUKTA 0x0960*/
2002-07-29 21:04:18 +00:00
2002-04-04 03:08:38 +00:00
0xDF , 0xE9 , /* Vowel sign RI + NUKTA 0x0944*/
2002-07-29 21:04:18 +00:00
2002-04-04 03:08:38 +00:00
0xa6 , 0xE9 , /* Vowel I + NUKTA 0x090C*/
2002-07-29 21:04:18 +00:00
2002-04-04 03:08:38 +00:00
0xdb , 0xE9 , /* Vowel sign I + Nukta 0x0962*/
2002-07-29 21:04:18 +00:00
2002-04-04 03:08:38 +00:00
0xa7 , 0xE9 , /* Vowel II + NUKTA 0x0961*/
2002-07-29 21:04:18 +00:00
2002-04-04 03:08:38 +00:00
0xdc , 0xE9 , /* Vowel sign II + Nukta 0x0963*/
2002-07-29 21:04:18 +00:00
2002-04-04 03:08:38 +00:00
0xa1 , 0xE9 , /* chandrabindu + Nukta 0x0950*/
2002-07-29 21:04:18 +00:00
2002-04-04 03:08:38 +00:00
0xEA , 0xE9 , /* Danda + Nukta 0x093D*/
2002-07-29 21:04:18 +00:00
2002-04-04 03:08:38 +00:00
0xB3 , 0xE9 , /* Ka + NUKTA */
2002-07-29 21:04:18 +00:00
2002-04-04 03:08:38 +00:00
0xB4 , 0xE9 , /* Kha + NUKTA */
2002-07-29 21:04:18 +00:00
2002-04-04 03:08:38 +00:00
0xB5 , 0xE9 , /* Ga + NUKTA */
0xBA , 0xE9 ,
0xBF , 0xE9 ,
2002-07-29 21:04:18 +00:00
2002-04-04 03:08:38 +00:00
0xC0 , 0xE9 ,
0xC9 , 0xE9 ,
2002-06-22 00:20:43 +00:00
/* INV halant RA */
0xD9 , 0xE8 , 0xCF ,
2003-04-02 02:59:14 +00:00
0x00 , 0x00A0 ,
/* just consume unhandled codepoints */
0xEF , 0x30 ,
2001-07-14 02:29:21 +00:00
} ;
2002-06-22 00:20:43 +00:00
testConvertToU ( byteArr , ( sizeof ( byteArr ) ) , in , ( sizeof ( in ) / U_SIZEOF_UCHAR ) , " x-iscii-de " , NULL , TRUE ) ;
2002-07-31 20:58:58 +00:00
TestConv ( in , ( sizeof ( in ) / 2 ) , " ISCII,version=0 " , " hindi " , ( char * ) byteArr , sizeof ( byteArr ) ) ;
2001-07-14 02:29:21 +00:00
}
2002-07-31 20:58:58 +00:00
2000-11-21 04:05:39 +00:00
static void
2000-10-28 01:08:25 +00:00
TestISO_2022_JP ( ) {
/* test input */
2001-04-18 19:31:05 +00:00
static const uint16_t in [ ] = {
2001-02-23 04:40:39 +00:00
0x0041 , /*0x00E9,*/ 0x3000 , 0x3001 , 0x3002 , 0x0020 , 0x000D , 0x000A ,
2001-04-18 19:31:05 +00:00
0x0043 , 0x0044 , 0x0045 , 0x0046 , 0x0047 , 0x0048 , 0x0049 , 0x004A , 0x000D , 0x000A ,
2000-10-28 01:08:25 +00:00
0x004B , 0x004C , 0x004D , 0x004E , 0x004F , 0x0050 , 0x0051 , 0x0052 , 0x000D , 0x000A ,
2001-04-18 19:31:05 +00:00
0x3005 , 0x3006 , 0x3007 , 0x30FC , 0x2015 , 0x2010 , 0xFF0F , 0x005C , 0x000D , 0x000A ,
2007-10-11 21:52:29 +00:00
0x3013 , 0x2018 , 0x2026 , 0x2025 , 0x2018 , 0x2019 , 0x201C , 0x000D , 0x000A ,
2001-02-23 04:40:39 +00:00
0x201D , 0x3014 , 0x000D , 0x000A ,
2000-10-28 01:08:25 +00:00
0x0053 , 0x0054 , 0x0055 , 0x0056 , 0x0057 , 0x0058 , 0x0059 , 0x005A , 0x000D , 0x000A ,
0x0053 , 0x0054 , 0x0055 , 0x0056 , 0x0057 , 0x0058 , 0x0059 , 0x005A , 0x000D , 0x000A ,
2001-02-23 04:40:39 +00:00
} ;
2000-11-17 03:03:14 +00:00
const UChar * uSource ;
const UChar * uSourceLimit ;
const char * cSource ;
const char * cSourceLimit ;
UChar * uTargetLimit = NULL ;
UChar * uTarget ;
char * cTarget ;
const char * cTargetLimit ;
2001-04-18 19:31:05 +00:00
char * cBuf ;
2000-11-17 03:03:14 +00:00
UChar * uBuf , * test ;
2000-10-28 01:08:25 +00:00
int32_t uBufSize = 120 ;
UErrorCode errorCode = U_ZERO_ERROR ;
UConverter * cnv ;
2000-11-17 03:03:14 +00:00
int32_t * offsets = ( int32_t * ) malloc ( uBufSize * sizeof ( int32_t ) * 5 ) ;
int32_t * myOff = offsets ;
2001-03-13 00:07:23 +00:00
cnv = ucnv_open ( " ISO_2022_JP_1 " , & errorCode ) ;
2000-10-28 01:08:25 +00:00
if ( U_FAILURE ( errorCode ) ) {
2003-12-03 22:53:14 +00:00
log_data_err ( " Unable to open an ISO_2022_JP_1 converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-10-28 01:08:25 +00:00
return ;
}
2000-10-20 02:47:14 +00:00
2000-11-17 03:03:14 +00:00
uBuf = ( UChar * ) malloc ( uBufSize * sizeof ( UChar ) * 5 ) ;
cBuf = ( char * ) malloc ( uBufSize * sizeof ( char ) * 5 ) ;
2005-11-08 22:44:37 +00:00
uSource = ( const UChar * ) in ;
uSourceLimit = ( const UChar * ) in + ( sizeof ( in ) / sizeof ( in [ 0 ] ) ) ;
2000-11-17 03:03:14 +00:00
cTarget = cBuf ;
cTargetLimit = cBuf + uBufSize * 5 ;
uTarget = uBuf ;
uTargetLimit = uBuf + uBufSize * 5 ;
ucnv_fromUnicode ( cnv , & cTarget , cTargetLimit , & uSource , uSourceLimit , myOff , TRUE , & errorCode ) ;
2000-10-28 01:08:25 +00:00
if ( U_FAILURE ( errorCode ) ) {
log_err ( " ucnv_fromUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
cSource = cBuf ;
2000-11-17 03:03:14 +00:00
cSourceLimit = cTarget ;
test = uBuf ;
myOff = offsets ;
ucnv_toUnicode ( cnv , & uTarget , uTargetLimit , & cSource , cSourceLimit , myOff , TRUE , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
2000-10-28 01:08:25 +00:00
log_err ( " ucnv_toUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
2000-11-17 03:03:14 +00:00
2005-11-08 22:44:37 +00:00
uSource = ( const UChar * ) in ;
2000-11-17 03:03:14 +00:00
while ( uSource < uSourceLimit ) {
if ( * test ! = * uSource ) {
2001-04-18 19:31:05 +00:00
2000-11-17 03:03:14 +00:00
log_err ( " Expected : \\ u%04X \t Got: \\ u%04X \n " , * uSource , ( int ) * test ) ;
}
2000-12-08 01:13:38 +00:00
uSource + + ;
test + + ;
2000-11-17 03:03:14 +00:00
}
2005-11-08 22:44:37 +00:00
TestSmallTargetBuffer ( in , ( const UChar * ) in + ( sizeof ( in ) / sizeof ( in [ 0 ] ) ) , cnv ) ;
TestSmallSourceBuffer ( in , ( const UChar * ) in + ( sizeof ( in ) / sizeof ( in [ 0 ] ) ) , cnv ) ;
2001-04-18 19:31:05 +00:00
TestGetNextUChar2022 ( cnv , cBuf , cTarget , in , " ISO-2022-JP encoding " ) ;
2005-11-08 22:44:37 +00:00
TestToAndFromUChars ( in , ( const UChar * ) in + ( sizeof ( in ) / sizeof ( in [ 0 ] ) ) , cnv ) ;
2001-04-07 01:25:14 +00:00
TestJitterbug930 ( " csISO2022JP " ) ;
2000-11-17 03:03:14 +00:00
ucnv_close ( cnv ) ;
free ( uBuf ) ;
free ( cBuf ) ;
free ( offsets ) ;
2000-10-28 01:08:25 +00:00
}
2000-11-17 03:03:14 +00:00
2001-07-14 02:29:21 +00:00
static void TestConv ( const uint16_t in [ ] , int len , const char * conv , const char * lang , char byteArr [ ] , int byteArrLen ) {
2001-03-02 23:55:49 +00:00
const UChar * uSource ;
const UChar * uSourceLimit ;
const char * cSource ;
const char * cSourceLimit ;
UChar * uTargetLimit = NULL ;
UChar * uTarget ;
char * cTarget ;
const char * cTargetLimit ;
2001-04-18 19:31:05 +00:00
char * cBuf ;
2001-03-02 23:55:49 +00:00
UChar * uBuf , * test ;
2002-03-25 22:41:39 +00:00
int32_t uBufSize = 120 * 10 ;
2001-03-02 23:55:49 +00:00
UErrorCode errorCode = U_ZERO_ERROR ;
UConverter * cnv ;
2002-03-25 22:41:39 +00:00
int32_t * offsets = ( int32_t * ) malloc ( uBufSize * sizeof ( int32_t ) ) ;
2001-03-02 23:55:49 +00:00
int32_t * myOff = offsets ;
2002-07-17 02:41:04 +00:00
cnv = my_ucnv_open ( conv , & errorCode ) ;
2001-03-02 23:55:49 +00:00
if ( U_FAILURE ( errorCode ) ) {
2002-09-20 19:07:19 +00:00
log_data_err ( " Unable to open a %s converter: %s \n " , conv , u_errorName ( errorCode ) ) ;
2001-03-02 23:55:49 +00:00
return ;
}
2002-03-25 22:41:39 +00:00
uBuf = ( UChar * ) malloc ( uBufSize * sizeof ( UChar ) ) ;
cBuf = ( char * ) malloc ( uBufSize * sizeof ( char ) ) ;
2005-11-08 22:44:37 +00:00
uSource = ( const UChar * ) in ;
2001-03-02 23:55:49 +00:00
uSourceLimit = uSource + len ;
cTarget = cBuf ;
2002-03-25 22:41:39 +00:00
cTargetLimit = cBuf + uBufSize ;
2001-03-02 23:55:49 +00:00
uTarget = uBuf ;
2002-03-25 22:41:39 +00:00
uTargetLimit = uBuf + uBufSize ;
2001-03-02 23:55:49 +00:00
ucnv_fromUnicode ( cnv , & cTarget , cTargetLimit , & uSource , uSourceLimit , myOff , TRUE , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " ucnv_fromUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
2002-03-25 22:41:39 +00:00
/*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
2001-03-02 23:55:49 +00:00
cSource = cBuf ;
cSourceLimit = cTarget ;
test = uBuf ;
myOff = offsets ;
ucnv_toUnicode ( cnv , & uTarget , uTargetLimit , & cSource , cSourceLimit , myOff , TRUE , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
2002-09-20 19:07:19 +00:00
log_err ( " ucnv_toUnicode conversion failed, reason: %s \n " , u_errorName ( errorCode ) ) ;
2001-03-02 23:55:49 +00:00
return ;
}
2005-11-08 22:44:37 +00:00
uSource = ( const UChar * ) in ;
2001-03-02 23:55:49 +00:00
while ( uSource < uSourceLimit ) {
if ( * test ! = * uSource ) {
2002-09-20 19:07:19 +00:00
log_err ( " for codepage %s : Expected : \\ u%04X \t Got: \\ u%04X \n " , conv , * uSource , ( int ) * test ) ;
2001-03-02 23:55:49 +00:00
}
uSource + + ;
test + + ;
}
2005-11-08 22:44:37 +00:00
TestSmallTargetBuffer ( in , ( const UChar * ) & in [ len ] , cnv ) ;
TestSmallSourceBuffer ( in , ( const UChar * ) & in [ len ] , cnv ) ;
2001-03-02 23:55:49 +00:00
TestGetNextUChar2022 ( cnv , cBuf , cTarget , in , conv ) ;
2001-07-14 02:29:21 +00:00
if ( byteArr & & byteArrLen ! = 0 ) {
TestGetNextUChar2022 ( cnv , byteArr , ( byteArr + byteArrLen ) , in , lang ) ;
2005-11-08 22:44:37 +00:00
TestToAndFromUChars ( in , ( const UChar * ) & in [ len ] , cnv ) ;
2001-07-14 02:29:21 +00:00
{
cSource = byteArr ;
cSourceLimit = cSource + byteArrLen ;
test = uBuf ;
myOff = offsets ;
ucnv_toUnicode ( cnv , & uTarget , uTargetLimit , & cSource , cSourceLimit , myOff , TRUE , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " ucnv_toUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
2001-03-02 23:55:49 +00:00
2005-11-08 22:44:37 +00:00
uSource = ( const UChar * ) in ;
2001-07-14 02:29:21 +00:00
while ( uSource < uSourceLimit ) {
if ( * test ! = * uSource ) {
log_err ( " Expected : \\ u%04X \t Got: \\ u%04X \n " , * uSource , ( int ) * test ) ;
}
uSource + + ;
test + + ;
2001-03-02 23:55:49 +00:00
}
}
}
2001-07-14 02:29:21 +00:00
2001-03-02 23:55:49 +00:00
ucnv_close ( cnv ) ;
free ( uBuf ) ;
free ( cBuf ) ;
free ( offsets ) ;
}
2002-07-23 23:01:08 +00:00
static UChar U_CALLCONV
2002-03-25 22:41:39 +00:00
_charAt ( int32_t offset , void * context ) {
return ( ( char * ) context ) [ offset ] ;
}
/*
 * Copies src into dst as UTF-16, expanding backslash escapes through
 * u_unescapeAt().
 *
 * dst     output buffer; may be NULL only when dstLen <= 0
 * dstLen  capacity of dst in UChars (-1 is accepted and behaves like 0)
 * src     input characters
 * srcLen  number of input chars, or -1 to use uprv_strlen(src)
 * status  in/out error code; nothing is done if it already indicates failure
 *
 * Returns the number of UChars the output needs, which may exceed dstLen;
 * *status is then U_BUFFER_OVERFLOW_ERROR. On an invalid escape, *status is
 * set to U_INVALID_CHAR_FOUND and the count reached so far is returned.
 * The output is not NUL-terminated.
 */
static int32_t
unescape(UChar* dst, int32_t dstLen, const char* src, int32_t srcLen, UErrorCode* status) {
    int32_t srcIndex = 0;
    int32_t dstIndex = 0;

    if (U_FAILURE(*status)) {
        return 0;
    }
    if ((dst == NULL && dstLen > 0) || (src == NULL) || dstLen < -1 || srcLen < -1) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    if (srcLen == -1) {
        srcLen = (int32_t)uprv_strlen(src);
    }

    while (srcIndex < srcLen) {
        UChar32 c = src[srcIndex++];
        if (c == 0x005C /*'\\'*/) {
            c = u_unescapeAt(_charAt, &srcIndex, srcLen, (void*)src); /* advances srcIndex */
            if (c == (UChar32)0xFFFFFFFF) {
                *status = U_INVALID_CHAR_FOUND;
                break; /* invalid escape sequence */
            }
        }

        if (c > 0xFFFF) {
            /*
             * A supplementary code point always needs two units. Count both
             * even when nothing fits, so the preflight length is correct.
             */
            if (dstIndex < dstLen) {
                dst[dstIndex] = UTF16_LEAD(c);
            }
            if (dstIndex + 1 < dstLen) {
                dst[dstIndex + 1] = UTF16_TRAIL(c);
            } else {
                *status = U_BUFFER_OVERFLOW_ERROR;
            }
            dstIndex += 2;
        } else {
            if (dstIndex < dstLen) {
                dst[dstIndex] = (UChar)c;
            } else {
                *status = U_BUFFER_OVERFLOW_ERROR;
            }
            dstIndex++; /* counted even on overflow, for preflighting */
        }
    }
    return dstIndex;
}
2002-03-25 22:51:09 +00:00
static void
TestFullRoundtrip ( const char * cp ) {
UChar usource [ 10 ] = { 0 } ;
UChar nsrc [ 10 ] = { 0 } ;
uint32_t i = 1 ;
2002-05-25 00:30:31 +00:00
int len = 0 , ulen ;
2002-03-25 22:51:09 +00:00
nsrc [ 0 ] = 0x0061 ;
/* Test codepoint 0 */
TestConv ( usource , 1 , cp , " " , NULL , 0 ) ;
TestConv ( usource , 2 , cp , " " , NULL , 0 ) ;
nsrc [ 2 ] = 0x5555 ;
TestConv ( nsrc , 3 , cp , " " , NULL , 0 ) ;
for ( ; i < = 0x10FFFF ; i + + ) {
2002-05-25 00:30:31 +00:00
if ( i = = 0xD800 ) {
i = 0xDFFF ;
2002-03-25 22:51:09 +00:00
continue ;
}
if ( i < = 0xFFFF ) {
usource [ 0 ] = ( UChar ) i ;
len = 1 ;
} else {
usource [ 0 ] = UTF16_LEAD ( i ) ;
usource [ 1 ] = UTF16_TRAIL ( i ) ;
len = 2 ;
}
2002-05-25 00:30:31 +00:00
ulen = len ;
if ( i = = 0x80 ) {
usource [ 2 ] = 0 ;
}
2002-03-25 22:51:09 +00:00
/* Test only single code points */
2002-05-25 00:30:31 +00:00
TestConv ( usource , ulen , cp , " " , NULL , 0 ) ;
2002-03-25 22:51:09 +00:00
/* Test codepoint repeated twice */
2002-05-25 00:30:31 +00:00
usource [ ulen ] = usource [ 0 ] ;
usource [ ulen + 1 ] = usource [ 1 ] ;
ulen + = len ;
TestConv ( usource , ulen , cp , " " , NULL , 0 ) ;
2002-03-25 22:51:09 +00:00
/* Test codepoint repeated 3 times */
2002-05-25 00:30:31 +00:00
usource [ ulen ] = usource [ 0 ] ;
usource [ ulen + 1 ] = usource [ 1 ] ;
ulen + = len ;
TestConv ( usource , ulen , cp , " " , NULL , 0 ) ;
2002-03-25 22:51:09 +00:00
/* Test codepoint in between 2 codepoints */
2002-05-25 00:30:31 +00:00
nsrc [ 1 ] = usource [ 0 ] ;
nsrc [ 2 ] = usource [ 1 ] ;
2002-03-25 22:51:09 +00:00
nsrc [ len + 1 ] = 0x5555 ;
2002-07-29 21:04:18 +00:00
TestConv ( nsrc , len + 2 , cp , " " , NULL , 0 ) ;
2002-03-25 22:51:09 +00:00
uprv_memset ( usource , 0 , sizeof ( UChar ) * 10 ) ;
}
}
static void
2002-03-28 18:26:25 +00:00
TestRoundTrippingAllUTF ( void ) {
2010-04-07 16:18:38 +00:00
if ( ! getTestOption ( QUICK_OPTION ) ) {
2002-05-25 00:30:31 +00:00
log_verbose ( " Running exhaustive round trip test for BOCU-1 \n " ) ;
TestFullRoundtrip ( " BOCU-1 " ) ;
2002-03-25 22:51:09 +00:00
log_verbose ( " Running exhaustive round trip test for SCSU \n " ) ;
TestFullRoundtrip ( " SCSU " ) ;
log_verbose ( " Running exhaustive round trip test for UTF-8 \n " ) ;
TestFullRoundtrip ( " UTF-8 " ) ;
2002-07-02 22:52:30 +00:00
log_verbose ( " Running exhaustive round trip test for CESU-8 \n " ) ;
TestFullRoundtrip ( " CESU-8 " ) ;
2002-03-25 22:51:09 +00:00
log_verbose ( " Running exhaustive round trip test for UTF-16BE \n " ) ;
TestFullRoundtrip ( " UTF-16BE " ) ;
log_verbose ( " Running exhaustive round trip test for UTF-16LE \n " ) ;
TestFullRoundtrip ( " UTF-16LE " ) ;
2002-06-11 04:45:37 +00:00
log_verbose ( " Running exhaustive round trip test for UTF-16 \n " ) ;
TestFullRoundtrip ( " UTF-16 " ) ;
2002-03-25 22:51:09 +00:00
log_verbose ( " Running exhaustive round trip test for UTF-32BE \n " ) ;
TestFullRoundtrip ( " UTF-32BE " ) ;
log_verbose ( " Running exhaustive round trip test for UTF-32LE \n " ) ;
TestFullRoundtrip ( " UTF-32LE " ) ;
2002-06-11 04:45:37 +00:00
log_verbose ( " Running exhaustive round trip test for UTF-32 \n " ) ;
TestFullRoundtrip ( " UTF-32 " ) ;
2002-03-25 22:51:09 +00:00
log_verbose ( " Running exhaustive round trip test for UTF-7 \n " ) ;
TestFullRoundtrip ( " UTF-7 " ) ;
log_verbose ( " Running exhaustive round trip test for UTF-7 \n " ) ;
TestFullRoundtrip ( " UTF-7,version=1 " ) ;
2002-11-07 21:02:24 +00:00
log_verbose ( " Running exhaustive round trip test for IMAP-mailbox-name \n " ) ;
TestFullRoundtrip ( " IMAP-mailbox-name " ) ;
2011-01-19 03:30:52 +00:00
/*
*
* With the update to GB18030 2005 ( Ticket # 8274 ) , this test will fail because the 2005 version of
* GB18030 contains mappings to actual Unicode codepoints ( which were previously mapped to PUA ) .
* The old mappings remain as fallbacks .
* This test may be reintroduced at a later time .
*
* 110118 - mow
*/
/*
log_verbose ( " Running exhaustive round trip test for GB18030 \n " ) ;
TestFullRoundtrip ( " GB18030 " ) ;
*/
2002-03-25 22:51:09 +00:00
}
}
2002-03-25 22:41:39 +00:00
2001-03-02 23:55:49 +00:00
static void
TestSCSU ( ) {
2001-04-18 19:31:05 +00:00
2002-03-28 18:26:25 +00:00
static const uint16_t germanUTF16 [ ] = {
2001-03-02 23:55:49 +00:00
0x00d6 , 0x006c , 0x0020 , 0x0066 , 0x006c , 0x0069 , 0x0065 , 0x00df , 0x0074
} ;
2002-03-28 18:26:25 +00:00
static const uint8_t germanSCSU [ ] = {
2001-03-02 23:55:49 +00:00
0xd6 , 0x6c , 0x20 , 0x66 , 0x6c , 0x69 , 0x65 , 0xdf , 0x74
} ;
2002-03-28 18:26:25 +00:00
static const uint16_t russianUTF16 [ ] = {
2001-03-02 23:55:49 +00:00
0x041c , 0x043e , 0x0441 , 0x043a , 0x0432 , 0x0430
} ;
2002-03-28 18:26:25 +00:00
static const uint8_t russianSCSU [ ] = {
2001-03-02 23:55:49 +00:00
0x12 , 0x9c , 0xbe , 0xc1 , 0xba , 0xb2 , 0xb0
} ;
2002-03-28 18:26:25 +00:00
static const uint16_t japaneseUTF16 [ ] = {
2001-03-02 23:55:49 +00:00
0x3000 , 0x266a , 0x30ea , 0x30f3 , 0x30b4 , 0x53ef , 0x611b ,
0x3044 , 0x3084 , 0x53ef , 0x611b , 0x3044 , 0x3084 , 0x30ea , 0x30f3 ,
0x30b4 , 0x3002 , 0x534a , 0x4e16 , 0x7d00 , 0x3082 , 0x524d , 0x306b ,
0x6d41 , 0x884c , 0x3057 , 0x305f , 0x300c , 0x30ea , 0x30f3 , 0x30b4 ,
0x306e , 0x6b4c , 0x300d , 0x304c , 0x3074 , 0x3063 , 0x305f , 0x308a ,
0x3059 , 0x308b , 0x304b , 0x3082 , 0x3057 , 0x308c , 0x306a , 0x3044 ,
0x3002 , 0x7c73 , 0x30a2 , 0x30c3 , 0x30d7 , 0x30eb , 0x30b3 , 0x30f3 ,
0x30d4 , 0x30e5 , 0x30fc , 0x30bf , 0x793e , 0x306e , 0x30d1 , 0x30bd ,
0x30b3 , 0x30f3 , 0x300c , 0x30de , 0x30c3 , 0x30af , 0xff08 , 0x30de ,
0x30c3 , 0x30ad , 0x30f3 , 0x30c8 , 0x30c3 , 0x30b7 , 0x30e5 , 0xff09 ,
0x300d , 0x3092 , 0x3001 , 0x3053 , 0x3088 , 0x306a , 0x304f , 0x611b ,
0x3059 , 0x308b , 0x4eba , 0x305f , 0x3061 , 0x306e , 0x3053 , 0x3068 ,
0x3060 , 0x3002 , 0x300c , 0x30a2 , 0x30c3 , 0x30d7 , 0x30eb , 0x4fe1 ,
0x8005 , 0x300d , 0x306a , 0x3093 , 0x3066 , 0x8a00 , 0x3044 , 0x65b9 ,
0x307e , 0x3067 , 0x3042 , 0x308b , 0x3002
} ;
2001-03-05 18:39:03 +00:00
/* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
it uses an SQn once where a longer look - ahead could have shown that SCn is more efficient */
2002-03-28 18:26:25 +00:00
static const uint8_t japaneseSCSU [ ] = {
2001-03-02 23:55:49 +00:00
0x08 , 0x00 , 0x1b , 0x4c , 0xea , 0x16 , 0xca , 0xd3 , 0x94 , 0x0f , 0x53 , 0xef , 0x61 , 0x1b , 0xe5 , 0x84 ,
0xc4 , 0x0f , 0x53 , 0xef , 0x61 , 0x1b , 0xe5 , 0x84 , 0xc4 , 0x16 , 0xca , 0xd3 , 0x94 , 0x08 , 0x02 , 0x0f ,
0x53 , 0x4a , 0x4e , 0x16 , 0x7d , 0x00 , 0x30 , 0x82 , 0x52 , 0x4d , 0x30 , 0x6b , 0x6d , 0x41 , 0x88 , 0x4c ,
0xe5 , 0x97 , 0x9f , 0x08 , 0x0c , 0x16 , 0xca , 0xd3 , 0x94 , 0x15 , 0xae , 0x0e , 0x6b , 0x4c , 0x08 , 0x0d ,
0x8c , 0xb4 , 0xa3 , 0x9f , 0xca , 0x99 , 0xcb , 0x8b , 0xc2 , 0x97 , 0xcc , 0xaa , 0x84 , 0x08 , 0x02 , 0x0e ,
0x7c , 0x73 , 0xe2 , 0x16 , 0xa3 , 0xb7 , 0xcb , 0x93 , 0xd3 , 0xb4 , 0xc5 , 0xdc , 0x9f , 0x0e , 0x79 , 0x3e ,
0x06 , 0xae , 0xb1 , 0x9d , 0x93 , 0xd3 , 0x08 , 0x0c , 0xbe , 0xa3 , 0x8f , 0x08 , 0x88 , 0xbe , 0xa3 , 0x8d ,
0xd3 , 0xa8 , 0xa3 , 0x97 , 0xc5 , 0x17 , 0x89 , 0x08 , 0x0d , 0x15 , 0xd2 , 0x08 , 0x01 , 0x93 , 0xc8 , 0xaa ,
0x8f , 0x0e , 0x61 , 0x1b , 0x99 , 0xcb , 0x0e , 0x4e , 0xba , 0x9f , 0xa1 , 0xae , 0x93 , 0xa8 , 0xa0 , 0x08 ,
0x02 , 0x08 , 0x0c , 0xe2 , 0x16 , 0xa3 , 0xb7 , 0xcb , 0x0f , 0x4f , 0xe1 , 0x80 , 0x05 , 0xec , 0x60 , 0x8d ,
0xea , 0x06 , 0xd3 , 0xe6 , 0x0f , 0x8a , 0x00 , 0x30 , 0x44 , 0x65 , 0xb9 , 0xe4 , 0xfe , 0xe7 , 0xc2 , 0x06 ,
0xcb , 0x82
} ;
2002-03-28 18:26:25 +00:00
static const uint16_t allFeaturesUTF16 [ ] = {
2001-03-02 23:55:49 +00:00
0x0041 , 0x00df , 0x0401 , 0x015f , 0x00df , 0x01df , 0xf000 , 0xdbff ,
2001-04-18 19:31:05 +00:00
0xdfff , 0x000d , 0x000a , 0x0041 , 0x00df , 0x0401 , 0x015f , 0x00df ,
2001-03-02 23:55:49 +00:00
0x01df , 0xf000 , 0xdbff , 0xdfff
} ;
2002-03-28 21:51:14 +00:00
2001-03-02 23:55:49 +00:00
/* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
* result here ( 34 B vs . 35 B )
*/
2002-03-28 18:26:25 +00:00
static const uint8_t allFeaturesSCSU [ ] = {
2001-04-18 19:31:05 +00:00
0x41 , 0xdf , 0x12 , 0x81 , 0x03 , 0x5f , 0x10 , 0xdf , 0x1b , 0x03 ,
2001-03-02 23:55:49 +00:00
0xdf , 0x1c , 0x88 , 0x80 , 0x0b , 0xbf , 0xff , 0xff , 0x0d , 0x0a ,
0x41 , 0x10 , 0xdf , 0x12 , 0x81 , 0x03 , 0x5f , 0x10 , 0xdf , 0x13 ,
2001-04-18 19:31:05 +00:00
0xdf , 0x14 , 0x80 , 0x15 , 0xff
2001-03-02 23:55:49 +00:00
} ;
2001-07-14 02:29:21 +00:00
static const uint16_t monkeyIn [ ] = {
0x00A8 , 0x3003 , 0x3005 , 0x2015 , 0xFF5E , 0x2016 , 0x2026 , 0x2018 , 0x000D , 0x000A ,
0x2019 , 0x201C , 0x201D , 0x3014 , 0x3015 , 0x3008 , 0x3009 , 0x300A , 0x000D , 0x000A ,
0x300B , 0x300C , 0x300D , 0x300E , 0x300F , 0x3016 , 0x3017 , 0x3010 , 0x000D , 0x000A ,
0x3011 , 0x00B1 , 0x00D7 , 0x00F7 , 0x2236 , 0x2227 , 0x7FC1 , 0x8956 , 0x000D , 0x000A ,
0x9D2C , 0x9D0E , 0x9EC4 , 0x5CA1 , 0x6C96 , 0x837B , 0x5104 , 0x5C4B , 0x000D , 0x000A ,
0x61B6 , 0x81C6 , 0x6876 , 0x7261 , 0x4E59 , 0x4FFA , 0x5378 , 0x57F7 , 0x000D , 0x000A ,
0x57F4 , 0x57F9 , 0x57FA , 0x57FC , 0x5800 , 0x5802 , 0x5805 , 0x5806 , 0x000D , 0x000A ,
0x580A , 0x581E , 0x6BB5 , 0x6BB7 , 0x6BBA , 0x6BBC , 0x9CE2 , 0x977C , 0x000D , 0x000A ,
0x6BBF , 0x6BC1 , 0x6BC5 , 0x6BC6 , 0x6BCB , 0x6BCD , 0x6BCF , 0x6BD2 , 0x000D , 0x000A ,
0x6BD3 , 0x6BD4 , 0x6BD6 , 0x6BD7 , 0x6BD8 , 0x6BDB , 0x6BEB , 0x6BEC , 0x000D , 0x000A ,
0x6C05 , 0x6C08 , 0x6C0F , 0x6C11 , 0x6C13 , 0x6C23 , 0x6C34 , 0x0041 , 0x000D , 0x000A ,
0x0043 , 0x0044 , 0x0045 , 0x0046 , 0x0047 , 0x0048 , 0x0049 , 0x004A , 0x000D , 0x000A ,
0x004B , 0x004C , 0x004D , 0x004E , 0x004F , 0x0050 , 0x0051 , 0x0052 , 0x000D , 0x000A ,
0x0053 , 0x0054 , 0x0055 , 0x0056 , 0x0057 , 0x0058 , 0x0059 , 0x005A , 0x000D , 0x000A ,
0x005B , 0x9792 , 0x9CCC , 0x9CCD , 0x9CCE , 0x9CCF , 0x9CD0 , 0x9CD3 , 0x000D , 0x000A ,
0x9CD4 , 0x9CD5 , 0x9CD7 , 0x9CD8 , 0x9CD9 , 0x9CDC , 0x9CDD , 0x9CDF , 0x000D , 0x000A ,
0x9785 , 0x9791 , 0x00BD , 0x0390 , 0x0385 , 0x0386 , 0x0388 , 0x0389 , 0x000D , 0x000A ,
0x038E , 0x038F , 0x0390 , 0x0391 , 0x0392 , 0x0393 , 0x0394 , 0x0395 , 0x000D , 0x000A ,
0x0396 , 0x0397 , 0x0398 , 0x0399 , 0x039A , 0x038A , 0x038C , 0x039C , 0x000D , 0x000A ,
2002-07-29 21:04:18 +00:00
/* test non-BMP code points */
0xD869 , 0xDE99 , 0xD869 , 0xDE9C , 0xD869 , 0xDE9D , 0xD869 , 0xDE9E , 0xD869 , 0xDE9F ,
0xD869 , 0xDEA0 , 0xD869 , 0xDEA5 , 0xD869 , 0xDEA6 , 0xD869 , 0xDEA7 , 0xD869 , 0xDEA8 ,
2001-07-14 02:29:21 +00:00
0xD869 , 0xDEAB , 0xD869 , 0xDEAC , 0xD869 , 0xDEAD , 0xD869 , 0xDEAE , 0xD869 , 0xDEAF ,
2002-07-29 21:04:18 +00:00
0xD869 , 0xDEB0 , 0xD869 , 0xDEB1 , 0xD869 , 0xDEB3 , 0xD869 , 0xDEB5 , 0xD869 , 0xDEB6 ,
0xD869 , 0xDEB7 , 0xD869 , 0xDEB8 , 0xD869 , 0xDEB9 , 0xD869 , 0xDEBA , 0xD869 , 0xDEBB ,
0xD869 , 0xDEBC , 0xD869 , 0xDEBD , 0xD869 , 0xDEBE , 0xD869 , 0xDEBF , 0xD869 , 0xDEC0 ,
0xD869 , 0xDEC1 , 0xD869 , 0xDEC2 , 0xD869 , 0xDEC3 , 0xD869 , 0xDEC4 , 0xD869 , 0xDEC8 ,
0xD869 , 0xDECA , 0xD869 , 0xDECB , 0xD869 , 0xDECD , 0xD869 , 0xDECE , 0xD869 , 0xDECF ,
0xD869 , 0xDED0 , 0xD869 , 0xDED1 , 0xD869 , 0xDED2 , 0xD869 , 0xDED3 , 0xD869 , 0xDED4 ,
2002-03-25 22:41:39 +00:00
0xD869 , 0xDED5 , 0xD800 , 0xDC00 , 0xD800 , 0xDC00 , 0xD800 , 0xDC00 , 0xDBFF , 0xDFFF ,
0xDBFF , 0xDFFF , 0xDBFF , 0xDFFF ,
2001-07-14 02:29:21 +00:00
0x4DB3 , 0x4DB4 , 0x4DB5 , 0x4E00 , 0x4E00 , 0x4E01 , 0x4E02 , 0x4E03 , 0x000D , 0x000A ,
0x0392 , 0x0393 , 0x0394 , 0x0395 , 0x0396 , 0x0397 , 0x33E0 , 0x33E6 , 0x000D , 0x000A ,
0x4E05 , 0x4E07 , 0x4E04 , 0x4E08 , 0x4E08 , 0x4E09 , 0x4E0A , 0x4E0B , 0x000D , 0x000A ,
0x4E0C , 0x0021 , 0x0022 , 0x0023 , 0x0024 , 0xFF40 , 0xFF41 , 0xFF42 , 0x000D , 0x000A ,
0xFF43 , 0xFF44 , 0xFF45 , 0xFF46 , 0xFF47 , 0xFF48 , 0xFF49 , 0xFF4A , 0x000D , 0x000A ,
} ;
2002-03-25 22:41:39 +00:00
static const char * fTestCases [ ] = {
" \\ ud800 \\ udc00 " , /* smallest surrogate*/
" \\ ud8ff \\ udcff " ,
" \\ udBff \\ udFff " , /* largest surrogate pair*/
" \\ ud834 \\ udc00 " ,
2002-04-03 23:38:10 +00:00
" \\ U0010FFFF " ,
2002-03-25 22:41:39 +00:00
" Hello \\ u9292 \\ u9192 World! " ,
" Hell \\ u0429o \\ u9292 \\ u9192 W \\ u00e4rld! " ,
" Hell \\ u0429o \\ u9292 \\ u9292W \\ u00e4rld! " ,
2002-07-29 21:04:18 +00:00
2002-03-25 22:41:39 +00:00
" \\ u0648 \\ u06c8 " , /* catch missing reset*/
" \\ u0648 \\ u06c8 " ,
2002-07-29 21:04:18 +00:00
2002-03-25 22:41:39 +00:00
" \\ u4444 \\ uE001 " , /* lowest quotable*/
" \\ u4444 \\ uf2FF " , /* highest quotable*/
" \\ u4444 \\ uf188 \\ u4444 " ,
" \\ u4444 \\ uf188 \\ uf288 " ,
" \\ u4444 \\ uf188abc \\ u0429 \\ uf288 " ,
" \\ u9292 \\ u2222 " ,
" Hell \\ u0429 \\ u04230o \\ u9292 \\ u9292W \\ u00e4 \\ u0192rld! " ,
" Hell \\ u0429o \\ u9292 \\ u9292W \\ u00e4rld! " ,
" Hello World!123456 " ,
" Hello W \\ u0081 \\ u011f \\ u0082! " , /* Latin 1 run*/
2002-07-29 21:04:18 +00:00
2002-03-25 22:41:39 +00:00
" abc \\ u0301 \\ u0302 " , /* uses SQn for u301 u302*/
" abc \\ u4411d " , /* uses SQU*/
" abc \\ u4411 \\ u4412d " , /* uses SCU*/
" abc \\ u0401 \\ u0402 \\ u047f \\ u00a5 \\ u0405 " , /* uses SQn for ua5*/
" \\ u9191 \\ u9191 \\ u3041 \\ u9191 \\ u3041 \\ u3041 \\ u3000 " , /* SJIS like data*/
" \\ u9292 \\ u2222 " ,
" \\ u9191 \\ u9191 \\ u3041 \\ u9191 \\ u3041 \\ u3041 \\ u3000 " ,
" \\ u9999 \\ u3051 \\ u300c \\ u9999 \\ u9999 \\ u3060 \\ u9999 \\ u3065 \\ u3065 \\ u3065 \\ u300c " ,
" \\ u3000 \\ u266a \\ u30ea \\ u30f3 \\ u30b4 \\ u53ef \\ u611b \\ u3044 \\ u3084 \\ u53ef \\ u611b \\ u3044 \\ u3084 \\ u30ea \\ u30f3 \\ u30b4 \\ u3002 " ,
2002-07-29 21:04:18 +00:00
2002-03-25 22:41:39 +00:00
" " , /* empty input*/
" \\ u0000 " , /* smallest BMP character*/
" \\ uFFFF " , /* largest BMP character*/
2002-07-29 21:04:18 +00:00
2002-03-25 22:41:39 +00:00
/* regression tests*/
" \\ u6441 \\ ub413 \\ ua733 \\ uf8fe \\ ueedb \\ u587f \\ u195f \\ u4899 \\ uf23d \\ u49fd \\ u0aac \\ u5792 \\ ufc22 \\ ufc3c \\ ufc46 \\ u00aa " ,
" \\ u00df \\ u01df \\ uf000 \\ udbff \\ udfff \\ u000d \n \\ u0041 \\ u00df \\ u0401 \\ u015f \\ u00df \\ u01df \\ uf000 \\ udbff \\ udfff " ,
" \\ u30f9 \\ u8321 \\ u05e5 \\ u181c \\ ud72b \\ u2019 \\ u99c9 \\ u2f2f \\ uc10c \\ u82e1 \\ u2c4d \\ u1ebc \\ u6013 \\ u66dc \\ ubbde \\ u94a5 \\ u4726 \\ u74af \\ u3083 \\ u55b9 \\ u000c " ,
" \\ u0041 \\ u00df \\ u0401 \\ u015f " ,
" \\ u9066 \\ u2123abc " ,
" \\ ud266 \\ u43d7 \\ u \\ ue386 \\ uc9c0 \\ u4a6b \\ u9222 \\ u901f \\ u7410 \\ ua63f \\ u539b \\ u9596 \\ u482e \\ u9d47 \\ ucfe4 \\ u7b71 \\ uc280 \\ uf26a \\ u982f \\ u862a \\ u4edd \\ uf513 \\ ufda6 \\ u869d \\ u2ee0 \\ ua216 \\ u3ff6 \\ u3c70 \\ u89c0 \\ u9576 \\ ud5ec \\ ubfda \\ u6cca \\ u5bb3 \\ ubcea \\ u554c \\ u914e \\ ufa4a \\ uede3 \\ u2990 \\ ud2f5 \\ u2729 \\ u5141 \\ u0f26 \\ uccd8 \\ u5413 \\ ud196 \\ ubbe2 \\ u51b9 \\ u9b48 \\ u0dc8 \\ u2195 \\ u21a2 \\ u21e9 \\ u00e4 \\ u9d92 \\ u0bc0 \\ u06c5 " ,
" \\ uf95b \\ u2458 \\ u2468 \\ u0e20 \\ uf51b \\ ue36e \\ ubfc1 \\ u0080 \\ u02dd \\ uf1b5 \\ u0cf3 \\ u6059 \\ u7489 " ,
} ;
int i = 0 ;
for ( ; i < sizeof ( fTestCases ) / sizeof ( * fTestCases ) ; i + + ) {
const char * cSrc = fTestCases [ i ] ;
UErrorCode status = U_ZERO_ERROR ;
int32_t cSrcLen , srcLen ;
UChar * src ;
2002-03-26 04:19:30 +00:00
/* UConverter* cnv = ucnv_open("SCSU",&status); */
2004-12-08 23:02:08 +00:00
cSrcLen = srcLen = ( int32_t ) uprv_strlen ( fTestCases [ i ] ) ;
2002-07-29 21:04:18 +00:00
src = ( UChar * ) malloc ( ( sizeof ( UChar ) * srcLen ) + sizeof ( UChar ) ) ;
2002-03-25 22:41:39 +00:00
srcLen = unescape ( src , srcLen , cSrc , cSrcLen , & status ) ;
log_verbose ( " Testing roundtrip for src: %s at index :%d \n " , cSrc , i ) ;
TestConv ( src , srcLen , " SCSU " , " Coverage " , NULL , 0 ) ;
2002-07-29 21:04:18 +00:00
free ( src ) ;
2002-03-25 22:41:39 +00:00
}
2001-07-14 02:29:21 +00:00
TestConv ( allFeaturesUTF16 , ( sizeof ( allFeaturesUTF16 ) / 2 ) , " SCSU " , " all features " , ( char * ) allFeaturesSCSU , sizeof ( allFeaturesSCSU ) ) ;
2001-03-05 18:39:03 +00:00
TestConv ( allFeaturesUTF16 , ( sizeof ( allFeaturesUTF16 ) / 2 ) , " SCSU " , " all features " , ( char * ) allFeaturesSCSU , sizeof ( allFeaturesSCSU ) ) ;
TestConv ( japaneseUTF16 , ( sizeof ( japaneseUTF16 ) / 2 ) , " SCSU " , " japaneese " , ( char * ) japaneseSCSU , sizeof ( japaneseSCSU ) ) ;
TestConv ( japaneseUTF16 , ( sizeof ( japaneseUTF16 ) / 2 ) , " SCSU,locale=ja " , " japaneese " , ( char * ) japaneseSCSU , sizeof ( japaneseSCSU ) ) ;
TestConv ( germanUTF16 , ( sizeof ( germanUTF16 ) / 2 ) , " SCSU " , " german " , ( char * ) germanSCSU , sizeof ( germanSCSU ) ) ;
TestConv ( russianUTF16 , ( sizeof ( russianUTF16 ) / 2 ) , " SCSU " , " russian " , ( char * ) russianSCSU , sizeof ( russianSCSU ) ) ;
2001-07-14 02:29:21 +00:00
TestConv ( monkeyIn , ( sizeof ( monkeyIn ) / 2 ) , " SCSU " , " monkey " , NULL , 0 ) ;
2002-10-29 01:16:22 +00:00
}
2006-07-28 22:58:29 +00:00
# if !UCONFIG_NO_LEGACY_CONVERSION
2002-10-29 01:16:22 +00:00
static void TestJitterbug2346 ( ) {
char source [ ] = { 0x1b , 0x24 , 0x42 , 0x3d , 0x45 , 0x1b , 0x28 , 0x4a , 0x0d , 0x0a ,
0x1b , 0x24 , 0x42 , 0x3d , 0x45 , 0x1b , 0x28 , 0x4a , 0x0d , 0x0a } ;
uint16_t expected [ ] = { 0x91CD , 0x000D , 0x000A , 0x91CD , 0x000D , 0x000A } ;
UChar uTarget [ 500 ] = { ' \0 ' } ;
UChar * utarget = uTarget ;
UChar * utargetLimit = uTarget + sizeof ( uTarget ) / 2 ;
char cTarget [ 500 ] = { ' \0 ' } ;
char * ctarget = cTarget ;
char * ctargetLimit = cTarget + sizeof ( cTarget ) ;
const char * csource = source ;
UChar * temp = expected ;
UErrorCode err = U_ZERO_ERROR ;
UConverter * conv = ucnv_open ( " ISO_2022_JP " , & err ) ;
if ( U_FAILURE ( err ) ) {
log_data_err ( " Unable to open a iso-2022 converter: %s \n " , u_errorName ( err ) ) ;
return ;
}
ucnv_toUnicode ( conv , & utarget , utargetLimit , & csource , csource + sizeof ( source ) , NULL , TRUE , & err ) ;
if ( U_FAILURE ( err ) ) {
log_err ( " ISO_2022_JP to Unicode conversion failed: %s \n " , u_errorName ( err ) ) ;
return ;
}
utargetLimit = utarget ;
utarget = uTarget ;
while ( utarget < utargetLimit ) {
if ( * temp ! = * utarget ) {
log_err ( " Expected : \\ u%04X \t Got: \\ u%04X \n " , * utarget , ( int ) * temp ) ;
}
utarget + + ;
temp + + ;
}
ucnv_fromUnicode ( conv , & ctarget , ctargetLimit , ( const UChar * * ) & utarget , utargetLimit , NULL , TRUE , & err ) ;
if ( U_FAILURE ( err ) ) {
log_err ( " ISO_2022_JP from Unicode conversion failed: %s \n " , u_errorName ( err ) ) ;
return ;
}
ctargetLimit = ctarget ;
ctarget = cTarget ;
ucnv_close ( conv ) ;
2001-03-02 23:55:49 +00:00
}
2006-07-28 22:58:29 +00:00
2000-11-21 04:05:39 +00:00
static void
2000-10-28 01:08:25 +00:00
TestISO_2022_JP_1 ( ) {
/* test input */
static const uint16_t in [ ] = {
0x3000 , 0x3001 , 0x3002 , 0x0020 , 0xFF0E , 0x30FB , 0xFF1A , 0xFF1B , 0x000D , 0x000A ,
2001-04-18 19:31:05 +00:00
0x0043 , 0x0044 , 0x0045 , 0x0046 , 0x0047 , 0x0048 , 0x0049 , 0x004A , 0x000D , 0x000A ,
2000-10-28 01:08:25 +00:00
0x52C8 , 0x52CC , 0x52CF , 0x52D1 , 0x52D4 , 0x52D6 , 0x52DB , 0x52DC , 0x000D , 0x000A ,
0x004B , 0x004C , 0x004D , 0x004E , 0x004F , 0x0050 , 0x0051 , 0x0052 , 0x000D , 0x000A ,
2001-04-18 19:31:05 +00:00
0x3005 , 0x3006 , 0x3007 , 0x30FC , 0x2015 , 0x2010 , 0xFF0F , 0x005C , 0x000D , 0x000A ,
2007-10-11 21:52:29 +00:00
0x3013 , 0x2018 , 0x2026 , 0x2025 , 0x2018 , 0x2019 , 0x201C , 0x000D , 0x000A ,
2001-02-23 04:40:39 +00:00
0x201D , 0x000D , 0x000A ,
2000-10-28 01:08:25 +00:00
0x0053 , 0x0054 , 0x0055 , 0x0056 , 0x0057 , 0x0058 , 0x0059 , 0x005A , 0x000D , 0x000A ,
2001-04-18 19:31:05 +00:00
0x4F94 , 0x4F97 , 0x52BA , 0x52BB , 0x52BD , 0x52C0 , 0x52C4 , 0x52C6 , 0x000D , 0x000A ,
0x0053 , 0x0054 , 0x0055 , 0x0056 , 0x0057 , 0x0058 , 0x0059 , 0x005A , 0x000D , 0x000A ,
2000-10-28 01:08:25 +00:00
0x4F78 , 0x4F79 , 0x4F7A , 0x4F7D , 0x4F7E , 0x4F81 , 0x4F82 , 0x4F84 , 0x000D , 0x000A ,
0x4F85 , 0x4F89 , 0x4F8A , 0x4F8C , 0x4F8E , 0x4F90 , 0x4F92 , 0x4F93 , 0x000D , 0x000A ,
0x52E1 , 0x52E5 , 0x52E8 , 0x52E9 , 0x000D , 0x000A
} ;
2000-11-17 03:03:14 +00:00
const UChar * uSource ;
const UChar * uSourceLimit ;
const char * cSource ;
const char * cSourceLimit ;
UChar * uTargetLimit = NULL ;
UChar * uTarget ;
char * cTarget ;
const char * cTargetLimit ;
2001-04-18 19:31:05 +00:00
char * cBuf ;
2000-11-17 03:03:14 +00:00
UChar * uBuf , * test ;
2000-10-28 01:08:25 +00:00
int32_t uBufSize = 120 ;
UErrorCode errorCode = U_ZERO_ERROR ;
UConverter * cnv ;
2001-03-13 00:07:23 +00:00
cnv = ucnv_open ( " ISO_2022_JP_1 " , & errorCode ) ;
2000-10-28 01:08:25 +00:00
if ( U_FAILURE ( errorCode ) ) {
2002-09-20 19:07:19 +00:00
log_data_err ( " Unable to open a iso-2022 converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-10-28 01:08:25 +00:00
return ;
}
2000-11-17 03:03:14 +00:00
uBuf = ( UChar * ) malloc ( uBufSize * sizeof ( UChar ) * 5 ) ;
cBuf = ( char * ) malloc ( uBufSize * sizeof ( char ) * 5 ) ;
2005-11-08 22:44:37 +00:00
uSource = ( const UChar * ) in ;
uSourceLimit = ( const UChar * ) in + ( sizeof ( in ) / sizeof ( in [ 0 ] ) ) ;
2000-11-17 03:03:14 +00:00
cTarget = cBuf ;
cTargetLimit = cBuf + uBufSize * 5 ;
uTarget = uBuf ;
uTargetLimit = uBuf + uBufSize * 5 ;
ucnv_fromUnicode ( cnv , & cTarget , cTargetLimit , & uSource , uSourceLimit , NULL , TRUE , & errorCode ) ;
2000-10-28 01:08:25 +00:00
if ( U_FAILURE ( errorCode ) ) {
log_err ( " ucnv_fromUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
cSource = cBuf ;
2000-11-17 03:03:14 +00:00
cSourceLimit = cTarget ;
test = uBuf ;
ucnv_toUnicode ( cnv , & uTarget , uTargetLimit , & cSource , cSourceLimit , NULL , TRUE , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
2000-10-28 01:08:25 +00:00
log_err ( " ucnv_toUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
2005-11-08 22:44:37 +00:00
uSource = ( const UChar * ) in ;
2000-11-17 03:03:14 +00:00
while ( uSource < uSourceLimit ) {
if ( * test ! = * uSource ) {
2001-04-18 19:31:05 +00:00
2000-11-17 03:03:14 +00:00
log_err ( " Expected : \\ u%04X \t Got: \\ u%04X \n " , * uSource , ( int ) * test ) ;
}
2000-12-08 01:13:38 +00:00
uSource + + ;
test + + ;
2000-11-17 03:03:14 +00:00
}
2000-10-28 01:08:25 +00:00
/*ucnv_close(cnv);
cnv = ucnv_open ( " ISO_2022,locale=jp,version=1 " , & errorCode ) ; */
2002-07-31 20:58:58 +00:00
/*Test for the condition where there is an invalid character*/
ucnv_reset ( cnv ) ;
{
static const uint8_t source2 [ ] = { 0x0e , 0x24 , 0x053 } ;
TestNextUCharError ( cnv , ( const char * ) source2 , ( const char * ) source2 + sizeof ( source2 ) , U_ZERO_ERROR , " an invalid character [ISO-2022-JP-1] " ) ;
}
2005-11-08 22:44:37 +00:00
TestSmallTargetBuffer ( in , ( const UChar * ) in + ( sizeof ( in ) / sizeof ( in [ 0 ] ) ) , cnv ) ;
TestSmallSourceBuffer ( in , ( const UChar * ) in + ( sizeof ( in ) / sizeof ( in [ 0 ] ) ) , cnv ) ;
2000-11-17 03:03:14 +00:00
ucnv_close ( cnv ) ;
free ( uBuf ) ;
free ( cBuf ) ;
2000-10-28 01:08:25 +00:00
}
2000-11-21 04:05:39 +00:00
static void
2000-10-28 01:08:25 +00:00
TestISO_2022_JP_2 ( ) {
2000-08-15 00:07:33 +00:00
/* test input */
static const uint16_t in [ ] = {
2001-04-18 19:31:05 +00:00
0x00A8 , 0x3003 , 0x3005 , 0x2015 , 0xFF5E , 0x2016 , 0x2026 , 0x2018 , 0x000D , 0x000A ,
0x2019 , 0x201C , 0x201D , 0x3014 , 0x3015 , 0x3008 , 0x3009 , 0x300A , 0x000D , 0x000A ,
0x300B , 0x300C , 0x300D , 0x300E , 0x300F , 0x3016 , 0x3017 , 0x3010 , 0x000D , 0x000A ,
0x3011 , 0x00B1 , 0x00D7 , 0x00F7 , 0x2236 , 0x2227 , 0x7FC1 , 0x8956 , 0x000D , 0x000A ,
0x9D2C , 0x9D0E , 0x9EC4 , 0x5CA1 , 0x6C96 , 0x837B , 0x5104 , 0x5C4B , 0x000D , 0x000A ,
0x61B6 , 0x81C6 , 0x6876 , 0x7261 , 0x4E59 , 0x4FFA , 0x5378 , 0x57F7 , 0x000D , 0x000A ,
0x57F4 , 0x57F9 , 0x57FA , 0x57FC , 0x5800 , 0x5802 , 0x5805 , 0x5806 , 0x000D , 0x000A ,
0x580A , 0x581E , 0x6BB5 , 0x6BB7 , 0x6BBA , 0x6BBC , 0x9CE2 , 0x977C , 0x000D , 0x000A ,
0x6BBF , 0x6BC1 , 0x6BC5 , 0x6BC6 , 0x6BCB , 0x6BCD , 0x6BCF , 0x6BD2 , 0x000D , 0x000A ,
0x6BD3 , 0x6BD4 , 0x6BD6 , 0x6BD7 , 0x6BD8 , 0x6BDB , 0x6BEB , 0x6BEC , 0x000D , 0x000A ,
0x6C05 , 0x6C08 , 0x6C0F , 0x6C11 , 0x6C13 , 0x6C23 , 0x6C34 , 0x0041 , 0x000D , 0x000A ,
0x0043 , 0x0044 , 0x0045 , 0x0046 , 0x0047 , 0x0048 , 0x0049 , 0x004A , 0x000D , 0x000A ,
0x004B , 0x004C , 0x004D , 0x004E , 0x004F , 0x0050 , 0x0051 , 0x0052 , 0x000D , 0x000A ,
0x0053 , 0x0054 , 0x0055 , 0x0056 , 0x0057 , 0x0058 , 0x0059 , 0x005A , 0x000D , 0x000A ,
0x005B , 0x9792 , 0x9CCC , 0x9CCD , 0x9CCE , 0x9CCF , 0x9CD0 , 0x9CD3 , 0x000D , 0x000A ,
0x9CD4 , 0x9CD5 , 0x9CD7 , 0x9CD8 , 0x9CD9 , 0x9CDC , 0x9CDD , 0x9CDF , 0x000D , 0x000A ,
0x9785 , 0x9791 , 0x00BD , 0x0390 , 0x0385 , 0x0386 , 0x0388 , 0x0389 , 0x000D , 0x000A ,
0x038E , 0x038F , 0x0390 , 0x0391 , 0x0392 , 0x0393 , 0x0394 , 0x0395 , 0x000D , 0x000A ,
0x0396 , 0x0397 , 0x0398 , 0x0399 , 0x039A , 0x038A , 0x038C , 0x039C , 0x000D , 0x000A
2000-08-15 00:07:33 +00:00
} ;
2000-11-17 03:03:14 +00:00
const UChar * uSource ;
const UChar * uSourceLimit ;
const char * cSource ;
const char * cSourceLimit ;
UChar * uTargetLimit = NULL ;
UChar * uTarget ;
char * cTarget ;
const char * cTargetLimit ;
2001-04-18 19:31:05 +00:00
char * cBuf ;
2000-11-17 03:03:14 +00:00
UChar * uBuf , * test ;
2000-08-15 00:07:33 +00:00
int32_t uBufSize = 120 ;
UErrorCode errorCode = U_ZERO_ERROR ;
UConverter * cnv ;
2000-11-17 03:03:14 +00:00
int32_t * offsets = ( int32_t * ) malloc ( uBufSize * sizeof ( int32_t ) * 5 ) ;
int32_t * myOff = offsets ;
2001-03-13 00:07:23 +00:00
cnv = ucnv_open ( " ISO_2022_JP_2 " , & errorCode ) ;
2000-08-15 00:07:33 +00:00
if ( U_FAILURE ( errorCode ) ) {
2002-09-20 19:07:19 +00:00
log_data_err ( " Unable to open a iso-2022 converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-08-15 00:07:33 +00:00
return ;
}
2000-11-17 03:03:14 +00:00
uBuf = ( UChar * ) malloc ( uBufSize * sizeof ( UChar ) * 5 ) ;
cBuf = ( char * ) malloc ( uBufSize * sizeof ( char ) * 5 ) ;
2005-11-08 22:44:37 +00:00
uSource = ( const UChar * ) in ;
uSourceLimit = ( const UChar * ) in + ( sizeof ( in ) / sizeof ( in [ 0 ] ) ) ;
2000-11-17 03:03:14 +00:00
cTarget = cBuf ;
cTargetLimit = cBuf + uBufSize * 5 ;
uTarget = uBuf ;
uTargetLimit = uBuf + uBufSize * 5 ;
ucnv_fromUnicode ( cnv , & cTarget , cTargetLimit , & uSource , uSourceLimit , myOff , TRUE , & errorCode ) ;
2000-08-15 21:13:20 +00:00
if ( U_FAILURE ( errorCode ) ) {
log_err ( " ucnv_fromUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
cSource = cBuf ;
2000-11-17 03:03:14 +00:00
cSourceLimit = cTarget ;
2001-04-18 19:31:05 +00:00
test = uBuf ;
2000-11-17 03:03:14 +00:00
myOff = offsets ;
ucnv_toUnicode ( cnv , & uTarget , uTargetLimit , & cSource , cSourceLimit , myOff , TRUE , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
2000-08-22 00:04:27 +00:00
log_err ( " ucnv_toUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
2000-08-15 21:13:20 +00:00
return ;
}
2005-11-08 22:44:37 +00:00
uSource = ( const UChar * ) in ;
2000-11-17 03:03:14 +00:00
while ( uSource < uSourceLimit ) {
if ( * test ! = * uSource ) {
2001-04-18 19:31:05 +00:00
2000-11-17 03:03:14 +00:00
log_err ( " Expected : \\ u%04X \t Got: \\ u%04X \n " , * uSource , ( int ) * test ) ;
}
2000-12-08 01:13:38 +00:00
uSource + + ;
test + + ;
2000-11-17 03:03:14 +00:00
}
2005-11-08 22:44:37 +00:00
TestSmallTargetBuffer ( in , ( const UChar * ) in + ( sizeof ( in ) / sizeof ( in [ 0 ] ) ) , cnv ) ;
TestSmallSourceBuffer ( in , ( const UChar * ) in + ( sizeof ( in ) / sizeof ( in [ 0 ] ) ) , cnv ) ;
TestToAndFromUChars ( in , ( const UChar * ) in + ( sizeof ( in ) / sizeof ( in [ 0 ] ) ) , cnv ) ;
2002-07-31 20:58:58 +00:00
/*Test for the condition where there is an invalid character*/
ucnv_reset ( cnv ) ;
{
static const uint8_t source2 [ ] = { 0x0e , 0x24 , 0x053 } ;
TestNextUCharError ( cnv , ( const char * ) source2 , ( const char * ) source2 + sizeof ( source2 ) , U_ZERO_ERROR , " an invalid character [ISO-2022-JP-2] " ) ;
}
2000-11-17 03:03:14 +00:00
ucnv_close ( cnv ) ;
free ( uBuf ) ;
free ( cBuf ) ;
free ( offsets ) ;
2000-08-22 00:04:27 +00:00
}
2000-11-21 04:05:39 +00:00
static void
2000-08-22 00:04:27 +00:00
TestISO_2022_KR ( ) {
/* test input */
static const uint16_t in [ ] = {
2008-10-07 04:33:51 +00:00
0x9F4B , 0x9F4E , 0x9F52 , 0x9F5F , 0x9F61 , 0x9F67 , 0x9F6A , 0x000A , 0x000D
, 0x9F6C , 0x9F77 , 0x9F8D , 0x9F90 , 0x9F95 , 0x9F9C , 0xAC00 , 0xAC01 , 0xAC04
2000-08-22 00:04:27 +00:00
, 0xAC07 , 0xAC08 , 0xAC09 , 0x0025 , 0x0026 , 0x0027 , 0x000A , 0x000D , 0x0028 , 0x0029
, 0x002A , 0x002B , 0x002C , 0x002D , 0x002E , 0x53C3 , 0x53C8 , 0x53C9 , 0x53CA , 0x53CB
2008-10-07 04:33:51 +00:00
, 0x53CD , 0x53D4 , 0x53D6 , 0x53D7 , 0x53DB , 0x000A , 0x000D , 0x53E1 , 0x53E2
2000-08-22 00:04:27 +00:00
, 0x53E3 , 0x53E4 , 0x000A , 0x000D } ;
2000-11-17 03:03:14 +00:00
const UChar * uSource ;
const UChar * uSourceLimit ;
const char * cSource ;
const char * cSourceLimit ;
UChar * uTargetLimit = NULL ;
UChar * uTarget ;
char * cTarget ;
const char * cTargetLimit ;
2001-04-18 19:31:05 +00:00
char * cBuf ;
2000-11-17 03:03:14 +00:00
UChar * uBuf , * test ;
2000-08-22 00:04:27 +00:00
int32_t uBufSize = 120 ;
UErrorCode errorCode = U_ZERO_ERROR ;
UConverter * cnv ;
2000-11-17 03:03:14 +00:00
int32_t * offsets = ( int32_t * ) malloc ( uBufSize * sizeof ( int32_t ) * 5 ) ;
int32_t * myOff = offsets ;
2000-08-22 00:04:27 +00:00
cnv = ucnv_open ( " ISO_2022,locale=kr " , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
2002-09-20 19:07:19 +00:00
log_data_err ( " Unable to open a iso-2022 converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-08-22 00:04:27 +00:00
return ;
}
2000-11-17 03:03:14 +00:00
uBuf = ( UChar * ) malloc ( uBufSize * sizeof ( UChar ) * 5 ) ;
cBuf = ( char * ) malloc ( uBufSize * sizeof ( char ) * 5 ) ;
2005-11-08 22:44:37 +00:00
uSource = ( const UChar * ) in ;
uSourceLimit = ( const UChar * ) in + ( sizeof ( in ) / sizeof ( in [ 0 ] ) ) ;
2000-11-17 03:03:14 +00:00
cTarget = cBuf ;
cTargetLimit = cBuf + uBufSize * 5 ;
uTarget = uBuf ;
uTargetLimit = uBuf + uBufSize * 5 ;
ucnv_fromUnicode ( cnv , & cTarget , cTargetLimit , & uSource , uSourceLimit , myOff , TRUE , & errorCode ) ;
2000-08-22 00:04:27 +00:00
if ( U_FAILURE ( errorCode ) ) {
log_err ( " ucnv_fromUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
cSource = cBuf ;
2000-11-17 03:03:14 +00:00
cSourceLimit = cTarget ;
test = uBuf ;
myOff = offsets ;
ucnv_toUnicode ( cnv , & uTarget , uTargetLimit , & cSource , cSourceLimit , myOff , TRUE , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
2000-08-22 00:04:27 +00:00
log_err ( " ucnv_toUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
2005-11-08 22:44:37 +00:00
uSource = ( const UChar * ) in ;
2000-11-17 03:03:14 +00:00
while ( uSource < uSourceLimit ) {
if ( * test ! = * uSource ) {
log_err ( " Expected : \\ u%04X \t Got: \\ u%04X \n " , * uSource , * test ) ;
}
2000-12-08 01:13:38 +00:00
uSource + + ;
test + + ;
2000-11-17 03:03:14 +00:00
}
TestGetNextUChar2022 ( cnv , cBuf , cTarget , in , " ISO-2022-KR encoding " ) ;
2005-11-08 22:44:37 +00:00
TestSmallTargetBuffer ( in , ( const UChar * ) in + ( sizeof ( in ) / sizeof ( in [ 0 ] ) ) , cnv ) ;
TestSmallSourceBuffer ( in , ( const UChar * ) in + ( sizeof ( in ) / sizeof ( in [ 0 ] ) ) , cnv ) ;
TestToAndFromUChars ( in , ( const UChar * ) in + ( sizeof ( in ) / sizeof ( in [ 0 ] ) ) , cnv ) ;
2002-07-31 20:58:58 +00:00
TestJitterbug930 ( " csISO2022KR " ) ;
/*Test for the condition where there is an invalid character*/
ucnv_reset ( cnv ) ;
{
static const uint8_t source2 [ ] = { 0x1b , 0x24 , 0x053 } ;
2003-12-03 17:54:17 +00:00
ucnv_setToUCallBack ( cnv , UCNV_TO_U_CALLBACK_STOP , NULL , NULL , NULL , & errorCode ) ;
2002-07-31 20:58:58 +00:00
TestNextUCharError ( cnv , ( const char * ) source2 , ( const char * ) source2 + sizeof ( source2 ) , U_ILLEGAL_ESCAPE_SEQUENCE , " an invalid character [ISO-2022-KR] " ) ;
}
2000-11-17 03:03:14 +00:00
ucnv_close ( cnv ) ;
free ( uBuf ) ;
free ( cBuf ) ;
free ( offsets ) ;
2000-09-19 02:38:40 +00:00
}
2001-04-07 01:25:14 +00:00
2001-02-24 02:52:14 +00:00
/*
 * Round-trip test for the converter opened as "ibm-25546".
 *
 * Encodes the UTF-16 sample in[] to bytes (requesting offsets), decodes those
 * bytes again with the same converter and compares the result with in[].
 * Afterwards it runs TestGetNextUChar2022, TestSmallTargetBuffer,
 * TestSmallSourceBuffer and TestToAndFromUChars (resetting the converter where
 * the original test did), and finally checks that a short escape-prefixed byte
 * sequence is reported as U_ILLEGAL_ESCAPE_SEQUENCE once the to-Unicode
 * callback is set to STOP.
 * Buffers are allocated only after the converter opened successfully, and
 * everything is released on every exit path.
 */
static void
TestISO_2022_KR_1() {
    /* test input */
    static const uint16_t in[] = {
        0x9F4B, 0x9F4E, 0x9F52, 0x9F5F, 0x9F61, 0x9F67, 0x9F6A, 0x000A, 0x000D,
        0x9F6C, 0x9F77, 0x9F8D, 0x9F90, 0x9F95, 0x9F9C, 0xAC00, 0xAC01, 0xAC04,
        0xAC07, 0xAC08, 0xAC09, 0x0025, 0x0026, 0x0027, 0x000A, 0x000D, 0x0028, 0x0029,
        0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x53C3, 0x53C8, 0x53C9, 0x53CA, 0x53CB,
        0x53CD, 0x53D4, 0x53D6, 0x53D7, 0x53DB, 0x000A, 0x000D, 0x53E1, 0x53E2,
        0x53E3, 0x53E4, 0x000A, 0x000D
    };
    const UChar *uSource;
    const UChar *uSourceLimit;
    const char *cSource;
    const char *cSourceLimit;
    UChar *uTargetLimit = NULL;
    UChar *uTarget;
    char *cTarget;
    const char *cTargetLimit;
    char *cBuf = NULL;
    UChar *uBuf = NULL, *test;
    int32_t uBufSize = 120;
    UErrorCode errorCode = U_ZERO_ERROR;
    UConverter *cnv;
    int32_t *offsets = NULL;
    int32_t *myOff;

    cnv = ucnv_open(" ibm-25546 ", &errorCode);
    if (U_FAILURE(errorCode)) {
        log_data_err(" Unable to open a iso-2022 converter: %s \n ", u_errorName(errorCode));
        return;
    }

    /* All three work buffers hold uBufSize * 5 elements. */
    offsets = (int32_t *)malloc(uBufSize * sizeof(int32_t) * 5);
    uBuf = (UChar *)malloc(uBufSize * sizeof(UChar) * 5);
    cBuf = (char *)malloc(uBufSize * sizeof(char) * 5);
    if (offsets == NULL || uBuf == NULL || cBuf == NULL) {
        log_err("TestISO_2022_KR_1: out of memory\n");
        goto cleanup;
    }

    myOff = offsets;
    uSource = (const UChar *)in;
    uSourceLimit = (const UChar *)in + (sizeof(in) / sizeof(in[0]));
    cTarget = cBuf;
    cTargetLimit = cBuf + uBufSize * 5;
    uTarget = uBuf;
    uTargetLimit = uBuf + uBufSize * 5;

    /* Unicode -> bytes */
    ucnv_fromUnicode(cnv, &cTarget, cTargetLimit, &uSource, uSourceLimit, myOff, TRUE, &errorCode);
    if (U_FAILURE(errorCode)) {
        log_err(" ucnv_fromUnicode conversion failed reason %s \n ", u_errorName(errorCode));
        goto cleanup;
    }

    /* bytes -> Unicode; the offsets buffer is reused from its start */
    cSource = cBuf;
    cSourceLimit = cTarget;
    test = uBuf;
    myOff = offsets;
    ucnv_toUnicode(cnv, &uTarget, uTargetLimit, &cSource, cSourceLimit, myOff, TRUE, &errorCode);
    if (U_FAILURE(errorCode)) {
        log_err(" ucnv_toUnicode conversion failed reason %s \n ", u_errorName(errorCode));
        goto cleanup;
    }

    /* The decoded text must match the original input unit for unit. */
    uSource = (const UChar *)in;
    while (uSource < uSourceLimit) {
        if (*test != *uSource) {
            log_err(" Expected : \\ u%04X \t Got: \\ u%04X \n ", *uSource, *test);
        }
        uSource++;
        test++;
    }

    /* cBuf..cTarget still holds the encoded bytes produced above. */
    ucnv_reset(cnv);
    TestGetNextUChar2022(cnv, cBuf, cTarget, in, " ISO-2022-KR encoding ");
    TestSmallTargetBuffer(in, (const UChar *)in + (sizeof(in) / sizeof(in[0])), cnv);
    TestSmallSourceBuffer(in, (const UChar *)in + (sizeof(in) / sizeof(in[0])), cnv);
    ucnv_reset(cnv);
    TestToAndFromUChars(in, (const UChar *)in + (sizeof(in) / sizeof(in[0])), cnv);

    /*Test for the condition where there is an invalid character*/
    ucnv_reset(cnv);
    {
        static const uint8_t source2[] = { 0x1b, 0x24, 0x053 };
        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
        TestNextUCharError(cnv, (const char *)source2, (const char *)source2 + sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, " an invalid character [ISO-2022-KR] ");
    }

cleanup:
    /* Single exit: release everything acquired after ucnv_open succeeded. */
    ucnv_close(cnv);
    free(uBuf);
    free(cBuf);
    free(offsets);
}
2000-11-21 04:05:39 +00:00
2002-10-29 01:54:11 +00:00
static void TestJitterbug2411 ( ) {
2004-12-23 21:03:30 +00:00
static const char * source = " \x1b \x24 \x29 \x43 \x6b \x6b \x6e \x6e \x6a \x68 \x70 \x6f \x69 \x75 \x79 \x71 \x77 \x65 \x68 \x67 \x0A "
2002-11-22 23:44:46 +00:00
" \x1b \x24 \x29 \x43 \x6a \x61 \x73 \x64 \x66 \x6a \x61 \x73 \x64 \x66 \x68 \x6f \x69 \x75 \x79 \x1b \x24 \x29 \x43 " ;
2002-10-29 01:54:11 +00:00
UConverter * kr = NULL , * kr1 = NULL ;
UErrorCode errorCode = U_ZERO_ERROR ;
UChar tgt [ 100 ] = { ' \0 ' } ;
UChar * target = tgt ;
UChar * targetLimit = target + 100 ;
kr = ucnv_open ( " iso-2022-kr " , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_data_err ( " Unable to open a iso-2022-kr converter: %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
ucnv_toUnicode ( kr , & target , targetLimit , & source , source + uprv_strlen ( source ) , NULL , TRUE , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " iso-2022-kr cannot handle multiple escape sequences : %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
kr1 = ucnv_open ( " ibm-25546 " , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_data_err ( " Unable to open a iso-2022-kr_1 converter: %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
target = tgt ;
targetLimit = target + 100 ;
ucnv_toUnicode ( kr , & target , targetLimit , & source , source + uprv_strlen ( source ) , NULL , TRUE , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " iso-2022-kr_1 cannot handle multiple escape sequences : %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
2002-11-28 08:53:37 +00:00
ucnv_close ( kr ) ;
ucnv_close ( kr1 ) ;
2002-10-29 01:54:11 +00:00
}
2001-02-23 04:40:39 +00:00
static void
TestJIS ( ) {
2003-12-10 02:03:42 +00:00
/* From Unicode moved to testdata/conversion.txt */
2001-02-23 04:40:39 +00:00
/*To Unicode*/
{
2004-12-23 21:03:30 +00:00
static const uint8_t sampleTextJIS [ ] = {
2001-02-23 04:40:39 +00:00
0x1b , 0x28 , 0x48 , 0x41 , 0x42 , /*jis-Roman*/
0x1b , 0x28 , 0x49 , 0x41 , 0x42 , /*Katakana Set*/
0x1b , 0x26 , 0x40 , 0x1b , 0x24 , 0x42 , 0x21 , 0x21 /*recognize and ignore <esc>&@*/
} ;
2004-12-23 21:03:30 +00:00
static const uint16_t expectedISO2022JIS [ ] = {
2001-02-23 04:40:39 +00:00
0x0041 , 0x0042 ,
0xFF81 , 0xFF82 ,
0x3000
} ;
2004-12-23 21:03:30 +00:00
static const int32_t toISO2022JISOffs [ ] = {
2001-02-23 04:40:39 +00:00
3 , 4 ,
8 , 9 ,
16
} ;
2001-04-18 19:31:05 +00:00
2004-12-23 21:03:30 +00:00
static const uint8_t sampleTextJIS7 [ ] = {
2001-02-23 04:40:39 +00:00
0x1b , 0x28 , 0x48 , 0x41 , 0x42 , /*JIS7-Roman*/
0x1b , 0x28 , 0x49 , 0x41 , 0x42 , /*Katakana Set*/
0x1b , 0x24 , 0x42 , 0x21 , 0x21 ,
0x0e , 0x41 , 0x42 , 0x0f , /*Test Katakana set with SI and SO */
0x21 , 0x22 ,
0x1b , 0x26 , 0x40 , 0x1b , 0x24 , 0x42 , 0x21 , 0x21 /*recognize and ignore <esc>&@*/
} ;
2004-12-23 21:03:30 +00:00
static const uint16_t expectedISO2022JIS7 [ ] = {
2001-02-23 04:40:39 +00:00
0x0041 , 0x0042 ,
0xFF81 , 0xFF82 ,
0x3000 ,
0xFF81 , 0xFF82 ,
0x3001 ,
0x3000
} ;
2004-12-23 21:03:30 +00:00
static const int32_t toISO2022JIS7Offs [ ] = {
2001-02-23 04:40:39 +00:00
3 , 4 ,
8 , 9 ,
13 , 16 ,
17 ,
19 , 27
} ;
2004-12-23 21:03:30 +00:00
static const uint8_t sampleTextJIS8 [ ] = {
2001-02-23 04:40:39 +00:00
0x1b , 0x28 , 0x48 , 0x41 , 0x42 , /*JIS8-Roman*/
0xa1 , 0xc8 , 0xd9 , /*Katakana Set*/
0x1b , 0x28 , 0x42 ,
0x41 , 0x42 ,
0xb1 , 0xc3 , /*Katakana Set*/
2001-04-18 19:31:05 +00:00
0x1b , 0x24 , 0x42 , 0x21 , 0x21
2001-02-23 04:40:39 +00:00
} ;
2004-12-23 21:03:30 +00:00
static const uint16_t expectedISO2022JIS8 [ ] = {
2001-02-23 04:40:39 +00:00
0x0041 , 0x0042 ,
2001-04-18 19:31:05 +00:00
0xff61 , 0xff88 , 0xff99 ,
2001-02-23 04:40:39 +00:00
0x0041 , 0x0042 ,
0xff71 , 0xff83 ,
0x3000
} ;
2004-12-23 21:03:30 +00:00
static const int32_t toISO2022JIS8Offs [ ] = {
2001-04-18 19:31:05 +00:00
3 , 4 , 5 , 6 ,
7 , 11 , 12 , 13 ,
2001-02-23 04:40:39 +00:00
14 , 18 ,
2001-04-18 19:31:05 +00:00
} ;
2001-02-23 04:40:39 +00:00
2002-12-13 04:05:50 +00:00
testConvertToU ( sampleTextJIS , sizeof ( sampleTextJIS ) , expectedISO2022JIS ,
sizeof ( expectedISO2022JIS ) / sizeof ( expectedISO2022JIS [ 0 ] ) , " JIS " , toISO2022JISOffs , TRUE ) ;
testConvertToU ( sampleTextJIS7 , sizeof ( sampleTextJIS7 ) , expectedISO2022JIS7 ,
sizeof ( expectedISO2022JIS7 ) / sizeof ( expectedISO2022JIS7 [ 0 ] ) , " JIS7 " , toISO2022JIS7Offs , TRUE ) ;
testConvertToU ( sampleTextJIS8 , sizeof ( sampleTextJIS8 ) , expectedISO2022JIS8 ,
sizeof ( expectedISO2022JIS8 ) / sizeof ( expectedISO2022JIS8 [ 0 ] ) , " JIS8 " , toISO2022JIS8Offs , TRUE ) ;
2001-02-23 04:40:39 +00:00
}
}
2001-05-08 00:01:30 +00:00
2010-05-25 22:17:12 +00:00
#if 0
ICU 4.4 ( ticket # 7314 ) removes mappings for CNS 11643 planes 3. .7
2001-05-11 02:30:47 +00:00
static void TestJitterbug915 ( ) {
2002-07-29 21:04:18 +00:00
/* tests for roundtripping of the below sequence
2001-05-31 23:30:09 +00:00
\ x1b $ ) G \ x0E # ! # " ###$#%#&#'#(#)#*#+ / *plane 1 * /
2001-05-11 02:30:47 +00:00
\ x1b $ * H \ x1bN " ! \x1b N " " \x1b N " # \ x1bN " $ \x1b N " % / * plane 2 * /
\ x1b $ + I \ x1bO " D \x1b O " E \ x1bO " F \x1b O " G \ x1bO " H / *plane 3 * /
\ x1b $ + J \ x1bO ! D \ x1bO ! E \ x1bO " j \x1b O " k \ x1bO " l / *plane 4 * /
\ x1b $ + K \ x1bO ! t \ x1bO " P \x1b O " Q \ x1bO # 7 \ x1bO " \ / *plane 5 * /
\ x1b $ + L \ x1bO ! # \ x1bO " , \x1b O#N \x1b O!n \x1b O#q / *plane 6 * /
\ x1b $ + M \ x1bO " q \x1b O!N \x1b O!j \x1b O#: \x1b O#o / *plane 7 * /
*/
2004-12-23 21:03:30 +00:00
static const char cSource [ ] = {
2001-05-31 23:30:09 +00:00
0x1B , 0x24 , 0x29 , 0x47 , 0x0E , 0x23 , 0x21 , 0x23 , 0x22 , 0x23 ,
0x23 , 0x23 , 0x24 , 0x23 , 0x25 , 0x23 , 0x26 , 0x23 , 0x27 , 0x23 ,
0x28 , 0x23 , 0x29 , 0x23 , 0x2A , 0x23 , 0x2B , 0x0F , 0x2F , 0x2A ,
0x70 , 0x6C , 0x61 , 0x6E , 0x65 , 0x20 , 0x31 , 0x20 , 0x2A , 0x2F ,
0x0D , 0x0A , 0x1B , 0x24 , 0x2A , 0x48 , 0x1B , 0x4E , 0x22 , 0x21 ,
0x1B , 0x4E , 0x22 , 0x22 , 0x1B , 0x4E , 0x22 , 0x23 , 0x1B , 0x4E ,
2003-12-03 17:54:17 +00:00
0x22 , 0x24 , 0x1B , 0x4E , 0x22 , 0x25 , 0x2F , 0x2A , 0x70 ,
2001-05-31 23:30:09 +00:00
0x6C , 0x61 , 0x6E , 0x65 , 0x32 , 0x2A , 0x2F , 0x20 , 0x0D , 0x0A ,
0x1B , 0x24 , 0x2B , 0x49 , 0x1B , 0x4F , 0x22 , 0x44 , 0x1B , 0x4F ,
0x22 , 0x45 , 0x1B , 0x4F , 0x22 , 0x46 , 0x1B , 0x4F , 0x22 , 0x47 ,
2003-12-03 17:54:17 +00:00
0x1B , 0x4F , 0x22 , 0x48 , 0x2F , 0x2A , 0x70 , 0x6C , 0x61 ,
2001-05-31 23:30:09 +00:00
0x6E , 0x65 , 0x20 , 0x33 , 0x20 , 0x2A , 0x2F , 0x0D , 0x0A , 0x1B ,
0x24 , 0x2B , 0x4A , 0x1B , 0x4F , 0x21 , 0x44 , 0x1B , 0x4F , 0x21 ,
0x45 , 0x1B , 0x4F , 0x22 , 0x6A , 0x1B , 0x4F , 0x22 , 0x6B , 0x1B ,
2003-12-03 17:54:17 +00:00
0x4F , 0x22 , 0x6C , 0x2F , 0x2A , 0x70 , 0x6C , 0x61 , 0x6E ,
2001-05-31 23:30:09 +00:00
0x65 , 0x20 , 0x34 , 0x20 , 0x2A , 0x2F , 0x0D , 0x0A , 0x1B , 0x24 ,
0x2B , 0x4B , 0x1B , 0x4F , 0x21 , 0x74 , 0x1B , 0x4F , 0x22 , 0x50 ,
0x1B , 0x4F , 0x22 , 0x51 , 0x1B , 0x4F , 0x23 , 0x37 , 0x1B , 0x4F ,
2003-12-03 17:54:17 +00:00
0x22 , 0x5C , 0x2F , 0x2A , 0x70 , 0x6C , 0x61 , 0x6E ,
2001-05-31 23:30:09 +00:00
0x65 , 0x20 , 0x35 , 0x20 , 0x2A , 0x2F , 0x0D , 0x0A , 0x1B , 0x24 ,
0x2B , 0x4C , 0x1B , 0x4F , 0x21 , 0x23 , 0x1B , 0x4F , 0x22 , 0x2C ,
0x1B , 0x4F , 0x23 , 0x4E , 0x1B , 0x4F , 0x21 , 0x6E , 0x1B , 0x4F ,
2003-12-03 17:54:17 +00:00
0x23 , 0x71 , 0x2F , 0x2A , 0x70 , 0x6C , 0x61 , 0x6E , 0x65 ,
2001-05-31 23:30:09 +00:00
0x20 , 0x36 , 0x20 , 0x2A , 0x2F , 0x0D , 0x0A , 0x1B , 0x24 , 0x2B ,
0x4D , 0x1B , 0x4F , 0x22 , 0x71 , 0x1B , 0x4F , 0x21 , 0x4E , 0x1B ,
0x4F , 0x21 , 0x6A , 0x1B , 0x4F , 0x23 , 0x3A , 0x1B , 0x4F , 0x23 ,
2003-12-03 17:54:17 +00:00
0x6F , 0x2F , 0x2A , 0x70 , 0x6C , 0x61 , 0x6E , 0x65 , 0x20 ,
0x37 , 0x20 , 0x2A , 0x2F
2001-05-31 23:30:09 +00:00
} ;
UChar uTarget [ 500 ] = { ' \0 ' } ;
UChar * utarget = uTarget ;
UChar * utargetLimit = uTarget + sizeof ( uTarget ) / 2 ;
char cTarget [ 500 ] = { ' \0 ' } ;
char * ctarget = cTarget ;
char * ctargetLimit = cTarget + sizeof ( cTarget ) ;
const char * csource = cSource ;
2004-12-23 21:03:30 +00:00
const char * tempSrc = cSource ;
2001-05-31 23:30:09 +00:00
UErrorCode err = U_ZERO_ERROR ;
UConverter * conv = ucnv_open ( " ISO_2022_CN_EXT " , & err ) ;
if ( U_FAILURE ( err ) ) {
2002-09-20 19:07:19 +00:00
log_data_err ( " Unable to open a iso-2022 converter: %s \n " , u_errorName ( err ) ) ;
2001-05-11 02:30:47 +00:00
return ;
}
2001-05-31 23:30:09 +00:00
ucnv_toUnicode ( conv , & utarget , utargetLimit , & csource , csource + sizeof ( cSource ) , NULL , TRUE , & err ) ;
if ( U_FAILURE ( err ) ) {
2001-05-11 02:30:47 +00:00
log_err ( " iso-2022-CN to Unicode conversion failed: %s \n " , u_errorName ( err ) ) ;
return ;
}
2001-05-31 23:30:09 +00:00
utargetLimit = utarget ;
utarget = uTarget ;
ucnv_fromUnicode ( conv , & ctarget , ctargetLimit , ( const UChar * * ) & utarget , utargetLimit , NULL , TRUE , & err ) ;
if ( U_FAILURE ( err ) ) {
2001-05-11 02:30:47 +00:00
log_err ( " iso-2022-CN from Unicode conversion failed: %s \n " , u_errorName ( err ) ) ;
return ;
}
2001-05-31 23:30:09 +00:00
ctargetLimit = ctarget ;
ctarget = cTarget ;
while ( ctarget < ctargetLimit ) {
2003-12-03 17:54:17 +00:00
if ( * ctarget ! = * tempSrc ) {
log_err ( " j915[%d] Expected : \\ x%02X \t Got: \\ x%02X \n " , ( int ) ( ctarget - cTarget ) , * ctarget , ( int ) * tempSrc ) ;
2001-05-31 23:30:09 +00:00
}
2003-12-03 17:54:17 +00:00
+ + ctarget ;
+ + tempSrc ;
2001-05-31 23:30:09 +00:00
}
2001-05-11 02:30:47 +00:00
2001-08-25 01:06:41 +00:00
ucnv_close ( conv ) ;
2001-05-11 02:30:47 +00:00
}
2000-11-21 04:05:39 +00:00
static void
2000-10-28 01:08:25 +00:00
TestISO_2022_CN_EXT ( ) {
2000-09-19 02:38:40 +00:00
/* test input */
static const uint16_t in [ ] = {
2001-05-08 00:01:30 +00:00
/* test Non-BMP code points */
2002-07-29 21:04:18 +00:00
0xD869 , 0xDE99 , 0xD869 , 0xDE9C , 0xD869 , 0xDE9D , 0xD869 , 0xDE9E , 0xD869 , 0xDE9F ,
0xD869 , 0xDEA0 , 0xD869 , 0xDEA5 , 0xD869 , 0xDEA6 , 0xD869 , 0xDEA7 , 0xD869 , 0xDEA8 ,
2001-05-08 00:01:30 +00:00
0xD869 , 0xDEAB , 0xD869 , 0xDEAC , 0xD869 , 0xDEAD , 0xD869 , 0xDEAE , 0xD869 , 0xDEAF ,
2002-07-29 21:04:18 +00:00
0xD869 , 0xDEB0 , 0xD869 , 0xDEB1 , 0xD869 , 0xDEB3 , 0xD869 , 0xDEB5 , 0xD869 , 0xDEB6 ,
0xD869 , 0xDEB7 , 0xD869 , 0xDEB8 , 0xD869 , 0xDEB9 , 0xD869 , 0xDEBA , 0xD869 , 0xDEBB ,
0xD869 , 0xDEBC , 0xD869 , 0xDEBD , 0xD869 , 0xDEBE , 0xD869 , 0xDEBF , 0xD869 , 0xDEC0 ,
0xD869 , 0xDEC1 , 0xD869 , 0xDEC2 , 0xD869 , 0xDEC3 , 0xD869 , 0xDEC4 , 0xD869 , 0xDEC8 ,
0xD869 , 0xDECA , 0xD869 , 0xDECB , 0xD869 , 0xDECD , 0xD869 , 0xDECE , 0xD869 , 0xDECF ,
0xD869 , 0xDED0 , 0xD869 , 0xDED1 , 0xD869 , 0xDED2 , 0xD869 , 0xDED3 , 0xD869 , 0xDED4 ,
0xD869 , 0xDED5 ,
2001-05-08 00:01:30 +00:00
2000-09-21 00:35:06 +00:00
0x4DB3 , 0x4DB4 , 0x4DB5 , 0x4E00 , 0x4E00 , 0x4E01 , 0x4E02 , 0x4E03 , 0x000D , 0x000A ,
0x0392 , 0x0393 , 0x0394 , 0x0395 , 0x0396 , 0x0397 , 0x33E0 , 0x33E6 , 0x000D , 0x000A ,
0x4E05 , 0x4E07 , 0x4E04 , 0x4E08 , 0x4E08 , 0x4E09 , 0x4E0A , 0x4E0B , 0x000D , 0x000A ,
0x4E0C , 0x0021 , 0x0022 , 0x0023 , 0x0024 , 0xFF40 , 0xFF41 , 0xFF42 , 0x000D , 0x000A ,
0xFF43 , 0xFF44 , 0xFF45 , 0xFF46 , 0xFF47 , 0xFF48 , 0xFF49 , 0xFF4A , 0x000D , 0x000A ,
0xFF4B , 0xFF4C , 0xFF4D , 0xFF4E , 0xFF4F , 0x6332 , 0x63B0 , 0x643F , 0x000D , 0x000A ,
0x64D8 , 0x8004 , 0x6BEA , 0x6BF3 , 0x6BFD , 0x6BF5 , 0x6BF9 , 0x6C05 , 0x000D , 0x000A ,
2000-11-17 03:03:14 +00:00
0x0041 , 0x0042 , 0x0043 , 0x0044 , 0x0045 , 0x0046 , 0x0047 , 0x0048 , 0x000D , 0x000A ,
2000-09-21 00:35:06 +00:00
0x6C07 , 0x6C06 , 0x6C0D , 0x6C15 , 0x9CD9 , 0x9CDC , 0x9CDD , 0x9CDF , 0x000D , 0x000A ,
0x9CE2 , 0x977C , 0x9785 , 0x9791 , 0x9792 , 0x9794 , 0x97AF , 0x97AB , 0x000D , 0x000A ,
0x97A3 , 0x97B2 , 0x97B4 , 0x9AB1 , 0x9AB0 , 0x9AB7 , 0x9E58 , 0x9AB6 , 0x000D , 0x000A ,
0x9ABA , 0x9ABC , 0x9AC1 , 0x9AC0 , 0x9AC5 , 0x9AC2 , 0x9ACB , 0x9ACC , 0x000D , 0x000A ,
0x9AD1 , 0x9B45 , 0x9B43 , 0x9B47 , 0x9B49 , 0x9B48 , 0x9B4D , 0x9B51 , 0x000D , 0x000A ,
0x98E8 , 0x990D , 0x992E , 0x9955 , 0x9954 , 0x9ADF , 0x3443 , 0x3444 , 0x000D , 0x000A ,
0x3445 , 0x3449 , 0x344A , 0x344B , 0x60F2 , 0x60F3 , 0x60F4 , 0x60F5 , 0x000D , 0x000A ,
0x60F6 , 0x60F7 , 0x60F8 , 0x60F9 , 0x60FA , 0x60FB , 0x60FC , 0x60FD , 0x000D , 0x000A ,
0x60FE , 0x60FF , 0x6100 , 0x6101 , 0x6102 , 0x0041 , 0x0042 , 0x0043 , 0x000D , 0x000A ,
0x0044 , 0x0045 , 0x0046 , 0x0047 , 0x0048 , 0x0049 , 0x004A , 0x004B , 0x000D , 0x000A ,
2002-07-29 21:04:18 +00:00
2001-04-18 19:31:05 +00:00
0x33E7 , 0x33E8 , 0x33E9 , 0x33EA , 0x000D , 0x000A
2000-09-21 00:35:06 +00:00
2000-09-19 02:38:40 +00:00
} ;
2001-05-08 00:01:30 +00:00
2000-11-17 03:03:14 +00:00
const UChar * uSource ;
const UChar * uSourceLimit ;
const char * cSource ;
const char * cSourceLimit ;
UChar * uTargetLimit = NULL ;
UChar * uTarget ;
char * cTarget ;
const char * cTargetLimit ;
2001-04-18 19:31:05 +00:00
char * cBuf ;
2000-11-17 03:03:14 +00:00
UChar * uBuf , * test ;
2000-09-21 00:35:06 +00:00
int32_t uBufSize = 180 ;
2000-09-19 02:38:40 +00:00
UErrorCode errorCode = U_ZERO_ERROR ;
UConverter * cnv ;
2000-11-17 03:03:14 +00:00
int32_t * offsets = ( int32_t * ) malloc ( uBufSize * sizeof ( int32_t ) * 5 ) ;
int32_t * myOff = offsets ;
2000-10-28 01:08:25 +00:00
cnv = ucnv_open ( " ISO_2022,locale=cn,version=1 " , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
2002-09-20 19:07:19 +00:00
log_data_err ( " Unable to open a iso-2022 converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-10-28 01:08:25 +00:00
return ;
}
2000-11-17 03:03:14 +00:00
uBuf = ( UChar * ) malloc ( uBufSize * sizeof ( UChar ) * 5 ) ;
cBuf = ( char * ) malloc ( uBufSize * sizeof ( char ) * 10 ) ;
2005-11-08 22:44:37 +00:00
uSource = ( const UChar * ) in ;
uSourceLimit = ( const UChar * ) in + ( sizeof ( in ) / sizeof ( in [ 0 ] ) ) ;
2000-11-17 03:03:14 +00:00
cTarget = cBuf ;
cTargetLimit = cBuf + uBufSize * 5 ;
uTarget = uBuf ;
uTargetLimit = uBuf + uBufSize * 5 ;
ucnv_fromUnicode ( cnv , & cTarget , cTargetLimit , & uSource , uSourceLimit , myOff , TRUE , & errorCode ) ;
2000-10-28 01:08:25 +00:00
if ( U_FAILURE ( errorCode ) ) {
log_err ( " ucnv_fromUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
cSource = cBuf ;
2000-11-17 03:03:14 +00:00
cSourceLimit = cTarget ;
test = uBuf ;
myOff = offsets ;
ucnv_toUnicode ( cnv , & uTarget , uTargetLimit , & cSource , cSourceLimit , myOff , TRUE , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
2000-10-28 01:08:25 +00:00
log_err ( " ucnv_toUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
2005-11-08 22:44:37 +00:00
uSource = ( const UChar * ) in ;
2000-11-17 03:03:14 +00:00
while ( uSource < uSourceLimit ) {
if ( * test ! = * uSource ) {
log_err ( " Expected : \\ u%04X \t Got: \\ u%04X \n " , * uSource , ( int ) * test ) ;
}
2000-10-28 01:08:25 +00:00
else {
2000-11-17 03:03:14 +00:00
log_verbose ( " Got: \\ u%04X \n " , ( int ) * test ) ;
2000-10-28 01:08:25 +00:00
}
2000-12-08 01:13:38 +00:00
uSource + + ;
test + + ;
2000-11-17 03:03:14 +00:00
}
2005-11-08 22:44:37 +00:00
TestSmallTargetBuffer ( in , ( const UChar * ) in + ( sizeof ( in ) / sizeof ( in [ 0 ] ) ) , cnv ) ;
TestSmallSourceBuffer ( in , ( const UChar * ) in + ( sizeof ( in ) / sizeof ( in [ 0 ] ) ) , cnv ) ;
2002-07-31 20:58:58 +00:00
/*Test for the condition where there is an invalid character*/
ucnv_reset ( cnv ) ;
{
static const uint8_t source2 [ ] = { 0x0e , 0x24 , 0x053 } ;
TestNextUCharError ( cnv , ( const char * ) source2 , ( const char * ) source2 + sizeof ( source2 ) , U_ZERO_ERROR , " an invalid character [ISO-2022-CN-EXT] " ) ;
}
2000-11-17 03:03:14 +00:00
ucnv_close ( cnv ) ;
free ( uBuf ) ;
free ( cBuf ) ;
free ( offsets ) ;
2000-10-28 01:08:25 +00:00
}
2010-05-25 22:17:12 +00:00
# endif
2000-10-28 01:08:25 +00:00
2000-11-21 04:05:39 +00:00
static void
2000-10-28 01:08:25 +00:00
TestISO_2022_CN ( ) {
/* test input */
static const uint16_t in [ ] = {
2001-05-31 23:30:09 +00:00
/* jitterbug 951 */
0xFF2D , 0xFF49 , 0xFF58 , 0xFF45 , 0xFF44 , 0x0020 , 0xFF43 , 0xFF48 , 0xFF41 , 0xFF52 ,
0x0020 , 0xFF06 , 0x0020 , 0xFF11 , 0xFF12 , 0xFF13 , 0xFF14 , 0xFF15 , 0xFF16 , 0xFF17 ,
0xFF18 , 0xFF19 , 0xFF10 , 0x0020 , 0xFF4E , 0xFF55 , 0xFF4D , 0xFF42 , 0xFF45 , 0xFF52 ,
0x0020 , 0xFF54 , 0xFF45 , 0xFF53 , 0xFF54 , 0x0020 , 0xFF4C , 0xFF49 , 0xFF4E , 0xFF45 ,
0x0020 , 0x0045 , 0x004e , 0x0044 ,
/**/
2000-10-28 01:08:25 +00:00
0x4E00 , 0x4E00 , 0x4E01 , 0x4E03 , 0x60F6 , 0x60F7 , 0x60F8 , 0x60FB , 0x000D , 0x000A ,
0x0392 , 0x0393 , 0x0394 , 0x0395 , 0x0396 , 0x0397 , 0x60FB , 0x60FC , 0x000D , 0x000A ,
0x4E07 , 0x4E08 , 0x4E08 , 0x4E09 , 0x4E0A , 0x4E0B , 0x0042 , 0x0043 , 0x000D , 0x000A ,
0x4E0C , 0x0021 , 0x0022 , 0x0023 , 0x0024 , 0xFF40 , 0xFF41 , 0xFF42 , 0x000D , 0x000A ,
0xFF43 , 0xFF44 , 0xFF45 , 0xFF46 , 0xFF47 , 0xFF48 , 0xFF49 , 0xFF4A , 0x000D , 0x000A ,
0xFF4B , 0xFF4C , 0xFF4D , 0xFF4E , 0xFF4F , 0x6332 , 0x63B0 , 0x643F , 0x000D , 0x000A ,
0x64D8 , 0x8004 , 0x6BEA , 0x6BF3 , 0x6BFD , 0x6BF5 , 0x6BF9 , 0x6C05 , 0x000D , 0x000A ,
0x6C07 , 0x6C06 , 0x6C0D , 0x6C15 , 0x9CD9 , 0x9CDC , 0x9CDD , 0x9CDF , 0x000D , 0x000A ,
0x9CE2 , 0x977C , 0x9785 , 0x9791 , 0x9792 , 0x9794 , 0x97AF , 0x97AB , 0x000D , 0x000A ,
0x97A3 , 0x97B2 , 0x97B4 , 0x9AB1 , 0x9AB0 , 0x9AB7 , 0x9E58 , 0x9AB6 , 0x000D , 0x000A ,
0x9ABA , 0x9ABC , 0x9AC1 , 0x9AC0 , 0x9AC5 , 0x9AC2 , 0x9ACB , 0x9ACC , 0x000D , 0x000A ,
0x9AD1 , 0x9B45 , 0x9B43 , 0x9B47 , 0x9B49 , 0x9B48 , 0x9B4D , 0x9B51 , 0x000D , 0x000A ,
0x98E8 , 0x990D , 0x992E , 0x9955 , 0x9954 , 0x9ADF , 0x60FE , 0x60FF , 0x000D , 0x000A ,
0x60F2 , 0x60F3 , 0x60F4 , 0x60F5 , 0x000D , 0x000A , 0x60F9 , 0x60FA , 0x000D , 0x000A ,
0x6100 , 0x6101 , 0x0041 , 0x0044 , 0x0045 , 0x0046 , 0x0047 , 0x0048 , 0x000D , 0x000A ,
0x247D , 0x247E , 0x247F , 0x2480 , 0x2481 , 0x2482 , 0x2483 , 0x2484 , 0x2485 , 0x2486 ,
0x2487 , 0x2460 , 0x2461 , 0xFF20 , 0xFF21 , 0xFF22 , 0x0049 , 0x004A , 0x000D , 0x000A ,
} ;
2000-11-17 03:03:14 +00:00
const UChar * uSource ;
const UChar * uSourceLimit ;
const char * cSource ;
const char * cSourceLimit ;
UChar * uTargetLimit = NULL ;
UChar * uTarget ;
char * cTarget ;
const char * cTargetLimit ;
2001-04-18 19:31:05 +00:00
char * cBuf ;
2000-11-17 03:03:14 +00:00
UChar * uBuf , * test ;
2000-10-28 01:08:25 +00:00
int32_t uBufSize = 180 ;
UErrorCode errorCode = U_ZERO_ERROR ;
UConverter * cnv ;
2000-11-17 03:03:14 +00:00
int32_t * offsets = ( int32_t * ) malloc ( uBufSize * sizeof ( int32_t ) * 5 ) ;
int32_t * myOff = offsets ;
2000-10-28 01:08:25 +00:00
cnv = ucnv_open ( " ISO_2022,locale=cn,version=0 " , & errorCode ) ;
2000-09-19 02:38:40 +00:00
if ( U_FAILURE ( errorCode ) ) {
2002-09-20 19:07:19 +00:00
log_data_err ( " Unable to open a iso-2022 converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-09-19 02:38:40 +00:00
return ;
}
2000-11-17 03:03:14 +00:00
uBuf = ( UChar * ) malloc ( uBufSize * sizeof ( UChar ) * 5 ) ;
cBuf = ( char * ) malloc ( uBufSize * sizeof ( char ) * 10 ) ;
2005-11-08 22:44:37 +00:00
uSource = ( const UChar * ) in ;
uSourceLimit = ( const UChar * ) in + ( sizeof ( in ) / sizeof ( in [ 0 ] ) ) ;
2000-11-17 03:03:14 +00:00
cTarget = cBuf ;
cTargetLimit = cBuf + uBufSize * 5 ;
uTarget = uBuf ;
uTargetLimit = uBuf + uBufSize * 5 ;
ucnv_fromUnicode ( cnv , & cTarget , cTargetLimit , & uSource , uSourceLimit , myOff , TRUE , & errorCode ) ;
2000-09-19 02:38:40 +00:00
if ( U_FAILURE ( errorCode ) ) {
log_err ( " ucnv_fromUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
cSource = cBuf ;
2000-11-17 03:03:14 +00:00
cSourceLimit = cTarget ;
test = uBuf ;
myOff = offsets ;
ucnv_toUnicode ( cnv , & uTarget , uTargetLimit , & cSource , cSourceLimit , myOff , TRUE , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
2000-09-19 02:38:40 +00:00
log_err ( " ucnv_toUnicode conversion failed reason %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
2005-11-08 22:44:37 +00:00
uSource = ( const UChar * ) in ;
2000-11-17 03:03:14 +00:00
while ( uSource < uSourceLimit ) {
if ( * test ! = * uSource ) {
log_err ( " Expected : \\ u%04X \t Got: \\ u%04X \n " , * uSource , ( int ) * test ) ;
}
2000-10-17 08:05:02 +00:00
else {
2000-11-17 03:03:14 +00:00
log_verbose ( " Got: \\ u%04X \n " , ( int ) * test ) ;
2000-10-17 08:05:02 +00:00
}
2000-12-08 01:13:38 +00:00
uSource + + ;
test + + ;
2000-11-17 03:03:14 +00:00
}
TestGetNextUChar2022 ( cnv , cBuf , cTarget , in , " ISO-2022-CN encoding " ) ;
2005-11-08 22:44:37 +00:00
TestSmallTargetBuffer ( in , ( const UChar * ) in + ( sizeof ( in ) / sizeof ( in [ 0 ] ) ) , cnv ) ;
TestSmallSourceBuffer ( in , ( const UChar * ) in + ( sizeof ( in ) / sizeof ( in [ 0 ] ) ) , cnv ) ;
TestToAndFromUChars ( in , ( const UChar * ) in + ( sizeof ( in ) / sizeof ( in [ 0 ] ) ) , cnv ) ;
2001-04-07 01:25:14 +00:00
TestJitterbug930 ( " csISO2022CN " ) ;
2002-07-31 20:58:58 +00:00
/*Test for the condition where there is an invalid character*/
ucnv_reset ( cnv ) ;
{
static const uint8_t source2 [ ] = { 0x0e , 0x24 , 0x053 } ;
TestNextUCharError ( cnv , ( const char * ) source2 , ( const char * ) source2 + sizeof ( source2 ) , U_ZERO_ERROR , " an invalid character [ISO-2022-CN] " ) ;
}
2000-11-17 03:03:14 +00:00
ucnv_close ( cnv ) ;
free ( uBuf ) ;
free ( cBuf ) ;
free ( offsets ) ;
2000-08-15 00:07:33 +00:00
}
2000-11-21 04:05:39 +00:00
2008-03-12 23:22:07 +00:00
/* Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallbackReason is UCNV_IRREGULAR */
/* One row of the TestJitterbug6175 table: a converter plus the bytes to feed it. */
typedef struct {
/* Name passed to ucnv_open(); NULL marks the terminating row of the table. */
const char * converterName ;
/* Start of the input bytes; not NUL-terminated, the length is given separately. */
const char * inputText ;
/* Number of bytes at inputText (filled in with sizeof of the backing array). */
int inputTextLength ;
} EmptySegmentTest ;
/*
 * To-Unicode callback installed by TestJitterbug6175.
 *
 * Reasons that compare greater than UCNV_IRREGULAR are ignored (presumably the
 * reset/close/clone notifications; confirm against ucnv_err.h). For every other
 * reason the callback clears *err and writes the substitution character, as
 * UCNV_TO_U_CALLBACK_SUBSTITUTE does. The test inputs contain only empty
 * segments, so any reason other than UCNV_IRREGULAR is logged as a failure first.
 *
 * context, codeUnits and length are not used.
 */
static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits,
                                             int32_t length, UConverterCallbackReason reason, UErrorCode * err ) {
    if (reason <= UCNV_IRREGULAR) {
        if (reason < UCNV_IRREGULAR) {
            log_err(" toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR \n ");
        }
        /* Recover from the error and emit the substitution character. */
        *err = U_ZERO_ERROR;
        ucnv_cbToUWriteSub(toArgs, 0, err);
    }
}
enum { kEmptySegmentToUCharsMax = 64 } ;
static void TestJitterbug6175 ( void ) {
static const char iso2022jp_a [ ] = { 0x61 , 0x62 , 0x1B , 0x24 , 0x42 , 0x1B , 0x28 , 0x42 , 0x63 , 0x64 , 0x0D , 0x0A } ;
static const char iso2022kr_a [ ] = { 0x1B , 0x24 , 0x29 , 0x43 , 0x61 , 0x0E , 0x0F , 0x62 , 0x0D , 0x0A } ;
static const char iso2022cn_a [ ] = { 0x61 , 0x1B , 0x24 , 0x29 , 0x41 , 0x62 , 0x0E , 0x0F , 0x1B , 0x24 , 0x2A , 0x48 , 0x1B , 0x4E , 0x6A , 0x65 , 0x63 , 0x0D , 0x0A } ;
static const char iso2022cn_b [ ] = { 0x61 , 0x1B , 0x24 , 0x29 , 0x41 , 0x62 , 0x0E , 0x1B , 0x24 , 0x29 , 0x47 , 0x68 , 0x64 , 0x0F , 0x63 , 0x0D , 0x0A } ;
static const char hzGB2312_a [ ] = { 0x61 , 0x62 , 0x7E , 0x7B , 0x7E , 0x7D , 0x63 , 0x64 } ;
static const EmptySegmentTest emptySegmentTests [ ] = {
/* converterName inputText inputTextLength */
{ " ISO-2022-JP " , iso2022jp_a , sizeof ( iso2022jp_a ) } ,
{ " ISO-2022-KR " , iso2022kr_a , sizeof ( iso2022kr_a ) } ,
{ " ISO-2022-CN " , iso2022cn_a , sizeof ( iso2022cn_a ) } ,
{ " ISO-2022-CN " , iso2022cn_b , sizeof ( iso2022cn_b ) } ,
{ " HZ-GB-2312 " , hzGB2312_a , sizeof ( hzGB2312_a ) } ,
/* terminator: */
{ NULL , NULL , 0 , }
} ;
const EmptySegmentTest * testPtr ;
for ( testPtr = emptySegmentTests ; testPtr - > converterName ! = NULL ; + + testPtr ) {
UErrorCode err = U_ZERO_ERROR ;
UConverter * cnv = ucnv_open ( testPtr - > converterName , & err ) ;
if ( U_FAILURE ( err ) ) {
log_data_err ( " Unable to open %s converter: %s \n " , testPtr - > converterName , u_errorName ( err ) ) ;
return ;
}
ucnv_setToUCallBack ( cnv , UCNV_TO_U_CALLBACK_EMPTYSEGMENT , NULL , NULL , NULL , & err ) ;
if ( U_FAILURE ( err ) ) {
log_data_err ( " Unable to setToUCallBack for %s converter: %s \n " , testPtr - > converterName , u_errorName ( err ) ) ;
ucnv_close ( cnv ) ;
return ;
}
{
UChar toUChars [ kEmptySegmentToUCharsMax ] ;
UChar * toUCharsPtr = toUChars ;
const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax ;
const char * inCharsPtr = testPtr - > inputText ;
const char * inCharsLimit = inCharsPtr + testPtr - > inputTextLength ;
ucnv_toUnicode ( cnv , & toUCharsPtr , toUCharsLimit , & inCharsPtr , inCharsLimit , NULL , TRUE , & err ) ;
}
ucnv_close ( cnv ) ;
}
}
2000-11-21 04:05:39 +00:00
static void
2000-06-22 01:18:30 +00:00
TestEBCDIC_STATEFUL ( ) {
/* test input */
static const uint8_t in [ ] = {
0x61 ,
0x1a ,
0x0f , 0x4b ,
0x42 ,
2001-04-18 19:31:05 +00:00
0x40 ,
2000-06-22 01:18:30 +00:00
0x36 ,
} ;
2000-01-19 19:00:53 +00:00
2000-06-22 01:18:30 +00:00
/* expected test results */
2003-08-01 14:30:29 +00:00
static const int32_t results [ ] = {
2000-06-22 01:18:30 +00:00
/* number of bytes read, code point */
1 , 0x002f ,
1 , 0x0092 ,
2 , 0x002e ,
1 , 0xff62 ,
2001-04-18 19:31:05 +00:00
1 , 0x0020 ,
2000-06-22 01:18:30 +00:00
1 , 0x0096 ,
2001-04-18 19:31:05 +00:00
2000-06-22 01:18:30 +00:00
} ;
2000-08-12 04:27:59 +00:00
static const uint8_t in2 [ ] = {
0x0f ,
0xa1 ,
0x01
} ;
/* expected test results */
2003-08-01 14:30:29 +00:00
static const int32_t results2 [ ] = {
2000-08-12 04:27:59 +00:00
/* number of bytes read, code point */
2 , 0x203E ,
1 , 0x0001 ,
} ;
2000-01-19 19:00:53 +00:00
2000-06-22 01:18:30 +00:00
const char * source = ( const char * ) in , * limit = ( const char * ) in + sizeof ( in ) ;
UErrorCode errorCode = U_ZERO_ERROR ;
UConverter * cnv = ucnv_open ( " ibm-930 " , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
2002-09-20 19:07:19 +00:00
log_data_err ( " Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-10-27 00:10:17 +00:00
return ;
2000-06-22 01:18:30 +00:00
}
TestNextUChar ( cnv , source , limit , results , " EBCDIC_STATEFUL(ibm-930) " ) ;
2000-08-12 04:27:59 +00:00
ucnv_reset ( cnv ) ;
2000-12-20 00:02:15 +00:00
/* Test the condition when source >= sourceLimit */
TestNextUCharError ( cnv , source , source , U_INDEX_OUTOFBOUNDS_ERROR , " sourceLimit <= source " ) ;
2000-08-12 04:27:59 +00:00
ucnv_reset ( cnv ) ;
2000-06-22 23:46:02 +00:00
/*Test for the condition where source > sourcelimit after consuming the shift chracter */
{
static const uint8_t source1 [ ] = { 0x0f } ;
TestNextUCharError ( cnv , ( const char * ) source1 , ( const char * ) source1 + sizeof ( source1 ) , U_INDEX_OUTOFBOUNDS_ERROR , " a character is truncated " ) ;
}
/*Test for the condition where there is an invalid character*/
2000-08-12 04:27:59 +00:00
ucnv_reset ( cnv ) ;
2000-06-22 23:46:02 +00:00
{
2000-08-12 04:27:59 +00:00
static const uint8_t source2 [ ] = { 0x0e , 0x7F , 0xFF } ;
TestNextUCharError ( cnv , ( const char * ) source2 , ( const char * ) source2 + sizeof ( source2 ) , U_ZERO_ERROR , " an invalid character [EBCDIC STATEFUL] " ) ;
2000-06-22 23:46:02 +00:00
}
2000-08-12 04:27:59 +00:00
ucnv_reset ( cnv ) ;
2000-08-14 17:47:43 +00:00
source = ( const char * ) in2 ;
limit = ( const char * ) in2 + sizeof ( in2 ) ;
2000-08-12 04:27:59 +00:00
TestNextUChar ( cnv , source , limit , results2 , " EBCDIC_STATEFUL(ibm-930),seq#2 " ) ;
2000-01-19 19:00:53 +00:00
ucnv_close ( cnv ) ;
2000-08-12 04:27:59 +00:00
2000-01-19 19:00:53 +00:00
}
2000-10-26 00:18:34 +00:00
2000-11-21 04:05:39 +00:00
static void
2000-10-26 00:18:34 +00:00
TestGB18030 ( ) {
/* test input */
static const uint8_t in [ ] = {
0x24 ,
0x7f ,
2000-11-30 22:15:07 +00:00
0x81 , 0x30 , 0x81 , 0x30 ,
2000-10-26 00:18:34 +00:00
0xa8 , 0xbf ,
2000-11-30 22:15:07 +00:00
0xa2 , 0xe3 ,
2000-10-26 00:18:34 +00:00
0xd2 , 0xbb ,
2000-11-30 22:15:07 +00:00
0x82 , 0x35 , 0x8f , 0x33 ,
0x84 , 0x31 , 0xa4 , 0x39 ,
2000-10-26 00:18:34 +00:00
0x90 , 0x30 , 0x81 , 0x30 ,
2000-10-26 20:09:17 +00:00
0xe3 , 0x32 , 0x9a , 0x35
#if 0
/*
* Feature removed markus 2000 - oct - 26
* Only some codepages must match surrogate pairs into supplementary code points -
* see javadoc for ucnv_getNextUChar ( ) and implementation notes in ucnvmbcs . c .
* GB 18030 provides direct encodings for supplementary code points , therefore
* it must not combine two single - encoded surrogates into one code point .
*/
2000-10-26 00:18:34 +00:00
0x83 , 0x36 , 0xc8 , 0x30 , 0x83 , 0x37 , 0xb0 , 0x34 /* separately encoded surrogates */
2000-10-26 20:09:17 +00:00
# endif
2000-10-26 00:18:34 +00:00
} ;
/* expected test results */
2003-08-01 14:30:29 +00:00
static const int32_t results [ ] = {
2000-10-26 00:18:34 +00:00
/* number of bytes read, code point */
1 , 0x24 ,
1 , 0x7f ,
4 , 0x80 ,
2 , 0x1f9 ,
2000-11-30 22:15:07 +00:00
2 , 0x20ac ,
2000-10-26 00:18:34 +00:00
2 , 0x4e00 ,
4 , 0x9fa6 ,
4 , 0xffff ,
4 , 0x10000 ,
2000-10-26 20:09:17 +00:00
4 , 0x10ffff
#if 0
/* Feature removed. See comment above. */
2000-10-26 00:18:34 +00:00
8 , 0x10000
2000-10-26 20:09:17 +00:00
# endif
2000-10-26 00:18:34 +00:00
} ;
2000-11-21 04:05:39 +00:00
/* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
2000-10-26 00:18:34 +00:00
UErrorCode errorCode = U_ZERO_ERROR ;
UConverter * cnv = ucnv_open ( " gb18030 " , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
2002-09-20 19:07:19 +00:00
log_data_err ( " Unable to open a gb18030 converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-10-27 00:10:17 +00:00
return ;
2000-10-26 00:18:34 +00:00
}
TestNextUChar ( cnv , ( const char * ) in , ( const char * ) in + sizeof ( in ) , results , " gb18030 " ) ;
ucnv_close ( cnv ) ;
}
2000-11-21 04:05:39 +00:00
static void
2000-03-31 16:53:09 +00:00
TestLMBCS ( ) {
2000-06-28 17:01:52 +00:00
/* LMBCS-1 string */
2000-05-22 21:24:26 +00:00
static const uint8_t pszLMBCS [ ] = {
2000-03-31 16:53:09 +00:00
0x61 ,
0x01 , 0x29 ,
0x81 ,
0xA0 ,
0x0F , 0x27 ,
0x0F , 0x91 ,
0x14 , 0x0a , 0x74 ,
2001-04-18 19:31:05 +00:00
0x14 , 0xF6 , 0x02 ,
2000-05-22 21:24:26 +00:00
0x14 , 0xd8 , 0x4d , 0x14 , 0xdc , 0x56 , /* UTF-16 surrogate pair */
2000-04-13 17:27:35 +00:00
0x10 , 0x88 , 0xA0 ,
2000-03-31 16:53:09 +00:00
} ;
2000-06-28 17:01:52 +00:00
/* Unicode UChar32 equivalents */
static const UChar32 pszUnicode32 [ ] = {
/* code point */
0x00000061 ,
0x00002013 ,
0x000000FC ,
0x000000E1 ,
0x00000007 ,
0x00000091 ,
0x00000a74 ,
0x00000200 ,
0x00023456 , /* code point for surrogate pair */
0x00005516
} ;
/* Unicode UChar equivalents */
static const UChar pszUnicode [ ] = {
2000-05-22 21:24:26 +00:00
/* code point */
0x0061 ,
0x2013 ,
0x00FC ,
0x00E1 ,
0x0007 ,
0x0091 ,
0x0a74 ,
0x0200 ,
2000-06-28 17:01:52 +00:00
0xD84D , /* low surrogate */
0xDC56 , /* high surrogate */
2000-05-22 21:24:26 +00:00
0x5516
} ;
/* expected test results */
2000-06-28 17:01:52 +00:00
static const int offsets32 [ ] = {
/* number of bytes read, code point */
2001-04-18 19:31:05 +00:00
0 ,
1 ,
3 ,
4 ,
5 ,
7 ,
9 ,
12 ,
15 ,
2000-06-28 17:01:52 +00:00
21 ,
24
} ;
/* expected test results */
static const int offsets [ ] = {
2000-03-31 16:53:09 +00:00
/* number of bytes read, code point */
2001-04-18 19:31:05 +00:00
0 ,
1 ,
3 ,
4 ,
5 ,
7 ,
9 ,
12 ,
15 ,
2000-06-28 17:01:52 +00:00
18 ,
2001-04-18 19:31:05 +00:00
21 ,
24
2000-03-31 16:53:09 +00:00
} ;
2000-05-22 21:24:26 +00:00
2001-04-18 19:31:05 +00:00
UConverter * cnv ;
2000-03-31 16:53:09 +00:00
2000-06-28 17:01:52 +00:00
# define NAME_LMBCS_1 "LMBCS-1"
# define NAME_LMBCS_2 "LMBCS-2"
2000-03-31 16:53:09 +00:00
2000-06-28 17:01:52 +00:00
/* Some basic open/close/property tests on some LMBCS converters */
{
char expected_subchars [ ] = { 0x3F } ; /* ANSI Question Mark */
char new_subchars [ ] = { 0x7F } ; /* subst char used by SmartSuite..*/
char get_subchars [ 1 ] ;
const char * get_name ;
UConverter * cnv1 ;
UConverter * cnv2 ;
int8_t len = sizeof ( get_subchars ) ;
UErrorCode errorCode = U_ZERO_ERROR ;
/* Open */
cnv1 = ucnv_open ( NAME_LMBCS_1 , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
2002-09-20 19:07:19 +00:00
log_data_err ( " Unable to open a LMBCS-1 converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-10-27 00:10:17 +00:00
return ;
2000-06-28 17:01:52 +00:00
}
cnv2 = ucnv_open ( NAME_LMBCS_2 , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
2002-09-20 19:07:19 +00:00
log_data_err ( " Unable to open a LMBCS-2 converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-10-27 00:10:17 +00:00
return ;
2000-06-28 17:01:52 +00:00
}
/* Name */
get_name = ucnv_getName ( cnv1 , & errorCode ) ;
if ( strcmp ( NAME_LMBCS_1 , get_name ) ) {
log_err ( " Unexpected converter name: %s \n " , get_name ) ;
}
get_name = ucnv_getName ( cnv2 , & errorCode ) ;
if ( strcmp ( NAME_LMBCS_2 , get_name ) ) {
log_err ( " Unexpected converter name: %s \n " , get_name ) ;
}
/* substitution chars */
ucnv_getSubstChars ( cnv1 , get_subchars , & len , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Failure on get subst chars: %s \n " , u_errorName ( errorCode ) ) ;
}
if ( len ! = 1 ) {
log_err ( " Unexpected length of sub chars \n " ) ;
}
if ( get_subchars [ 0 ] ! = expected_subchars [ 0 ] ) {
log_err ( " Unexpected value of sub chars \n " ) ;
}
ucnv_setSubstChars ( cnv2 , new_subchars , len , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Failure on set subst chars: %s \n " , u_errorName ( errorCode ) ) ;
}
ucnv_getSubstChars ( cnv2 , get_subchars , & len , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Failure on get subst chars: %s \n " , u_errorName ( errorCode ) ) ;
}
if ( len ! = 1 ) {
log_err ( " Unexpected length of sub chars \n " ) ;
}
if ( get_subchars [ 0 ] ! = new_subchars [ 0 ] ) {
log_err ( " Unexpected value of sub chars \n " ) ;
}
2000-07-06 23:01:50 +00:00
ucnv_close ( cnv1 ) ;
ucnv_close ( cnv2 ) ;
2000-03-31 16:53:09 +00:00
}
2000-06-28 17:01:52 +00:00
/* LMBCS to Unicode - offsets */
2000-03-31 16:53:09 +00:00
{
2000-06-28 17:01:52 +00:00
UErrorCode errorCode = U_ZERO_ERROR ;
2001-04-18 19:31:05 +00:00
2006-05-26 04:44:31 +00:00
const char * pSource = ( const char * ) pszLMBCS ;
const char * sourceLimit = ( const char * ) pszLMBCS + sizeof ( pszLMBCS ) ;
2001-04-18 19:31:05 +00:00
2001-08-25 01:06:41 +00:00
UChar Out [ sizeof ( pszUnicode ) + 1 ] ;
2000-06-28 17:01:52 +00:00
UChar * pOut = Out ;
2001-08-25 01:06:41 +00:00
UChar * OutLimit = Out + sizeof ( pszUnicode ) / sizeof ( UChar ) ;
2000-06-28 17:01:52 +00:00
2000-07-28 03:18:30 +00:00
int32_t off [ sizeof ( offsets ) ] ;
2000-06-28 17:01:52 +00:00
2001-04-18 19:31:05 +00:00
/* last 'offset' in expected results is just the final size.
2000-06-28 17:01:52 +00:00
( Makes other tests easier ) . Compensate here : */
off [ ( sizeof ( offsets ) / sizeof ( offsets [ 0 ] ) ) - 1 ] = sizeof ( pszLMBCS ) ;
2000-03-31 16:53:09 +00:00
2000-05-22 21:24:26 +00:00
2000-06-28 17:01:52 +00:00
cnv = ucnv_open ( " lmbcs " , & errorCode ) ; /* use generic name for LMBCS-1 */
if ( U_FAILURE ( errorCode ) ) {
2002-09-20 19:07:19 +00:00
log_data_err ( " Unable to open a LMBCS converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-10-27 00:10:17 +00:00
return ;
2000-03-31 16:53:09 +00:00
}
2000-08-11 03:35:25 +00:00
2000-06-28 17:01:52 +00:00
ucnv_toUnicode ( cnv ,
2000-08-11 03:35:25 +00:00
& pOut ,
OutLimit ,
2006-05-26 04:44:31 +00:00
& pSource ,
sourceLimit ,
2000-08-11 03:35:25 +00:00
off ,
TRUE ,
& errorCode ) ;
2000-06-28 17:01:52 +00:00
if ( memcmp ( off , offsets , sizeof ( offsets ) ) )
{
log_err ( " LMBCS->Uni: Calculated offsets do not match expected results \n " ) ;
}
if ( memcmp ( Out , pszUnicode , sizeof ( pszUnicode ) ) )
{
log_err ( " LMBCS->Uni: Calculated codepoints do not match expected results \n " ) ;
}
ucnv_close ( cnv ) ;
2000-03-31 16:53:09 +00:00
}
2000-06-28 17:01:52 +00:00
{
/* LMBCS to Unicode - getNextUChar */
const char * sourceStart ;
const char * source = ( const char * ) pszLMBCS ;
const char * limit = ( const char * ) pszLMBCS + sizeof ( pszLMBCS ) ;
2000-07-28 04:09:39 +00:00
const UChar32 * results = pszUnicode32 ;
2000-06-28 17:01:52 +00:00
const int * off = offsets32 ;
2000-05-22 21:24:26 +00:00
2000-06-28 17:01:52 +00:00
UErrorCode errorCode = U_ZERO_ERROR ;
2002-09-20 16:02:16 +00:00
UChar32 uniChar ;
2000-06-28 17:01:52 +00:00
cnv = ucnv_open ( " LMBCS-1 " , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
2002-09-20 19:07:19 +00:00
log_data_err ( " Unable to open a LMBCS-1 converter: %s \n " , u_errorName ( errorCode ) ) ;
2000-10-27 00:10:17 +00:00
return ;
2000-06-28 17:01:52 +00:00
}
else
{
while ( source < limit ) {
sourceStart = source ;
uniChar = ucnv_getNextUChar ( cnv , & source , source + ( off [ 1 ] - off [ 0 ] ) , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " LMBCS-1 ucnv_getNextUChar() failed: %s \n " , u_errorName ( errorCode ) ) ;
break ;
} else if ( source - sourceStart ! = off [ 1 ] - off [ 0 ] | | uniChar ! = * results ) {
log_err ( " LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes. \n " ,
uniChar , ( source - sourceStart ) , * results , * off ) ;
break ;
}
results + + ;
off + + ;
}
}
ucnv_close ( cnv ) ;
}
{ /* test locale & optimization group operations: Unicode to LMBCS */
2001-04-18 19:31:05 +00:00
2000-06-28 17:01:52 +00:00
UErrorCode errorCode = U_ZERO_ERROR ;
2000-05-22 21:24:26 +00:00
UConverter * cnv16he = ucnv_open ( " LMBCS-16,locale=he " , & errorCode ) ;
2000-06-28 17:01:52 +00:00
UConverter * cnv16jp = ucnv_open ( " LMBCS-16,locale=ja_JP " , & errorCode ) ;
UConverter * cnv01us = ucnv_open ( " LMBCS-1,locale=us_EN " , & errorCode ) ;
2000-05-22 21:24:26 +00:00
UChar uniString [ ] = { 0x0192 } ; /* Latin Small letter f with hook */
2000-07-28 04:09:39 +00:00
const UChar * pUniOut = uniString ;
UChar * pUniIn = uniString ;
2000-08-11 03:35:25 +00:00
uint8_t lmbcsString [ 4 ] ;
2006-05-26 04:44:31 +00:00
const char * pLMBCSOut = ( const char * ) lmbcsString ;
char * pLMBCSIn = ( char * ) lmbcsString ;
2000-05-22 21:24:26 +00:00
2000-06-28 17:01:52 +00:00
/* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
2001-04-18 19:31:05 +00:00
ucnv_fromUnicode ( cnv16he ,
2006-05-26 04:44:31 +00:00
& pLMBCSIn , ( pLMBCSIn + sizeof ( lmbcsString ) / sizeof ( lmbcsString [ 0 ] ) ) ,
2001-04-18 19:31:05 +00:00
& pUniOut , pUniOut + sizeof ( uniString ) / sizeof ( uniString [ 0 ] ) ,
2000-05-22 21:24:26 +00:00
NULL , 1 , & errorCode ) ;
2000-08-11 03:35:25 +00:00
if ( lmbcsString [ 0 ] ! = 0x3 | | lmbcsString [ 1 ] ! = 0x83 )
2000-05-22 21:24:26 +00:00
{
2000-06-28 17:01:52 +00:00
log_err ( " LMBCS-16,locale=he gives unexpected translation \n " ) ;
2000-05-22 21:24:26 +00:00
}
2000-08-11 03:35:25 +00:00
2006-05-26 04:44:31 +00:00
pLMBCSIn = ( char * ) lmbcsString ;
2000-07-28 04:09:39 +00:00
pUniOut = uniString ;
2001-04-18 19:31:05 +00:00
ucnv_fromUnicode ( cnv01us ,
2006-05-26 04:44:31 +00:00
& pLMBCSIn , ( const char * ) ( lmbcsString + sizeof ( lmbcsString ) / sizeof ( lmbcsString [ 0 ] ) ) ,
2000-07-28 04:09:39 +00:00
& pUniOut , pUniOut + sizeof ( uniString ) / sizeof ( uniString [ 0 ] ) ,
2000-05-22 21:24:26 +00:00
NULL , 1 , & errorCode ) ;
2001-04-18 19:31:05 +00:00
2000-08-11 03:35:25 +00:00
if ( lmbcsString [ 0 ] ! = 0x9F )
2000-05-22 21:24:26 +00:00
{
2000-06-28 17:01:52 +00:00
log_err ( " LMBCS-1,locale=US gives unexpected translation \n " ) ;
}
/* single byte char from mbcs char set */
2000-08-11 03:35:25 +00:00
lmbcsString [ 0 ] = 0xAE ; /* 1/2 width katakana letter small Yo */
2006-05-26 04:44:31 +00:00
pLMBCSOut = ( const char * ) lmbcsString ;
2000-07-28 04:09:39 +00:00
pUniIn = uniString ;
2001-04-18 19:31:05 +00:00
ucnv_toUnicode ( cnv16jp ,
2000-07-28 04:09:39 +00:00
& pUniIn , pUniIn + 1 ,
2006-05-26 04:44:31 +00:00
& pLMBCSOut , ( pLMBCSOut + 1 ) ,
2000-06-28 17:01:52 +00:00
NULL , 1 , & errorCode ) ;
2006-05-26 04:44:31 +00:00
if ( U_FAILURE ( errorCode ) | | pLMBCSOut ! = ( const char * ) lmbcsString + 1 | | pUniIn ! = uniString + 1 | | uniString [ 0 ] ! = 0xFF6E )
2000-06-28 17:01:52 +00:00
{
log_err ( " Unexpected results from LMBCS-16 single byte char \n " ) ;
}
/* convert to group 1: should be 3 bytes */
2006-05-26 04:44:31 +00:00
pLMBCSIn = ( char * ) lmbcsString ;
2000-07-28 04:09:39 +00:00
pUniOut = uniString ;
2001-04-18 19:31:05 +00:00
ucnv_fromUnicode ( cnv01us ,
2006-05-26 04:44:31 +00:00
& pLMBCSIn , ( const char * ) ( pLMBCSIn + 3 ) ,
2000-07-28 04:09:39 +00:00
& pUniOut , pUniOut + 1 ,
2000-06-28 17:01:52 +00:00
NULL , 1 , & errorCode ) ;
2006-05-26 04:44:31 +00:00
if ( U_FAILURE ( errorCode ) | | pLMBCSIn ! = ( const char * ) lmbcsString + 3 | | pUniOut ! = uniString + 1
2000-08-11 03:35:25 +00:00
| | lmbcsString [ 0 ] ! = 0x10 | | lmbcsString [ 1 ] ! = 0x10 | | lmbcsString [ 2 ] ! = 0xAE )
2000-06-28 17:01:52 +00:00
{
log_err ( " Unexpected results to LMBCS-1 single byte mbcs char \n " ) ;
}
2006-05-26 04:44:31 +00:00
pLMBCSOut = ( const char * ) lmbcsString ;
2000-07-28 04:09:39 +00:00
pUniIn = uniString ;
2001-04-18 19:31:05 +00:00
ucnv_toUnicode ( cnv01us ,
2000-07-28 04:09:39 +00:00
& pUniIn , pUniIn + 1 ,
2006-05-26 04:44:31 +00:00
& pLMBCSOut , ( const char * ) ( pLMBCSOut + 3 ) ,
2000-06-28 17:01:52 +00:00
NULL , 1 , & errorCode ) ;
2006-05-26 04:44:31 +00:00
if ( U_FAILURE ( errorCode ) | | pLMBCSOut ! = ( const char * ) lmbcsString + 3 | | pUniIn ! = uniString + 1 | | uniString [ 0 ] ! = 0xFF6E )
2000-06-28 17:01:52 +00:00
{
log_err ( " Unexpected results from LMBCS-1 single byte mbcs char \n " ) ;
}
2006-05-26 04:44:31 +00:00
pLMBCSIn = ( char * ) lmbcsString ;
2000-07-28 04:09:39 +00:00
pUniOut = uniString ;
2001-04-18 19:31:05 +00:00
ucnv_fromUnicode ( cnv16jp ,
2006-05-26 04:44:31 +00:00
& pLMBCSIn , ( const char * ) ( pLMBCSIn + 1 ) ,
2000-07-28 04:09:39 +00:00
& pUniOut , pUniOut + 1 ,
2000-06-28 17:01:52 +00:00
NULL , 1 , & errorCode ) ;
2006-05-26 04:44:31 +00:00
if ( U_FAILURE ( errorCode ) | | pLMBCSIn ! = ( const char * ) lmbcsString + 1 | | pUniOut ! = uniString + 1 | | lmbcsString [ 0 ] ! = 0xAE )
2000-06-28 17:01:52 +00:00
{
log_err ( " Unexpected results to LMBCS-16 single byte mbcs char \n " ) ;
2000-05-22 21:24:26 +00:00
}
2000-06-28 17:01:52 +00:00
ucnv_close ( cnv16he ) ;
ucnv_close ( cnv16jp ) ;
ucnv_close ( cnv01us ) ;
}
{
/* Small source buffer testing, LMBCS -> Unicode */
UErrorCode errorCode = U_ZERO_ERROR ;
2001-04-18 19:31:05 +00:00
2006-05-26 04:44:31 +00:00
const char * pSource = ( const char * ) pszLMBCS ;
const char * sourceLimit = ( const char * ) pszLMBCS + sizeof ( pszLMBCS ) ;
2000-06-28 17:01:52 +00:00
int codepointCount = 0 ;
2001-08-25 01:06:41 +00:00
UChar Out [ sizeof ( pszUnicode ) + 1 ] ;
2000-06-28 17:01:52 +00:00
UChar * pOut = Out ;
2001-08-25 01:06:41 +00:00
UChar * OutLimit = Out + sizeof ( pszUnicode ) / sizeof ( UChar ) ;
2000-06-28 17:01:52 +00:00
2002-08-03 05:58:30 +00:00
cnv = ucnv_open ( NAME_LMBCS_1 , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " Unable to open a LMBCS-1 converter: %s \n " , u_errorName ( errorCode ) ) ;
return ;
}
2000-06-28 17:01:52 +00:00
while ( ( pSource < sourceLimit ) & & U_SUCCESS ( errorCode ) )
{
2002-08-03 05:58:30 +00:00
ucnv_toUnicode ( cnv ,
& pOut ,
OutLimit ,
2006-05-26 04:44:31 +00:00
& pSource ,
( pSource + 1 ) , /* claim that this is a 1- byte buffer */
2002-08-03 05:58:30 +00:00
NULL ,
FALSE , /* FALSE means there might be more chars in the next buffer */
& errorCode ) ;
if ( U_SUCCESS ( errorCode ) )
{
2006-05-26 04:44:31 +00:00
if ( ( pSource - ( const char * ) pszLMBCS ) = = offsets [ codepointCount + 1 ] )
2002-08-03 05:58:30 +00:00
{
/* we are on to the next code point: check value */
if ( Out [ 0 ] ! = pszUnicode [ codepointCount ] ) {
log_err ( " LMBCS->Uni result %lx should have been %lx \n " ,
Out [ 0 ] , pszUnicode [ codepointCount ] ) ;
}
pOut = Out ; /* reset for accumulating next code point */
codepointCount + + ;
2000-06-28 17:01:52 +00:00
}
2002-08-03 05:58:30 +00:00
}
else
{
log_err ( " Unexpected Error on toUnicode: %s \n " , u_errorName ( errorCode ) ) ;
}
2000-06-28 17:01:52 +00:00
}
{
/* limits & surrogate error testing */
2006-05-26 04:44:31 +00:00
char LIn [ sizeof ( pszLMBCS ) ] ;
const char * pLIn = LIn ;
2001-04-18 19:31:05 +00:00
2000-06-28 17:01:52 +00:00
char LOut [ sizeof ( pszLMBCS ) ] ;
char * pLOut = LOut ;
2001-04-18 19:31:05 +00:00
2000-06-28 17:01:52 +00:00
UChar UOut [ sizeof ( pszUnicode ) ] ;
UChar * pUOut = UOut ;
2001-04-18 19:31:05 +00:00
2000-06-28 17:01:52 +00:00
UChar UIn [ sizeof ( pszUnicode ) ] ;
const UChar * pUIn = UIn ;
2001-04-18 19:31:05 +00:00
2000-08-01 03:25:35 +00:00
int32_t off [ sizeof ( offsets ) ] ;
2000-06-28 17:01:52 +00:00
UChar32 uniChar ;
2001-02-01 01:55:53 +00:00
errorCode = U_ZERO_ERROR ;
/* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
2010-07-12 18:03:29 +00:00
pUIn + + ;
ucnv_fromUnicode ( cnv , & pLOut , pLOut + 1 , & pUIn , pUIn - 1 , off , FALSE , & errorCode ) ;
2000-06-28 17:01:52 +00:00
if ( errorCode ! = U_ILLEGAL_ARGUMENT_ERROR )
{
log_err ( " Unexpected Error on negative source request to ucnv_fromUnicode: %s \n " , u_errorName ( errorCode ) ) ;
}
2010-07-12 18:03:29 +00:00
pUIn - - ;
2000-06-28 17:01:52 +00:00
errorCode = U_ZERO_ERROR ;
2000-08-11 03:35:25 +00:00
ucnv_toUnicode ( cnv , & pUOut , pUOut + 1 , ( const char * * ) & pLIn , ( const char * ) ( pLIn - 1 ) , off , FALSE , & errorCode ) ;
2000-06-28 17:01:52 +00:00
if ( errorCode ! = U_ILLEGAL_ARGUMENT_ERROR )
{
log_err ( " Unexpected Error on negative source request to ucnv_toUnicode: %s \n " , u_errorName ( errorCode ) ) ;
}
errorCode = U_ZERO_ERROR ;
2001-04-18 19:31:05 +00:00
2000-08-11 03:35:25 +00:00
uniChar = ucnv_getNextUChar ( cnv , ( const char * * ) & pLIn , ( const char * ) ( pLIn - 1 ) , & errorCode ) ;
2000-06-28 17:01:52 +00:00
if ( errorCode ! = U_ILLEGAL_ARGUMENT_ERROR )
{
log_err ( " Unexpected Error on negative source request to ucnv_getNextUChar: %s \n " , u_errorName ( errorCode ) ) ;
}
errorCode = U_ZERO_ERROR ;
/* 0 byte source request - no error, no pointer movement */
2000-08-11 03:35:25 +00:00
ucnv_toUnicode ( cnv , & pUOut , pUOut + 1 , ( const char * * ) & pLIn , ( const char * ) pLIn , off , FALSE , & errorCode ) ;
2000-06-28 17:01:52 +00:00
ucnv_fromUnicode ( cnv , & pLOut , pLOut + 1 , & pUIn , pUIn , off , FALSE , & errorCode ) ;
if ( U_FAILURE ( errorCode ) ) {
log_err ( " 0 byte source request: unexpected error: %s \n " , u_errorName ( errorCode ) ) ;
}
if ( ( pUOut ! = UOut ) | | ( pUIn ! = UIn ) | | ( pLOut ! = LOut ) | | ( pLIn ! = LIn ) )
{
log_err ( " Unexpected pointer move in 0 byte source request \n " ) ;
}
2000-07-13 23:55:33 +00:00
/*0 byte source request - GetNextUChar : error & value == fffe or ffff */
2000-08-11 03:35:25 +00:00
uniChar = ucnv_getNextUChar ( cnv , ( const char * * ) & pLIn , ( const char * ) pLIn , & errorCode ) ;
2003-08-01 14:30:29 +00:00
if ( errorCode ! = U_INDEX_OUTOFBOUNDS_ERROR )
2000-06-28 17:01:52 +00:00
{
log_err ( " Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s \n " , u_errorName ( errorCode ) ) ;
}
2000-07-13 23:55:33 +00:00
if ( ( ( uint32_t ) uniChar - 0xfffe ) > 1 ) /* not 0xfffe<=uniChar<=0xffff */
2000-06-28 17:01:52 +00:00
{
log_err ( " Unexpected value on 0-byte source request to ucnv_getnextUChar \n " ) ;
}
2000-08-14 23:35:57 +00:00
errorCode = U_ZERO_ERROR ;
2000-06-28 17:01:52 +00:00
2000-08-11 19:51:13 +00:00
/* running out of target room : U_BUFFER_OVERFLOW_ERROR */
2000-06-28 17:01:52 +00:00
pUIn = pszUnicode ;
2001-08-25 01:06:41 +00:00
ucnv_fromUnicode ( cnv , & pLOut , pLOut + offsets [ 4 ] , & pUIn , pUIn + sizeof ( pszUnicode ) / sizeof ( UChar ) , off , FALSE , & errorCode ) ;
2000-08-11 19:51:13 +00:00
if ( errorCode ! = U_BUFFER_OVERFLOW_ERROR | | pLOut ! = LOut + offsets [ 4 ] | | pUIn ! = pszUnicode + 4 )
2000-06-28 17:01:52 +00:00
{
log_err ( " Unexpected results on out of target room to ucnv_fromUnicode \n " ) ;
}
2000-08-14 23:35:57 +00:00
errorCode = U_ZERO_ERROR ;
2000-06-28 17:01:52 +00:00
2006-05-26 04:44:31 +00:00
pLIn = ( const char * ) pszLMBCS ;
ucnv_toUnicode ( cnv , & pUOut , pUOut + 4 , & pLIn , ( pLIn + sizeof ( pszLMBCS ) ) , off , FALSE , & errorCode ) ;
if ( errorCode ! = U_BUFFER_OVERFLOW_ERROR | | pUOut ! = UOut + 4 | | pLIn ! = ( const char * ) pszLMBCS + offsets [ 4 ] )
2000-06-28 17:01:52 +00:00
{
log_err ( " Unexpected results on out of target room to ucnv_toUnicode \n " ) ;
}
2001-04-18 19:31:05 +00:00
2000-06-28 17:01:52 +00:00
/* unpaired or chopped LMBCS surrogates */
/* OK high surrogate, Low surrogate is chopped */
2006-05-26 04:44:31 +00:00
LIn [ 0 ] = ( char ) 0x14 ;
LIn [ 1 ] = ( char ) 0xD8 ;
LIn [ 2 ] = ( char ) 0x01 ;
LIn [ 3 ] = ( char ) 0x14 ;
LIn [ 4 ] = ( char ) 0xDC ;
2000-06-28 17:01:52 +00:00
pLIn = LIn ;
2000-08-14 23:35:57 +00:00
errorCode = U_ZERO_ERROR ;
2000-06-28 17:01:52 +00:00
pUOut = UOut ;
2001-04-18 19:31:05 +00:00
2003-08-01 14:30:29 +00:00
ucnv_setToUCallBack ( cnv , UCNV_TO_U_CALLBACK_STOP , NULL , NULL , NULL , & errorCode ) ;
2001-08-31 02:21:34 +00:00
ucnv_toUnicode ( cnv , & pUOut , pUOut + sizeof ( UOut ) / sizeof ( UChar ) , ( const char * * ) & pLIn , ( const char * ) ( pLIn + 5 ) , off , TRUE , & errorCode ) ;
2001-02-26 19:29:14 +00:00
if ( UOut [ 0 ] ! = 0xD801 | | errorCode ! = U_TRUNCATED_CHAR_FOUND | | pUOut ! = UOut + 1 | | pLIn ! = LIn + 5 )
2000-06-28 17:01:52 +00:00
{
log_err ( " Unexpected results on chopped low surrogate \n " ) ;
}
2001-04-18 19:31:05 +00:00
2000-06-28 17:01:52 +00:00
/* chopped at surrogate boundary */
2006-05-26 04:44:31 +00:00
LIn [ 0 ] = ( char ) 0x14 ;
LIn [ 1 ] = ( char ) 0xD8 ;
LIn [ 2 ] = ( char ) 0x01 ;
2000-06-28 17:01:52 +00:00
pLIn = LIn ;
2000-08-14 23:35:57 +00:00
errorCode = U_ZERO_ERROR ;
2000-06-28 17:01:52 +00:00
pUOut = UOut ;
2001-08-31 02:21:34 +00:00
ucnv_toUnicode ( cnv , & pUOut , pUOut + sizeof ( UOut ) / sizeof ( UChar ) , ( const char * * ) & pLIn , ( const char * ) ( pLIn + 3 ) , off , TRUE , & errorCode ) ;
2000-06-28 17:01:52 +00:00
if ( UOut [ 0 ] ! = 0xD801 | | U_FAILURE ( errorCode ) | | pUOut ! = UOut + 1 | | pLIn ! = LIn + 3 )
{
log_err ( " Unexpected results on chopped at surrogate boundary \n " ) ;
}
/* unpaired surrogate plus valid Unichar */
2006-05-26 04:44:31 +00:00
LIn [ 0 ] = ( char ) 0x14 ;
LIn [ 1 ] = ( char ) 0xD8 ;
LIn [ 2 ] = ( char ) 0x01 ;
LIn [ 3 ] = ( char ) 0x14 ;
LIn [ 4 ] = ( char ) 0xC9 ;
LIn [ 5 ] = ( char ) 0xD0 ;
2000-06-28 17:01:52 +00:00
pLIn = LIn ;
2000-08-14 23:35:57 +00:00
errorCode = U_ZERO_ERROR ;
2000-06-28 17:01:52 +00:00
pUOut = UOut ;
2001-08-31 02:21:34 +00:00
ucnv_toUnicode ( cnv , & pUOut , pUOut + sizeof ( UOut ) / sizeof ( UChar ) , ( const char * * ) & pLIn , ( const char * ) ( pLIn + 6 ) , off , TRUE , & errorCode ) ;
2000-06-28 17:01:52 +00:00
if ( UOut [ 0 ] ! = 0xD801 | | UOut [ 1 ] ! = 0xC9D0 | | U_FAILURE ( errorCode ) | | pUOut ! = UOut + 2 | | pLIn ! = LIn + 6 )
{
log_err ( " Unexpected results after unpaired surrogate plus valid Unichar \n " ) ;
}
/* unpaired surrogate plus chopped Unichar */
2006-05-26 04:44:31 +00:00
LIn [ 0 ] = ( char ) 0x14 ;
LIn [ 1 ] = ( char ) 0xD8 ;
LIn [ 2 ] = ( char ) 0x01 ;
LIn [ 3 ] = ( char ) 0x14 ;
LIn [ 4 ] = ( char ) 0xC9 ;
2001-04-18 19:31:05 +00:00
2000-06-28 17:01:52 +00:00
pLIn = LIn ;
2000-08-14 23:35:57 +00:00
errorCode = U_ZERO_ERROR ;
2000-06-28 17:01:52 +00:00
pUOut = UOut ;
2001-08-31 02:21:34 +00:00
ucnv_toUnicode ( cnv , & pUOut , pUOut + sizeof ( UOut ) / sizeof ( UChar ) , ( const char * * ) & pLIn , ( const char * ) ( pLIn + 5 ) , off , TRUE , & errorCode ) ;
2001-02-26 19:29:14 +00:00
if ( UOut [ 0 ] ! = 0xD801 | | errorCode ! = U_TRUNCATED_CHAR_FOUND | | pUOut ! = UOut + 1 | | pLIn ! = LIn + 5 )
2000-06-28 17:01:52 +00:00
{
log_err ( " Unexpected results after unpaired surrogate plus chopped Unichar \n " ) ;
}
/* unpaired surrogate plus valid non-Unichar */
2006-05-26 04:44:31 +00:00
LIn [ 0 ] = ( char ) 0x14 ;
LIn [ 1 ] = ( char ) 0xD8 ;
LIn [ 2 ] = ( char ) 0x01 ;
LIn [ 3 ] = ( char ) 0x0F ;
LIn [ 4 ] = ( char ) 0x3B ;
2001-04-18 19:31:05 +00:00
2000-06-28 17:01:52 +00:00
pLIn = LIn ;
2000-08-14 23:35:57 +00:00
errorCode = U_ZERO_ERROR ;
2000-06-28 17:01:52 +00:00
pUOut = UOut ;
2001-08-31 02:21:34 +00:00
ucnv_toUnicode ( cnv , & pUOut , pUOut + sizeof ( UOut ) / sizeof ( UChar ) , ( const char * * ) & pLIn , ( const char * ) ( pLIn + 5 ) , off , TRUE , & errorCode ) ;
2000-06-28 17:01:52 +00:00
if ( UOut [ 0 ] ! = 0xD801 | | UOut [ 1 ] ! = 0x1B | | U_FAILURE ( errorCode ) | | pUOut ! = UOut + 2 | | pLIn ! = LIn + 5 )
{
log_err ( " Unexpected results after unpaired surrogate plus valid non-Unichar \n " ) ;
}
/* unpaired surrogate plus chopped non-Unichar */
2006-05-26 04:44:31 +00:00
LIn [ 0 ] = ( char ) 0x14 ;
LIn [ 1 ] = ( char ) 0xD8 ;
LIn [ 2 ] = ( char ) 0x01 ;
LIn [ 3 ] = ( char ) 0x0F ;
2001-04-18 19:31:05 +00:00
2000-06-28 17:01:52 +00:00
pLIn = LIn ;
2000-08-14 23:35:57 +00:00
errorCode = U_ZERO_ERROR ;
2000-06-28 17:01:52 +00:00
pUOut = UOut ;
2001-08-31 02:21:34 +00:00
ucnv_toUnicode ( cnv , & pUOut , pUOut + sizeof ( UOut ) / sizeof ( UChar ) , ( const char * * ) & pLIn , ( const char * ) ( pLIn + 4 ) , off , TRUE , & errorCode ) ;
2000-06-28 17:01:52 +00:00
2001-02-26 19:29:14 +00:00
if ( UOut [ 0 ] ! = 0xD801 | | errorCode ! = U_TRUNCATED_CHAR_FOUND | | pUOut ! = UOut + 1 | | pLIn ! = LIn + 4 )
2000-06-28 17:01:52 +00:00
{
log_err ( " Unexpected results after unpaired surrogate plus chopped non-Unichar \n " ) ;
}
}
2000-05-22 21:24:26 +00:00
}
2000-06-28 17:01:52 +00:00
ucnv_close ( cnv ) ; /* final cleanup */
2000-03-31 16:53:09 +00:00
}
2000-11-21 04:05:39 +00:00
static void TestJitterbug255 ( )
2000-02-05 00:01:54 +00:00
{
2004-12-23 21:03:30 +00:00
static const uint8_t testBytes [ ] = { 0x95 , 0xcf , 0x8a , 0xb7 , 0x0d , 0x0a , 0x00 } ;
2006-05-26 04:44:31 +00:00
const char * testBuffer = ( const char * ) testBytes ;
const char * testEnd = ( const char * ) testBytes + sizeof ( testBytes ) ;
2000-02-05 00:01:54 +00:00
UErrorCode status = U_ZERO_ERROR ;
2009-08-26 01:02:40 +00:00
/*UChar32 result;*/
2000-02-05 00:01:54 +00:00
UConverter * cnv = 0 ;
2001-04-18 19:31:05 +00:00
cnv = ucnv_open ( " shift-jis " , & status ) ;
2000-02-05 00:01:54 +00:00
if ( U_FAILURE ( status ) | | cnv = = 0 ) {
2002-09-20 19:07:19 +00:00
log_data_err ( " Failed to open the converter for SJIS. \n " ) ;
2000-02-05 00:01:54 +00:00
return ;
}
while ( testBuffer ! = testEnd )
{
2009-08-26 01:02:40 +00:00
/*result = */ ucnv_getNextUChar ( cnv , & testBuffer , testEnd , & status ) ;
2000-02-05 00:01:54 +00:00
if ( U_FAILURE ( status ) )
{
log_err ( " Failed to convert the next UChar for SJIS. \n " ) ;
break ;
}
}
ucnv_close ( cnv ) ;
}
2000-04-18 21:57:47 +00:00
2000-11-21 04:05:39 +00:00
static void TestEBCDICUS4XML ( )
2000-04-18 21:57:47 +00:00
{
UChar unicodes_x [ ] = { 0x0000 , 0x0000 , 0x0000 , 0x0000 } ;
2001-09-01 04:23:43 +00:00
static const UChar toUnicodeMaps_x [ ] = { 0x000A , 0x000A , 0x000D , 0x0000 } ;
static const char fromUnicodeMaps_x [ ] = { 0x25 , 0x25 , 0x0D , 0x00 } ;
static const char newLines_x [ ] = { 0x25 , 0x15 , 0x0D , 0x00 } ;
2000-04-18 21:57:47 +00:00
char target_x [ ] = { 0x00 , 0x00 , 0x00 , 0x00 } ;
UChar * unicodes = unicodes_x ;
const UChar * toUnicodeMaps = toUnicodeMaps_x ;
char * target = target_x ;
const char * fromUnicodeMaps = fromUnicodeMaps_x , * newLines = newLines_x ;
UErrorCode status = U_ZERO_ERROR ;
UConverter * cnv = 0 ;
cnv = ucnv_open ( " ebcdic-xml-us " , & status ) ;
if ( U_FAILURE ( status ) | | cnv = = 0 ) {
2002-09-20 19:07:19 +00:00
log_data_err ( " Failed to open the converter for EBCDIC-XML-US. \n " ) ;
2001-09-01 04:23:43 +00:00
return ;
2000-04-18 21:57:47 +00:00
}
ucnv_toUnicode ( cnv , & unicodes , unicodes + 3 , ( const char * * ) & newLines , newLines + 3 , NULL , TRUE , & status ) ;
if ( U_FAILURE ( status ) | | memcmp ( unicodes_x , toUnicodeMaps , sizeof ( UChar ) * 3 ) ! = 0 ) {
2001-09-01 04:23:43 +00:00
log_err ( " To Unicode conversion failed in EBCDICUS4XML test. %s \n " ,
u_errorName ( status ) ) ;
printUSeqErr ( unicodes_x , 3 ) ;
printUSeqErr ( toUnicodeMaps , 3 ) ;
2000-04-18 21:57:47 +00:00
}
2001-09-01 04:23:43 +00:00
status = U_ZERO_ERROR ;
2000-04-18 21:57:47 +00:00
ucnv_fromUnicode ( cnv , & target , target + 3 , ( const UChar * * ) & toUnicodeMaps , toUnicodeMaps + 3 , NULL , TRUE , & status ) ;
if ( U_FAILURE ( status ) | | memcmp ( target_x , fromUnicodeMaps , sizeof ( char ) * 3 ) ! = 0 ) {
2001-09-01 04:23:43 +00:00
log_err ( " From Unicode conversion failed in EBCDICUS4XML test. %s \n " ,
u_errorName ( status ) ) ;
2001-09-18 21:02:14 +00:00
printSeqErr ( ( const unsigned char * ) target_x , 3 ) ;
printSeqErr ( ( const unsigned char * ) fromUnicodeMaps , 3 ) ;
2000-04-18 21:57:47 +00:00
}
ucnv_close ( cnv ) ;
}
2006-07-28 22:58:29 +00:00
# endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
2000-04-18 21:57:47 +00:00
2002-09-20 17:54:45 +00:00
# if !UCONFIG_NO_COLLATION
2001-11-05 23:17:51 +00:00
static void TestJitterbug981 ( ) {
2004-10-02 04:59:29 +00:00
const UChar * rules ;
int32_t rules_length , target_cap , bytes_needed , buff_size ;
UErrorCode status = U_ZERO_ERROR ;
UConverter * utf8cnv ;
UCollator * myCollator ;
char * buff ;
int numNeeded = 0 ;
utf8cnv = ucnv_open ( " utf8 " , & status ) ;
if ( U_FAILURE ( status ) ) {
2008-02-08 08:21:32 +00:00
log_err ( " Could not open UTF-8 converter. Error: %s \n " , u_errorName ( status ) ) ;
2004-10-02 04:59:29 +00:00
return ;
}
myCollator = ucol_open ( " zh " , & status ) ;
if ( U_FAILURE ( status ) ) {
2009-06-09 21:28:13 +00:00
log_data_err ( " Could not open collator for zh locale. Error: %s \n " , u_errorName ( status ) ) ;
2008-02-08 08:21:32 +00:00
ucnv_close ( utf8cnv ) ;
2004-10-02 04:59:29 +00:00
return ;
}
2001-11-05 23:17:51 +00:00
2004-10-02 04:59:29 +00:00
rules = ucol_getRules ( myCollator , & rules_length ) ;
buff_size = rules_length * ucnv_getMaxCharSize ( utf8cnv ) ;
buff = malloc ( buff_size ) ;
2001-11-05 23:17:51 +00:00
2004-10-02 04:59:29 +00:00
target_cap = 0 ;
do {
ucnv_reset ( utf8cnv ) ;
status = U_ZERO_ERROR ;
if ( target_cap > = buff_size ) {
log_err ( " wanted %d bytes, only %d available \n " , target_cap , buff_size ) ;
2007-08-31 04:58:52 +00:00
break ;
2004-10-02 04:59:29 +00:00
}
bytes_needed = ucnv_fromUChars ( utf8cnv , buff , target_cap ,
rules , rules_length , & status ) ;
target_cap = ( bytes_needed > target_cap ) ? bytes_needed : target_cap + 1 ;
if ( numNeeded ! = 0 & & numNeeded ! = bytes_needed ) {
log_err ( " ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes " ) ;
2007-08-31 04:58:52 +00:00
break ;
2004-10-02 04:59:29 +00:00
}
numNeeded = bytes_needed ;
} while ( status = = U_BUFFER_OVERFLOW_ERROR ) ;
ucol_close ( myCollator ) ;
ucnv_close ( utf8cnv ) ;
2004-10-05 23:02:26 +00:00
free ( buff ) ;
2001-11-05 23:17:51 +00:00
}
2000-06-22 01:18:30 +00:00
2002-09-20 17:54:45 +00:00
# endif
2001-11-06 00:45:10 +00:00
static void TestJitterbug1293 ( ) {
2004-12-23 21:03:30 +00:00
static const UChar src [ ] = { 0x30DE , 0x30A4 , 0x5E83 , 0x544A , 0x30BF , 0x30A4 , 0x30D7 , 0x000 } ;
2001-11-06 00:45:10 +00:00
char target [ 256 ] ;
UErrorCode status = U_ZERO_ERROR ;
UConverter * conv = NULL ;
int32_t target_cap , bytes_needed , numNeeded = 0 ;
conv = ucnv_open ( " shift-jis " , & status ) ;
if ( U_FAILURE ( status ) ) {
2002-09-20 19:07:19 +00:00
log_data_err ( " Could not open Shift-Jis converter. Error: %s " , u_errorName ( status ) ) ;
2001-11-06 00:45:10 +00:00
return ;
}
do {
target_cap = 0 ;
bytes_needed = ucnv_fromUChars ( conv , target , 256 , src , u_strlen ( src ) , & status ) ;
target_cap = ( bytes_needed > target_cap ) ? bytes_needed : target_cap + 1 ;
if ( numNeeded ! = 0 & & numNeeded ! = bytes_needed ) {
log_err ( " ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes " ) ;
}
numNeeded = bytes_needed ;
} while ( status = = U_BUFFER_OVERFLOW_ERROR ) ;
if ( U_FAILURE ( status ) ) {
log_err ( " An error occured in ucnv_fromUChars. Error: %s " , u_errorName ( status ) ) ;
return ;
}
ucnv_close ( conv ) ;
}
2006-08-15 23:21:39 +00:00
/*
 * Converts a multi-line byte string to Unicode with the converter opened
 * below and compares the output, UChar by UChar, against expected[].
 *
 * Per the verbose log message, the scenario under test is that the converter
 * switches back to its default script when a new line is encountered; the
 * per-line comments on data/expected name the script each line exercises.
 *
 * A failed open is reported as a data error and ends the test. A failed
 * conversion is reported, and whatever was produced is still compared.
 * The comparison never reads past the end of expected[]: output beyond it is
 * reported as an error instead.
 */
static void TestJB5275_1() {
    static const char *data = " \x3B \xB3 \x0A " /* Easy characters */
                              " \xC0 \xE9 \xBF \xE9 \xE8 \xD8 \x0A " /* Gurmukhi test */
                              /* Switch script: */
                              " \xEF \x43 \xC0 \xE9 \xBF \xE9 \xE8 \xD8 \x0A " /* Bengali test */
                              " \x3B \xB3 \x0A " /* Easy characters - new line, so should default!*/
                              " \xEF \x40 \x3B \xB3 \x0A ";
    static const UChar expected[] = {
        0x003b, 0x0a15, 0x000a, /* Easy characters */
        0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */
        0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/
        0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/
        0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/
    };
    const UChar *const expectedLimit = expected + sizeof(expected) / sizeof(expected[0]);
    UErrorCode status = U_ZERO_ERROR;
    UConverter *conv = ucnv_open(" iscii-gur ", &status);
    UChar dest[100] = { 0 };
    UChar *target = dest;
    UChar *targetLimit = dest + sizeof(dest) / sizeof(dest[0]);
    const char *source = data;
    const char *sourceLimit = data + strlen(data);
    const UChar *expectedPos = expected;

    if (U_FAILURE(status)) {
        log_data_err(" Unable to open converter: iscii-gur got errorCode: %s \n ", u_errorName(status));
        return;
    }

    log_verbose(" Testing switching back to default script when new line is encountered. \n ");
    ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
    if (U_FAILURE(status)) {
        log_err(" conversion failed: %s \n ", u_errorName(status));
    }

    /* From here on [dest, targetLimit) is exactly what the converter wrote. */
    targetLimit = target;
    target = dest;
    printUSeq(target, targetLimit - target);

    while (target < targetLimit && expectedPos < expectedLimit) {
        if (*expectedPos != *target) {
            log_err(" did not get the expected output. \\ u%04X != \\ u%04X (got) \n ", *expectedPos, *target);
        }
        target++;
        expectedPos++;
    }
    if (target < targetLimit) {
        /* More output than expected[] holds; comparing further would read out of bounds. */
        log_err("converter produced %d more UChars than expected\n", (int)(targetLimit - target));
    }

    ucnv_close(conv);
}
2002-07-29 21:04:18 +00:00
2006-08-15 23:21:39 +00:00
/*
 * Converts a multi-line byte string to Unicode with the converter opened
 * below and compares the output, UChar by UChar, against expected[].
 * The per-line comments on data/expected name the script each line exercises.
 *
 * A failed open is reported as a data error and ends the test (same handling
 * as TestJB5275_1). A failed conversion is reported, and whatever was
 * produced is still compared. The comparison never reads past the end of
 * expected[]: output beyond it is reported as an error instead.
 */
static void TestJB5275() {
    static const char *data =
        /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A" unsupported sequence \xEF\x41 */
        /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A" unsupported sequence \xEF\x41 */
        /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A" unsupported sequence \xEF\x41 */
        " \xEF \x4B \xC0 \xE9 \xBF \xE9 \xE8 \xD8 \x0A " /* Gurmukhi test */
        " \xEF \x4A \xC0 \xD4 \xBF \xD4 \xE8 \xD8 \x0A " /* Gujarati test */
        " \xEF \x48 \x38 \xB3 \x0A " /* Kannada test */
        " \xEF \x49 \x39 \xB3 \x0A " /* Malayalam test */
        " \xEF \x4A \x3A \xB3 \x0A " /* Gujarati test */
        " \xEF \x4B \x3B \xB3 \x0A " /* Punjabi test */
        /* "\xEF\x4C\x3C\xB3\x0A" unsupported sequence \xEF\x41 */;
    static const UChar expected[] = {
        0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */
        0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A, /* Gujarati test */
        0x0038, 0x0C95, 0x000A, /* Kannada test */
        0x0039, 0x0D15, 0x000A, /* Malayalam test */
        0x003A, 0x0A95, 0x000A, /* Gujarati test */
        0x003B, 0x0A15, 0x000A, /* Punjabi test */
    };
    const UChar *const expectedLimit = expected + sizeof(expected) / sizeof(expected[0]);
    UErrorCode status = U_ZERO_ERROR;
    UConverter *conv = ucnv_open(" iscii ", &status);
    UChar dest[100] = { 0 };
    UChar *target = dest;
    UChar *targetLimit = dest + sizeof(dest) / sizeof(dest[0]);
    const char *source = data;
    const char *sourceLimit = data + strlen(data);
    const UChar *expectedPos = expected;

    if (U_FAILURE(status)) {
        /* Missing converter data is not a conversion failure; report it as such and stop. */
        log_data_err("Unable to open converter: iscii got errorCode: %s\n", u_errorName(status));
        return;
    }

    ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
    if (U_FAILURE(status)) {
        log_err(" conversion failed: %s \n ", u_errorName(status));
    }

    /* From here on [dest, targetLimit) is exactly what the converter wrote. */
    targetLimit = target;
    target = dest;
    printUSeq(target, targetLimit - target);

    while (target < targetLimit && expectedPos < expectedLimit) {
        if (*expectedPos != *target) {
            log_err(" did not get the expected output. \\ u%04X != \\ u%04X (got) \n ", *expectedPos, *target);
        }
        target++;
        expectedPos++;
    }
    if (target < targetLimit) {
        /* More output than expected[] holds; comparing further would read out of bounds. */
        log_err("converter produced %d more UChars than expected\n", (int)(targetLimit - target));
    }

    ucnv_close(conv);
}
2011-02-23 22:21:58 +00:00
static void
TestIsFixedWidth ( ) {
UErrorCode status = U_ZERO_ERROR ;
UConverter * cnv = NULL ;
int32_t i ;
const char * fixedWidth [ ] = {
" US-ASCII " ,
" UTF32 " ,
" ibm-5478_P100-1995 " ,
" UTF16 "
} ;
int32_t fixedWidthLength = 4 ;
const char * notFixedWidth [ ] = {
" GB18030 " ,
" UTF8 " ,
" windows-949-2000 "
} ;
int32_t notFixedWidthLength = 3 ;
for ( i = 0 ; i < fixedWidthLength ; i + + ) {
cnv = ucnv_open ( fixedWidth [ i ] , & status ) ;
if ( cnv = = NULL | | U_FAILURE ( status ) ) {
2011-03-03 19:29:57 +00:00
log_data_err ( " Error open converter: %s - %s \n " , fixedWidth [ i ] , u_errorName ( status ) ) ;
continue ;
2011-02-23 22:21:58 +00:00
}
if ( ! ucnv_isFixedWidth ( cnv , & status ) ) {
log_err ( " %s is a fixedWidth converter but returned FALSE. \n " , fixedWidth [ i ] ) ;
}
2011-03-03 19:29:57 +00:00
ucnv_close ( cnv ) ;
2011-02-23 22:21:58 +00:00
}
for ( i = 0 ; i < notFixedWidthLength ; i + + ) {
cnv = ucnv_open ( notFixedWidth [ i ] , & status ) ;
if ( cnv = = NULL | | U_FAILURE ( status ) ) {
2011-03-03 19:29:57 +00:00
log_data_err ( " Error open converter: %s - %s \n " , fixedWidth [ i ] , u_errorName ( status ) ) ;
continue ;
2011-02-23 22:21:58 +00:00
}
if ( ucnv_isFixedWidth ( cnv , & status ) ) {
log_err ( " %s is NOT a fixedWidth converter but returned TRUE. \n " , fixedWidth [ i ] ) ;
}
2011-03-03 19:29:57 +00:00
ucnv_close ( cnv ) ;
2011-02-23 22:21:58 +00:00
}
}