2000-05-24 17:31:51 +00:00
/*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2011-03-03 19:29:57 +00:00
* Copyright ( C ) 2000 - 2011 , International Business Machines
2000-05-24 17:31:51 +00:00
* Corporation and others . All Rights Reserved .
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* Date Name Description
* 05 / 23 / 00 aliu Creation .
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*/
2002-09-21 00:43:14 +00:00
2000-05-24 17:31:51 +00:00
# include "unicode/utypes.h"
2002-09-21 00:43:14 +00:00
# if !UCONFIG_NO_TRANSLITERATION
2000-05-24 17:31:51 +00:00
# include "unicode/translit.h"
2003-02-14 01:44:49 +00:00
# include "rbt.h"
2004-09-23 00:24:36 +00:00
# include "unicode/calendar.h"
2000-05-24 17:31:51 +00:00
# include "unicode/uniset.h"
2002-03-19 17:50:15 +00:00
# include "unicode/uchar.h"
2001-10-31 19:26:53 +00:00
# include "unicode/normlzr.h"
# include "unicode/uchar.h"
2001-11-22 05:51:42 +00:00
# include "unicode/parseerr.h"
2002-04-30 22:12:28 +00:00
# include "unicode/usetiter.h"
2002-12-06 21:31:22 +00:00
# include "unicode/putil.h"
2003-04-11 23:58:42 +00:00
# include "unicode/uversion.h"
2004-06-07 17:11:07 +00:00
# include "unicode/locid.h"
# include "unicode/ulocdata.h"
# include "unicode/utf8.h"
2011-07-27 05:53:56 +00:00
# include "unicode/utf16.h"
2004-11-10 03:47:56 +00:00
# include "putilimp.h"
2003-04-11 23:58:42 +00:00
# include "cmemory.h"
2001-10-08 23:26:58 +00:00
# include "transrt.h"
# include "testutil.h"
2001-11-22 02:41:06 +00:00
# include <string.h>
2004-06-07 17:11:07 +00:00
# include <stdio.h>
2000-05-24 17:31:51 +00:00
// CASE(id, test): expands to one `case` of the runIndexedTest() switch.
// It always publishes the test's name; when `exec` is set it logs a banner,
// runs test(), and logs the elapsed wall-clock time.
// uprv_getUTCtime() yields a UDate (milliseconds), so the difference is divided
// by U_MILLIS_PER_SECOND to match the "seconds" label (it was previously
// divided by U_MILLIS_PER_DAY, which reported days).
#define CASE(id,test) case id:                                  \
    name = #test;                                               \
    if (exec) {                                                 \
        logln(#test " --- ");                                   \
        logln((UnicodeString)" ");                              \
        UDate t = uprv_getUTCtime();                            \
        test();                                                 \
        t = uprv_getUTCtime() - t;                              \
        logln((UnicodeString)#test " took " + t/U_MILLIS_PER_SECOND + " seconds "); \
    }                                                           \
    break
2001-11-15 21:11:04 +00:00
// EXHAUSTIVE(id, test): like CASE, but the test is only exposed when the
// harness is not in `quick` mode. In quick mode the case reports the
// placeholder name instead and never runs the test.
#define EXHAUSTIVE(id,test) case id:                            \
    if (quick) {                                                \
        name = " ";                                             \
    } else {                                                    \
        name = #test;                                           \
        if (exec) {                                             \
            logln(#test " --- ");                               \
            logln((UnicodeString)" ");                          \
            test();                                             \
        }                                                       \
    }                                                           \
    break
2000-05-24 17:31:51 +00:00
void
TransliteratorRoundTripTest : : runIndexedTest ( int32_t index , UBool exec ,
2000-08-23 19:11:16 +00:00
const char * & name , char * /*par*/ ) {
2000-05-24 17:31:51 +00:00
switch ( index ) {
2001-11-22 02:41:06 +00:00
CASE ( 0 , TestCyrillic ) ;
// CASE(0,TestKana);
CASE ( 1 , TestHiragana ) ;
CASE ( 2 , TestKatakana ) ;
CASE ( 3 , TestJamo ) ;
CASE ( 4 , TestHangul ) ;
CASE ( 5 , TestGreek ) ;
CASE ( 6 , TestGreekUNGEGN ) ;
CASE ( 7 , Testel ) ;
2002-12-06 21:31:22 +00:00
CASE ( 8 , TestDevanagariLatin ) ;
CASE ( 9 , TestInterIndic ) ;
2004-05-19 04:17:37 +00:00
CASE ( 10 , TestHebrew ) ;
CASE ( 11 , TestArabic ) ;
2004-06-07 17:11:07 +00:00
CASE ( 12 , TestHan ) ;
2000-05-24 17:31:51 +00:00
default : name = " " ; break ;
}
}
2003-04-11 23:58:42 +00:00
2002-12-12 19:54:39 +00:00
//--------------------------------------------------------------------
// TransliteratorPointer
//--------------------------------------------------------------------
/**
 * Scoped owner for a Transliterator.
 * The wrapped pointer is adopted at construction and deleted when the
 * wrapper is destroyed, so early returns cannot leak it. Copy construction
 * and assignment are declared private to forbid copying.
 */
class TransliteratorPointer {
    Transliterator* t;

    // disallowed:
    TransliteratorPointer(const TransliteratorPointer& rhs);
    TransliteratorPointer& operator=(const TransliteratorPointer& rhs);

public:
    // Takes ownership of `adopted` (which may be NULL).
    TransliteratorPointer(Transliterator* adopted) : t(adopted) {}

    ~TransliteratorPointer() { delete t; }

    // Member access on the wrapped transliterator.
    inline Transliterator* operator->() { return t; }

    // Implicit conversions back to the raw pointer; ownership is not released.
    inline operator const Transliterator*() const { return t; }
    inline operator Transliterator*() { return t; }
};
2001-10-30 18:29:45 +00:00
//--------------------------------------------------------------------
2001-11-22 02:41:06 +00:00
// Legal
2001-10-30 18:29:45 +00:00
//--------------------------------------------------------------------
2001-11-22 02:41:06 +00:00
class Legal {
2001-10-30 18:29:45 +00:00
public :
2001-11-22 02:41:06 +00:00
Legal ( ) { }
virtual ~ Legal ( ) { }
2001-11-26 23:09:33 +00:00
virtual UBool is ( const UnicodeString & /*sourceString*/ ) const { return TRUE ; }
2001-10-30 18:29:45 +00:00
} ;
2001-11-22 02:41:06 +00:00
/**
 * Legality rules for conjoining Jamo sequences:
 *   - any initial must be followed by a medial (or initial)
 *   - any medial must follow an initial (or medial)
 *   - any final must follow a medial (or final)
 */
class LegalJamo : public Legal {
public:
    LegalJamo() {}
    virtual ~LegalJamo() {}

    // TRUE when the decomposed string obeys the rules listed above.
    virtual UBool is(const UnicodeString& sourceString) const;

    // Classifies a code unit: 0 = initial, 1 = medial, 2 = final, -1 = other.
    int getType(UChar c) const;
};
2001-10-30 18:29:45 +00:00
2001-11-22 02:41:06 +00:00
/**
 * Checks the Jamo ordering rules on the canonical decomposition of
 * sourceString. Returns FALSE if decomposition fails or any Jamo has an
 * illegal neighbour; TRUE otherwise.
 */
UBool LegalJamo::is(const UnicodeString& sourceString) const {
    UErrorCode errorCode = U_ZERO_ERROR;
    UnicodeString nfd;
    Normalizer::decompose(sourceString, FALSE, 0, nfd, errorCode);
    if (U_FAILURE(errorCode)) {
        return FALSE;
    }

    // Works on UTF-16 code units; surrogates are deliberately not handled.
    // NOTE(review): the neighbour lookups at pos-1 / pos+1 can fall outside
    // the string; this relies on charAt() tolerating that (ICU documents a
    // sentinel return) so the neighbour classifies as "other" -- confirm.
    for (int pos = 0; pos < nfd.length(); ++pos) {
        const int kind = getType(nfd.charAt(pos));
        if (kind == 0) {
            // initial: next must be initial or medial
            const int next = getType(nfd.charAt(pos + 1));
            if (!(next == 0 || next == 1)) {
                return FALSE;
            }
        } else if (kind == 1) {
            // medial: previous must be initial or medial
            const int prev = getType(nfd.charAt(pos - 1));
            if (!(prev == 0 || prev == 1)) {
                return FALSE;
            }
        } else if (kind == 2) {
            // final: previous must be medial or final
            const int prev = getType(nfd.charAt(pos - 1));
            if (!(prev == 1 || prev == 2)) {
                return FALSE;
            }
        }
    }
    return TRUE;
}
2001-11-22 02:41:06 +00:00
int LegalJamo : : getType ( UChar c ) const {
if ( 0x1100 < = c & & c < = 0x1112 )
return 0 ;
else if ( 0x1161 < = c & & c < = 0x1175 )
return 1 ;
else if ( 0x11A8 < = c & & c < = 0x11C2 )
return 2 ;
return - 1 ; // other
2001-10-30 18:29:45 +00:00
}
2001-10-31 19:26:53 +00:00
class LegalGreek : public Legal {
2001-11-07 18:06:53 +00:00
UBool full ;
2001-10-31 19:26:53 +00:00
public :
2001-11-07 18:06:53 +00:00
LegalGreek ( UBool _full ) { full = _full ; }
2001-10-31 19:26:53 +00:00
virtual ~ LegalGreek ( ) { }
virtual UBool is ( const UnicodeString & sourceString ) const ;
static UBool isVowel ( UChar c ) ;
static UBool isRho ( UChar c ) ;
} ;
/**
 * Decides whether sourceString is legal Greek, working on its canonical
 * decomposition.
 *
 * When `full` is FALSE: the string is legal unless it contains one of the
 * combining marks U+0313, U+0314, U+0300, U+0302, U+0342 or U+0345.
 *
 * When `full` is TRUE: breathing marks (U+0313 / U+0314) must appear exactly
 * once if the first letter is a vowel or rho, and not at all otherwise; if
 * the first letter is rho, the breathing mark must come before the second
 * letter.
 *
 * Returns FALSE if the decomposition itself fails, matching LegalJamo::is().
 */
UBool LegalGreek::is(const UnicodeString& sourceString) const {
    UnicodeString decomp;
    UErrorCode ec = U_ZERO_ERROR;
    Normalizer::decompose(sourceString, FALSE, 0, decomp, ec);
    if (U_FAILURE(ec)) {
        // Do not judge a string from a failed (possibly empty) decomposition.
        return FALSE;
    }

    // modern is simpler: only a fixed set of accents makes the string illegal
    if (full == FALSE) {
        // A special case which is legal but should be
        // excluded from round trip
        // if (sourceString == UnicodeString("\\u039C\\u03C0", "")) {
        //     return FALSE;
        // }
        for (int32_t i = 0; i < decomp.length(); ++i) {
            UChar c = decomp.charAt(i);
            // exclude all the accents
            if (c == 0x0313 || c == 0x0314 || c == 0x0300 || c == 0x0302
                || c == 0x0342 || c == 0x0345
                ) return FALSE;
        }
        return TRUE;
    }

    // Legal greek has breathing marks IFF there is a vowel or RHO at the start
    // IF it has them, it has exactly one.
    // IF it starts with a RHO, then the breathing mark must come before the second letter.
    // Since there are no surrogates in greek, don't worry about them
    UBool firstIsVowel = FALSE;
    UBool firstIsRho = FALSE;
    UBool noLetterYet = TRUE;
    int32_t breathingCount = 0;
    int32_t letterCount = 0;
    for (int32_t i = 0; i < decomp.length(); ++i) {
        UChar c = decomp.charAt(i);
        if (u_isalpha(c)) {
            ++letterCount;
            if (noLetterYet) {
                noLetterYet = FALSE;
                firstIsVowel = isVowel(c);
                firstIsRho = isRho(c);
            }
            // A second letter reached with no breathing mark after a leading rho.
            if (firstIsRho && letterCount == 2 && breathingCount == 0) {
                return FALSE;
            }
        }
        if (c == 0x0313 || c == 0x0314) {
            ++breathingCount;
        }
    }

    if (firstIsVowel || firstIsRho) return breathingCount == 1;
    return breathingCount == 0;
}
/**
 * Returns TRUE when c is one of the fourteen Greek vowel letters listed
 * below (seven lowercase, seven uppercase); FALSE otherwise.
 */
UBool LegalGreek::isVowel(UChar c) {
    static const UChar kVowels[] = {
        0x03B1, 0x03B5, 0x03B7, 0x03B9, 0x03BF, 0x03C5, 0x03C9,
        0x0391, 0x0395, 0x0397, 0x0399, 0x039F, 0x03A5, 0x03A9
    };
    const int32_t vowelCount = (int32_t)(sizeof(kVowels) / sizeof(kVowels[0]));
    for (int32_t k = 0; k < vowelCount; ++k) {
        if (kVowels[k] == c) {
            return TRUE;
        }
    }
    return FALSE;
}
/**
 * Returns TRUE when c is U+03C1 or U+03A1 (small / capital rho).
 */
UBool LegalGreek::isRho(UChar c) {
    if (c == 0x03C1 || c == 0x03A1) {
        return TRUE;
    }
    return FALSE;
}
2002-04-30 22:12:28 +00:00
// AbbreviatedUnicodeSetIterator Interface ---------------------------------------------
2006-01-07 01:35:28 +00:00
//
// Iterate over a UnicodeSet, only returning a sampling of the contained code points.
// density is the approximate total number of code points to be returned for the entire set.
//
2001-11-22 02:41:06 +00:00
2002-04-30 22:12:28 +00:00
class AbbreviatedUnicodeSetIterator : public UnicodeSetIterator {
2001-11-22 02:41:06 +00:00
public :
2002-04-30 22:12:28 +00:00
AbbreviatedUnicodeSetIterator ( ) ;
virtual ~ AbbreviatedUnicodeSetIterator ( ) ;
2002-12-06 21:31:22 +00:00
void reset ( UnicodeSet & set , UBool abb = FALSE , int32_t density = 100 ) ;
2001-11-22 02:41:06 +00:00
2002-06-29 00:04:16 +00:00
/**
2005-01-14 17:44:56 +00:00
* ICU " poor man's RTTI " , returns a UClassID for this class .
2002-06-29 00:04:16 +00:00
*/
2005-01-14 17:44:56 +00:00
static inline UClassID getStaticClassID ( ) { return ( UClassID ) & fgClassID ; }
2002-06-29 00:04:16 +00:00
/**
2005-01-14 17:44:56 +00:00
* ICU " poor man's RTTI " , returns a UClassID for the actual class .
2002-06-29 00:04:16 +00:00
*/
2005-01-14 17:44:56 +00:00
virtual inline UClassID getDynamicClassID ( ) const { return getStaticClassID ( ) ; }
2002-06-29 00:04:16 +00:00
2001-11-22 02:41:06 +00:00
private :
UBool abbreviated ;
2006-01-07 01:35:28 +00:00
int32_t perRange ; // The maximum number of code points to be returned from each range
2002-04-30 22:12:28 +00:00
virtual void loadRange ( int32_t range ) ;
2002-06-29 00:04:16 +00:00
/**
* The address of this static class variable serves as this class ' s ID
* for ICU " poor man's RTTI " .
*/
static const char fgClassID ;
2001-10-31 19:26:53 +00:00
} ;
2002-04-30 22:12:28 +00:00
// AbbreviatedUnicodeSetIterator Implementation ---------------------------------------
2001-11-22 02:41:06 +00:00
2002-06-29 00:04:16 +00:00
// Definition of the class-ID tag. getStaticClassID() returns the address of
// this variable, so the stored value (0) is not itself the ID.
const char AbbreviatedUnicodeSetIterator : : fgClassID = 0 ;
2002-04-30 22:12:28 +00:00
/**
 * Creates an iterator with abbreviation switched off.
 * perRange is zero-initialized as well so the object never carries an
 * indeterminate member, even before the first reset().
 */
AbbreviatedUnicodeSetIterator::AbbreviatedUnicodeSetIterator() :
    UnicodeSetIterator(), abbreviated(FALSE), perRange(0) {
}
2002-04-30 22:12:28 +00:00
// Nothing to release: the class adds only plain value members.
AbbreviatedUnicodeSetIterator::~AbbreviatedUnicodeSetIterator()
{
}
2002-12-06 21:31:22 +00:00
void AbbreviatedUnicodeSetIterator : : reset ( UnicodeSet & newSet , UBool abb , int32_t density ) {
2002-04-30 22:12:28 +00:00
UnicodeSetIterator : : reset ( newSet ) ;
abbreviated = abb ;
2002-12-06 21:31:22 +00:00
perRange = newSet . getRangeCount ( ) ;
if ( perRange ! = 0 ) {
perRange = density / perRange ;
}
2001-11-22 02:41:06 +00:00
}
2002-04-30 22:12:28 +00:00
2002-08-20 20:59:01 +00:00
void AbbreviatedUnicodeSetIterator : : loadRange ( int32_t myRange ) {
UnicodeSetIterator : : loadRange ( myRange ) ;
2002-12-06 21:31:22 +00:00
if ( abbreviated & & ( endElement > nextElement + perRange ) ) {
endElement = nextElement + perRange ;
2001-11-22 02:41:06 +00:00
}
}
2000-05-24 17:31:51 +00:00
//--------------------------------------------------------------------
// RTTest Interface
//--------------------------------------------------------------------
2001-12-03 18:06:27 +00:00
class RTTest : public IntlTest {
2000-05-24 17:31:51 +00:00
// PrintWriter out;
UnicodeString transliteratorID ;
int32_t errorLimit ;
int32_t errorCount ;
int32_t pairLimit ;
UnicodeSet sourceRange ;
UnicodeSet targetRange ;
2001-11-22 02:41:06 +00:00
UnicodeSet toSource ;
UnicodeSet toTarget ;
2001-11-26 23:09:33 +00:00
UnicodeSet roundtripExclusionsSet ;
2001-12-03 18:06:27 +00:00
IntlTest * parent ;
2001-10-31 19:26:53 +00:00
Legal * legalSource ; // NOT owned
2001-11-07 18:06:53 +00:00
UnicodeSet badCharacters ;
2000-05-24 17:31:51 +00:00
public :
/*
* create a test for the given script transliterator .
*/
2001-11-22 02:41:06 +00:00
RTTest ( const UnicodeString & transliteratorIDStr ) ;
2000-05-24 17:31:51 +00:00
virtual ~ RTTest ( ) ;
void setErrorLimit ( int32_t limit ) ;
void setPairLimit ( int32_t limit ) ;
void test ( const UnicodeString & sourceRange ,
2001-11-07 18:06:53 +00:00
const UnicodeString & targetRange ,
const char * roundtripExclusions ,
2001-12-03 18:06:27 +00:00
IntlTest * parent ,
2001-11-22 02:41:06 +00:00
UBool quick ,
2002-12-06 21:31:22 +00:00
Legal * adoptedLegal ,
int32_t density = 100 ) ;
2000-05-24 17:31:51 +00:00
private :
2001-11-07 18:06:53 +00:00
// Added to do better equality check.
static UBool isSame ( const UnicodeString & a , const UnicodeString & b ) ;
2001-11-22 02:41:06 +00:00
2001-11-07 18:06:53 +00:00
static UBool isCamel ( const UnicodeString & a ) ;
2001-11-22 02:41:06 +00:00
UBool checkIrrelevants ( Transliterator * t , const UnicodeString & irrelevants ) ;
2002-12-06 21:31:22 +00:00
void test2 ( UBool quick , int32_t density ) ;
2000-05-24 17:31:51 +00:00
void logWrongScript ( const UnicodeString & label ,
const UnicodeString & from ,
const UnicodeString & to ) ;
2001-11-22 02:41:06 +00:00
void logNotCanonical ( const UnicodeString & label ,
const UnicodeString & from ,
2001-12-01 04:33:03 +00:00
const UnicodeString & to ,
const UnicodeString & fromCan ,
2001-11-07 18:06:53 +00:00
const UnicodeString & toCan ) ;
2000-05-24 17:31:51 +00:00
2001-11-22 02:41:06 +00:00
void logFails ( const UnicodeString & label ) ;
2000-05-24 17:31:51 +00:00
2001-11-22 02:41:06 +00:00
void logToRulesFails ( const UnicodeString & label ,
const UnicodeString & from ,
const UnicodeString & to ,
const UnicodeString & toCan ) ;
2000-05-24 17:31:51 +00:00
2001-11-22 02:41:06 +00:00
void logRoundTripFailure ( const UnicodeString & from ,
2001-12-01 04:33:03 +00:00
const UnicodeString & toID ,
2001-11-22 02:41:06 +00:00
const UnicodeString & to ,
2001-12-01 04:33:03 +00:00
const UnicodeString & backID ,
2001-11-22 02:41:06 +00:00
const UnicodeString & back ) ;
2000-05-24 17:31:51 +00:00
} ;
//--------------------------------------------------------------------
// RTTest Implementation
//--------------------------------------------------------------------
/*
* create a test for the given script transliterator .
*/
2001-11-22 02:41:06 +00:00
RTTest : : RTTest ( const UnicodeString & transliteratorIDStr ) {
transliteratorID = transliteratorIDStr ;
errorLimit = 500 ;
2000-05-24 17:31:51 +00:00
errorCount = 0 ;
pairLimit = 0x10000 ;
}
// Nothing to free here: legalSource is marked NOT owned in the class.
RTTest::~RTTest()
{
}
void RTTest : : setErrorLimit ( int32_t limit ) {
errorLimit = limit ;
}
void RTTest : : setPairLimit ( int32_t limit ) {
pairLimit = limit ;
}
2001-11-07 18:06:53 +00:00
/**
 * Lenient equality used for the round-trip comparison.
 * Two strings count as the same when they are identical, or when they are
 * equal under default case folding and the first one is "camel" cased (see
 * isCamel). If neither holds, the same two tests are repeated on the
 * canonical decompositions of both strings.
 */
UBool RTTest::isSame(const UnicodeString& a, const UnicodeString& b) {
    if (a == b) {
        return TRUE;
    }
    if (a.caseCompare(b, U_FOLD_CASE_DEFAULT) == 0 && isCamel(a)) {
        return TRUE;
    }

    // Second chance: compare the decomposed forms.
    UErrorCode errorCode = U_ZERO_ERROR;
    UnicodeString decomposedA;
    UnicodeString decomposedB;
    Normalizer::decompose(a, FALSE, 0, decomposedA, errorCode);
    Normalizer::decompose(b, FALSE, 0, decomposedB, errorCode);

    if (decomposedA == decomposedB) {
        return TRUE;
    }
    if (decomposedA.caseCompare(decomposedB, U_FOLD_CASE_DEFAULT) == 0 && isCamel(decomposedA)) {
        return TRUE;
    }
    return FALSE;
}
/**
 * Returns TRUE if the string is of the form aB: an uppercase or titlecase
 * letter occurs somewhere after a lowercase (or titlecase) letter.
 * Iterates by code point, so supplementary characters are handled.
 */
UBool RTTest::isCamel(const UnicodeString& a) {
    UBool sawLower = FALSE;
    int32_t pos = 0;
    while (pos < a.length()) {
        const UChar32 ch = a.char32At(pos);
        pos += U16_LENGTH(ch);

        const int8_t category = u_charType(ch);
        if (category == U_UPPERCASE_LETTER) {
            if (sawLower) {
                return TRUE;
            }
        } else if (category == U_TITLECASE_LETTER) {
            if (sawLower) {
                return TRUE;
            }
            // A titlecase letter also counts as "lower" for what follows,
            // since its second letter is lowercase.
            sawLower = TRUE;
        } else if (category == U_LOWERCASE_LETTER) {
            sawLower = TRUE;
        }
    }
    return FALSE;
}
2000-12-08 21:17:32 +00:00
void RTTest : : test ( const UnicodeString & sourceRangeVal ,
2001-11-07 18:06:53 +00:00
const UnicodeString & targetRangeVal ,
2001-11-22 02:41:06 +00:00
const char * roundtripExclusions ,
2001-12-03 18:06:27 +00:00
IntlTest * logVal , UBool quickRt ,
2002-12-06 21:31:22 +00:00
Legal * adoptedLegal ,
int32_t density )
2001-12-03 18:06:27 +00:00
{
2000-05-24 17:31:51 +00:00
UErrorCode status = U_ZERO_ERROR ;
2000-12-08 21:17:32 +00:00
2001-12-03 18:06:27 +00:00
this - > parent = logVal ;
2001-10-31 19:26:53 +00:00
this - > legalSource = adoptedLegal ;
2001-11-22 02:41:06 +00:00
UnicodeSet neverOk ( " [:Other:] " , status ) ;
UnicodeSet okAnyway ( " [^[:Letter:]] " , status ) ;
if ( U_FAILURE ( status ) ) {
2009-06-12 19:34:21 +00:00
parent - > dataerrln ( " FAIL: Initializing UnicodeSet with [:Other:] or [^[:Letter:]] - Error: %s " , u_errorName ( status ) ) ;
2001-11-22 02:41:06 +00:00
return ;
}
this - > sourceRange . clear ( ) ;
this - > sourceRange . applyPattern ( sourceRangeVal , status ) ;
if ( U_FAILURE ( status ) ) {
2001-12-03 18:06:27 +00:00
parent - > errln ( " FAIL: UnicodeSet::applyPattern( " +
2001-11-22 02:41:06 +00:00
sourceRangeVal + " ) " ) ;
return ;
}
this - > sourceRange . removeAll ( neverOk ) ;
this - > targetRange . clear ( ) ;
this - > targetRange . applyPattern ( targetRangeVal , status ) ;
if ( U_FAILURE ( status ) ) {
2001-12-03 18:06:27 +00:00
parent - > errln ( " FAIL: UnicodeSet::applyPattern( " +
2001-11-22 02:41:06 +00:00
targetRangeVal + " ) " ) ;
return ;
}
this - > targetRange . removeAll ( neverOk ) ;
this - > toSource . clear ( ) ;
this - > toSource . applyPattern ( sourceRangeVal , status ) ;
if ( U_FAILURE ( status ) ) {
2001-12-03 18:06:27 +00:00
parent - > errln ( " FAIL: UnicodeSet::applyPattern( " +
2001-11-22 02:41:06 +00:00
sourceRangeVal + " ) " ) ;
return ;
}
this - > toSource . addAll ( okAnyway ) ;
2000-12-08 21:17:32 +00:00
2001-11-22 02:41:06 +00:00
this - > toTarget . clear ( ) ;
this - > toTarget . applyPattern ( targetRangeVal , status ) ;
if ( U_FAILURE ( status ) ) {
2001-12-03 18:06:27 +00:00
parent - > errln ( " FAIL: UnicodeSet::applyPattern( " +
2001-11-22 02:41:06 +00:00
targetRangeVal + " ) " ) ;
return ;
}
this - > toTarget . addAll ( okAnyway ) ;
2001-11-26 23:09:33 +00:00
this - > roundtripExclusionsSet . clear ( ) ;
2001-11-22 02:41:06 +00:00
if ( roundtripExclusions ! = NULL & & strlen ( roundtripExclusions ) > 0 ) {
2008-06-17 02:28:25 +00:00
this - > roundtripExclusionsSet . applyPattern ( UnicodeString ( roundtripExclusions , - 1 , US_INV ) , status ) ;
2000-05-24 17:31:51 +00:00
if ( U_FAILURE ( status ) ) {
2001-12-03 18:06:27 +00:00
parent - > errln ( " FAIL: UnicodeSet::applyPattern(%s) " , roundtripExclusions ) ;
2000-05-24 17:31:51 +00:00
return ;
}
2001-11-22 02:41:06 +00:00
}
badCharacters . clear ( ) ;
badCharacters . applyPattern ( " [:Other:] " , status ) ;
if ( U_FAILURE ( status ) ) {
2001-12-03 18:06:27 +00:00
parent - > errln ( " FAIL: UnicodeSet::applyPattern([:Other:]) " ) ;
2001-11-22 02:41:06 +00:00
return ;
}
2002-12-06 21:31:22 +00:00
test2 ( quickRt , density ) ;
2001-11-22 02:41:06 +00:00
if ( errorCount > 0 ) {
char str [ 100 ] ;
int32_t length = transliteratorID . extract ( str , 100 , NULL , status ) ;
str [ length ] = 0 ;
2001-12-04 05:25:39 +00:00
parent - > errln ( " FAIL: %s errors: %d %s " , str , errorCount , ( errorCount > errorLimit ? " (at least!) " : " " ) ) ; // + ", see " + logFileName);
2000-05-24 17:31:51 +00:00
} else {
2001-11-22 02:41:06 +00:00
char str [ 100 ] ;
int32_t length = transliteratorID . extract ( str , 100 , NULL , status ) ;
str [ length ] = 0 ;
2001-12-03 18:06:27 +00:00
parent - > logln ( " %s ok " , str ) ;
2001-11-22 02:41:06 +00:00
}
}
/**
 * Feeds each code unit of `irrelevants` through `t` on its own.
 * @return TRUE as soon as one of them comes back unchanged; FALSE if every
 *         one was altered (or the list is empty).
 */
UBool RTTest::checkIrrelevants(Transliterator* t,
                               const UnicodeString& irrelevants)
{
    int idx = 0;
    while (idx < irrelevants.length()) {
        const UChar unit = irrelevants.charAt(idx);
        const UnicodeString before(unit);
        UnicodeString after = before;
        t->transliterate(after);
        if (before == after) {
            return TRUE;
        }
        ++idx;
    }
    return FALSE;
}
2002-12-06 21:31:22 +00:00
void RTTest : : test2 ( UBool quickRt , int32_t density ) {
2001-11-22 02:41:06 +00:00
2003-04-24 00:32:44 +00:00
UnicodeString srcStr , targ , reverse ;
2001-11-22 02:41:06 +00:00
UErrorCode status = U_ZERO_ERROR ;
2001-11-22 05:51:42 +00:00
UParseError parseError ;
2002-12-12 19:54:39 +00:00
TransliteratorPointer sourceToTarget (
2001-11-22 05:51:42 +00:00
Transliterator : : createInstance ( transliteratorID , UTRANS_FORWARD , parseError ,
2002-12-12 19:54:39 +00:00
status ) ) ;
2004-06-12 06:16:57 +00:00
if ( ( Transliterator * ) sourceToTarget = = NULL ) {
2011-03-03 19:29:57 +00:00
parent - > dataerrln ( " FAIL: createInstance( " + transliteratorID +
2001-11-22 05:51:42 +00:00
" ) returned NULL. Error: " + u_errorName ( status )
+ " \n \t preContext : " + prettify ( parseError . preContext )
+ " \n \t postContext : " + prettify ( parseError . postContext ) ) ;
return ;
2001-11-22 02:41:06 +00:00
}
2002-12-12 19:54:39 +00:00
TransliteratorPointer targetToSource ( sourceToTarget - > createInverse ( status ) ) ;
2004-06-12 06:16:57 +00:00
if ( ( Transliterator * ) targetToSource = = NULL ) {
2001-12-04 05:25:39 +00:00
parent - > errln ( " FAIL: " + transliteratorID +
2001-11-28 06:05:12 +00:00
" .createInverse() returned NULL. Error: " + u_errorName ( status )
+ " \n \t preContext : " + prettify ( parseError . preContext )
+ " \n \t postContext : " + prettify ( parseError . postContext ) ) ;
2001-11-22 02:41:06 +00:00
return ;
}
2002-04-30 22:12:28 +00:00
AbbreviatedUnicodeSetIterator usi ;
AbbreviatedUnicodeSetIterator usi2 ;
2001-11-22 02:41:06 +00:00
2001-12-04 05:25:39 +00:00
parent - > logln ( " Checking that at least one irrelevant character is not NFC'ed " ) ;
2001-11-22 02:41:06 +00:00
// string is from NFC_NO in the UCD
UnicodeString irrelevants = CharsToUnicodeString ( " \\ u2000 \\ u2001 \\ u2126 \\ u212A \\ u212B \\ u2329 " ) ;
if ( checkIrrelevants ( sourceToTarget , irrelevants ) = = FALSE ) {
logFails ( " Source-Target, irrelevants " ) ;
}
if ( checkIrrelevants ( targetToSource , irrelevants ) = = FALSE ) {
logFails ( " Target-Source, irrelevants " ) ;
}
2001-12-03 18:06:27 +00:00
if ( ! quickRt ) {
parent - > logln ( " Checking that toRules works " ) ;
2001-11-22 02:41:06 +00:00
UnicodeString rules = " " ;
UParseError parseError ;
2001-12-04 05:25:39 +00:00
rules = sourceToTarget - > toRules ( rules , TRUE ) ;
// parent->logln((UnicodeString)"toRules => " + rules);
2002-12-12 19:54:39 +00:00
TransliteratorPointer sourceToTarget2 ( Transliterator : : createFromRules (
2001-11-22 02:41:06 +00:00
" s2t2 " , rules ,
UTRANS_FORWARD ,
2002-12-12 19:54:39 +00:00
parseError , status ) ) ;
2001-11-22 02:41:06 +00:00
if ( U_FAILURE ( status ) ) {
2001-12-04 05:25:39 +00:00
parent - > errln ( " FAIL: createFromRules %s \n " , u_errorName ( status ) ) ;
2001-11-22 02:41:06 +00:00
return ;
}
rules = targetToSource - > toRules ( rules , FALSE ) ;
2002-12-12 19:54:39 +00:00
TransliteratorPointer targetToSource2 ( Transliterator : : createFromRules (
2001-11-22 02:41:06 +00:00
" t2s2 " , rules ,
UTRANS_FORWARD ,
2002-12-12 19:54:39 +00:00
parseError , status ) ) ;
2001-11-22 02:41:06 +00:00
if ( U_FAILURE ( status ) ) {
2001-12-04 05:25:39 +00:00
parent - > errln ( " FAIL: createFromRules %s \n " , u_errorName ( status ) ) ;
2001-11-22 02:41:06 +00:00
return ;
}
2002-12-06 22:51:13 +00:00
usi . reset ( sourceRange ) ;
2001-11-26 23:09:33 +00:00
for ( ; ; ) {
2002-04-30 22:12:28 +00:00
if ( ! usi . next ( ) | | usi . isString ( ) ) break ;
UChar32 c = usi . getCodepoint ( ) ;
2001-11-22 02:41:06 +00:00
2003-04-24 00:32:44 +00:00
UnicodeString srcStr ( ( UChar32 ) c ) ;
UnicodeString targ = srcStr ;
2001-11-22 02:41:06 +00:00
sourceToTarget - > transliterate ( targ ) ;
2003-04-24 00:32:44 +00:00
UnicodeString targ2 = srcStr ;
2001-11-22 02:41:06 +00:00
sourceToTarget2 - > transliterate ( targ2 ) ;
if ( targ ! = targ2 ) {
2003-04-24 00:32:44 +00:00
logToRulesFails ( " Source-Target, toRules " , srcStr , targ , targ2 ) ;
2001-11-22 02:41:06 +00:00
}
}
2002-12-06 22:51:13 +00:00
usi . reset ( targetRange ) ;
2001-11-26 23:09:33 +00:00
for ( ; ; ) {
2002-04-30 22:12:28 +00:00
if ( ! usi . next ( ) | | usi . isString ( ) ) break ;
UChar32 c = usi . getCodepoint ( ) ;
2001-11-22 02:41:06 +00:00
2003-04-24 00:32:44 +00:00
UnicodeString srcStr ( ( UChar32 ) c ) ;
UnicodeString targ = srcStr ;
2001-11-22 02:41:06 +00:00
targetToSource - > transliterate ( targ ) ;
2003-04-24 00:32:44 +00:00
UnicodeString targ2 = srcStr ;
2001-11-22 02:41:06 +00:00
targetToSource2 - > transliterate ( targ2 ) ;
if ( targ ! = targ2 ) {
2003-04-24 00:32:44 +00:00
logToRulesFails ( " Target-Source, toRules " , srcStr , targ , targ2 ) ;
2001-11-22 02:41:06 +00:00
}
}
}
2001-12-03 18:06:27 +00:00
parent - > logln ( " Checking that all source characters convert to target - Singles " ) ;
2001-11-22 02:41:06 +00:00
UnicodeSet failSourceTarg ;
2002-12-06 22:51:13 +00:00
usi . reset ( sourceRange ) ;
2001-11-26 23:09:33 +00:00
for ( ; ; ) {
2002-04-30 22:12:28 +00:00
if ( ! usi . next ( ) | | usi . isString ( ) ) break ;
UChar32 c = usi . getCodepoint ( ) ;
2001-11-22 02:41:06 +00:00
2003-04-24 00:32:44 +00:00
UnicodeString srcStr ( ( UChar32 ) c ) ;
UnicodeString targ = srcStr ;
2001-11-22 02:41:06 +00:00
sourceToTarget - > transliterate ( targ ) ;
2002-04-30 22:12:28 +00:00
if ( toTarget . containsAll ( targ ) = = FALSE
| | badCharacters . containsSome ( targ ) = = TRUE ) {
2001-11-22 02:41:06 +00:00
UnicodeString targD ;
Normalizer : : decompose ( targ , FALSE , 0 , targD , status ) ;
if ( U_FAILURE ( status ) ) {
2001-12-04 05:25:39 +00:00
parent - > errln ( " FAIL: Internal error during decomposition %s \n " , u_errorName ( status ) ) ;
2001-11-22 02:41:06 +00:00
return ;
}
2002-04-30 22:12:28 +00:00
if ( toTarget . containsAll ( targD ) = = FALSE | |
badCharacters . containsSome ( targD ) = = TRUE ) {
2003-04-24 00:32:44 +00:00
logWrongScript ( " Source-Target " , srcStr , targ ) ;
2001-11-22 02:41:06 +00:00
failSourceTarg . add ( c ) ;
continue ;
}
}
UnicodeString cs2 ;
2003-04-24 00:32:44 +00:00
Normalizer : : decompose ( srcStr , FALSE , 0 , cs2 , status ) ;
2000-05-24 17:31:51 +00:00
if ( U_FAILURE ( status ) ) {
2001-12-04 05:25:39 +00:00
parent - > errln ( " FAIL: Internal error during decomposition %s \n " , u_errorName ( status ) ) ;
2000-05-24 17:31:51 +00:00
return ;
}
2001-11-22 02:41:06 +00:00
UnicodeString targ2 = cs2 ;
sourceToTarget - > transliterate ( targ2 ) ;
if ( targ ! = targ2 ) {
2003-04-24 00:32:44 +00:00
logNotCanonical ( " Source-Target " , srcStr , targ , cs2 , targ2 ) ;
2001-11-22 02:41:06 +00:00
}
2000-05-24 17:31:51 +00:00
}
2001-11-22 02:41:06 +00:00
2001-12-03 18:06:27 +00:00
parent - > logln ( " Checking that all source characters convert to target - Doubles " ) ;
2001-11-22 02:41:06 +00:00
UnicodeSet sourceRangeMinusFailures ( sourceRange ) ;
sourceRangeMinusFailures . removeAll ( failSourceTarg ) ;
2002-12-06 21:31:22 +00:00
usi . reset ( sourceRangeMinusFailures , quickRt , density ) ;
2002-04-30 22:12:28 +00:00
for ( ; ; ) {
if ( ! usi . next ( ) | | usi . isString ( ) ) break ;
UChar32 c = usi . getCodepoint ( ) ;
2001-11-22 02:41:06 +00:00
2002-12-06 21:31:22 +00:00
usi2 . reset ( sourceRangeMinusFailures , quickRt , density ) ;
2001-11-26 23:09:33 +00:00
for ( ; ; ) {
2002-04-30 22:12:28 +00:00
if ( ! usi2 . next ( ) | | usi2 . isString ( ) ) break ;
UChar32 d = usi2 . getCodepoint ( ) ;
2001-11-22 02:41:06 +00:00
2003-04-24 00:32:44 +00:00
UnicodeString srcStr ;
srcStr + = ( UChar32 ) c ;
srcStr + = ( UChar32 ) d ;
UnicodeString targ = srcStr ;
2001-11-22 02:41:06 +00:00
sourceToTarget - > transliterate ( targ ) ;
2002-04-30 22:12:28 +00:00
if ( toTarget . containsAll ( targ ) = = FALSE | |
badCharacters . containsSome ( targ ) = = TRUE )
2001-11-26 23:09:33 +00:00
{
2001-11-22 02:41:06 +00:00
UnicodeString targD ;
Normalizer : : decompose ( targ , FALSE , 0 , targD , status ) ;
if ( U_FAILURE ( status ) ) {
2001-12-04 05:25:39 +00:00
parent - > errln ( " FAIL: Internal error during decomposition %s \n " , u_errorName ( status ) ) ;
2001-11-22 02:41:06 +00:00
return ;
}
2002-04-30 22:12:28 +00:00
if ( toTarget . containsAll ( targD ) = = FALSE | |
badCharacters . containsSome ( targD ) = = TRUE ) {
2003-04-24 00:32:44 +00:00
logWrongScript ( " Source-Target " , srcStr , targ ) ;
2001-11-22 02:41:06 +00:00
continue ;
}
}
UnicodeString cs2 ;
2003-04-24 00:32:44 +00:00
Normalizer : : decompose ( srcStr , FALSE , 0 , cs2 , status ) ;
2001-11-22 02:41:06 +00:00
if ( U_FAILURE ( status ) ) {
2001-12-04 05:25:39 +00:00
parent - > errln ( " FAIL: Internal error during decomposition %s \n " , u_errorName ( status ) ) ;
2001-11-22 02:41:06 +00:00
return ;
}
UnicodeString targ2 = cs2 ;
sourceToTarget - > transliterate ( targ2 ) ;
if ( targ ! = targ2 ) {
2003-04-24 00:32:44 +00:00
logNotCanonical ( " Source-Target " , srcStr , targ , cs2 , targ2 ) ;
2001-11-22 02:41:06 +00:00
}
}
}
2001-12-03 18:06:27 +00:00
parent - > logln ( " Checking that target characters convert to source and back - Singles " ) ;
2001-11-22 02:41:06 +00:00
UnicodeSet failTargSource ;
UnicodeSet failRound ;
2002-12-06 22:51:13 +00:00
usi . reset ( targetRange ) ;
2001-11-26 23:09:33 +00:00
for ( ; ; ) {
2002-05-31 18:05:12 +00:00
if ( ! usi . next ( ) ) break ;
if ( usi . isString ( ) ) {
2003-04-24 00:32:44 +00:00
srcStr = usi . getString ( ) ;
2002-05-31 18:05:12 +00:00
} else {
2003-04-24 00:32:44 +00:00
srcStr = ( UnicodeString ) usi . getCodepoint ( ) ;
2002-05-31 18:05:12 +00:00
}
2001-11-22 02:41:06 +00:00
2003-04-24 00:32:44 +00:00
UChar32 c = srcStr . char32At ( 0 ) ;
2002-05-31 18:05:12 +00:00
2003-04-24 00:32:44 +00:00
targ = srcStr ;
2001-11-22 02:41:06 +00:00
targetToSource - > transliterate ( targ ) ;
reverse = targ ;
sourceToTarget - > transliterate ( reverse ) ;
2002-04-30 22:12:28 +00:00
if ( toSource . containsAll ( targ ) = = FALSE | |
badCharacters . containsSome ( targ ) = = TRUE ) {
2001-11-22 02:41:06 +00:00
UnicodeString targD ;
Normalizer : : decompose ( targ , FALSE , 0 , targD , status ) ;
if ( U_FAILURE ( status ) ) {
2001-12-04 05:25:39 +00:00
parent - > errln ( " FAIL: Internal error during decomposition%s \n " , u_errorName ( status ) ) ;
2001-11-22 02:41:06 +00:00
return ;
}
2004-05-28 00:19:06 +00:00
if ( toSource . containsAll ( targD ) = = FALSE ) {
2003-04-24 00:32:44 +00:00
logWrongScript ( " Target-Source " , srcStr , targ ) ;
2002-06-03 17:00:20 +00:00
failTargSource . add ( c ) ;
2001-11-22 02:41:06 +00:00
continue ;
}
2004-05-28 00:19:06 +00:00
if ( badCharacters . containsSome ( targD ) = = TRUE ) {
logWrongScript ( " Target-Source* " , srcStr , targ ) ;
failTargSource . add ( c ) ;
continue ;
}
2001-11-22 02:41:06 +00:00
}
2003-04-24 00:32:44 +00:00
if ( isSame ( srcStr , reverse ) = = FALSE & &
2002-05-31 18:05:12 +00:00
roundtripExclusionsSet . contains ( c ) = = FALSE
2003-04-24 00:32:44 +00:00
& & roundtripExclusionsSet . contains ( srcStr ) = = FALSE ) {
logRoundTripFailure ( srcStr , targetToSource - > getID ( ) , targ , sourceToTarget - > getID ( ) , reverse ) ;
2002-06-03 17:00:20 +00:00
failRound . add ( c ) ;
2001-11-22 02:41:06 +00:00
continue ;
}
UnicodeString targ2 ;
Normalizer : : decompose ( targ , FALSE , 0 , targ2 , status ) ;
2000-05-24 17:31:51 +00:00
if ( U_FAILURE ( status ) ) {
2001-12-04 05:25:39 +00:00
parent - > errln ( " FAIL: Internal error during decomposition%s \n " , u_errorName ( status ) ) ;
2000-05-24 17:31:51 +00:00
return ;
}
2001-11-22 02:41:06 +00:00
UnicodeString reverse2 = targ2 ;
sourceToTarget - > transliterate ( reverse2 ) ;
if ( reverse ! = reverse2 ) {
2001-12-01 04:33:03 +00:00
logNotCanonical ( " Target-Source " , targ , reverse , targ2 , reverse2 ) ;
2001-11-22 02:41:06 +00:00
}
2001-11-07 18:06:53 +00:00
}
2000-05-24 17:31:51 +00:00
2001-12-03 18:06:27 +00:00
parent - > logln ( " Checking that target characters convert to source and back - Doubles " ) ;
2001-11-22 02:41:06 +00:00
int32_t count = 0 ;
2001-11-26 23:09:33 +00:00
2001-11-22 02:41:06 +00:00
UnicodeSet targetRangeMinusFailures ( targetRange ) ;
targetRangeMinusFailures . removeAll ( failTargSource ) ;
targetRangeMinusFailures . removeAll ( failRound ) ;
2001-11-26 23:09:33 +00:00
2002-12-06 21:31:22 +00:00
usi . reset ( targetRangeMinusFailures , quickRt , density ) ;
2002-06-03 17:00:20 +00:00
UnicodeString targ2 ;
UnicodeString reverse2 ;
UnicodeString targD ;
2001-11-26 23:09:33 +00:00
for ( ; ; ) {
2002-04-30 22:12:28 +00:00
if ( ! usi . next ( ) | | usi . isString ( ) ) break ;
UChar32 c = usi . getCodepoint ( ) ;
2001-11-22 02:41:06 +00:00
if ( + + count > pairLimit ) {
//throw new TestTruncated("Test truncated at " + pairLimit + " x 64k pairs");
2001-12-03 18:06:27 +00:00
parent - > logln ( " " ) ;
parent - > logln ( ( UnicodeString ) " Test truncated at " + pairLimit + " x 64k pairs " ) ;
2001-11-22 02:41:06 +00:00
return ;
}
2001-11-26 23:09:33 +00:00
2002-12-06 21:31:22 +00:00
usi2 . reset ( targetRangeMinusFailures , quickRt , density ) ;
2001-11-26 23:09:33 +00:00
for ( ; ; ) {
2002-06-03 17:00:20 +00:00
if ( ! usi2 . next ( ) | | usi2 . isString ( ) )
break ;
2002-04-30 22:12:28 +00:00
UChar32 d = usi2 . getCodepoint ( ) ;
2003-04-24 00:32:44 +00:00
srcStr . truncate ( 0 ) ; // empty the variable without construction/destruction
srcStr + = c ;
srcStr + = d ;
2000-05-24 17:31:51 +00:00
2003-04-24 00:32:44 +00:00
targ = srcStr ;
2001-11-22 02:41:06 +00:00
targetToSource - > transliterate ( targ ) ;
reverse = targ ;
sourceToTarget - > transliterate ( reverse ) ;
2001-11-26 23:09:33 +00:00
2002-04-30 22:12:28 +00:00
if ( toSource . containsAll ( targ ) = = FALSE | |
badCharacters . containsSome ( targ ) = = TRUE )
2001-11-22 02:41:06 +00:00
{
2002-06-03 17:00:20 +00:00
targD . truncate ( 0 ) ; // empty the variable without construction/destruction
2001-11-22 02:41:06 +00:00
Normalizer : : decompose ( targ , FALSE , 0 , targD , status ) ;
if ( U_FAILURE ( status ) ) {
2001-12-04 05:25:39 +00:00
parent - > errln ( " FAIL: Internal error during decomposition%s \n " ,
2001-11-22 02:41:06 +00:00
u_errorName ( status ) ) ;
return ;
}
2002-04-30 22:12:28 +00:00
if ( toSource . containsAll ( targD ) = = FALSE
2002-06-03 17:00:20 +00:00
| | badCharacters . containsSome ( targD ) = = TRUE )
{
2003-04-24 00:32:44 +00:00
logWrongScript ( " Target-Source " , srcStr , targ ) ;
2001-11-22 02:41:06 +00:00
continue ;
}
}
2003-04-24 00:32:44 +00:00
if ( isSame ( srcStr , reverse ) = = FALSE & &
2001-11-26 23:09:33 +00:00
roundtripExclusionsSet . contains ( c ) = = FALSE & &
2002-05-31 18:05:12 +00:00
roundtripExclusionsSet . contains ( d ) = = FALSE & &
2003-04-24 00:32:44 +00:00
roundtripExclusionsSet . contains ( srcStr ) = = FALSE )
2002-06-03 17:00:20 +00:00
{
2003-04-24 00:32:44 +00:00
logRoundTripFailure ( srcStr , targetToSource - > getID ( ) , targ , sourceToTarget - > getID ( ) , reverse ) ;
2001-11-22 02:41:06 +00:00
continue ;
}
2002-06-03 17:00:20 +00:00
targ2 . truncate ( 0 ) ; // empty the variable without construction/destruction
2001-11-22 02:41:06 +00:00
Normalizer : : decompose ( targ , FALSE , 0 , targ2 , status ) ;
if ( U_FAILURE ( status ) ) {
2001-12-04 05:25:39 +00:00
parent - > errln ( " FAIL: Internal error during decomposition%s \n " , u_errorName ( status ) ) ;
2001-11-22 02:41:06 +00:00
return ;
}
2002-06-03 17:00:20 +00:00
reverse2 = targ2 ;
2001-11-22 02:41:06 +00:00
sourceToTarget - > transliterate ( reverse2 ) ;
if ( reverse ! = reverse2 ) {
2001-12-01 04:33:03 +00:00
logNotCanonical ( " Target-Source " , targ , reverse , targ2 , reverse2 ) ;
2001-11-22 02:41:06 +00:00
}
}
2000-05-24 17:31:51 +00:00
}
2001-12-03 18:06:27 +00:00
parent - > logln ( " " ) ;
2000-05-24 17:31:51 +00:00
}
void RTTest : : logWrongScript ( const UnicodeString & label ,
const UnicodeString & from ,
const UnicodeString & to ) {
2001-12-04 05:25:39 +00:00
parent - > errln ( ( UnicodeString ) " FAIL " +
2000-05-24 17:31:51 +00:00
label + " : " +
from + " ( " + TestUtility : : hex ( from ) + " ) => " +
to + " ( " + TestUtility : : hex ( to ) + " ) " ) ;
+ + errorCount ;
}
2001-11-07 18:06:53 +00:00
void RTTest : : logNotCanonical ( const UnicodeString & label ,
const UnicodeString & from ,
const UnicodeString & to ,
2001-12-01 04:33:03 +00:00
const UnicodeString & fromCan ,
2001-11-07 18:06:53 +00:00
const UnicodeString & toCan ) {
2001-12-04 05:25:39 +00:00
parent - > errln ( ( UnicodeString ) " FAIL (can.equiv) " +
2001-11-07 18:06:53 +00:00
label + " : " +
from + " ( " + TestUtility : : hex ( from ) + " ) => " +
to + " ( " + TestUtility : : hex ( to ) + " ) " +
2001-12-01 04:33:03 +00:00
fromCan + " ( " + TestUtility : : hex ( fromCan ) + " ) => " +
2001-11-07 18:06:53 +00:00
toCan + " ( " +
2001-11-22 02:41:06 +00:00
TestUtility : : hex ( toCan ) + " ) "
) ;
+ + errorCount ;
}
void RTTest : : logFails ( const UnicodeString & label ) {
2001-12-04 05:25:39 +00:00
parent - > errln ( ( UnicodeString ) " <br>FAIL " + label ) ;
2001-11-22 02:41:06 +00:00
+ + errorCount ;
}
void RTTest : : logToRulesFails ( const UnicodeString & label ,
const UnicodeString & from ,
const UnicodeString & to ,
2001-12-03 18:06:27 +00:00
const UnicodeString & otherTo )
{
2001-12-04 05:25:39 +00:00
parent - > errln ( ( UnicodeString ) " FAIL: " +
2001-11-22 02:41:06 +00:00
label + " : " +
from + " ( " + TestUtility : : hex ( from ) + " ) => " +
to + " ( " + TestUtility : : hex ( to ) + " ) " +
2001-12-01 04:33:03 +00:00
" != " +
otherTo + " ( " +
TestUtility : : hex ( otherTo ) + " ) "
2001-11-07 18:06:53 +00:00
) ;
+ + errorCount ;
}
2001-11-22 02:41:06 +00:00
2000-05-24 17:31:51 +00:00
void RTTest : : logRoundTripFailure ( const UnicodeString & from ,
2001-12-01 04:33:03 +00:00
const UnicodeString & toID ,
2000-05-24 17:31:51 +00:00
const UnicodeString & to ,
2001-12-01 04:33:03 +00:00
const UnicodeString & backID ,
2000-05-24 17:31:51 +00:00
const UnicodeString & back ) {
2001-11-22 02:41:06 +00:00
if ( legalSource - > is ( from ) = = FALSE ) return ; // skip illegals
2001-10-31 19:26:53 +00:00
2001-12-04 05:25:39 +00:00
parent - > errln ( ( UnicodeString ) " FAIL Roundtrip: " +
2000-05-24 17:31:51 +00:00
from + " ( " + TestUtility : : hex ( from ) + " ) => " +
2001-12-01 04:33:03 +00:00
to + " ( " + TestUtility : : hex ( to ) + " ) " + toID + " => " +
back + " ( " + TestUtility : : hex ( back ) + " ) " + backID + " => " ) ;
2000-05-24 17:31:51 +00:00
+ + errorCount ;
}
//--------------------------------------------------------------------
// Specific Tests
//--------------------------------------------------------------------
2002-07-12 19:39:56 +00:00
/*
  Note: Unicode 3.2 added new Hiragana/Katakana characters:

  3095..3096    ; 3.2 #  [2] HIRAGANA LETTER SMALL KA..HIRAGANA LETTER SMALL KE
  309F..30A0    ; 3.2 #  [2] HIRAGANA DIGRAPH YORI..KATAKANA-HIRAGANA DOUBLE HYPHEN
  30FF          ; 3.2 #      KATAKANA DIGRAPH KOTO
  31F0..31FF    ; 3.2 # [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO

  Unicode 5.2 added another Hiragana character:
  1F200         ; 5.2 #      SQUARE HIRAGANA HOKA

  We will not add them to the rules until they are more widely supported (e.g. in fonts on Windows).
  A bug has been filed to remind us to do this: #1979.
*/
2009-11-13 19:25:21 +00:00
2010-09-21 00:12:49 +00:00
// Set-pattern text for the Katakana characters under test; passed to RTTest::test by
// TestKana (first argument) and TestKatakana (second argument).
static const char KATAKANA [ ] = " [[[:katakana:][ \\ u30A1- \\ u30FA \\ u30FC]]-[ \\ u30FF \\ u31F0- \\ u31FF]-[:^age=5.2:]] " ;
// Set-pattern text for the Hiragana characters under test; used by TestKana and TestHiragana.
static const char HIRAGANA [ ] = " [[[:hiragana:][ \\ u3040- \\ u3094]]-[ \\ u3095- \\ u3096 \\ u309F- \\ u30A0 \\ U0001F200- \\ U0001F2FF]-[:^age=5.2:]] " ;
2002-07-12 19:39:56 +00:00
// Pattern containing only U+30FC; TestKana appends it to both its second set and its exclusion pattern.
static const char LENGTH [ ] = " [ \\ u30FC] " ;
// Pattern for U+FF65..U+FF9D; concatenated into the exclusion pattern by TestKana and TestKatakana.
static const char HALFWIDTH_KATAKANA [ ] = " [ \\ uFF65- \\ uFF9D] " ;
// Pattern for U+30FD and U+30FE; concatenated into TestKatakana's exclusion pattern.
static const char KATAKANA_ITERATION [ ] = " [ \\ u30FD \\ u30FE] " ;
// Pattern for U+309D and U+309E; passed as the exclusion pattern by TestHiragana.
static const char HIRAGANA_ITERATION [ ] = " [ \\ u309D \\ u309E] " ;
// Size in bytes of the stack buffers in which TestKana/TestKatakana assemble their exclusion patterns.
static const int32_t TEMP_MAX = 256 ;
2001-11-22 02:41:06 +00:00
void TransliteratorRoundTripTest : : TestKana ( ) {
RTTest test ( " Katakana-Hiragana " ) ;
Legal * legal = new Legal ( ) ;
2002-07-12 19:39:56 +00:00
char temp [ TEMP_MAX ] ;
strcpy ( temp , " [ " ) ;
strcat ( temp , HALFWIDTH_KATAKANA ) ;
strcat ( temp , LENGTH ) ;
strcat ( temp , " ] " ) ;
2004-11-11 23:34:58 +00:00
test . test ( KATAKANA , UnicodeString ( " [ " ) + HIRAGANA + LENGTH + UnicodeString ( " ] " ) ,
2002-07-12 19:39:56 +00:00
temp ,
this , quick , legal ) ;
2001-11-22 02:41:06 +00:00
delete legal ;
2000-05-24 17:31:51 +00:00
}
2001-11-22 02:41:06 +00:00
void TransliteratorRoundTripTest : : TestHiragana ( ) {
RTTest test ( " Latin-Hiragana " ) ;
Legal * legal = new Legal ( ) ;
test . test ( UnicodeString ( " [a-zA-Z] " , " " ) ,
2008-06-17 02:28:25 +00:00
UnicodeString ( HIRAGANA , - 1 , US_INV ) ,
2002-07-12 19:39:56 +00:00
HIRAGANA_ITERATION , this , quick , legal ) ;
2001-11-22 02:41:06 +00:00
delete legal ;
2000-05-24 17:31:51 +00:00
}
2001-11-22 02:41:06 +00:00
void TransliteratorRoundTripTest : : TestKatakana ( ) {
RTTest test ( " Latin-Katakana " ) ;
Legal * legal = new Legal ( ) ;
2002-07-12 19:39:56 +00:00
char temp [ TEMP_MAX ] ;
strcpy ( temp , " [ " ) ;
strcat ( temp , KATAKANA_ITERATION ) ;
strcat ( temp , HALFWIDTH_KATAKANA ) ;
strcat ( temp , " ] " ) ;
2001-11-22 02:41:06 +00:00
test . test ( UnicodeString ( " [a-zA-Z] " , " " ) ,
2008-06-17 02:28:25 +00:00
UnicodeString ( KATAKANA , - 1 , US_INV ) ,
2002-07-12 19:39:56 +00:00
temp ,
this , quick , legal ) ;
2001-11-22 02:41:06 +00:00
delete legal ;
2000-05-24 17:31:51 +00:00
}
void TransliteratorRoundTripTest : : TestJamo ( ) {
2001-11-22 02:41:06 +00:00
RTTest t ( " Latin-Jamo " ) ;
2001-11-30 18:19:56 +00:00
Legal * legal = new LegalJamo ( ) ;
2001-11-22 02:41:06 +00:00
t . test ( UnicodeString ( " [a-zA-Z] " , " " ) ,
UnicodeString ( " [ \\ u1100- \\ u1112 \\ u1161- \\ u1175 \\ u11A8- \\ u11C2] " ,
" " ) ,
2001-11-30 18:19:56 +00:00
NULL , this , quick , legal ) ;
2001-11-22 02:41:06 +00:00
delete legal ;
2000-05-24 17:31:51 +00:00
}
2001-11-22 02:41:06 +00:00
void TransliteratorRoundTripTest : : TestHangul ( ) {
RTTest t ( " Latin-Hangul " ) ;
Legal * legal = new Legal ( ) ;
2002-12-06 22:51:13 +00:00
if ( quick ) t . setPairLimit ( 1000 ) ;
2001-11-22 02:41:06 +00:00
t . test ( UnicodeString ( " [a-zA-Z] " , " " ) ,
UnicodeString ( " [ \\ uAC00- \\ uD7A4] " , " " ) ,
2002-12-06 22:51:13 +00:00
NULL , this , quick , legal , 1 ) ;
2001-11-22 02:41:06 +00:00
delete legal ;
2000-05-24 17:31:51 +00:00
}
2004-06-07 17:11:07 +00:00
// ASSERT_SUCCESS(status): when `status` is a failure code, reports it through
// errcheckln together with __FILE__, __LINE__ and u_errorName(status), then
// returns from the enclosing function. Because the expansion contains a bare
// `return;`, it can only be used inside functions returning void.
# define ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \
2009-06-12 19:34:21 +00:00
errcheckln ( status , " error at file %s, line %d, status = %s " , __FILE__ , __LINE__ , \
2004-06-07 17:11:07 +00:00
u_errorName ( status ) ) ; \
return ; } }
static void writeStringInU8 ( FILE * out , const UnicodeString & s ) {
int i ;
for ( i = 0 ; i < s . length ( ) ; i = s . moveIndex32 ( i , 1 ) ) {
UChar32 c = s . char32At ( i ) ;
uint8_t bufForOneChar [ 10 ] ;
UBool isError = FALSE ;
int32_t destIdx = 0 ;
2006-07-25 05:31:24 +00:00
U8_APPEND ( bufForOneChar , destIdx , ( int32_t ) sizeof ( bufForOneChar ) , c , isError ) ;
2004-06-07 17:11:07 +00:00
fwrite ( bufForOneChar , 1 , destIdx , out ) ;
}
}
void TransliteratorRoundTripTest : : TestHan ( ) {
UErrorCode status = U_ZERO_ERROR ;
2009-11-20 06:28:25 +00:00
LocalULocaleDataPointer uld ( ulocdata_open ( " zh " , & status ) ) ;
LocalUSetPointer USetExemplars ( ulocdata_getExemplarSet ( uld . getAlias ( ) , uset_openEmpty ( ) , 0 , ULOCDATA_ES_STANDARD , & status ) ) ;
2004-06-07 17:11:07 +00:00
ASSERT_SUCCESS ( status ) ;
UnicodeString source ;
UChar32 c ;
int i ;
for ( i = 0 ; ; i + + ) {
// Add all of the Chinese exemplar chars to the string "source".
2009-11-20 06:28:25 +00:00
c = uset_charAt ( USetExemplars . getAlias ( ) , i ) ;
2004-06-07 17:11:07 +00:00
if ( c = = ( UChar32 ) - 1 ) {
break ;
}
source . append ( c ) ;
}
// transform with Han translit
Transliterator * hanTL = Transliterator : : createInstance ( " Han-Latin " , UTRANS_FORWARD , status ) ;
ASSERT_SUCCESS ( status ) ;
UnicodeString target = source ;
hanTL - > transliterate ( target ) ;
// now verify that there are no Han characters left
UnicodeSet allHan ( " [:han:] " , status ) ;
ASSERT_SUCCESS ( status ) ;
if ( allHan . containsSome ( target ) ) {
errln ( " file %s, line %d, No Han must be left after Han-Latin transliteration " ,
__FILE__ , __LINE__ ) ;
}
// check the pinyin translit
Transliterator * pn = Transliterator : : createInstance ( " Latin-NumericPinyin " , UTRANS_FORWARD , status ) ;
ASSERT_SUCCESS ( status ) ;
UnicodeString target2 = target ;
pn - > transliterate ( target2 ) ;
// verify that there are no marks
2005-06-09 17:30:48 +00:00
Transliterator * nfd = Transliterator : : createInstance ( " nfd " , UTRANS_FORWARD , status ) ;
2004-06-07 17:11:07 +00:00
ASSERT_SUCCESS ( status ) ;
2005-06-09 17:30:48 +00:00
UnicodeString nfded = target2 ;
nfd - > transliterate ( nfded ) ;
2008-06-17 00:55:35 +00:00
UnicodeSet allMarks ( UNICODE_STRING_SIMPLE ( " [ \\ u0304 \\ u0301 \\ u030C \\ u0300 \\ u0306] " ) , status ) ; // look only for Pinyin tone marks, not all marks (there are some others in there)
2004-06-07 17:11:07 +00:00
ASSERT_SUCCESS ( status ) ;
2005-06-09 17:30:48 +00:00
assertFalse ( " NumericPinyin must contain no marks " , allMarks . containsSome ( nfded ) ) ;
2004-06-07 17:11:07 +00:00
// verify roundtrip
Transliterator * np = pn - > createInverse ( status ) ;
ASSERT_SUCCESS ( status ) ;
2005-06-09 17:30:48 +00:00
UnicodeString target3 = target2 ;
2004-06-07 17:11:07 +00:00
np - > transliterate ( target3 ) ;
UBool roundtripOK = ( target3 . compare ( target ) = = 0 ) ;
assertTrue ( " NumericPinyin must roundtrip " , roundtripOK ) ;
if ( ! roundtripOK ) {
const char * filename = " numeric-pinyin.log.txt " ;
FILE * out = fopen ( filename , " w " ) ;
errln ( " Creating log file %s \n " , filename ) ;
fprintf ( out , " Pinyin: " ) ;
writeStringInU8 ( out , target ) ;
fprintf ( out , " \n Pinyin-Numeric-Pinyin: " ) ;
writeStringInU8 ( out , target2 ) ;
2005-06-09 17:30:48 +00:00
fprintf ( out , " \n Numeric-Pinyin-Pinyin: " ) ;
writeStringInU8 ( out , target3 ) ;
2004-06-07 17:11:07 +00:00
fprintf ( out , " \n " ) ;
fclose ( out ) ;
}
delete hanTL ;
delete pn ;
2005-06-09 17:30:48 +00:00
delete nfd ;
2004-06-07 17:11:07 +00:00
delete np ;
}
2000-05-24 17:31:51 +00:00
void TransliteratorRoundTripTest : : TestGreek ( ) {
2005-02-04 00:50:01 +00:00
2008-08-06 00:42:42 +00:00
// CLDR bug #1911: This test should be moved into CLDR.
// It is left in its current state as a regression test.
// if (isICUVersionAtLeast(ICU_39)) {
// // We temporarily filter against Unicode 4.1, but we only do this
// // before version 3.4.
// errln("FAIL: TestGreek needs to be updated to remove delete the [:Age=4.0:] filter ");
// return;
// } else {
// logln("Warning: TestGreek needs to be updated to remove delete the section marked [:Age=4.0:] filter");
// }
2005-02-04 00:50:01 +00:00
2001-11-22 02:41:06 +00:00
RTTest test ( " Latin-Greek " ) ;
LegalGreek * legal = new LegalGreek ( TRUE ) ;
2003-05-05 23:22:34 +00:00
2001-11-22 02:41:06 +00:00
test . test ( UnicodeString ( " [a-zA-Z] " , " " ) ,
2004-04-16 17:28:06 +00:00
UnicodeString ( " [ \\ u003B \\ u00B7[[:Greek:]&[:Letter:]]-[ "
" \\ u1D26- \\ u1D2A " // L& [5] GREEK LETTER SMALL CAPITAL GAMMA..GREEK LETTER SMALL CAPITAL PSI
" \\ u1D5D- \\ u1D61 " // Lm [5] MODIFIER LETTER SMALL BETA..MODIFIER LETTER SMALL CHI
" \\ u1D66- \\ u1D6A " // L& [5] GREEK SUBSCRIPT SMALL LETTER BETA..GREEK SUBSCRIPT SMALL LETTER CHI
" \\ u03D7- \\ u03EF " // \N{GREEK KAI SYMBOL}..\N{COPTIC SMALL LETTER DEI}
2005-02-04 00:50:01 +00:00
" ] & [:Age=4.0:]] " ,
2004-04-16 17:28:06 +00:00
//UnicodeString("[[\\u003B\\u00B7[:Greek:]-[\\u0374\\u0385\\u1fcd\\u1fce\\u1fdd\\u1fde\\u1fed-\\u1fef\\u1ffd\\u03D7-\\u03EF]]&[:Age=3.2:]]",
2001-11-22 02:41:06 +00:00
" " ) ,
2004-04-16 17:28:06 +00:00
" [ \\ u00B5 \\ u037A \\ u03D0- \\ u03F5 \\ u03f9] " , /* exclusions */
2002-12-06 21:31:22 +00:00
this , quick , legal , 50 ) ;
2003-05-05 23:22:34 +00:00
2003-04-23 00:23:26 +00:00
2001-11-22 02:41:06 +00:00
delete legal ;
2000-05-24 17:31:51 +00:00
}
2001-11-07 18:06:53 +00:00
2001-11-22 02:41:06 +00:00
void TransliteratorRoundTripTest : : TestGreekUNGEGN ( ) {
2005-02-04 00:50:01 +00:00
2008-08-06 00:42:42 +00:00
// CLDR bug #1911: This test should be moved into CLDR.
// It is left in its current state as a regression test.
// if (isICUVersionAtLeast(ICU_39)) {
// // We temporarily filter against Unicode 4.1, but we only do this
// // before version 3.4.
// errln("FAIL: TestGreek needs to be updated to remove delete the [:Age=4.0:] filter ");
// return;
// } else {
// logln("Warning: TestGreek needs to be updated to remove delete the section marked [:Age=4.0:] filter");
// }
2005-02-04 00:50:01 +00:00
2001-11-22 02:41:06 +00:00
RTTest test ( " Latin-Greek/UNGEGN " ) ;
LegalGreek * legal = new LegalGreek ( FALSE ) ;
2003-05-05 23:22:34 +00:00
2001-11-22 02:41:06 +00:00
test . test ( UnicodeString ( " [a-zA-Z] " , " " ) ,
2004-04-16 17:28:06 +00:00
UnicodeString ( " [ \\ u003B \\ u00B7[[:Greek:]&[:Letter:]]-[ "
" \\ u1D26- \\ u1D2A " // L& [5] GREEK LETTER SMALL CAPITAL GAMMA..GREEK LETTER SMALL CAPITAL PSI
" \\ u1D5D- \\ u1D61 " // Lm [5] MODIFIER LETTER SMALL BETA..MODIFIER LETTER SMALL CHI
" \\ u1D66- \\ u1D6A " // L& [5] GREEK SUBSCRIPT SMALL LETTER BETA..GREEK SUBSCRIPT SMALL LETTER CHI
" \\ u03D7- \\ u03EF " // \N{GREEK KAI SYMBOL}..\N{COPTIC SMALL LETTER DEI}
2005-02-04 00:50:01 +00:00
" ] & [:Age=4.0:]] " ,
2004-04-16 17:28:06 +00:00
//UnicodeString("[[\\u003B\\u00B7[:Greek:]-[\\u0374\\u0385\\u1fce\\u1fde\\u03D7-\\u03EF]]&[:Age=3.2:]]",
2001-11-22 02:41:06 +00:00
" " ) ,
2004-04-07 00:17:23 +00:00
" [ \\ u0385 \\ u00B5 \\ u037A \\ u03D0- \\ uFFFF { \\ u039C \\ u03C0}] " , /* roundtrip exclusions */
2001-11-22 02:41:06 +00:00
this , quick , legal ) ;
2003-04-23 00:23:26 +00:00
2001-11-22 02:41:06 +00:00
delete legal ;
2001-11-07 18:06:53 +00:00
}
2001-11-22 02:41:06 +00:00
void TransliteratorRoundTripTest : : Testel ( ) {
2005-02-04 00:50:01 +00:00
2008-08-06 00:42:42 +00:00
// CLDR bug #1911: This test should be moved into CLDR.
// It is left in its current state as a regression test.
// if (isICUVersionAtLeast(ICU_39)) {
// // We temporarily filter against Unicode 4.1, but we only do this
// // before version 3.4.
// errln("FAIL: TestGreek needs to be updated to remove delete the [:Age=4.0:] filter ");
// return;
// } else {
// logln("Warning: TestGreek needs to be updated to remove delete the section marked [:Age=4.0:] filter");
// }
2005-02-04 00:50:01 +00:00
2001-11-22 02:41:06 +00:00
RTTest test ( " Latin-el " ) ;
LegalGreek * legal = new LegalGreek ( FALSE ) ;
2003-05-05 23:22:34 +00:00
2001-11-22 02:41:06 +00:00
test . test ( UnicodeString ( " [a-zA-Z] " , " " ) ,
2004-04-16 17:28:06 +00:00
UnicodeString ( " [ \\ u003B \\ u00B7[[:Greek:]&[:Letter:]]-[ "
" \\ u1D26- \\ u1D2A " // L& [5] GREEK LETTER SMALL CAPITAL GAMMA..GREEK LETTER SMALL CAPITAL PSI
" \\ u1D5D- \\ u1D61 " // Lm [5] MODIFIER LETTER SMALL BETA..MODIFIER LETTER SMALL CHI
" \\ u1D66- \\ u1D6A " // L& [5] GREEK SUBSCRIPT SMALL LETTER BETA..GREEK SUBSCRIPT SMALL LETTER CHI
" \\ u03D7- \\ u03EF " // \N{GREEK KAI SYMBOL}..\N{COPTIC SMALL LETTER DEI}
2005-02-04 00:50:01 +00:00
" ] & [:Age=4.0:]] " ,
2004-04-16 17:28:06 +00:00
//UnicodeString("[[\\u003B\\u00B7[:Greek:]-[\\u0374\\u0385\\u1fce\\u1fde\\u03D7-\\u03EF]]&[:Age=3.2:]]",
2001-11-22 02:41:06 +00:00
" " ) ,
2002-05-31 18:05:12 +00:00
" [ \\ u00B5 \\ u037A \\ u03D0- \\ uFFFF { \\ u039C \\ u03C0}] " , /* exclusions */
2001-11-22 02:41:06 +00:00
this , quick , legal ) ;
2003-05-05 23:22:34 +00:00
2003-04-23 00:23:26 +00:00
2001-11-22 02:41:06 +00:00
delete legal ;
2001-11-07 18:06:53 +00:00
}
2004-05-19 04:17:37 +00:00
void TransliteratorRoundTripTest : : TestArabic ( ) {
2008-06-17 00:55:35 +00:00
UnicodeString ARABIC ( " [ \\ u060C \\ u061B \\ u061F \\ u0621 \\ u0627- \\ u063A \\ u0641- \\ u0655 \\ u0660- \\ u066C \\ u067E \\ u0686 \\ u0698 \\ u06A4 \\ u06AD \\ u06AF \\ u06CB- \\ u06CC \\ u06F0- \\ u06F9] " , - 1 , US_INV ) ;
2004-05-19 04:17:37 +00:00
Legal * legal = new Legal ( ) ;
RTTest test ( " Latin-Arabic " ) ;
2008-06-17 00:55:35 +00:00
test . test ( UNICODE_STRING_SIMPLE ( " [a-zA-Z \\ u02BE \\ u02BF \\ u207F] " ) , ARABIC , " [a-zA-Z \\ u02BE \\ u02BF \\ u207F] " , this , quick , legal ) ; //
2004-05-19 04:17:37 +00:00
delete legal ;
}
// Legality filter used by TestHebrew. is() checks every character of a
// string against the character that follows it, using the three sets below.
class LegalHebrew : public Legal {
private :
// Characters that is() rejects when the following character is in LETTER
// (presumably the Hebrew final-form letters — confirm against the pattern).
UnicodeSet FINAL ;
// Characters that is() rejects when the following character is NOT in LETTER.
UnicodeSet NON_FINAL ;
// Filled from the [:letter:] pattern by the constructor.
UnicodeSet LETTER ;
public :
// Builds the three sets from patterns; failures are reported through `error`.
LegalHebrew ( UErrorCode & error ) ;
virtual ~ LegalHebrew ( ) { }
// Returns TRUE when sourceString passes the checks described above.
virtual UBool is ( const UnicodeString & sourceString ) const ;
} ;
LegalHebrew : : LegalHebrew ( UErrorCode & error ) {
2008-06-17 00:55:35 +00:00
FINAL . applyPattern ( UNICODE_STRING_SIMPLE ( " [ \\ u05DA \\ u05DD \\ u05DF \\ u05E3 \\ u05E5] " ) , error ) ;
NON_FINAL . applyPattern ( UNICODE_STRING_SIMPLE ( " [ \\ u05DB \\ u05DE \\ u05E0 \\ u05E4 \\ u05E6] " ) , error ) ;
2004-05-19 04:17:37 +00:00
LETTER . applyPattern ( " [:letter:] " , error ) ;
}
/**
 * Checks a string against the FINAL / NON_FINAL placement rules.
 *
 * For every UTF-16 code unit:
 *   - a character in FINAL must not be followed by a character in LETTER;
 *   - a character in NON_FINAL must be followed by a character in LETTER.
 * The end of the string is treated as being followed by U+0000.
 *
 * Fix: the following character is now read from index i + 1. The previous
 * code read index i again, so each character was compared with itself.
 *
 * @param sourceString  text to check
 * @return TRUE if the string is empty or every character satisfies the rules,
 *         FALSE otherwise
 */
UBool LegalHebrew::is(const UnicodeString& sourceString) const {
    const int32_t length = sourceString.length();
    if (length == 0) return TRUE;
    // don't worry about surrogates.
    for (int32_t i = 0; i < length; ++i) {
        const UChar ch = sourceString.charAt(i);
        const UChar next = (i + 1 == length) ? 0x0000 : sourceString.charAt(i + 1);
        if (FINAL.contains(ch)) {
            if (LETTER.contains(next)) return FALSE;
        } else if (NON_FINAL.contains(ch)) {
            if (!LETTER.contains(next)) return FALSE;
        }
    }
    return TRUE;
}
void TransliteratorRoundTripTest : : TestHebrew ( ) {
2008-08-06 00:42:42 +00:00
// CLDR bug #1911: This test should be moved into CLDR.
// It is left in its current state as a regression test.
// if (isICUVersionAtLeast(ICU_39)) {
// // We temporarily filter against Unicode 4.1, but we only do this
// // before version 3.4.
// errln("FAIL: TestHebrew needs to be updated to remove delete the [:Age=4.0:] filter ");
// return;
// } else {
// logln("Warning: TestHebrew needs to be updated to remove delete the section marked [:Age=4.0:] filter");
// }
2005-06-09 16:38:11 +00:00
//long start = System.currentTimeMillis();
UErrorCode error = U_ZERO_ERROR ;
LegalHebrew * legal = new LegalHebrew ( error ) ;
if ( U_FAILURE ( error ) ) {
2009-06-12 19:34:21 +00:00
dataerrln ( " Could not construct LegalHebrew object. Error: %s " , u_errorName ( error ) ) ;
2005-06-09 16:38:11 +00:00
return ;
}
2004-05-19 04:17:37 +00:00
RTTest test ( " Latin-Hebrew " ) ;
2008-06-17 00:55:35 +00:00
test . test ( UNICODE_STRING_SIMPLE ( " [a-zA-Z \\ u02BC \\ u02BB] " ) , UNICODE_STRING_SIMPLE ( " [[[:hebrew:]-[ \\ u05BD \\ uFB00- \\ uFBFF]]&[:Age=4.0:]] " ) , " [ \\ u05F0 \\ u05F1 \\ u05F2] " , this , quick , legal ) ;
2005-03-19 00:34:12 +00:00
2004-05-19 04:17:37 +00:00
//showElapsed(start, "TestHebrew");
delete legal ;
}
2001-11-22 02:41:06 +00:00
void TransliteratorRoundTripTest : : TestCyrillic ( ) {
RTTest test ( " Latin-Cyrillic " ) ;
Legal * legal = new Legal ( ) ;
2003-04-23 00:23:26 +00:00
2003-05-05 23:22:34 +00:00
test . test ( UnicodeString ( " [a-zA-Z \\ u0110 \\ u0111 \\ u02BA \\ u02B9] " , " " ) ,
2003-04-23 00:23:26 +00:00
UnicodeString ( " [[ \\ u0400- \\ u045F] & [:Age=3.2:]] " , " " ) , NULL , this , quick ,
legal ) ;
2001-11-22 02:41:06 +00:00
delete legal ;
2001-11-13 09:09:14 +00:00
}
2001-11-22 02:41:06 +00:00
// Inter-Indic Tests ----------------------------------
2001-11-28 06:05:12 +00:00
// Legality filter for Indic source strings (instantiated by TestDevanagariLatin).
// is() looks at the first character of a string — and, for chandrabindu,
// the second — and rejects strings that start with a member of the sets below.
// All sets are filled by the constructor.
class LegalIndic : public Legal {
// Vowel signs, one pattern per script; may not be the first character.
UnicodeSet vowelSignSet ;
// The remaining sets below, down to sanskritStressSigns, are likewise rejected as a first character.
UnicodeSet avagraha ;
UnicodeSet nukta ;
UnicodeSet virama ;
UnicodeSet sanskritStressSigns ;
2001-12-01 04:33:03 +00:00
// Rejected as first character only when the second character is in vowelSignSet.
UnicodeSet chandrabindu ;
2001-11-28 06:05:12 +00:00
2005-03-10 03:46:19 +00:00
public :
LegalIndic ( ) ;
2001-11-28 06:05:12 +00:00
// Returns FALSE when the string starts with one of the disallowed characters, TRUE otherwise.
virtual UBool is ( const UnicodeString & sourceString ) const ;
virtual ~ LegalIndic ( ) { } ;
} ;
/**
 * Decides whether a string is a legal Indic source string by inspecting how
 * it starts.
 *
 * @param sourceString  text to check
 * @return FALSE if the first character is a vowel sign, avagraha, virama,
 *         nukta or Sanskrit stress sign, or if it is a chandrabindu directly
 *         followed by a vowel sign; TRUE otherwise
 */
UBool LegalIndic::is(const UnicodeString& sourceString) const {
    const int first = sourceString.charAt(0);

    // None of these may be the first char.
    if (vowelSignSet.contains(first) ||
        avagraha.contains(first) ||
        virama.contains(first) ||
        nukta.contains(first) ||
        sanskritStressSigns.contains(first)) {
        return FALSE;
    }

    // A leading chandrabindu is only a problem when a vowel sign follows it.
    if (chandrabindu.contains(first) &&
        sourceString.length() > 1 &&
        vowelSignSet.contains(sourceString.charAt(1))) {
        return FALSE;
    }

    return TRUE;
}
2005-03-10 03:46:19 +00:00
/**
 * Fills the character-class sets consulted by is(). Each set is built by
 * parsing a pattern; all parses share one local status, which is not reported.
 */
LegalIndic::LegalIndic() {
    // One vowel-sign pattern per script, merged into vowelSignSet in this order.
    static const char* const vowelSignPatterns[] = {
        " [ \\ u0902 \\ u0903 \\ u0904 \\ u093e- \\ u094c \\ u0962 \\ u0963] ",               /* Devanagari */
        " [ \\ u0982 \\ u0983 \\ u09be- \\ u09cc \\ u09e2 \\ u09e3 \\ u09D7] ",               /* Bengali */
        " [ \\ u0a02 \\ u0a03 \\ u0a3e- \\ u0a4c \\ u0a62 \\ u0a63 \\ u0a70 \\ u0a71] ",               /* Gurmukhi */
        " [ \\ u0a82 \\ u0a83 \\ u0abe- \\ u0acc \\ u0ae2 \\ u0ae3] ",               /* Gujarati */
        " [ \\ u0b02 \\ u0b03 \\ u0b3e- \\ u0b4c \\ u0b62 \\ u0b63 \\ u0b56 \\ u0b57] ",               /* Oriya */
        " [ \\ u0b82 \\ u0b83 \\ u0bbe- \\ u0bcc \\ u0be2 \\ u0be3 \\ u0bd7] ",               /* Tamil */
        " [ \\ u0c02 \\ u0c03 \\ u0c3e- \\ u0c4c \\ u0c62 \\ u0c63 \\ u0c55 \\ u0c56] ",               /* Telugu */
        " [ \\ u0c82 \\ u0c83 \\ u0cbe- \\ u0ccc \\ u0ce2 \\ u0ce3 \\ u0cd5 \\ u0cd6] ",               /* Kannada */
        " [ \\ u0d02 \\ u0d03 \\ u0d3e- \\ u0d4c \\ u0d62 \\ u0d63 \\ u0d57] "                /* Malayalam */
    };
    const int32_t vowelSignPatternCount =
        (int32_t)(sizeof(vowelSignPatterns) / sizeof(vowelSignPatterns[0]));

    UErrorCode status = U_ZERO_ERROR;
    for (int32_t i = 0; i < vowelSignPatternCount; ++i) {
        vowelSignSet.addAll(UnicodeSet(vowelSignPatterns[i], status));
    }

    avagraha.addAll(UnicodeSet(" [ \\ u093d \\ u09bd \\ u0abd \\ u0b3d \\ u0cbd] ", status));
    nukta.addAll(UnicodeSet(" [ \\ u093c \\ u09bc \\ u0a3c \\ u0abc \\ u0b3c \\ u0cbc] ", status));
    virama.addAll(UnicodeSet(" [ \\ u094d \\ u09cd \\ u0a4d \\ u0acd \\ u0b4d \\ u0bcd \\ u0c4d \\ u0ccd \\ u0d4d] ", status));
    sanskritStressSigns.addAll(UnicodeSet(" [ \\ u0951 \\ u0952 \\ u0953 \\ u0954 \\ u097d] ", status));
    chandrabindu.addAll(UnicodeSet(" [ \\ u0901 \\ u0981 \\ u0A81 \\ u0b01 \\ u0c01] ", status));
}
2001-12-01 04:33:03 +00:00
2001-12-28 21:26:53 +00:00
// Set-pattern text for the Latin side of the Indic round-trip tests; passed as
// the first set to RTTest::test by TestDevanagariLatin. The pattern is an
// explicit list of code points and ranges, minus [\uE000-\uE080 \u01E2 \u01E3],
// intersected with [[:latin:][:mark:]].
static const char latinForIndic [ ] = " [['.0-9A-Za-z~ \\ u00C0- \\ u00C5 \\ u00C7- \\ u00CF \\ u00D1- \\ u00D6 \\ u00D9- \\ u00DD "
2001-12-01 04:33:03 +00:00
" \\ u00E0- \\ u00E5 \\ u00E7- \\ u00EF \\ u00F1- \\ u00F6 \\ u00F9- \\ u00FD \\ u00FF- \\ u010F "
" \\ u0112- \\ u0125 \\ u0128- \\ u0130 \\ u0134- \\ u0137 \\ u0139- \\ u013E \\ u0143- \\ u0148 "
" \\ u014C- \\ u0151 \\ u0154- \\ u0165 \\ u0168- \\ u017E \\ u01A0- \\ u01A1 \\ u01AF- \\ u01B0 "
" \\ u01CD- \\ u01DC \\ u01DE- \\ u01E3 \\ u01E6- \\ u01ED \\ u01F0 \\ u01F4- \\ u01F5 \\ u01F8- \\ u01FB "
2005-02-04 00:50:01 +00:00
" \\ u0200- \\ u021B \\ u021E- \\ u021F \\ u0226- \\ u0233 \\ u0294 \\ u0303- \\ u0304 \\ u0306 \\ u0314- \\ u0315 "
2001-12-01 04:33:03 +00:00
" \\ u0325 \\ u040E \\ u0419 \\ u0439 \\ u045E \\ u04C1- \\ u04C2 \\ u04D0- \\ u04D1 \\ u04D6- \\ u04D7 "
" \\ u04E2- \\ u04E3 \\ u04EE- \\ u04EF \\ u1E00- \\ u1E99 \\ u1EA0- \\ u1EF9 \\ u1F01 \\ u1F03 \\ u1F05 "
" \\ u1F07 \\ u1F09 \\ u1F0B \\ u1F0D \\ u1F0F \\ u1F11 \\ u1F13 \\ u1F15 \\ u1F19 \\ u1F1B \\ u1F1D \\ u1F21 "
" \\ u1F23 \\ u1F25 \\ u1F27 \\ u1F29 \\ u1F2B \\ u1F2D \\ u1F2F \\ u1F31 \\ u1F33 \\ u1F35 \\ u1F37 \\ u1F39 "
" \\ u1F3B \\ u1F3D \\ u1F3F \\ u1F41 \\ u1F43 \\ u1F45 \\ u1F49 \\ u1F4B \\ u1F4D \\ u1F51 \\ u1F53 \\ u1F55 "
" \\ u1F57 \\ u1F59 \\ u1F5B \\ u1F5D \\ u1F5F \\ u1F61 \\ u1F63 \\ u1F65 \\ u1F67 \\ u1F69 \\ u1F6B \\ u1F6D "
" \\ u1F6F \\ u1F81 \\ u1F83 \\ u1F85 \\ u1F87 \\ u1F89 \\ u1F8B \\ u1F8D \\ u1F8F \\ u1F91 \\ u1F93 \\ u1F95 "
" \\ u1F97 \\ u1F99 \\ u1F9B \\ u1F9D \\ u1F9F \\ u1FA1 \\ u1FA3 \\ u1FA5 \\ u1FA7 \\ u1FA9 \\ u1FAB \\ u1FAD "
" \\ u1FAF- \\ u1FB1 \\ u1FB8- \\ u1FB9 \\ u1FD0- \\ u1FD1 \\ u1FD8- \\ u1FD9 \\ u1FE0- \\ u1FE1 \\ u1FE5 "
" \\ u1FE8- \\ u1FE9 \\ u1FEC \\ u212A- \\ u212B \\ uE04D \\ uE064] "
" -[ \\ uE000- \\ uE080 \\ u01E2 \\ u01E3]& [[:latin:][:mark:]]] " ;
2001-11-22 02:41:06 +00:00
void TransliteratorRoundTripTest : : TestDevanagariLatin ( ) {
2001-11-28 06:05:12 +00:00
{
UErrorCode status = U_ZERO_ERROR ;
2003-10-17 00:34:33 +00:00
UParseError parseError ;
TransliteratorPointer t1 ( Transliterator : : createInstance ( " [ \\ u0964- \\ u0965 \\ u0981- \\ u0983 \\ u0985- \\ u098C \\ u098F- \\ u0990 \\ u0993- \\ u09A8 \\ u09AA- \\ u09B0 \\ u09B2 \\ u09B6- \\ u09B9 \\ u09BC \\ u09BE- \\ u09C4 \\ u09C7- \\ u09C8 \\ u09CB- \\ u09CD \\ u09D7 \\ u09DC- \\ u09DD \\ u09DF- \\ u09E3 \\ u09E6- \\ u09FA];NFD;Bengali-InterIndic;InterIndic-Gujarati;NFC; " , UTRANS_FORWARD , parseError , status ) ) ;
2004-06-12 06:16:57 +00:00
if ( ( Transliterator * ) t1 ! = NULL ) {
2002-12-12 19:54:39 +00:00
TransliteratorPointer t2 ( t1 - > createInverse ( status ) ) ;
2001-11-28 06:05:12 +00:00
if ( U_FAILURE ( status ) ) {
2001-12-04 05:25:39 +00:00
errln ( " FAIL: could not create the Inverse:-( \n " ) ;
2001-11-28 06:05:12 +00:00
}
2003-10-17 00:34:33 +00:00
} else {
2009-06-12 19:34:21 +00:00
dataerrln ( " FAIL: could not create the transliterator. Error: %s \n " , u_errorName ( status ) ) ;
2001-11-28 06:05:12 +00:00
}
2003-10-17 00:34:33 +00:00
2001-11-28 06:05:12 +00:00
}
RTTest test ( " Latin-Devanagari " ) ;
Legal * legal = new LegalIndic ( ) ;
2008-08-06 00:42:42 +00:00
// CLDR bug #1911: This test should be moved into CLDR.
// It is left in its current state as a regression test.
// if (isICUVersionAtLeast(ICU_39)) {
// // We temporarily filter against Unicode 4.1, but we only do this
// // before version 3.4.
// errln("FAIL: TestDevanagariLatin needs to be updated to remove delete the [:Age=4.1:] filter ");
// return;
// } else {
// logln("Warning: TestDevanagariLatin needs to be updated to remove delete the section marked [:Age=4.1:] filter");
// }
2004-05-19 04:17:37 +00:00
test . test ( UnicodeString ( latinForIndic , " " ) ,
2011-12-09 19:25:02 +00:00
UnicodeString ( " [[[:Devanagari:][ \\ u094d][ \\ u0964 \\ u0965]]&[:Age=4.1:]-[ \\ u0970]] " , " " ) , " [ \\ u0965 \\ u0904] " , this , quick ,
2004-05-19 04:17:37 +00:00
legal , 50 ) ;
2003-09-10 01:28:29 +00:00
2001-11-22 02:41:06 +00:00
delete legal ;
2001-11-13 09:09:14 +00:00
}
2001-12-03 18:06:27 +00:00
/* Defined this way for HP/UX11CC :-( */
// NOTE(review): the visible interIndicArray entries each consist of 4 strings
// (ID, two set patterns, roundtrip exclusions); presumably this constant is
// that per-entry width — confirm at the use site.
static const int32_t INTER_INDIC_ARRAY_WIDTH = 4 ;
2001-12-28 21:26:53 +00:00
/*
 * Flat data table that drives TestInterIndic().
 *
 * Every INTER_INDIC_ARRAY_WIDTH (4) consecutive entries form one row:
 *   [0] transliterator ID, handed to the RTTest constructor
 *   [1] source set pattern
 *   [2] target set pattern
 *   [3] roundtrip exclusions pattern, or NULL when no exclusions are listed
 *
 * The Latin-* rows use latinForIndic (defined outside this section) as their
 * source set. In quick mode TestInterIndic() only runs the first 5 rows, so
 * the order of the rows matters.
 */
static const char * const interIndicArray [ ] = {
2005-02-04 00:50:01 +00:00
2011-12-09 19:25:02 +00:00
" BENGALI-DEVANAGARI " , " [:BENGALI:] " , " [[:Devanagari:]-[ \\ u0970]] " ,
" [ \\ u0904 \\ u0951- \\ u0954 \\ u0943- \\ u0949 \\ u094a \\ u0962 \\ u0963 \\ u090D \\ u090e \\ u0911 \\ u0912 \\ u0929 \\ u0933 \\ u0934 \\ u0935 \\ u093d \\ u0950 \\ u0958 \\ u0959 \\ u095a \\ u095b \\ u095e \\ u097d] " , /*roundtrip exclusions*/
2001-08-31 18:02:09 +00:00
2011-12-09 19:25:02 +00:00
" DEVANAGARI-BENGALI " , " [[:Devanagari:]-[ \\ u0970]] " , " [:BENGALI:] " ,
" [ \\ u0951- \\ u0954 \\ u0951- \\ u0954 \\ u09D7 \\ u090D \\ u090e \\ u0911 \\ u0912 \\ u0929 \\ u0933 \\ u0934 \\ u0935 \\ u093d \\ u0950 \\ u0958 \\ u0959 \\ u095a \\ u095b \\ u095e \\ u09f0 \\ u09f1 \\ u09f2- \\ u09fa \\ u09ce] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2011-12-09 19:25:02 +00:00
" GURMUKHI-DEVANAGARI " , " [:GURMUKHI:] " , " [[:Devanagari:]-[ \\ u0970]] " ,
" [ \\ u0904 \\ u0901 \\ u0902 \\ u0936 \\ u0933 \\ u0951- \\ u0954 \\ u0902 \\ u0903 \\ u0943- \\ u0949 \\ u094a \\ u0962 \\ u0963 \\ u090B \\ u090C \\ u090D \\ u090e \\ u0911 \\ u0912 \\ u0934 \\ u0937 \\ u093D \\ u0950 \\ u0960 \\ u0961 \\ u097d] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2011-12-09 19:25:02 +00:00
" DEVANAGARI-GURMUKHI " , " [[:Devanagari:]-[ \\ u0970]] " , " [:GURMUKHI:] " ,
" [ \\ u0904 \\ u0A02 \\ u0946 \\ u0A5C \\ u0951- \\ u0954 \\ u0A70 \\ u0A71 \\ u090B \\ u090C \\ u090D \\ u090e \\ u0911 \\ u0912 \\ u0934 \\ u0937 \\ u093D \\ u0950 \\ u0960 \\ u0961 \\ u0a72 \\ u0a73 \\ u0a74] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2011-12-09 19:25:02 +00:00
" GUJARATI-DEVANAGARI " , " [:GUJARATI:] " , " [[:Devanagari:]-[ \\ u0970]] " ,
" [ \\ u0946 \\ u094A \\ u0962 \\ u0963 \\ u0951- \\ u0954 \\ u0961 \\ u090c \\ u090e \\ u0912 \\ u097d] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2011-12-09 19:25:02 +00:00
" DEVANAGARI-GUJARATI " , " [[:Devanagari:]-[ \\ u0970]] " , " [:GUJARATI:] " ,
" [ \\ u0951- \\ u0954 \\ u0961 \\ u090c \\ u090e \\ u0912] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2011-12-09 19:25:02 +00:00
" ORIYA-DEVANAGARI " , " [:ORIYA:] " , " [[:Devanagari:]-[ \\ u0970]] " ,
" [ \\ u0904 \\ u0943- \\ u094a \\ u0962 \\ u0963 \\ u0951- \\ u0954 \\ u0950 \\ u090D \\ u090e \\ u0912 \\ u0911 \\ u0931 \\ u0935 \\ u097d] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2011-12-09 19:25:02 +00:00
" DEVANAGARI-ORIYA " , " [[:Devanagari:]-[ \\ u0970]] " , " [:ORIYA:] " ,
" [ \\ u0b5f \\ u0b56 \\ u0b57 \\ u0b70 \\ u0b71 \\ u0950 \\ u090D \\ u090e \\ u0912 \\ u0911 \\ u0931] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2011-12-09 19:25:02 +00:00
" Tamil-DEVANAGARI " , " [:tamil:] " , " [[:Devanagari:]-[ \\ u0970]] " ,
" [ \\ u0901 \\ u0904 \\ u093c \\ u0943- \\ u094a \\ u0951- \\ u0954 \\ u0962 \\ u0963 \\ u090B \\ u090C \\ u090D \\ u0911 \\ u0916 \\ u0917 \\ u0918 \\ u091B \\ u091D \\ u0920 \\ u0921 \\ u0922 \\ u0925 \\ u0926 \\ u0927 \\ u092B \\ u092C \\ u092D \\ u0936 \\ u093d \\ u0950[ \\ u0958- \\ u0961] \\ u097d] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2011-12-09 19:25:02 +00:00
" DEVANAGARI-Tamil " , " [[:Devanagari:]-[ \\ u0970]] " , " [:tamil:] " ,
" [ \\ u0bd7 \\ u0BF0 \\ u0BF1 \\ u0BF2] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2011-12-09 19:25:02 +00:00
" Telugu-DEVANAGARI " , " [:telugu:] " , " [[:Devanagari:]-[ \\ u0970]] " ,
" [ \\ u0904 \\ u093c \\ u0950 \\ u0945 \\ u0949 \\ u0951- \\ u0954 \\ u0962 \\ u0963 \\ u090D \\ u0911 \\ u093d \\ u0929 \\ u0934[ \\ u0958- \\ u095f] \\ u097d] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2011-12-09 19:25:02 +00:00
" DEVANAGARI-TELUGU " , " [[:Devanagari:]-[ \\ u0970]] " , " [:TELUGU:] " ,
" [ \\ u0c55 \\ u0c56 \\ u0950 \\ u090D \\ u0911 \\ u093d \\ u0929 \\ u0934[ \\ u0958- \\ u095f]] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2011-12-09 19:25:02 +00:00
" KANNADA-DEVANAGARI " , " [:KANNADA:] " , " [[:Devanagari:]-[ \\ u0970]] " ,
" [ \\ u0901 \\ u0904 \\ u0946 \\ u093c \\ u0950 \\ u0945 \\ u0949 \\ u0951- \\ u0954 \\ u0962 \\ u0963 \\ u0950 \\ u090D \\ u0911 \\ u093d \\ u0929 \\ u0934[ \\ u0958- \\ u095f] \\ u097d] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2011-12-09 19:25:02 +00:00
" DEVANAGARI-KANNADA " , " [[:Devanagari:]-[ \\ u0970]] " , " [:KANNADA:] " ,
" [{ \\ u0cb0 \\ u0cbc}{ \\ u0cb3 \\ u0cbc} \\ u0cde \\ u0cd5 \\ u0cd6 \\ u0950 \\ u090D \\ u0911 \\ u093d \\ u0929 \\ u0934[ \\ u0958- \\ u095f]] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2011-12-09 19:25:02 +00:00
" MALAYALAM-DEVANAGARI " , " [:MALAYALAM:] " , " [[:Devanagari:]-[ \\ u0970]] " ,
" [ \\ u0901 \\ u0904 \\ u094a \\ u094b \\ u094c \\ u093c \\ u0950 \\ u0944 \\ u0945 \\ u0949 \\ u0951- \\ u0954 \\ u0962 \\ u0963 \\ u090D \\ u0911 \\ u093d \\ u0929 \\ u0934[ \\ u0958- \\ u095f] \\ u097d] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2011-12-09 19:25:02 +00:00
" DEVANAGARI-MALAYALAM " , " [[:Devanagari:]-[ \\ u0970]] " , " [:MALAYALAM:] " ,
" [ \\ u0d4c \\ u0d57 \\ u0950 \\ u090D \\ u0911 \\ u093d \\ u0929 \\ u0934[ \\ u0958- \\ u095f]] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" GURMUKHI-BENGALI " , " [:GURMUKHI:] " , " [:BENGALI:] " ,
2005-02-04 00:50:01 +00:00
" [ \\ u0981 \\ u0982 \\ u09b6 \\ u09e2 \\ u09e3 \\ u09c3 \\ u09c4 \\ u09d7 \\ u098B \\ u098C \\ u09B7 \\ u09E0 \\ u09E1 \\ u09F0 \\ u09F1 \\ u09f2- \\ u09fa \\ u09ce] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" BENGALI-GURMUKHI " , " [:BENGALI:] " , " [:GURMUKHI:] " ,
2002-07-29 22:56:05 +00:00
" [ \\ u0A02 \\ u0a5c \\ u0a47 \\ u0a70 \\ u0a71 \\ u0A33 \\ u0A35 \\ u0A59 \\ u0A5A \\ u0A5B \\ u0A5E \\ u0A72 \\ u0A73 \\ u0A74] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" GUJARATI-BENGALI " , " [:GUJARATI:] " , " [:BENGALI:] " ,
2005-02-04 00:50:01 +00:00
" [ \\ u09d7 \\ u09e2 \\ u09e3 \\ u098c \\ u09e1 \\ u09f0 \\ u09f1 \\ u09f2- \\ u09fa \\ u09ce] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" BENGALI-GUJARATI " , " [:BENGALI:] " , " [:GUJARATI:] " ,
" [ \\ u0A82 \\ u0a83 \\ u0Ac9 \\ u0Ac5 \\ u0ac7 \\ u0A8D \\ u0A91 \\ u0AB3 \\ u0AB5 \\ u0ABD \\ u0AD0] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" ORIYA-BENGALI " , " [:ORIYA:] " , " [:BENGALI:] " ,
2005-02-04 00:50:01 +00:00
" [ \\ u09c4 \\ u09e2 \\ u09e3 \\ u09f0 \\ u09f1 \\ u09f2- \\ u09fa \\ u09ce] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" BENGALI-ORIYA " , " [:BENGALI:] " , " [:ORIYA:] " ,
2004-05-19 04:17:37 +00:00
" [ \\ u0b35 \\ u0b71 \\ u0b5f \\ u0b56 \\ u0b33 \\ u0b3d] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" Tamil-BENGALI " , " [:tamil:] " , " [:BENGALI:] " ,
2005-02-04 00:50:01 +00:00
" [ \\ u0981 \\ u09bc \\ u09c3 \\ u09c4 \\ u09e2 \\ u09e3 \\ u09f0 \\ u09f1 \\ u098B \\ u098C \\ u0996 \\ u0997 \\ u0998 \\ u099B \\ u099D \\ u09A0 \\ u09A1 \\ u09A2 \\ u09A5 \\ u09A6 \\ u09A7 \\ u09AB \\ u09AC \\ u09AD \\ u09B6 \\ u09DC \\ u09DD \\ u09DF \\ u09E0 \\ u09E1 \\ u09f2- \\ u09fa \\ u09ce] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" BENGALI-Tamil " , " [:BENGALI:] " , " [:tamil:] " ,
2007-07-31 07:10:14 +00:00
" [ \\ u0bc6 \\ u0bc7 \\ u0bca \\ u0B8E \\ u0B92 \\ u0BA9 \\ u0BB1 \\ u0BB3 \\ u0BB4 \\ u0BB5 \\ u0BF0 \\ u0BF1 \\ u0BF2] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" Telugu-BENGALI " , " [:telugu:] " , " [:BENGALI:] " ,
2005-02-04 00:50:01 +00:00
" [ \\ u09e2 \\ u09e3 \\ u09bc \\ u09d7 \\ u09f0 \\ u09f1 \\ u09dc \\ u09dd \\ u09df \\ u09f2- \\ u09fa \\ u09ce] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" BENGALI-TELUGU " , " [:BENGALI:] " , " [:TELUGU:] " ,
" [ \\ u0c55 \\ u0c56 \\ u0c47 \\ u0c46 \\ u0c4a \\ u0C0E \\ u0C12 \\ u0C31 \\ u0C33 \\ u0C35] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" KANNADA-BENGALI " , " [:KANNADA:] " , " [:BENGALI:] " ,
2005-02-04 00:50:01 +00:00
" [ \\ u0981 \\ u09e2 \\ u09e3 \\ u09bc \\ u09d7 \\ u09dc \\ u09dd \\ u09df \\ u09f0 \\ u09f1 \\ u09f2- \\ u09fa \\ u09ce] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" BENGALI-KANNADA " , " [:BENGALI:] " , " [:KANNADA:] " ,
2004-05-19 04:17:37 +00:00
" [{ \\ u0cb0 \\ u0cbc}{ \\ u0cb3 \\ u0cbc} \\ u0cc6 \\ u0cca \\ u0cd5 \\ u0cd6 \\ u0cc7 \\ u0C8E \\ u0C92 \\ u0CB1 \\ u0cb3 \\ u0cb5 \\ u0cde] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" MALAYALAM-BENGALI " , " [:MALAYALAM:] " , " [:BENGALI:] " ,
2005-02-04 00:50:01 +00:00
" [ \\ u0981 \\ u09e2 \\ u09e3 \\ u09bc \\ u09c4 \\ u09f0 \\ u09f1 \\ u09dc \\ u09dd \\ u09df \\ u09dc \\ u09dd \\ u09df \\ u09f2- \\ u09fa \\ u09ce] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" BENGALI-MALAYALAM " , " [:BENGALI:] " , " [:MALAYALAM:] " ,
" [ \\ u0d46 \\ u0d4a \\ u0d47 \\ u0d31- \\ u0d35 \\ u0d0e \\ u0d12] " , /*roundtrip exclusions*/
2007-07-31 07:10:14 +00:00
2001-12-03 18:06:27 +00:00
" GUJARATI-GURMUKHI " , " [:GUJARATI:] " , " [:GURMUKHI:] " ,
2002-07-29 22:56:05 +00:00
" [ \\ u0A02 \\ u0ab3 \\ u0ab6 \\ u0A70 \\ u0a71 \\ u0a82 \\ u0a83 \\ u0ac3 \\ u0ac4 \\ u0ac5 \\ u0ac9 \\ u0a5c \\ u0a72 \\ u0a73 \\ u0a74 \\ u0a8b \\ u0a8d \\ u0a91 \\ u0abd] " , /*roundtrip exclusions*/
2001-10-30 18:29:45 +00:00
2001-12-03 18:06:27 +00:00
" GURMUKHI-GUJARATI " , " [:GURMUKHI:] " , " [:GUJARATI:] " ,
2004-05-19 04:17:37 +00:00
" [ \\ u0a5c \\ u0A70 \\ u0a71 \\ u0a72 \\ u0a73 \\ u0a74 \\ u0a82 \\ u0a83 \\ u0a8b \\ u0a8c \\ u0a8d \\ u0a91 \\ u0ab3 \\ u0ab6 \\ u0ab7 \\ u0abd \\ u0ac3 \\ u0ac4 \\ u0ac5 \\ u0ac9 \\ u0ad0 \\ u0ae0 \\ u0ae1] " , /*roundtrip exclusions*/
2001-08-31 18:02:09 +00:00
2001-12-03 18:06:27 +00:00
" ORIYA-GURMUKHI " , " [:ORIYA:] " , " [:GURMUKHI:] " ,
2002-07-29 22:56:05 +00:00
" [ \\ u0A01 \\ u0A02 \\ u0a5c \\ u0a21 \\ u0a47 \\ u0a71 \\ u0b02 \\ u0b03 \\ u0b33 \\ u0b36 \\ u0b43 \\ u0b56 \\ u0b57 \\ u0B0B \\ u0B0C \\ u0B37 \\ u0B3D \\ u0B5F \\ u0B60 \\ u0B61 \\ u0a35 \\ u0a72 \\ u0a73 \\ u0a74] " , /*roundtrip exclusions*/
2001-08-31 18:02:09 +00:00
2001-12-03 18:06:27 +00:00
" GURMUKHI-ORIYA " , " [:GURMUKHI:] " , " [:ORIYA:] " ,
2004-05-19 04:17:37 +00:00
" [ \\ u0b01 \\ u0b02 \\ u0b03 \\ u0b33 \\ u0b36 \\ u0b43 \\ u0b56 \\ u0b57 \\ u0B0B \\ u0B0C \\ u0B37 \\ u0B3D \\ u0B5F \\ u0B60 \\ u0B61 \\ u0b70 \\ u0b71] " , /*roundtrip exclusions*/
2001-08-31 18:02:09 +00:00
2001-12-03 18:06:27 +00:00
" TAMIL-GURMUKHI " , " [:TAMIL:] " , " [:GURMUKHI:] " ,
2004-05-19 04:17:37 +00:00
" [ \\ u0A01 \\ u0A02 \\ u0a33 \\ u0a36 \\ u0a3c \\ u0a70 \\ u0a71 \\ u0a47 \\ u0A16 \\ u0A17 \\ u0A18 \\ u0A1B \\ u0A1D \\ u0A20 \\ u0A21 \\ u0A22 \\ u0A25 \\ u0A26 \\ u0A27 \\ u0A2B \\ u0A2C \\ u0A2D \\ u0A59 \\ u0A5A \\ u0A5B \\ u0A5C \\ u0A5E \\ u0A72 \\ u0A73 \\ u0A74] " , /*roundtrip exclusions*/
2001-10-30 18:29:45 +00:00
2001-12-03 18:06:27 +00:00
" GURMUKHI-TAMIL " , " [:GURMUKHI:] " , " [:TAMIL:] " ,
2007-07-31 07:10:14 +00:00
" [ \\ u0b82 \\ u0bc6 \\ u0bca \\ u0bd7 \\ u0bb7 \\ u0bb3 \\ u0b83 \\ u0B8E \\ u0B92 \\ u0BA9 \\ u0BB1 \\ u0BB4 \\ u0bb6 \\ u0BF0 \\ u0BF1 \\ u0BF2] " , /*roundtrip exclusions*/
2001-10-30 18:29:45 +00:00
2001-12-03 18:06:27 +00:00
" TELUGU-GURMUKHI " , " [:TELUGU:] " , " [:GURMUKHI:] " ,
2002-07-29 22:56:05 +00:00
" [ \\ u0A02 \\ u0a33 \\ u0a36 \\ u0a3c \\ u0a70 \\ u0a71 \\ u0A59 \\ u0A5A \\ u0A5B \\ u0A5C \\ u0A5E \\ u0A72 \\ u0A73 \\ u0A74] " , /*roundtrip exclusions*/
2001-08-31 18:02:09 +00:00
2001-12-03 18:06:27 +00:00
" GURMUKHI-TELUGU " , " [:GURMUKHI:] " , " [:TELUGU:] " ,
2002-07-29 22:56:05 +00:00
" [ \\ u0c01 \\ u0c02 \\ u0c03 \\ u0c33 \\ u0c36 \\ u0c44 \\ u0c43 \\ u0c46 \\ u0c4a \\ u0c56 \\ u0c55 \\ u0C0B \\ u0C0C \\ u0C0E \\ u0C12 \\ u0C31 \\ u0C37 \\ u0C60 \\ u0C61] " , /*roundtrip exclusions*/
2001-08-31 18:02:09 +00:00
2001-12-03 18:06:27 +00:00
" KANNADA-GURMUKHI " , " [:KANNADA:] " , " [:GURMUKHI:] " ,
2004-05-19 04:17:37 +00:00
" [ \\ u0A01 \\ u0A02 \\ u0a33 \\ u0a36 \\ u0a3c \\ u0a70 \\ u0a71 \\ u0A59 \\ u0A5A \\ u0A5B \\ u0A5C \\ u0A5E \\ u0A72 \\ u0A73 \\ u0A74] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" GURMUKHI-KANNADA " , " [:GURMUKHI:] " , " [:KANNADA:] " ,
2004-05-19 04:17:37 +00:00
" [{ \\ u0cb0 \\ u0cbc}{ \\ u0cb3 \\ u0cbc} \\ u0c82 \\ u0c83 \\ u0cb3 \\ u0cb6 \\ u0cc4 \\ u0cc3 \\ u0cc6 \\ u0cca \\ u0cd5 \\ u0cd6 \\ u0C8B \\ u0C8C \\ u0C8E \\ u0C92 \\ u0CB1 \\ u0CB7 \\ u0cbd \\ u0CE0 \\ u0CE1 \\ u0cde] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" MALAYALAM-GURMUKHI " , " [:MALAYALAM:] " , " [:GURMUKHI:] " ,
2004-05-19 04:17:37 +00:00
" [ \\ u0A01 \\ u0A02 \\ u0a4b \\ u0a4c \\ u0a33 \\ u0a36 \\ u0a3c \\ u0a70 \\ u0a71 \\ u0A59 \\ u0A5A \\ u0A5B \\ u0A5C \\ u0A5E \\ u0A72 \\ u0A73 \\ u0A74] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" GURMUKHI-MALAYALAM " , " [:GURMUKHI:] " , " [:MALAYALAM:] " ,
2002-07-29 22:56:05 +00:00
" [ \\ u0d02 \\ u0d03 \\ u0d33 \\ u0d36 \\ u0d43 \\ u0d46 \\ u0d4a \\ u0d4c \\ u0d57 \\ u0D0B \\ u0D0C \\ u0D0E \\ u0D12 \\ u0D31 \\ u0D34 \\ u0D37 \\ u0D60 \\ u0D61] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" GUJARATI-ORIYA " , " [:GUJARATI:] " , " [:ORIYA:] " ,
2004-05-19 04:17:37 +00:00
" [ \\ u0b56 \\ u0b57 \\ u0B0C \\ u0B5F \\ u0B61 \\ u0b70 \\ u0b71] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" ORIYA-GUJARATI " , " [:ORIYA:] " , " [:GUJARATI:] " ,
" [ \\ u0Ac4 \\ u0Ac5 \\ u0Ac9 \\ u0Ac7 \\ u0A8D \\ u0A91 \\ u0AB5 \\ u0Ad0] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" TAMIL-GUJARATI " , " [:TAMIL:] " , " [:GUJARATI:] " ,
2004-05-19 04:17:37 +00:00
" [ \\ u0A81 \\ u0a8c \\ u0abc \\ u0ac3 \\ u0Ac4 \\ u0Ac5 \\ u0Ac9 \\ u0Ac7 \\ u0A8B \\ u0A8D \\ u0A91 \\ u0A96 \\ u0A97 \\ u0A98 \\ u0A9B \\ u0A9D \\ u0AA0 \\ u0AA1 \\ u0AA2 \\ u0AA5 \\ u0AA6 \\ u0AA7 \\ u0AAB \\ u0AAC \\ u0AAD \\ u0AB6 \\ u0ABD \\ u0AD0 \\ u0AE0 \\ u0AE1] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" GUJARATI-TAMIL " , " [:GUJARATI:] " , " [:TAMIL:] " ,
2007-07-31 07:10:14 +00:00
" [ \\ u0Bc6 \\ u0Bca \\ u0Bd7 \\ u0B8E \\ u0B92 \\ u0BA9 \\ u0BB1 \\ u0BB4 \\ u0BF0 \\ u0BF1 \\ u0BF2] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" TELUGU-GUJARATI " , " [:TELUGU:] " , " [:GUJARATI:] " ,
" [ \\ u0abc \\ u0Ac5 \\ u0Ac9 \\ u0A8D \\ u0A91 \\ u0ABD \\ u0Ad0] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" GUJARATI-TELUGU " , " [:GUJARATI:] " , " [:TELUGU:] " ,
" [ \\ u0c46 \\ u0c4a \\ u0c55 \\ u0c56 \\ u0C0C \\ u0C0E \\ u0C12 \\ u0C31 \\ u0C61] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" KANNADA-GUJARATI " , " [:KANNADA:] " , " [:GUJARATI:] " ,
2002-05-31 18:05:12 +00:00
" [ \\ u0A81 \\ u0abc \\ u0Ac5 \\ u0Ac9 \\ u0A8D \\ u0A91 \\ u0ABD \\ u0Ad0] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" GUJARATI-KANNADA " , " [:GUJARATI:] " , " [:KANNADA:] " ,
2004-05-19 04:17:37 +00:00
" [{ \\ u0cb0 \\ u0cbc}{ \\ u0cb3 \\ u0cbc} \\ u0cc6 \\ u0cca \\ u0cd5 \\ u0cd6 \\ u0C8C \\ u0C8E \\ u0C92 \\ u0CB1 \\ u0CDE \\ u0CE1] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" MALAYALAM-GUJARATI " , " [:MALAYALAM:] " , " [:GUJARATI:] " ,
2002-05-31 18:05:12 +00:00
" [ \\ u0A81 \\ u0ac4 \\ u0acb \\ u0acc \\ u0abc \\ u0Ac5 \\ u0Ac9 \\ u0A8D \\ u0A91 \\ u0ABD \\ u0Ad0] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" GUJARATI-MALAYALAM " , " [:GUJARATI:] " , " [:MALAYALAM:] " ,
" [ \\ u0d46 \\ u0d4a \\ u0d4c \\ u0d55 \\ u0d57 \\ u0D0C \\ u0D0E \\ u0D12 \\ u0D31 \\ u0D34 \\ u0D61] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" TAMIL-ORIYA " , " [:TAMIL:] " , " [:ORIYA:] " ,
2004-05-19 04:17:37 +00:00
" [ \\ u0B01 \\ u0b3c \\ u0b43 \\ u0b56 \\ u0B0B \\ u0B0C \\ u0B16 \\ u0B17 \\ u0B18 \\ u0B1B \\ u0B1D \\ u0B20 \\ u0B21 \\ u0B22 \\ u0B25 \\ u0B26 \\ u0B27 \\ u0B2B \\ u0B2C \\ u0B2D \\ u0B36 \\ u0B3D \\ u0B5C \\ u0B5D \\ u0B5F \\ u0B60 \\ u0B61 \\ u0b70 \\ u0b71] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" ORIYA-TAMIL " , " [:ORIYA:] " , " [:TAMIL:] " ,
2007-07-31 07:10:14 +00:00
" [ \\ u0bc6 \\ u0bca \\ u0bc7 \\ u0B8E \\ u0B92 \\ u0BA9 \\ u0BB1 \\ u0BB4 \\ u0BB5 \\ u0BF0 \\ u0BF1 \\ u0BF2] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" TELUGU-ORIYA " , " [:TELUGU:] " , " [:ORIYA:] " ,
2004-05-19 04:17:37 +00:00
" [ \\ u0b3c \\ u0b57 \\ u0b56 \\ u0B3D \\ u0B5C \\ u0B5D \\ u0B5F \\ u0b70 \\ u0b71] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" ORIYA-TELUGU " , " [:ORIYA:] " , " [:TELUGU:] " ,
" [ \\ u0c44 \\ u0c46 \\ u0c4a \\ u0c55 \\ u0c47 \\ u0C0E \\ u0C12 \\ u0C31 \\ u0C35] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" KANNADA-ORIYA " , " [:KANNADA:] " , " [:ORIYA:] " ,
2004-05-19 04:17:37 +00:00
" [ \\ u0B01 \\ u0b3c \\ u0b57 \\ u0B3D \\ u0B5C \\ u0B5D \\ u0B5F \\ u0b70 \\ u0b71] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" ORIYA-KANNADA " , " [:ORIYA:] " , " [:KANNADA:] " ,
2004-05-19 04:17:37 +00:00
" [{ \\ u0cb0 \\ u0cbc}{ \\ u0cb3 \\ u0cbc} \\ u0cc4 \\ u0cc6 \\ u0cca \\ u0cd5 \\ u0cc7 \\ u0C8E \\ u0C92 \\ u0CB1 \\ u0CB5 \\ u0CDE] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" MALAYALAM-ORIYA " , " [:MALAYALAM:] " , " [:ORIYA:] " ,
2004-05-19 04:17:37 +00:00
" [ \\ u0B01 \\ u0b3c \\ u0b56 \\ u0B3D \\ u0B5C \\ u0B5D \\ u0B5F \\ u0b70 \\ u0b71] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" ORIYA-MALAYALAM " , " [:ORIYA:] " , " [:MALAYALAM:] " ,
" [ \\ u0D47 \\ u0D46 \\ u0D4a \\ u0D0E \\ u0D12 \\ u0D31 \\ u0D34 \\ u0D35] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" TELUGU-TAMIL " , " [:TELUGU:] " , " [:TAMIL:] " ,
2007-07-31 07:10:14 +00:00
" [ \\ u0bd7 \\ u0ba9 \\ u0bb4 \\ u0BF0 \\ u0BF1 \\ u0BF2] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" TAMIL-TELUGU " , " [:TAMIL:] " , " [:TELUGU:] " ,
2002-05-31 18:05:12 +00:00
" [ \\ u0C01 \\ u0c43 \\ u0c44 \\ u0c46 \\ u0c47 \\ u0c55 \\ u0c56 \\ u0c66 \\ u0C0B \\ u0C0C \\ u0C16 \\ u0C17 \\ u0C18 \\ u0C1B \\ u0C1D \\ u0C20 \\ u0C21 \\ u0C22 \\ u0C25 \\ u0C26 \\ u0C27 \\ u0C2B \\ u0C2C \\ u0C2D \\ u0C36 \\ u0C60 \\ u0C61] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" KANNADA-TAMIL " , " [:KANNADA:] " , " [:TAMIL:] " ,
2007-07-31 07:10:14 +00:00
" [ \\ u0bd7 \\ u0bc6 \\ u0ba9 \\ u0bb4 \\ u0BF0 \\ u0BF1 \\ u0BF2] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" TAMIL-KANNADA " , " [:TAMIL:] " , " [:KANNADA:] " ,
2004-05-19 04:17:37 +00:00
" [ \\ u0cc3 \\ u0cc4 \\ u0cc6 \\ u0cc7 \\ u0cd5 \\ u0cd6 \\ u0C8B \\ u0C8C \\ u0C96 \\ u0C97 \\ u0C98 \\ u0C9B \\ u0C9D \\ u0CA0 \\ u0CA1 \\ u0CA2 \\ u0CA5 \\ u0CA6 \\ u0CA7 \\ u0CAB \\ u0CAC \\ u0CAD \\ u0CB6 \\ u0cbc \\ u0cbd \\ u0CDE \\ u0CE0 \\ u0CE1] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" MALAYALAM-TAMIL " , " [:MALAYALAM:] " , " [:TAMIL:] " ,
2007-07-31 07:10:14 +00:00
" [ \\ u0ba9 \\ u0BF0 \\ u0BF1 \\ u0BF2] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" TAMIL-MALAYALAM " , " [:TAMIL:] " , " [:MALAYALAM:] " ,
" [ \\ u0d43 \\ u0d12 \\ u0D0B \\ u0D0C \\ u0D16 \\ u0D17 \\ u0D18 \\ u0D1B \\ u0D1D \\ u0D20 \\ u0D21 \\ u0D22 \\ u0D25 \\ u0D26 \\ u0D27 \\ u0D2B \\ u0D2C \\ u0D2D \\ u0D36 \\ u0D60 \\ u0D61] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" KANNADA-TELUGU " , " [:KANNADA:] " , " [:TELUGU:] " ,
2002-05-31 18:05:12 +00:00
" [ \\ u0C01 \\ u0c3f \\ u0c46 \\ u0c48 \\ u0c4a] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" TELUGU-KANNADA " , " [:TELUGU:] " , " [:KANNADA:] " ,
2004-05-19 04:17:37 +00:00
" [ \\ u0cc8 \\ u0cd5 \\ u0cd6 \\ u0cbc \\ u0cbd \\ u0CDE] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" MALAYALAM-TELUGU " , " [:MALAYALAM:] " , " [:TELUGU:] " ,
2002-05-31 18:05:12 +00:00
" [ \\ u0C01 \\ u0c44 \\ u0c4a \\ u0c4c \\ u0c4b \\ u0c55 \\ u0c56] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" TELUGU-MALAYALAM " , " [:TELUGU:] " , " [:MALAYALAM:] " ,
" [ \\ u0d4c \\ u0d57 \\ u0D34] " , /*roundtrip exclusions*/
2001-11-26 23:09:33 +00:00
2001-12-03 18:06:27 +00:00
" MALAYALAM-KANNADA " , " [:MALAYALAM:] " , " [:KANNADA:] " ,
2004-05-19 04:17:37 +00:00
" [ \\ u0cbc \\ u0cbd \\ u0cc4 \\ u0cc6 \\ u0cca \\ u0ccc \\ u0ccb \\ u0cd5 \\ u0cd6 \\ u0cDe] " , /*roundtrip exclusions*/
2001-10-30 18:29:45 +00:00
2001-12-03 18:06:27 +00:00
" KANNADA-MALAYALAM " , " [:KANNADA:] " , " [:MALAYALAM:] " ,
" [ \\ u0d4c \\ u0d57 \\ u0d46 \\ u0D34] " , /*roundtrip exclusions*/
2007-07-31 07:10:14 +00:00
2001-12-03 18:06:27 +00:00
" Latin-Bengali " , latinForIndic , " [[:Bengali:][ \\ u0964 \\ u0965]] " ,
2005-02-04 00:50:01 +00:00
" [ \\ u0965 \\ u09f0- \\ u09fa \\ u09ce] " /*roundtrip exclusions*/ ,
2007-07-31 07:10:14 +00:00
2001-12-03 18:06:27 +00:00
" Latin-Gurmukhi " , latinForIndic , " [[:Gurmukhi:][ \\ u0964 \\ u0965]] " ,
2004-05-19 04:17:37 +00:00
" [ \\ u0a01 \\ u0965 \\ u0a02 \\ u0a72 \\ u0a73 \\ u0a74] " /*roundtrip exclusions*/ ,
2007-07-31 07:10:14 +00:00
2001-12-03 18:06:27 +00:00
" Latin-Gujarati " , latinForIndic , " [[:Gujarati:][ \\ u0964 \\ u0965]] " ,
" [ \\ u0965] " /*roundtrip exclusions*/ ,
2007-07-31 07:10:14 +00:00
2001-12-03 18:06:27 +00:00
" Latin-Oriya " , latinForIndic , " [[:Oriya:][ \\ u0964 \\ u0965]] " ,
2004-05-19 04:17:37 +00:00
" [ \\ u0965 \\ u0b70] " /*roundtrip exclusions*/ ,
2007-07-31 07:10:14 +00:00
2001-12-03 18:06:27 +00:00
" Latin-Tamil " , latinForIndic , " [:Tamil:] " ,
2007-07-31 07:10:14 +00:00
" [ \\ u0BF0 \\ u0BF1 \\ u0BF2] " /*roundtrip exclusions*/ ,
2001-12-03 18:06:27 +00:00
" Latin-Telugu " , latinForIndic , " [:Telugu:] " ,
NULL /*roundtrip exclusions*/ ,
2007-07-31 07:10:14 +00:00
2001-12-03 18:06:27 +00:00
" Latin-Kannada " , latinForIndic , " [:Kannada:] " ,
NULL /*roundtrip exclusions*/ ,
2007-07-31 07:10:14 +00:00
2001-12-03 18:06:27 +00:00
" Latin-Malayalam " , latinForIndic , " [:Malayalam:] " ,
NULL /*roundtrip exclusions*/
2001-11-22 02:41:06 +00:00
} ;
2001-08-31 18:02:09 +00:00
2002-05-31 18:05:12 +00:00
void TransliteratorRoundTripTest : : TestDebug ( const char * name , const char fromSet [ ] ,
2002-08-20 20:59:01 +00:00
const char * toSet , const char * exclusions ) {
2002-05-31 18:05:12 +00:00
RTTest test ( name ) ;
Legal * legal = new LegalIndic ( ) ;
test . test ( UnicodeString ( fromSet , " " ) , UnicodeString ( toSet , " " ) , exclusions , this , quick , legal ) ;
}
2001-11-22 02:41:06 +00:00
void TransliteratorRoundTripTest : : TestInterIndic ( ) {
2002-07-30 00:03:36 +00:00
//TestDebug("Latin-Gurmukhi", latinForIndic, "[:Gurmukhi:]","[\\u0965\\u0a02\\u0a72\\u0a73\\u0a74]",TRUE);
2001-12-03 18:06:27 +00:00
int32_t num = ( int32_t ) ( sizeof ( interIndicArray ) / ( INTER_INDIC_ARRAY_WIDTH * sizeof ( char * ) ) ) ;
if ( quick ) {
2001-11-22 05:51:42 +00:00
logln ( " Testing only 5 of %i. Skipping rest (use -e for exhaustive) " , num ) ;
num = 5 ;
}
2008-08-06 00:42:42 +00:00
// CLDR bug #1911: This test should be moved into CLDR.
// It is left in its current state as a regression test.
// if (isICUVersionAtLeast(ICU_39)) {
// // We temporarily filter against Unicode 4.1, but we only do this
// // before version 3.4.
// errln("FAIL: TestInterIndic needs to be updated to remove delete the [:Age=4.1:] filter ");
// return;
// } else {
// logln("Warning: TestInterIndic needs to be updated to remove delete the section marked [:Age=4.1:] filter");
// }
2001-11-28 06:05:12 +00:00
for ( int i = 0 ; i < num ; i + + ) {
2001-12-03 18:06:27 +00:00
RTTest test ( interIndicArray [ i * INTER_INDIC_ARRAY_WIDTH + 0 ] ) ;
2003-04-23 00:23:26 +00:00
Legal * legal = new LegalIndic ( ) ;
2005-02-04 00:50:01 +00:00
logln ( UnicodeString ( " Stress testing " ) + interIndicArray [ i * INTER_INDIC_ARRAY_WIDTH + 0 ] ) ;
2006-04-04 23:23:05 +00:00
/* Uncomment lines below when transliterator is fixed */
/*
2004-05-19 04:17:37 +00:00
test . test ( interIndicArray [ i * INTER_INDIC_ARRAY_WIDTH + 1 ] ,
interIndicArray [ i * INTER_INDIC_ARRAY_WIDTH + 2 ] ,
interIndicArray [ i * INTER_INDIC_ARRAY_WIDTH + 3 ] , // roundtrip exclusions
this , quick , legal , 50 ) ;
2006-04-04 23:23:05 +00:00
*/
/* comment lines below when transliterator is fixed */
// start
UnicodeString source ( " [ " ) ;
source . append ( interIndicArray [ i * INTER_INDIC_ARRAY_WIDTH + 1 ] ) ;
source . append ( " & [:Age=4.1:]] " ) ;
UnicodeString target ( " [ " ) ;
target . append ( interIndicArray [ i * INTER_INDIC_ARRAY_WIDTH + 2 ] ) ;
target . append ( " & [:Age=4.1:]] " ) ;
test . test ( source ,
target ,
interIndicArray [ i * INTER_INDIC_ARRAY_WIDTH + 3 ] , // roundtrip exclusions
this , quick , legal , 50 ) ;
// end
2003-09-10 01:28:29 +00:00
delete legal ;
2001-08-31 18:02:09 +00:00
}
}
2001-11-22 02:41:06 +00:00
// end indic tests ----------------------------------------------------------
2002-09-21 00:43:14 +00:00
# endif /* #if !UCONFIG_NO_TRANSLITERATION */