2000-03-10 00:42:27 +00:00
/********************************************************************
2013-06-14 00:47:11 +00:00
* Copyright ( c ) 1999 - 2013 , International Business Machines
2007-07-27 03:12:12 +00:00
* Corporation and others . All Rights Reserved .
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* Date Name Description
* 12 / 14 / 99 Madhu Creation .
* 01 / 12 / 2000 Madhu updated for changed API
2000-03-10 00:42:27 +00:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2000-01-17 20:59:08 +00:00
# include "unicode/utypes.h"
2002-09-21 00:43:14 +00:00
# if !UCONFIG_NO_BREAK_ITERATION
2002-08-21 22:14:43 +00:00
# include "unicode/uchar.h"
2000-01-17 20:59:08 +00:00
# include "intltest.h"
# include "unicode/rbbi.h"
# include "unicode/schriter.h"
# include "rbbiapts.h"
2003-02-17 18:06:42 +00:00
# include "rbbidata.h"
# include "cstring.h"
2006-03-23 00:54:12 +00:00
# include "ubrkimpl.h"
2011-04-29 17:49:01 +00:00
# include "unicode/locid.h"
2003-11-07 00:04:13 +00:00
# include "unicode/ustring.h"
2005-06-26 21:31:36 +00:00
# include "unicode/utext.h"
2008-09-25 05:48:27 +00:00
# include "cmemory.h"
2000-01-17 20:59:08 +00:00
/**
 * API tests for the RuleBasedBreakIterator class.
 */
2004-03-05 22:37:54 +00:00
// TEST_ASSERT_SUCCESS(status)
//   When `status` holds a failure code, reports it through dataerrln() together
//   with the file and line of the macro invocation. Does not stop the test.
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        dataerrln("Failure at file %s, line %d, error = %s", \
                  __FILE__, __LINE__, u_errorName(status)); \
    } \
}

// TEST_ASSERT(expr)
//   When `expr` compares equal to FALSE, reports a test error through errln(),
//   quoting the source text of the expression. Does not stop the test.
#define TEST_ASSERT(expr) { \
    if ((expr) == FALSE) { \
        errln("Test Failure at file %s, line %d: \"%s\" is false.\n", \
              __FILE__, __LINE__, #expr); \
    } \
}
2000-01-17 20:59:08 +00:00
void RBBIAPITest : : TestCloneEquals ( )
{
2000-08-14 21:42:36 +00:00
UErrorCode status = U_ZERO_ERROR ;
RuleBasedBreakIterator * bi1 = ( RuleBasedBreakIterator * ) RuleBasedBreakIterator : : createCharacterInstance ( Locale : : getDefault ( ) , status ) ;
RuleBasedBreakIterator * biequal = ( RuleBasedBreakIterator * ) RuleBasedBreakIterator : : createCharacterInstance ( Locale : : getDefault ( ) , status ) ;
RuleBasedBreakIterator * bi3 = ( RuleBasedBreakIterator * ) RuleBasedBreakIterator : : createCharacterInstance ( Locale : : getDefault ( ) , status ) ;
RuleBasedBreakIterator * bi2 = ( RuleBasedBreakIterator * ) RuleBasedBreakIterator : : createWordInstance ( Locale : : getDefault ( ) , status ) ;
if ( U_FAILURE ( status ) ) {
2009-06-12 19:34:21 +00:00
errcheckln ( status , " Fail : in construction - %s " , u_errorName ( status ) ) ;
2000-08-14 21:42:36 +00:00
return ;
}
UnicodeString testString = " Testing word break iterators's clone() and equals() " ;
bi1 - > setText ( testString ) ;
bi2 - > setText ( testString ) ;
biequal - > setText ( testString ) ;
bi3 - > setText ( " hello " ) ;
logln ( ( UnicodeString ) " Testing equals() " ) ;
logln ( ( UnicodeString ) " Testing == and != " ) ;
2002-06-25 17:23:07 +00:00
UBool b = ( * bi1 ! = * biequal ) ;
b | = * bi1 = = * bi2 ;
b | = * bi1 = = * bi3 ;
if ( b ) {
errln ( ( UnicodeString ) " ERROR:1 RBBI's == and != operator failed. " ) ;
}
2000-08-14 21:42:36 +00:00
if ( * bi2 = = * biequal | | * bi2 = = * bi1 | | * biequal = = * bi3 )
errln ( ( UnicodeString ) " ERROR:2 RBBI's == and != operator failed. " ) ;
2007-08-16 23:14:06 +00:00
// Quick test of RulesBasedBreakIterator assignment -
2002-03-28 01:14:02 +00:00
// Check that
// two different iterators are !=
// they are == after assignment
// source and dest iterator produce the same next() after assignment.
// deleting one doesn't disable the other.
logln ( " Testing assignment " ) ;
2005-01-01 21:55:07 +00:00
RuleBasedBreakIterator * bix = ( RuleBasedBreakIterator * ) BreakIterator : : createLineInstance ( Locale : : getDefault ( ) , status ) ;
2002-03-28 01:14:02 +00:00
if ( U_FAILURE ( status ) ) {
2009-06-12 19:34:21 +00:00
errcheckln ( status , " Fail : in construction - %s " , u_errorName ( status ) ) ;
2002-03-28 01:14:02 +00:00
return ;
}
2002-09-05 18:51:50 +00:00
RuleBasedBreakIterator biDefault , biDefault2 ;
if ( U_FAILURE ( status ) ) {
errln ( ( UnicodeString ) " FAIL : in construction of default iterator " ) ;
return ;
}
if ( biDefault = = * bix ) {
errln ( ( UnicodeString ) " ERROR: iterators should not compare == " ) ;
return ;
}
if ( biDefault ! = biDefault2 ) {
errln ( ( UnicodeString ) " ERROR: iterators should compare == " ) ;
return ;
}
2002-03-28 01:14:02 +00:00
UnicodeString HelloString ( " Hello Kitty " ) ;
bix - > setText ( HelloString ) ;
if ( * bix = = * bi2 ) {
errln ( UnicodeString ( " ERROR: strings should not be equal before assignment. " ) ) ;
}
* bix = * bi2 ;
if ( * bix ! = * bi2 ) {
errln ( UnicodeString ( " ERROR: strings should be equal before assignment. " ) ) ;
}
int bixnext = bix - > next ( ) ;
int bi2next = bi2 - > next ( ) ;
if ( ! ( bixnext = = bi2next & & bixnext = = 7 ) ) {
errln ( UnicodeString ( " ERROR: iterators behaved differently after assignment. " ) ) ;
}
delete bix ;
if ( bi2 - > next ( ) ! = 8 ) {
errln ( UnicodeString ( " ERROR: iterator.next() failed after deleting copy. " ) ) ;
}
2000-08-14 21:42:36 +00:00
logln ( ( UnicodeString ) " Testing clone() " ) ;
RuleBasedBreakIterator * bi1clone = ( RuleBasedBreakIterator * ) bi1 - > clone ( ) ;
RuleBasedBreakIterator * bi2clone = ( RuleBasedBreakIterator * ) bi2 - > clone ( ) ;
2007-08-16 23:14:06 +00:00
if ( * bi1clone ! = * bi1 | | * bi1clone ! = * biequal | |
2000-08-14 21:42:36 +00:00
* bi1clone = = * bi3 | | * bi1clone = = * bi2 )
errln ( ( UnicodeString ) " ERROR:1 RBBI's clone() method failed " ) ;
2007-08-16 23:14:06 +00:00
if ( * bi2clone = = * bi1 | | * bi2clone = = * biequal | |
2000-08-14 21:42:36 +00:00
* bi2clone = = * bi3 | | * bi2clone ! = * bi2 )
errln ( ( UnicodeString ) " ERROR:2 RBBI's clone() method failed " ) ;
if ( bi1 - > getText ( ) ! = bi1clone - > getText ( ) | |
2007-08-16 23:14:06 +00:00
bi2clone - > getText ( ) ! = bi2 - > getText ( ) | |
2000-08-14 21:42:36 +00:00
* bi2clone = = * bi1clone )
errln ( ( UnicodeString ) " ERROR: RBBI's clone() method failed " ) ;
delete bi1clone ;
delete bi2clone ;
delete bi1 ;
delete bi3 ;
delete bi2 ;
delete biequal ;
2000-01-17 20:59:08 +00:00
}
2000-08-14 21:42:36 +00:00
2002-11-27 02:40:34 +00:00
void RBBIAPITest : : TestBoilerPlate ( )
{
UErrorCode status = U_ZERO_ERROR ;
2006-03-23 00:54:12 +00:00
BreakIterator * a = BreakIterator : : createWordInstance ( Locale ( " hi " ) , status ) ;
BreakIterator * b = BreakIterator : : createWordInstance ( Locale ( " hi_IN " ) , status ) ;
2004-09-13 15:39:02 +00:00
if ( U_FAILURE ( status ) ) {
2009-06-12 19:34:21 +00:00
errcheckln ( status , " Creation of break iterator failed %s " , u_errorName ( status ) ) ;
2004-09-13 15:39:02 +00:00
return ;
}
2002-11-27 02:40:34 +00:00
if ( * a ! = * b ) {
errln ( " Failed: boilerplate method operator!= does not return correct results " ) ;
}
2012-08-16 23:01:49 +00:00
// Japanese word break iterators are identical to root with
// a dictionary-based break iterator
BreakIterator * c = BreakIterator : : createCharacterInstance ( Locale ( " ja " ) , status ) ;
BreakIterator * d = BreakIterator : : createCharacterInstance ( Locale ( " root " ) , status ) ;
if ( c & & d ) {
if ( * c ! = * d ) {
errln ( " Failed: boilerplate method operator== does not return correct results " ) ;
2004-05-18 06:39:59 +00:00
}
} else {
errln ( " creation of break iterator failed " ) ;
2002-11-27 02:40:34 +00:00
}
2002-12-17 04:42:07 +00:00
delete a ;
delete b ;
delete c ;
2012-08-16 23:01:49 +00:00
delete d ;
2002-11-27 02:40:34 +00:00
}
2000-01-17 20:59:08 +00:00
void RBBIAPITest : : TestgetRules ( )
{
2000-08-14 21:42:36 +00:00
UErrorCode status = U_ZERO_ERROR ;
2000-01-17 20:59:08 +00:00
2000-08-14 21:42:36 +00:00
RuleBasedBreakIterator * bi1 = ( RuleBasedBreakIterator * ) RuleBasedBreakIterator : : createCharacterInstance ( Locale : : getDefault ( ) , status ) ;
2000-01-17 20:59:08 +00:00
RuleBasedBreakIterator * bi2 = ( RuleBasedBreakIterator * ) RuleBasedBreakIterator : : createWordInstance ( Locale : : getDefault ( ) , status ) ;
2000-08-14 21:42:36 +00:00
if ( U_FAILURE ( status ) ) {
2009-06-12 19:34:21 +00:00
errcheckln ( status , " FAIL: in construction - %s " , u_errorName ( status ) ) ;
2000-06-29 19:42:17 +00:00
delete bi1 ;
2000-08-14 21:42:36 +00:00
delete bi2 ;
return ;
}
logln ( ( UnicodeString ) " Testing toString() " ) ;
bi1 - > setText ( ( UnicodeString ) " Hello there " ) ;
RuleBasedBreakIterator * bi3 = ( RuleBasedBreakIterator * ) bi1 - > clone ( ) ;
UnicodeString temp = bi1 - > getRules ( ) ;
UnicodeString temp2 = bi2 - > getRules ( ) ;
UnicodeString temp3 = bi3 - > getRules ( ) ;
if ( temp2 . compare ( temp3 ) = = 0 | | temp . compare ( temp2 ) = = 0 | | temp . compare ( temp3 ) ! = 0 )
errln ( ( UnicodeString ) " ERROR: error in getRules() method " ) ;
delete bi1 ;
delete bi2 ;
delete bi3 ;
2000-01-17 20:59:08 +00:00
}
void RBBIAPITest : : TestHashCode ( )
{
2000-08-14 21:42:36 +00:00
UErrorCode status = U_ZERO_ERROR ;
RuleBasedBreakIterator * bi1 = ( RuleBasedBreakIterator * ) RuleBasedBreakIterator : : createCharacterInstance ( Locale : : getDefault ( ) , status ) ;
RuleBasedBreakIterator * bi3 = ( RuleBasedBreakIterator * ) RuleBasedBreakIterator : : createCharacterInstance ( Locale : : getDefault ( ) , status ) ;
RuleBasedBreakIterator * bi2 = ( RuleBasedBreakIterator * ) RuleBasedBreakIterator : : createWordInstance ( Locale : : getDefault ( ) , status ) ;
if ( U_FAILURE ( status ) ) {
2009-06-12 19:34:21 +00:00
errcheckln ( status , " Fail : in construction - %s " , u_errorName ( status ) ) ;
2000-08-14 21:42:36 +00:00
delete bi1 ;
delete bi2 ;
delete bi3 ;
return ;
}
logln ( ( UnicodeString ) " Testing hashCode() " ) ;
bi1 - > setText ( ( UnicodeString ) " Hash code " ) ;
bi2 - > setText ( ( UnicodeString ) " Hash code " ) ;
bi3 - > setText ( ( UnicodeString ) " Hash code " ) ;
RuleBasedBreakIterator * bi1clone = ( RuleBasedBreakIterator * ) bi1 - > clone ( ) ;
RuleBasedBreakIterator * bi2clone = ( RuleBasedBreakIterator * ) bi2 - > clone ( ) ;
if ( bi1 - > hashCode ( ) ! = bi1clone - > hashCode ( ) | | bi1 - > hashCode ( ) ! = bi3 - > hashCode ( ) | |
bi1clone - > hashCode ( ) ! = bi3 - > hashCode ( ) | | bi2 - > hashCode ( ) ! = bi2clone - > hashCode ( ) )
2002-06-25 17:23:07 +00:00
errln ( ( UnicodeString ) " ERROR: identical objects have different hashcodes " ) ;
2000-08-14 21:42:36 +00:00
if ( bi1 - > hashCode ( ) = = bi2 - > hashCode ( ) | | bi2 - > hashCode ( ) = = bi3 - > hashCode ( ) | |
bi1clone - > hashCode ( ) = = bi2clone - > hashCode ( ) | | bi1clone - > hashCode ( ) = = bi2 - > hashCode ( ) )
2002-06-25 17:23:07 +00:00
errln ( ( UnicodeString ) " ERROR: different objects have same hashcodes " ) ;
2000-08-14 21:42:36 +00:00
delete bi1clone ;
2007-08-16 23:14:06 +00:00
delete bi2clone ;
2000-08-14 21:42:36 +00:00
delete bi1 ;
delete bi2 ;
delete bi3 ;
2000-01-17 20:59:08 +00:00
}
void RBBIAPITest : : TestGetSetAdoptText ( )
{
logln ( ( UnicodeString ) " Testing getText setText " ) ;
2009-11-20 06:28:25 +00:00
IcuTestErrorCode status ( * this , " TestGetSetAdoptText " ) ;
2000-08-14 21:42:36 +00:00
UnicodeString str1 = " first string. " ;
UnicodeString str2 = " Second string. " ;
2009-11-20 06:28:25 +00:00
LocalPointer < RuleBasedBreakIterator > charIter1 ( ( RuleBasedBreakIterator * ) RuleBasedBreakIterator : : createCharacterInstance ( Locale : : getDefault ( ) , status ) ) ;
LocalPointer < RuleBasedBreakIterator > wordIter1 ( ( RuleBasedBreakIterator * ) RuleBasedBreakIterator : : createWordInstance ( Locale : : getDefault ( ) , status ) ) ;
if ( status . isFailure ( ) ) {
errcheckln ( status , " Fail : in construction - %s " , status . errorName ( ) ) ;
2000-08-14 21:42:36 +00:00
return ;
2000-01-17 20:59:08 +00:00
}
CharacterIterator * text1 = new StringCharacterIterator ( str1 ) ;
2000-06-29 19:42:17 +00:00
CharacterIterator * text1Clone = text1 - > clone ( ) ;
2000-01-17 20:59:08 +00:00
CharacterIterator * text2 = new StringCharacterIterator ( str2 ) ;
2002-03-28 01:14:02 +00:00
CharacterIterator * text3 = new StringCharacterIterator ( str2 , 3 , 10 , 3 ) ; // "ond str"
2007-08-16 23:14:06 +00:00
2000-01-17 20:59:08 +00:00
wordIter1 - > setText ( str1 ) ;
2006-04-22 05:29:27 +00:00
CharacterIterator * tci = & wordIter1 - > getText ( ) ;
UnicodeString tstr ;
tci - > getText ( tstr ) ;
TEST_ASSERT ( tstr = = str1 ) ;
2000-01-17 20:59:08 +00:00
if ( wordIter1 - > current ( ) ! = 0 )
errln ( ( UnicodeString ) " ERROR:1 setText did not set the iteration position to the beginning of the text, it is " + wordIter1 - > current ( ) + ( UnicodeString ) " \n " ) ;
2000-08-14 21:42:36 +00:00
wordIter1 - > next ( 2 ) ;
2000-01-17 20:59:08 +00:00
wordIter1 - > setText ( str2 ) ;
if ( wordIter1 - > current ( ) ! = 0 )
errln ( ( UnicodeString ) " ERROR:2 setText did not reset the iteration position to the beginning of the text, it is " + wordIter1 - > current ( ) + ( UnicodeString ) " \n " ) ;
2000-08-14 21:42:36 +00:00
charIter1 - > adoptText ( text1Clone ) ;
2006-04-22 05:29:27 +00:00
TEST_ASSERT ( wordIter1 - > getText ( ) ! = charIter1 - > getText ( ) ) ;
tci = & wordIter1 - > getText ( ) ;
tci - > getText ( tstr ) ;
TEST_ASSERT ( tstr = = str2 ) ;
tci = & charIter1 - > getText ( ) ;
tci - > getText ( tstr ) ;
TEST_ASSERT ( tstr = = str1 ) ;
2000-01-17 20:59:08 +00:00
2009-11-20 06:28:25 +00:00
LocalPointer < RuleBasedBreakIterator > rb ( ( RuleBasedBreakIterator * ) wordIter1 - > clone ( ) ) ;
2000-08-14 21:42:36 +00:00
rb - > adoptText ( text1 ) ;
if ( rb - > getText ( ) ! = * text1 )
errln ( ( UnicodeString ) " ERROR:1 error in adoptText " ) ;
rb - > adoptText ( text2 ) ;
if ( rb - > getText ( ) ! = * text2 )
errln ( ( UnicodeString ) " ERROR:2 error in adoptText " ) ;
2000-01-17 20:59:08 +00:00
2002-03-28 01:14:02 +00:00
// Adopt where iterator range is less than the entire orignal source string.
2006-04-22 05:29:27 +00:00
// (With the change of the break engine to working with UText internally,
// CharacterIterators starting at positions other than zero are not supported)
2002-03-28 01:14:02 +00:00
rb - > adoptText ( text3 ) ;
2006-04-22 05:29:27 +00:00
TEST_ASSERT ( rb - > preceding ( 2 ) = = 0 ) ;
TEST_ASSERT ( rb - > following ( 11 ) = = BreakIterator : : DONE ) ;
//if(rb->preceding(2) != 3) {
// errln((UnicodeString)"ERROR:3 error in adoptText ");
//}
//if(rb->following(11) != BreakIterator::DONE) {
// errln((UnicodeString)"ERROR:4 error in adoptText ");
//}
2000-01-17 20:59:08 +00:00
2005-06-26 21:31:36 +00:00
// UText API
//
// Quick test to see if UText is working at all.
//
2005-07-01 16:48:36 +00:00
const char * s1 = " \x68 \x65 \x6C \x6C \x6F \x20 \x77 \x6F \x72 \x6C \x64 " ; /* "hello world" in UTF-8 */
const char * s2 = " \x73 \x65 \x65 \x20 \x79 \x61 " ; /* "see ya" in UTF-8 */
2005-06-26 21:31:36 +00:00
// 012345678901
2009-11-20 06:28:25 +00:00
status . reset ( ) ;
LocalUTextPointer ut ( utext_openUTF8 ( NULL , s1 , - 1 , status ) ) ;
wordIter1 - > setText ( ut . getAlias ( ) , status ) ;
2005-06-26 21:31:36 +00:00
TEST_ASSERT_SUCCESS ( status ) ;
int32_t pos ;
pos = wordIter1 - > first ( ) ;
TEST_ASSERT ( pos = = 0 ) ;
pos = wordIter1 - > next ( ) ;
TEST_ASSERT ( pos = = 5 ) ;
pos = wordIter1 - > next ( ) ;
TEST_ASSERT ( pos = = 6 ) ;
pos = wordIter1 - > next ( ) ;
TEST_ASSERT ( pos = = 11 ) ;
pos = wordIter1 - > next ( ) ;
TEST_ASSERT ( pos = = UBRK_DONE ) ;
2009-11-20 06:28:25 +00:00
status . reset ( ) ;
LocalUTextPointer ut2 ( utext_openUTF8 ( NULL , s2 , - 1 , status ) ) ;
2005-06-26 21:31:36 +00:00
TEST_ASSERT_SUCCESS ( status ) ;
2009-11-20 06:28:25 +00:00
wordIter1 - > setText ( ut2 . getAlias ( ) , status ) ;
2005-06-26 21:31:36 +00:00
TEST_ASSERT_SUCCESS ( status ) ;
pos = wordIter1 - > first ( ) ;
TEST_ASSERT ( pos = = 0 ) ;
pos = wordIter1 - > next ( ) ;
TEST_ASSERT ( pos = = 3 ) ;
pos = wordIter1 - > next ( ) ;
TEST_ASSERT ( pos = = 4 ) ;
pos = wordIter1 - > last ( ) ;
TEST_ASSERT ( pos = = 6 ) ;
pos = wordIter1 - > previous ( ) ;
TEST_ASSERT ( pos = = 4 ) ;
pos = wordIter1 - > previous ( ) ;
TEST_ASSERT ( pos = = 3 ) ;
pos = wordIter1 - > previous ( ) ;
TEST_ASSERT ( pos = = 0 ) ;
pos = wordIter1 - > previous ( ) ;
TEST_ASSERT ( pos = = UBRK_DONE ) ;
2009-11-20 06:28:25 +00:00
status . reset ( ) ;
2006-04-22 05:29:27 +00:00
UnicodeString sEmpty ;
2009-11-20 06:28:25 +00:00
LocalUTextPointer gut2 ( utext_openUnicodeString ( NULL , & sEmpty , status ) ) ;
wordIter1 - > getUText ( gut2 . getAlias ( ) , status ) ;
2005-07-14 23:24:38 +00:00
TEST_ASSERT_SUCCESS ( status ) ;
2009-11-20 06:28:25 +00:00
status . reset ( ) ;
}
2007-08-16 23:14:06 +00:00
2003-05-16 22:05:35 +00:00
void RBBIAPITest : : TestIteration ( )
2000-01-17 20:59:08 +00:00
{
2003-05-16 22:05:35 +00:00
// This test just verifies that the API is present.
// Testing for correct operation of the break rules happens elsewhere.
2000-08-14 21:42:36 +00:00
2003-05-16 22:05:35 +00:00
UErrorCode status = U_ZERO_ERROR ;
RuleBasedBreakIterator * bi = ( RuleBasedBreakIterator * ) RuleBasedBreakIterator : : createCharacterInstance ( Locale : : getDefault ( ) , status ) ;
if ( U_FAILURE ( status ) | | bi = = NULL ) {
2009-06-12 19:34:21 +00:00
errcheckln ( status , " Failure creating character break iterator. Status = %s " , u_errorName ( status ) ) ;
2003-05-16 22:05:35 +00:00
}
delete bi ;
2000-08-14 21:42:36 +00:00
2003-05-16 22:05:35 +00:00
status = U_ZERO_ERROR ;
bi = ( RuleBasedBreakIterator * ) RuleBasedBreakIterator : : createWordInstance ( Locale : : getDefault ( ) , status ) ;
if ( U_FAILURE ( status ) | | bi = = NULL ) {
2009-06-12 19:34:21 +00:00
errcheckln ( status , " Failure creating Word break iterator. Status = %s " , u_errorName ( status ) ) ;
2000-08-14 21:42:36 +00:00
}
2003-05-16 22:05:35 +00:00
delete bi ;
2000-08-14 21:42:36 +00:00
status = U_ZERO_ERROR ;
2003-05-16 22:05:35 +00:00
bi = ( RuleBasedBreakIterator * ) RuleBasedBreakIterator : : createLineInstance ( Locale : : getDefault ( ) , status ) ;
if ( U_FAILURE ( status ) | | bi = = NULL ) {
2009-06-12 19:34:21 +00:00
errcheckln ( status , " Failure creating Line break iterator. Status = %s " , u_errorName ( status ) ) ;
2003-05-16 22:05:35 +00:00
}
delete bi ;
2000-08-14 21:42:36 +00:00
2003-05-16 22:05:35 +00:00
status = U_ZERO_ERROR ;
bi = ( RuleBasedBreakIterator * ) RuleBasedBreakIterator : : createSentenceInstance ( Locale : : getDefault ( ) , status ) ;
if ( U_FAILURE ( status ) | | bi = = NULL ) {
2009-06-12 19:34:21 +00:00
errcheckln ( status , " Failure creating Sentence break iterator. Status = %s " , u_errorName ( status ) ) ;
2003-05-16 22:05:35 +00:00
}
delete bi ;
2000-08-14 21:42:36 +00:00
2003-05-16 22:05:35 +00:00
status = U_ZERO_ERROR ;
bi = ( RuleBasedBreakIterator * ) RuleBasedBreakIterator : : createTitleInstance ( Locale : : getDefault ( ) , status ) ;
if ( U_FAILURE ( status ) | | bi = = NULL ) {
2009-06-12 19:34:21 +00:00
errcheckln ( status , " Failure creating Title break iterator. Status = %s " , u_errorName ( status ) ) ;
2000-08-14 21:42:36 +00:00
}
2003-05-16 22:05:35 +00:00
delete bi ;
2000-08-14 21:42:36 +00:00
status = U_ZERO_ERROR ;
2003-05-16 22:05:35 +00:00
bi = ( RuleBasedBreakIterator * ) RuleBasedBreakIterator : : createCharacterInstance ( Locale : : getDefault ( ) , status ) ;
if ( U_FAILURE ( status ) | | bi = = NULL ) {
2009-06-12 19:34:21 +00:00
errcheckln ( status , " Failure creating character break iterator. Status = %s " , u_errorName ( status ) ) ;
2003-05-16 22:05:35 +00:00
return ; // Skip the rest of these tests.
}
2000-08-14 21:42:36 +00:00
2003-05-16 22:05:35 +00:00
UnicodeString testString = " 0123456789 " ;
bi - > setText ( testString ) ;
int32_t i ;
i = bi - > first ( ) ;
if ( i ! = 0 ) {
errln ( " Incorrect value from bi->first(). Expected 0, got %d. " , i ) ;
2000-08-14 21:42:36 +00:00
}
2003-05-16 22:05:35 +00:00
i = bi - > last ( ) ;
if ( i ! = 10 ) {
errln ( " Incorrect value from bi->last(). Expected 10, got %d " , i ) ;
}
//
// Previous
//
bi - > last ( ) ;
i = bi - > previous ( ) ;
if ( i ! = 9 ) {
errln ( " Incorrect value from bi->last() at line %d. Expected 9, got %d " , __LINE__ , i ) ;
2000-08-14 21:42:36 +00:00
}
2003-05-16 22:05:35 +00:00
bi - > first ( ) ;
i = bi - > previous ( ) ;
if ( i ! = BreakIterator : : DONE ) {
errln ( " Incorrect value from bi->previous() at line %d. Expected DONE, got %d " , __LINE__ , i ) ;
2000-08-14 21:42:36 +00:00
}
2003-05-16 22:05:35 +00:00
//
// next()
//
bi - > first ( ) ;
i = bi - > next ( ) ;
if ( i ! = 1 ) {
errln ( " Incorrect value from bi->next() at line %d. Expected 1, got %d " , __LINE__ , i ) ;
}
2000-08-14 21:42:36 +00:00
2003-05-16 22:05:35 +00:00
bi - > last ( ) ;
i = bi - > next ( ) ;
if ( i ! = BreakIterator : : DONE ) {
errln ( " Incorrect value from bi->next() at line %d. Expected DONE, got %d " , __LINE__ , i ) ;
2000-08-14 21:42:36 +00:00
}
2003-05-16 22:05:35 +00:00
//
// current()
//
bi - > first ( ) ;
i = bi - > current ( ) ;
if ( i ! = 0 ) {
errln ( " Incorrect value from bi->previous() at line %d. Expected 0, got %d " , __LINE__ , i ) ;
}
2000-08-14 21:42:36 +00:00
2003-05-16 22:05:35 +00:00
bi - > next ( ) ;
i = bi - > current ( ) ;
if ( i ! = 1 ) {
errln ( " Incorrect value from bi->previous() at line %d. Expected 1, got %d " , __LINE__ , i ) ;
2000-08-14 21:42:36 +00:00
}
2003-05-16 22:05:35 +00:00
bi - > last ( ) ;
bi - > next ( ) ;
i = bi - > current ( ) ;
if ( i ! = 10 ) {
errln ( " Incorrect value from bi->previous() at line %d. Expected 10, got %d " , __LINE__ , i ) ;
}
bi - > first ( ) ;
bi - > previous ( ) ;
i = bi - > current ( ) ;
if ( i ! = 0 ) {
errln ( " Incorrect value from bi->previous() at line %d. Expected 0, got %d " , __LINE__ , i ) ;
}
2002-07-30 19:09:14 +00:00
2003-05-16 22:05:35 +00:00
//
// Following()
//
i = bi - > following ( 4 ) ;
if ( i ! = 5 ) {
errln ( " Incorrect value from bi->following() at line %d. Expected 5, got %d " , __LINE__ , i ) ;
}
2000-01-17 20:59:08 +00:00
2003-05-16 22:05:35 +00:00
i = bi - > following ( 9 ) ;
if ( i ! = 10 ) {
errln ( " Incorrect value from bi->following() at line %d. Expected 10, got %d " , __LINE__ , i ) ;
2000-08-14 21:42:36 +00:00
}
2000-01-17 20:59:08 +00:00
2003-05-16 22:05:35 +00:00
i = bi - > following ( 10 ) ;
if ( i ! = BreakIterator : : DONE ) {
errln ( " Incorrect value from bi->following() at line %d. Expected DONE, got %d " , __LINE__ , i ) ;
}
2000-01-17 20:59:08 +00:00
2003-05-16 22:05:35 +00:00
//
// Preceding
//
i = bi - > preceding ( 4 ) ;
if ( i ! = 3 ) {
errln ( " Incorrect value from bi->preceding() at line %d. Expected 3, got %d " , __LINE__ , i ) ;
2000-08-14 21:42:36 +00:00
}
2003-05-16 22:05:35 +00:00
i = bi - > preceding ( 10 ) ;
if ( i ! = 9 ) {
errln ( " Incorrect value from bi->preceding() at line %d. Expected 9, got %d " , __LINE__ , i ) ;
}
i = bi - > preceding ( 1 ) ;
if ( i ! = 0 ) {
errln ( " Incorrect value from bi->preceding() at line %d. Expected 0, got %d " , __LINE__ , i ) ;
}
i = bi - > preceding ( 0 ) ;
if ( i ! = BreakIterator : : DONE ) {
errln ( " Incorrect value from bi->preceding() at line %d. Expected DONE, got %d " , __LINE__ , i ) ;
}
//
// isBoundary()
//
bi - > first ( ) ;
if ( bi - > isBoundary ( 3 ) ! = TRUE ) {
errln ( " Incorrect value from bi->isBoudary() at line %d. Expected TRUE, got FALSE " , __LINE__ , i ) ;
}
i = bi - > current ( ) ;
if ( i ! = 3 ) {
errln ( " Incorrect value from bi->current() at line %d. Expected 3, got %d " , __LINE__ , i ) ;
}
if ( bi - > isBoundary ( 11 ) ! = FALSE ) {
errln ( " Incorrect value from bi->isBoudary() at line %d. Expected FALSE, got TRUE " , __LINE__ , i ) ;
}
i = bi - > current ( ) ;
if ( i ! = 10 ) {
errln ( " Incorrect value from bi->current() at line %d. Expected 10, got %d " , __LINE__ , i ) ;
}
//
// next(n)
//
bi - > first ( ) ;
i = bi - > next ( 4 ) ;
if ( i ! = 4 ) {
errln ( " Incorrect value from bi->next() at line %d. Expected 4, got %d " , __LINE__ , i ) ;
}
i = bi - > next ( 6 ) ;
if ( i ! = 10 ) {
errln ( " Incorrect value from bi->next() at line %d. Expected 10, got %d " , __LINE__ , i ) ;
}
bi - > first ( ) ;
i = bi - > next ( 11 ) ;
if ( i ! = BreakIterator : : DONE ) {
errln ( " Incorrect value from bi->next() at line %d. Expected BreakIterator::DONE, got %d " , __LINE__ , i ) ;
}
delete bi ;
2000-01-17 20:59:08 +00:00
}
2002-06-25 17:23:07 +00:00
2003-05-16 22:05:35 +00:00
2002-06-25 17:23:07 +00:00
void RBBIAPITest : : TestBuilder ( ) {
UnicodeString rulesString1 = " $Letters = [:L:]; \n "
" $Numbers = [:N:]; \n "
" $Letters+; \n "
" $Numbers+; \n "
" [^$Letters $Numbers]; \n "
" !.*; \n " ;
UnicodeString testString1 = " abc123..abc " ;
// 01234567890
int32_t bounds1 [ ] = { 0 , 3 , 6 , 7 , 8 , 11 } ;
UErrorCode status = U_ZERO_ERROR ;
UParseError parseError ;
2007-08-16 23:14:06 +00:00
2002-06-25 17:23:07 +00:00
RuleBasedBreakIterator * bi = new RuleBasedBreakIterator ( rulesString1 , parseError , status ) ;
if ( U_FAILURE ( status ) ) {
2009-06-12 19:34:21 +00:00
dataerrln ( " Fail : in construction - %s " , u_errorName ( status ) ) ;
2002-06-25 17:23:07 +00:00
} else {
bi - > setText ( testString1 ) ;
doBoundaryTest ( * bi , testString1 , bounds1 ) ;
}
2002-06-27 01:50:22 +00:00
delete bi ;
2002-06-25 17:23:07 +00:00
}
2002-07-12 01:30:23 +00:00
//
// TestQuoteGrouping
// Single quotes within rules imply a grouping, so that a modifier
// following the quoted text (* or +) applies to all of the quoted chars.
//
void RBBIAPITest : : TestQuoteGrouping ( ) {
UnicodeString rulesString1 = " #Here comes the rule... \n "
2002-07-30 19:09:14 +00:00
" '$@!'*; \n " // (\$\@\!)*
2002-07-12 01:30:23 +00:00
" .; \n " ;
2002-07-30 19:09:14 +00:00
UnicodeString testString1 = " $@!$@!X$@!!X " ;
// 0123456789012
int32_t bounds1 [ ] = { 0 , 6 , 7 , 10 , 11 , 12 } ;
2002-07-12 01:30:23 +00:00
UErrorCode status = U_ZERO_ERROR ;
UParseError parseError ;
2007-08-16 23:14:06 +00:00
2002-07-12 01:30:23 +00:00
RuleBasedBreakIterator * bi = new RuleBasedBreakIterator ( rulesString1 , parseError , status ) ;
if ( U_FAILURE ( status ) ) {
2009-06-12 19:34:21 +00:00
dataerrln ( " Fail : in construction - %s " , u_errorName ( status ) ) ;
2002-07-12 01:30:23 +00:00
} else {
bi - > setText ( testString1 ) ;
doBoundaryTest ( * bi , testString1 , bounds1 ) ;
}
delete bi ;
}
2002-07-30 19:09:14 +00:00
//
2003-10-03 22:25:26 +00:00
// TestRuleStatus
2002-07-30 19:09:14 +00:00
// Test word break rule status constants.
//
2003-10-03 22:25:26 +00:00
void RBBIAPITest : : TestRuleStatus ( ) {
2007-08-16 23:14:06 +00:00
UChar str [ 30 ] ;
2012-08-16 23:01:49 +00:00
//no longer test Han or hiragana breaking here: ruleStatusVec would return nothing
// changed UBRK_WORD_KANA to UBRK_WORD_IDEO
u_unescape ( " plain word 123.45 \\ u30a1 \\ u30a2 " ,
// 012345678901234567 8 9 0
// Katakana
2003-11-07 00:04:13 +00:00
str , 30 ) ;
UnicodeString testString1 ( str ) ;
2012-08-16 23:01:49 +00:00
int32_t bounds1 [ ] = { 0 , 5 , 6 , 10 , 11 , 17 , 18 , 20 , 21 } ;
2002-07-30 19:09:14 +00:00
int32_t tag_lo [ ] = { UBRK_WORD_NONE , UBRK_WORD_LETTER , UBRK_WORD_NONE , UBRK_WORD_LETTER ,
UBRK_WORD_NONE , UBRK_WORD_NUMBER , UBRK_WORD_NONE ,
2012-08-16 23:01:49 +00:00
UBRK_WORD_IDEO , UBRK_WORD_NONE } ;
2002-07-30 19:09:14 +00:00
2002-08-08 00:39:13 +00:00
int32_t tag_hi [ ] = { UBRK_WORD_NONE_LIMIT , UBRK_WORD_LETTER_LIMIT , UBRK_WORD_NONE_LIMIT , UBRK_WORD_LETTER_LIMIT ,
UBRK_WORD_NONE_LIMIT , UBRK_WORD_NUMBER_LIMIT , UBRK_WORD_NONE_LIMIT ,
2012-08-16 23:01:49 +00:00
UBRK_WORD_IDEO_LIMIT , UBRK_WORD_NONE_LIMIT } ;
2002-07-30 19:09:14 +00:00
UErrorCode status = U_ZERO_ERROR ;
2007-08-16 23:14:06 +00:00
2013-06-14 00:47:11 +00:00
BreakIterator * bi = BreakIterator : : createWordInstance ( Locale : : getEnglish ( ) , status ) ;
2002-07-30 19:09:14 +00:00
if ( U_FAILURE ( status ) ) {
2009-06-12 19:34:21 +00:00
errcheckln ( status , " Fail : in construction - %s " , u_errorName ( status ) ) ;
2002-07-30 19:09:14 +00:00
} else {
bi - > setText ( testString1 ) ;
// First test that the breaks are in the right spots.
doBoundaryTest ( * bi , testString1 , bounds1 ) ;
// Then go back and check tag values
int32_t i = 0 ;
int32_t pos , tag ;
for ( pos = bi - > first ( ) ; pos ! = BreakIterator : : DONE ; pos = bi - > next ( ) , i + + ) {
if ( pos ! = bounds1 [ i ] ) {
errln ( " FAIL: unexpected word break at postion %d " , pos ) ;
break ;
}
tag = bi - > getRuleStatus ( ) ;
if ( tag < tag_lo [ i ] | | tag > = tag_hi [ i ] ) {
errln ( " FAIL: incorrect tag value %d at position %d " , tag , pos ) ;
break ;
}
2007-08-16 23:14:06 +00:00
2004-03-05 22:37:54 +00:00
// Check that we get the same tag values from getRuleStatusVec()
int32_t vec [ 10 ] ;
int t = bi - > getRuleStatusVec ( vec , 10 , status ) ;
TEST_ASSERT_SUCCESS ( status ) ;
TEST_ASSERT ( t = = 1 ) ;
TEST_ASSERT ( vec [ 0 ] = = tag ) ;
2002-07-30 19:09:14 +00:00
}
}
delete bi ;
2003-10-03 22:25:26 +00:00
// Now test line break status. This test mostly is to confirm that the status constants
// are correctly declared in the header.
testString1 = " test line. \n " ;
// break type s s h
2013-06-14 00:47:11 +00:00
bi = BreakIterator : : createLineInstance ( Locale : : getEnglish ( ) , status ) ;
2003-10-03 22:25:26 +00:00
if ( U_FAILURE ( status ) ) {
2009-06-12 19:34:21 +00:00
errcheckln ( status , " failed to create word break iterator. - %s " , u_errorName ( status ) ) ;
2003-10-03 22:25:26 +00:00
} else {
int32_t i = 0 ;
int32_t pos , tag ;
UBool success ;
bi - > setText ( testString1 ) ;
pos = bi - > current ( ) ;
tag = bi - > getRuleStatus ( ) ;
for ( i = 0 ; i < 3 ; i + + ) {
switch ( i ) {
case 0 :
success = pos = = 0 & & tag = = UBRK_LINE_SOFT ; break ;
case 1 :
success = pos = = 5 & & tag = = UBRK_LINE_SOFT ; break ;
case 2 :
success = pos = = 12 & & tag = = UBRK_LINE_HARD ; break ;
default :
success = FALSE ; break ;
}
if ( success = = FALSE ) {
errln ( " Fail: incorrect word break status or position. i=%d, pos=%d, tag=%d " ,
i , pos , tag ) ;
break ;
}
pos = bi - > next ( ) ;
tag = bi - > getRuleStatus ( ) ;
}
if ( UBRK_LINE_SOFT > = UBRK_LINE_SOFT_LIMIT | |
UBRK_LINE_HARD > = UBRK_LINE_HARD_LIMIT | |
2010-07-12 18:03:29 +00:00
( UBRK_LINE_HARD > UBRK_LINE_SOFT & & UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT ) ) {
2003-10-03 22:25:26 +00:00
errln ( " UBRK_LINE_* constants from header are inconsistent. " ) ;
}
}
delete bi ;
2002-07-30 19:09:14 +00:00
}
2002-07-12 01:30:23 +00:00
2004-03-05 22:37:54 +00:00
//
// TestRuleStatusVec
// Test the vector form of break rule status.
//
void RBBIAPITest : : TestRuleStatusVec ( ) {
2008-06-17 00:55:35 +00:00
UnicodeString rulesString ( " [A-N]{100}; \n "
2004-03-05 22:37:54 +00:00
" [a-w]{200}; \n "
" [ \\ p{L}]{300}; \n "
" [ \\ p{N}]{400}; \n "
" [0-5]{500}; \n "
2008-06-17 00:55:35 +00:00
" !.*; \n " , - 1 , US_INV ) ;
2004-03-05 22:37:54 +00:00
UnicodeString testString1 = " Aapz5? " ;
int32_t statusVals [ 10 ] ;
int32_t numStatuses ;
int32_t pos ;
UErrorCode status = U_ZERO_ERROR ;
UParseError parseError ;
2007-08-16 23:14:06 +00:00
2004-03-05 22:37:54 +00:00
RuleBasedBreakIterator * bi = new RuleBasedBreakIterator ( rulesString , parseError , status ) ;
2009-06-12 19:34:21 +00:00
if ( U_FAILURE ( status ) ) {
dataerrln ( " Failure at file %s, line %d, error = %s " , __FILE__ , __LINE__ , u_errorName ( status ) ) ;
} else {
2004-03-05 22:37:54 +00:00
bi - > setText ( testString1 ) ;
// A
pos = bi - > next ( ) ;
TEST_ASSERT ( pos = = 1 ) ;
numStatuses = bi - > getRuleStatusVec ( statusVals , 10 , status ) ;
TEST_ASSERT_SUCCESS ( status ) ;
TEST_ASSERT ( numStatuses = = 2 ) ;
TEST_ASSERT ( statusVals [ 0 ] = = 100 ) ;
TEST_ASSERT ( statusVals [ 1 ] = = 300 ) ;
// a
pos = bi - > next ( ) ;
TEST_ASSERT ( pos = = 2 ) ;
numStatuses = bi - > getRuleStatusVec ( statusVals , 10 , status ) ;
TEST_ASSERT_SUCCESS ( status ) ;
TEST_ASSERT ( numStatuses = = 2 ) ;
TEST_ASSERT ( statusVals [ 0 ] = = 200 ) ;
TEST_ASSERT ( statusVals [ 1 ] = = 300 ) ;
// p
pos = bi - > next ( ) ;
TEST_ASSERT ( pos = = 3 ) ;
numStatuses = bi - > getRuleStatusVec ( statusVals , 10 , status ) ;
TEST_ASSERT_SUCCESS ( status ) ;
TEST_ASSERT ( numStatuses = = 2 ) ;
TEST_ASSERT ( statusVals [ 0 ] = = 200 ) ;
TEST_ASSERT ( statusVals [ 1 ] = = 300 ) ;
// z
pos = bi - > next ( ) ;
TEST_ASSERT ( pos = = 4 ) ;
numStatuses = bi - > getRuleStatusVec ( statusVals , 10 , status ) ;
TEST_ASSERT_SUCCESS ( status ) ;
TEST_ASSERT ( numStatuses = = 1 ) ;
TEST_ASSERT ( statusVals [ 0 ] = = 300 ) ;
// 5
pos = bi - > next ( ) ;
TEST_ASSERT ( pos = = 5 ) ;
numStatuses = bi - > getRuleStatusVec ( statusVals , 10 , status ) ;
TEST_ASSERT_SUCCESS ( status ) ;
TEST_ASSERT ( numStatuses = = 2 ) ;
TEST_ASSERT ( statusVals [ 0 ] = = 400 ) ;
TEST_ASSERT ( statusVals [ 1 ] = = 500 ) ;
// ?
pos = bi - > next ( ) ;
TEST_ASSERT ( pos = = 6 ) ;
numStatuses = bi - > getRuleStatusVec ( statusVals , 10 , status ) ;
TEST_ASSERT_SUCCESS ( status ) ;
TEST_ASSERT ( numStatuses = = 1 ) ;
TEST_ASSERT ( statusVals [ 0 ] = = 0 ) ;
2004-03-08 16:53:59 +00:00
//
2007-08-16 23:14:06 +00:00
// Check buffer overflow error handling. Char == A
2004-03-08 16:53:59 +00:00
//
bi - > first ( ) ;
pos = bi - > next ( ) ;
TEST_ASSERT ( pos = = 1 ) ;
memset ( statusVals , - 1 , sizeof ( statusVals ) ) ;
numStatuses = bi - > getRuleStatusVec ( statusVals , 0 , status ) ;
TEST_ASSERT ( status = = U_BUFFER_OVERFLOW_ERROR ) ;
TEST_ASSERT ( numStatuses = = 2 ) ;
TEST_ASSERT ( statusVals [ 0 ] = = - 1 ) ;
status = U_ZERO_ERROR ;
memset ( statusVals , - 1 , sizeof ( statusVals ) ) ;
numStatuses = bi - > getRuleStatusVec ( statusVals , 1 , status ) ;
TEST_ASSERT ( status = = U_BUFFER_OVERFLOW_ERROR ) ;
TEST_ASSERT ( numStatuses = = 2 ) ;
TEST_ASSERT ( statusVals [ 0 ] = = 100 ) ;
TEST_ASSERT ( statusVals [ 1 ] = = - 1 ) ;
status = U_ZERO_ERROR ;
memset ( statusVals , - 1 , sizeof ( statusVals ) ) ;
numStatuses = bi - > getRuleStatusVec ( statusVals , 2 , status ) ;
TEST_ASSERT_SUCCESS ( status ) ;
TEST_ASSERT ( numStatuses = = 2 ) ;
TEST_ASSERT ( statusVals [ 0 ] = = 100 ) ;
TEST_ASSERT ( statusVals [ 1 ] = = 300 ) ;
TEST_ASSERT ( statusVals [ 2 ] = = - 1 ) ;
2004-03-05 22:37:54 +00:00
}
delete bi ;
}
2002-08-29 00:28:11 +00:00
//
// Bug 2190 Regression test. Builder crash on rule consisting of only a
// $variable reference
void RBBIAPITest : : TestBug2190 ( ) {
UnicodeString rulesString1 = " $aaa = abcd; \n "
" $bbb = $aaa; \n "
" $bbb; \n " ;
UnicodeString testString1 = " abcdabcd " ;
// 01234567890
int32_t bounds1 [ ] = { 0 , 4 , 8 } ;
UErrorCode status = U_ZERO_ERROR ;
UParseError parseError ;
2007-08-16 23:14:06 +00:00
2002-08-29 00:28:11 +00:00
RuleBasedBreakIterator * bi = new RuleBasedBreakIterator ( rulesString1 , parseError , status ) ;
if ( U_FAILURE ( status ) ) {
2009-06-12 19:34:21 +00:00
dataerrln ( " Fail : in construction - %s " , u_errorName ( status ) ) ;
2002-08-29 00:28:11 +00:00
} else {
bi - > setText ( testString1 ) ;
doBoundaryTest ( * bi , testString1 , bounds1 ) ;
}
delete bi ;
}
2002-11-08 00:22:18 +00:00
void RBBIAPITest : : TestRegistration ( ) {
2004-07-18 02:02:06 +00:00
# if !UCONFIG_NO_SERVICE
2003-06-12 01:47:15 +00:00
UErrorCode status = U_ZERO_ERROR ;
2006-03-23 00:54:12 +00:00
BreakIterator * ja_word = BreakIterator : : createWordInstance ( " ja_JP " , status ) ;
2003-06-12 01:47:15 +00:00
// ok to not delete these if we exit because of error?
2006-03-23 00:54:12 +00:00
BreakIterator * ja_char = BreakIterator : : createCharacterInstance ( " ja_JP " , status ) ;
2003-06-12 01:47:15 +00:00
BreakIterator * root_word = BreakIterator : : createWordInstance ( " " , status ) ;
BreakIterator * root_char = BreakIterator : : createCharacterInstance ( " " , status ) ;
2009-06-12 19:34:21 +00:00
if ( status = = U_MISSING_RESOURCE_ERROR | | status = = U_FILE_ACCESS_ERROR ) {
dataerrln ( " Error creating instances of break interactors - %s " , u_errorName ( status ) ) ;
2012-08-16 23:01:49 +00:00
2009-06-12 19:34:21 +00:00
delete ja_word ;
delete ja_char ;
delete root_word ;
delete root_char ;
return ;
}
2007-08-16 23:14:06 +00:00
2006-03-23 00:54:12 +00:00
URegistryKey key = BreakIterator : : registerInstance ( ja_word , " xx " , UBRK_WORD , status ) ;
2003-06-12 01:47:15 +00:00
{
2012-08-16 23:01:49 +00:00
#if 0 // With a dictionary based word breaking, ja_word is identical to root.
2006-03-23 00:54:12 +00:00
if ( ja_word & & * ja_word = = * root_word ) {
errln ( " japan not different from root " ) ;
2003-06-12 01:47:15 +00:00
}
2012-08-16 23:01:49 +00:00
# endif
2002-11-15 23:52:14 +00:00
}
2007-08-16 23:14:06 +00:00
2003-06-12 01:47:15 +00:00
{
BreakIterator * result = BreakIterator : : createWordInstance ( " xx_XX " , status ) ;
2004-05-18 06:39:59 +00:00
UBool fail = TRUE ;
if ( result ) {
2006-03-23 00:54:12 +00:00
fail = * result ! = * ja_word ;
2004-05-18 06:39:59 +00:00
}
2003-06-12 01:47:15 +00:00
delete result ;
if ( fail ) {
errln ( " bad result for xx_XX/word " ) ;
}
2002-11-08 00:22:18 +00:00
}
2007-08-16 23:14:06 +00:00
2003-06-12 01:47:15 +00:00
{
2006-03-23 00:54:12 +00:00
BreakIterator * result = BreakIterator : : createCharacterInstance ( " ja_JP " , status ) ;
2004-05-18 06:39:59 +00:00
UBool fail = TRUE ;
if ( result ) {
2006-03-23 00:54:12 +00:00
fail = * result ! = * ja_char ;
2004-05-18 06:39:59 +00:00
}
2003-06-12 01:47:15 +00:00
delete result ;
if ( fail ) {
2006-03-23 00:54:12 +00:00
errln ( " bad result for ja_JP/char " ) ;
2003-06-12 01:47:15 +00:00
}
}
2007-08-16 23:14:06 +00:00
2003-06-12 01:47:15 +00:00
{
BreakIterator * result = BreakIterator : : createCharacterInstance ( " xx_XX " , status ) ;
2004-05-18 06:39:59 +00:00
UBool fail = TRUE ;
if ( result ) {
fail = * result ! = * root_char ;
}
2003-06-12 01:47:15 +00:00
delete result ;
if ( fail ) {
errln ( " bad result for xx_XX/char " ) ;
}
}
2007-08-16 23:14:06 +00:00
2003-06-12 01:47:15 +00:00
{
StringEnumeration * avail = BreakIterator : : getAvailableLocales ( ) ;
UBool found = FALSE ;
const UnicodeString * p ;
while ( ( p = avail - > snext ( status ) ) ) {
if ( p - > compare ( " xx " ) = = 0 ) {
found = TRUE ;
break ;
}
}
delete avail ;
if ( ! found ) {
errln ( " did not find test locale " ) ;
}
}
2007-08-16 23:14:06 +00:00
2003-06-12 01:47:15 +00:00
{
UBool unreg = BreakIterator : : unregister ( key , status ) ;
if ( ! unreg ) {
errln ( " unable to unregister " ) ;
}
}
2007-08-16 23:14:06 +00:00
2003-06-12 01:47:15 +00:00
{
2005-01-01 21:55:07 +00:00
BreakIterator * result = BreakIterator : : createWordInstance ( " en_US " , status ) ;
2003-06-12 01:47:15 +00:00
BreakIterator * root = BreakIterator : : createWordInstance ( " " , status ) ;
2004-05-18 06:39:59 +00:00
UBool fail = TRUE ;
if ( root ) {
fail = * root ! = * result ;
}
2003-06-12 01:47:15 +00:00
delete root ;
delete result ;
if ( fail ) {
errln ( " did not get root break " ) ;
}
}
2007-08-16 23:14:06 +00:00
2003-06-12 01:47:15 +00:00
{
StringEnumeration * avail = BreakIterator : : getAvailableLocales ( ) ;
UBool found = FALSE ;
const UnicodeString * p ;
while ( ( p = avail - > snext ( status ) ) ) {
if ( p - > compare ( " xx " ) = = 0 ) {
found = TRUE ;
break ;
}
}
delete avail ;
if ( found ) {
errln ( " found test locale " ) ;
}
}
2007-08-16 23:14:06 +00:00
2003-06-12 01:47:15 +00:00
{
int32_t count ;
UBool foundLocale = FALSE ;
const Locale * avail = BreakIterator : : getAvailableLocales ( count ) ;
for ( int i = 0 ; i < count ; i + + ) {
if ( avail [ i ] = = Locale : : getEnglish ( ) ) {
foundLocale = TRUE ;
break ;
}
}
if ( foundLocale = = FALSE ) {
errln ( " BreakIterator::getAvailableLocales(&count), failed to find EN. " ) ;
}
}
2007-08-16 23:14:06 +00:00
2006-03-23 00:54:12 +00:00
// ja_word was adopted by factory
delete ja_char ;
2003-06-12 01:47:15 +00:00
delete root_word ;
delete root_char ;
2004-07-18 02:02:06 +00:00
# endif
2002-11-08 00:22:18 +00:00
}
2002-08-29 00:28:11 +00:00
2003-02-17 18:06:42 +00:00
void RBBIAPITest : : RoundtripRule ( const char * dataFile ) {
UErrorCode status = U_ZERO_ERROR ;
UParseError parseError ;
2004-11-11 23:34:58 +00:00
parseError . line = 0 ;
parseError . offset = 0 ;
2009-11-20 06:28:25 +00:00
LocalUDataMemoryPointer data ( udata_open ( U_ICUDATA_BRKITR , " brk " , dataFile , & status ) ) ;
2003-02-17 18:06:42 +00:00
uint32_t length ;
const UChar * builtSource ;
const uint8_t * rbbiRules ;
const uint8_t * builtRules ;
if ( U_FAILURE ( status ) ) {
2009-06-12 19:34:21 +00:00
errcheckln ( status , " Can't open \" %s \" - %s " , dataFile , u_errorName ( status ) ) ;
2003-02-17 18:06:42 +00:00
return ;
}
2009-11-20 06:28:25 +00:00
builtRules = ( const uint8_t * ) udata_getMemory ( data . getAlias ( ) ) ;
2003-02-17 18:06:42 +00:00
builtSource = ( const UChar * ) ( builtRules + ( ( RBBIDataHeader * ) builtRules ) - > fRuleSource ) ;
RuleBasedBreakIterator * brkItr = new RuleBasedBreakIterator ( builtSource , parseError , status ) ;
if ( U_FAILURE ( status ) ) {
errln ( " createRuleBasedBreakIterator: ICU Error \" %s \" at line %d, column %d \n " ,
u_errorName ( status ) , parseError . line , parseError . offset ) ;
return ;
} ;
rbbiRules = brkItr - > getBinaryRules ( length ) ;
logln ( " Comparing \" %s \" len=%d " , dataFile , length ) ;
if ( memcmp ( builtRules , rbbiRules , ( int32_t ) length ) ! = 0 ) {
errln ( " Built rules and rebuilt rules are different %s " , dataFile ) ;
return ;
}
delete brkItr ;
}
void RBBIAPITest : : TestRoundtripRules ( ) {
RoundtripRule ( " word " ) ;
RoundtripRule ( " title " ) ;
RoundtripRule ( " sent " ) ;
RoundtripRule ( " line " ) ;
RoundtripRule ( " char " ) ;
if ( ! quick ) {
2006-07-07 21:27:59 +00:00
RoundtripRule ( " word_POSIX " ) ;
2003-02-17 18:06:42 +00:00
}
}
2008-09-25 05:48:27 +00:00
// Try out the RuleBasedBreakIterator constructors that take RBBIDataHeader*
// (these are protected so we access them via a local class RBBIWithProtectedFunctions).
// This is just a sanity check, not a thorough test (e.g. we don't check that the
// first delete actually frees rulesCopy).
void RBBIAPITest : : TestCreateFromRBBIData ( ) {
// Get some handy RBBIData
const char * brkName = " word " ; // or "sent", "line", "char", etc.
UErrorCode status = U_ZERO_ERROR ;
2009-11-20 06:28:25 +00:00
LocalUDataMemoryPointer data ( udata_open ( U_ICUDATA_BRKITR , " brk " , brkName , & status ) ) ;
2008-09-25 05:48:27 +00:00
if ( U_SUCCESS ( status ) ) {
2009-11-20 06:28:25 +00:00
const RBBIDataHeader * builtRules = ( const RBBIDataHeader * ) udata_getMemory ( data . getAlias ( ) ) ;
2008-09-25 05:48:27 +00:00
uint32_t length = builtRules - > fLength ;
RBBIWithProtectedFunctions * brkItr ;
// Try the memory-adopting constructor, need to copy the data first
RBBIDataHeader * rulesCopy = ( RBBIDataHeader * ) uprv_malloc ( length ) ;
if ( rulesCopy ) {
uprv_memcpy ( rulesCopy , builtRules , length ) ;
brkItr = new RBBIWithProtectedFunctions ( rulesCopy , status ) ;
if ( U_SUCCESS ( status ) ) {
delete brkItr ; // this should free rulesCopy
} else {
errln ( " create RuleBasedBreakIterator from RBBIData (adopted): ICU Error \" %s \" \n " , u_errorName ( status ) ) ;
status = U_ZERO_ERROR ; // reset for the next test
uprv_free ( rulesCopy ) ;
}
}
// Now try the non-adopting constructor
brkItr = new RBBIWithProtectedFunctions ( builtRules , RBBIWithProtectedFunctions : : kDontAdopt , status ) ;
if ( U_SUCCESS ( status ) ) {
delete brkItr ; // this should NOT attempt to free builtRules
if ( builtRules - > fLength ! = length ) { // sanity check
errln ( " create RuleBasedBreakIterator from RBBIData (non-adopted): delete affects data \n " ) ;
}
} else {
errln ( " create RuleBasedBreakIterator from RBBIData (non-adopted): ICU Error \" %s \" \n " , u_errorName ( status ) ) ;
}
}
2011-04-29 17:49:01 +00:00
// getBinaryRules() and RuleBasedBreakIterator(uint8_t binaryRules, ...)
//
status = U_ZERO_ERROR ;
RuleBasedBreakIterator * rb = ( RuleBasedBreakIterator * ) BreakIterator : : createWordInstance ( Locale : : getEnglish ( ) , status ) ;
2011-05-10 22:01:46 +00:00
if ( rb = = NULL | | U_FAILURE ( status ) ) {
dataerrln ( " Unable to create BreakIterator::createWordInstance (Locale::getEnglish) - %s " , u_errorName ( status ) ) ;
} else {
uint32_t length ;
const uint8_t * rules = rb - > getBinaryRules ( length ) ;
RuleBasedBreakIterator * rb2 = new RuleBasedBreakIterator ( rules , length , status ) ;
TEST_ASSERT_SUCCESS ( status ) ;
TEST_ASSERT ( * rb = = * rb2 ) ;
UnicodeString words = " one two three " ;
rb2 - > setText ( words ) ;
int wordCounter = 0 ;
while ( rb2 - > next ( ) ! = UBRK_DONE ) {
wordCounter + + ;
}
TEST_ASSERT ( wordCounter = = 6 ) ;
2011-04-29 17:49:01 +00:00
2011-05-10 22:01:46 +00:00
status = U_ZERO_ERROR ;
RuleBasedBreakIterator * rb3 = new RuleBasedBreakIterator ( rules , length - 1 , status ) ;
TEST_ASSERT ( status = = U_ILLEGAL_ARGUMENT_ERROR ) ;
2011-04-29 17:49:01 +00:00
2011-05-10 22:01:46 +00:00
delete rb ;
delete rb2 ;
delete rb3 ;
}
2008-09-25 05:48:27 +00:00
}
2011-06-09 22:49:40 +00:00
void RBBIAPITest : : TestRefreshInputText ( ) {
/*
* RefreshInput changes out the input of a Break Iterator without
* changing anything else in the iterator ' s state . Used with Java JNI ,
* when Java moves the underlying string storage . This test
* runs BreakIterator : : next ( ) repeatedly , moving the text in the middle of the sequence .
* The right set of boundaries should still be found .
*/
UChar testStr [ ] = { 0x20 , 0x41 , 0x20 , 0x42 , 0x20 , 0x43 , 0x20 , 0x44 , 0x0 } ; /* = " A B C D" */
UChar movedStr [ ] = { 0x20 , 0x20 , 0x20 , 0x20 , 0x20 , 0x20 , 0x20 , 0x20 , 0 } ;
UErrorCode status = U_ZERO_ERROR ;
UText ut1 = UTEXT_INITIALIZER ;
UText ut2 = UTEXT_INITIALIZER ;
RuleBasedBreakIterator * bi = ( RuleBasedBreakIterator * ) BreakIterator : : createLineInstance ( Locale : : getEnglish ( ) , status ) ;
TEST_ASSERT_SUCCESS ( status ) ;
utext_openUChars ( & ut1 , testStr , - 1 , & status ) ;
TEST_ASSERT_SUCCESS ( status ) ;
2011-08-30 19:11:32 +00:00
if ( U_SUCCESS ( status ) ) {
bi - > setText ( & ut1 , status ) ;
TEST_ASSERT_SUCCESS ( status ) ;
2011-06-09 22:49:40 +00:00
2011-08-30 19:11:32 +00:00
/* Line boundaries will occur before each letter in the original string */
TEST_ASSERT ( 1 = = bi - > next ( ) ) ;
TEST_ASSERT ( 3 = = bi - > next ( ) ) ;
2011-06-09 22:49:40 +00:00
2011-08-30 19:11:32 +00:00
/* Move the string, kill the original string. */
u_strcpy ( movedStr , testStr ) ;
u_memset ( testStr , 0x20 , u_strlen ( testStr ) ) ;
utext_openUChars ( & ut2 , movedStr , - 1 , & status ) ;
TEST_ASSERT_SUCCESS ( status ) ;
RuleBasedBreakIterator * returnedBI = & bi - > refreshInputText ( & ut2 , status ) ;
TEST_ASSERT_SUCCESS ( status ) ;
TEST_ASSERT ( bi = = returnedBI ) ;
/* Find the following matches, now working in the moved string. */
TEST_ASSERT ( 5 = = bi - > next ( ) ) ;
TEST_ASSERT ( 7 = = bi - > next ( ) ) ;
TEST_ASSERT ( 8 = = bi - > next ( ) ) ;
TEST_ASSERT ( UBRK_DONE = = bi - > next ( ) ) ;
utext_close ( & ut1 ) ;
utext_close ( & ut2 ) ;
}
2011-06-09 22:49:40 +00:00
delete bi ;
}
2000-01-17 20:59:08 +00:00
//---------------------------------------------
// runIndexedTest
//---------------------------------------------
2000-08-23 19:11:16 +00:00
void RBBIAPITest : : runIndexedTest ( int32_t index , UBool exec , const char * & name , char * /*par*/ )
2000-01-17 20:59:08 +00:00
{
if ( exec ) logln ( ( UnicodeString ) " TestSuite RuleBasedBreakIterator API " ) ;
switch ( index ) {
// case 0: name = "TestConstruction"; if (exec) TestConstruction(); break;
2009-08-04 21:09:17 +00:00
# if !UCONFIG_NO_FILE_IO
2002-08-29 00:28:11 +00:00
case 0 : name = " TestCloneEquals " ; if ( exec ) TestCloneEquals ( ) ; break ;
case 1 : name = " TestgetRules " ; if ( exec ) TestgetRules ( ) ; break ;
case 2 : name = " TestHashCode " ; if ( exec ) TestHashCode ( ) ; break ;
case 3 : name = " TestGetSetAdoptText " ; if ( exec ) TestGetSetAdoptText ( ) ; break ;
2003-05-16 22:05:35 +00:00
case 4 : name = " TestIteration " ; if ( exec ) TestIteration ( ) ; break ;
2009-08-04 21:09:17 +00:00
# else
case 0 : case 1 : case 2 : case 3 : case 4 : name = " skip " ; break ;
# endif
2005-01-01 21:55:07 +00:00
case 5 : name = " TestBuilder " ; if ( exec ) TestBuilder ( ) ; break ;
case 6 : name = " TestQuoteGrouping " ; if ( exec ) TestQuoteGrouping ( ) ; break ;
2009-08-04 21:09:17 +00:00
case 7 : name = " TestRuleStatusVec " ; if ( exec ) TestRuleStatusVec ( ) ; break ;
case 8 : name = " TestBug2190 " ; if ( exec ) TestBug2190 ( ) ; break ;
# if !UCONFIG_NO_FILE_IO
case 9 : name = " TestRegistration " ; if ( exec ) TestRegistration ( ) ; break ;
case 10 : name = " TestBoilerPlate " ; if ( exec ) TestBoilerPlate ( ) ; break ;
case 11 : name = " TestRuleStatus " ; if ( exec ) TestRuleStatus ( ) ; break ;
2005-01-01 21:55:07 +00:00
case 12 : name = " TestRoundtripRules " ; if ( exec ) TestRoundtripRules ( ) ; break ;
2008-09-25 05:48:27 +00:00
case 13 : name = " TestCreateFromRBBIData " ; if ( exec ) TestCreateFromRBBIData ( ) ; break ;
2009-08-04 21:09:17 +00:00
# else
case 9 : case 10 : case 11 : case 12 : case 13 : name = " skip " ; break ;
# endif
2011-06-09 22:49:40 +00:00
case 14 : name = " TestRefreshInputText " ; if ( exec ) TestRefreshInputText ( ) ; break ;
2002-08-29 00:28:11 +00:00
2003-11-05 23:50:39 +00:00
default : name = " " ; break ; // needed to end loop
2000-01-17 20:59:08 +00:00
}
}
//---------------------------------------------
//Internal subroutines
//---------------------------------------------
2013-06-14 00:47:11 +00:00
void RBBIAPITest : : doBoundaryTest ( BreakIterator & bi , UnicodeString & text , int32_t * boundaries ) {
2000-01-17 20:59:08 +00:00
logln ( ( UnicodeString ) " testIsBoundary(): " ) ;
int32_t p = 0 ;
2000-05-18 22:08:39 +00:00
UBool isB ;
2000-01-17 20:59:08 +00:00
for ( int32_t i = 0 ; i < text . length ( ) ; i + + ) {
isB = bi . isBoundary ( i ) ;
logln ( ( UnicodeString ) " bi.isBoundary( " + i + " ) -> " + isB ) ;
if ( i = = boundaries [ p ] ) {
if ( ! isB )
errln ( ( UnicodeString ) " Wrong result from isBoundary() for " + i + ( UnicodeString ) " : expected true, got false " ) ;
p + + ;
}
else {
if ( isB )
errln ( ( UnicodeString ) " Wrong result from isBoundary() for " + i + ( UnicodeString ) " : expected false, got true " ) ;
}
}
}
void RBBIAPITest : : doTest ( UnicodeString & testString , int32_t start , int32_t gotoffset , int32_t expectedOffset , const char * expectedString ) {
UnicodeString selected ;
2000-08-14 21:42:36 +00:00
UnicodeString expected = CharsToUnicodeString ( expectedString ) ;
2000-01-17 20:59:08 +00:00
if ( gotoffset ! = expectedOffset )
errln ( ( UnicodeString ) " ERROR:****returned # " + gotoffset + ( UnicodeString ) " instead of # " + expectedOffset ) ;
if ( start < = gotoffset ) {
2007-08-16 23:14:06 +00:00
testString . extractBetween ( start , gotoffset , selected ) ;
2000-01-17 20:59:08 +00:00
}
else {
testString . extractBetween ( gotoffset , start , selected ) ;
}
if ( selected . compare ( expected ) ! = 0 )
errln ( prettify ( ( UnicodeString ) " ERROR:****selected \" " + selected + " \" instead of \" " + expected + " \" " ) ) ;
else
logln ( prettify ( " ****selected \" " + selected + " \" " ) ) ;
2000-08-14 21:42:36 +00:00
}
2000-01-17 20:59:08 +00:00
2008-09-25 05:48:27 +00:00
//---------------------------------------------
//RBBIWithProtectedFunctions class functions
//---------------------------------------------
// Forwards to the RuleBasedBreakIterator(RBBIDataHeader *, UErrorCode &) constructor
// so that the test can reach it; adds no behavior of its own.
// NOTE(review): the test treats this as the memory-adopting form (the iterator
// frees `data` when deleted) - confirm against the base class documentation.
RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(RBBIDataHeader* data, UErrorCode &status)
    : RuleBasedBreakIterator(data, status)
{
    // Nothing to do: all work happens in the base class constructor.
}
// Forwards to the base class constructor that takes RuleBasedBreakIterator::kDontAdopt
// so that the test can reach it; adds no behavior of its own. The unnamed
// EDontAdopt parameter only selects this overload.
// NOTE(review): the test expects that the iterator never frees `data` in this
// form - confirm against the base class documentation.
RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(const RBBIDataHeader* data, enum EDontAdopt, UErrorCode &status)
    : RuleBasedBreakIterator(data, RuleBasedBreakIterator::kDontAdopt, status)
{
    // Nothing to do: all work happens in the base class constructor.
}
2002-09-21 00:43:14 +00:00
# endif /* #if !UCONFIG_NO_BREAK_ITERATION */