From fa68259b4d70fa8359c35d8196ca0d20771195d4 Mon Sep 17 00:00:00 2001 From: Andy Heninger Date: Fri, 20 Jan 2012 02:35:00 +0000 Subject: [PATCH] ICU-8711 Move Thai and Khmer test data from dedicated file to standard test data file, and remove dedicated code. X-SVN-Rev: 31234 --- icu4c/source/test/intltest/Makefile.in | 4 +- icu4c/source/test/intltest/dicttest.cpp | 140 ------------------ icu4c/source/test/intltest/dicttest.h | 31 ---- icu4c/source/test/intltest/intltest.vcxproj | 2 - .../test/intltest/intltest.vcxproj.filters | 6 - icu4c/source/test/intltest/itrbbi.cpp | 4 +- icu4c/source/test/testdata/rbbitst.txt | 51 ++++++- 7 files changed, 53 insertions(+), 185 deletions(-) delete mode 100644 icu4c/source/test/intltest/dicttest.cpp delete mode 100644 icu4c/source/test/intltest/dicttest.h diff --git a/icu4c/source/test/intltest/Makefile.in b/icu4c/source/test/intltest/Makefile.in index 295e83141b..d4f7e7c313 100644 --- a/icu4c/source/test/intltest/Makefile.in +++ b/icu4c/source/test/intltest/Makefile.in @@ -1,6 +1,6 @@ #****************************************************************************** # -# Copyright (C) 1999-2011, International Business Machines +# Copyright (C) 1999-2012, International Business Machines # Corporation and others. All Rights Reserved. 
# #****************************************************************************** @@ -48,7 +48,7 @@ tfsmalls.o tmsgfmt.o trcoll.o tscoll.o tsdate.o tsdcfmsy.o tsdtfmsy.o \ tsmthred.o tsnmfmt.o tsputil.o tstnrapi.o tstnorm.o tzbdtest.o \ tzregts.o tztest.o ucdtest.o usettest.o ustrtest.o strcase.o transtst.o strtest.o thcoll.o \ bytestrietest.o ucharstrietest.o \ -itrbbi.o rbbiapts.o dicttest.o rbbitst.o ittrans.o transapi.o cpdtrtst.o \ +itrbbi.o rbbiapts.o rbbitst.o ittrans.o transapi.o cpdtrtst.o \ testutil.o transrt.o trnserr.o normconf.o sfwdchit.o \ jamotest.o srchtest.o reptest.o regextst.o \ itrbnf.o itrbnfrt.o itrbnfp.o ucaconf.o icusvtst.o \ diff --git a/icu4c/source/test/intltest/dicttest.cpp b/icu4c/source/test/intltest/dicttest.cpp deleted file mode 100644 index c985a281bf..0000000000 --- a/icu4c/source/test/intltest/dicttest.cpp +++ /dev/null @@ -1,140 +0,0 @@ -/* -********************************************************************** -* Copyright (C) 2011-2011, International Business Machines Corporation -* and others. All Rights Reserved. -********************************************************************** -************************************************************************ -* Date Name Description -* 05/14/2011 grhoten Creation. 
-************************************************************************/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_BREAK_ITERATION - -#include "dicttest.h" -#include "textfile.h" -#include "uvector.h" -#include "unicode/rbbi.h" - -void DictionaryWordTest::TestThaiBreaks() { - UErrorCode status=U_ZERO_ERROR; - BreakIterator* b; - Locale locale = Locale("th"); - int32_t p, index; - UChar c[]= { - 0x0E01, 0x0E39, 0x0020, 0x0E01, 0x0E34, 0x0E19, 0x0E01, 0x0E38, 0x0E49, 0x0E07, 0x0020, 0x0E1B, - 0x0E34, 0x0E49, 0x0E48, 0x0E07, 0x0E2D, 0x0E22, 0x0E39, 0x0E48, 0x0E43, 0x0E19, - 0x0E16, 0x0E49, 0x0E33, 0x0000 - }; - int32_t expectedWordResult[] = { - 2, 3, 6, 10, 11, 15, 17, 20, 22 - }; - int32_t expectedLineResult[] = { - 3, 6, 11, 15, 17, 20, 22 - }; - - int32_t size = u_strlen(c); - UnicodeString text=UnicodeString(c); - - b = BreakIterator::createWordInstance(locale, status); - if (U_FAILURE(status)) { - errcheckln(status, "Unable to create thai word break iterator. - %s", u_errorName(status)); - return; - } - b->setText(text); - p = index = 0; - while ((p=b->next())!=BreakIterator::DONE && p < size) { - if (p != expectedWordResult[index++]) { - errln("Incorrect break given by thai word break iterator. Expected: %d Got: %d", expectedWordResult[index-1], p); - } - } - delete b; - - b = BreakIterator::createLineInstance(locale, status); - if (U_FAILURE(status)) { - errln("Unable to create thai line break iterator."); - return; - } - b->setText(text); - p = index = 0; - while ((p=b->next())!=BreakIterator::DONE && p < size) { - if (p != expectedLineResult[index++]) { - errln("Incorrect break given by thai line break iterator. 
Expected: %d Got: %d", expectedLineResult[index-1], p); - } - } - - delete b; -} - -#define DICTIONARY_TEST_FILE "wordsegments.txt" - -void DictionaryWordTest::TestWordBoundaries() { - UErrorCode status = U_ZERO_ERROR; - - TextFile phrases(DICTIONARY_TEST_FILE, "UTF8", status); - if (U_FAILURE(status)) { - dataerrln("Can't open "DICTIONARY_TEST_FILE": %s; skipping test", - u_errorName(status)); - return; - } - - // Due to how the word break iterator works, - // scripts for languages that use no spaces should use the correct dictionary by default. - BreakIterator *wb = BreakIterator::createWordInstance("en", status); - if (U_FAILURE(status)) { - dataerrln("Word break iterator can not be opened: %s; skipping test", - u_errorName(status)); - return; - } - - int32_t pos, pIdx; - int32_t testLines = 0; - UnicodeString phrase; - while (phrases.readLineSkippingComments(phrase, status, FALSE) && U_SUCCESS(status)) { - UVector breaks(status); - - for (pIdx = 0; pIdx < phrase.length(); pIdx++) { - if (phrase.charAt(pIdx) == 0x007C /* | */) { - breaks.addElement(pIdx, status); - phrase.remove(pIdx, 1); - } - } - breaks.addElement(pIdx, status); - - wb->setText(phrase); - int32_t brkArrPos = 0; - while ((pos=wb->next())!=BreakIterator::DONE) { - int32_t expectedPos = breaks.elementAti(brkArrPos); - if (expectedPos != pos) { - errln("Incorrect forward word break on line %d. Expected: %d Got: %d", - phrases.getLineNumber(), breaks.elementAt(brkArrPos), pos); - } - brkArrPos++; - } - brkArrPos = breaks.size() - 1; - while ((pos=wb->previous())!=BreakIterator::DONE) { - brkArrPos--; - int32_t expectedPos = breaks.elementAti(brkArrPos); - if (expectedPos != pos) { - errln("Incorrect backward word break on line %d. 
Expected: %d Got: %d", - phrases.getLineNumber(), breaks.elementAt(brkArrPos), pos); - } - } - testLines++; - } - delete wb; - logln("%d tests were run.", testLines); -} - -void DictionaryWordTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par */) -{ - if (exec) logln("TestSuite DictionaryWordTest: "); - TESTCASE_AUTO_BEGIN; - TESTCASE_AUTO(TestThaiBreaks); - TESTCASE_AUTO(TestWordBoundaries); - TESTCASE_AUTO_END; -} - - -#endif diff --git a/icu4c/source/test/intltest/dicttest.h b/icu4c/source/test/intltest/dicttest.h deleted file mode 100644 index ffce470d57..0000000000 --- a/icu4c/source/test/intltest/dicttest.h +++ /dev/null @@ -1,31 +0,0 @@ -/* -********************************************************************** -* Copyright (C) 2011-2011, International Business Machines Corporation -* and others. All Rights Reserved. -********************************************************************** -************************************************************************ -* Date Name Description -* 05/14/2011 grhoten Creation. 
-************************************************************************/ - -#ifndef DICTTEST_H -#define DICTTEST_H - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_BREAK_ITERATION - -#include "intltest.h" - - -class DictionaryWordTest: public IntlTest { -public: - void runIndexedTest( int32_t index, UBool exec, const char* &name, char* par = NULL ); - void TestWordBoundaries(); - void TestThaiBreaks(); -}; - -#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ - -#endif - diff --git a/icu4c/source/test/intltest/intltest.vcxproj b/icu4c/source/test/intltest/intltest.vcxproj index 613d635649..5a527d6c81 100644 --- a/icu4c/source/test/intltest/intltest.vcxproj +++ b/icu4c/source/test/intltest/intltest.vcxproj @@ -224,7 +224,6 @@ - @@ -390,7 +389,6 @@ - diff --git a/icu4c/source/test/intltest/intltest.vcxproj.filters b/icu4c/source/test/intltest/intltest.vcxproj.filters index 11738b65b5..962d3df31c 100644 --- a/icu4c/source/test/intltest/intltest.vcxproj.filters +++ b/icu4c/source/test/intltest/intltest.vcxproj.filters @@ -444,9 +444,6 @@ collation - - break iteration - @@ -815,8 +812,5 @@ collation - - break iteration - diff --git a/icu4c/source/test/intltest/itrbbi.cpp b/icu4c/source/test/intltest/itrbbi.cpp index c6deee06c3..b99a405bea 100644 --- a/icu4c/source/test/intltest/itrbbi.cpp +++ b/icu4c/source/test/intltest/itrbbi.cpp @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 1998-2011, International Business Machines Corporation +* Copyright (C) 1998-2012, International Business Machines Corporation * and others. All Rights Reserved. 
********************************************************************** */ @@ -19,7 +19,6 @@ #include "itrbbi.h" #include "rbbiapts.h" #include "rbbitst.h" -#include "dicttest.h" #define TESTCLASS(n,classname) \ case n: \ @@ -39,7 +38,6 @@ void IntlTestRBBI::runIndexedTest( int32_t index, UBool exec, const char* &name, switch (index) { TESTCLASS(0, RBBIAPITest); TESTCLASS(1, RBBITest); - TESTCLASS(2, DictionaryWordTest); default: name=""; break; } } diff --git a/icu4c/source/test/testdata/rbbitst.txt b/icu4c/source/test/testdata/rbbitst.txt index 8c185980bf..63c6d7de69 100644 --- a/icu4c/source/test/testdata/rbbitst.txt +++ b/icu4c/source/test/testdata/rbbitst.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2001-2011 International Business Machines +# Copyright (c) 2001-2012 International Business Machines # Corporation and others. All Rights Reserved. # # RBBI Test Data @@ -561,6 +561,55 @@ What is the proper use of the abbreviation pp.? •Yes, I am definatelly 12" tal \u0E14\u0E49\u0E27\u0E22<200>\u0e2b\u0e25\u0e32\u0e22<200>\ \u0e1e\u0e22\u0e32\u0e07\u0e04\u0e4c<200> +# Test data originally from http://bugs.icu-project.org/trac/search?q=r30327 +•กู<200> •กิน<200>กุ้ง<200> •ปิ้่<200>งอ<200>ยู่<200>ใน<200>ถ้ำ<200> + +•\u0E01\u0E39<200>\u0020•\u0E01\u0E34\u0E19<200>\u0E01\u0E38\u0E49\u0E07<200>\ +\u0020•\u0E1B\u0E34\u0E49\u0E48<200>\u0E07\u0E2D<200>\u0E22\u0E39\u0E48<200>\ +\u0E43\u0E19<200>\u0E16\u0E49\u0E33<200> + + +•\u0E01\u0E39\u0020•\u0E01\u0E34\u0E19•\u0E01\u0E38\u0E49\u0E07\ +\u0020•\u0E1B\u0E34\u0E49\u0E48•\u0E07\u0E2D•\u0E22\u0E39\u0E48•\ +\u0E43\u0E19•\u0E16\u0E49\u0E33• + +########################################################################################## +# +# Khmer Tests +# +########################################################################################## + +# Test data originally from http://bugs.icu-project.org/trac/search?q=r30327 +# from the file testdata/wordsegments.txt + + + +•តើ<200>លោក<200>មក<200>ពី<200>ប្រទេស<200>ណា<200> 
+•សណ្ដូក<200>ក<200>បណ្ដែត<200>ខ្លួន<200> +•ពណ៌ស<200>ម្ដេច<200>ថា<200>ខ្មៅ<200> +#ប្រយោគ|ពី|របៀប|រួបរួម|និង|ភាព|ផ្សេងគ្នា|ដែល|អាច|ចូល<200> +•ប្រយោគ<200>ពី<200>របៀប<200>ដែល<200>និង<200>ភាព<200>ផ្សេងគ្នា<200>ដែល<200>អាច<200>ចូល<200> +#ប្រយោគ|ពី|របៀប|ជា|មួយ|និង|ភាព|ផ្សេងគ្នា|ដែល|អាច|ចូល<200> +•សូម<200>ចំណាយពេល<200>បន្តិច<200>ដើម្បី<200>អធិស្ឋាន<200>អរព្រះគុណ<200>ដល់<200>ព្រះអង្គ<200> +•ការ<200>ថោកទាប<200>បរិប្បូណ៌<200>ដោយ<200> +•ប្រើប្រាស់<200>ស្អាត<200>ទាំង<200>ចិត្ត<200>សិស្ស<200>នោះ<200> +•បើ<200>អ្នក<200>ប្រព្រឺត្ត<200>អំពើអាក្រក់<200>មុខ<200>ជា<200>មាន<200> +•ប្រដាប់<200>ប្រដា<200>រ<200>រៀនសូត្រ<200>បន្ទប់<200>រៀន<200> +•ដើរតួ<200>មនុស្សគ<200>ឥត<200>បញ្ចេញ<200>យោបល់<200>សោះ<200>ឡើយ<200> +•មិន<200>អាច<200>ឲ្យ<200>យើង<200>ធ្វើ<200>កសិកម្ម<200>បាន<200>ឡើយ<200> +•បន្ត<200>សេចក្ត<200>ទៅទៀត<200> +•ក្រុម<200>ប៉ូលិស<200>បណ្តាក់<200>គ្នា<200> +•គ្មាន<200>សុខ<200>សំរាន្ត<200>ដង<200>ណា<200> +•បាន<200>សុខភាព<200>បរិប្បូណ៌<200> +•ជា<200>មេចោរ<200>ខ្ញុំ<200>នឹង<200>ស្លាប់<200>ទៅវិញ<200>ជា<200>មេចោរ<200> +•ឯ<200>ការ<200>វាយ<200>ផ្ចាល<200>ដែល<200>នាំ<200> +•គេ<200>ដឹក<200>ទៅ<200>សំឡាប់<200> +#អ្នក|ដែល|ជា|មន្ត្រី|ធំ|លើ|គាត់|ទេ<200> +•យក<200>ទៅ<200>សម្លាប់ចោល<200>ស្ងាត់<200> +•ត្រូវ<200>បាន<200>គេ<200>សម្លាប់<200> +•នៅក្នុង<200>ស្រុក<200>ខ្ល<200>ងហ្ស៊ុន<200> + + # # Jitterbug 3671 Test Case #