From cbe883535bfd7ce473fe39f73dba1b2fdf6b9ba2 Mon Sep 17 00:00:00 2001
From: John Fitzpatrick
Date: Mon, 28 Feb 2000 18:04:30 +0000
Subject: [PATCH] initial checkin

X-SVN-Rev: 860
---
 .../rbbi/BreakIteratorRules_en_US_TEST.java | 105 ++++++++++++++++++
 .../rbbi/BreakIteratorRules_en_US_TEST.java | 105 ++++++++++++++++++
 2 files changed, 210 insertions(+)
 create mode 100755 icu4j/src/com/ibm/icu/dev/test/rbbi/BreakIteratorRules_en_US_TEST.java
 create mode 100755 icu4j/src/com/ibm/test/rbbi/BreakIteratorRules_en_US_TEST.java

diff --git a/icu4j/src/com/ibm/icu/dev/test/rbbi/BreakIteratorRules_en_US_TEST.java b/icu4j/src/com/ibm/icu/dev/test/rbbi/BreakIteratorRules_en_US_TEST.java
new file mode 100755
index 0000000000..61ee141f39
--- /dev/null
+++ b/icu4j/src/com/ibm/icu/dev/test/rbbi/BreakIteratorRules_en_US_TEST.java
@@ -0,0 +1,105 @@
+/*
+ * (C) IBM Corp. 1997-1998. All Rights Reserved.
+ *
+ * The program is provided "as is" without any warranty express or
+ * implied, including the warranty of non-infringement and the implied
+ * warranties of merchantibility and fitness for a particular purpose.
+ * IBM will not be liable for any damages suffered by you as a result
+ * of using the Program. In no event will IBM be liable for any
+ * special, indirect or consequential damages or lost profits even if
+ * IBM has been advised of the possibility of their occurrence. IBM
+ * will not be liable for any third party claims against you.
+ */
+package com.ibm.text.resources;
+
+import java.util.ListResourceBundle;
+
+/**
+ * This resource bundle is included for testing and demonstration purposes only.
+ * It applies the dictionary-based algorithm to English text that has had all the
+ * spaces removed. Once we have good test cases for Thai, we will replace this
+ * with good resource data (and a good dictionary file) for Thai.
+ */
+public class BreakIteratorRules_en_US_TEST extends ListResourceBundle {
+    public Object[][] getContents() {
+        return contents;
+    }
+
+    static final Object[][] contents = {
+        // names of classes to instantiate for the different kinds of break
+        // iterator. Notice we're now using DictionaryBasedBreakIterator
+        // for word and line breaking.
+        { "BreakIteratorClasses",
+            new String[] { "RuleBasedBreakIterator",       // character-break iterator class
+                           "DictionaryBasedBreakIterator", // word-break iterator class
+                           "DictionaryBasedBreakIterator", // line-break iterator class
+                           "RuleBasedBreakIterator" }      // sentence-break iterator class
+        },
+
+        // Same word-breaking rules as the default resource, except that the Latin
+        // letters (a-z, A-Z), apostrophe, and hyphen are dictionary characters.
+        // NOTE(review): names before '=' appear stripped here; confirm upstream.
+        { "WordBreakRules",
+            "=[:Mn::Me::Cf:];" +
+            "=[a-zA-Z\\'\\-];" +
+            "=[\u3005\u4e00-\u9fa5\uf900-\ufa2d];" +
+            "=[\u30a1-\u30fa];" +
+            "=[\u3041-\u3094];" +
+            "=[\u3099-\u309c];" +
+            "=[:L:^[]];" +
+            "=[:N:];" +
+            "=[:Pd:\u00ad\u2027\\\"\\\'\\.];" +
+            "=[\\\"\\\'\\,\u066b\\.];" +
+            "=[:Sc:\\#\\.^\u00a2];" +
+            "=[\\%\\&\u00a2\u066a\u2030\u2031];" +
+            "=[\n\u000c\u2028\u2029];" +
+            "=[:Zs:\t];" +
+            "=(*(*)*|[a-zA-Z][a-z\\'\\-]*);" +
+            "=(*(*)*);" +
+            ".;" +
+            "{}()*{{}};" +
+            "()*{{}};" +
+            "*{\r}{};" +
+            "[]*;" +
+            "[]*;" +
+            "*;" },
+
+        // Same line-breaking rules as the default resource, except that the Latin
+        // letters (a-z, A-Z), apostrophe, and hyphen are dictionary characters.
+        // NOTE(review): names before '=' appear stripped here; confirm upstream.
+        { "LineBreakRules",
+            "=[:Mn::Me::Cf:];" +
+            "=[a-zA-Z\\'\\-];" +
+            "=[\u0003\t\n\f\u2028\u2029];" +
+            "=[\u00a0\u2007\u2011\ufeff];" +
+            "=[:Zs::Cc:^[\r]];" +
+            "=[:Pd:\u00ad^];" +
+            "=[:Sc::Ps:^\u00a2];" +
+            "=[:Pe:\\!\\%\\.\\,\\:\\;\\?\u00a2\u00b0\u066a\u2030-\u2034\u2103" +
+            "\u2105\u2109\u3001\u3002\u3005\u3041\u3043\u3045\u3047\u3049\u3063" +
+            "\u3083\u3085\u3087\u308e\u3099-\u309e\u30a1\u30a3\u30a5\u30a7\u30a9" +
+            "\u30c3\u30e3\u30e5\u30e7\u30ee\u30f5\u30f6\u30fc-\u30fe\uff01\uff0e" +
+            "\uff1f];" +
+            "=[\u4e00-\u9fa5\uf900-\ufa2d\u3041-\u3094\u30a1-\u30fa^[]];" +
+            "=[:Nd::No:];" +
+            "=[\\.\\,];" +
+            "=[^[\r]];" +
+            "=([]**(*)*);" +
+            "=(*|||[a-zA-Z][a-z\\'\\-]*);" +
+            "=((*|*)*);" +
+            "=(*);" +
+            "(*)*{\r}{};" },
+
+        // these two resources specify the pathnames of the dictionary files to
+        // use for word breaking and line breaking. Both currently refer to
+        // a file called english.dict placed in com\ibm\text\resources
+        // somewhere in the class path. It's important to note that
+        // english.dict was created for testing purposes only, and doesn't
+        // come anywhere close to being an exhaustive dictionary of English
+        // words (basically, it contains all the words in the Declaration of
+        // Independence, and the Revised Standard Version of the book of Genesis,
+        // plus a few other words thrown in to show more interesting cases).
+        { "WordBreakDictionary", "com\\ibm\\text\\resources\\english.dict" },
+        { "LineBreakDictionary", "com\\ibm\\text\\resources\\english.dict" }
+    };
+}
diff --git a/icu4j/src/com/ibm/test/rbbi/BreakIteratorRules_en_US_TEST.java b/icu4j/src/com/ibm/test/rbbi/BreakIteratorRules_en_US_TEST.java
new file mode 100755
index 0000000000..61ee141f39
--- /dev/null
+++ b/icu4j/src/com/ibm/test/rbbi/BreakIteratorRules_en_US_TEST.java
@@ -0,0 +1,105 @@
+/*
+ * (C) IBM Corp. 1997-1998. All Rights Reserved.
+ *
+ * The program is provided "as is" without any warranty express or
+ * implied, including the warranty of non-infringement and the implied
+ * warranties of merchantibility and fitness for a particular purpose.
+ * IBM will not be liable for any damages suffered by you as a result
+ * of using the Program. In no event will IBM be liable for any
+ * special, indirect or consequential damages or lost profits even if
+ * IBM has been advised of the possibility of their occurrence. IBM
+ * will not be liable for any third party claims against you.
+ */
+package com.ibm.text.resources;
+
+import java.util.ListResourceBundle;
+
+/**
+ * This resource bundle is included for testing and demonstration purposes only.
+ * It applies the dictionary-based algorithm to English text that has had all the
+ * spaces removed. Once we have good test cases for Thai, we will replace this
+ * with good resource data (and a good dictionary file) for Thai.
+ */
+public class BreakIteratorRules_en_US_TEST extends ListResourceBundle {
+    public Object[][] getContents() {
+        return contents;
+    }
+
+    static final Object[][] contents = {
+        // names of classes to instantiate for the different kinds of break
+        // iterator. Notice we're now using DictionaryBasedBreakIterator
+        // for word and line breaking.
+        { "BreakIteratorClasses",
+            new String[] { "RuleBasedBreakIterator",       // character-break iterator class
+                           "DictionaryBasedBreakIterator", // word-break iterator class
+                           "DictionaryBasedBreakIterator", // line-break iterator class
+                           "RuleBasedBreakIterator" }      // sentence-break iterator class
+        },
+
+        // Same word-breaking rules as the default resource, except that the Latin
+        // letters (a-z, A-Z), apostrophe, and hyphen are dictionary characters.
+        // NOTE(review): names before '=' appear stripped here; confirm upstream.
+        { "WordBreakRules",
+            "=[:Mn::Me::Cf:];" +
+            "=[a-zA-Z\\'\\-];" +
+            "=[\u3005\u4e00-\u9fa5\uf900-\ufa2d];" +
+            "=[\u30a1-\u30fa];" +
+            "=[\u3041-\u3094];" +
+            "=[\u3099-\u309c];" +
+            "=[:L:^[]];" +
+            "=[:N:];" +
+            "=[:Pd:\u00ad\u2027\\\"\\\'\\.];" +
+            "=[\\\"\\\'\\,\u066b\\.];" +
+            "=[:Sc:\\#\\.^\u00a2];" +
+            "=[\\%\\&\u00a2\u066a\u2030\u2031];" +
+            "=[\n\u000c\u2028\u2029];" +
+            "=[:Zs:\t];" +
+            "=(*(*)*|[a-zA-Z][a-z\\'\\-]*);" +
+            "=(*(*)*);" +
+            ".;" +
+            "{}()*{{}};" +
+            "()*{{}};" +
+            "*{\r}{};" +
+            "[]*;" +
+            "[]*;" +
+            "*;" },
+
+        // Same line-breaking rules as the default resource, except that the Latin
+        // letters (a-z, A-Z), apostrophe, and hyphen are dictionary characters.
+        // NOTE(review): names before '=' appear stripped here; confirm upstream.
+        { "LineBreakRules",
+            "=[:Mn::Me::Cf:];" +
+            "=[a-zA-Z\\'\\-];" +
+            "=[\u0003\t\n\f\u2028\u2029];" +
+            "=[\u00a0\u2007\u2011\ufeff];" +
+            "=[:Zs::Cc:^[\r]];" +
+            "=[:Pd:\u00ad^];" +
+            "=[:Sc::Ps:^\u00a2];" +
+            "=[:Pe:\\!\\%\\.\\,\\:\\;\\?\u00a2\u00b0\u066a\u2030-\u2034\u2103" +
+            "\u2105\u2109\u3001\u3002\u3005\u3041\u3043\u3045\u3047\u3049\u3063" +
+            "\u3083\u3085\u3087\u308e\u3099-\u309e\u30a1\u30a3\u30a5\u30a7\u30a9" +
+            "\u30c3\u30e3\u30e5\u30e7\u30ee\u30f5\u30f6\u30fc-\u30fe\uff01\uff0e" +
+            "\uff1f];" +
+            "=[\u4e00-\u9fa5\uf900-\ufa2d\u3041-\u3094\u30a1-\u30fa^[]];" +
+            "=[:Nd::No:];" +
+            "=[\\.\\,];" +
+            "=[^[\r]];" +
+            "=([]**(*)*);" +
+            "=(*|||[a-zA-Z][a-z\\'\\-]*);" +
+            "=((*|*)*);" +
+            "=(*);" +
+            "(*)*{\r}{};" },
+
+        // these two resources specify the pathnames of the dictionary files to
+        // use for word breaking and line breaking. Both currently refer to
+        // a file called english.dict placed in com\ibm\text\resources
+        // somewhere in the class path. It's important to note that
+        // english.dict was created for testing purposes only, and doesn't
+        // come anywhere close to being an exhaustive dictionary of English
+        // words (basically, it contains all the words in the Declaration of
+        // Independence, and the Revised Standard Version of the book of Genesis,
+        // plus a few other words thrown in to show more interesting cases).
+        { "WordBreakDictionary", "com\\ibm\\text\\resources\\english.dict" },
+        { "LineBreakDictionary", "com\\ibm\\text\\resources\\english.dict" }
+    };
+}