From 132489e8477c55d3626ba88a53d74f67989c13a3 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Mon, 20 Aug 2007 21:27:51 +0000 Subject: [PATCH] ICU-5609 add Hangul type boundaries for UCHAR_HANGUL_SYLLABLE_TYPE; fix UCharacter.getPropertyValueName() for lccc and tccc; replace test hacks and time bombs X-SVN-Rev: 22429 --- .../icu/dev/test/translit/UnicodeSetTest.java | 57 +++++++++++++------ .../com/ibm/icu/impl/UCharacterProperty.java | 9 +++ icu4j/src/com/ibm/icu/lang/UCharacter.java | 4 +- 3 files changed, 53 insertions(+), 17 deletions(-) diff --git a/icu4j/src/com/ibm/icu/dev/test/translit/UnicodeSetTest.java b/icu4j/src/com/ibm/icu/dev/test/translit/UnicodeSetTest.java index 440cae1dae..5289cdeefa 100755 --- a/icu4j/src/com/ibm/icu/dev/test/translit/UnicodeSetTest.java +++ b/icu4j/src/com/ibm/icu/dev/test/translit/UnicodeSetTest.java @@ -44,13 +44,41 @@ public class UnicodeSetTest extends TestFmwk { public static void main(String[] args) throws Exception { new UnicodeSetTest().run(args); } - - + + private static final boolean isCccValue(int ccc) { + switch (ccc) { + case 0: + case 1: + case 7: + case 8: + case 9: + case 200: + case 202: + case 216: + case 218: + case 220: + case 222: + case 224: + case 226: + case 228: + case 230: + case 232: + case 233: + case 234: + case 240: + return true; + default: + return false; + } + } + public void TestPropertyAccess() { // test to see that all of the names work for (int propNum = UProperty.BINARY_START; propNum < UProperty.INT_LIMIT; ++propNum) { + if (propNum >= UProperty.BINARY_LIMIT && propNum < UProperty.INT_START) { // skip the gap + propNum = UProperty.INT_START; + } for (int nameChoice = UProperty.NameChoice.SHORT; nameChoice <= UProperty.NameChoice.LONG; ++nameChoice) { - if (propNum >= UProperty.BINARY_LIMIT && propNum < UProperty.INT_START) continue; // skip the gap String propName; try { propName = UCharacter.getPropertyName(propNum, nameChoice); @@ -72,18 +100,18 @@ public class UnicodeSetTest extends TestFmwk { valueName = UCharacter.getPropertyValueName(propNum, valueNum, nameChoice); if (valueName == null) { if (nameChoice == UProperty.NameChoice.SHORT) continue; // allow non-existent short names - throw new NullPointerException(); + if ((propNum == UProperty.CANONICAL_COMBINING_CLASS || + propNum == UProperty.LEAD_CANONICAL_COMBINING_CLASS || + propNum == UProperty.TRAIL_CANONICAL_COMBINING_CLASS) && + !isCccValue(valueNum)) { + // Only a few of the canonical combining classes have names. + // Otherwise they are just integer values. + continue; + } else { + throw new NullPointerException(); + } } } catch (RuntimeException e1) { - // HACK for now - - if (propNum == 4098 && e1 instanceof NullPointerException) { - if (skipIfBeforeICU(3,8,0)) continue; - } - if (propNum == 4112 || propNum == 4113) { - if (skipIfBeforeICU(3,8,0)) continue; - } - errln("Can't get property value name for: " + "Property (" + propNum + "): " + propName + ", " + "Value (" + valueNum + ") " @@ -110,9 +138,6 @@ public class UnicodeSetTest extends TestFmwk { } } if (collectedErrors.size() != 0) { - if (propNum == 4107 && valueNum == 0 && skipIfBeforeICU(3,8,0)) { - continue; - } errln("Property Value Differs: " + "Property (" + propNum + "): " + propName + ", " + "Value (" + valueNum + "): " + valueName + ", " diff --git a/icu4j/src/com/ibm/icu/impl/UCharacterProperty.java b/icu4j/src/com/ibm/icu/impl/UCharacterProperty.java index 0a650ff136..e9e225619f 100644 --- a/icu4j/src/com/ibm/icu/impl/UCharacterProperty.java +++ b/icu4j/src/com/ibm/icu/impl/UCharacterProperty.java @@ -21,6 +21,8 @@ import com.ibm.icu.text.UTF16; import com.ibm.icu.util.RangeValueIterator; import com.ibm.icu.util.VersionInfo; +import com.ibm.icu.impl.NormalizerImpl; + /** *

Internal class used for Unicode character property database.

*

This classes store binary data read from uprops.icu. @@ -955,6 +957,13 @@ public final class UCharacterProperty set.add(c); } } + + /* Add Hangul type boundaries for UCHAR_HANGUL_SYLLABLE_TYPE. */ + for(c=NormalizerImpl.HANGUL_BASE; c<(NormalizerImpl.HANGUL_BASE+NormalizerImpl.HANGUL_COUNT); c+=NormalizerImpl.JAMO_T_COUNT) { + set.add(c); + set.add(c+1); + } + set.add(c); } public UnicodeSet addPropertyStarts(UnicodeSet set) { diff --git a/icu4j/src/com/ibm/icu/lang/UCharacter.java b/icu4j/src/com/ibm/icu/lang/UCharacter.java index c1ea006f49..0376687b26 100755 --- a/icu4j/src/com/ibm/icu/lang/UCharacter.java +++ b/icu4j/src/com/ibm/icu/lang/UCharacter.java @@ -3978,7 +3978,9 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection int value, int nameChoice) { - if (property == UProperty.CANONICAL_COMBINING_CLASS + if ((property == UProperty.CANONICAL_COMBINING_CLASS + || property == UProperty.LEAD_CANONICAL_COMBINING_CLASS + || property == UProperty.TRAIL_CANONICAL_COMBINING_CLASS) && value >= UCharacter.getIntPropertyMinValue( UProperty.CANONICAL_COMBINING_CLASS) && value <= UCharacter.getIntPropertyMaxValue(