ICU-0 fixed tools for 4.1 generation

X-SVN-Rev: 17314
This commit is contained in:
Mark Davis 2005-03-10 02:37:20 +00:00
parent f6fffb93ad
commit e8122316a2
10 changed files with 103 additions and 66 deletions

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
* $Date: 2004/12/15 02:39:25 $
* $Revision: 1.36 $
* $Date: 2005/03/10 02:37:19 $
* $Revision: 1.37 $
*
*******************************************************************************
*/
@ -339,19 +339,21 @@ public class GenerateData implements UCD_Types {
} else if (propAbb.equals("isc")) {
type = MISC_PROP;
}
addLine(sorted, UCD_Names.PROP_TYPE_NAMES[type][1], propAbb, prop);
addLine(sorted, UCD_Names.PROP_TYPE_NAMES[type][1], propAbb, prop, null);
checkDuplicate(duplicates, accumulation, propAbb, prop);
if (!prop.equals(propAbb)) checkDuplicate(duplicates, accumulation, prop, prop);
}
addLine(sorted, UCD_Names.PROP_TYPE_NAMES[MISC_PROP][1], "URS", "Unicode_Radical_Stroke");
addLine(sorted, UCD_Names.PROP_TYPE_NAMES[MISC_PROP][1], "URS", "Unicode_Radical_Stroke", null);
// TODO: merge above
for (int k = 0; k < UCD_Names.SUPER_CATEGORIES.length; ++k) {
valueAbb = Utility.getUnskeleton(UCD_Names.SUPER_CATEGORIES[k][0], false);
value = Utility.getUnskeleton(UCD_Names.SUPER_CATEGORIES[k][1], true);
addLine(sorted, "gc", valueAbb, value, "# " + UCD_Names.SUPER_CATEGORIES[k][2]);
String extra = Utility.getUnskeleton(UCD_Names.SUPER_CATEGORIES[k][1], true);
addLine(sorted, "gc", valueAbb, value, extra, "# " + UCD_Names.SUPER_CATEGORIES[k][2]);
checkDuplicate(duplicates, accumulation, value, "General_Category=" + value);
if (!value.equals(valueAbb)) checkDuplicate(duplicates, accumulation, valueAbb, "General_Category=" + value);
if (extra != null) checkDuplicate(duplicates, accumulation, extra, "General_Category=" + value);
}
/*
@ -360,14 +362,14 @@ public class GenerateData implements UCD_Types {
addLine(sorted, "xx; F ; False");
checkDuplicate(duplicates, accumulation, "F", "xx=False");
*/
addLine(sorted, "qc", UCD_Names.YN_TABLE[1], UCD_Names.YN_TABLE_LONG[1]);
addLine(sorted, "qc", UCD_Names.YN_TABLE[1], UCD_Names.YN_TABLE_LONG[1], null);
checkDuplicate(duplicates, accumulation, UCD_Names.YN_TABLE[1], "qc=" + UCD_Names.YN_TABLE_LONG[1]);
addLine(sorted, "qc", UCD_Names.YN_TABLE[0], UCD_Names.YN_TABLE_LONG[0]);
addLine(sorted, "qc", UCD_Names.YN_TABLE[0], UCD_Names.YN_TABLE_LONG[0], null);
checkDuplicate(duplicates, accumulation, UCD_Names.YN_TABLE[0], "qc=" + UCD_Names.YN_TABLE_LONG[0]);
addLine(sorted, "qc", "M", "Maybe");
addLine(sorted, "qc", "M", "Maybe", null);
checkDuplicate(duplicates, accumulation, "M", "qc=Maybe");
addLine(sorted, "blk", "n/a", Utility.getUnskeleton("no block", true));
addLine(sorted, "blk", "n/a", Utility.getUnskeleton("no block", true), null);
for (int i = 0; i < LIMIT_ENUM; ++i) {
int type = i & 0xFF00;
@ -400,7 +402,7 @@ public class GenerateData implements UCD_Types {
: type != DERIVED && type != BINARY_PROPERTIES
? UCD_Names.PROP_TYPE_NAMES[ENUMERATED_PROP][1]
: UCD_Names.PROP_TYPE_NAMES[up.getValueType()][1],
propAbb, prop);
propAbb, prop, null);
checkDuplicate(duplicates, accumulation, propAbb, prop);
if (!prop.equals(propAbb)) checkDuplicate(duplicates, accumulation, prop, prop);
}
@ -471,9 +473,9 @@ public class GenerateData implements UCD_Types {
if (type == COMBINING_CLASS) {
String num = up.getValue(NUMBER);
num = "; " + Utility.repeat(" ", 3-num.length()) + num;
addLine(sorted, propAbb + num, valueAbb, value);
addLine(sorted, propAbb + num, valueAbb, value, null);
} else if (!valueAbb.equals(UCD_Names.YN_TABLE[1])) {
addLine(sorted, propAbb, valueAbb, value);
addLine(sorted, propAbb, valueAbb, value, null);
}
checkDuplicate(duplicates, accumulation, value, prop + "=" + value);
if (!value.equalsIgnoreCase(valueAbb) && !valueAbb.equals("n/a")) {
@ -483,7 +485,7 @@ public class GenerateData implements UCD_Types {
Iterator blockIterator = Default.ucd().getBlockNames().iterator();
while (blockIterator.hasNext()) {
addLine(sorted, "blk", "n/a", (String)blockIterator.next());
addLine(sorted, "blk", "n/a", (String)blockIterator.next(), null);
}
/*
UCD.BlockData blockData = new UCD.BlockData();
@ -550,20 +552,24 @@ public class GenerateData implements UCD_Types {
Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName[0]);
}
static void addLine(Set sorted, String f1, String f2, String f3) {
addLine(sorted, f1, f2, f3, null);
static void addLine(Set sorted, String f1, String f2, String f3, String f4) {
addLine(sorted, f1, f2, f3, f4, null);
}
static void addLine(Set sorted, String f1, String f2, String f3, String f4) {
static void addLine(Set sorted, String f1, String f2, String f3, String f4, String comment) {
//System.out.println("Adding: " + line);
f1 += Utility.repeat(" ", 3 - f1.length());
f1 += "; " + f2;
f1 += Utility.repeat(" ", 15 - f1.length());
f1 += "; " + f3;
if (f4 != null) {
f1 += Utility.repeat(" ", 50 - f1.length());
f1 += Utility.repeat(" ", 30 - f1.length());
f1 += f4;
}
if (comment != null) {
f1 += Utility.repeat(" ", 50 - f1.length());
f1 += comment;
}
sorted.add(f1);
}

View File

@ -526,7 +526,8 @@ public class MakeUnicodeFiles {
TreeSet sortedSet = new TreeSet(CASELESS_COMPARATOR);
BagFormatter bf = new BagFormatter();
Tabber.MonoTabber mt = new Tabber.MonoTabber()
.add(10,Tabber.LEFT);
.add(10,Tabber.LEFT)
.add(30,Tabber.LEFT);
int count = 0;
for (int i = UnicodeProperty.LIMIT_TYPE - 1; i >= UnicodeProperty.BINARY; --i) {
@ -604,7 +605,7 @@ public class MakeUnicodeFiles {
"gc\t;\tLC\t;\tCased_Letter\t# Ll | Lt | Lu",
"gc\t;\tM\t;\tMark\t# Mc | Me | Mn",
"gc\t;\tN\t;\tNumber\t# Nd | Nl | No",
"gc\t;\tP\t;\tPunctuation\t# Pc | Pd | Pe | Pf | Pi | Po | Ps",
"gc\t;\tP\t;\tPunctuation\t;\tpunct\t# Pc | Pd | Pe | Pf | Pi | Po | Ps",
"gc\t;\tS\t;\tSymbol\t# Sc | Sk | Sm | So",
"gc\t;\tZ\t;\tSeparator\t# Zl | Zp | Zs"};

View File

@ -1,5 +1,5 @@
Generate: .*
DeltaVersion: 9
Generate: Derived.*
DeltaVersion: 12
CopyrightYear: 2005
File: auxiliary/GraphemeBreakProperty
@ -95,13 +95,13 @@ Property: ID_Start
# Derived Property: ID_Start
# Characters that can start an identifier.
# Generated from Lu+Ll+Lt+Lm+Lo+Nl+Other_ID_Start
# NOTE: See UAX #31 for more information
Property: ID_Continue
# Derived Property: ID_Continue
# Characters that can continue an identifier.
# Generated from: ID_Start + Mn+Mc+Nd+Pc + Other_ID_Continue
# NOTE: Cf characters should be filtered out.
# NOTE: See UAX #31 for more information
Property: XID_Start
@ -110,7 +110,7 @@ Property: XID_Start
# Modified as described in UAX #15
# NOTE: Does NOT remove the non-NFKx characters.
# Merely ensures that if isIdentifier(string) then isIdentifier(NFKx(string))
# NOTE: See UAX #31 for more information
Property: XID_Continue
# Derived Property: XID_Continue
@ -119,7 +119,7 @@ Property: XID_Continue
# NOTE: Cf characters should be filtered out.
# NOTE: Does NOT remove the non-NFKx characters.
# Merely ensures that if isIdentifier(string) then isIdentifier(NFKx(string))
# NOTE: See UAX #31 for more information
Property: Default_Ignorable_Code_Point
# Derived Property: Default_Ignorable_Code_Point

View File

@ -31,7 +31,8 @@
# RFC 3066 (or its successor), and replacing '-' by '_'.
#
# A context for a character C is defined by Section 3.13 Default Case Operations,
# on p. 89-90 of The Unicode Standard, Version 4.0, as amended by Unicode 4.0.1.
# on p. 89-90 of The Unicode Standard, Version 4.0, as amended by Unicode 4.1.0,
# as specified in http://www.unicode.org/versions/Unicode4.1.0/
#
# Parsers of this file must be prepared to deal with future additions to this format:
# * Additional contexts

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestData.java,v $
* $Date: 2005/02/24 02:59:34 $
* $Revision: 1.17 $
* $Date: 2005/03/10 02:37:19 $
* $Revision: 1.18 $
*
*******************************************************************************
*/

View File

@ -7,6 +7,7 @@ import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TreeSet;
import com.ibm.icu.dev.test.util.UnicodeMap;
@ -355,7 +356,9 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
}
protected List _getNameAliases(List result) {
addUnique(ucdProperty.getName(UCDProperty.SHORT), result);
addUnique(getName(), result);
String name = getName();
addUnique(name, result);
if (name.equals("White_Space")) addUnique("space", result);
return result;
}
protected List _getValueAliases(String valueAlias, List result) {
@ -437,7 +440,8 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
case UCD_Types.LINE_BREAK>>8: temp = (ucd.getLineBreakID_fromIndex((byte)i, style)); break;
case UCD_Types.JOINING_TYPE>>8: temp = (ucd.getJoiningTypeID_fromIndex((byte)i, style)); break;
case UCD_Types.JOINING_GROUP>>8: temp = (ucd.getJoiningGroupID_fromIndex((byte)i, style)); break;
case UCD_Types.SCRIPT>>8: temp = (ucd.getScriptID_fromIndex((byte)i, style)); titlecase = true;
case UCD_Types.SCRIPT>>8:
temp = (ucd.getScriptID_fromIndex((byte)i, style)); titlecase = true;
if (UnicodeProperty.UNUSED.equals(temp)) continue;
if (temp != null) temp = UCharacter.toTitleCase(Locale.ENGLISH,temp,null);
break;
@ -465,7 +469,10 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
public List _getNameAliases(List result) {
if (result == null) result = new ArrayList();
addUnique(Utility.getUnskeleton(up.getName(UCD_Types.SHORT), false), result);
addUnique(Utility.getUnskeleton(up.getName(UCD_Types.LONG), true), result);
String longName = up.getName(UCD_Types.LONG);
addUnique(Utility.getUnskeleton(longName, true), result);
// hack
if (longName.equals("White_Space")) addUnique("space", result);
return result;
}
@ -476,7 +483,7 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
else if (type == NUMERIC) return result;
else if (type == BINARY) {
UnicodeProperty.addUnique(valueAlias, result);
return lookup(valueAlias, UCD_Names.YN_TABLE_LONG, UCD_Names.YN_TABLE, result);
return lookup(valueAlias, UCD_Names.YN_TABLE_LONG, UCD_Names.YN_TABLE, null, result);
} else if (type == ENUMERATED || type == CATALOG) {
byte style = UCD_Types.LONG;
int prop = propMask>>8;
@ -485,33 +492,33 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
try {
switch (prop) {
case UCD_Types.CATEGORY>>8:
return lookup(valueAlias, UCD_Names.LONG_GENERAL_CATEGORY, UCD_Names.GENERAL_CATEGORY, result);
return lookup(valueAlias, UCD_Names.LONG_GENERAL_CATEGORY, UCD_Names.GENERAL_CATEGORY, UCD_Names.EXTRA_GENERAL_CATEGORY, result);
case UCD_Types.COMBINING_CLASS>>8:
addUnique(String.valueOf(0xFF&Utility.lookup(valueAlias, UCD_Names.LONG_COMBINING_CLASS, true)), result);
return lookup(valueAlias, UCD_Names.LONG_COMBINING_CLASS, UCD_Names.COMBINING_CLASS, result);
return lookup(valueAlias, UCD_Names.LONG_COMBINING_CLASS, UCD_Names.COMBINING_CLASS, null, result);
case UCD_Types.BIDI_CLASS>>8:
return lookup(valueAlias, UCD_Names.LONG_BIDI_CLASS, UCD_Names.BIDI_CLASS, result);
return lookup(valueAlias, UCD_Names.LONG_BIDI_CLASS, UCD_Names.BIDI_CLASS, null, result);
case UCD_Types.DECOMPOSITION_TYPE>>8:
return lookup(valueAlias, UCD_Names.LONG_DECOMPOSITION_TYPE, UCD_Names.DECOMPOSITION_TYPE, result);
return lookup(valueAlias, UCD_Names.LONG_DECOMPOSITION_TYPE, UCD_Names.DECOMPOSITION_TYPE, null, result);
case UCD_Types.NUMERIC_TYPE>>8:
return lookup(valueAlias, UCD_Names.LONG_NUMERIC_TYPE, UCD_Names.NUMERIC_TYPE, result);
return lookup(valueAlias, UCD_Names.LONG_NUMERIC_TYPE, UCD_Names.NUMERIC_TYPE, null, result);
case UCD_Types.EAST_ASIAN_WIDTH>>8:
return lookup(valueAlias, UCD_Names.LONG_EAST_ASIAN_WIDTH, UCD_Names.EAST_ASIAN_WIDTH, result);
return lookup(valueAlias, UCD_Names.LONG_EAST_ASIAN_WIDTH, UCD_Names.EAST_ASIAN_WIDTH, null, result);
case UCD_Types.LINE_BREAK>>8:
lookup(valueAlias, UCD_Names.LONG_LINE_BREAK, UCD_Names.LINE_BREAK, result);
lookup(valueAlias, UCD_Names.LONG_LINE_BREAK, UCD_Names.LINE_BREAK, null, result);
if (valueAlias.equals("Inseparable")) addUnique("Inseperable", result);
// Inseparable; Inseperable
return result;
case UCD_Types.JOINING_TYPE>>8:
return lookup(valueAlias, UCD_Names.LONG_JOINING_TYPE, UCD_Names.JOINING_TYPE, result);
return lookup(valueAlias, UCD_Names.LONG_JOINING_TYPE, UCD_Names.JOINING_TYPE, null, result);
case UCD_Types.JOINING_GROUP>>8:
return lookup(valueAlias, UCD_Names.JOINING_GROUP, null, result);
return lookup(valueAlias, UCD_Names.JOINING_GROUP, null, null, result);
case UCD_Types.SCRIPT>>8:
return lookup(valueAlias, UCD_Names.LONG_SCRIPT, UCD_Names.SCRIPT, result);
return lookup(valueAlias, UCD_Names.LONG_SCRIPT, UCD_Names.SCRIPT, UCD_Names.EXTRA_SCRIPT, result);
case UCD_Types.AGE>>8:
return lookup(valueAlias, UCD_Names.AGE, null, result);
return lookup(valueAlias, UCD_Names.AGE, null, null, result);
case UCD_Types.HANGUL_SYLLABLE_TYPE>>8:
return lookup(valueAlias, UCD_Names.LONG_HANGUL_SYLLABLE_TYPE, UCD_Names.HANGUL_SYLLABLE_TYPE, result);
return lookup(valueAlias, UCD_Names.LONG_HANGUL_SYLLABLE_TYPE, UCD_Names.HANGUL_SYLLABLE_TYPE, null, result);
default: throw new IllegalArgumentException("Internal Error: " + prop);
}
} catch (ArrayIndexOutOfBoundsException e) {
@ -620,14 +627,19 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
return result;
}
static List lookup(String valueAlias, String[] main, String[] aux, List result) {
static List lookup(String valueAlias, String[] main, String[] aux, Map aux2, List result) {
//System.out.println(valueAlias + "=>");
//System.out.println("=>" + aux[pos]);
if (aux != null) {
int pos = 0xFF & Utility.lookup(valueAlias, main, true);
UnicodeProperty.addUnique(aux[pos], result);
}
return (List) UnicodeProperty.addUnique(valueAlias, result);
UnicodeProperty.addUnique(valueAlias, result);
if (aux2 != null) {
String xtra = (String) aux2.get(valueAlias);
if (xtra != null) UnicodeProperty.addUnique(xtra, result);
}
return result;
}
/*

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
* $Date: 2004/12/11 06:03:08 $
* $Revision: 1.37 $
* $Date: 2005/03/10 02:37:20 $
* $Revision: 1.38 $
*
*******************************************************************************
*/
@ -891,7 +891,9 @@ public final class UCD implements UCD_Types {
public static String getCategoryID_fromIndex(byte prop, byte style) {
return prop < 0 || prop >= UCD_Names.GENERAL_CATEGORY.length ? null
: (style != LONG) ? UCD_Names.GENERAL_CATEGORY[prop] : UCD_Names.LONG_GENERAL_CATEGORY[prop];
: (style == EXTRA_ALIAS && prop == DECIMAL_DIGIT_NUMBER) ? "digit"
: (style != LONG) ? UCD_Names.GENERAL_CATEGORY[prop]
: UCD_Names.LONG_GENERAL_CATEGORY[prop];
}
@ -1056,7 +1058,9 @@ public final class UCD implements UCD_Types {
public static String getScriptID_fromIndex(byte prop, byte length) {
return prop < 0 || prop >= UCD_Names.SCRIPT.length ? null
: (length == SHORT) ? UCD_Names.SCRIPT[prop] : UCD_Names.LONG_SCRIPT[prop];
: (length == EXTRA_ALIAS && prop == COPTIC) ? "Qaac"
: (length == SHORT) ? UCD_Names.SCRIPT[prop]
: UCD_Names.LONG_SCRIPT[prop];
}
public String getAgeID(int codePoint) {

View File

@ -5,15 +5,17 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Names.java,v $
* $Date: 2004/12/11 06:03:08 $
* $Revision: 1.30 $
* $Date: 2005/03/10 02:37:20 $
* $Revision: 1.31 $
*
*******************************************************************************
*/
package com.ibm.text.UCD;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import com.ibm.icu.dev.test.util.UnicodeProperty;
import com.ibm.text.utility.*;
@ -346,6 +348,11 @@ final class UCD_Names implements UCD_Types {
};
public static final Map EXTRA_SCRIPT = new HashMap();
static {
EXTRA_SCRIPT.put("Coptic", "Qaac");
}
public static final String[] SCRIPT = {
"Zyyy", // COMMON -- NOT A LETTER: NO EXACT CORRESPONDENCE IN 15924
"Latn", // LATIN
@ -479,7 +486,7 @@ final class UCD_Names implements UCD_Types {
"Pi", // = Punctuation, Initial quote 29 (may behave like Ps or Pe depending on usage)
"Pf" // = Punctuation, Final quote 30 (may behave like Ps or Pe depending on usage)
};
static final String[] LONG_GENERAL_CATEGORY = {
"Unassigned", // = Other, Not Assigned 0
@ -524,16 +531,22 @@ final class UCD_Names implements UCD_Types {
};
static final String[][] SUPER_CATEGORIES = {
{"L", "Letter", "Ll | Lm | Lo | Lt | Lu"},
{"M", "Mark", "Mc | Me | Mn"},
{"N", "Number", "Nd | Nl | No"},
{"Z", "Separator", "Zl | Zp | Zs"},
{"C", "Other", "Cc | Cf | Cn | Co | Cs"},
{"S", "Symbol", "Sc | Sk | Sm | So"},
{"P", "Punctuation", "Pc | Pd | Pe | Pf | Pi | Po | Ps"},
{"LC", "Cased Letter", "Ll | Lt | Lu"},
{"L", "Letter", null, "Ll | Lm | Lo | Lt | Lu"},
{"M", "Mark", null, "Mc | Me | Mn"},
{"N", "Number", null, "Nd | Nl | No"},
{"Z", "Separator", null, "Zl | Zp | Zs"},
{"C", "Other", "cntrl", "Cc | Cf | Cn | Co | Cs"},
{"S", "Symbol", null, "Sc | Sk | Sm | So"},
{"P", "Punctuation", "punct", "Pc | Pd | Pe | Pf | Pi | Po | Ps"},
{"LC", "Cased Letter", null, "Ll | Lt | Lu"},
};
public static final Map EXTRA_GENERAL_CATEGORY = new HashMap();
static {
EXTRA_GENERAL_CATEGORY.put("Decimal_Number", "digit");
EXTRA_GENERAL_CATEGORY.put("Control", "cntrl");
}
static final String[] BIDI_CLASS = {

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Types.java,v $
* $Date: 2004/12/11 06:03:08 $
* $Revision: 1.30 $
* $Date: 2005/03/10 02:37:20 $
* $Revision: 1.31 $
*
*******************************************************************************
*/
@ -72,7 +72,7 @@ public interface UCD_Types {
// for IDs
static final byte NUMBER = -2, SHORT = -1, NORMAL = 0, LONG = 1, BOTH = 2;
static final byte NUMBER = -2, SHORT = -1, NORMAL = 0, LONG = 1, BOTH = 2, EXTRA_ALIAS = 3;
// Binary ENUM Grouping
public static final int

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/VerifyUCD.java,v $
* $Date: 2005/02/24 02:59:34 $
* $Revision: 1.26 $
* $Date: 2005/03/10 02:37:20 $
* $Revision: 1.27 $
*
*******************************************************************************
*/