scuffed-code/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java
Mark Davis 459c96f0b1 various changes for new properties
X-SVN-Rev: 14494
2004-02-12 08:23:19 +00:00

1126 lines
34 KiB
Java

/**
*******************************************************************************
* Copyright (C) 1996-2001, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Names.java,v $
* $Date: 2004/02/12 08:23:17 $
* $Revision: 1.25 $
*
*******************************************************************************
*/
package com.ibm.text.UCD;
import java.util.Locale;
import com.ibm.text.utility.*;
final class UCD_Names implements UCD_Types {
public static String[][] NON_ENUMERATED_NAMES = {
{"na", "Name"},
{"dm", "Decomposition_Mapping"},
{"nv", "Numeric_Value"},
{"bmg", "Bidi_Mirroring_Glyph"},
{"lc", "Lowercase_Mapping"},
{"uc", "Uppercase_Mapping"},
{"tc", "Titlecase_Mapping"},
{"cf", "Case_Folding"},
{"slc", "Simple_Lowercase_Mapping"},
{"suc", "Simple_Uppercase_Mapping"},
{"stc", "Simple_Titlecase_Mapping"},
{"sfc", "Simple_Case_Folding"},
{"scc", "Special_Case_Condition"},
{"blk", "Block"},
{"na1", "Unicode_1_Name"},
{"isc", "ISO_Comment"},
{"age", "Age"},
};
static final String[] UNIFIED_PROPERTY_HEADERS = {
"General Category (listing UnicodeData.txt, field 2: see UCD.html)\r\n"
+ "#\tAll code points not explicitly listed in this file have the property\r\n"
+ "#\tvalue: Cn.",
"Combining Class (listing UnicodeData.txt, field 3: see UCD.html)\r\n"
+ "#\tAll code points not explicitly listed in this file have the property\r\n"
+ "#\tvalue: 0.",
"Bidi Class (listing UnicodeData.txt, field 4: see UCD.html)\r\n"
+ "#\tAll code points not explicitly listed in this file have the property\r\n"
+ "#\tvalue: L.",
"Decomposition Type (from UnicodeData.txt, field 5: see UCD.html)\r\n"
+ "#\tAll code points not explicitly listed in this file have the property\r\n"
+ "#\tvalue: None.",
"Numeric Type (from UnicodeData.txt, field 6/7/8 plus Unihan.txt: see UCD.html)\r\n"
+ "#\tAll code points not explicitly listed in this file have the property\r\n"
+ "#\tvalue: None.",
"East Asian Width (listing EastAsianWidth.txt, field 1)\r\n"
+ "#\tAll code points not explicitly listed in this file have the property\r\n"
+ "#\tvalue: N.",
"Line Break (listing LineBreak.txt, field 1)\r\n"
+ "#\tAll code points not explicitly listed in this file have the property\r\n"
+ "#\tvalue: XX.",
"Joining Type (listing ArabicShaping.txt, field 2).\r\n"
+ "#\tType T is derived, as described in ArabicShaping.txt\r\n"
+ "#\tAll code points not explicitly listed in this file have the property\r\n"
+ "#\tvalue: U.",
"Joining Group (listing ArabicShaping.txt, field 3)\r\n"
+ "#\tAll code points not explicitly listed in this file have the property\r\n"
+ "#\tvalue: NO_JOINING_GROUP.",
"BidiMirrored (listing UnicodeData.txt, field 9: see UCD.html)\r\n"
+ "#\tAll code points not explicitly listed in this file have the property\r\n"
+ "#\tvalue: N.",
"Script\r\n"
+ "#\tAll code points not explicitly listed in this file have the property\r\n"
+ "#\tvalue: COMMON.",
"Age (from a comparison of UCD versions 1.1 [minus Hangul], 2.0, 2.1, 3.0, 3.1)",
"Hangul Syllable Type\r\n"
+ "#\tAll code points not explicitly listed in this file have the property\r\n"
+ "#\tvalue: NA.",
"Derived"
};
static final String[] UNIFIED_PROPERTIES = {
"GeneralCategory",
"CanonicalCombiningClass",
"BidiClass",
"DecompositionType",
"NumericType",
"EastAsianWidth",
"LineBreak",
"JoiningType",
"JoiningGroup",
"",
"Script",
"Age",
"Hangul_Syllable_Type",
""
};
static final String[] SHORT_UNIFIED_PROPERTIES = {
"gc",
"ccc",
"bc",
"dt",
"nt",
"ea",
"lb",
"jt",
"jg",
"",
"sc",
"age",
"hst",
"",
};
static final String[] BP = {
"Bidi_Mirrored",
"Composition_Exclusion",
"White_Space",
"NonBreak",
"Bidi_Control",
"Join_Control",
"Dash",
"Hyphen",
"Quotation_Mark",
"Terminal_Punctuation",
"Other_Math",
"Hex_Digit",
"ASCII_Hex_Digit",
"Other_Alphabetic",
"Ideographic",
"Diacritic",
"Extender",
"Other_Lowercase",
"Other_Uppercase",
"Noncharacter_Code_Point",
"Case_Fold_Turkish_I",
"Other_Grapheme_Extend",
"Grapheme_Link",
"IDS_Binary_Operator",
"IDS_Trinary_Operator",
"Radical",
"Unified_Ideograph",
"Other_Default_Ignorable_Code_Point",
"Deprecated",
"Soft_Dotted",
"Logical_Order_Exception",
"Other_ID_Start",
"STerm",
"Variation_Selector"
};
static final String[] SHORT_BP = {
"Bidi_M",
"CE",
"WSpace",
"NBrk",
"Bidi_C",
"JoinC",
"Dash",
"Hyphen",
"QMark",
"Term",
"OMath",
"Hex",
"AHex",
"OAlpha",
"Ideo",
"Dia",
"Ext",
"OLower",
"OUpper",
"NChar",
"TurkI",
"OGrExt",
"Gr_Link",
"IDSB",
"IDST",
"Radical",
"UIdeo",
"ODI",
"Dep",
"SD",
"LOE",
"OIDS",
"STerm",
"VS"
};
/*
static final String[] BP_OLD = {
"BidiMirrored",
"CompositionExclusion",
"White_space",
"Non_break",
"Bidi_Control",
"Join_Control",
"Dash",
"Hyphen",
"Quotation_Mark",
"Terminal_Punctuation",
"Math",
"Hex_Digit",
"Other_Alphabetic",
"Ideographic",
"Diacritic",
"Extender",
"Other_Lowercase",
"Other_Uppercase",
"Noncharacter_Code_Point",
"Other_GraphemeExtend",
"GraphemeLink",
"IDS_BinaryOperator",
"IDS_TrinaryOperator",
"Radical",
"UnifiedIdeograph"
};
*/
static final String[] DeletedProperties = {
"Private_Use",
"Composite",
"Format_Control",
"High_Surrogate",
"Identifier_Part_Not_Cf",
"Low_Surrogate",
"Other_Format_Control",
"Private_Use_High_Surrogate",
"Unassigned_Code_Point"
};
static final String[] YN_TABLE = {"F", "T"};
static final String[] YN_TABLE_LONG = {"False", "True"};
static String[] EAST_ASIAN_WIDTH = {
"N", "A", "H", "W", "F", "Na"
};
static String[] LONG_EAST_ASIAN_WIDTH = {
"Neutral", "Ambiguous", "Halfwidth", "Wide", "Fullwidth", "Narrow"
};
static final String[] LINE_BREAK = {
"XX", "OP", "CL", "QU", "GL", "NS", "EX", "SY",
"IS", "PR", "PO", "NU", "AL", "ID", "IN", "HY",
"CM", "BB", "BA", "SP", "BK", "CR", "LF", "CB",
"SA", "AI", "B2", "SG", "ZW",
"NL",
"WJ",
//"JL",
//"JV",
//"JT",
};
static final String[] LONG_LINE_BREAK = {
"Unknown", "OpenPunctuation", "ClosePunctuation", "Quotation",
"Glue", "Nonstarter", "Exclamation", "BreakSymbols",
"InfixNumeric", "PrefixNumeric", "PostfixNumeric",
"Numeric", "Alphabetic", "Ideographic", "Inseperable", "Hyphen",
"CombiningMark", "BreakBefore", "BreakAfter", "Space",
"MandatoryBreak", "CarriageReturn", "LineFeed", "ContingentBreak",
"ComplexContext", "Ambiguous", "BreakBoth", "Surrogate", "ZWSpace",
"Next_Line",
"Word_Joiner"
//"Leading_Jamo",
//"Vowel_Jamo",
//"Trailing_Jamo",
};
public static final String[] LONG_SCRIPT = {
"COMMON", // COMMON -- NOT A LETTER: NO EXACT CORRESPONDENCE IN 15924
"LATIN", // LATIN
"GREEK", // GREEK
"CYRILLIC", // CYRILLIC
"ARMENIAN", // ARMENIAN
"HEBREW", // HEBREW
"ARABIC", // ARABIC
"SYRIAC", // SYRIAC
"THAANA", // THAANA
"DEVANAGARI", // DEVANAGARI
"BENGALI", // BENGALI
"GURMUKHI", // GURMUKHI
"GUJARATI", // GUJARATI
"ORIYA", // ORIYA
"TAMIL", // TAMIL
"TELUGU", // TELUGU
"KANNADA", // KANNADA
"MALAYALAM", // MALAYALAM
"SINHALA", // SINHALA
"THAI", // THAI
"LAO", // LAO
"TIBETAN", // TIBETAN
"MYANMAR", // MYANMAR
"GEORGIAN", // GEORGIAN
"<unused>", // JAMO -- NOT SEPARATED FROM HANGUL IN 15924
"HANGUL", // HANGUL
"ETHIOPIC", // ETHIOPIC
"CHEROKEE", // CHEROKEE
"CANADIAN-ABORIGINAL", // ABORIGINAL
"OGHAM", // OGHAM
"RUNIC", // RUNIC
"KHMER", // KHMER
"MONGOLIAN", // MONGOLIAN
"HIRAGANA", // HIRAGANA
"KATAKANA", // KATAKANA
"BOPOMOFO", // BOPOMOFO
"HAN", // HAN
"YI", // YI
"OLD-ITALIC",
"GOTHIC",
"DESERET",
"INHERITED", // nonspacing marks
"TAGALOG",
"HANUNOO",
"BUHID",
"TAGBANWA",
"LIMBU",
"TAI_LE",
"LINEAR_B",
"UGARITIC",
"SHAVIAN",
"OSMANYA",
"CYPRIOT",
"BRAILLE",
};
public static final String[] SCRIPT = {
"Zyyy", // COMMON -- NOT A LETTER: NO EXACT CORRESPONDENCE IN 15924
"Latn", // LATIN
"Grek", // GREEK
"Cyrl", // CYRILLIC
"Armn", // ARMENIAN
"Hebr", // HEBREW
"Arab", // ARABIC
"Syrc", // SYRIAC
"Thaa", // THAANA
"Deva", // DEVANAGARI
"Beng", // BENGALI
"Guru", // GURMUKHI
"Gujr", // GUJARATI
"Orya", // ORIYA
"Taml", // TAMIL
"Telu", // TELUGU
"Knda", // KANNADA
"Mlym", // MALAYALAM
"Sinh", // SINHALA
"Thai", // THAI
"Laoo", // LAO
"Tibt", // TIBETAN
"Mymr", // MYANMAR
"Geor", // GEORGIAN
"<unused>", // JAMO -- NOT SEPARATED FROM HANGUL IN 15924
"Hang", // HANGUL
"Ethi", // ETHIOPIC
"Cher", // CHEROKEE
"Cans", // ABORIGINAL
"Ogam", // OGHAM
"Runr", // RUNIC
"Khmr", // KHMER
"Mong", // MONGOLIAN
"Hira", // HIRAGANA
"Kana", // KATAKANA
"Bopo", // BOPOMOFO
"Hani", // HAN
"Yiii", // YI
"Ital",
"Goth",
"Dsrt",
"Qaai",
"Tglg",
"Hano",
"Buhd",
"Tagb",
/*
"LIMBU",
"TAI_LE",
"LINEAR_B",
"UGARITIC",
"SHAVIAN",
"OSMANYA",
"CYPRIOT",
*/
"Limb",
"Tale",
"Linb",
"Ugar",
"Shaw",
"Osma",
"Cprt",
"Brai",
};
static final String[] AGE = {
"unassigned",
"1.1",
"2.0",
"2.1",
"3.0",
"3.1",
"3.2",
"4.0"
};
static final String[] GENERAL_CATEGORY = {
"Cn", // = Other, Not Assigned 0
"Lu", // = Letter, Uppercase 1
"Ll", // = Letter, Lowercase 2
"Lt", // = Letter, Titlecase 3
"Lm", // = Letter, Modifier 4
"Lo", // = Letter, Other 5
"Mn", // = Mark, Non-Spacing 6
"Me", // = Mark, Enclosing 8
"Mc", // = Mark, Spacing Combining 7
"Nd", // = Number, Decimal Digit 9
"Nl", // = Number, Letter 10
"No", // = Number, Other 11
"Zs", // = Separator, Space 12
"Zl", // = Separator, Line 13
"Zp", // = Separator, Paragraph 14
"Cc", // = Other, Control 15
"Cf", // = Other, Format 16
"<unused>", // missing
"Co", // = Other, Private Use 18
"Cs", // = Other, Surrogate 19
"Pd", // = Punctuation, Dash 20
"Ps", // = Punctuation, Open 21
"Pe", // = Punctuation, Close 22
"Pc", // = Punctuation, Connector 23
"Po", // = Punctuation, Other 24
"Sm", // = Symbol, Math 25
"Sc", // = Symbol, Currency 26
"Sk", // = Symbol, Modifier 27
"So", // = Symbol, Other 28
"Pi", // = Punctuation, Initial quote 29 (may behave like Ps or Pe depending on usage)
"Pf" // = Punctuation, Final quote 30 (may behave like Ps or Pe dependingon usage)
};
static final String[] LONG_GENERAL_CATEGORY = {
"Unassigned", // = Other, Not Assigned 0
"UppercaseLetter", // = Letter, Uppercase 1
"LowercaseLetter", // = Letter, Lowercase 2
"TitlecaseLetter", // = Letter, Titlecase 3
"ModifierLetter", // = Letter, Modifier 4
"OtherLetter", // = Letter, Other 5
"NonspacingMark", // = Mark, Non-Spacing 6
"EnclosingMark", // = Mark, Enclosing 8
"SpacingMark", // = Mark, Spacing Combining 7
"DecimalNumber", // = Number, Decimal Digit 9
"LetterNumber", // = Number, Letter 10
"OtherNumber", // = Number, Other 11
"SpaceSeparator", // = Separator, Space 12
"LineSeparator", // = Separator, Line 13
"ParagraphSeparator", // = Separator, Paragraph 14
"Control", // = Other, Control 15
"Format", // = Other, Format 16
"<unused>", // missing
"PrivateUse", // = Other, Private Use 18
"Surrogate", // = Other, Surrogate 19
"DashPunctuation", // = Punctuation, Dash 20
"OpenPunctuation", // = Punctuation, Open 21
"ClosePunctuation", // = Punctuation, Close 22
"ConnectorPunctuation", // = Punctuation, Connector 23
"OtherPunctuation", // = Punctuation, Other 24
"MathSymbol", // = Symbol, Math 25
"CurrencySymbol", // = Symbol, Currency 26
"ModifierSymbol", // = Symbol, Modifier 27
"OtherSymbol", // = Symbol, Other 28
"InitialPunctuation", // = Punctuation, Initial quote 29 (may behave like Ps or Pe depending on usage)
"FinalPunctuation" // = Punctuation, Final quote 30 (may behave like Ps or Pe dependingon usage)
};
static final String[][] SUPER_CATEGORIES = {
{"L", "Letter", "Ll | Lm | Lo | Lt | Lu"},
{"M", "Mark", "Mc | Me | Mn"},
{"N", "Number", "Nd | Nl | No"},
{"Z", "Separator", "Zl | Zp | Zs"},
{"C", "Other", "Cc | Cf | Cn | Co | Cs"},
{"S", "Symbol", "Sc | Sk | Sm | So"},
{"P", "Punctuation", "Pc | Pd | Pe | Pf | Pi | Po | Ps"},
{"LC", "Cased Letter", "Ll | Lt | Lu"},
};
static final String[] BIDI_CLASS = {
"L", // Left-Right; Most alphabetic, syllabic, and logographic characters (e.g., CJK ideographs)
"R", // Right-Left; Arabic, Hebrew, and punctuation specific to those scripts
"EN", // European Number
"ES", // European Number Separator
"ET", // European Number Terminator
"AN", // Arabic Number
"CS", // Common Number Separator
"B", // Paragraph Separator
"S", // Segment Separator
"WS", // Whitespace
"ON", // Other Neutrals ; All other characters: punctuation, symbols
"<unused>", "BN", "NSM", "AL", "LRO", "RLO", "LRE", "RLE", "PDF"
};
static String[] LONG_BIDI_CLASS = {
"LeftToRight", // Left-Right; Most alphabetic, syllabic, and logographic characters (e.g., CJK ideographs)
"RightToLeft", // Right-Left; Arabic, Hebrew, and punctuation specific to those scripts
"EuropeanNumber", // European Number
"EuropeanSeparator", // European Number Separator
"EuropeanTerminator", // European Number Terminator
"ArabicNumber", // Arabic Number
"CommonSeparator", // Common Number Separator
"ParagraphSeparator", // Paragraph Separator
"SegmentSeparator", // Segment Separator
"WhiteSpace", // Whitespace
"OtherNeutral", // Other Neutrals ; All other characters: punctuation, symbols
"<unused>",
"BoundaryNeutral", "NonspacingMark", "ArabicLetter",
"LeftToRightOverride",
"RightToLeftOverride", "LeftToRightEmbedding",
"RightToLeftEmbedding", "PopDirectionalFormat"
};
private static String[] CASE_TABLE = {
"LOWER", "TITLE", "UPPER", "UNCASED"
};
static String[] LONG_DECOMPOSITION_TYPE = {
"none", // NONE
"canonical", // CANONICAL
"compat", // Otherwise unspecified compatibility character.
"font", // A font variant (e.g. a blackletter form).
"noBreak", // A no-break version of a space or hyphen.
"initial", // // An initial presentation form (Arabic).
"medial", // // A medial presentation form (Arabic).
"final", // // A final presentation form (Arabic).
"isolated", // An isolated presentation form (Arabic).
"circle", // An encircled form.
"super", // A superscript form.
"sub", // A subscript form.
"vertical", // A vertical layout presentation form.
"wide", // A wide (or zenkaku) compatibility character.
"narrow", // A narrow (or hankaku) compatibility character.
"small", // A small variant form (CNS compatibility).
"square", // A CJK squared font variant.
"fraction", // A vulgar fraction form.
};
static String[] DECOMPOSITION_TYPE = {
"none", // NONE
"can", // CANONICAL
"com", // Otherwise unspecified compatibility character.
"font", // A font variant (e.g. a blackletter form).
"nb", // A no-break version of a space or hyphen.
"init", // // An initial presentation form (Arabic).
"med", // // A medial presentation form (Arabic).
"fin", // // A final presentation form (Arabic).
"iso", // An isolated presentation form (Arabic).
"enc", // An encircled form.
"sup", // A superscript form.
"sub", // A subscript form.
"vert", // A vertical layout presentation form.
"wide", // A wide (or zenkaku) compatibility character.
"nar", // A narrow (or hankaku) compatibility character.
"sml", // A small variant form (CNS compatibility).
"sqr", // A CJK squared font variant.
"fra", // A vulgar fraction form.
};
static {
fixArray(LONG_DECOMPOSITION_TYPE);
fixArray(DECOMPOSITION_TYPE);
}
static private String[] MIRRORED_TABLE = {
"N",
"Y"
};
static String[] LONG_NUMERIC_TYPE = {
"none",
"numeric",
"digit",
"decimal",
/*
"Han_Primary",
"Han_Accounting",
"Han_Other"
*/
};
static String[] NUMERIC_TYPE = {
"none",
"nu",
"di",
"de",
/*
"hp",
"ha",
"ho"
*/
};
static {
fixArray(LONG_NUMERIC_TYPE);
fixArray(NUMERIC_TYPE);
}
static String[] COMBINING_CLASS = new String[256];
static String[] LONG_COMBINING_CLASS = new String[256];
// TODO clean this up, just a quick copy of code
static {
for (int style = SHORT; style <= LONG; ++style)
for (int index = 0; index < 256; ++index) {
String s = null;
switch (index) {
case 0: s = style < LONG ? "NR" : "NotReordered"; break;
case 1: s = style < LONG ? "OV" : "Overlay"; break;
case 7: s = style < LONG ? "NK" : "Nukta"; break;
case 8: s = style < LONG ? "KV" : "KanaVoicing"; break;
case 9: s = style < LONG ? "VR" : "Virama"; break;
case 200: s = style < LONG ? "ATBL" : "AttachedBelowLeft"; break;
case 202: s = style < LONG ? "ATB" : "AttachedBelow"; break;
case 204: s = style < LONG ? "ATBR" : "AttachedBelowRight"; break;
case 208: s = style < LONG ? "ATL" : "AttachedLeft"; break;
case 210: s = style < LONG ? "ATR" : "AttachedRight"; break;
case 212: s = style < LONG ? "ATAL" : "AttachedAboveLeft"; break;
case 214: s = style < LONG ? "ATA" : "AttachedAbove"; break;
case 216: s = style < LONG ? "ATAR" : "AttachedAboveRight"; break;
case 218: s = style < LONG ? "BL" : "BelowLeft"; break;
case 220: s = style < LONG ? "B" : "Below"; break;
case 222: s = style < LONG ? "BR" : "BelowRight"; break;
case 224: s = style < LONG ? "L" : "Left"; break;
case 226: s = style < LONG ? "R" : "Right"; break;
case 228: s = style < LONG ? "AL" : "AboveLeft"; break;
case 230: s = style < LONG ? "A" : "Above"; break;
case 232: s = style < LONG ? "AR" : "AboveRight"; break;
case 233: s = style < LONG ? "DB" : "DoubleBelow"; break;
case 234: s = style < LONG ? "DA" : "DoubleAbove"; break;
case 240: s = style < LONG ? "IS" : "IotaSubscript"; break;
default: s = style < LONG ? "" + index : "Fixed_" + index;
}
if (style < LONG) COMBINING_CLASS[index] = s;
else LONG_COMBINING_CLASS[index] = s;
}
if (false) for (int i = 0; i < 256; ++i) {
System.out.println(i
+ "\t" + COMBINING_CLASS[i]
+ "\t" + LONG_COMBINING_CLASS[i]);
}
}
static {
if (LIMIT_CATEGORY != GENERAL_CATEGORY.length || LIMIT_CATEGORY != LONG_GENERAL_CATEGORY.length) {
System.err.println("!! ERROR !! Enums and Names out of sync: category");
}
if (LIMIT_BIDI_CLASS != BIDI_CLASS.length) {
System.err.println("!! ERROR !! Enums and Names out of sync: bidi");
}
if (LIMIT_LINE_BREAK != LINE_BREAK.length || LIMIT_LINE_BREAK != LONG_LINE_BREAK.length) {
System.err.println("!! ERROR !! Enums and Names out of sync: linebreak");
}
if (LIMIT_DECOMPOSITION_TYPE != LONG_DECOMPOSITION_TYPE.length || LIMIT_DECOMPOSITION_TYPE != DECOMPOSITION_TYPE.length) {
System.err.println("!! ERROR !! Enums and Names out of sync: decomp type");
}
if (LIMIT_MIRRORED != MIRRORED_TABLE.length) {
System.err.println("!! ERROR !! Enums and Names out of sync: compat type");
}
if (LIMIT_CASE != CASE_TABLE.length) {
System.err.println("!! ERROR !! Enums and Names out of sync: case");
}
if (LIMIT_NUMERIC_TYPE != LONG_NUMERIC_TYPE.length) {
System.err.println("!! ERROR !! Enums and Names out of sync: numeric type");
}
if (LIMIT_EAST_ASIAN_WIDTH != LONG_EAST_ASIAN_WIDTH.length) {
System.err.println("!! ERROR !! Enums and Names out of sync: east Asian Width");
}
if (LIMIT_BINARY_PROPERTIES != BP.length) {
System.err.println("!! ERROR !! Enums and Names out of sync: binary properties");
}
if (LIMIT_SCRIPT != LONG_SCRIPT.length) {
System.err.println("!! ERROR !! Enums and Names out of sync: script");
}
if (LIMIT_AGE != AGE.length) {
System.err.println("!! ERROR !! Enums and Names out of sync: age");
}
}
public static byte ON = Utility.lookup("ON", BIDI_CLASS, true);
public static String[] HANGUL_SYLLABLE_TYPE = {
"NA",
"L",
"V",
"T",
"LV",
"LVT",
};
public static String[] LONG_HANGUL_SYLLABLE_TYPE = {
"Not_Applicable",
"Leading_Jamo",
"Vowel_Jamo",
"Trailing_Jamo",
"LV_Syllable",
"LVT_Syllable",
};
public static String[] JOINING_TYPE = {
"C",
"D",
"R",
"U",
"L",
"T"
};
public static String[] LONG_JOINING_TYPE = {
"JoinCausing",
"DualJoining",
"RightJoining",
"NonJoining",
"LeftJoining",
"Transparent"
};
public static String[] JOINING_GROUP = {
"NO_JOINING_GROUP",
"AIN",
"ALAPH",
"ALEF",
"BEH",
"BETH",
"DAL",
"DALATH_RISH",
"E",
"FEH",
"FINAL_SEMKATH",
"GAF",
"GAMAL",
"HAH",
"HAMZA_ON_HEH_GOAL",
"HE",
"HEH",
"HEH_GOAL",
"HETH",
"KAF",
"KAPH",
"KNOTTED_HEH",
"LAM",
"LAMADH",
"MEEM",
"MIM",
"NOON",
"NUN",
"PE",
"QAF",
"QAPH",
"REH",
"REVERSED_PE",
"SAD",
"SADHE",
"SEEN",
"SEMKATH",
"SHIN",
"SWASH_KAF",
"TAH",
"TAW",
"TEH_MARBUTA",
"TETH",
"WAW",
"SYRIAC WAW",
"YEH",
"YEH_BARREE",
"YEH_WITH_TAIL",
"YUDH",
"YUDH_HE",
"ZAIN",
"ZHAIN",
"KHAPH",
"FE",
};
static {
fixArray(JOINING_GROUP);
}
static void fixArray (String[] array) {
for (int i = 0; i < array.length; ++i) {
array[i] = Utility.getUnskeleton(
array[i].toLowerCase(Locale.ENGLISH),
true);
}
}
public static String[] OLD_JOINING_GROUP = {
"<no shaping>",
"AIN",
"ALAPH",
"ALEF",
"BEH",
"BETH",
"DAL",
"DALATH_RISH",
"E",
"FEH",
"FINAL_SEMKATH",
"GAF",
"GAMAL",
"HAH",
"HAMZA_ON_HEH_GOAL",
"HE",
"HEH",
"HEH_GOAL",
"HETH",
"KAF",
"KAPH",
"KNOTTED_HEH",
"LAM",
"LAMADH",
"MEEM",
"MIM",
"NOON",
"NUN",
"PE",
"QAF",
"QAPH",
"REH",
"REVERSED_PE",
"SAD",
"SADHE",
"SEEN",
"SEMKATH",
"SHIN",
"SWASH_KAF",
"TAH",
"TAW",
"TEH_MARBUTA",
"TETH",
"WAW",
"SYRIAC WAW",
"YEH",
"YEH_BARREE",
"YEH_WITH_TAIL",
"YUDH",
"YUDH_HE",
"ZAIN",
"ZHAIN",
"KHAPH",
"FE",
};
static String[] JAMO_L_TABLE = {
// Value; Short Name; Unicode Name
"G", // U+1100; G; HANGUL CHOSEONG KIYEOK
"GG", // U+1101; GG; HANGUL CHOSEONG SSANGKIYEOK
"N", // U+1102; N; HANGUL CHOSEONG NIEUN
"D", // U+1103; D; HANGUL CHOSEONG TIKEUT
"DD", // U+1104; DD; HANGUL CHOSEONG SSANGTIKEUT
"R", // U+1105; L; HANGUL CHOSEONG RIEUL
"M", // U+1106; M; HANGUL CHOSEONG MIEUM
"B", // U+1107; B; HANGUL CHOSEONG PIEUP
"BB", // U+1108; BB; HANGUL CHOSEONG SSANGPIEUP
"S", // U+1109; S; HANGUL CHOSEONG SIOS
"SS", // U+110A; SS; HANGUL CHOSEONG SSANGSIOS
"", // U+110B; ; HANGUL CHOSEONG IEUNG
"J", // U+110C; J; HANGUL CHOSEONG CIEUC
"JJ", // U+110D; JJ; HANGUL CHOSEONG SSANGCIEUC
"C", // U+110E; C; HANGUL CHOSEONG CHIEUCH
"K", // U+110F; K; HANGUL CHOSEONG KHIEUKH
"T", // U+1110; T; HANGUL CHOSEONG THIEUTH
"P", // U+1111; P; HANGUL CHOSEONG PHIEUPH
"H" // U+1112; H; HANGUL CHOSEONG HIEUH
};
static String[] JAMO_V_TABLE = {
// Value; Short Name; Unicode Name
"A", // U+1161; A; HANGUL JUNGSEONG A
"AE", // U+1162; AE; HANGUL JUNGSEONG AE
"YA", // U+1163; YA; HANGUL JUNGSEONG YA
"YAE", // U+1164; YAE; HANGUL JUNGSEONG YAE
"EO", // U+1165; EO; HANGUL JUNGSEONG EO
"E", // U+1166; E; HANGUL JUNGSEONG E
"YEO", // U+1167; YEO; HANGUL JUNGSEONG YEO
"YE", // U+1168; YE; HANGUL JUNGSEONG YE
"O", // U+1169; O; HANGUL JUNGSEONG O
"WA", // U+116A; WA; HANGUL JUNGSEONG WA
"WAE", // U+116B; WAE; HANGUL JUNGSEONG WAE
"OE", // U+116C; OE; HANGUL JUNGSEONG OE
"YO", // U+116D; YO; HANGUL JUNGSEONG YO
"U", // U+116E; U; HANGUL JUNGSEONG U
"WEO", // U+116F; WEO; HANGUL JUNGSEONG WEO
"WE", // U+1170; WE; HANGUL JUNGSEONG WE
"WI", // U+1171; WI; HANGUL JUNGSEONG WI
"YU", // U+1172; YU; HANGUL JUNGSEONG YU
"EU", // U+1173; EU; HANGUL JUNGSEONG EU
"YI", // U+1174; YI; HANGUL JUNGSEONG YI
"I", // U+1175; I; HANGUL JUNGSEONG I
};
static String[] JAMO_T_TABLE = {
// Value; Short Name; Unicode Name
"", // filler, for LV syllable
"G", // U+11A8; G; HANGUL JONGSEONG KIYEOK
"GG", // U+11A9; GG; HANGUL JONGSEONG SSANGKIYEOK
"GS", // U+11AA; GS; HANGUL JONGSEONG KIYEOK-SIOS
"N", // U+11AB; N; HANGUL JONGSEONG NIEUN
"NJ", // U+11AC; NJ; HANGUL JONGSEONG NIEUN-CIEUC
"NH", // U+11AD; NH; HANGUL JONGSEONG NIEUN-HIEUH
"D", // U+11AE; D; HANGUL JONGSEONG TIKEUT
"L", // U+11AF; L; HANGUL JONGSEONG RIEUL
"LG", // U+11B0; LG; HANGUL JONGSEONG RIEUL-KIYEOK
"LM", // U+11B1; LM; HANGUL JONGSEONG RIEUL-MIEUM
"LB", // U+11B2; LB; HANGUL JONGSEONG RIEUL-PIEUP
"LS", // U+11B3; LS; HANGUL JONGSEONG RIEUL-SIOS
"LT", // U+11B4; LT; HANGUL JONGSEONG RIEUL-THIEUTH
"LP", // U+11B5; LP; HANGUL JONGSEONG RIEUL-PHIEUPH
"LH", // U+11B6; LH; HANGUL JONGSEONG RIEUL-HIEUH
"M", // U+11B7; M; HANGUL JONGSEONG MIEUM
"B", // U+11B8; B; HANGUL JONGSEONG PIEUP
"BS", // U+11B9; BS; HANGUL JONGSEONG PIEUP-SIOS
"S", // U+11BA; S; HANGUL JONGSEONG SIOS
"SS", // U+11BB; SS; HANGUL JONGSEONG SSANGSIOS
"NG", // U+11BC; NG; HANGUL JONGSEONG IEUNG
"J", // U+11BD; J; HANGUL JONGSEONG CIEUC
"C", // U+11BE; C; HANGUL JONGSEONG CHIEUCH
"K", // U+11BF; K; HANGUL JONGSEONG KHIEUKH
"T", // U+11C0; T; HANGUL JONGSEONG THIEUTH
"P", // U+11C1; P; HANGUL JONGSEONG PHIEUPH
"H", // U+11C2; H; HANGUL JONGSEONG HIEUH
};
static final String[] NF_NAME = {"NFD", "NFC", "NFKD", "NFKC"};
static final String[][] NAME_ABBREVIATIONS = {
{"CJK UNIFIED IDEOGRAPH-", "CJK-"},
{"CJK COMPATIBILITY IDEOGRAPH-", "CJKC-"},
{"IDEOGRAPHIC TELEGRAPH SYMBOL FOR", "ITSF."},
{"BRAILLE PATTERN DOTS-", "BPD-"},
{"CANADIAN SYLLABICS WEST-", "CSW."},
/*{"LATIN SMALL LETTER", "LSL."},
{"LATIN CAPITAL LETTER", "LCL."},
{"GREEK SMALL LETTER", "GSL."},
{"GREEK CAPITAL LETTER", "GCL."},
{"CYRILLIC SMALL LETTER", "GSL."},
{"CYRILLIC CAPITAL LETTER", "GCL."},
{"BYZANTINE MUSICAL SYMBOL", "BMS."},
{"YI SYLLABLE", "YS."},
{"ETHIOPIC SYLLABLE", "ES."},
{"HANGUL SYLLABLE", "HS."},
{"CANADIAN SYLLABICS", "CS."},
{"ARABIC LETTER", "ALt."},
{"ARABIC LIGATURE", "AL."},
*/
{"MATHEMATICAL SANS-SERIF", "MSS."},
{"MATHEMATICAL SERIF", "MS."},
{"BOLD ITALIC", "BI."},
{"ISOLATED FORM", "IF."},
{"FINAL FORM", "FF."},
{"INITIAL FORM", "IF."},
{"VOWEL SIGN", "VS."},
{"KANGXI RADICAL", "KR."},
{"MUSICAL SYMBOL", "MS."},
{"SMALL LETTER", "SL."},
{"CAPITAL LETTER", "CL."},
{"LIGATURE", "Lg."},
{"SYLLABICS", "Ss."},
{"MATHEMATICAL", "M."},
{"LETTER", "L."},
{"SYLLABLE", "S."},
{"SYMBOL", "Sy."},
{"WITH", "W."},
{"CAPITAL", "C."},
{"SMALL", "C."},
{"COMBINING", "Cm."},
{"HANGUL", "H."},
};
static final String[][] PROP_TYPE_NAMES = {
{"Numeric", "AA"},
{"String", "AB"},
{"Miscellaneous", "AC"},
{"Catalog", "AD"},
{"Enumerated", "AE"},
{"Binary", "ZX"},
{"Flattened Binary", "ZY"},
{"Unknown", "ZZ"}
};
/*
LETTER: 23598
MATHEMATICAL: 11976
SYLLABLE: 11872
CAPITAL: 8918
WITH: 8008
COMPATIBILITY: 7800
SMALL: 7740
IDEOGRAPH: 6165
SYLLABICS: 5670
ARABIC: 5646
CANADIAN: 5040
LATIN: 4840
SYMBOL: 4626
LIGATURE: 4048
MUSICAL: 3255
FORM: 3044
ETHIOPIC: 2760
RADICAL: 2695
HANGUL: 2670
ITALIC: 2526
YI: 2468
BOLD: 2256
BYZANTINE: 2214
COMPATIBILITY/IDEOGRAPH: 13800
YI/SYLLABLE: 12815
CANADIAN/SYLLABICS: 11340
CAPITAL/LETTER: 10948
SMALL/LETTER: 10692
CJK/COMPATIBILITY: 10200
ARABIC/LIGATURE: 7110
IDEOGRAPH/-: 6600
MUSICAL/SYMBOL: 6510
MATHEMATICAL/SANS: 5848
LATIN/SMALL: 5786
MATHEMATICAL/BOLD: 5678
ETHIOPIC/SYLLABLE: 5389
LATIN/CAPITAL: 5330
ARABIC/LETTER: 4992
BYZANTINE/MUSICAL: 4182
BRAILLE/PATTERN: 3825
ISOLATED/FORM: 3068
PATTERN/DOTS: 3060
KANGXI/RADICAL: 2996
SYLLABICS/CARRIER: 2975
-/SERIF: 2576
ITALIC/CAPITAL: 2520
BOLD/ITALIC: 2420
KATAKANA/LETTER: 2415
FINAL/FORM: 2400
SERIF/BOLD: 2300
SANS/-: 2208
ITALIC/SMALL: 2184
MONGOLIAN/LETTER: 2080
MATHEMATICAL/ITALIC: 2071
INITIAL/FORM: 2064
CYRILLIC/CAPITAL: 2032
CJK/COMPATIBILITY/IDEOGRAPH: 16200
COMPATIBILITY/IDEOGRAPH/-: 15000
LATIN/SMALL/LETTER: 9306
LATIN/CAPITAL/LETTER: 8160
MATHEMATICAL/SANS/-: 6536
BYZANTINE/MUSICAL/SYMBOL: 5904
BRAILLE/PATTERN/DOTS: 5100
CANADIAN/SYLLABICS/CARRIER: 4550
SANS/-/SERIF: 4416
PATTERN/DOTS/-: 3570
GREEK/SMALL/LETTER: 2934
CYRILLIC/CAPITAL/LETTER: 2852
-/SERIF/BOLD: 2760
MATHEMATICAL/BOLD/ITALIC: 2640
CYRILLIC/SMALL/LETTER: 2604
GREEK/CAPITAL/LETTER: 2580
CJK/COMPATIBILITY/IDEOGRAPH/-: 17400
MATHEMATICAL/SANS/-/SERIF: 8600
BRAILLE/PATTERN/DOTS/-: 5610
SANS/-/SERIF/BOLD: 3910
CANADIAN/SYLLABICS/WEST/-: 2200
IDEOGRAPHIC/TELEGRAPH/SYMBOL/FOR: 2176
-/SERIF/BOLD/ITALIC: 2090
*/
/*
static {
UNASSIGNED_INFO.code = '\uFFFF';
UNASSIGNED_INFO.name = "<reserved>";
UNASSIGNED_INFO.decomposition = "";
UNASSIGNED_INFO.fullCanonicalDecomposition = "";
UNASSIGNED_INFO.fullCompatibilityDecomposition = "";
UNASSIGNED_INFO.name10 = "";
UNASSIGNED_INFO.comment = "";
UNASSIGNED_INFO.numericType = NONE;
UNASSIGNED_INFO.decompositionType = NONE;
UNASSIGNED_INFO.category = lookup("Cn",CATEGORY_TABLE, "PROXY");
UNASSIGNED_INFO.canonical = 0;
UNASSIGNED_INFO.uppercase = "";
UNASSIGNED_INFO.lowercase = "";
UNASSIGNED_INFO.titlecase = "";
UNASSIGNED_INFO.bidi = ON;
UNASSIGNED_INFO.mirrored = NO;
}
*/
}