Updated to generate new derived properties; Added DerivedProperty to clean up the code.
X-SVN-Rev: 5673
This commit is contained in:
parent
6669fa1672
commit
34e6b8126d
519
tools/unicodetools/com/ibm/text/UCD/DerivedProperty.java
Normal file
519
tools/unicodetools/com/ibm/text/UCD/DerivedProperty.java
Normal file
@ -0,0 +1,519 @@
|
|||||||
|
/**
|
||||||
|
*******************************************************************************
|
||||||
|
* Copyright (C) 1996-2001, International Business Machines Corporation and *
|
||||||
|
* others. All Rights Reserved. *
|
||||||
|
*******************************************************************************
|
||||||
|
*
|
||||||
|
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DerivedProperty.java,v $
|
||||||
|
* $Date: 2001/09/01 00:06:48 $
|
||||||
|
* $Revision: 1.1 $
|
||||||
|
*
|
||||||
|
*******************************************************************************
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.ibm.text.UCD;
|
||||||
|
import com.ibm.text.utility.*;
|
||||||
|
|
||||||
|
public class DerivedProperty implements UCD_Types {
|
||||||
|
|
||||||
|
UCD ucdData;
|
||||||
|
|
||||||
|
static final int
|
||||||
|
PropMath = 0,
|
||||||
|
PropAlphabetic = 1,
|
||||||
|
PropLowercase = 2,
|
||||||
|
PropUppercase = 3,
|
||||||
|
|
||||||
|
ID_Start = 4,
|
||||||
|
ID_Continue_NO_Cf = 5,
|
||||||
|
|
||||||
|
Mod_ID_Start = 6,
|
||||||
|
Mod_ID_Continue_NO_Cf = 7,
|
||||||
|
|
||||||
|
Missing_Uppercase = 8,
|
||||||
|
Missing_Lowercase = 9,
|
||||||
|
Missing_Mixedcase = 10,
|
||||||
|
|
||||||
|
FC_NFKC_Closure = 11,
|
||||||
|
|
||||||
|
FullCompExclusion = 12,
|
||||||
|
FullCompInclusion = 13,
|
||||||
|
|
||||||
|
QuickNFD = 14,
|
||||||
|
QuickNFC = 15,
|
||||||
|
QuickNFKD = 16,
|
||||||
|
QuickNFKC = 17,
|
||||||
|
|
||||||
|
ExpandsOnNFD = 18,
|
||||||
|
ExpandsOnNFC = 19,
|
||||||
|
ExpandsOnNFKD = 20,
|
||||||
|
ExpandsOnNFKC = 21,
|
||||||
|
|
||||||
|
GenNFD = 22,
|
||||||
|
GenNFC = 23,
|
||||||
|
GenNFKD = 24,
|
||||||
|
GenNFKC = 25,
|
||||||
|
|
||||||
|
DefaultIgnorable = 26,
|
||||||
|
GraphemeExtend = 27,
|
||||||
|
GraphemeBase = 28,
|
||||||
|
|
||||||
|
LIMIT = 29;
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Creates a derived-property calculator that reads character data from
 * the supplied UCD instance.
 *
 * @param ucd the character database queried by every property
 */
public DerivedProperty(UCD ucd) {
    this.ucdData = ucd;
}
|
||||||
|
|
||||||
|
public String getHeader(int propNumber) {
|
||||||
|
DProp dp = dprops[propNumber];
|
||||||
|
if (dp != null) return dp.getHeader();
|
||||||
|
else return "Unimplemented!!";
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName(int propNumber) {
|
||||||
|
DProp dp = dprops[propNumber];
|
||||||
|
if (dp != null) return dp.getName();
|
||||||
|
else return "Unimplemented!!";
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getProperty(int cp, int propNumber) {
|
||||||
|
DProp dp = dprops[propNumber];
|
||||||
|
if (dp != null) return dp.getProperty(cp);
|
||||||
|
else return "Unimplemented!!";
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isDefined(int propNumber) {
|
||||||
|
return dprops[propNumber] != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasProperty(int cp, int propNumber) {
|
||||||
|
return dprops[propNumber].hasProperty(cp);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean propertyVaries(int propNumber) {
|
||||||
|
return dprops[propNumber].propertyVaries();
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
public String getProperty(int cp, int propNumber) {
|
||||||
|
return dprops[propNumber].getProperty(int cp);
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
private DProp[] dprops = new DProp[50];
|
||||||
|
private Normalizer[] nf = new Normalizer[4];
|
||||||
|
private Normalizer nfd, nfc, nfkd, nfkc;
|
||||||
|
static final String[] NAME = {"NFD", "NFC", "NFKD", "NFKC"};
|
||||||
|
static final String[] CaseNames = {
|
||||||
|
"Uppercase",
|
||||||
|
"Lowercase",
|
||||||
|
"Mixedcase"};
|
||||||
|
|
||||||
|
private abstract class DProp {
|
||||||
|
String name, header;
|
||||||
|
String getName() { return name; }
|
||||||
|
String getHeader() { return header; }
|
||||||
|
abstract boolean hasProperty(int cp);
|
||||||
|
public boolean propertyVaries() { return false; }
|
||||||
|
public String getProperty(int cp) { return hasProperty(cp) ? name : ""; }
|
||||||
|
}
|
||||||
|
|
||||||
|
class ExDProp extends DProp {
|
||||||
|
Normalizer nfx;
|
||||||
|
ExDProp(int i) {
|
||||||
|
nfx = nf[i-ExpandsOnNFD];
|
||||||
|
name = "Expands_On_" + NAME[i-ExpandsOnNFD];
|
||||||
|
header = "# Derived Property: " + name
|
||||||
|
+ "\r\n# Generated according to UAX #15."
|
||||||
|
+ "\r\n# Characters whose normalized length is not one."
|
||||||
|
+ "\r\n# WARNING: Normalization of STRINGS must use the algorithm in UAX #15 because characters may interact."
|
||||||
|
+ "\r\n# The length of a normalized string is not necessarily the sum of the lengths of the normalized characters!";
|
||||||
|
}
|
||||||
|
boolean hasProperty(int cp) {
|
||||||
|
if (ucdData.getDecompositionType(cp) == NONE) return false;
|
||||||
|
String cps = UTF32.valueOf32(cp);
|
||||||
|
if (UTF32.length32(nfx.normalize(cps)) == UTF32.length32(cps)) return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class GenDProp extends DProp {
|
||||||
|
Normalizer nfx;
|
||||||
|
GenDProp (int i) {
|
||||||
|
nfx = nf[i-GenNFD];
|
||||||
|
name = NAME[i-GenNFD];
|
||||||
|
header = "# Derived Property: " + name
|
||||||
|
+ "\r\n# Normalized forms, where different from the characters themselves."
|
||||||
|
+ "\r\n# HANGUL SYLLABLES are algorithmically decomposed, and not listed explicitly."
|
||||||
|
+ "\r\n# WARNING: Normalization of STRINGS must use the algorithm in UAX #15 because characters may interact."
|
||||||
|
+ "\r\n# It is NOT sufficient to replace characters one-by-one with these results!";
|
||||||
|
}
|
||||||
|
public boolean propertyVaries() {return true;} // default
|
||||||
|
public String getProperty(int cp) {
|
||||||
|
if (ucdData.getDecompositionType(cp) == NONE) return "";
|
||||||
|
String cps = UTF32.valueOf32(cp);
|
||||||
|
if (cps.equals(nfx.normalize(cps))) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
String norm = Utility.hex(nfx.normalize(cp));
|
||||||
|
String pad = Utility.repeat(" ", 14-norm.length());
|
||||||
|
return name + "; " + norm + pad;
|
||||||
|
//if (cp >= 0xAC00 && cp <= 0xD7A3) return true;
|
||||||
|
//System.out.println(Utility.hex(cps) + " => " + Utility.hex(nf[i-4].normalize(cps)));
|
||||||
|
} // default
|
||||||
|
boolean hasProperty(int cp) { return getProperty(cp).length() != 0; }
|
||||||
|
};
|
||||||
|
|
||||||
|
class CaseDProp extends DProp {
|
||||||
|
byte val;
|
||||||
|
CaseDProp (int i) {
|
||||||
|
val = (i == Missing_Uppercase ? Lu : i == Missing_Lowercase ? Ll : Lt);
|
||||||
|
name = "Possible_Missing_" + CaseNames[i-Missing_Uppercase];
|
||||||
|
header = "# Derived Property: " + name
|
||||||
|
+ "\r\n# Generated from: NFKD has >0 " + CaseNames[i-Missing_Uppercase] + ", no other cases";
|
||||||
|
}
|
||||||
|
boolean hasProperty(int cp) {
|
||||||
|
byte cat = ucdData.getCategory(cp);
|
||||||
|
if (cat == val
|
||||||
|
|| val != Lt && ucdData.getBinaryProperty(cp, Other_Uppercase)) return false;
|
||||||
|
byte xCat = getDecompCat(cp);
|
||||||
|
if (xCat == val) return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class QuickDProp extends DProp {
|
||||||
|
String NO;
|
||||||
|
String MAYBE;
|
||||||
|
Normalizer nfx;
|
||||||
|
QuickDProp (int i) {
|
||||||
|
nfx = nf[i - QuickNFD];
|
||||||
|
NO = NAME[i-QuickNFD] + "_NO";
|
||||||
|
MAYBE = NAME[i-QuickNFD] + "_MAYBE";
|
||||||
|
name = NAME[i-QuickNFD] + "_QuickCheck";
|
||||||
|
header = "# Derived Property: " + name
|
||||||
|
+ "\r\n# Generated from computing decomposibles"
|
||||||
|
+ ((i == QuickNFC || i == QuickNFKC)
|
||||||
|
? " (and characters that may compose with previous ones)" : "");
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean propertyVaries() {return true;}
|
||||||
|
public String getProperty(int cp) {
|
||||||
|
if (nfx.normalizationDiffers(cp)) return NO;
|
||||||
|
else if (nfx.isTrailing(cp)) return MAYBE;
|
||||||
|
else return "";
|
||||||
|
}
|
||||||
|
boolean hasProperty(int cp) { return getProperty(cp).length() != 0; }
|
||||||
|
};
|
||||||
|
|
||||||
|
{
|
||||||
|
// One normalizer per form, stored in NAME order and aliased by name.
nfd = new Normalizer(Normalizer.NFD);
nf[0] = nfd;
nfc = new Normalizer(Normalizer.NFC);
nf[1] = nfc;
nfkd = new Normalizer(Normalizer.NFKD);
nf[2] = nfkd;
nfkc = new Normalizer(Normalizer.NFKC);
nf[3] = nfkc;

// Expands_On_NFD .. Expands_On_NFKC
for (int prop = ExpandsOnNFD; prop <= ExpandsOnNFKC; prop++) {
    dprops[prop] = new ExDProp(prop);
}

// NFD .. NFKC mappings
for (int prop = GenNFD; prop <= GenNFKC; prop++) {
    dprops[prop] = new GenDProp(prop);
}
|
||||||
|
|
||||||
|
dprops[ID_Start] = new DProp() {
|
||||||
|
{
|
||||||
|
name = "ID_Start";
|
||||||
|
header = "# Derived Property: " + name
|
||||||
|
+ "\r\n# Characters that can start an identifier."
|
||||||
|
+ "\r\n# Generated from Lu+Ll+Lt+Lm+Lo+Nl";
|
||||||
|
}
|
||||||
|
boolean hasProperty(int cp) {
|
||||||
|
return ucdData.isIdentifierStart(cp, false);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
dprops[ID_Continue_NO_Cf] = new DProp() {
|
||||||
|
{
|
||||||
|
name = "ID_Continue";
|
||||||
|
header = "# Derived Property: " + name
|
||||||
|
+ "\r\n# Characters that can continue an identifier."
|
||||||
|
+ "\r\n# Generated from: ID_Start + Mn+Mc+Nd+Pc"
|
||||||
|
+ "\r\n# NOTE: Cf characters should be filtered out.";
|
||||||
|
}
|
||||||
|
boolean hasProperty(int cp) {
|
||||||
|
return ucdData.isIdentifierContinue_NO_Cf(cp, false);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
dprops[Mod_ID_Start] = new DProp() {
|
||||||
|
{
|
||||||
|
name = "XID_Start";
|
||||||
|
header = "# Derived Property: " + name
|
||||||
|
+ "\r\n# ID_Start modified for closure under NFKx"
|
||||||
|
+ "\r\n# Modified as described in UAX #15"
|
||||||
|
+ "\r\n# NOTE: Does NOT remove the non-NFKx characters."
|
||||||
|
+ "\r\n# Merely ensures that if isIdentifer(string) then isIdentifier(NFKx(string))";
|
||||||
|
}
|
||||||
|
boolean hasProperty(int cp) {
|
||||||
|
return ucdData.isIdentifierStart(cp, true);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
dprops[Mod_ID_Continue_NO_Cf] = new DProp() {
|
||||||
|
{
|
||||||
|
name = "XID_Continue";
|
||||||
|
header = "# Derived Property: " + name
|
||||||
|
+ "\r\n# Mod_ID_Continue modified for closure under NFKx"
|
||||||
|
+ "\r\n# Modified as described in UAX #15"
|
||||||
|
+ "\r\n# NOTE: Cf characters should be filtered out."
|
||||||
|
+ "\r\n# NOTE: Does NOT remove the non-NFKx characters."
|
||||||
|
+ "\r\n# Merely ensures that if isIdentifer(string) then isIdentifier(NFKx(string))";
|
||||||
|
}
|
||||||
|
boolean hasProperty(int cp) {
|
||||||
|
return ucdData.isIdentifierContinue_NO_Cf(cp, true);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
dprops[PropMath] = new DProp() {
|
||||||
|
{
|
||||||
|
name = "Math";
|
||||||
|
header = "# Derived Property: " + name
|
||||||
|
+ "\r\n# Generated from: Sm + Other_Math";
|
||||||
|
}
|
||||||
|
boolean hasProperty(int cp) {
|
||||||
|
byte cat = ucdData.getCategory(cp);
|
||||||
|
if (cat == Sm
|
||||||
|
|| ucdData.getBinaryProperty(cp,Math_Property)) return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
dprops[PropAlphabetic] = new DProp() {
|
||||||
|
{
|
||||||
|
name = "Alphabetic";
|
||||||
|
header = "# Derived Property: " + name
|
||||||
|
+ "\r\n# Generated from: Lu+Ll+Lt+Lm+Lo+Nl + Other_Alphabetic";
|
||||||
|
}
|
||||||
|
boolean hasProperty(int cp) {
|
||||||
|
byte cat = ucdData.getCategory(cp);
|
||||||
|
if (cat == Lu || cat == Ll || cat == Lt || cat == Lm || cat == Lo || cat == Nl
|
||||||
|
|| ucdData.getBinaryProperty(cp, Alphabetic)) return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
dprops[PropLowercase] = new DProp() {
|
||||||
|
{
|
||||||
|
name = "Lowercase";
|
||||||
|
header = "# Derived Property: " + name
|
||||||
|
+ "\r\n# Generated from: Ll + Other_Lowercase";
|
||||||
|
}
|
||||||
|
boolean hasProperty(int cp) {
|
||||||
|
byte cat = ucdData.getCategory(cp);
|
||||||
|
if (cat == Ll
|
||||||
|
|| ucdData.getBinaryProperty(cp, Other_Lowercase)) return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
dprops[PropUppercase] = new DProp() {
|
||||||
|
{
|
||||||
|
name = "Uppercase";
|
||||||
|
header = "# Derived Property: " + name
|
||||||
|
+ "\r\n# Generated from: Lu + Other_Uppercase";
|
||||||
|
}
|
||||||
|
boolean hasProperty(int cp) {
|
||||||
|
byte cat = ucdData.getCategory(cp);
|
||||||
|
if (cat == Lu
|
||||||
|
|| ucdData.getBinaryProperty(cp, Other_Uppercase)) return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Possible_Missing_Uppercase / _Lowercase / _Mixedcase
for (int prop = Missing_Uppercase; prop <= Missing_Mixedcase; prop++) {
    dprops[prop] = new CaseDProp(prop);
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
(3) Singleton Decompositions: characters that can be derived from the UnicodeData file by
|
||||||
|
including all characters whose canonical decomposition consists of a single character.
|
||||||
|
(4) Non-Starter Decompositions: characters that can be derived from the UnicodeData
|
||||||
|
file by including all characters whose canonical decomposition consists of a sequence
|
||||||
|
of characters, the first of which has a non-zero combining class.
|
||||||
|
*/
|
||||||
|
// Comp_Ex: represented characters with a canonical decomposition for which isCompEx holds.
dprops[FullCompExclusion] = new DProp() {
    {
        name = "Comp_Ex";
        header = "# Derived Property: " + name
            + ": Full Composition Exclusion"
            + "\r\n# Generated from: Composition Exclusions + Singletons + Non-Starter Decompositions";
    }

    boolean hasProperty(int cp) {
        return ucdData.isRepresented(cp)
            && ucdData.getDecompositionType(cp) == CANONICAL
            && isCompEx(cp);
    }
};
|
||||||
|
|
||||||
|
dprops[FullCompInclusion] = new DProp() {
|
||||||
|
{
|
||||||
|
name = "Comp_In";
|
||||||
|
header = "# Derived Property: " + name
|
||||||
|
+ ": Full Composition Inclusion"
|
||||||
|
+ "\r\n# characters with Canonical Decompositions MINUS Full Composition Exclusion";
|
||||||
|
}
|
||||||
|
boolean hasProperty(int cp) {
|
||||||
|
if (!ucdData.isRepresented(cp)) return false;
|
||||||
|
byte dtype = ucdData.getDecompositionType(cp);
|
||||||
|
if (dtype != CANONICAL) return false;
|
||||||
|
|
||||||
|
if (isCompEx(cp)) return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// FC_NFKC_Closure: reports c = NFKC(Fold(b)) where b = NFKC(Fold(cp)), whenever c differs from b.
dprops[FC_NFKC_Closure] = new DProp() {
    {
        name = "FC_NFKC_Closure";
        header = "# Derived Property: " + name
            + "\r\n# Generated from computing: b = NFKC(Fold(a)); c = NFKC(Fold(b));"
            + "\r\n# Then if (c != b) add the mapping from a to c to the set of"
            + "\r\n# mappings that constitute the FC_NFKC_Closure list";
    }

    // The value carries the mapping, so it varies per character.
    public boolean propertyVaries() {
        return true;
    }

    public String getProperty(int cp) {
        if (!ucdData.isRepresented(cp)) {
            return "";
        }
        final String once = nfkc.normalize(fold(cp));
        final String twice = nfkc.normalize(fold(once));
        if (twice.equals(once)) {
            return "";
        }
        return "FNC; " + Utility.hex(twice);
    }

    // A character has the property exactly when its value string is non-empty.
    boolean hasProperty(int cp) {
        return getProperty(cp).length() != 0;
    }
};
|
||||||
|
|
||||||
|
// NFD_QuickCheck .. NFKC_QuickCheck
for (int prop = QuickNFD; prop <= QuickNFKC; prop++) {
    dprops[prop] = new QuickDProp(prop);
}
|
||||||
|
|
||||||
|
dprops[DefaultIgnorable] = new DProp() {
|
||||||
|
{
|
||||||
|
name = "Default_Ignorable_Code_Point";
|
||||||
|
header = header = "# Derived Property: " + name
|
||||||
|
+ "\r\n# Generated from Other_Default_Ignorable_Code_Point + Cf + Cc + Cs - WhiteSpace";
|
||||||
|
}
|
||||||
|
boolean hasProperty(int cp) {
|
||||||
|
if (ucdData.getBinaryProperty(cp, White_space)) return false;
|
||||||
|
byte cat = ucdData.getCategory(cp);
|
||||||
|
if (cat == Cf || cat == Cs || cat == Cc
|
||||||
|
|| ucdData.getBinaryProperty(cp,Reserved_Cf_Code_Point)) return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
GraphemeExtend = 27,
|
||||||
|
GraphemeBase = 28,
|
||||||
|
# GraphemeExtend := Me + Mn + Mc + Other_GraphemeExtend - GraphemeLink
|
||||||
|
# GraphemeBase :=
|
||||||
|
|
||||||
|
*/
|
||||||
|
dprops[GraphemeExtend] = new DProp() {
|
||||||
|
{
|
||||||
|
name = "GraphemeExtend";
|
||||||
|
header = header = "# Derived Property: " + name
|
||||||
|
+ "\r\n# Generated from: Me + Mn + Mc + Other_GraphemeExtend - GraphemeLink";
|
||||||
|
}
|
||||||
|
boolean hasProperty(int cp) {
|
||||||
|
if (ucdData.getBinaryProperty(cp, GraphemeExtend)) return false;
|
||||||
|
byte cat = ucdData.getCategory(cp);
|
||||||
|
if (cat == Me || cat == Mn || cat == Mc
|
||||||
|
|| ucdData.getBinaryProperty(cp,Other_GraphemeExtend)) return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// GraphemeBase: everything except the listed control/format/surrogate/
// private-use/unassigned/separator categories, GraphemeLink characters,
// and characters that have the derived GraphemeExtend property.
dprops[GraphemeBase] = new DProp() {
    {
        name = "GraphemeBase";
        // Fixed: removed the redundant "header = header = ..." self-assignment.
        header = "# Derived Property: " + name
            + "\r\n# Generated from: [0..10FFFF] - Cc - Cf - Cs - Co - Cn - Zl - Zp - GraphemeLink - GraphemeExtend";
    }

    boolean hasProperty(int cp) {
        byte cat = ucdData.getCategory(cp);
        if (cat == Cc || cat == Cf || cat == Cs || cat == Co || cat == Cn || cat == Zl || cat == Zp
            || ucdData.getBinaryProperty(cp,GraphemeLink)) return false;
        // Here GraphemeExtend is the table index of the derived property.
        if (dprops[GraphemeExtend].hasProperty(cp)) return false;
        return true;
    }
};
|
||||||
|
}
|
||||||
|
|
||||||
|
byte getDecompCat(int cp) {
|
||||||
|
byte cat = ucdData.getCategory(cp);
|
||||||
|
if (cat == Lu
|
||||||
|
|| ucdData.getBinaryProperty(cp, Other_Uppercase)) return Lu;
|
||||||
|
if (cat == Ll
|
||||||
|
|| ucdData.getBinaryProperty(cp, Other_Lowercase)) return Ll;
|
||||||
|
if (cat == Lt || cat == Lo || cat == Lm || cat == Nl) return cat;
|
||||||
|
if (!nf[2].normalizationDiffers(cp)) return Lo;
|
||||||
|
|
||||||
|
String norm = nf[2].normalize(cp);
|
||||||
|
int cp2;
|
||||||
|
boolean gotUpper = false;
|
||||||
|
boolean gotLower = false;
|
||||||
|
boolean gotTitle = false;
|
||||||
|
for (int i = 0; i < norm.length(); i += UTF32.count16(cp2)) {
|
||||||
|
cp2 = UTF32.char32At(norm, i);
|
||||||
|
byte catx = ucdData.getCategory(cp2);
|
||||||
|
boolean upx = ucdData.getBinaryProperty(cp, Other_Uppercase);
|
||||||
|
boolean lowx = ucdData.getBinaryProperty(cp, Other_Lowercase);
|
||||||
|
if (catx == Ll || lowx || cp2 == 0x345) gotLower = true;
|
||||||
|
if (catx == Lu || upx) gotUpper = true;
|
||||||
|
if (catx == Lt) gotTitle = true;
|
||||||
|
}
|
||||||
|
if (gotLower && !gotUpper && !gotTitle) return Ll;
|
||||||
|
if (!gotLower && gotUpper && !gotTitle) return Lu;
|
||||||
|
if (gotLower || gotUpper || gotTitle) return Lt;
|
||||||
|
return cat;
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean isCompEx(int cp) {
|
||||||
|
if (ucdData.getBinaryProperty(cp, CompositionExclusion)) return true;
|
||||||
|
String decomp = ucdData.getDecompositionMapping(cp);
|
||||||
|
if (UTF32.length32(decomp) == 1) return true;
|
||||||
|
int first = UTF32.char32At(decomp,0);
|
||||||
|
if (ucdData.getCombiningClass(first) != 0) return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
String fold(int cp) {
|
||||||
|
return ucdData.getCase(cp, FULL, FOLD);
|
||||||
|
}
|
||||||
|
|
||||||
|
String fold(String s) {
|
||||||
|
return ucdData.getCase(s, FULL, FOLD);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void test() {
|
||||||
|
UCD ucd = UCD.make();
|
||||||
|
DerivedProperty dprop = new DerivedProperty(ucd);
|
||||||
|
/*
|
||||||
|
for (int j = 0; j < LIMIT; ++j) {
|
||||||
|
System.out.println();
|
||||||
|
System.out.println(j + "\t" + dprop.getName(j));
|
||||||
|
System.out.println(dprop.getHeader(j));
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
for (int cp = 0xA0; cp < 0xFF; ++cp) {
|
||||||
|
System.out.println();
|
||||||
|
System.out.println(ucd.getCodeAndName(cp));
|
||||||
|
for (int j = 0; j < LIMIT; ++j) {
|
||||||
|
String prop = dprop.getProperty(cp, j);
|
||||||
|
if (prop.length() != 0) System.out.println("\t" + prop);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -5,8 +5,8 @@
|
|||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*
|
*
|
||||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DerivedPropertyLister.java,v $
|
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DerivedPropertyLister.java,v $
|
||||||
* $Date: 2001/08/31 00:30:17 $
|
* $Date: 2001/09/01 00:06:15 $
|
||||||
* $Revision: 1.2 $
|
* $Revision: 1.3 $
|
||||||
*
|
*
|
||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*/
|
*/
|
||||||
@ -21,198 +21,40 @@ final class DerivedPropertyLister extends PropertyLister {
|
|||||||
static final boolean BRIDGE = false;
|
static final boolean BRIDGE = false;
|
||||||
|
|
||||||
static int enum = 0;
|
static int enum = 0;
|
||||||
static final int
|
|
||||||
PropMath = 0,
|
|
||||||
PropAlphabetic = 1,
|
|
||||||
PropLowercase = 2,
|
|
||||||
PropUppercase = 3,
|
|
||||||
|
|
||||||
ID_Start = 4,
|
|
||||||
ID_Continue_NO_Cf = 5,
|
|
||||||
|
|
||||||
Mod_ID_Start = 6,
|
|
||||||
Mod_ID_Continue_NO_Cf = 7,
|
|
||||||
|
|
||||||
Missing_Uppercase = 8,
|
|
||||||
Missing_Lowercase = 9,
|
|
||||||
Missing_Mixedcase = 10,
|
|
||||||
|
|
||||||
FC_NFKC_Closure = 11,
|
|
||||||
|
|
||||||
FullCompExclusion = 12,
|
|
||||||
FullCompInclusion = 13,
|
|
||||||
|
|
||||||
QuickNFD = 14,
|
|
||||||
QuickNFC = 15,
|
|
||||||
QuickNFKD = 16,
|
|
||||||
QuickNFKC = 17,
|
|
||||||
|
|
||||||
ExpandsOnNFD = 18,
|
|
||||||
ExpandsOnNFC = 19,
|
|
||||||
ExpandsOnNFKD = 20,
|
|
||||||
ExpandsOnNFKC = 21,
|
|
||||||
|
|
||||||
GenNFD = 22,
|
|
||||||
GenNFC = 23,
|
|
||||||
GenNFKD = 24,
|
|
||||||
GenNFKC = 25,
|
|
||||||
|
|
||||||
LIMIT = 26;
|
|
||||||
;
|
|
||||||
|
|
||||||
private int propMask;
|
private int propMask;
|
||||||
private Normalizer[] nf = new Normalizer[4];
|
private DerivedProperty dprop;
|
||||||
private Normalizer nfd, nfc, nfkd, nfkc;
|
|
||||||
int width;
|
int width;
|
||||||
|
boolean varies;
|
||||||
|
|
||||||
public DerivedPropertyLister(UCD ucd, int propMask, PrintStream output) {
|
public DerivedPropertyLister(UCD ucd, int propMask, PrintStream output) {
|
||||||
this.propMask = propMask;
|
this.propMask = propMask;
|
||||||
this.output = output;
|
this.output = output;
|
||||||
this.ucdData = ucd;
|
this.ucdData = ucd;
|
||||||
nfd = nf[0] = new Normalizer(Normalizer.NFD);
|
this.dprop = new DerivedProperty(ucd);
|
||||||
nfc = nf[1] = new Normalizer(Normalizer.NFC);
|
varies = dprop.propertyVaries(propMask);
|
||||||
nfkd = nf[2] = new Normalizer(Normalizer.NFKD);
|
|
||||||
nfkc = nf[3] = new Normalizer(Normalizer.NFKC);
|
|
||||||
|
|
||||||
width = super.minPropertyWidth();
|
width = super.minPropertyWidth();
|
||||||
switch (propMask) {
|
switch (propMask) {
|
||||||
case GenNFD: case GenNFC: case GenNFKD: case GenNFKC:
|
case DerivedProperty.GenNFD: case DerivedProperty.GenNFC: case DerivedProperty.GenNFKD: case DerivedProperty.GenNFKC:
|
||||||
alwaysBreaks = true;
|
alwaysBreaks = true;
|
||||||
break;
|
break;
|
||||||
case FC_NFKC_Closure:
|
case DerivedProperty.FC_NFKC_Closure:
|
||||||
alwaysBreaks = true;
|
alwaysBreaks = true;
|
||||||
width = 21;
|
width = 21;
|
||||||
break;
|
break;
|
||||||
case QuickNFC: case QuickNFKC:
|
case DerivedProperty.QuickNFC: case DerivedProperty.QuickNFKC:
|
||||||
width = 11;
|
width = 11;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public String headerString() {
|
public String headerString() {
|
||||||
String result = "# Derived Property: ";
|
return dprop.getHeader(propMask);
|
||||||
switch (propMask) {
|
|
||||||
case ExpandsOnNFD: case ExpandsOnNFC: case ExpandsOnNFKD: case ExpandsOnNFKC:
|
|
||||||
result += "Expands_On_" + NAME[propMask-ExpandsOnNFD] + "\r\n# Generated according to UAX #15."
|
|
||||||
+ "\r\n# Characters whose normalized length is not one."
|
|
||||||
+ "\r\n# WARNING: Normalization of STRINGS must use the algorithm in UAX #15 because characters may interact."
|
|
||||||
+ "\r\n# The length of a normalized string is not necessarily the sum of the lengths of the normalized characters!";
|
|
||||||
break;
|
|
||||||
case GenNFD: case GenNFC: case GenNFKD: case GenNFKC:
|
|
||||||
result += NAME[propMask-GenNFD] + "\r\n# Generated according to UAX #15."
|
|
||||||
+ "\r\n# Normalized forms, where different from the characters themselves."
|
|
||||||
+ ((propMask == 5 || propMask == 3)
|
|
||||||
? ""
|
|
||||||
: "\r\n# HANGUL SYLLABLES are algorithmically decomposed, and not listed explicitly.")
|
|
||||||
+ "\r\n# WARNING: Normalization of STRINGS must use the algorithm in UAX #15 because characters may interact."
|
|
||||||
+ "\r\n# It is NOT sufficient to replace characters one-by-one with these results!";
|
|
||||||
break;
|
|
||||||
case ID_Start: result +=
|
|
||||||
"ID_Start"
|
|
||||||
+ "\r\n# Characters that can start an identifier."
|
|
||||||
+ "\r\n# Generated from Lu+Ll+Lt+Lm+Lo+Nl";
|
|
||||||
break;
|
|
||||||
case ID_Continue_NO_Cf: result +=
|
|
||||||
"ID_Continue"
|
|
||||||
+ "\r\n# Characters that can continue an identifier."
|
|
||||||
+ "\r\n# Generated from: ID_Start + Mn+Mc+Nd+Pc"
|
|
||||||
+ "\r\n# NOTE: Cf characters should be filtered out.";
|
|
||||||
break;
|
|
||||||
case Mod_ID_Start: result +=
|
|
||||||
"XID_Start"
|
|
||||||
+ "\r\n# ID_Start modified for closure under NFKx"
|
|
||||||
+ "\r\n# Modified as described in UAX #15"
|
|
||||||
+ "\r\n# NOTE: Does NOT remove the non-NFKx characters."
|
|
||||||
+ "\r\n# Merely ensures that if isIdentifer(string) then isIdentifier(NFKx(string))";
|
|
||||||
break;
|
|
||||||
case Mod_ID_Continue_NO_Cf: result +=
|
|
||||||
"XID_Continue"
|
|
||||||
+ "\r\n# Mod_ID_Continue modified for closure under NFKx"
|
|
||||||
+ "\r\n# Modified as described in UAX #15"
|
|
||||||
+ "\r\n# NOTE: Cf characters should be filtered out."
|
|
||||||
+ "\r\n# NOTE: Does NOT remove the non-NFKx characters."
|
|
||||||
+ "\r\n# Merely ensures that if isIdentifer(string) then isIdentifier(NFKx(string))";
|
|
||||||
break;
|
|
||||||
case PropMath:
|
|
||||||
result += "Math"
|
|
||||||
+ "\r\n# Generated from: Sm + Other_Math";
|
|
||||||
break;
|
|
||||||
case PropAlphabetic:
|
|
||||||
result += "Alphabetic"
|
|
||||||
+ "\r\n# Generated from: Lu+Ll+Lt+Lm+Lo+Nl + Other_Alphabetic";
|
|
||||||
break;
|
|
||||||
case PropLowercase:
|
|
||||||
result += "Lowercase"
|
|
||||||
+ "\r\n# Generated from: Ll + Other_Lowercase";
|
|
||||||
break;
|
|
||||||
case PropUppercase: result +=
|
|
||||||
"Uppercase"
|
|
||||||
+ "\r\n# Generated from: Lu + Other_Uppercase";
|
|
||||||
break;
|
|
||||||
case Missing_Uppercase: result +=
|
|
||||||
"Missing_Uppercase"
|
|
||||||
+ "\r\n# Generated from: NFKD has >0 Uppercase, no other cases";
|
|
||||||
break;
|
|
||||||
case Missing_Lowercase: result +=
|
|
||||||
"Missing_Lowercase"
|
|
||||||
+ "\r\n# Generated from: NFKD has >0 Lowercase, no other cases";
|
|
||||||
break;
|
|
||||||
case Missing_Mixedcase: result +=
|
|
||||||
"Missing_Mixedcase"
|
|
||||||
+ "\r\n# Generated from: NFKD has >0 Mixedcase, no other cases";
|
|
||||||
break;
|
|
||||||
case FullCompExclusion: result +=
|
|
||||||
"Full Composition Exclusion"
|
|
||||||
+ "\r\n# Generated from: Composition Exclusions + Singletons + Non-Starter Decompositions";
|
|
||||||
break;
|
|
||||||
case FullCompInclusion: result +=
|
|
||||||
"Full Composition Inclusion"
|
|
||||||
+ "\r\n# characters with Canonical Decompositions MINUS Full Composition Exclusion";
|
|
||||||
break;
|
|
||||||
case FC_NFKC_Closure: result +=
|
|
||||||
"FC_NFKC_Closure"
|
|
||||||
+ "\r\n# Generated from computing: b = NFKC(Fold(a)); c = NFKC(Fold(b));"
|
|
||||||
+ "\r\n# Then if (c != b) add the mapping from a to c to the set of"
|
|
||||||
+ "\r\n# mappings that constitute the FC_NFKC_Closure list";
|
|
||||||
break;
|
|
||||||
case QuickNFD: case QuickNFC: case QuickNFKD: case QuickNFKC:
|
|
||||||
result += NAME[propMask-QuickNFD] + "_QuickCheck"
|
|
||||||
+ "\r\n# Generated from computing decomposibles"
|
|
||||||
+ ((propMask == QuickNFC || propMask == QuickNFKC)
|
|
||||||
? " (and characters that may compose with previous ones)" : "");
|
|
||||||
break;
|
|
||||||
default: result += "Unimplemented!!";
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public String propertyName(int cp) {
|
public String propertyName(int cp) {
|
||||||
switch (propMask) {
|
return dprop.getProperty(cp, propMask);
|
||||||
case ExpandsOnNFD: case ExpandsOnNFC: case ExpandsOnNFKD: case ExpandsOnNFKC:
|
|
||||||
return "Expands_On_" + NAME[propMask-ExpandsOnNFD];
|
|
||||||
case GenNFD: case GenNFC: case GenNFKD: case GenNFKC:
|
|
||||||
if (cp >= 0xAC00 && cp <= 0xD7A3) return NAME[propMask-GenNFD] + "; " + "<algorithmic normalization>";
|
|
||||||
String norm = Utility.hex(nf[propMask-GenNFD].normalize(cp));
|
|
||||||
String pad = Utility.repeat(" ", 14-norm.length());
|
|
||||||
return NAME[propMask-GenNFD] + "; " + norm + pad;
|
|
||||||
case ID_Start: return "ID_Start";
|
|
||||||
case ID_Continue_NO_Cf: return "ID_Continue";
|
|
||||||
case Mod_ID_Start: return "XID_Start";
|
|
||||||
case Mod_ID_Continue_NO_Cf: return "XID_Continue";
|
|
||||||
case PropMath: return "Math";
|
|
||||||
case PropAlphabetic: return "Alphabetic";
|
|
||||||
case PropLowercase: return "Lowercase";
|
|
||||||
case PropUppercase: return "Uppercase";
|
|
||||||
case Missing_Uppercase: return "Possible_Missing_Uppercase";
|
|
||||||
case Missing_Lowercase: return "Possible_Missing_Lowercase";
|
|
||||||
case Missing_Mixedcase: return "Possible_Missing_Titlecase";
|
|
||||||
case FullCompExclusion: return "Comp_Ex";
|
|
||||||
case FullCompInclusion: return "Comp_In";
|
|
||||||
case FC_NFKC_Closure: return "FNC; " + Utility.hex(getComputedValue(cp));
|
|
||||||
case QuickNFD: case QuickNFC: case QuickNFKD: case QuickNFKC:
|
|
||||||
return NAME[propMask-QuickNFD] + "_" + getComputedValue(cp);
|
|
||||||
default: return "Unimplemented!!";
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//public String optionalComment(int cp) {
|
//public String optionalComment(int cp) {
|
||||||
@ -225,7 +67,6 @@ final class DerivedPropertyLister extends PropertyLister {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static final String[] NAME = {"NFD", "NFC", "NFKD", "NFKC"};
|
|
||||||
/*
|
/*
|
||||||
public String optionalComment(int cp) {
|
public String optionalComment(int cp) {
|
||||||
String id = ucdData.getCategoryID(cp);
|
String id = ucdData.getCategoryID(cp);
|
||||||
@ -243,155 +84,20 @@ final class DerivedPropertyLister extends PropertyLister {
|
|||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
String last;
|
||||||
|
|
||||||
public byte status(int cp) {
|
public byte status(int cp) {
|
||||||
if (!ucdData.isAssigned(cp)) return EXCLUDE;
|
if (!ucdData.isAssigned(cp)) return EXCLUDE;
|
||||||
//if (cp == 0xFFFF) {
|
if (!varies) {
|
||||||
// System.out.println("# " + Utility.hex(cp));
|
return dprop.hasProperty(cp, propMask) ? INCLUDE : EXCLUDE;
|
||||||
//}
|
|
||||||
byte cat = ucdData.getCategory(cp);
|
|
||||||
//if (cp == 0x0385) {
|
|
||||||
// System.out.println(Utility.hex(firstRealCp));
|
|
||||||
//}
|
|
||||||
|
|
||||||
String cps;
|
|
||||||
byte xCat;
|
|
||||||
|
|
||||||
switch (propMask) {
|
|
||||||
default: return EXCLUDE;
|
|
||||||
|
|
||||||
case ExpandsOnNFD: case ExpandsOnNFC: case ExpandsOnNFKD: case ExpandsOnNFKC:
|
|
||||||
if (ucdData.getDecompositionType(cp) == NONE) return EXCLUDE;
|
|
||||||
cps = UTF32.valueOf32(cp);
|
|
||||||
if (UTF32.length32(nf[propMask-ExpandsOnNFD].normalize(cps)) == UTF32.length32(cps)) return EXCLUDE;
|
|
||||||
break;
|
|
||||||
case GenNFD: case GenNFC: case GenNFKD: case GenNFKC:
|
|
||||||
if (ucdData.getDecompositionType(cp) == NONE) return EXCLUDE;
|
|
||||||
cps = UTF32.valueOf32(cp);
|
|
||||||
if (cps.equals(nf[propMask-GenNFD].normalize(cps))) {
|
|
||||||
return EXCLUDE;
|
|
||||||
}
|
|
||||||
if (cp >= 0xAC00 && cp <= 0xD7A3) return INCLUDE;
|
|
||||||
//System.out.println(Utility.hex(cps) + " => " + Utility.hex(nf[propMask-4].normalize(cps)));
|
|
||||||
return BREAK;
|
|
||||||
case ID_Start:
|
|
||||||
if (ucdData.isIdentifierStart(cp, false)) return INCLUDE;
|
|
||||||
return EXCLUDE;
|
|
||||||
case ID_Continue_NO_Cf:
|
|
||||||
if (ucdData.isIdentifierContinue_NO_Cf(cp, false)) return INCLUDE;
|
|
||||||
return EXCLUDE;
|
|
||||||
case Mod_ID_Start:
|
|
||||||
if (ucdData.isIdentifierStart(cp, true)) return INCLUDE;
|
|
||||||
return EXCLUDE;
|
|
||||||
case Mod_ID_Continue_NO_Cf:
|
|
||||||
if (ucdData.isIdentifierContinue_NO_Cf(cp, true)) return INCLUDE;
|
|
||||||
return EXCLUDE;
|
|
||||||
case PropMath:
|
|
||||||
if (cat == Sm
|
|
||||||
|| ucdData.getBinaryProperty(cp,Math_Property)) return INCLUDE;
|
|
||||||
return EXCLUDE;
|
|
||||||
case PropAlphabetic:
|
|
||||||
if (cat == Lu || cat == Ll || cat == Lt || cat == Lm || cat == Lo || cat == Nl
|
|
||||||
|| ucdData.getBinaryProperty(cp, Alphabetic)) return INCLUDE;
|
|
||||||
case PropLowercase:
|
|
||||||
if (cat == Ll
|
|
||||||
|| ucdData.getBinaryProperty(cp, Other_Lowercase)) return INCLUDE;
|
|
||||||
return EXCLUDE;
|
|
||||||
case PropUppercase:
|
|
||||||
if (cat == Lu
|
|
||||||
|| ucdData.getBinaryProperty(cp, Other_Uppercase)) return INCLUDE;
|
|
||||||
return EXCLUDE;
|
|
||||||
case Missing_Uppercase:
|
|
||||||
if (cat == Lu
|
|
||||||
|| ucdData.getBinaryProperty(cp, Other_Uppercase)) return EXCLUDE;
|
|
||||||
xCat = getDecompCat(cp);
|
|
||||||
if (xCat == Lu) return INCLUDE;
|
|
||||||
return EXCLUDE;
|
|
||||||
case Missing_Lowercase:
|
|
||||||
if (cat == Ll
|
|
||||||
|| ucdData.getBinaryProperty(cp, Other_Lowercase)) return EXCLUDE;
|
|
||||||
xCat = getDecompCat(cp);
|
|
||||||
if (xCat == Ll) return INCLUDE;
|
|
||||||
return EXCLUDE;
|
|
||||||
case Missing_Mixedcase:
|
|
||||||
if (cat == Lt) return EXCLUDE;
|
|
||||||
xCat = getDecompCat(cp);
|
|
||||||
if (xCat == Lt) return INCLUDE;
|
|
||||||
return EXCLUDE;
|
|
||||||
case FullCompExclusion:
|
|
||||||
/*
|
|
||||||
(3) Singleton Decompositions: characters that can be derived from the UnicodeData file by
|
|
||||||
including all characters whose canonical decomposition consists of a single character.
|
|
||||||
(4) Non-Starter Decompositions: characters that can be derived from the UnicodeData
|
|
||||||
file by including all characters whose canonical decomposition consists of a sequence
|
|
||||||
of characters, the first of which has a non-zero combining class.
|
|
||||||
*/
|
|
||||||
{
|
|
||||||
if (!ucdData.isRepresented(cp)) return EXCLUDE;
|
|
||||||
byte dtype = ucdData.getDecompositionType(cp);
|
|
||||||
if (dtype != CANONICAL) return EXCLUDE;
|
|
||||||
|
|
||||||
if (isCompEx(cp)) return INCLUDE;
|
|
||||||
return EXCLUDE;
|
|
||||||
}
|
|
||||||
case FullCompInclusion:
|
|
||||||
{
|
|
||||||
if (!ucdData.isRepresented(cp)) return EXCLUDE;
|
|
||||||
byte dtype = ucdData.getDecompositionType(cp);
|
|
||||||
if (dtype != CANONICAL) return EXCLUDE;
|
|
||||||
|
|
||||||
if (isCompEx(cp)) return EXCLUDE;
|
|
||||||
return INCLUDE;
|
|
||||||
}
|
|
||||||
case FC_NFKC_Closure:
|
|
||||||
if (!ucdData.isRepresented(cp)) return EXCLUDE;
|
|
||||||
|
|
||||||
/*
|
|
||||||
b = Normalize(Fold(a));
|
|
||||||
c = Normalize(Fold(b));
|
|
||||||
if (c != b) add a => c
|
|
||||||
*/
|
|
||||||
{
|
|
||||||
String b = nfkc.normalize(fold(cp));
|
|
||||||
String c = nfkc.normalize(fold(b));
|
|
||||||
if (c.equals(b)) return EXCLUDE;
|
|
||||||
setComputedValue(cp, c);
|
|
||||||
if (cp == 0x1F88) {
|
|
||||||
System.out.println(ucdData.toString(cp));
|
|
||||||
System.out.println("cp: " + ucdData.getCodeAndName(cp));
|
|
||||||
System.out.println("fold(cp): " + ucdData.getCodeAndName(fold(cp)));
|
|
||||||
System.out.println("b: " + ucdData.getCodeAndName(b));
|
|
||||||
System.out.println("fold(b): " + ucdData.getCodeAndName(fold(b)));
|
|
||||||
System.out.println("c: " + ucdData.getCodeAndName(c));
|
|
||||||
}
|
|
||||||
return BREAK;
|
|
||||||
}
|
|
||||||
|
|
||||||
case QuickNFD: case QuickNFC: case QuickNFKD: case QuickNFKC:
|
|
||||||
lastValue = currentValue;
|
|
||||||
Normalizer nfx = nf[propMask - QuickNFD];
|
|
||||||
if (nfx.normalizationDiffers(cp)) currentValue = "NO";
|
|
||||||
else if (nfx.isTrailing(cp)) currentValue = "MAYBE";
|
|
||||||
else return EXCLUDE;
|
|
||||||
setComputedValue(cp, currentValue);
|
|
||||||
if (currentValue != lastValue) return BREAK;
|
|
||||||
return INCLUDE;
|
|
||||||
}
|
}
|
||||||
|
String prop = dprop.getProperty(cp, propMask);
|
||||||
|
if (prop.length() == 0) return EXCLUDE;
|
||||||
// handle script stuff
|
if (prop.equals(last)) return INCLUDE;
|
||||||
/*
|
|
||||||
if (firstRealCp == -1) return INCLUDE;
|
|
||||||
byte cat2 = ucdData.getCategory(firstRealCp);
|
|
||||||
if (cat == cat2) return INCLUDE;
|
|
||||||
int mc = UCD.mainCategoryMask(cat);
|
|
||||||
if (LETTER_MASK == mc && mc == UCD.mainCategoryMask(cat2)) return INCLUDE;
|
|
||||||
|
|
||||||
return BREAK;
|
return BREAK;
|
||||||
*/
|
|
||||||
return INCLUDE;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
static Map computedValue = new HashMap();
|
static Map computedValue = new HashMap();
|
||||||
static String getComputedValue(int cp) {
|
static String getComputedValue(int cp) {
|
||||||
return (String) computedValue.get(new Integer(cp));
|
return (String) computedValue.get(new Integer(cp));
|
||||||
@ -402,52 +108,8 @@ of characters, the first of which has a non-zero combining class.
|
|||||||
static String lastValue = "";
|
static String lastValue = "";
|
||||||
static String currentValue = "";
|
static String currentValue = "";
|
||||||
|
|
||||||
boolean isCompEx(int cp) {
|
|
||||||
if (ucdData.getBinaryProperty(cp, CompositionExclusion)) return true;
|
|
||||||
String decomp = ucdData.getDecompositionMapping(cp);
|
|
||||||
if (UTF32.length32(decomp) == 1) return true;
|
|
||||||
int first = UTF32.char32At(decomp,0);
|
|
||||||
if (ucdData.getCombiningClass(first) != 0) return true;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
StringBuffer foldBuffer = new StringBuffer();
|
StringBuffer foldBuffer = new StringBuffer();
|
||||||
|
|
||||||
String fold(int cp) {
|
*/
|
||||||
return ucdData.getCase(cp, FULL, FOLD);
|
|
||||||
}
|
|
||||||
|
|
||||||
String fold(String s) {
|
|
||||||
return ucdData.getCase(s, FULL, FOLD);
|
|
||||||
}
|
|
||||||
|
|
||||||
byte getDecompCat(int cp) {
|
|
||||||
byte cat = ucdData.getCategory(cp);
|
|
||||||
if (cat == Lu
|
|
||||||
|| ucdData.getBinaryProperty(cp, Other_Uppercase)) return Lu;
|
|
||||||
if (cat == Ll
|
|
||||||
|| ucdData.getBinaryProperty(cp, Other_Lowercase)) return Ll;
|
|
||||||
if (cat == Lt || cat == Lo || cat == Lm || cat == Nl) return cat;
|
|
||||||
if (!nf[2].normalizationDiffers(cp)) return Lo;
|
|
||||||
|
|
||||||
String norm = nf[2].normalize(cp);
|
|
||||||
int cp2;
|
|
||||||
boolean gotUpper = false;
|
|
||||||
boolean gotLower = false;
|
|
||||||
boolean gotTitle = false;
|
|
||||||
for (int i = 0; i < norm.length(); i += UTF32.count16(cp2)) {
|
|
||||||
cp2 = UTF32.char32At(norm, i);
|
|
||||||
byte catx = ucdData.getCategory(cp2);
|
|
||||||
boolean upx = ucdData.getBinaryProperty(cp, Other_Uppercase);
|
|
||||||
boolean lowx = ucdData.getBinaryProperty(cp, Other_Lowercase);
|
|
||||||
if (catx == Ll || lowx || cp2 == 0x345) gotLower = true;
|
|
||||||
if (catx == Lu || upx) gotUpper = true;
|
|
||||||
if (catx == Lt) gotTitle = true;
|
|
||||||
}
|
|
||||||
if (gotLower && !gotUpper && !gotTitle) return Ll;
|
|
||||||
if (!gotLower && gotUpper && !gotTitle) return Lu;
|
|
||||||
if (gotLower || gotUpper || gotTitle) return Lt;
|
|
||||||
return cat;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5,8 +5,8 @@
|
|||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*
|
*
|
||||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
|
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
|
||||||
* $Date: 2001/08/31 00:30:17 $
|
* $Date: 2001/09/01 00:06:15 $
|
||||||
* $Revision: 1.2 $
|
* $Revision: 1.3 $
|
||||||
*
|
*
|
||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*/
|
*/
|
||||||
@ -48,10 +48,12 @@ public class GenerateData implements UCD_Types {
|
|||||||
} else if (arg.equalsIgnoreCase("DerivedBidiClass")) {
|
} else if (arg.equalsIgnoreCase("DerivedBidiClass")) {
|
||||||
generateVerticalSlice(BIDI_CLASS, BIDI_CLASS+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
|
generateVerticalSlice(BIDI_CLASS, BIDI_CLASS+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
|
||||||
"DerivedBidiClass-" + version );
|
"DerivedBidiClass-" + version );
|
||||||
|
|
||||||
} else if (arg.equalsIgnoreCase("DerivedNormalizationProperties")) {
|
} else if (arg.equalsIgnoreCase("DerivedNormalizationProperties")) {
|
||||||
mask = Utility.setBits(0, DerivedPropertyLister.FC_NFKC_Closure, DerivedPropertyLister.ExpandsOnNFKC);
|
mask = Utility.setBits(0, DerivedProperty.FC_NFKC_Closure, DerivedProperty.ExpandsOnNFKC);
|
||||||
mask = Utility.clearBit(mask, DerivedPropertyLister.FullCompInclusion);
|
mask = Utility.clearBit(mask, DerivedProperty.FullCompInclusion);
|
||||||
generateDerived(mask, HEADER_DERIVED, "DerivedNormalizationProperties-" + version );
|
generateDerived(mask, HEADER_DERIVED, "DerivedNormalizationProperties-" + version );
|
||||||
|
|
||||||
} else if (arg.equalsIgnoreCase("DerivedEastAsianWidth")) {
|
} else if (arg.equalsIgnoreCase("DerivedEastAsianWidth")) {
|
||||||
generateVerticalSlice(EAST_ASIAN_WIDTH, EAST_ASIAN_WIDTH+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
|
generateVerticalSlice(EAST_ASIAN_WIDTH, EAST_ASIAN_WIDTH+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
|
||||||
"DerivedEastAsianWidth-" + version );
|
"DerivedEastAsianWidth-" + version );
|
||||||
@ -82,16 +84,19 @@ public class GenerateData implements UCD_Types {
|
|||||||
} else if (arg.equalsIgnoreCase("DerivedNumericValues")) {
|
} else if (arg.equalsIgnoreCase("DerivedNumericValues")) {
|
||||||
generateVerticalSlice(LIMIT_ENUM, LIMIT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
|
generateVerticalSlice(LIMIT_ENUM, LIMIT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
|
||||||
"DerivedNumericValues-" + version );
|
"DerivedNumericValues-" + version );
|
||||||
|
|
||||||
} else if (arg.equalsIgnoreCase("DerivedCoreProperties")) {
|
} else if (arg.equalsIgnoreCase("DerivedCoreProperties")) {
|
||||||
mask = Utility.setBits(0, DerivedPropertyLister.PropMath, DerivedPropertyLister.Mod_ID_Continue_NO_Cf);
|
mask = Utility.setBits(0, DerivedProperty.PropMath, DerivedProperty.Mod_ID_Continue_NO_Cf);
|
||||||
|
mask = Utility.setBits(mask, DerivedProperty.DefaultIgnorable, DerivedProperty.LIMIT-1);
|
||||||
generateDerived(mask, HEADER_DERIVED, "DerivedCoreProperties-" + version );
|
generateDerived(mask, HEADER_DERIVED, "DerivedCoreProperties-" + version );
|
||||||
|
|
||||||
} else if (arg.equalsIgnoreCase("DerivedLineBreak")) {
|
} else if (arg.equalsIgnoreCase("DerivedLineBreak")) {
|
||||||
generateVerticalSlice(LINE_BREAK, LINE_BREAK+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
|
generateVerticalSlice(LINE_BREAK, LINE_BREAK+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
|
||||||
"DerivedLineBreak-" + version );
|
"DerivedLineBreak-" + version );
|
||||||
} else if (arg.equalsIgnoreCase("Scripts")) {
|
} else if (arg.equalsIgnoreCase("Scripts")) {
|
||||||
generateVerticalSlice(SCRIPT+1, SCRIPT + NEXT_ENUM, KEEP_SPECIAL, HEADER_SCRIPTS, "Scripts-");
|
generateVerticalSlice(SCRIPT+1, SCRIPT + NEXT_ENUM, KEEP_SPECIAL, HEADER_SCRIPTS, "Scripts-");
|
||||||
} else if (arg.equalsIgnoreCase("PropList")) {
|
} else if (arg.equalsIgnoreCase("PropList")) {
|
||||||
generateVerticalSlice(BINARY_PROPERTIES + White_space, BINARY_PROPERTIES + Noncharacter_Code_Point + 1,
|
generateVerticalSlice(BINARY_PROPERTIES + White_space, BINARY_PROPERTIES + NEXT_ENUM,
|
||||||
KEEP_SPECIAL, HEADER_EXTEND, "PropList-" + version);
|
KEEP_SPECIAL, HEADER_EXTEND, "PropList-" + version);
|
||||||
} else if (arg.equalsIgnoreCase("AllBinary")) {
|
} else if (arg.equalsIgnoreCase("AllBinary")) {
|
||||||
generateVerticalSlice(BINARY_PROPERTIES, BINARY_PROPERTIES + NEXT_ENUM,
|
generateVerticalSlice(BINARY_PROPERTIES, BINARY_PROPERTIES + NEXT_ENUM,
|
||||||
@ -110,7 +115,7 @@ public class GenerateData implements UCD_Types {
|
|||||||
//checkHoffman("\u0592\u05B7\u05BC\u05A5\u05B0\u05C0\u05C4\u05AD");
|
//checkHoffman("\u0592\u05B7\u05BC\u05A5\u05B0\u05C0\u05C4\u05AD");
|
||||||
|
|
||||||
|
|
||||||
//generateDerived(Utility.setBits(0, DerivedPropertyLister.PropMath, DerivedPropertyLister.Mod_ID_Continue_NO_Cf),
|
//generateDerived(Utility.setBits(0, DerivedProperty.PropMath, DerivedProperty.Mod_ID_Continue_NO_Cf),
|
||||||
// HEADER_DERIVED, "DerivedPropData2-" + version );
|
// HEADER_DERIVED, "DerivedPropData2-" + version );
|
||||||
//generateVerticalSlice(SCRIPT, SCRIPT+1, KEEP_SPECIAL, "ScriptCommon-" + version );
|
//generateVerticalSlice(SCRIPT, SCRIPT+1, KEEP_SPECIAL, "ScriptCommon-" + version );
|
||||||
//listStrings("LowerCase-" + version , 0,0);
|
//listStrings("LowerCase-" + version , 0,0);
|
||||||
@ -194,16 +199,15 @@ public class GenerateData implements UCD_Types {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public static void generateDerived (int bitMask, int headerChoice, String fileName) throws IOException {
|
public static void generateDerived (int bitMask, int headerChoice, String fileName) throws IOException {
|
||||||
ucd = UCD.make("310");
|
PrintStream output = new PrintStream(new FileOutputStream(GEN_DIR + fileName + ".txt"));
|
||||||
PrintStream output = new PrintStream(new FileOutputStream(GEN_DIR + fileName));
|
|
||||||
doHeader(fileName, output, headerChoice);
|
doHeader(fileName, output, headerChoice);
|
||||||
for (int i = 0; i < 32; ++i) {
|
for (int i = 0; i < DerivedProperty.LIMIT; ++i) {
|
||||||
if ((bitMask & (1<<i)) == 0) continue;
|
if ((bitMask & (1<<i)) == 0) continue;
|
||||||
if (i >= DerivedPropertyLister.LIMIT) break;
|
|
||||||
System.out.print('.');
|
System.out.print('.');
|
||||||
output.println("# ================================================");
|
output.println("# ================================================");
|
||||||
output.println();
|
output.println();
|
||||||
new DerivedPropertyLister(ucd, i, output).print();
|
new DerivedPropertyLister(ucd, i, output).print();
|
||||||
|
output.flush();
|
||||||
}
|
}
|
||||||
output.close();
|
output.close();
|
||||||
}
|
}
|
||||||
@ -440,6 +444,7 @@ public class GenerateData implements UCD_Types {
|
|||||||
if (!MyPropertyLister.isUnifiedBinaryPropertyDefined(ucd, i)) continue;
|
if (!MyPropertyLister.isUnifiedBinaryPropertyDefined(ucd, i)) continue;
|
||||||
if (i == DECOMPOSITION_TYPE || i == NUMERIC_TYPE
|
if (i == DECOMPOSITION_TYPE || i == NUMERIC_TYPE
|
||||||
|| i == (BINARY_PROPERTIES | Non_break)
|
|| i == (BINARY_PROPERTIES | Non_break)
|
||||||
|
|| i == (BINARY_PROPERTIES | CaseFoldTurkishI)
|
||||||
|| i == (JOINING_TYPE | JT_U)
|
|| i == (JOINING_TYPE | JT_U)
|
||||||
|| i == (JOINING_GROUP | NO_SHAPING)
|
|| i == (JOINING_GROUP | NO_SHAPING)
|
||||||
) continue; // skip zero case
|
) continue; // skip zero case
|
||||||
@ -483,6 +488,7 @@ public class GenerateData implements UCD_Types {
|
|||||||
output.println();
|
output.println();
|
||||||
System.out.print(".");
|
System.out.print(".");
|
||||||
}
|
}
|
||||||
|
output.flush();
|
||||||
}
|
}
|
||||||
output.close();
|
output.close();
|
||||||
System.out.println();
|
System.out.println();
|
||||||
@ -493,7 +499,6 @@ public class GenerateData implements UCD_Types {
|
|||||||
static public Normalizer formC, formD, formKC, formKD;
|
static public Normalizer formC, formD, formKC, formKD;
|
||||||
|
|
||||||
static public void writeNormalizerTestSuite(String fileName) throws IOException {
|
static public void writeNormalizerTestSuite(String fileName) throws IOException {
|
||||||
ucd = UCD.make();
|
|
||||||
|
|
||||||
PrintWriter log = Utility.openPrintWriter(fileName);
|
PrintWriter log = Utility.openPrintWriter(fileName);
|
||||||
|
|
||||||
|
63
tools/unicodetools/com/ibm/text/UCD/Main.java
Normal file
63
tools/unicodetools/com/ibm/text/UCD/Main.java
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
/**
|
||||||
|
*******************************************************************************
|
||||||
|
* Copyright (C) 1996-2001, International Business Machines Corporation and *
|
||||||
|
* others. All Rights Reserved. *
|
||||||
|
*******************************************************************************
|
||||||
|
*
|
||||||
|
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $
|
||||||
|
* $Date: 2001/09/01 00:06:48 $
|
||||||
|
* $Revision: 1.1 $
|
||||||
|
*
|
||||||
|
*******************************************************************************
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.ibm.text.UCD;
|
||||||
|
import com.ibm.text.utility.*;
|
||||||
|
|
||||||
|
public final class Main {
|
||||||
|
static String ucdVersion = "";
|
||||||
|
|
||||||
|
public static void main (String[] args) throws Exception {
|
||||||
|
|
||||||
|
for (int i = 0; i < args.length; ++i) {
|
||||||
|
String arg = args[i];
|
||||||
|
if (arg.charAt(0) == '#') return; // skip rest of line
|
||||||
|
|
||||||
|
Utility.fixDot();
|
||||||
|
System.out.println("Argument: " + args[i]);
|
||||||
|
|
||||||
|
if (arg.equalsIgnoreCase("all")) {
|
||||||
|
//checkCase();
|
||||||
|
VerifyUCD.checkCanonicalProperties();
|
||||||
|
VerifyUCD.CheckCaseFold();
|
||||||
|
VerifyUCD.checkAgainstUInfo();
|
||||||
|
|
||||||
|
} else if (arg.equalsIgnoreCase("build")) {
|
||||||
|
ConvertUCD.main(new String[]{ucdVersion});
|
||||||
|
} else if (arg.equalsIgnoreCase("version")) ucdVersion = args[++i];
|
||||||
|
else if (arg.equalsIgnoreCase("generateXML")) VerifyUCD.generateXML();
|
||||||
|
else if (arg.equalsIgnoreCase("testDerivedProperties")) DerivedProperty.test();
|
||||||
|
else if (arg.equalsIgnoreCase("checkCase")) VerifyUCD.checkCase();
|
||||||
|
else if (arg.equalsIgnoreCase("checkCase2")) VerifyUCD.checkCase2();
|
||||||
|
else if (arg.equalsIgnoreCase("checkCanonicalProperties")) VerifyUCD.checkCanonicalProperties();
|
||||||
|
else if (arg.equalsIgnoreCase("CheckCaseFold")) VerifyUCD.CheckCaseFold();
|
||||||
|
else if (arg.equalsIgnoreCase("idn")) VerifyUCD.VerifyIDN();
|
||||||
|
else if (arg.equalsIgnoreCase("NFTest")) VerifyUCD.NFTest();
|
||||||
|
else if (arg.equalsIgnoreCase("test1")) VerifyUCD.test1();
|
||||||
|
//else if (arg.equalsIgnoreCase("checkAgainstUInfo")) checkAgainstUInfo();
|
||||||
|
else if (arg.equalsIgnoreCase("checkScripts")) VerifyUCD.checkScripts();
|
||||||
|
else if (arg.equalsIgnoreCase("IdentifierTest")) VerifyUCD.IdentifierTest();
|
||||||
|
else if (arg.equalsIgnoreCase("GenerateData")) GenerateData.main(Utility.split(args[++i],','));
|
||||||
|
else if (arg.equalsIgnoreCase("BuildNames")) BuildNames.main(null);
|
||||||
|
else if (arg.equalsIgnoreCase("writeNormalizerTestSuite"))
|
||||||
|
GenerateData.writeNormalizerTestSuite("NormalizationTest-3.1.1d1.txt");
|
||||||
|
else {
|
||||||
|
System.out.println("Unknown option -- must be one of the following (case-insensitive)");
|
||||||
|
System.out.println("generateXML, checkCase, checkCanonicalProperties, CheckCaseFold,");
|
||||||
|
System.out.println("VerifyIDN, NFTest, test1, ");
|
||||||
|
// System.out.println(checkAgainstUInfo,");
|
||||||
|
System.out.println("checkScripts, IdentifierTest, writeNormalizerTestSuite");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -5,8 +5,8 @@
|
|||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*
|
*
|
||||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestData.java,v $
|
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestData.java,v $
|
||||||
* $Date: 2001/08/31 00:30:17 $
|
* $Date: 2001/09/01 00:06:15 $
|
||||||
* $Revision: 1.2 $
|
* $Revision: 1.3 $
|
||||||
*
|
*
|
||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*/
|
*/
|
||||||
@ -38,8 +38,8 @@ public class TestData implements UCD_Types {
|
|||||||
"DerivedBidiClass-3.1.1d1.txt");
|
"DerivedBidiClass-3.1.1d1.txt");
|
||||||
|
|
||||||
|
|
||||||
mask = Utility.setBits(0, DerivedPropertyLister.FC_NFKC_Closure, DerivedPropertyLister.ExpandsOnNFKC);
|
mask = Utility.setBits(0, DerivedProperty.FC_NFKC_Closure, DerivedProperty.ExpandsOnNFKC);
|
||||||
mask = Utility.clearBit(mask, DerivedPropertyLister.FullCompInclusion);
|
mask = Utility.clearBit(mask, DerivedProperty.FullCompInclusion);
|
||||||
generateDerived(mask, HEADER_DERIVED, "DerivedNormalizationProperties-3.1.0d1.txt");
|
generateDerived(mask, HEADER_DERIVED, "DerivedNormalizationProperties-3.1.0d1.txt");
|
||||||
|
|
||||||
generateVerticalSlice(EAST_ASIAN_WIDTH, EAST_ASIAN_WIDTH+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
|
generateVerticalSlice(EAST_ASIAN_WIDTH, EAST_ASIAN_WIDTH+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
|
||||||
@ -64,7 +64,7 @@ public class TestData implements UCD_Types {
|
|||||||
generateVerticalSlice(LIMIT_ENUM, LIMIT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
|
generateVerticalSlice(LIMIT_ENUM, LIMIT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
|
||||||
"DerivedNumericValues-3.1.0d1.txt");
|
"DerivedNumericValues-3.1.0d1.txt");
|
||||||
|
|
||||||
mask = Utility.setBits(0, DerivedPropertyLister.PropMath, DerivedPropertyLister.Mod_ID_Continue_NO_Cf);
|
mask = Utility.setBits(0, DerivedProperty.PropMath, DerivedProperty.Mod_ID_Continue_NO_Cf);
|
||||||
generateDerived(mask, HEADER_DERIVED, "DerivedCoreProperties-3.1.0d1.txt");
|
generateDerived(mask, HEADER_DERIVED, "DerivedCoreProperties-3.1.0d1.txt");
|
||||||
|
|
||||||
generateVerticalSlice(LINE_BREAK, LINE_BREAK+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
|
generateVerticalSlice(LINE_BREAK, LINE_BREAK+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
|
||||||
@ -83,7 +83,7 @@ public class TestData implements UCD_Types {
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
//generateDerived(Utility.setBits(0, DerivedPropertyLister.PropMath, DerivedPropertyLister.Mod_ID_Continue_NO_Cf),
|
//generateDerived(Utility.setBits(0, DerivedProperty.PropMath, DerivedProperty.Mod_ID_Continue_NO_Cf),
|
||||||
// HEADER_DERIVED, "DerivedPropData2-3.1.0d1.txt");
|
// HEADER_DERIVED, "DerivedPropData2-3.1.0d1.txt");
|
||||||
//generateVerticalSlice(SCRIPT, SCRIPT+1, KEEP_SPECIAL, "ScriptCommon-3.1.0d1.txt");
|
//generateVerticalSlice(SCRIPT, SCRIPT+1, KEEP_SPECIAL, "ScriptCommon-3.1.0d1.txt");
|
||||||
//listStrings("LowerCase-3.1.0d1.txt", 0,0);
|
//listStrings("LowerCase-3.1.0d1.txt", 0,0);
|
||||||
@ -172,7 +172,7 @@ public class TestData implements UCD_Types {
|
|||||||
doHeader(fileName, output, headerChoice);
|
doHeader(fileName, output, headerChoice);
|
||||||
for (int i = 0; i < 32; ++i) {
|
for (int i = 0; i < 32; ++i) {
|
||||||
if ((bitMask & (1<<i)) == 0) continue;
|
if ((bitMask & (1<<i)) == 0) continue;
|
||||||
if (i >= DerivedPropertyLister.LIMIT) break;
|
if (i >= DerivedProperty.LIMIT) break;
|
||||||
System.out.print('.');
|
System.out.print('.');
|
||||||
output.println("# ================================================");
|
output.println("# ================================================");
|
||||||
output.println();
|
output.println();
|
||||||
|
@ -5,8 +5,8 @@
|
|||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*
|
*
|
||||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Names.java,v $
|
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Names.java,v $
|
||||||
* $Date: 2001/08/31 00:29:50 $
|
* $Date: 2001/09/01 00:06:15 $
|
||||||
* $Revision: 1.2 $
|
* $Revision: 1.3 $
|
||||||
*
|
*
|
||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*/
|
*/
|
||||||
@ -94,7 +94,7 @@ final class UCD_Names implements UCD_Types {
|
|||||||
"IDS_TrinaryOperator",
|
"IDS_TrinaryOperator",
|
||||||
"Radical",
|
"Radical",
|
||||||
"UnifiedIdeograph",
|
"UnifiedIdeograph",
|
||||||
"Reserved_Cf_Code_Point",
|
"Other_Default_Ignorable_Code_Point",
|
||||||
"Deprecated",
|
"Deprecated",
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -5,8 +5,8 @@
|
|||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*
|
*
|
||||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/VerifyUCD.java,v $
|
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/VerifyUCD.java,v $
|
||||||
* $Date: 2001/08/31 00:29:50 $
|
* $Date: 2001/09/01 00:06:15 $
|
||||||
* $Revision: 1.2 $
|
* $Revision: 1.3 $
|
||||||
*
|
*
|
||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*/
|
*/
|
||||||
@ -26,51 +26,6 @@ import com.ibm.text.utility.*;
|
|||||||
public class VerifyUCD implements UCD_Types {
|
public class VerifyUCD implements UCD_Types {
|
||||||
|
|
||||||
public static final String IDN_DIR = DATA_DIR + "\\IDN\\";
|
public static final String IDN_DIR = DATA_DIR + "\\IDN\\";
|
||||||
static String ucdVersion = "";
|
|
||||||
|
|
||||||
public static void main (String[] args) throws Exception {
|
|
||||||
|
|
||||||
for (int i = 0; i < args.length; ++i) {
|
|
||||||
String arg = args[i];
|
|
||||||
if (arg.charAt(0) == '#') return; // skip rest of line
|
|
||||||
|
|
||||||
Utility.fixDot();
|
|
||||||
System.out.println("Argument: " + args[i]);
|
|
||||||
|
|
||||||
if (arg.equalsIgnoreCase("all")) {
|
|
||||||
//checkCase();
|
|
||||||
checkCanonicalProperties();
|
|
||||||
CheckCaseFold();
|
|
||||||
checkAgainstUInfo();
|
|
||||||
|
|
||||||
} else if (arg.equalsIgnoreCase("build")) {
|
|
||||||
ConvertUCD.main(new String[]{ucdVersion});
|
|
||||||
} else if (arg.equalsIgnoreCase("version")) ucdVersion = args[++i];
|
|
||||||
else if (arg.equalsIgnoreCase("generateXML")) generateXML();
|
|
||||||
else if (arg.equalsIgnoreCase("checkCase")) checkCase();
|
|
||||||
else if (arg.equalsIgnoreCase("checkCase2")) checkCase2();
|
|
||||||
else if (arg.equalsIgnoreCase("checkCanonicalProperties")) checkCanonicalProperties();
|
|
||||||
else if (arg.equalsIgnoreCase("CheckCaseFold")) CheckCaseFold();
|
|
||||||
else if (arg.equalsIgnoreCase("idn")) VerifyIDN();
|
|
||||||
else if (arg.equalsIgnoreCase("NFTest")) NFTest();
|
|
||||||
else if (arg.equalsIgnoreCase("test1")) test1();
|
|
||||||
//else if (arg.equalsIgnoreCase("checkAgainstUInfo")) checkAgainstUInfo();
|
|
||||||
else if (arg.equalsIgnoreCase("checkScripts")) checkScripts();
|
|
||||||
else if (arg.equalsIgnoreCase("IdentifierTest")) IdentifierTest();
|
|
||||||
else if (arg.equalsIgnoreCase("GenerateData")) GenerateData.main(Utility.split(args[++i],','));
|
|
||||||
else if (arg.equalsIgnoreCase("BuildNames")) BuildNames.main(null);
|
|
||||||
else if (arg.equalsIgnoreCase("writeNormalizerTestSuite"))
|
|
||||||
GenerateData.writeNormalizerTestSuite("NormalizationTest-3.1.1d1.txt");
|
|
||||||
|
|
||||||
else {
|
|
||||||
System.out.println("Unknown option -- must be one of the following (case-insensitive)");
|
|
||||||
System.out.println("generateXML, checkCase, checkCanonicalProperties, CheckCaseFold,");
|
|
||||||
System.out.println("VerifyIDN, NFTest, test1, ");
|
|
||||||
// System.out.println(checkAgainstUInfo,");
|
|
||||||
System.out.println("checkScripts, IdentifierTest, writeNormalizerTestSuite");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
System.out.println(ucd.toString(0x0387));
|
System.out.println(ucd.toString(0x0387));
|
||||||
@ -85,7 +40,7 @@ public class VerifyUCD implements UCD_Types {
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
static void checkAgainstOtherVersion(String otherVersion) {
|
static void checkAgainstOtherVersion(String otherVersion) {
|
||||||
ucd = UCD.make(ucdVersion);
|
ucd = UCD.make(Main.ucdVersion);
|
||||||
UCD ucd2 = UCD.make(otherVersion);
|
UCD ucd2 = UCD.make(otherVersion);
|
||||||
for (int cp = 0; cp <= 0x10FFFF; ++cp) {
|
for (int cp = 0; cp <= 0x10FFFF; ++cp) {
|
||||||
UData curr = ucd.get(cp, true);
|
UData curr = ucd.get(cp, true);
|
||||||
@ -100,7 +55,7 @@ public class VerifyUCD implements UCD_Types {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void generateXML() throws IOException {
|
static void generateXML() throws IOException {
|
||||||
ucd = UCD.make(ucdVersion);
|
ucd = UCD.make(Main.ucdVersion);
|
||||||
String filename = "UCD.xml";
|
String filename = "UCD.xml";
|
||||||
PrintWriter log = Utility.openPrintWriter(filename);
|
PrintWriter log = Utility.openPrintWriter(filename);
|
||||||
|
|
||||||
@ -125,7 +80,7 @@ public class VerifyUCD implements UCD_Types {
|
|||||||
public static void checkCase() throws IOException {
|
public static void checkCase() throws IOException {
|
||||||
Utility.fixDot();
|
Utility.fixDot();
|
||||||
System.out.println("checkCase");
|
System.out.println("checkCase");
|
||||||
ucd = UCD.make(ucdVersion);
|
ucd = UCD.make(Main.ucdVersion);
|
||||||
initNormalizers();
|
initNormalizers();
|
||||||
System.out.println(ucd.getCase("ABC,DE'F G\u0308H", FULL, TITLE));
|
System.out.println(ucd.getCase("ABC,DE'F G\u0308H", FULL, TITLE));
|
||||||
String fileName = "CaseDifferences.txt";
|
String fileName = "CaseDifferences.txt";
|
||||||
@ -178,7 +133,7 @@ public class VerifyUCD implements UCD_Types {
|
|||||||
public static void checkCase2() throws IOException {
|
public static void checkCase2() throws IOException {
|
||||||
Utility.fixDot();
|
Utility.fixDot();
|
||||||
System.out.println("checkCase");
|
System.out.println("checkCase");
|
||||||
ucd = UCD.make(ucdVersion);
|
ucd = UCD.make(Main.ucdVersion);
|
||||||
initNormalizers();
|
initNormalizers();
|
||||||
System.out.println(ucd.getCase("ABC,DE'F G\u0308H", FULL, TITLE));
|
System.out.println(ucd.getCase("ABC,DE'F G\u0308H", FULL, TITLE));
|
||||||
String fileName = "CaseNormalizationDifferences.txt";
|
String fileName = "CaseNormalizationDifferences.txt";
|
||||||
@ -326,7 +281,7 @@ public class VerifyUCD implements UCD_Types {
|
|||||||
static final String upperNames[] = {"", "Other_Upper"};
|
static final String upperNames[] = {"", "Other_Upper"};
|
||||||
|
|
||||||
public static void CheckCaseFold() {
|
public static void CheckCaseFold() {
|
||||||
ucd = UCD.make(ucdVersion);
|
ucd = UCD.make(Main.ucdVersion);
|
||||||
System.out.println("Checking Case Fold");
|
System.out.println("Checking Case Fold");
|
||||||
for (int cp = 0; cp <= 0x10FFFF; ++cp) {
|
for (int cp = 0; cp <= 0x10FFFF; ++cp) {
|
||||||
Utility.dot(cp);
|
Utility.dot(cp);
|
||||||
@ -358,7 +313,7 @@ public class VerifyUCD implements UCD_Types {
|
|||||||
|
|
||||||
public static void VerifyIDN() throws IOException {
|
public static void VerifyIDN() throws IOException {
|
||||||
System.out.println("VerifyIDN");
|
System.out.println("VerifyIDN");
|
||||||
ucd = UCD.make(ucdVersion);
|
ucd = UCD.make(Main.ucdVersion);
|
||||||
initNormalizers();
|
initNormalizers();
|
||||||
|
|
||||||
System.out.println();
|
System.out.println();
|
||||||
@ -725,7 +680,7 @@ E0020-E007F; [TAGGING CHARACTERS]
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private static void IdentifierTest() {
|
public static void IdentifierTest() {
|
||||||
String x = normalize(UTF32.valueOf32(0x10300), 4) ;
|
String x = normalize(UTF32.valueOf32(0x10300), 4) ;
|
||||||
getCategoryID(x);
|
getCategoryID(x);
|
||||||
|
|
||||||
@ -827,7 +782,7 @@ E0020-E007F; [TAGGING CHARACTERS]
|
|||||||
private static UCD ucd;
|
private static UCD ucd;
|
||||||
private static final String[] NAMES = {"NFD", "NFC", "NFKD", "NFKC", "Fold"};
|
private static final String[] NAMES = {"NFD", "NFC", "NFKD", "NFKC", "Fold"};
|
||||||
|
|
||||||
private static void NFTest() {
|
public static void NFTest() {
|
||||||
initNormalizers();
|
initNormalizers();
|
||||||
for (int j = 0; j < 4; ++j) {
|
for (int j = 0; j < 4; ++j) {
|
||||||
Normalizer nfx = nf[j];
|
Normalizer nfx = nf[j];
|
||||||
@ -852,7 +807,7 @@ E0020-E007F; [TAGGING CHARACTERS]
|
|||||||
}
|
}
|
||||||
|
|
||||||
public static void checkScripts() {
|
public static void checkScripts() {
|
||||||
ucd = UCD.make(ucdVersion);
|
ucd = UCD.make(Main.ucdVersion);
|
||||||
for (int i = 0; i < 0x10FFFF; ++i) {
|
for (int i = 0; i < 0x10FFFF; ++i) {
|
||||||
//byte script = ucd.getScript(i);
|
//byte script = ucd.getScript(i);
|
||||||
if (true) { // script != COMMON_SCRIPT) {
|
if (true) { // script != COMMON_SCRIPT) {
|
||||||
@ -863,7 +818,7 @@ E0020-E007F; [TAGGING CHARACTERS]
|
|||||||
|
|
||||||
public static void checkAgainstUInfo() {
|
public static void checkAgainstUInfo() {
|
||||||
/*
|
/*
|
||||||
ucd = UCD.make(ucdVersion);
|
ucd = UCD.make(Main.ucdVersion);
|
||||||
UData x = new UData();
|
UData x = new UData();
|
||||||
x.fleshOut();
|
x.fleshOut();
|
||||||
|
|
||||||
@ -982,7 +937,7 @@ E0020-E007F; [TAGGING CHARACTERS]
|
|||||||
}
|
}
|
||||||
|
|
||||||
public static void test1() {
|
public static void test1() {
|
||||||
ucd = UCD.make(ucdVersion);
|
ucd = UCD.make(Main.ucdVersion);
|
||||||
|
|
||||||
for (int i = 0x19; i < 0x10FFFF; ++i) {
|
for (int i = 0x19; i < 0x10FFFF; ++i) {
|
||||||
|
|
||||||
@ -1019,7 +974,7 @@ E0020-E007F; [TAGGING CHARACTERS]
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void checkCanonicalProperties() {
|
static void checkCanonicalProperties() {
|
||||||
ucd = UCD.make(ucdVersion);
|
ucd = UCD.make(Main.ucdVersion);
|
||||||
System.out.println(ucd.toString(0x1E0A));
|
System.out.println(ucd.toString(0x1E0A));
|
||||||
|
|
||||||
System.out.println("Cross-checking canonical equivalence");
|
System.out.println("Cross-checking canonical equivalence");
|
||||||
|
Loading…
Reference in New Issue
Block a user