scuffed-code/tools/unicodetools/com/ibm/text/UCD/UData.java
Mark Davis 459c96f0b1 various changes for new properties
X-SVN-Rev: 14494
2004-02-12 08:23:19 +00:00

345 lines
14 KiB
Java

/**
*******************************************************************************
* Copyright (C) 1996-2001, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UData.java,v $
* $Date: 2004/02/12 08:23:16 $
* $Revision: 1.11 $
*
*******************************************************************************
*/
package com.ibm.text.UCD;
import java.io.*;
import com.ibm.text.utility.*;
class UData implements UCD_Types {
String name;
String shortName; // cache
String decompositionMapping;
String simpleUppercase;
String simpleLowercase;
String simpleTitlecase;
String simpleCaseFolding;
String fullUppercase;
String fullLowercase;
String fullTitlecase;
String fullCaseFolding;
String specialCasing = "";
String bidiMirror;
int codePoint = -1;
double numericValue = Double.NaN;
long binaryProperties; // bidiMirroring, compositionExclusions, PropList
byte generalCategory = Cn;
byte combiningClass = 0;
byte bidiClass = BIDI_ON;
byte decompositionType = NONE;
byte numericType = NUMERIC_NONE;
byte eastAsianWidth = EAN;
byte lineBreak = LB_XX;
byte joiningType = -1;
byte joiningGroup = NO_SHAPING;
byte script = COMMON_SCRIPT;
byte age = 0;
static final UData UNASSIGNED = new UData();
//static final UData NONCHARACTER = new UData();
static {
UNASSIGNED.name = "<unassigned>";
UNASSIGNED.decompositionMapping = UNASSIGNED.bidiMirror
= UNASSIGNED.simpleUppercase
= UNASSIGNED.simpleLowercase
= UNASSIGNED.simpleTitlecase = "";
UNASSIGNED.fleshOut();
/*NONCHARACTER.name = "<noncharacter>";
NONCHARACTER.decompositionMapping = NONCHARACTER.bidiMirror
= NONCHARACTER.simpleUppercase
= NONCHARACTER.simpleLowercase
= NONCHARACTER.simpleTitlecase = "";
NONCHARACTER.binaryProperties = Noncharacter_Code_PointMask;
NONCHARACTER.fleshOut();
*/
}
public UData (int codePoint) {
this.codePoint = codePoint;
}
public UData () {
}
public boolean equals(Object that) {
UData other = (UData) that;
// use equals for objects
if (!name.equals(other.name)) return false;
if (!decompositionMapping.equals(other.decompositionMapping)) return false;
if (!simpleUppercase.equals(other.simpleUppercase)) return false;
if (!simpleLowercase.equals(other.simpleLowercase)) return false;
if (!simpleTitlecase.equals(other.simpleTitlecase)) return false;
if (!simpleCaseFolding.equals(other.simpleCaseFolding)) return false;
if (!fullUppercase.equals(other.fullUppercase)) return false;
if (!fullLowercase.equals(other.fullLowercase)) return false;
if (!fullTitlecase.equals(other.fullTitlecase)) return false;
if (!fullCaseFolding.equals(other.fullCaseFolding)) return false;
if (!specialCasing.equals(other.specialCasing)) return false;
if (!bidiMirror.equals(other.bidiMirror)) return false;
// == for primitives
// Warning: doubles have to use special comparison, because of NaN
if (codePoint != other.codePoint) return false;
if (numericValue < other.numericValue || numericValue > other.numericValue) return false;
if (binaryProperties != other.binaryProperties) return false;
if (generalCategory != other.generalCategory) return false;
if (combiningClass != other.combiningClass) return false;
if (bidiClass != other.bidiClass) return false;
if (decompositionType != other.decompositionType) return false;
if (numericType != other.numericType) return false;
if (eastAsianWidth != other.eastAsianWidth) return false;
if (lineBreak != other.lineBreak) return false;
if (joiningType != other.joiningType) return false;
if (joiningGroup != other.joiningGroup) return false;
if (script != other.script) return false;
if (age != other.age) return false;
return true;
}
public void fleshOut() {
String codeValue = UTF32.valueOf32(codePoint);
if (decompositionMapping == null) decompositionMapping = codeValue;
if (bidiMirror == null) bidiMirror = codeValue;
if (simpleLowercase == null) simpleLowercase = codeValue;
if (simpleCaseFolding == null) simpleCaseFolding = simpleLowercase;
if (fullLowercase == null) fullLowercase = simpleLowercase;
if (fullCaseFolding == null) fullCaseFolding = fullLowercase;
if (simpleUppercase == null) simpleUppercase = codeValue;
if (simpleTitlecase == null) simpleTitlecase = codeValue;
if (fullUppercase == null) fullUppercase = simpleUppercase;
if (fullTitlecase == null) fullTitlecase = simpleTitlecase;
}
public void compact() {
fleshOut();
String codeValue = UTF32.valueOf32(codePoint);
if (fullTitlecase.equals(simpleTitlecase)) fullTitlecase = null;
if (fullUppercase.equals(simpleUppercase)) fullUppercase = null;
if (simpleTitlecase.equals(codeValue)) simpleTitlecase = null;
if (simpleUppercase.equals(codeValue)) simpleUppercase = null;
if (fullCaseFolding.equals(fullLowercase)) fullCaseFolding = null;
if (fullLowercase.equals(simpleLowercase)) fullLowercase = null;
if (simpleCaseFolding.equals(simpleLowercase)) simpleCaseFolding = null;
if (simpleLowercase.equals(codeValue)) simpleLowercase = null;
if (decompositionMapping.equals(codeValue)) decompositionMapping = null;
if (bidiMirror.equals(codeValue)) bidiMirror = null;
// Fix T, U in joining type
if (joiningType < 0) {
if (generalCategory == Mn || generalCategory == Cf) joiningType = JT_T;
else joiningType = JT_U;
}
}
public void setBinaryProperties(int binaryProperties) {
this.binaryProperties = binaryProperties;
}
public boolean isLetter() {
return ((1<<generalCategory) & UCD_Types.LETTER_MASK) != 0;
}
public static void writeString(DataOutputStream os, String s) throws IOException {
if (s == null) {
os.writeByte(0);
} else {
os.writeByte(1);
os.writeUTF(s);
}
}
static final byte[] byteBuffer = new byte[256];
public static String readString(DataInputStream is) throws IOException {
int type = is.readUnsignedByte();
if (type == 0) return null;
return is.readUTF();
}
static final byte ABBREVIATED = 0, FULL = 1;
public String toString() {
return toString(Default.ucd(), FULL);
}
public String toString(UCD ucd, byte style) {
boolean full = style == FULL;
StringBuffer result = new StringBuffer();
String s = UTF32.valueOf32(codePoint);
result.append("<e cp='").append(Utility.quoteXML(codePoint)).append('\'');
result.append(" hx='").append(Utility.hex(codePoint)).append('\'');
if (full || script != COMMON_SCRIPT) result.append(" sn='").append(ucd.getScriptID_fromIndex(script,SHORT)).append('\'');
result.append(" n='").append(Utility.quoteXML(name)).append("'\r\n");
int lastPos = result.length();
if (full || generalCategory != Lo) result.append(" gc='").append(UCD_Names.GENERAL_CATEGORY[generalCategory]).append('\'');
if (full || combiningClass != 0) result.append(" cc='").append(combiningClass & 0xFF).append('\'');
if (full || decompositionType != NONE) result.append(" dt='").append(UCD_Names.LONG_DECOMPOSITION_TYPE[decompositionType]).append('\'');
if (full || !s.equals(decompositionMapping)) result.append(" dm='").append(Utility.quoteXML(decompositionMapping)).append('\'');
if (full || numericType != NUMERIC_NONE) result.append(" nt='").append(UCD_Names.LONG_NUMERIC_TYPE[numericType]).append('\'');
if (full || !Double.isNaN(numericValue)) result.append(" nv='").append(numericValue).append('\'');
if (full || eastAsianWidth != EAN) result.append(" ea='").append(UCD_Names.LONG_EAST_ASIAN_WIDTH[eastAsianWidth]).append('\'');
if (full || lineBreak != LB_AL) result.append(" lb='").append(UCD_Names.LINE_BREAK[lineBreak]).append('\'');
if (joiningType != -1 && (full || joiningType != JT_U)) result.append(" jt='").append(UCD_Names.JOINING_TYPE[joiningType]).append('\'');
if (full || joiningGroup != NO_SHAPING) result.append(" jg='").append(UCD_Names.JOINING_GROUP[joiningGroup]).append('\'');
if (full || age != 0) result.append(" ag='").append(UCD_Names.AGE[age]).append('\'');
if (full || bidiClass != BIDI_L) result.append(" bc='").append(UCD_Names.BIDI_CLASS[bidiClass]).append('\'');
if (full || !bidiMirror.equals(s)) result.append(" bmg='").append(Utility.quoteXML(bidiMirror)).append('\'');
if (lastPos != result.length()) {
result.append("\r\n");
lastPos = result.length();
}
//String bp = "";
long bprops = binaryProperties;
for (int i = 0; i < LIMIT_BINARY_PROPERTIES; ++i) {
if ((bprops & (1L<<i)) != 0) result.append(UCD_Names.BP[i]).append("='T' ");
}
if (lastPos != result.length()) {
result.append("\r\n");
lastPos = result.length();
}
if (full || !fullLowercase.equals(s)) result.append(" lc='").append(Utility.quoteXML(fullLowercase)).append('\'');
if (full || !fullUppercase.equals(simpleUppercase)) result.append(" uc='").append(Utility.quoteXML(fullUppercase)).append('\'');
if (full || !fullTitlecase.equals(fullUppercase)) result.append(" tc='").append(Utility.quoteXML(fullTitlecase)).append('\'');
if (full || !fullCaseFolding.equals(fullLowercase)) result.append(" cf='").append(Utility.quoteXML(fullCaseFolding)).append('\'');
if (full || !simpleLowercase.equals(simpleLowercase)) result.append(" slc='").append(Utility.quoteXML(simpleLowercase)).append('\'');
if (full || !simpleUppercase.equals(simpleUppercase)) result.append(" suc='").append(Utility.quoteXML(simpleUppercase)).append('\'');
if (full || !simpleTitlecase.equals(simpleUppercase)) result.append(" stc='").append(Utility.quoteXML(simpleTitlecase)).append('\'');
if (full || !simpleCaseFolding.equals(simpleLowercase)) result.append(" sfc='").append(Utility.quoteXML(simpleCaseFolding)).append('\'');
if (full || !specialCasing.equals("")) result.append(" fsc='").append(Utility.quoteXML(specialCasing)).append('\'');
result.append("/>");
return result.toString();
}
public void writeBytes(DataOutputStream os) throws IOException {
compact();
os.writeInt(codePoint);
writeString(os, name);
writeString(os, decompositionMapping);
writeString(os, simpleUppercase);
writeString(os, simpleLowercase);
writeString(os, simpleTitlecase);
writeString(os, simpleCaseFolding);
writeString(os, fullUppercase);
writeString(os, fullLowercase);
writeString(os, fullTitlecase);
writeString(os, fullCaseFolding);
writeString(os, specialCasing);
writeString(os, bidiMirror);
os.writeDouble(numericValue);
os.writeLong(binaryProperties);
os.writeByte(generalCategory);
os.writeByte(combiningClass);
os.writeByte(bidiClass);
os.writeByte(decompositionType);
os.writeByte(numericType);
os.writeByte(eastAsianWidth);
os.writeByte(lineBreak);
os.writeByte(joiningType);
os.writeByte(joiningGroup);
os.writeByte(script);
os.writeByte(age);
}
public void readBytes(DataInputStream is) throws IOException {
codePoint = is.readInt();
name = readString(is);
decompositionMapping = readString(is);
simpleUppercase = readString(is);
simpleLowercase = readString(is);
simpleTitlecase = readString(is);
simpleCaseFolding = readString(is);
fullUppercase = readString(is);
fullLowercase = readString(is);
fullTitlecase = readString(is);
fullCaseFolding = readString(is);
specialCasing = readString(is);
bidiMirror = readString(is);
numericValue = is.readDouble();
binaryProperties = is.readLong();
generalCategory = is.readByte();
combiningClass = is.readByte();
bidiClass = is.readByte();
decompositionType = is.readByte();
numericType = is.readByte();
eastAsianWidth = is.readByte();
lineBreak = is.readByte();
joiningType = is.readByte();
joiningGroup = is.readByte();
script = is.readByte();
age = is.readByte();
fleshOut();
// HACK
/*
int bp = binaryProperties;
bp &= ~(1 << CaseFoldTurkishI); // clear bit
if (codePoint == 'i' || codePoint == 'I') {
bp |= (1 << CaseFoldTurkishI);
}
if (bp != binaryProperties) {
if (!HACK) {
System.out.println("\tHACK Resetting CaseFoldTurkishI on U+" + Utility.hex(codePoint) + " " + name + " and others...");
HACK = true;
}
binaryProperties = bp;
}
*/
/*
if (generalCategory == Sm) {
if ((binaryProperties & Math_PropertyMask) != 0) {
if (!HACK) {
System.out.println("Stripping " + Utility.hex(codePoint) + " " + name + " and others...");
HACK = true;
}
binaryProperties &= ~Math_PropertyMask;
}
}
*/
}
static boolean HACK = false;
}