ICU-3732 part 1 - make canonical tables for all scripts.

X-SVN-Rev: 16167
This commit is contained in:
Eric Mader 2004-08-19 23:36:25 +00:00
parent 69e7f9fdf3
commit 30a6c4cc9c
9 changed files with 499 additions and 494 deletions

View File

@ -240,7 +240,7 @@ JTextArea text;
enableEvents(WindowEvent.WINDOW_CLOSING);
enableEvents(KeyEvent.KEY_PRESSED);
enableEvents(KeyEvent.KEY_RELEASED);
enableEvents(KeyEvent.KEY_RELEASED);
text.addKeyListener(new KeyAdapter() {
public void keyPressed(KeyEvent e) {
@ -288,7 +288,7 @@ JTextArea text;
"\u0935\u093f\u0937\u093e\u0926 " +
"\u092f\u094b\u0917 " +
"\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930 " +
"\u0909\u0935\u093E\u091A\u0943 " +
"\u0909\u0935\u093E\u091A\u0964 " +
"\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947 " +
"\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947 " +
"\u0938\u092e\u0935\u0947\u0924\u093e " +
@ -296,7 +296,7 @@ JTextArea text;
"\u092e\u093e\u092e\u0915\u093e\u0903 " +
"\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935 " +
"\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924 " +
"\u0938\u0902\u091c\u0935";
"\u0938\u0902\u091c\u092F";
}

View File

@ -15,7 +15,7 @@ import com.ibm.icu.lang.UProperty;
import com.ibm.icu.text.Normalizer;
import com.ibm.icu.text.UnicodeSet;
public class CharacterData
public class ArabicCharacterData
{
public class Record
{
@ -68,7 +68,7 @@ public class CharacterData
private String decomposition;
}
private CharacterData(int charCount)
private ArabicCharacterData(int charCount)
{
records = new Record[charCount];
}
@ -97,10 +97,10 @@ public class CharacterData
// after the iteration is done, so we'd have to use a vector
// to hold the Records at first and copy it to an array
// when we're done...
public static CharacterData factory(UnicodeSet characterSet)
public static ArabicCharacterData factory(UnicodeSet characterSet)
{
int charCount = characterSet.size();
CharacterData data = new CharacterData(charCount);
ArabicCharacterData data = new ArabicCharacterData(charCount);
for (int i = 0; i < charCount; i += 1) {
data.add(characterSet.charAt(i));

View File

@ -1,352 +0,0 @@
/*
*******************************************************************************
* Copyright (C) 1998-2004, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* Created on Dec 3, 2003
*
*******************************************************************************
*/
package com.ibm.icu.dev.tool.layout;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UTF16;
public class ArabicGSUBBuilder
{
/**
 * Builds the string that is inserted into a ligature tree for one ligature:
 * the (possibly shaped) decomposition followed by the ligature code point.
 *
 * For the ISOLATED, FINAL, INITIAL and MEDIAL decomposition types the
 * decomposition is run through ArabicShaping.shape() first; for every other
 * type the decomposition is used unchanged.
 *
 * @param type           a UCharacter.DecompositionType value
 * @param ligature       the code point of the ligature
 * @param decomp         the decomposition of the ligature
 * @param isolClassTable class table handed through to ArabicShaping.shape()
 * @return the component string with the ligature appended
 */
static public String convertString(int type, int ligature, String decomp, ClassTable isolClassTable)
{
    final boolean isIsolated = type == UCharacter.DecompositionType.ISOLATED;
    final boolean isInitial  = type == UCharacter.DecompositionType.INITIAL;
    final boolean isMedial   = type == UCharacter.DecompositionType.MEDIAL;
    final boolean isFinal    = type == UCharacter.DecompositionType.FINAL;

    // Anything that is not a contextual form is passed through unshaped.
    if (!(isIsolated || isInitial || isMedial || isFinal)) {
        return decomp + UCharacter.toString(ligature);
    }

    // INITIAL and MEDIAL set the left type; FINAL and MEDIAL set the right type.
    int leftContext  = (isInitial || isMedial) ? ArabicShaping.VALUE_RIGHT : ArabicShaping.VALUE_NONE;
    int rightContext = (isFinal   || isMedial) ? ArabicShaping.VALUE_LEFT  : ArabicShaping.VALUE_NONE;

    char[] shaped = decomp.toCharArray();

    ArabicShaping.shape(shaped, leftContext, rightContext, isolClassTable);

    return new String(shaped) + UCharacter.toString(ligature);
}
/**
 * Scans every record whose decomposition is exactly one UTF-16 code unit long
 * and records the mapping (decomposed character -> record's code point) in the
 * class table that matches the record's decomposition type.
 *
 * @param data           the character records to scan
 * @param initClassTable receives mappings for INITIAL decompositions
 * @param mediClassTable receives mappings for MEDIAL decompositions
 * @param finaClassTable receives mappings for FINAL decompositions
 * @param isolClassTable receives mappings for ISOLATED decompositions
 */
static void buildContextualForms(CharacterData data, ClassTable initClassTable, ClassTable mediClassTable,
                                 ClassTable finaClassTable, ClassTable isolClassTable)
{
    System.out.print("Finding contextual forms... ");

    for (int index = 0; index < data.countRecords(); index += 1) {
        CharacterData.Record entry = data.getRecord(index);
        String decomp = entry.getDecomposition();

        // Only single-character decompositions are contextual forms.
        if (decomp == null || decomp.length() != 1) {
            continue;
        }

        int contextualForm = entry.getCodePoint();
        int baseLetter = UTF16.charAt(entry.getDecomposition(), 0);
        int formType = entry.getDecompositionType();

        switch (formType) {
        case UCharacter.DecompositionType.ISOLATED:
            isolClassTable.addMapping(baseLetter, contextualForm);
            break;

        case UCharacter.DecompositionType.FINAL:
            finaClassTable.addMapping(baseLetter, contextualForm);
            break;

        case UCharacter.DecompositionType.MEDIAL:
            mediClassTable.addMapping(baseLetter, contextualForm);
            break;

        case UCharacter.DecompositionType.INITIAL:
            initClassTable.addMapping(baseLetter, contextualForm);
            break;

        default:
            // issue some error message?
            break;
        }
    }

    System.out.println("Done.");
}
/**
 * Sorts every record whose decomposition is longer than one UTF-16 code unit
 * into one of two ligature trees.
 *
 * Records with a FINAL, INITIAL, MEDIAL or ISOLATED decomposition type are
 * converted with convertString() and inserted into contextualTree; records
 * with a CANONICAL decomposition type are inserted into cannonicalTree as
 * decomposition + ligature. All other decomposition types are ignored.
 *
 * Either tree may be null, meaning the caller is not interested in that kind
 * of ligature (buildHebrewTables passes null for contextualTree and
 * isolClassTable); matching records are then skipped instead of causing a
 * NullPointerException.
 *
 * @param data           the character records to scan
 * @param isolClassTable class table handed to convertString(); only used for
 *                       contextual ligatures
 * @param contextualTree tree receiving contextual ligatures, or null
 * @param cannonicalTree tree receiving canonical ligatures, or null
 */
static void buildLigatureTrees(CharacterData data, ClassTable isolClassTable,
                               LigatureTree contextualTree, LigatureTree cannonicalTree)
{
    System.out.print("Building ligature trees... ");

    for (int i = 0; i < data.countRecords(); i += 1) {
        CharacterData.Record record = data.getRecord(i);
        String decomposition = record.getDecomposition();

        // Single-character (or missing) decompositions are not ligatures.
        if (decomposition == null || decomposition.length() <= 1) {
            continue;
        }

        int ligature = record.getCodePoint();
        int decompType = record.getDecompositionType();

        switch (decompType) {
        case UCharacter.DecompositionType.FINAL:
        case UCharacter.DecompositionType.INITIAL:
        case UCharacter.DecompositionType.MEDIAL:
        case UCharacter.DecompositionType.ISOLATED:
            // Skip when the caller did not supply a contextual tree.
            if (contextualTree != null) {
                contextualTree.insert(convertString(decompType, ligature, decomposition, isolClassTable));
            }
            break;

        case UCharacter.DecompositionType.CANONICAL:
            // Skip when the caller did not supply a canonical tree.
            if (cannonicalTree != null) {
                cannonicalTree.insert(decomposition + UCharacter.toString(ligature));
            }
            break;

        default:
            // Other decomposition types do not produce ligature entries.
            break;
        }
    }

    System.out.println("Done.");
}
// Glyph class values stored in the glyph class table built by
// buildGlyphClassTable().
// NOTE(review): the numbering looks like the OpenType GDEF glyph class
// convention (1 = base, 2 = ligature, 3 = mark, 4 = component) - confirm.
static final int SIMPLE_GLYPH = 1;
static final int LIGATURE_GLYPH = 2;
static final int MARK_GLYPH = 3;
static final int COMPONENT_GLYPH = 4;
// Maps a general category value (as returned by Record.getGeneralCategory()
// and used directly as the index in getGlyphClass()) to one of the glyph
// classes above. The trailing comment on each entry names the category that
// index is expected to correspond to; 0 presumably means "no glyph class".
static final int categoryClassMap[] = {
0, // UNASSIGNED
SIMPLE_GLYPH, // UPPERCASE_LETTER
SIMPLE_GLYPH, // LOWERCASE_LETTER
SIMPLE_GLYPH, // TITLECASE_LETTER
SIMPLE_GLYPH, // MODIFIER_LETTER
SIMPLE_GLYPH, // OTHER_LETTER
MARK_GLYPH, // NON_SPACING_MARK
MARK_GLYPH, // ENCLOSING_MARK ??
MARK_GLYPH, // COMBINING_SPACING_MARK ??
SIMPLE_GLYPH, // DECIMAL_NUMBER
SIMPLE_GLYPH, // LETTER_NUMBER
SIMPLE_GLYPH, // OTHER_NUMBER;
0, // SPACE_SEPARATOR
0, // LINE_SEPARATOR
0, // PARAGRAPH_SEPARATOR
0, // CONTROL
0, // FORMAT
0, // PRIVATE_USE
0, // SURROGATE
SIMPLE_GLYPH, // DASH_PUNCTUATION
SIMPLE_GLYPH, // START_PUNCTUATION
SIMPLE_GLYPH, // END_PUNCTUATION
SIMPLE_GLYPH, // CONNECTOR_PUNCTUATION
SIMPLE_GLYPH, // OTHER_PUNCTUATION
SIMPLE_GLYPH, // MATH_SYMBOL;
SIMPLE_GLYPH, // CURRENCY_SYMBOL
SIMPLE_GLYPH, // MODIFIER_SYMBOL
SIMPLE_GLYPH, // OTHER_SYMBOL
SIMPLE_GLYPH, // INITIAL_PUNCTUATION
SIMPLE_GLYPH // FINAL_PUNCTUATION
};
/**
 * Determines the glyph class of one character record.
 *
 * A record whose decomposition is longer than one UTF-16 code unit is a
 * ligature; every other record is classified by looking up its general
 * category in categoryClassMap.
 *
 * @param record the character record to classify
 * @return LIGATURE_GLYPH or the categoryClassMap entry for the record
 */
static int getGlyphClass(CharacterData.Record record)
{
    String decomposition = record.getDecomposition();
    boolean isLigature = decomposition != null && decomposition.length() > 1;

    return isLigature ? LIGATURE_GLYPH : categoryClassMap[record.getGeneralCategory()];
}
/**
 * Creates a class table that maps the code point of every record in data
 * to the glyph class computed by getGlyphClass().
 *
 * @param data the character records to classify
 * @return the newly built class table
 */
static ClassTable buildGlyphClassTable(CharacterData data)
{
    System.out.print("Building glyph class table... ");

    ClassTable glyphClasses = new ClassTable();
    int index = 0;

    while (index < data.countRecords()) {
        CharacterData.Record entry = data.getRecord(index);
        int codePoint = entry.getCodePoint();
        int glyphClass = getGlyphClass(entry);

        glyphClasses.addMapping(codePoint, glyphClass);
        index += 1;
    }

    System.out.println("Done.");

    return glyphClasses;
}
// Builds the Arabic glyph substitution ("Arabic" GSUB) and glyph class
// ("Arabic" GDEF) tables and writes both to fileName through a
// LigatureModuleWriter.
private static void buildArabicTables(String fileName) {
// TODO: Might want to have the ligature table builder explicitly check for ligatures
// which start with space and tatweel rather than pulling them out here...
// Character set: everything in the Arabic script, plus the Cf/Po/So/Mn/Nd/Lm
// characters of the Arabic block, minus the ranges listed in oddLigatures.
UnicodeSet arabicBlock = new UnicodeSet("[[\\p{block=Arabic}] & [[:Cf:][:Po:][:So:][:Mn:][:Nd:][:Lm:]]]");
UnicodeSet oddLigatures = new UnicodeSet("[\\uFC5E-\\uFC63\\uFCF2-\\uFCF4\\uFE70-\\uFE7F]");
UnicodeSet arabicLetters = new UnicodeSet("[\\p{Arabic}]");
CharacterData arabicData = CharacterData.factory(arabicLetters.addAll(arabicBlock).removeAll(oddLigatures));
ClassTable classTable = buildGlyphClassTable(arabicData);
// One class table per contextual form; filled by buildContextualForms().
ClassTable initClassTable = new ClassTable();
ClassTable mediClassTable = new ClassTable();
ClassTable finaClassTable = new ClassTable();
ClassTable isolClassTable = new ClassTable();
buildContextualForms(arabicData, initClassTable, mediClassTable, finaClassTable, isolClassTable);
// NOTE(review): snapshot() presumably prepares the table for the lookups
// done while shaping in buildLigatureTrees() - confirm against ClassTable.
isolClassTable.snapshot();
LigatureTree ccmpTree = new LigatureTree();
LigatureTree ligaTree = new LigatureTree();
// Contextual ligatures go to ligaTree, canonical ones to ccmpTree.
buildLigatureTrees(arabicData, isolClassTable, ligaTree, ccmpTree);
LigatureTreeWalker ccmpWalker = new LigatureTreeWalker();
LigatureTreeWalker ligaWalker = new LigatureTreeWalker();
ccmpTree.walk(ccmpWalker);
ligaTree.walk(ligaWalker);
LookupList lookupList = new LookupList();
FeatureList featureList = new FeatureList();
ScriptList scriptList = new ScriptList();
Lookup ccmpLookup, initLookup, mediLookup, finaLookup, ligaLookup;
int ccmpLookupIndex, initLookupIndex, mediLookupIndex, finaLookupIndex, ligaLookupIndex;
// Ligature lookups are fed from the tree walkers, single-substitution
// lookups from the contextual class tables.
ccmpLookup = new Lookup(Lookup.GSST_Ligature, 0);
ccmpLookup.addSubtable(ccmpWalker);
initLookup = new Lookup(Lookup.GSST_Single, 0);
initLookup.addSubtable(initClassTable);
mediLookup = new Lookup(Lookup.GSST_Single, 0);
mediLookup.addSubtable(mediClassTable);
finaLookup = new Lookup(Lookup.GSST_Single, 0);
finaLookup.addSubtable(finaClassTable);
// Only the "liga" lookup is created with the LF_IgnoreMarks flag.
ligaLookup = new Lookup(Lookup.GSST_Ligature, Lookup.LF_IgnoreMarks);
ligaLookup.addSubtable(ligaWalker);
// The lookups are added in the order ccmp, init, medi, fina, liga; the
// indices returned here are what the features below refer to.
ccmpLookupIndex = lookupList.addLookup(ccmpLookup);
initLookupIndex = lookupList.addLookup(initLookup);
mediLookupIndex = lookupList.addLookup(mediLookup);
finaLookupIndex = lookupList.addLookup(finaLookup);
ligaLookupIndex = lookupList.addLookup(ligaLookup);
featureList.addLookup("ccmp", ccmpLookupIndex);
featureList.addLookup("init", initLookupIndex);
featureList.addLookup("medi", mediLookupIndex);
featureList.addLookup("fina", finaLookupIndex);
featureList.addLookup("liga", ligaLookupIndex);
// Feature indices are only queried after finalizeFeatureList() has run.
featureList.finalizeFeatureList();
scriptList.addFeature("arab", "(default)", featureList.getFeatureIndex("ccmp"));
scriptList.addFeature("arab", "(default)", featureList.getFeatureIndex("init"));
scriptList.addFeature("arab", "(default)", featureList.getFeatureIndex("medi"));
scriptList.addFeature("arab", "(default)", featureList.getFeatureIndex("fina"));
scriptList.addFeature("arab", "(default)", featureList.getFeatureIndex("liga"));
GSUBWriter gsubWriter = new GSUBWriter("Arabic", scriptList, featureList, lookupList);
GDEFWriter gdefWriter = new GDEFWriter("Arabic", classTable);
String[] includeFiles = {"LETypes.h", "ArabicShaping.h"};
LigatureModuleWriter writer = new LigatureModuleWriter();
// Output: header, GSUB table, GDEF table, trailer.
writer.openFile(fileName);
writer.writeHeader(null, includeFiles);
writer.writeTable(gsubWriter);
writer.writeTable(gdefWriter);
writer.writeTrailer();
writer.closeFile();
}
// Builds the Hebrew glyph substitution ("Hebrew" GSUB) and glyph class
// ("Hebrew" GDEF) tables and writes both to fileName through a
// LigatureModuleWriter. Only a single "liga" ligature lookup is produced.
private static void buildHebrewTables(String fileName)
{
// Character set: everything in the Hebrew script, plus the Cf/Po/So/Mn/Nd/Lm
// characters of the Hebrew block, minus the ranges listed in oddLigatures.
UnicodeSet hebrewBlock = new UnicodeSet("[[\\p{block=Hebrew}] & [[:Cf:][:Po:][:So:][:Mn:][:Nd:][:Lm:]]]");
// NOTE(review): these are the same ranges buildArabicTables() removes;
// removing them from a Hebrew set is presumably a no-op - confirm.
UnicodeSet oddLigatures = new UnicodeSet("[\\uFC5E-\\uFC63\\uFCF2-\\uFCF4\\uFE70-\\uFE7F]");
UnicodeSet hebrewLetters = new UnicodeSet("[\\p{Hebrew}]");
CharacterData hebrewData = CharacterData.factory(hebrewLetters.addAll(hebrewBlock).removeAll(oddLigatures));
ClassTable classTable = buildGlyphClassTable(hebrewData);
LigatureTree ligaTree = new LigatureTree();
// No isolated-form class table and no contextual tree are supplied;
// buildLigatureTrees() only uses those for FINAL/INITIAL/MEDIAL/ISOLATED
// decompositions, so only canonical ligatures end up in ligaTree.
buildLigatureTrees(hebrewData, null, null, ligaTree);
LigatureTreeWalker ligaWalker = new LigatureTreeWalker();
ligaTree.walk(ligaWalker);
LookupList lookupList = new LookupList();
FeatureList featureList = new FeatureList();
ScriptList scriptList = new ScriptList();
Lookup ligaLookup;
int ligaLookupIndex;
ligaLookup = new Lookup(Lookup.GSST_Ligature, 0);
ligaLookup.addSubtable(ligaWalker);
ligaLookupIndex = lookupList.addLookup(ligaLookup);
featureList.addLookup("liga", ligaLookupIndex);
// Feature indices are only queried after finalizeFeatureList() has run.
featureList.finalizeFeatureList();
scriptList.addFeature("hebr", "(default)", featureList.getFeatureIndex("liga"));
GSUBWriter gsubWriter = new GSUBWriter("Hebrew", scriptList, featureList, lookupList);
GDEFWriter gdefWriter = new GDEFWriter("Hebrew", classTable);
String[] includeFiles = {"LETypes.h", "HebrewShaping.h"};
LigatureModuleWriter writer = new LigatureModuleWriter();
// Output: header, GSUB table, GDEF table, trailer.
writer.openFile(fileName);
writer.writeHeader(null, includeFiles);
writer.writeTable(gsubWriter);
writer.writeTable(gdefWriter);
writer.writeTrailer();
writer.closeFile();
}
/*
* Conversion notes:
*
* Use a UnicodeSet of [\p{Arab}] to get all the Arabic letters. (Might want to
* subtract [\uFBE8\uFBE9] (Uighur Alef Maksura forms) and [\UFBF9-\UFBFB] Uighur
* ligatures which decompose to the same characters as the corresponding "normal"
* ones.)
*
* Use UCharacter.getType(ch) to get the general category. Values are defined in
* UCharacterCategory.
*
* Use (something like) [\p{DecompositionType=INITIAL}] to get initial, medial,
* final (and isolated?) forms.
*
* Use the normalizer to decompose the characters: if the decomposition is
* a single letter, it's an initial, medial or final form, otherwise it's a
* ligature.
*
* Use ArabicShaping to convert the decomposed ligature back into shaped
* presentation forms. Need to add kashida's on the front and / or back to
* get it to generate the correct forms. (could add either a kashida or a
* non-joiner so that we never need to look at the first or last character)
*
* Can do contextual forms and ligatures in a single pass, since we have
* to actually normalize the character to figure out if it's a ligature or
* not. Also need the ligature-ness of the character to compute its glyph
* class... might work to keep a class table which says contextual or ligature
* for each character. Build it while building contextual and ligature tables,
* then use it to generate the actual glyph class table... (this is backwards
* to how it's done now ;-)
*/
/**
 * Command-line entry point: builds the Arabic tables into the file named by
 * the first argument and the Hebrew tables into the file named by the second.
 *
 * @param args args[0] is the Arabic output file, args[1] the Hebrew output file
 */
public static void main(String[] args)
{
    // Both file names are required; report that clearly instead of failing
    // with an ArrayIndexOutOfBoundsException.
    if (args == null || args.length < 2) {
        System.err.println("Usage: ArabicGSUBBuilder <arabic-output-file> <hebrew-output-file>");
        System.exit(1);
        return;
    }

    buildArabicTables(args[0]);
    buildHebrewTables(args[1]);
}
}

View File

@ -9,7 +9,9 @@
package com.ibm.icu.dev.tool.layout;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UScript;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UTF16;
/**
* @author Eric Mader
@ -31,36 +33,272 @@ import com.ibm.icu.text.UnicodeSet;
*/
public class CanonGSUBBuilder
{
public static void buildLigatureTree(CanonicalCharacterData data, LigatureTree ligatureTree)
static public String convertArabicString(int type, int ligature, String decomp, ClassTable isolClassTable)
{
System.out.print("building composition ligature tree...");
int leftType = ArabicShaping.VALUE_NONE;
int rightType = ArabicShaping.VALUE_NONE;
switch (type) {
case UCharacter.DecompositionType.ISOLATED:
break;
case UCharacter.DecompositionType.FINAL:
rightType = ArabicShaping.VALUE_LEFT;
break;
case UCharacter.DecompositionType.INITIAL:
leftType = ArabicShaping.VALUE_RIGHT;
break;
case UCharacter.DecompositionType.MEDIAL:
rightType = ArabicShaping.VALUE_LEFT;
leftType = ArabicShaping.VALUE_RIGHT;
break;
default:
return decomp + UCharacter.toString(ligature);
}
char[] chars = decomp.toCharArray();
ArabicShaping.shape(chars, leftType, rightType, isolClassTable);
return new String(chars) + UCharacter.toString(ligature);
}
static void buildArabicContextualForms(ArabicCharacterData data, ClassTable initClassTable, ClassTable mediClassTable,
ClassTable finaClassTable, ClassTable isolClassTable)
{
System.out.print("Finding Arabic contextual forms... ");
for (int i = 0; i < data.countRecords(); i += 1) {
CanonicalCharacterData.Record record = data.getRecord(i);
ArabicCharacterData.Record record = data.getRecord(i);
String decomposition = record.getDecomposition();
if (decomposition != null && decomposition.length() == 1) {
int contextual = record.getCodePoint();
int isolated = UTF16.charAt(record.getDecomposition(), 0);
switch (record.getDecompositionType()) {
case UCharacter.DecompositionType.INITIAL:
initClassTable.addMapping(isolated, contextual);
break;
case UCharacter.DecompositionType.MEDIAL:
mediClassTable.addMapping(isolated, contextual);
break;
case UCharacter.DecompositionType.FINAL:
finaClassTable.addMapping(isolated, contextual);
break;
case UCharacter.DecompositionType.ISOLATED:
isolClassTable.addMapping(isolated, contextual);
break;
default:
// issue some error message?
break;
}
}
}
System.out.println("Done.");
}
/**
 * Builds the tree of Arabic contextual ligatures.
 *
 * Every record whose decomposition is longer than one UTF-16 code unit and
 * whose decomposition type is FINAL, INITIAL, MEDIAL or ISOLATED is converted
 * with convertArabicString() and inserted into the tree. Canonical
 * decompositions are deliberately not inserted here. The number of inserted
 * ligatures is printed when the scan is complete.
 *
 * @param data           the Arabic character records to scan
 * @param isolClassTable class table handed through to convertArabicString()
 * @return the tree holding all contextual ligatures found
 */
static LigatureTree buildArabicLigatureTree(ArabicCharacterData data, ClassTable isolClassTable)
{
    LigatureTree tree = new LigatureTree();
    int inserted = 0;

    System.out.print("Building Arabic ligature tree... ");

    for (int index = 0; index < data.countRecords(); index += 1) {
        ArabicCharacterData.Record entry = data.getRecord(index);
        String decomp = entry.getDecomposition();

        // Single-character (or missing) decompositions are not ligatures.
        if (decomp == null || decomp.length() <= 1) {
            continue;
        }

        int ligatureChar = entry.getCodePoint();
        int formType = entry.getDecompositionType();

        boolean contextual =
            formType == UCharacter.DecompositionType.FINAL   ||
            formType == UCharacter.DecompositionType.INITIAL ||
            formType == UCharacter.DecompositionType.MEDIAL  ||
            formType == UCharacter.DecompositionType.ISOLATED;

        // Canonical (and all other) decomposition types are left out of this tree.
        if (contextual) {
            tree.insert(convertArabicString(formType, ligatureChar, decomp, isolClassTable));
            inserted += 1;
        }
    }

    System.out.println(inserted + " ligatures.");

    return tree;
}
// Glyph class values added to the glyph class table by
// addArabicGlyphClasses().
// NOTE(review): the numbering looks like the OpenType GDEF glyph class
// convention (1 = base, 2 = ligature, 3 = mark, 4 = component) - confirm.
static final int SIMPLE_GLYPH = 1;
static final int LIGATURE_GLYPH = 2;
static final int MARK_GLYPH = 3;
static final int COMPONENT_GLYPH = 4;
// Maps a general category value (as returned by Record.getGeneralCategory()
// and used directly as the index in getGlyphClass()) to one of the glyph
// classes above. The trailing comment on each entry names the category that
// index is expected to correspond to; 0 presumably means "no glyph class".
static final int categoryClassMap[] = {
0, // UNASSIGNED
SIMPLE_GLYPH, // UPPERCASE_LETTER
SIMPLE_GLYPH, // LOWERCASE_LETTER
SIMPLE_GLYPH, // TITLECASE_LETTER
SIMPLE_GLYPH, // MODIFIER_LETTER
SIMPLE_GLYPH, // OTHER_LETTER
MARK_GLYPH, // NON_SPACING_MARK
MARK_GLYPH, // ENCLOSING_MARK ??
MARK_GLYPH, // COMBINING_SPACING_MARK ??
SIMPLE_GLYPH, // DECIMAL_NUMBER
SIMPLE_GLYPH, // LETTER_NUMBER
SIMPLE_GLYPH, // OTHER_NUMBER;
0, // SPACE_SEPARATOR
0, // LINE_SEPARATOR
0, // PARAGRAPH_SEPARATOR
0, // CONTROL
0, // FORMAT
0, // PRIVATE_USE
0, // SURROGATE
SIMPLE_GLYPH, // DASH_PUNCTUATION
SIMPLE_GLYPH, // START_PUNCTUATION
SIMPLE_GLYPH, // END_PUNCTUATION
SIMPLE_GLYPH, // CONNECTOR_PUNCTUATION
SIMPLE_GLYPH, // OTHER_PUNCTUATION
SIMPLE_GLYPH, // MATH_SYMBOL;
SIMPLE_GLYPH, // CURRENCY_SYMBOL
SIMPLE_GLYPH, // MODIFIER_SYMBOL
SIMPLE_GLYPH, // OTHER_SYMBOL
SIMPLE_GLYPH, // INITIAL_PUNCTUATION
SIMPLE_GLYPH // FINAL_PUNCTUATION
};
/**
 * Determines the glyph class of one Arabic character record.
 *
 * Records without a multi-character decomposition are classified by their
 * general category through categoryClassMap; all others are ligatures.
 *
 * @param record the character record to classify
 * @return the categoryClassMap entry for the record, or LIGATURE_GLYPH
 */
static int getGlyphClass(ArabicCharacterData.Record record)
{
    String decomposition = record.getDecomposition();

    if (decomposition == null || decomposition.length() <= 1) {
        return categoryClassMap[record.getGeneralCategory()];
    }

    return LIGATURE_GLYPH;
}
/**
 * Adds a (code point -> glyph class) mapping to classTable for every record
 * in data, using getGlyphClass() to compute the class.
 *
 * @param data       the Arabic character records to classify
 * @param classTable the table that receives the mappings
 */
static void addArabicGlyphClasses(ArabicCharacterData data, ClassTable classTable)
{
    System.out.print("Adding Arabic glyph classes... ");

    int index = 0;

    while (index < data.countRecords()) {
        ArabicCharacterData.Record entry = data.getRecord(index);
        int codePoint = entry.getCodePoint();
        int glyphClass = getGlyphClass(entry);

        classTable.addMapping(codePoint, glyphClass);
        index += 1;
    }

    System.out.println("Done.");
}
// Adds the Arabic contextual shaping data to tables owned by the caller:
// glyph classes go into classTable, the init/medi/fina/liga lookups into
// lookupList, the matching features into featureList, and those features
// are registered for script "arab", language "(default)" in scriptList.
private static void buildArabicTables(ScriptList scriptList, FeatureList featureList,
LookupList lookupList, ClassTable classTable) {
// TODO: Might want to have the ligature table builder explicitly check for ligatures
// which start with space and tatweel rather than pulling them out here...
// Character set: everything in the Arabic script, plus the Cf/Po/So/Mn/Nd/Lm
// characters of the Arabic block, minus the ranges listed in oddLigatures.
UnicodeSet arabicBlock = new UnicodeSet("[[\\p{block=Arabic}] & [[:Cf:][:Po:][:So:][:Mn:][:Nd:][:Lm:]]]");
UnicodeSet oddLigatures = new UnicodeSet("[\\uFC5E-\\uFC63\\uFCF2-\\uFCF4\\uFE70-\\uFE7F]");
UnicodeSet arabicLetters = new UnicodeSet("[\\p{Arabic}]");
ArabicCharacterData arabicData = ArabicCharacterData.factory(arabicLetters.addAll(arabicBlock).removeAll(oddLigatures));
addArabicGlyphClasses(arabicData, classTable);
// One class table per contextual form; filled by buildArabicContextualForms().
ClassTable initClassTable = new ClassTable();
ClassTable mediClassTable = new ClassTable();
ClassTable finaClassTable = new ClassTable();
ClassTable isolClassTable = new ClassTable();
buildArabicContextualForms(arabicData, initClassTable, mediClassTable, finaClassTable, isolClassTable);
// NOTE(review): snapshot() presumably prepares the table for the lookups
// done while shaping in buildArabicLigatureTree() - confirm against ClassTable.
isolClassTable.snapshot();
LigatureTree ligaTree = buildArabicLigatureTree(arabicData, isolClassTable);
LigatureTreeWalker ligaWalker = new LigatureTreeWalker();
ligaTree.walk(ligaWalker);
Lookup initLookup, mediLookup, finaLookup, ligaLookup;
// Single-substitution lookups are fed from the contextual class tables,
// the ligature lookup from the tree walker.
initLookup = new Lookup(Lookup.GSST_Single, 0);
initLookup.addSubtable(initClassTable);
mediLookup = new Lookup(Lookup.GSST_Single, 0);
mediLookup.addSubtable(mediClassTable);
finaLookup = new Lookup(Lookup.GSST_Single, 0);
finaLookup.addSubtable(finaClassTable);
// Only the "liga" lookup is created with the LF_IgnoreMarks flag.
ligaLookup = new Lookup(Lookup.GSST_Ligature, Lookup.LF_IgnoreMarks);
ligaLookup.addSubtable(ligaWalker);
Feature init = new Feature("init");
Feature medi = new Feature("medi");
Feature fina = new Feature("fina");
Feature liga = new Feature("liga");
// Lookups are appended to the shared list in the order init, medi, fina,
// liga; each feature records the index returned for its lookup.
init.addLookup(lookupList.addLookup(initLookup));
medi.addLookup(lookupList.addLookup(mediLookup));
fina.addLookup(lookupList.addLookup(finaLookup));
liga.addLookup(lookupList.addLookup(ligaLookup));
featureList.addFeature(init);
featureList.addFeature(medi);
featureList.addFeature(fina);
featureList.addFeature(liga);
scriptList.addFeature("arab", "(default)", init);
scriptList.addFeature("arab", "(default)", medi);
scriptList.addFeature("arab", "(default)", fina);
scriptList.addFeature("arab", "(default)", liga);
System.out.println();
}
public static void buildLigatureTree(CanonicalCharacterData data, int script, LigatureTree ligatureTree)
{
int ligatureCount = 0;
System.out.print("building composition ligature tree for " + UScript.getName(script) + "... ");
for (int i = 0; i < data.countRecords(script); i += 1) {
CanonicalCharacterData.Record record = data.getRecord(script, i);
String composed = UCharacter.toString(record.getComposedCharacter());
for (int e = 0; e < record.countEquivalents(); e += 1) {
String equivalent = record.getEquivalent(e);
ligatureTree.insert(equivalent + composed);
ligatureCount += 1;
}
}
System.out.println(" Done.");
System.out.println(ligatureCount + " ligatures.");
}
public static DecompTable[] buildDecompTables(CanonicalCharacterData data)
public static DecompTable[] buildDecompTables(CanonicalCharacterData data, int script)
{
int maxDecompCount = data.getMaxEquivalents();
int maxDecompCount = data.getMaxEquivalents(script);
DecompTable[] decompTables = new DecompTable[maxDecompCount];
System.out.print("Building decompositon tables... max number of decompositions is " + maxDecompCount + "...");
System.out.print("Building decompositon tables for " + UScript.getName(script) +
"... total decompositions: " + data.countRecords(script) +
", max: " + maxDecompCount + "...");
for (int i = 0; i < maxDecompCount; i += 1) {
DecompTable table = new DecompTable();
for (int r = 0; r < data.countRecords(); r += 1) {
CanonicalCharacterData.Record record = data.getRecord(r);
for (int r = 0; r < data.countRecords(script); r += 1) {
CanonicalCharacterData.Record record = data.getRecord(script, r);
if (record.countEquivalents() > i) {
table.add(record.getComposedCharacter(), record.getEquivalent(i));
@ -75,26 +313,24 @@ public class CanonGSUBBuilder
return decompTables;
}
public static void buildLatinTables(String fileName)
public static int[] buildLookups(CanonicalCharacterData data, LookupList lookupList, int script)
{
UnicodeSet latinSet = new UnicodeSet("[[\\p{Latin}] & [\\p{DecompositionType=Canonical}]]");
CanonicalCharacterData data = CanonicalCharacterData.factory(latinSet);
int[] lookups = new int[2];
DecompTable[] decompTables = buildDecompTables(data);
DecompTable[] decompTables = buildDecompTables(data, script);
LigatureTree compTree = new LigatureTree();
buildLigatureTree(data, compTree);
buildLigatureTree(data, script, compTree);
System.out.println();
LigatureTreeWalker compWalker = new LigatureTreeWalker();
compTree.walk(compWalker);
LookupList lookupList = new LookupList();
FeatureList featureList = new FeatureList();
ScriptList scriptList = new ScriptList();
Lookup compLookup, dcmpLookup;
int compLookupIndex, dcmpLookupIndex;
//int compLookupIndex, dcmpLookupIndex;
compLookup = new Lookup(Lookup.GSST_Ligature, 0);
compLookup.addSubtable(compWalker);
@ -104,16 +340,65 @@ public class CanonGSUBBuilder
dcmpLookup.addSubtable(decompTables[i]);
}
compLookupIndex = lookupList.addLookup(compLookup);
dcmpLookupIndex = lookupList.addLookup(dcmpLookup);
lookups[0] = lookupList.addLookup(compLookup);
lookups[1] = lookupList.addLookup(dcmpLookup);
return lookups;
}
/**
 * Adds every lookup index in lookups to feature, in array order.
 *
 * @param feature the feature that receives the lookup indices
 * @param lookups the lookup indices to add
 */
public static void addLookups(Feature feature, int[] lookups)
{
    int next = 0;

    while (next < lookups.length) {
        feature.addLookup(lookups[next]);
        next += 1;
    }
}
public static void buildDecompTables(String fileName)
{
UnicodeSet decompSet = new UnicodeSet("[[\\P{Hangul}] & [\\p{DecompositionType=Canonical}]]");
CanonicalCharacterData data = CanonicalCharacterData.factory(decompSet);
ClassTable classTable = new ClassTable();
LookupList lookupList = new LookupList();
FeatureList featureList = new FeatureList();
ScriptList scriptList = new ScriptList();
featureList.addLookup("ccmp", compLookupIndex);
featureList.addLookup("ccmp", dcmpLookupIndex);
// build common, inherited lookups...
// int[] commonLookups = buildLookups(data, lookupList, UScript.COMMON);
// int[] inheritedLookups = buildLookups(data, lookupList, UScript.INHERITED);
for (int script = 0; script < UScript.CODE_LIMIT; script += 1) {
// This is a bit lame, but it's the only way I can think of
// to make this work w/o knowing the values of COMMON and INHERITED...
if (script == UScript.COMMON || script == UScript.INHERITED ||
data.getMaxEquivalents(script) == 0) {
continue;
}
int[] lookups = buildLookups(data, lookupList, script);
Feature ccmp = new Feature("ccmp");
addLookups(ccmp, lookups);
// addLookups(ccmp, commonLookups);
// addLookups(ccmp, inheritedLookups);
featureList.addFeature(ccmp);
String scriptTag = TagUtilities.tagLabel(UScript.getShortName(script));
scriptList.addFeature(scriptTag, "(default)", ccmp);
if (script == UScript.ARABIC) {
buildArabicTables(scriptList, featureList, lookupList, classTable);
}
}
featureList.finalizeFeatureList();
scriptList.addFeature("latn", "(default)", featureList.getFeatureIndex("ccmp"));
GSUBWriter gsubWriter = new GSUBWriter("Canon", scriptList, featureList, lookupList);
GDEFWriter gdefWriter = new GDEFWriter("Canon", classTable);
String[] includeFiles = {"LETypes.h", "CanonShaping.h"};
LigatureModuleWriter writer = new LigatureModuleWriter();
@ -121,12 +406,13 @@ public class CanonGSUBBuilder
writer.openFile(fileName);
writer.writeHeader(null, includeFiles);
writer.writeTable(gsubWriter);
writer.writeTable(gdefWriter);
writer.writeTrailer();
writer.closeFile();
}
public static void main(String[] args)
{
buildLatinTables(args[0]);
buildDecompTables(args[0]);
}
}

View File

@ -7,7 +7,9 @@
package com.ibm.icu.dev.tool.layout;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UScript;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.CanonicalIterator;
import com.ibm.icu.text.UTF16;
@ -15,10 +17,12 @@ import java.util.Vector;
public class CanonicalCharacterData
{
private static int THRESHOLD = 4;
public class Record
{
// TODO: might want to save arrays of Char32's rather than UTF16 strings...
Record(int character)
Record(int character, int script)
{
String char32 = UCharacter.toString(character);
CanonicalIterator iterator = new CanonicalIterator(char32);
@ -36,13 +40,17 @@ public class CanonicalCharacterData
int nEquivalents = equivs.size();
if (nEquivalents > maxEquivalents) {
maxEquivalents = nEquivalents;
if (nEquivalents > maxEquivalents[script]) {
maxEquivalents[script] = nEquivalents;
}
if (nEquivalents > 0) {
equivalents = new String[nEquivalents];
if (nEquivalents > THRESHOLD) {
dumpEquivalents(character, equivs);
}
for (int e = 0; e < nEquivalents; e += 1) {
equivalents[e] = (String) equivs.elementAt(e);
}
@ -61,6 +69,7 @@ public class CanonicalCharacterData
if (equivalents == null) {
return 0;
}
return equivalents.length;
}
@ -78,48 +87,91 @@ public class CanonicalCharacterData
return equivalents[index];
}
/**
 * Prints a character and all of its canonically equivalent strings to
 * System.out: a header line "<hex character> - <count>:", then one line per
 * equivalent listing its code points as six-digit hex values separated by
 * single spaces, followed by an empty line.
 *
 * @param character the composed character
 * @param equivs    the equivalent strings (a Vector of String)
 */
private void dumpEquivalents(int character, Vector equivs)
{
    int count = equivs.size();

    System.out.println(Utility.hex(character, 6) + " - " + count + ":");

    for (int i = 0; i < count; i += 1) {
        String equiv = (String) equivs.elementAt(i);
        int offset = 0;

        // UTF16.charAt() takes a UTF-16 code unit offset, not a code point
        // index, so step through the string by the width of each code point;
        // otherwise supplementary characters are printed incorrectly.
        while (offset < equiv.length()) {
            int codePoint = UTF16.charAt(equiv, offset);

            if (offset > 0) {
                System.out.print(" ");
            }

            System.out.print(Utility.hex(codePoint, 6));
            offset += UTF16.getCharCount(codePoint);
        }

        System.out.println();
    }

    System.out.println();
}
private int composed;
private String[] equivalents = null;
}
public CanonicalCharacterData(int charCount)
public CanonicalCharacterData()
{
records = new Record[charCount];
// nothing to do...
}
public void add(int character)
{
records[recordIndex++] = new Record(character);
int script = UScript.getScript(character);
Vector recordVector = recordVectors[script];
if (recordVector == null) {
recordVector = recordVectors[script] = new Vector();
}
recordVector.add(new Record(character, script));
}
public int getCharacterCount()
public int getMaxEquivalents(int script)
{
return recordIndex;
if (script < 0 || script >= UScript.CODE_LIMIT) {
return 0;
}
return maxEquivalents[script];
}
public int getMaxEquivalents()
public Record getRecord(int script, int index)
{
return maxEquivalents;
}
public Record getRecord(int index)
{
if (index < 0 || index >= records.length) {
if (script < 0 || script >= UScript.CODE_LIMIT) {
return null;
}
return records[index];
Vector recordVector = recordVectors[script];
if (recordVector == null || index < 0 || index >= recordVector.size()) {
return null;
}
return (Record) recordVector.elementAt(index);
}
public int countRecords()
public int countRecords(int script)
{
return records.length;
if (script < 0 || script >= UScript.CODE_LIMIT ||
recordVectors[script] == null) {
return 0;
}
return recordVectors[script].size();
}
public static CanonicalCharacterData factory(UnicodeSet characterSet)
{
int charCount = characterSet.size();
CanonicalCharacterData data = new CanonicalCharacterData(charCount);
CanonicalCharacterData data = new CanonicalCharacterData();
System.out.println("There are " + charCount + " characters with a canonical decomposition.");
for (int i = 0; i < charCount; i += 1) {
data.add(characterSet.charAt(i));
@ -160,8 +212,7 @@ public class CanonicalCharacterData
}
}
private Record[] records;
private int recordIndex = 0;
private int maxEquivalents = 0;
private Vector recordVectors[] = new Vector[UScript.CODE_LIMIT];
private int maxEquivalents[] = new int[UScript.CODE_LIMIT];
}

View File

@ -0,0 +1,59 @@
/*
*******************************************************************************
* Copyright (C) 2002-2004, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
package com.ibm.icu.dev.tool.layout;
/**
 * A tagged feature: a feature tag plus the growable list of lookup indices
 * that belong to it, and the index assigned to the feature itself
 * (-1 until setFeatureIndex() is called).
 */
class Feature extends TaggedRecord
{
    // Initial capacity of the lookup index array and the amount by which it
    // grows when it is full.
    private static final int INITIAL_CAPACITY = 10;
    private static final int GROWTH = 5;

    private int[] lookupIndices;
    private int lookupCount;
    private int featureIndex;

    /**
     * Creates a feature with no lookups and no assigned feature index.
     *
     * @param theFeatureTag the feature tag, passed on to TaggedRecord
     */
    public Feature(String theFeatureTag)
    {
        super(theFeatureTag);

        this.lookupIndices = new int[INITIAL_CAPACITY];
        this.lookupCount = 0;
        this.featureIndex = -1;
    }

    /**
     * Appends a lookup index to this feature, growing the backing array
     * when it is full.
     *
     * @param theLookupIndex the lookup index to append
     */
    public void addLookup(int theLookupIndex)
    {
        growIfFull();

        lookupIndices[lookupCount++] = theLookupIndex;
    }

    /**
     * Writes this feature through writer: a zero for featureParams, the
     * number of lookups, then each lookup index in insertion order.
     *
     * @param writer the table writer receiving the data
     */
    public void writeFeature(OpenTypeTableWriter writer)
    {
        writer.writeData(0); // featureParams (must be NULL)
        writer.writeData(lookupCount);

        int next = 0;

        while (next < lookupCount) {
            writer.writeData(lookupIndices[next]);
            next += 1;
        }
    }

    /**
     * @return the index assigned to this feature, or -1 if none was set
     */
    public int getFeatureIndex()
    {
        return featureIndex;
    }

    /**
     * @param index the index to assign to this feature
     */
    public void setFeatureIndex(int index)
    {
        featureIndex = index;
    }

    // Replaces the lookup index array with a larger copy when it is full.
    private void growIfFull()
    {
        if (lookupCount < lookupIndices.length) {
            return;
        }

        int[] larger = new int[lookupIndices.length + GROWTH];

        System.arraycopy(lookupIndices, 0, larger, 0, lookupIndices.length);
        lookupIndices = larger;
    }
}

View File

@ -14,92 +14,34 @@ package com.ibm.icu.dev.tool.layout;
public class FeatureList
{
/**
 * A tagged record that accumulates the lookup indices attached to one
 * feature tag and can write them out through an OpenTypeTableWriter.
 */
static class FeatureRecord extends TaggedRecord
{
    /** Backing store; only the first lookupCount slots are in use. */
    private int[] lookupIndices;

    /** Number of lookup indices added so far. */
    private int lookupCount;

    /**
     * Creates a record with no lookups and room for ten.
     *
     * @param theFeatureTag the tag passed on to the TaggedRecord constructor
     */
    public FeatureRecord(String theFeatureTag)
    {
        super(theFeatureTag);

        lookupIndices = new int[10];
        lookupCount = 0;
    }

    /**
     * Appends a lookup index, enlarging the backing array by five slots
     * when it is already full.
     *
     * @param theLookupIndex the lookup index to append
     */
    public void addLookup(int theLookupIndex)
    {
        // The array is full as soon as lookupCount == lookupIndices.length.
        // Testing with '>' skipped the resize in exactly that case, so the
        // store below indexed one past the end and threw
        // ArrayIndexOutOfBoundsException.
        if (lookupCount >= lookupIndices.length) {
            int[] newLookupIndices = new int[lookupIndices.length + 5];

            System.arraycopy(lookupIndices, 0, newLookupIndices, 0, lookupIndices.length);
            lookupIndices = newLookupIndices;
        }

        lookupIndices[lookupCount] = theLookupIndex;
        lookupCount += 1;
    }

    /**
     * Writes this record to the given writer: a zero word, the lookup
     * count, then each lookup index in the order it was added.
     *
     * @param writer the table writer that receives the data
     */
    public void writeFeatureRecord(OpenTypeTableWriter writer)
    {
        writer.writeData(0);      // featureParams (must be NULL)

        writer.writeData(lookupCount);

        for (int i = 0; i < lookupCount; i += 1) {
            writer.writeData(lookupIndices[i]);
        }
    }
}
private FeatureRecord[] featureRecords;
private Feature[] features;
private int featureCount;
/**
 * Creates a list that holds no features yet (featureCount is 0) and
 * whose backing array starts with ten slots.
 */
public FeatureList()
{
featureRecords = new FeatureRecord[10];
features = new Feature[10];
featureCount = 0;
}
private FeatureRecord findFeatureRecord(String featureTag)
public void addFeature(Feature feature)
{
for (int i = 0; i < featureCount; i += 1) {
FeatureRecord featureRecord = featureRecords[i];
if (featureCount >= features.length) {
Feature[] newFeatures = new Feature[features.length + 5];
if (featureRecord.getTag().equals(featureTag)) {
return featureRecord;
}
System.arraycopy(features, 0, newFeatures, 0, features.length);
features = newFeatures;
}
if (featureCount >= featureRecords.length) {
FeatureRecord[] newFeatureRecords = new FeatureRecord[featureCount + 5];
System.arraycopy(featureRecords, 0, newFeatureRecords, 0, featureRecords.length);
featureRecords = newFeatureRecords;
}
FeatureRecord newFeatureRecord = new FeatureRecord(featureTag);
featureRecords[featureCount] = newFeatureRecord;
featureCount += 1;
return newFeatureRecord;
}
public void addLookup(String featureTag, int lookupIndex)
{
FeatureRecord featureRecord = findFeatureRecord(featureTag);
featureRecord.addLookup(lookupIndex);
features[featureCount++] = feature;
}
public void finalizeFeatureList()
{
TaggedRecord.sort(featureRecords, featureCount);
}
public int getFeatureIndex(String featureTag)
{
return TaggedRecord.search(featureRecords, featureCount, featureTag);
TaggedRecord.sort(features, featureCount);
for (int i = 0; i < featureCount; i += 1) {
features[i].setFeatureIndex(i);
}
}
public void writeFeaturetList(OpenTypeTableWriter writer)
@ -113,7 +55,7 @@ public class FeatureList
int featureRecordOffset = writer.getOutputIndex();
for (int i = 0; i < featureCount; i += 1) {
String tag = featureRecords[i].getTag();
String tag = features[i].getTag();
System.out.print(" '" + tag + "'");
writer.writeTag(tag);
@ -127,7 +69,7 @@ public class FeatureList
writer.fixOffset(featureRecordOffset + 2, featureListBase);
featureRecordOffset += 3;
featureRecords[i].writeFeatureRecord(writer);
features[i].writeFeature(writer);
}
System.out.println();

View File

@ -69,6 +69,24 @@ public class LigatureTree
return null;
}
/**
 * Builds a printable description of a character sequence: every element
 * except the last is formatted with Utility.hex(..., 6) and joined with
 * " + ", then " => " and the formatted last element are appended.
 *
 * @param chars the sequence to describe; must hold at least one element,
 *              otherwise indexing the last element fails
 * @return the formatted description
 */
String ligatureString(int[] chars)
{
    int last = chars.length - 1;
    StringBuffer text = new StringBuffer();
    String separator = "";

    for (int i = 0; i < last; i += 1) {
        text.append(separator).append(Utility.hex(chars[i], 6));
        separator = " + "; // every element after the first is preceded by the joiner
    }

    text.append(" => ").append(Utility.hex(chars[last], 6));

    return text.toString();
}
void insert(int[] chars, int index)
{
int c = chars[index];
@ -76,10 +94,12 @@ public class LigatureTree
if (len == index + 1) {
if (ligature != -1) {
System.out.println("overwriting ligature " + Utility.hex(ligature, 6) + " with " + Utility.hex(c, 6));
System.out.println("ignoring ligature " + ligatureString(chars) +
": already have " + Utility.hex(ligature, 6));
} else {
ligature = c;
}
ligature = c;
return;
}

View File

@ -15,31 +15,30 @@ public class ScriptList
{
static class LangSysRecord extends TaggedRecord
{
private int[] featureIndices;
private Feature[] features;
private int featureCount;
/**
 * Creates a record with no features (featureCount is 0) and ten
 * preallocated slots.
 *
 * @param theLanguageTag the tag passed on to the TaggedRecord constructor
 */
public LangSysRecord(String theLanguageTag)
{
super(theLanguageTag);
featureIndices = new int[10];
features = new Feature[10];
featureCount = 0;
}
public void addFeature(int theFeatureIndex)
public void addFeature(Feature feature)
{
if (featureCount > featureIndices.length) {
int[] newFeatureIndices = new int[featureIndices.length + 5];
if (featureCount > features.length) {
Feature[] newFeatures = new Feature[features.length + 5];
System.arraycopy(featureIndices, 0, newFeatureIndices, 0, featureIndices.length);
featureIndices = newFeatureIndices;
System.arraycopy(features, 0, newFeatures, 0, features.length);
features = newFeatures;
}
featureIndices[featureCount] = theFeatureIndex;
featureCount += 1;
features[featureCount++] = feature;
}
public void writeLangSysRecord(OpenTypeTableWriter writer)
public void writeLangSysRecord(OpenTypeTableWriter writer)
{
writer.writeData(0); // lookupOrder (must be NULL)
writer.writeData(0xFFFF); // reqFeatureIndex (0xFFFF means none)
@ -47,7 +46,7 @@ public class ScriptList
writer.writeData(featureCount);
for (int i = 0; i < featureCount; i += 1) {
writer.writeData(featureIndices[i]);
writer.writeData(features[i].getFeatureIndex());
}
}
}
@ -162,11 +161,11 @@ public class ScriptList
return newScriptRecord.findLangSysRecord(languageTag);
}
/**
 * Adds a feature to the LangSysRecord that findLangSysRecord() returns
 * for the given script tag and language tag.
 */
public void addFeature(String scriptTag, String languageTag, int featureIndex)
public void addFeature(String scriptTag, String languageTag, Feature feature)
{
LangSysRecord langSysRecord = findLangSysRecord(scriptTag, languageTag);
langSysRecord.addFeature(featureIndex);
langSysRecord.addFeature(feature);
}
public void writeScriptList(OpenTypeTableWriter writer)