ICU-2055 work in progress of LDML2ICUConverter

X-SVN-Rev: 16170
This commit is contained in:
Ram Viswanadha 2004-08-20 02:23:59 +00:00
parent e7c90c6f7d
commit 559d6775de
5 changed files with 1373 additions and 459 deletions

View File

@ -36,7 +36,7 @@ public class ICUResourceWriter {
private static final String IMPORT = "import";
private static final String ALIAS = "alias";
private static final String INTVECTOR = "intvector";
private static final String ARRAYS = "array";
//private static final String ARRAYS = "array";
private static final String LINESEP = System.getProperty("line.separator");
public static class Resource{
@ -101,7 +101,7 @@ public class ICUResourceWriter {
}
}
public void sort(){
System.out.println("In sort");
//System.out.println("In sort");
return;
}
public void swap(){
@ -131,18 +131,34 @@ public class ICUResourceWriter {
public void write(OutputStream writer, int numIndent, boolean bare){
writeComments(writer, numIndent);
writeIndent(writer, numIndent);
write(writer, name+COLON+ARRAYS+OPENBRACE+LINESEP);
if(name!=null){
write(writer, name+OPENBRACE+LINESEP);
}else{
write(writer, OPENBRACE+LINESEP);
}
numIndent++;
Resource current = first;
while(current != null){
current.write(writer, numIndent, true);
write(writer, COMMA+LINESEP);
if(current instanceof ResourceTable ||
current instanceof ResourceArray){
}else{
write(writer, COMMA+LINESEP);
}
current = current.next;
}
numIndent--;
writeIndent(writer, numIndent);
write(writer, CLOSEBRACE+LINESEP);
}
/**
 * Leaves the order of this resource's children untouched and asks each
 * child in the linked list (starting at <code>first</code>) to sort itself.
 */
public void sort(){
    for(Resource child = first; child != null; child = child.next){
        child.sort();
    }
}
}
public static class ResourceBinary extends Resource{
@ -308,10 +324,10 @@ public class ICUResourceWriter {
Resource b =new Resource();
Resource a = first;
Resource t,u,x;
for(t = a.next; t!=null; t=u ){
for(t = a; t!=null; t=u ){
u=t.next;
for(x=b;x.next!=null; x=x.next){
if(x.next.name.compareToIgnoreCase(t.name)>0){
if(x.next.name.compareTo(t.name)>0){
break;
}
}
@ -319,7 +335,16 @@ public class ICUResourceWriter {
x.next = t;
}
// System.out.println("Exiting sort of table");
first = b.next;
if(b.next!=null){
first = b.next;
}
Resource current = first;
while(current!=null){
current.sort();
current = current.next;
}
} // end sort()
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,159 @@
/*
******************************************************************************
* Copyright (C) 2004, International Business Machines Corporation and *
* others. All Rights Reserved. *
******************************************************************************
*/
package com.ibm.icu.dev.tool.cldr;
/**
 * String constants for the LDML element and attribute names that the CLDR
 * tools in this package match against (for example when tokenizing XPaths
 * or reading attributes). The grouping comments below are only a reading
 * aid; they carry no meaning for the code.
 *
 * @author ram
 */
public class LDMLConstants {

    // ---- document sections ----
    public static final String LDML        = "ldml";
    public static final String IDENTITY    = "identity";
    public static final String LDN         = "localeDisplayNames";
    public static final String LAYOUT      = "layout";
    public static final String ORIENTATION = "orientation";
    public static final String CHARACTERS  = "characters";
    public static final String DELIMITERS  = "delimiters";
    public static final String MEASUREMENT = "measurement";
    public static final String DATES       = "dates";
    public static final String NUMBERS     = "numbers";
    public static final String COLLATIONS  = "collations";
    public static final String POSIX       = "posix";
    public static final String SPECIAL     = "special";

    // ---- alias and identity related names ----
    public static final String SOURCE      = "source";
    public static final String PATH        = "path";
    public static final String ALT         = "alt";
    public static final String ALIAS       = "alias";
    public static final String VERSION     = "version";
    public static final String LANGUAGE    = "language";
    public static final String SCRIPT      = "script";
    public static final String TERRITORY   = "territory";
    public static final String VARIANT     = "variant";
    public static final String TYPE        = "type";
    public static final String NUMBER      = "number";
    public static final String GENERATION  = "generation";

    // ---- display-name related names ----
    public static final String LANGUAGES   = "languages";
    public static final String SCRIPTS     = "scripts";
    public static final String TERRITORIES = "territories";
    public static final String VARIANTS    = "variants";
    public static final String TYPES       = "types";
    public static final String KEYS        = "keys";
    public static final String KEY         = "key";

    // ---- characters, delimiters and measurement related names ----
    public static final String EXEMPLAR_CHARACTERS = "exemplarCharacters";
    public static final String MAPPING     = "mapping";
    public static final String QS          = "quotationStart";
    public static final String QE          = "quotationEnd";
    public static final String AQS         = "alternateQuotationStart";
    public static final String AQE         = "alternateQuotationEnd";
    public static final String MS          = "measurementSystem";
    public static final String HEIGHT      = "height";
    public static final String WIDTH       = "width";
    public static final String PAPER_SIZE  = "paperSize";

    // ---- date, time and calendar related names ----
    public static final String LPC               = "localizedPatternChars";
    public static final String DEFAULT           = "default";
    public static final String CALENDARS         = "calendars";
    public static final String MONTHS            = "months";
    public static final String DAYS              = "days";
    public static final String TZN               = "timeZoneNames";
    public static final String CALENDAR          = "calendar";
    public static final String ZONE              = "zone";
    public static final String EXEMPLAR_CITY     = "exemplarCity";
    public static final String LONG              = "long";
    public static final String SHORT             = "short";
    public static final String DAYLIGHT          = "daylight";
    public static final String STANDARD          = "standard";
    public static final String WEEK              = "week";
    public static final String AM                = "am";
    public static final String PM                = "pm";
    public static final String ERAS              = "eras";
    public static final String DATE_FORMATS      = "dateFormats";
    public static final String DFL               = "dateFormatLength";
    public static final String DATE_FORMAT       = "dateFormat";
    public static final String TIME_FORMATS      = "timeFormats";
    public static final String TFL               = "timeFormatLength";
    public static final String TIME_FORMAT       = "timeFormat";
    public static final String DATE_TIME_FORMATS = "dateTimeFormats";
    public static final String DTFL              = "dateTimeFormatLength";
    public static final String DATE_TIME_FORMAT  = "dateTimeFormat";
    public static final String MONTH_CONTEXT     = "monthContext";
    public static final String DAY_CONTEXT       = "dayContext";
    public static final String MONTH_WIDTH       = "monthWidth";
    public static final String DAY_WIDTH         = "dayWidth";
    public static final String MONTH             = "month";
    public static final String DAY               = "day";
    public static final String COUNT             = "count";
    public static final String MINDAYS           = "minDays";
    public static final String FIRSTDAY          = "firstDay";
    public static final String WENDSTART         = "weekendStart";
    public static final String WENDEND           = "weekendEnd";
    public static final String WEEKEND           = "weekend";
    public static final String TIME              = "time";
    public static final String ERA               = "era";
    public static final String ERAABBR           = "eraAbbr";
    public static final String ERANAMES          = "eraNames";
    public static final String ABBREVIATED       = "abbreviated";
    public static final String WIDE              = "wide";

    // ---- number and currency related names ----
    public static final String CURRENCY           = "currency";
    public static final String DECIMAL_FORMATS    = "decimalFormats";
    public static final String SCIENTIFIC_FORMATS = "scientificFormats";
    public static final String CURRENCY_FORMATS   = "currencyFormats";
    public static final String PERCENT_FORMATS    = "percentFormats";
    public static final String SYMBOLS            = "symbols";
    public static final String CURRENCIES         = "currencies";
    public static final String SYMBOL             = "symbol";
    public static final String DISPLAY_NAME       = "displayName";
    public static final String PATTERN            = "pattern";
    public static final String DECIMAL            = "decimal";
    public static final String GROUP              = "group";

    // ---- posix message related names ----
    public static final String MESSAGES = "messages";
    public static final String YESSTR   = "yesstr";
    public static final String YESEXPR  = "yesexpr";
    public static final String NOSTR    = "nostr";
    public static final String NOEXPR   = "noexpr";

    // ---- collation related names ----
    public static final String COLLATION             = "collation";
    // Note the capital S: this value differs in case from the other names.
    public static final String SEQUENCE              = "Sequence";
    public static final String RULES                 = "rules";
    public static final String SETTINGS              = "settings";
    public static final String SUPPRESS_CONTRACTIONS = "suppress_contractions";
    public static final String OPTIMIZE              = "optimize";
    public static final String BASE                  = "base";
    public static final String STRENGTH              = "strength";
    public static final String ALTERNATE             = "alternate";
    public static final String BACKWARDS             = "backwards";
    public static final String NORMALIZATION         = "normalization";
    public static final String CASE_LEVEL            = "caseLevel";
    public static final String CASE_FIRST            = "caseFirst";
    // NOTE(review): spelling kept exactly as-is because consumers match on
    // this string; confirm against the LDML DTD in use before changing it.
    public static final String HIRAGANA_Q            = "hiraganaQuarternary";
    public static final String NUMERIC               = "numeric";
    public static final String RESET                 = "reset";
    public static final String PC                    = "pc";
    public static final String SC                    = "sc";
    public static final String TC                    = "tc";
    public static final String QC                    = "qc";
    public static final String IC                    = "ic";
    public static final String P                     = "p";
    public static final String S                     = "s";
    public static final String T                     = "t";
    public static final String Q                     = "q";
    public static final String I                     = "i";
    public static final String X                     = "x";
    public static final String LAST_VARIABLE         = "last_variable";
    public static final String CP                    = "cp";
    public static final String HEX                   = "hex";
    public static final String PRIMARY               = "primary";
    public static final String SECONDARY             = "secondary";
    public static final String TERTIARY              = "tertiary";
    // NOTE(review): spelling kept exactly as-is; see HIRAGANA_Q.
    public static final String QUARTERNARY           = "quarternary";
    public static final String IDENTICAL             = "identical";
    public static final String BEFORE                = "before";
    public static final String CONTEXT               = "context";
    public static final String EXTEND                = "extend";
}

View File

@ -18,6 +18,7 @@ import java.io.BufferedReader;
// DOM imports
import org.apache.xpath.XPathAPI;
import org.apache.xpath.objects.XObject;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
@ -36,6 +37,7 @@ import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
/**
* @author ram
*
@ -44,13 +46,22 @@ import org.xml.sax.SAXParseException;
*/
public class LDMLUtilities {
private static final String SOURCE = "source";
private static final String PATH = "path";
private static final String ALT = "alt";
private static final String KEY = "key";
private static final String REGISTRY = "registry";
private static final String DRAFT = "draft";
private static String TYPE = "type";
private static String IDENTITY = "identity";
/**
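* Creates a fully resolved locale document, starting with root and merging in
* each locale along the chain named by the locale ID (for example en, then en_US).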
* @param sourceDir
* @param locale
* @return
*/
public static Document getFullyResolvedLDML(String sourceDir, String locale, boolean ignoreRoot){
public static Document getFullyResolvedLDML(String sourceDir, String locale, boolean ignoreRoot, boolean ignoreUnavailable){
Document full =null;
try{
full = parse(sourceDir+File.separator+ "root.xml");
@ -59,6 +70,13 @@ public class LDMLUtilities {
throw ex;
}
}
int index = locale.indexOf(".xml");
if(index > -1){
locale = locale.substring(0,index);
}
if(locale.equals("root")){
return full;
}
String[] constituents = locale.split("_");
String loc=null;
for(int i=0; i<constituents.length; i++){
@ -67,14 +85,216 @@ public class LDMLUtilities {
}else{
loc = loc +"_"+ constituents[i];
}
Document doc = parse(sourceDir+File.separator+loc+".xml");
StringBuffer xpath = new StringBuffer();
mergeLDMLDocuments(full, doc, xpath);
Document doc = null;
String fileName = sourceDir+File.separator+loc+".xml";
File file = new File(fileName);
if(file.exists()){
doc = parse(fileName);
StringBuffer xpath = new StringBuffer();
mergeLDMLDocuments(full, doc, xpath);
}else{
if(!ignoreUnavailable){
throw new RuntimeException("Could not find: " +fileName);
}
}
}
return full;
}
private static String TYPE = "type";
private static String IDENTITY = "identity";
/**
 * Builds an ICU-style alias path for an LDML alias node: the value of the
 * alias' <code>source</code> attribute followed by one "/"-separated
 * segment for every step of the resolved XPath that has a non-empty ICU
 * equivalent.
 *
 * @param alias         the alias node; its <code>source</code> and
 *                      <code>path</code> attributes are read
 * @param namespaceNode node used to resolve namespace prefixes while
 *                      validating the XPaths; may be null, in which case
 *                      the alias' parent node is used
 * @param fullPath      XPath of the alias' location; when the alias has a
 *                      <code>path</code> attribute this buffer is modified
 *                      in place to hold the resolved (absolute) path
 * @return the ICU alias path
 * @throws TransformerException if one of the XPaths cannot be evaluated
 * @throws IllegalArgumentException if a step of the path has no ICU equivalent
 */
public static String convertXPath2ICU(Node alias, Node namespaceNode, StringBuffer fullPath)
throws TransformerException{
    Node context = alias.getParentNode();
    StringBuffer icu = new StringBuffer();
    String source = getAttributeValue(alias, SOURCE);
    String xpath = getAttributeValue(alias, PATH);

    // make sure that the xpaths are valid
    if(namespaceNode==null){
        XPathAPI.eval(context, fullPath.toString());
        if(xpath!=null){
            XPathAPI.eval(context,xpath);
        }
    }else{
        XPathAPI.eval(context, fullPath.toString(), namespaceNode);
        if(xpath!=null){
            XPathAPI.eval(context, xpath, namespaceNode);
        }
    }
    // NOTE(review): a missing source attribute ends up as the text "null"
    // in the result; confirm whether callers guarantee it is present.
    icu.append(source);
    if(xpath!=null){
        XPathTokenizer.relativeToAbsolute(xpath, fullPath);
        // make sure that fullPath is not corrupted! Use the same namespace
        // resolution as the validation above so that prefixed steps resolve.
        if(namespaceNode==null){
            XPathAPI.eval(context, fullPath.toString());
        }else{
            XPathAPI.eval(context, fullPath.toString(), namespaceNode);
        }
        XPathTokenizer tokenizer = new XPathTokenizer(fullPath);
        for(String token = tokenizer.nextToken(); token != null; token = tokenizer.nextToken()){
            // the root step has no counterpart in the ICU path
            if(token.equals(LDMLConstants.LDML)){
                continue;
            }
            String equiv = getICUEquivalent(token);
            if(equiv==null){
                throw new IllegalArgumentException("Could not find ICU equivalent for token: " +token);
            }
            // an empty equivalent means the step is dropped from the ICU path
            if(equiv.length()>0){
                icu.append("/");
                icu.append(equiv);
            }
        }
    }
    return icu.toString();
}
/**
 * Maps a three-letter day type ("sun" .. "sat") to its zero-based index,
 * returned as a decimal string ("0" .. "6").
 *
 * @param type the day type
 * @return the index of the day, with "sun" being "0"
 * @throws IllegalArgumentException if the type is not one of the seven day types
 */
public static String getDayIndexAsString(String type){
    // the position in this list is the index handed back to the caller
    final String[] dayTypes = {"sun", "mon", "tue", "wed", "thu", "fri", "sat"};
    for(int i = 0; i < dayTypes.length; i++){
        if(type.equals(dayTypes[i])){
            return Integer.toString(i);
        }
    }
    throw new IllegalArgumentException("Unknown type: "+type);
}
/**
 * Converts a month number given as a decimal string into the number one
 * lower, again as a decimal string (for example "1" becomes "0").
 *
 * @param type the month number as a string
 * @return the month number minus one
 * @throws NumberFormatException if the string is not a parsable integer
 */
public static String getMonthIndexAsString(String type){
    int monthNumber = Integer.parseInt(type);
    return String.valueOf(monthNumber - 1);
}
/**
 * Returns the ICU resource path segment that corresponds to one step of an
 * LDML XPath, e.g. <code>languages</code> or <code>language[@type='en']</code>.
 * <p>
 * The step is classified by its element name only (the text in front of the
 * first '['), so text inside attribute values cannot select the wrong
 * mapping and element names that are prefixes of each other
 * (<code>measurement</code> / <code>measurementSystem</code>) are told apart.
 *
 * @param token one step of an XPath as produced by XPathTokenizer
 * @return the ICU segment; an empty string when the step has no segment of
 *         its own in the ICU path
 * @throws IllegalArgumentException if the element is unknown or its
 *         conversion is not implemented yet
 */
private static String getICUEquivalent(String token){
    int predicateStart = token.indexOf('[');
    String element = (predicateStart > -1) ? token.substring(0, predicateStart) : token;

    if(element.equals(LDMLConstants.LDN)){
        return "";
    }else if(element.equals(LDMLConstants.LANGUAGES)){
        return "Languages";
    }else if(element.equals(LDMLConstants.LANGUAGE)){
        return getAttributeValue(token, LDMLConstants.TYPE);
    }else if(element.equals(LDMLConstants.TERRITORIES)){
        return "Countries";
    }else if(element.equals(LDMLConstants.TERRITORY)){
        return getAttributeValue(token, LDMLConstants.TYPE);
    }else if(element.equals(LDMLConstants.SCRIPTS)){
        return "Scripts";
    }else if(element.equals(LDMLConstants.SCRIPT)){
        return getAttributeValue(token, LDMLConstants.TYPE);
    }else if(element.equals(LDMLConstants.VARIANTS)){
        return "Variants";
    }else if(element.equals(LDMLConstants.VARIANT)){
        return getAttributeValue(token, LDMLConstants.TYPE);
    }else if(element.equals(LDMLConstants.KEYS)){
        return "Keys";
    }else if(element.equals(LDMLConstants.KEY)){
        return getAttributeValue(token, LDMLConstants.TYPE);
    }else if(element.equals(LDMLConstants.TYPES)){
        return "Types";
    }else if(element.equals(LDMLConstants.TYPE)){
        String type = getAttributeValue(token, LDMLConstants.TYPE);
        String key = getAttributeValue(token, LDMLConstants.KEY);
        return type+"/"+key;
    }else if(element.equals(LDMLConstants.LAYOUT)){
        return "Layout";
    }else if(element.equals(LDMLConstants.CHARACTERS)){
        return "";
    }else if(element.equals(LDMLConstants.EXEMPLAR_CHARACTERS)){
        return "ExemplarCharacters";
    }else if(element.equals(LDMLConstants.MEASUREMENT)){
        return "";
    }else if(element.equals(LDMLConstants.MS)){
        return "MeasurementSystem";
    }else if(element.equals(LDMLConstants.PAPER_SIZE)){
        return "PaperSize";
    }else if(element.equals(LDMLConstants.HEIGHT)){
        return "0";
    }else if(element.equals(LDMLConstants.WIDTH)){
        return "1";
    }else if(element.equals(LDMLConstants.DATES)){
        return "";
    }else if(element.equals(LDMLConstants.LPC)){
        return "localPatternCharacters";
    }else if(element.equals(LDMLConstants.CALENDARS)){
        return "calendar";
    }else if(element.equals(LDMLConstants.DEFAULT)){
        return "default";
    }else if(element.equals(LDMLConstants.CALENDAR)){
        return getAttributeValue(token, LDMLConstants.TYPE);
    }else if(element.equals(LDMLConstants.ERAS)){
        return "eras";
    }else if(element.equals(LDMLConstants.ERAABBR)){
        return "abbreviated";
    }else if(element.equals(LDMLConstants.ERA)){
        return getAttributeValue(token, LDMLConstants.TYPE);
    }else if(element.equals(LDMLConstants.MONTHS)){
        return "monthNames";
    }else if(element.equals(LDMLConstants.MONTH_CONTEXT)){
        return getAttributeValue(token, LDMLConstants.TYPE);
    }else if(element.equals(LDMLConstants.MONTH_WIDTH)){
        return getAttributeValue(token, LDMLConstants.TYPE);
    }else if(element.equals(LDMLConstants.MONTH)){
        String valStr = getAttributeValue(token, LDMLConstants.TYPE);
        return getMonthIndexAsString(valStr);
    }else if(element.equals(LDMLConstants.DAYS)){
        return "dayNames";
    }else if(element.equals(LDMLConstants.DAY_CONTEXT)){
        return getAttributeValue(token, LDMLConstants.TYPE);
    }else if(element.equals(LDMLConstants.DAY_WIDTH)){
        return getAttributeValue(token, LDMLConstants.TYPE);
    }else if(element.equals(LDMLConstants.DAY)){
        String dayName = getAttributeValue(token, LDMLConstants.TYPE);
        return getDayIndexAsString(dayName);
    }
    // TODO fix this: the following elements have no conversion yet and end
    // up in the exception below: orientation, dateFormats, dateFormatLength,
    // dateFormat, timeFormats, timeFormatLength, timeFormat,
    // dateTimeFormats, dateTimeFormatLength, dateTimeFormat.

    // TODO: this method is not finished yet
    // the conversion of Xpath to ICU alias path
    // is not as straight forward as I thought
    // need to cater to idiosyncrasies of each
    // element node :(
    throw new IllegalArgumentException("Unknown Xpath fragment: " + token);
}
/**
 * Extracts the value of an attribute predicate from one XPath step, e.g.
 * the value <code>en</code> of attribute <code>type</code> in
 * <code>language[@type='en']</code>.
 * <p>
 * The attribute is located by its <code>@name=</code> marker, so an element
 * that has the same name as the attribute, or other predicates on the same
 * step, do not disturb the lookup. Surrounding single or double quotes are
 * removed from the value.
 *
 * @param token  one step of an XPath, with zero or more predicates
 * @param attrib the attribute name, without the leading '@'
 * @return the attribute value, or null if the step has no such attribute
 */
private static String getAttributeValue(String token, String attrib){
    String marker = "@" + attrib;
    int length = token.length();
    int searchFrom = 0;
    while(true){
        int attribStart = token.indexOf(marker, searchFrom);
        if(attribStart < 0){
            return null;
        }
        int pos = attribStart + marker.length();
        while(pos < length && token.charAt(pos) == ' '){
            pos++;
        }
        if(pos < length && token.charAt(pos) == '='){
            pos++; /*skip past the separator*/
            while(pos < length && token.charAt(pos) == ' '){
                pos++;
            }
            if(pos >= length){
                return "";
            }
            char quote = token.charAt(pos);
            if(quote == '\'' || quote == '"'){
                // the value runs up to the matching quote, so it may
                // contain '/', '@' or ']' characters
                int close = token.indexOf(quote, pos + 1);
                if(close < 0){
                    close = length; // unterminated quote: take the rest
                }
                return token.substring(pos + 1, close);
            }
            // unquoted value: runs up to the end of the predicate
            int end = token.indexOf(']', pos);
            if(end < 0){
                end = length;
            }
            return token.substring(pos, end);
        }
        // matched only the beginning of a longer attribute name; keep looking
        searchFrom = attribStart + 1;
    }
}
/**
* Resolved Data File
* <p>To produce fully resolved locale data file from CLDR for a locale ID L, you start with root, and
@ -127,30 +347,45 @@ public class LDMLUtilities {
xpath.append("/");
xpath.append(childName);
appendXPathAttribute(child,xpath);
Node nodeInSource = getNode(source, xpath.toString());
Node nodeInSource = null;
if(childName.indexOf(":")>-1){
nodeInSource = getNode(source, xpath.toString(), child);
}else{
nodeInSource = getNode(source, xpath.toString());
}
if(nodeInSource==null){
// the child xml has a new node
// that should be added to parent
String parentXpath = xpath.substring(0, savedLength);
Node parentNodeInSource = getNode(source, parentXpath);
Node parentNodeInSource = null;
if(childName.indexOf(":")>-1){
parentNodeInSource = getNode(source, parentXpath, child);
}else{
parentNodeInSource = getNode(source,parentXpath);
}
if(parentNodeInSource==null){
throw new RuntimeException("Internal Error");
}
if(xpath.indexOf(IDENTITY)>-1){
String delXPath= xpath.substring(0,savedLength)+"/"+childName;
Node delNode = getNode(source, delXPath);
if(delNode!=null){
parentNodeInSource.removeChild(delNode);
}
}
Node childToImport = source.importNode(child,true);
parentNodeInSource.appendChild(childToImport);
}else if( xpath.indexOf(LDMLConstants.IDENTITY)>1){
// replace the source doc
Node parentNodeInSource = nodeInSource.getParentNode();
Node childToImport = source.importNode(child,true);
parentNodeInSource.replaceChild(childToImport, nodeInSource);
}else{
if(areChildrenElementNodes(child)){
//recurse to pickup any children!
mergeLDMLDocuments(source, child, xpath);
}else{
if(childName.equals(LDMLConstants.ALIAS)){
//TODO fix this
}
// we have reached a leaf node now get the
// replace to the source doc
Node parentNodeInSource = nodeInSource.getParentNode();
@ -163,9 +398,37 @@ public class LDMLUtilities {
}
return source;
}
private static final String ALT = "alt";
private static final String KEY = "key";
private static final String REGISTRY = "registry";
//TODO add functions for fetching legitimate children
// for ICU
/**
 * Tells whether the node selected by the XPath lies inside a draft element.
 * The ancestors of the node are visited from the parent upwards; the first
 * one that carries a <code>draft</code> attribute decides the result.
 *
 * @param fullyResolved the document to search
 * @param xpath         XPath selecting the node whose ancestors are checked
 * @return true if the nearest ancestor with a draft attribute has the value
 *         "true"; false if its value is anything else or if no ancestor
 *         has the attribute
 * @throws IllegalArgumentException if the XPath does not select a node
 */
public boolean isParentDraft(Document fullyResolved, String xpath){
    Node node = getNode(fullyResolved, xpath);
    if(node == null){
        throw new IllegalArgumentException("The XPATH did not return a node. Check XPATH: "+xpath);
    }
    // walk up one level per iteration; the walk ends at the document node,
    // whose parent is null
    for(Node ancestor = node.getParentNode(); ancestor != null; ancestor = ancestor.getParentNode()){
        // only elements can carry attributes
        if(ancestor.getNodeType() != Node.ELEMENT_NODE){
            continue;
        }
        String draft = getAttributeValue(ancestor, DRAFT);
        if(draft != null){
            return draft.equals("true");
        }
    }
    // the default value is false if none specified
    return false;
}
/**
 * Tells whether the node itself is marked as draft.
 *
 * @param node the node whose <code>draft</code> attribute is read
 * @return true only if the attribute is present and has the value "true"
 */
public boolean isNodeDraft(Node node){
    String draftValue = getAttributeValue(node, DRAFT);
    return draftValue != null && draftValue.equals("true");
}
public static void appendXPathAttribute(Node node, StringBuffer xpath){
boolean terminate = false;
String val = getAttributeValue(node, TYPE);
@ -218,6 +481,24 @@ public class LDMLUtilities {
throw new RuntimeException(ex.getMessage());
}
}
/**
 * Selects the single node that an XPath identifies in a document, resolving
 * namespace prefixes in the XPath against the given node.
 *
 * @param doc           the document to search
 * @param xpath         the XPath to evaluate
 * @param namespaceNode node passed to the XPath engine for resolving prefixes
 * @return the selected node, or null if the XPath selects nothing
 * @throws IllegalArgumentException if the XPath selects more than one node
 * @throws RuntimeException if the XPath cannot be evaluated; the underlying
 *         TransformerException is attached as the cause
 */
public static Node getNode(Document doc, String xpath, Node namespaceNode){
    try{
        NodeList nl = XPathAPI.selectNodeList(doc, xpath, namespaceNode);
        int len = nl.getLength();
        //TODO watch for attribute "alt"
        if(len>1){
            throw new IllegalArgumentException("The XPATH returned more than 1 node!. Check XPATH: "+xpath);
        }
        if(len==0){
            return null;
        }
        return nl.item(0);
    }catch(TransformerException ex){
        // keep the original exception so its stack trace is not lost
        throw new RuntimeException(ex.getMessage(), ex);
    }
}
/**
*
* @param doc

View File

@ -0,0 +1,107 @@
/*
******************************************************************************
* Copyright (C) 2004, International Business Machines Corporation and *
* others. All Rights Reserved. *
******************************************************************************
*/
package com.ibm.icu.dev.tool.cldr;
/**
 * Splits an XPath into its steps at every '/' that is not inside a
 * single-quoted string, and resolves relative XPaths that start with ".."
 * steps against an absolute path.
 *
 * @author ram
 */
public class XPathTokenizer{
    /** The characters of the path being tokenized. */
    char[] xpath;
    /** Index of the next character to read; greater than the length once exhausted. */
    int current;

    /**
     * Creates a tokenizer over the current contents of the buffer.
     * @param path the XPath to tokenize
     */
    public XPathTokenizer(StringBuffer path){
        this(path.toString());
    }

    /**
     * Creates a tokenizer. Up to two leading '/' characters (the "//" of an
     * absolute path) are skipped; a path that does not start with a slash,
     * such as "../months" or "months/month", is tokenized from its first
     * character.
     * @param path the XPath to tokenize
     */
    public XPathTokenizer(String path){
        xpath = path.toCharArray();
        current = 0;
        while(current < 2 && current < xpath.length && xpath[current] == '/'){
            current++;
        }
        if(current >= xpath.length){
            // nothing but the prefix: there are no steps to return
            current = xpath.length + 1;
        }
    }

    /**
     * Returns the next step of the path.
     * @return the next step, or null when all steps have been returned
     */
    public String nextToken(){
        boolean inquote = false;
        int save = current;
        while(current < xpath.length){
            char c = xpath[current];
            if(c == '\''){
                inquote = !inquote;
            }else if(c == '/' && !inquote){
                String retval = new String(xpath, save, current - save);
                current++; //skip past the separator
                return retval;
            }
            current++;
        }
        if(current == xpath.length){
            // last step: everything up to the end of the path
            String retval = new String(xpath, save, current - save);
            current++; // move past the end so that the next call returns null
            return retval;
        }
        return null;
    }

    /**
     * Removes the last step, together with the '/' in front of it, from the
     * path. The path is scanned backwards and a '/' inside a single-quoted
     * string is not treated as a separator.
     * @param xpath the path to shorten, modified in place
     * @return the same buffer
     */
    private static StringBuffer deleteToken(StringBuffer xpath){
        int length = xpath.length();
        // the quote state must survive from one character to the next
        boolean inquote = false;
        for(int pos = length - 1; pos > 0; pos--){
            char c = xpath.charAt(pos);
            if(c == '\''){
                inquote = !inquote;
            }else if(c == '/' && !inquote){
                xpath.delete(pos, length);
                return xpath;
            }
        }
        return xpath;
    }

    /**
     * This method will try to convert a relative xpath to absolute
     * xpath. Every leading ".." step removes the last step of fullPath;
     * the remaining steps are appended to it.
     * TODO: The method will only resolve relative tokens in the beginning
     * of the string. Try to handle embedded ".."
     * @param xpath the relative path
     * @param fullPath the absolute path to resolve against, modified in place
     * @return fullPath, the same buffer that was passed in
     */
    public static StringBuffer relativeToAbsolute(String xpath, StringBuffer fullPath){
        XPathTokenizer tokenizer = new XPathTokenizer(xpath);
        String token=tokenizer.nextToken();
        // the path may consist of ".." steps only, so check for the end too
        while(token!=null && token.equals("..")){
            deleteToken(fullPath);
            token = tokenizer.nextToken();
        }
        while(token!=null){
            fullPath.append("/");
            fullPath.append(token);
            token = tokenizer.nextToken();
        }
        return fullPath;
    }
}