b236d896f6
X-SVN-Rev: 11784
1186 lines
42 KiB
Java
1186 lines
42 KiB
Java
/**
|
|
*******************************************************************************
|
|
* Copyright (C) 1996-2001, International Business Machines Corporation and *
|
|
* others. All Rights Reserved. *
|
|
*******************************************************************************
|
|
*
|
|
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
|
|
* $Date: 2003/05/02 21:46:33 $
|
|
* $Revision: 1.33 $
|
|
*
|
|
*******************************************************************************
|
|
*/
|
|
|
|
package com.ibm.text.utility;
|
|
|
|
import java.util.*;
|
|
import java.text.*;
|
|
import java.io.*;
|
|
import com.ibm.icu.text.UnicodeSet;
|
|
import com.ibm.icu.text.UTF16;
|
|
import com.ibm.icu.text.Replaceable;
|
|
import com.ibm.icu.text.ReplaceableString;
|
|
import com.ibm.icu.text.UnicodeMatcher;
|
|
|
|
import com.ibm.text.UCD.*;
|
|
|
|
public final class Utility implements UCD_Types { // COMMON UTILITIES
|
|
|
|
// static final boolean UTF8 = true; // TODO -- make argument
|
|
public static final char BOM = '\uFEFF';
|
|
|
|
public static String[] append(String[] array1, String[] array2) {
|
|
String[] temp = new String[array1.length + array2.length];
|
|
System.arraycopy(array1, 0, temp, 0, array1.length);
|
|
System.arraycopy(array2, 0, temp, array1.length, array2.length);
|
|
return temp;
|
|
}
|
|
|
|
public static String[] subarray(String[] array1, int start, int limit) {
|
|
String[] temp = new String[limit - start];
|
|
System.arraycopy(array1, start, temp, 0, limit - start);
|
|
return temp;
|
|
}
|
|
|
|
public static String[] subarray(String[] array1, int start) {
|
|
return subarray(array1, start, array1.length);
|
|
}
|
|
|
|
public static String getName(int i, String[] names) {
|
|
try {
|
|
return names[i];
|
|
} catch (Exception e) {
|
|
return "UNKNOWN";
|
|
}
|
|
}
|
|
|
|
private static boolean needCRLF = false;
|
|
|
|
public static int DOTMASK = 0x7FF;
|
|
|
|
public static void dot(int i) {
|
|
if ((i % DOTMASK) == 0) {
|
|
needCRLF = true;
|
|
System.out.print('.');
|
|
}
|
|
}
|
|
|
|
public static void fixDot() {
|
|
if (needCRLF) {
|
|
System.out.println();
|
|
needCRLF = false;
|
|
}
|
|
}
|
|
|
|
public static long setBits(long source, int start, int end) {
|
|
if (start < end) {
|
|
int temp = start;
|
|
start = end;
|
|
end = temp;
|
|
}
|
|
long bmstart = (1L << (start+1)) - 1;
|
|
long bmend = (1L << end) - 1;
|
|
bmstart &= ~bmend;
|
|
return source |= bmstart;
|
|
}
|
|
|
|
public static long setBit(long source, int start) {
|
|
return setBits(source, start, start);
|
|
}
|
|
|
|
public static long clearBits(long source, int start, int end) {
|
|
if (start < end) {
|
|
int temp = start;
|
|
start = end;
|
|
end = temp;
|
|
}
|
|
int bmstart = (1 << (start+1)) - 1;
|
|
int bmend = (1 << end) - 1;
|
|
bmstart &= ~bmend;
|
|
return source &= ~bmstart;
|
|
}
|
|
|
|
public static long clearBit(long source, int start) {
|
|
return clearBits(source, start, start);
|
|
}
|
|
|
|
public static int find(String source, String[] target, boolean skeletonize) {
|
|
if (skeletonize) source = getSkeleton(source);
|
|
for (int i = 0; i < target.length; ++i) {
|
|
if (source.equals(getSkeleton(target[i]))) return i;
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
/**
|
|
* These routines use the Java functions, because they only need to act on ASCII.
|
|
* Removes space, _, and lowercases.
|
|
*/
|
|
|
|
public static String getSkeleton(String source) {
|
|
skeletonBuffer.setLength(0);
|
|
boolean gotOne = false;
|
|
// remove spaces, '_', '-'
|
|
// we can do this with char, since no surrogates are involved
|
|
for (int i = 0; i < source.length(); ++i) {
|
|
char ch = source.charAt(i);
|
|
if (ch == '_' || ch == ' ' || ch == '-') {
|
|
gotOne = true;
|
|
} else {
|
|
char ch2 = Character.toLowerCase(ch);
|
|
if (ch2 != ch) {
|
|
gotOne = true;
|
|
skeletonBuffer.append(ch2);
|
|
} else {
|
|
skeletonBuffer.append(ch);
|
|
}
|
|
}
|
|
}
|
|
if (!gotOne) return source; // avoid string creation
|
|
return skeletonBuffer.toString();
|
|
}
|
|
|
|
private static StringBuffer skeletonBuffer = new StringBuffer();
|
|
|
|
/**
|
|
* These routines use the Java functions, because they only need to act on ASCII
|
|
* Changes space, - into _, inserts _ between lower and UPPER.
|
|
*/
|
|
|
|
public static String getUnskeleton(String source, boolean titlecaseStart) {
|
|
if (source.equals("noBreak")) return source; // HACK
|
|
StringBuffer result = new StringBuffer();
|
|
int lastCat = -1;
|
|
boolean haveFirstCased = true;
|
|
for (int i = 0; i < source.length(); ++i) {
|
|
char c = source.charAt(i);
|
|
if (c == ' ' || c == '-' || c == '_') {
|
|
c = '_';
|
|
haveFirstCased = true;
|
|
}
|
|
if (c == '=') haveFirstCased = true;
|
|
int cat = Character.getType(c);
|
|
if (lastCat == Character.LOWERCASE_LETTER && cat == Character.UPPERCASE_LETTER) {
|
|
result.append('_');
|
|
}
|
|
if (haveFirstCased && (cat == Character.LOWERCASE_LETTER
|
|
|| cat == Character.TITLECASE_LETTER || cat == Character.UPPERCASE_LETTER)) {
|
|
if (titlecaseStart) {
|
|
c = Character.toUpperCase(c);
|
|
}
|
|
haveFirstCased = false;
|
|
}
|
|
result.append(c);
|
|
lastCat = cat;
|
|
}
|
|
return result.toString();
|
|
}
|
|
|
|
public static String findSubstring(String source, Set target, boolean invert) {
|
|
Iterator it = target.iterator();
|
|
while (it.hasNext()) {
|
|
String other = it.next().toString();
|
|
if ((other.indexOf(source) >= 0) == invert) return other;
|
|
}
|
|
return null;
|
|
}
|
|
|
|
public static byte lookup(String source, String[] target, boolean skeletonize) {
|
|
int result = Utility.find(source, target, skeletonize);
|
|
if (result != -1) return (byte)result;
|
|
throw new ChainException("Could not find \"{0}\" in table [{1}]", new Object [] {source, target});
|
|
}
|
|
|
|
/**
|
|
* Supplies a zero-padded hex representation of an integer (without 0x)
|
|
*/
|
|
static public String hex(long i, int places) {
|
|
if (i == Long.MIN_VALUE) return "-8000000000000000";
|
|
boolean negative = i < 0;
|
|
if (negative) {
|
|
i = -i;
|
|
}
|
|
String result = Long.toString(i, 16).toUpperCase();
|
|
if (result.length() < places) {
|
|
result = "0000000000000000".substring(result.length(),places) + result;
|
|
}
|
|
if (negative) {
|
|
return '-' + result;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
public static String hex(long ch) {
|
|
return hex(ch,4);
|
|
}
|
|
|
|
public static String hex(byte ch) {
|
|
return hex(ch & 0xFF,2);
|
|
}
|
|
|
|
public static String hex(char ch) {
|
|
return hex(ch & 0xFFFF,4);
|
|
}
|
|
|
|
public static String hex(Object s) {
|
|
return hex(s, 4, " ");
|
|
}
|
|
|
|
public static String hex(Object s, int places) {
|
|
return hex(s, places, " ");
|
|
}
|
|
|
|
public static String hex(Object s, String separator) {
|
|
return hex(s, 4, separator);
|
|
}
|
|
|
|
public static String hex(Object o, int places, String separator) {
|
|
if (o == null) return "";
|
|
if (o instanceof Number) return hex(((Number)o).longValue(), places);
|
|
|
|
String s = o.toString();
|
|
StringBuffer result = new StringBuffer();
|
|
int ch;
|
|
for (int i = 0; i < s.length(); i += UTF32.count16(ch)) {
|
|
if (i != 0) result.append(separator);
|
|
ch = UTF32.char32At(s, i);
|
|
result.append(hex(ch));
|
|
}
|
|
return result.toString();
|
|
}
|
|
|
|
public static String hex(byte[] o, int start, int end, String separator) {
|
|
StringBuffer result = new StringBuffer();
|
|
//int ch;
|
|
for (int i = start; i < end; ++i) {
|
|
if (i != 0) result.append(separator);
|
|
result.append(hex(o[i]));
|
|
}
|
|
return result.toString();
|
|
}
|
|
|
|
public static String hex(char[] o, int start, int end, String separator) {
|
|
StringBuffer result = new StringBuffer();
|
|
for (int i = start; i < end; ++i) {
|
|
if (i != 0) result.append(separator);
|
|
result.append(hex(o[i]));
|
|
}
|
|
return result.toString();
|
|
}
|
|
|
|
/**
|
|
* Returns a string containing count copies of s.
|
|
* If count <= 0, returns "".
|
|
*/
|
|
public static String repeat(String s, int count) {
|
|
if (count <= 0) return "";
|
|
if (count == 1) return s;
|
|
StringBuffer result = new StringBuffer(count*s.length());
|
|
for (int i = 0; i < count; ++i) {
|
|
result.append(s);
|
|
}
|
|
return result.toString();
|
|
}
|
|
|
|
public static int intFrom(String p) {
|
|
if (p.length() == 0) return Short.MIN_VALUE;
|
|
return Integer.parseInt(p);
|
|
}
|
|
|
|
public static float floatFrom(String p) {
|
|
if (p.length() == 0) return Float.NaN;
|
|
int fract = p.indexOf('/');
|
|
if (fract == -1) return Float.valueOf(p).floatValue();
|
|
String q = p.substring(0,fract);
|
|
float num = 0;
|
|
if (q.length() != 0) num = Integer.parseInt(q);
|
|
p = p.substring(fract+1,p.length());
|
|
float den = 0;
|
|
if (p.length() != 0) den = Integer.parseInt(p);
|
|
return num/den;
|
|
}
|
|
|
|
public static double doubleFrom(String p) {
|
|
if (p.length() == 0) return Double.NaN;
|
|
int fract = p.indexOf('/');
|
|
if (fract == -1) return Double.valueOf(p).doubleValue();
|
|
String q = p.substring(0,fract);
|
|
double num = 0;
|
|
if (q.length() != 0) num = Integer.parseInt(q);
|
|
p = p.substring(fract+1,p.length());
|
|
double den = 0;
|
|
if (p.length() != 0) den = Integer.parseInt(p);
|
|
return num/den;
|
|
}
|
|
|
|
public static int codePointFromHex(String p) {
|
|
String temp = Utility.fromHex(p);
|
|
if (UTF32.length32(temp) != 1) throw new ChainException("String is not single (UTF32) character: " + p, null);
|
|
return UTF32.char32At(temp, 0);
|
|
}
|
|
|
|
public static String fromHex(String p) {
|
|
StringBuffer output = new StringBuffer();
|
|
int value = 0;
|
|
int count = 0;
|
|
main:
|
|
for (int i = 0; i < p.length(); ++i) {
|
|
char ch = p.charAt(i);
|
|
int digit = 0;
|
|
switch (ch) {
|
|
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
|
|
digit = ch - 'a' + 10;
|
|
break;
|
|
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
|
|
digit = ch - 'A' + 10;
|
|
break;
|
|
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7':
|
|
case '8': case '9':
|
|
digit = ch - '0';
|
|
break;
|
|
default:
|
|
int type = Character.getType(ch);
|
|
if (type != Character.SPACE_SEPARATOR) {
|
|
throw new ChainException("bad hex value: '{0}' at position {1} in \"{2}\"",
|
|
new Object[] {String.valueOf(ch), new Integer(i), p});
|
|
}
|
|
// fall through!!
|
|
case ' ': case ',': case ';': // do SPACE here, just for speed
|
|
if (count != 0) {
|
|
UTF32.append32(output, value);
|
|
}
|
|
count = 0;
|
|
value = 0;
|
|
continue main;
|
|
}
|
|
value <<= 4;
|
|
value += digit;
|
|
if (value > 0x10FFFF) {
|
|
throw new ChainException("Character code too large: '{0}' at position {1} in \"{2}\"",
|
|
new Object[] {String.valueOf(ch), new Integer(i), p});
|
|
}
|
|
count++;
|
|
}
|
|
if (count != 0) {
|
|
UTF32.append32(output, value);
|
|
}
|
|
return output.toString();
|
|
}
|
|
|
|
|
|
public static final class Position {
|
|
public int start, limit;
|
|
}
|
|
|
|
/**
|
|
* Finds the next position in the text that matches.
|
|
* @param divider A UnicodeMatcher, such as a UnicodeSet.
|
|
* @text obvious
|
|
* @offset starting offset
|
|
* @output start and limit of the piece found. If the return is false, then start,limit = length
|
|
* @return true iff match found
|
|
*/
|
|
public static boolean next(UnicodeMatcher matcher, Replaceable text, int offset,
|
|
Position output) {
|
|
int[] io = new int[1]; // TODO replace later; extra object creation
|
|
int limit = text.length();
|
|
// don't worry about surrogates; matcher will handle
|
|
for (int i = offset; i <= limit; ++i) {
|
|
io[0] = i;
|
|
if (matcher.matches(text, io, limit, false) == UnicodeMatcher.U_MATCH) {
|
|
// a hit, return
|
|
output.start = i;
|
|
output.limit = io[0];
|
|
return true;
|
|
}
|
|
}
|
|
output.start = output.limit = limit;
|
|
return false;
|
|
}
|
|
|
|
/**
|
|
* Finds the next position in the text that matches.
|
|
* @param divider A UnicodeMatcher, such as a UnicodeSet.
|
|
* @text obvious
|
|
* @offset starting offset
|
|
* @output start and limit of the piece found. If the return is false, then start,limit = 0
|
|
* @return true iff match found
|
|
*/
|
|
public static boolean previous(UnicodeMatcher matcher, Replaceable text, int offset,
|
|
Position output) {
|
|
int[] io = new int[1]; // TODO replace later; extra object creation
|
|
int limit = 0;
|
|
// don't worry about surrogates; matcher will handle
|
|
for (int i = offset; i >= limit; --i) {
|
|
io[0] = i;
|
|
if (matcher.matches(text, io, offset, false) == UnicodeMatcher.U_MATCH) {
|
|
// a hit, return
|
|
output.start = i;
|
|
output.limit = io[0];
|
|
return true;
|
|
}
|
|
}
|
|
output.start = output.limit = limit;
|
|
return false;
|
|
}
|
|
|
|
/**
|
|
* Splits a string containing divider into pieces, storing in output
|
|
* and returns the number of pieces. The string does not have to be terminated:
|
|
* the segment after the last divider is returned in the last output element.
|
|
* Thus if the string has no dividers, then the whole string is returned in output[0]
|
|
* with a return value of 1.
|
|
* @param divider A UnicodeMatcher, such as a UnicodeSet.
|
|
* @param s the text to be divided
|
|
* @param output where the resulting pieces go
|
|
* @return the number of items put into output
|
|
*/
|
|
public static int split(UnicodeMatcher divider, Replaceable text, Position[] output) {
|
|
int index = 0;
|
|
for (int offset = 0;; offset = output[index-1].limit) {
|
|
if (output[index] == null) output[index] = new Position();
|
|
boolean matches = next(divider, text, offset, output[index++]);
|
|
if (!matches) return index;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Splits a string containing divider into pieces, storing in output
|
|
* and returns the number of pieces.
|
|
*/
|
|
public static int split(String s, char divider, String[] output) {
|
|
int last = 0;
|
|
int current = 0;
|
|
int i;
|
|
for (i = 0; i < s.length(); ++i) {
|
|
if (s.charAt(i) == divider) {
|
|
output[current++] = s.substring(last,i);
|
|
last = i+1;
|
|
}
|
|
}
|
|
output[current++] = s.substring(last,i);
|
|
int result = current;
|
|
while (current < output.length) {
|
|
output[current++] = "";
|
|
}
|
|
return result;
|
|
}
|
|
|
|
public static String[] split(String s, char divider) {
|
|
String[] result = new String[100]; // HACK
|
|
int count = split(s, divider, result);
|
|
return extract(result, 0, count);
|
|
}
|
|
|
|
public static String[] extract(String[] source, int start, int limit) {
|
|
String[] result = new String[limit-start];
|
|
System.arraycopy(source, start, result, 0, limit - start);
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
public static String quoteJava(String s) {
|
|
StringBuffer result = new StringBuffer();
|
|
for (int i = 0; i < s.length(); ++i) {
|
|
result.append(quoteJava(s.charAt(i)));
|
|
}
|
|
return result.toString();
|
|
}
|
|
*/
|
|
public static String quoteJavaString(String s) {
|
|
if (s == null) return "null";
|
|
StringBuffer result = new StringBuffer();
|
|
result.append('"');
|
|
for (int i = 0; i < s.length(); ++i) {
|
|
result.append(quoteJava(s.charAt(i)));
|
|
}
|
|
result.append('"');
|
|
return result.toString();
|
|
}
|
|
|
|
public static String quoteJava(int c) {
|
|
switch (c) {
|
|
case '\\':
|
|
return "\\\\";
|
|
case '"':
|
|
return "\\\"";
|
|
case '\r':
|
|
return "\\r";
|
|
case '\n':
|
|
return "\\n";
|
|
default:
|
|
if (c >= 0x20 && c <= 0x7E) {
|
|
return String.valueOf((char)c);
|
|
} else if (UTF32.isSupplementary(c)) {
|
|
return "\\u" + hex((char)UTF32.getLead(c),4) + "\\u" + hex((char)UTF32.getTrail(c),4);
|
|
} else {
|
|
return "\\u" + hex((char)c,4);
|
|
}
|
|
}
|
|
}
|
|
|
|
public static String quoteXML(int c, boolean HTML) {
|
|
switch (c) {
|
|
case '<': return "<";
|
|
case '>': return ">";
|
|
case '&': return "&";
|
|
case '\'': if (!HTML) return "'";
|
|
break;
|
|
case '"': return """;
|
|
|
|
// fix controls, since XML can't handle
|
|
|
|
// also do this for 09, 0A, and 0D, so we can see them.
|
|
case 0x00: case 0x01: case 0x02: case 0x03: case 0x04: case 0x05: case 0x06: case 0x07:
|
|
case 0x08: case 0x09: case 0x0A: case 0x0B: case 0x0C: case 0x0D: case 0x0E: case 0x0F:
|
|
case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17:
|
|
case 0x18: case 0x19: case 0x1A: case 0x1B: case 0x1C: case 0x1D: case 0x1E: case 0x1F:
|
|
case 0x7F:
|
|
|
|
// fix noncharacters, since XML can't handle
|
|
case 0xFFFE: case 0xFFFF:
|
|
|
|
return "<codepoint hex=\"" + hex(c,1) + "\"/>";
|
|
}
|
|
|
|
// fix surrogates, since XML can't handle
|
|
if (UTF32.isSurrogate(c)) {
|
|
return "<codepoint hex=\"" + hex(c,1) + "\"/>";
|
|
}
|
|
|
|
if (c <= 0x7E) {
|
|
return UTF32.valueOf32(c);
|
|
}
|
|
|
|
// fix supplementaries & high characters, because of IE bug
|
|
/*if (UTF32.isSupplementary(c) || 0xFFF9 <= c && c <= 0xFFFD) {
|
|
return "#x" + hex(c,1) + ";";
|
|
}
|
|
*/
|
|
|
|
return "&#x" + hex(c,1) + ";";
|
|
}
|
|
|
|
public static String quoteXML(String source, boolean HTML) {
|
|
if (source == null) return "null";
|
|
StringBuffer result = new StringBuffer();
|
|
for (int i = 0; i < source.length(); ++i) {
|
|
int c = UTF32.char32At(source, i);
|
|
if (UTF32.isSupplementary(c)) ++i;
|
|
result.append(quoteXML(c, HTML));
|
|
}
|
|
return result.toString();
|
|
}
|
|
|
|
public static String quoteXML(String source) {
|
|
return quoteXML(source, false);
|
|
}
|
|
|
|
public static String quoteXML(int source) {
|
|
return quoteXML(source, false);
|
|
}
|
|
|
|
private static UnicodeProperty defaultIgnorable = null;
|
|
|
|
public static String getDisplay(int cp) {
|
|
String result = UTF16.valueOf(cp);
|
|
byte cat = Default.ucd.getCategory(cp);
|
|
if (cat == Mn || cat == Me) {
|
|
result = String.valueOf(DOTTED_CIRCLE) + result;
|
|
} else if (cat == Cf || cat == Cc || cp == 0x034F || cp == 0x00AD || cp == 0x1806) {
|
|
result = "\u25A1";
|
|
} else {
|
|
if (defaultIgnorable == null) defaultIgnorable = DerivedProperty.make(DefaultIgnorable);
|
|
if (defaultIgnorable.hasValue(cp)) result = "\u25A1";
|
|
}
|
|
return result;
|
|
}
|
|
|
|
public static int compare(char[] a, int aStart, int aEnd, char[] b, int bStart, int bEnd) {
|
|
while (aStart < aEnd && bStart < bEnd) {
|
|
int diff = a[aStart++] - b[bStart++];
|
|
if (diff != 0) return diff;
|
|
}
|
|
return (aEnd - aStart) - (bEnd - bStart);
|
|
}
|
|
|
|
public static int compare(byte[] a, int aStart, int aEnd, byte[] b, int bStart, int bEnd) {
|
|
while (aStart < aEnd && bStart < bEnd) {
|
|
int diff = a[aStart++] - b[bStart++];
|
|
if (diff != 0) return diff;
|
|
}
|
|
return (aEnd - aStart) - (bEnd - bStart);
|
|
}
|
|
|
|
public static int compareUnsigned(byte[] a, int aStart, int aEnd, byte[] b, int bStart, int bEnd) {
|
|
while (aStart < aEnd && bStart < bEnd) {
|
|
int diff = (a[aStart++] & 0xFF) - (b[bStart++] & 0xFF);
|
|
if (diff != 0) return diff;
|
|
}
|
|
return (aEnd - aStart) - (bEnd - bStart);
|
|
}
|
|
|
|
/**
|
|
* Joins an array together, using divider between the pieces
|
|
*/
|
|
public static String join(int[] array, String divider) {
|
|
String result = "{";
|
|
for (int i = 0; i < array.length; ++i) {
|
|
if (i != 0) result += divider;
|
|
result += array[i];
|
|
}
|
|
return result + "}";
|
|
}
|
|
|
|
public static String join(long[] array, String divider) {
|
|
String result = "{";
|
|
for (int i = 0; i < array.length; ++i) {
|
|
if (i != 0) result += divider;
|
|
result += array[i];
|
|
}
|
|
return result + "}";
|
|
}
|
|
|
|
private static final String[] searchPath = {
|
|
"EXTRAS",
|
|
"4.0.0",
|
|
"3.2.0",
|
|
"3.1.1",
|
|
"3.1.0",
|
|
"3.0.1",
|
|
"3.0.0",
|
|
"2.1.9",
|
|
"2.1.8",
|
|
"2.1.5",
|
|
"2.1.2",
|
|
"2.0.0",
|
|
"1.1.0",
|
|
};
|
|
|
|
/*public static PrintWriter openPrintWriter(String filename) throws IOException {
|
|
return openPrintWriter(filename, LATIN1_UNIX);
|
|
}
|
|
*/
|
|
|
|
public static final class Encoding extends PoorMansEnum {
|
|
private static PoorMansEnum.EnumStore store = new PoorMansEnum.EnumStore();
|
|
|
|
/* Boilerplate */
|
|
public Encoding next() { return (Encoding) next; }
|
|
public void getAliases(Collection output) { store.getAliases(this, output); }
|
|
public static Encoding get(String s) { return (Encoding) store.get(s); }
|
|
public static Encoding get(int v) { return (Encoding) store.get(v); }
|
|
public static int getMax() { return store.getMax(); }
|
|
|
|
private Encoding() {}
|
|
private static Encoding add(String name) { return (Encoding) store.add(new Encoding(), name);}
|
|
}
|
|
|
|
public static final Encoding
|
|
LATIN1_UNIX = Encoding.add("LATIN1_UNIX"),
|
|
LATIN1_WINDOWS = Encoding.add("LATIN1_WINDOWS"),
|
|
UTF8_UNIX = Encoding.add("UTF8_UNIX"),
|
|
UTF8_WINDOWS = Encoding.add("UTF8_WINDOWS"),
|
|
|
|
//UTF8 = Encoding.add("UTF8"), // for read-only
|
|
//LATIN1 = Encoding.add("LATIN1"), // for read-only
|
|
|
|
// read-only (platform doesn't matter, since it is only line-end)
|
|
|
|
UTF8 = UTF8_WINDOWS,
|
|
LATIN1 = LATIN1_WINDOWS,
|
|
|
|
FIRST = LATIN1_UNIX;
|
|
|
|
|
|
/*
|
|
public static final Encoding
|
|
LATIN1_UNIX = Encoding.LATIN1_UNIX,
|
|
LATIN1_WINDOWS = Encoding.LATIN1_WINDOWS,
|
|
UTF8_UNIX = Encoding.UTF8_UNIX,
|
|
UTF8_WINDOWS = Encoding.UTF8_WINDOWS;
|
|
*/
|
|
|
|
|
|
// Normally use false, false.
|
|
// But for UCD files use true, true
|
|
// Or if they are UTF8, use true, false
|
|
public static PrintWriter openPrintWriter(String filename, Encoding options) throws IOException {
|
|
File file = new File(getOutputName(filename));
|
|
Utility.fixDot();
|
|
System.out.println("Creating File: " + file.getCanonicalPath());
|
|
File parent = new File(file.getParent());
|
|
//System.out.println("Creating File: "+ parent);
|
|
parent.mkdirs();
|
|
return new PrintWriter(
|
|
new UTF8StreamWriter(
|
|
new FileOutputStream(file),
|
|
32*1024,
|
|
options == LATIN1_UNIX || options == UTF8_UNIX,
|
|
options == LATIN1_UNIX || options == LATIN1_WINDOWS));
|
|
}
|
|
|
|
public static String getOutputName(String filename) {
|
|
return UCD_Types.GEN_DIR + filename;
|
|
}
|
|
|
|
public static void print(PrintWriter pw, Collection c, String separator) {
|
|
print(pw, c, separator, null);
|
|
}
|
|
|
|
public interface Breaker {
|
|
public String get(Object current, Object old);
|
|
public boolean filter(Object current); // true is keep
|
|
}
|
|
|
|
public static void printMapOfCollection(PrintWriter pw, Map c, String mainSeparator, String itemSeparator, String subseparator) {
|
|
Iterator it = c.keySet().iterator();
|
|
boolean first = true;
|
|
Object last = null;
|
|
while (it.hasNext()) {
|
|
Object key = it.next();
|
|
Collection value = (Collection) c.get(key);
|
|
if (first) {
|
|
first = false;
|
|
} else {
|
|
pw.print(mainSeparator);
|
|
}
|
|
pw.print(key);
|
|
pw.print(itemSeparator);
|
|
print(pw, value, subseparator);
|
|
}
|
|
}
|
|
|
|
public static void print(PrintWriter pw, Collection c, String separator, Breaker b) {
|
|
Iterator it = c.iterator();
|
|
boolean first = true;
|
|
Object last = null;
|
|
while (it.hasNext()) {
|
|
Object obj = it.next();
|
|
if (b != null && !b.filter(obj)) continue;
|
|
if (first) {
|
|
first = false;
|
|
} else {
|
|
pw.print(separator);
|
|
}
|
|
if (b != null) {
|
|
pw.print(b.get(obj, last));
|
|
} else {
|
|
pw.print(obj);
|
|
}
|
|
last = obj;
|
|
}
|
|
}
|
|
|
|
public static void print(PrintWriter pw, Map c, String pairSeparator, String separator, Breaker b) {
|
|
Iterator it = c.keySet().iterator();
|
|
boolean first = true;
|
|
Object last = null;
|
|
while (it.hasNext()) {
|
|
Object obj = it.next();
|
|
Object result = c.get(obj);
|
|
if (b != null && !b.filter(obj)) continue;
|
|
if (first) {
|
|
first = false;
|
|
} else {
|
|
pw.print(separator);
|
|
}
|
|
if (b != null) {
|
|
pw.print(b.get(obj, last) + pairSeparator + result);
|
|
} else {
|
|
pw.print(obj + pairSeparator + result);
|
|
}
|
|
last = obj;
|
|
}
|
|
}
|
|
|
|
public static BufferedReader openReadFile(String filename, Encoding encoding) throws FileNotFoundException, UnsupportedEncodingException {
|
|
FileInputStream fis = new FileInputStream(filename);
|
|
InputStreamReader isr;
|
|
if (encoding == UTF8_UNIX || encoding == UTF8_WINDOWS) {
|
|
isr = new InputStreamReader(fis, "UTF8");
|
|
} else {
|
|
isr = new InputStreamReader(fis);
|
|
}
|
|
BufferedReader br = new BufferedReader(isr, 32*1024);
|
|
return br;
|
|
}
|
|
|
|
public static void addCount(Map m, Object key, int count) {
|
|
Integer oldCount = (Integer) m.get(key);
|
|
if (oldCount == null) {
|
|
m.put(key, new Integer(count));
|
|
return;
|
|
}
|
|
m.put(key, new Integer(oldCount.intValue() + count));
|
|
}
|
|
|
|
public static void addToSet(Map m, Object key, Object value) {
|
|
Collection set = (Collection) m.get(key);
|
|
if (set == null) {
|
|
set = new TreeSet();
|
|
m.put(key, set);
|
|
}
|
|
set.add(value);
|
|
}
|
|
|
|
public static void addToList(Map m, Object key, Object value, boolean unique) {
|
|
Collection set = (Collection) m.get(key);
|
|
if (set == null) {
|
|
set = new ArrayList();
|
|
m.put(key, set);
|
|
}
|
|
if (!unique || !set.contains(value)) set.add(value);
|
|
}
|
|
|
|
public static String readDataLine(BufferedReader br) throws IOException {
|
|
String originalLine = "";
|
|
String line = "";
|
|
|
|
try {
|
|
line = originalLine = br.readLine();
|
|
if (line == null) return null;
|
|
if (line.length() > 0 && line.charAt(0) == 0xFEFF) line = line.substring(1);
|
|
int commentPos = line.indexOf('#');
|
|
if (commentPos >= 0) line = line.substring(0, commentPos);
|
|
line = line.trim();
|
|
} catch (Exception e) {
|
|
throw new ChainException("Line \"{0}\", \"{1}\"", new String[] {originalLine, line}, e);
|
|
}
|
|
return line;
|
|
}
|
|
|
|
public static void appendFile(String filename, Encoding encoding, PrintWriter output) throws IOException {
|
|
appendFile(filename, encoding, output, null);
|
|
}
|
|
|
|
public static void appendFile(String filename, Encoding encoding, PrintWriter output, String[] replacementList) throws IOException {
|
|
BufferedReader br = openReadFile(filename, encoding);
|
|
/*
|
|
FileInputStream fis = new FileInputStream(filename);
|
|
InputStreamReader isr = (encoding == UTF8_UNIX || encoding == UTF8_WINDOWS) ? new InputStreamReader(fis, "UTF8") : new InputStreamReader(fis);
|
|
BufferedReader br = new BufferedReader(isr, 32*1024);
|
|
*/
|
|
while (true) {
|
|
String line = br.readLine();
|
|
if (line == null) break;
|
|
if (replacementList != null) {
|
|
for (int i = 0; i < replacementList.length; i += 2) {
|
|
line = replace(line, replacementList[i], replacementList[i+1]);
|
|
}
|
|
}
|
|
output.println(line);
|
|
}
|
|
}
|
|
|
|
public static boolean renameIdentical(String file1, String file2, String batFile) throws IOException {
|
|
if (file1 == null) {
|
|
System.out.println("Null file");
|
|
return false;
|
|
}
|
|
|
|
boolean identical = false;
|
|
BufferedReader br1 = new BufferedReader(new FileReader(file1), 32*1024);
|
|
BufferedReader br2 = new BufferedReader(new FileReader(file2), 32*1024);
|
|
String line1 = "";
|
|
String line2 = "";
|
|
try {
|
|
for (int lineCount = 0; ; ++lineCount) {
|
|
line1 = getLineWithoutFluff(br1, lineCount == 0);
|
|
line2 = getLineWithoutFluff(br2, lineCount == 0);
|
|
if (line1 == null) {
|
|
if (line2 == null) identical = true;
|
|
break;
|
|
}
|
|
if (!line1.equals(line2)) {
|
|
break;
|
|
}
|
|
}
|
|
} finally {
|
|
br1.close();
|
|
br2.close();
|
|
}
|
|
if (identical) {
|
|
renameIdentical(file2);
|
|
if (batFile != null) renameIdentical(batFile);
|
|
return true;
|
|
} else {
|
|
if (line1 == null) line1 = "<end of file>";
|
|
if (line2 == null) line2 = "<end of file>";
|
|
fixDot();
|
|
System.out.println("Found difference in : " + file1 + ", " + file2);
|
|
int diff = compare(line1, line2);
|
|
System.out.println(" Line1: '" + line1.substring(0,diff) + "', '" + line1.substring(diff));
|
|
System.out.println(" Line2: '" + line2.substring(0,diff) + "', '" + line2.substring(diff));
|
|
return false;
|
|
}
|
|
}
|
|
|
|
static void renameIdentical(String file2) {
|
|
File foo = new File(file2);
|
|
File newName = new File(foo.getParent(), "UNCHANGED-" + foo.getName());
|
|
if (newName.exists()) {
|
|
for (int i = 1; newName.exists(); ++i) {
|
|
newName = new File(foo.getParent(), "UNCHANGED" + i + "-" + foo.getName());
|
|
}
|
|
}
|
|
System.out.println("IDENTICAL TO PREVIOUS, RENAMING : " + foo);
|
|
System.out.println("TO : " + newName);
|
|
boolean renameResult = foo.renameTo(newName);
|
|
if (!renameResult) System.out.println("Couldn't rename!");
|
|
}
|
|
|
|
static String getLineWithoutFluff(BufferedReader br1, boolean first) throws IOException {
|
|
while (true) {
|
|
String line1 = br1.readLine();
|
|
if (line1 == null) return line1;
|
|
line1 = line1.trim();
|
|
if (line1.length() == 0) continue;
|
|
if (line1.equals("#")) continue;
|
|
if (line1.startsWith("# Generated")) continue;
|
|
if (line1.startsWith("# Date")) continue;
|
|
if (first && line1.startsWith("#")) {
|
|
first = false;
|
|
continue;
|
|
}
|
|
return line1;
|
|
}
|
|
}
|
|
|
|
/** Returns -1 if strings are equal; otherwise the position they are different at
|
|
*/
|
|
public static int compare(String a, String b) {
|
|
int len = a.length();
|
|
if (len > b.length()) len = b.length();
|
|
for (int i = 0; i < len; ++i) {
|
|
if (a.charAt(i) != b.charAt(i)) return i;
|
|
}
|
|
if (a.length() != b.length()) return len;
|
|
return -1;
|
|
}
|
|
|
|
public static void copyTextFile(String filename, Encoding encoding, String newName, String[] replacementList) throws IOException {
|
|
PrintWriter out = Utility.openPrintWriter(newName, UTF8_WINDOWS);
|
|
appendFile(filename, encoding, out, replacementList);
|
|
out.close();
|
|
}
|
|
|
|
public static void copyTextFile(String filename, Encoding encoding, String newName) throws IOException {
|
|
copyTextFile(filename, encoding, newName, null);
|
|
}
|
|
|
|
public static BufferedReader openUnicodeFile(String filename, String version, boolean show, Encoding encoding) throws IOException {
|
|
String name = getMostRecentUnicodeDataFile(filename, version, true, show);
|
|
if (name == null) return null;
|
|
return openReadFile(name, encoding); // new BufferedReader(new FileReader(name),32*1024);
|
|
}
|
|
|
|
public static String getMostRecentUnicodeDataFile(String filename, String version,
|
|
boolean acceptLatest, boolean show) throws IOException {
|
|
// get all the files in the directory
|
|
|
|
int compValue = acceptLatest ? 0 : 1;
|
|
for (int i = 0; i < searchPath.length; ++i) {
|
|
if (version.length() != 0 && version.compareTo(searchPath[i]) < compValue) continue;
|
|
|
|
String directoryName = UCD_Types.UCD_DIR + File.separator + searchPath[i] + "-Update" + File.separator;
|
|
if (show) System.out.println("Trying: '" + directoryName + "', '" + filename + "'");
|
|
String result = searchDirectory(new File(directoryName), filename, show);
|
|
if (result != null) return result;
|
|
|
|
}
|
|
return null;
|
|
}
|
|
|
|
public static Set getDirectoryContentsLastFirst(File directory) {
|
|
Set result = new TreeSet(new Comparator() {
|
|
public int compare(Object a, Object b) {
|
|
return ((Comparable) b).compareTo(a);
|
|
}
|
|
});
|
|
result.addAll(java.util.Arrays.asList(directory.list()));
|
|
return result;
|
|
}
|
|
|
|
public static String searchDirectory(File directory, String filename, boolean show) throws IOException {
|
|
Iterator it = getDirectoryContentsLastFirst(directory).iterator();
|
|
while (it.hasNext()) {
|
|
String fn = (String) it.next();
|
|
File foo = new File(directory + File.separator + fn);
|
|
// System.out.println("\tChecking: '" + foo.getCanonicalPath() + "'");
|
|
if (foo.isDirectory()) {
|
|
String attempt = searchDirectory(foo, filename, show);
|
|
if (attempt != null) return attempt;
|
|
}
|
|
if (fn.endsWith(".txt") && fn.startsWith(filename)) {
|
|
if (show) System.out.println("\tFound: '" + fn + "'");
|
|
return foo.getCanonicalPath();
|
|
}
|
|
}
|
|
return null;
|
|
}
|
|
|
|
public static void writeHtmlHeader(PrintWriter log, String title) {
|
|
log.println("<html><head>");
|
|
log.println("<meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
|
|
log.println("<title>" + title + "</title>");
|
|
log.println("<style><!--");
|
|
log.println("table { border-collapse: collapse; border: 1 solid blue }");
|
|
log.println("td { border: 1 solid blue; padding: 2 }");
|
|
log.println("th { border: 1 solid blue; padding: 2 }");
|
|
log.println("--></style>");
|
|
log.println("</head><body>");
|
|
}
|
|
|
|
/**
|
|
* Replaces all occurances of piece with replacement, and returns new String
|
|
*/
|
|
public static String replace(String source, String piece, String replacement) {
|
|
if (source == null || source.length() < piece.length()) return source;
|
|
int pos = 0;
|
|
while (true) {
|
|
pos = source.indexOf(piece, pos);
|
|
if (pos < 0) return source;
|
|
source = source.substring(0,pos) + replacement + source.substring(pos + piece.length());
|
|
pos += replacement.length();
|
|
}
|
|
}
|
|
|
|
public static String replace(String source, String[][] replacements) {
|
|
for (int i = 0; i < replacements.length; ++i) {
|
|
source = replace(source, replacements[i][0], replacements[i][1]);
|
|
}
|
|
return source;
|
|
}
|
|
|
|
public static String replace(String source, String[][] replacements, boolean reverse) {
|
|
if (!reverse) return replace(source, replacements);
|
|
for (int i = 0; i < replacements.length; ++i) {
|
|
source = replace(source, replacements[i][1], replacements[i][0]);
|
|
}
|
|
return source;
|
|
}
|
|
|
|
public static String getStack() {
|
|
Exception e = new Exception();
|
|
StringWriter sw = new StringWriter();
|
|
PrintWriter pw = new PrintWriter(sw);
|
|
e.printStackTrace(pw);
|
|
pw.flush();
|
|
return "Showing Stack with fake " + sw.getBuffer().toString();
|
|
}
|
|
|
|
public static String getUnicodeImage(int cp) {
|
|
String code = hex(cp, 4);
|
|
return "<img alt='U+" + code + "' src='http://www.unicode.org/cgi-bin/refglyph?24-" + code + "' style='vertical-align:middle'>";
|
|
}
|
|
|
|
static PrintWriter showSetNamesPw;
|
|
|
|
public static void showSetDifferences(String name1, UnicodeSet set1, String name2, UnicodeSet set2, boolean separateLines, UCD ucd) {
|
|
if (showSetNamesPw == null) showSetNamesPw = new PrintWriter(System.out);
|
|
showSetDifferences(showSetNamesPw, name1, set1, name2, set2, separateLines, false, null, ucd);
|
|
}
|
|
|
|
public static void showSetDifferences(PrintWriter pw, String name1, UnicodeSet set1, String name2, UnicodeSet set2,
|
|
boolean separateLines, boolean withChar, UnicodeMap names, UCD ucd) {
|
|
|
|
UnicodeSet temp = new UnicodeSet(set1).removeAll(set2);
|
|
pw.println();
|
|
pw.println("In " + name1 + ", but not in " + name2 + ": ");
|
|
showSetNames(pw, "\t", temp, separateLines, false, withChar, names, ucd);
|
|
|
|
temp = new UnicodeSet(set2).removeAll(set1);
|
|
pw.println();
|
|
pw.println("Not in " + name1 + ", but in " + name2 + ": ");
|
|
showSetNames(pw, "\t", temp, separateLines, false, withChar, names, ucd);
|
|
|
|
temp = new UnicodeSet(set2).retainAll(set1);
|
|
pw.println();
|
|
pw.println("In both " + name1 + " and " + name2 + ": ");
|
|
pw.println(temp.size() == 0 ? "<none>" : ""+ temp);
|
|
// showSetNames(pw, "\t", temp, false, false, withChar, names, ucd);
|
|
}
|
|
|
|
public static void showSetNames(String prefix, UnicodeSet set, boolean separateLines, UCD ucd) {
|
|
showSetNames(prefix, set, separateLines, false, false, ucd);
|
|
}
|
|
|
|
public static void showSetNames(String prefix, UnicodeSet set, boolean separateLines, boolean IDN, UCD ucd) {
|
|
showSetNames(prefix, set, separateLines, IDN, false, ucd);
|
|
}
|
|
|
|
public static void showSetNames(PrintWriter pw, String prefix, UnicodeSet set, boolean separateLines, boolean IDN, UCD ucd) {
|
|
showSetNames( pw, prefix, set, separateLines, IDN, false, null, ucd);
|
|
}
|
|
|
|
public static void showSetNames(String prefix, UnicodeSet set, boolean separateLines, boolean IDN, boolean withChar, UCD ucd) {
|
|
if (showSetNamesPw == null) showSetNamesPw = new PrintWriter(System.out);
|
|
showSetNames(showSetNamesPw, prefix, set, separateLines, IDN, withChar, null, ucd);
|
|
}
|
|
|
|
static java.text.NumberFormat nf = java.text.NumberFormat.getInstance();
|
|
|
|
public static void showSetNames(PrintWriter pw, String prefix, UnicodeSet set, boolean separateLines, boolean IDN,
|
|
boolean withChar, UnicodeMap names, UCD ucd) {
|
|
if (set.size() == 0) {
|
|
pw.println(prefix + "<none>");
|
|
pw.flush();
|
|
return;
|
|
}
|
|
boolean useHTML = false;
|
|
int count = set.getRangeCount();
|
|
for (int i = 0; i < count; ++i) {
|
|
int start = set.getRangeStart(i);
|
|
int end = set.getRangeEnd(i);
|
|
if (separateLines || (IDN && isSeparateLineIDN(start,end,ucd))) {
|
|
for (int cp = start; cp <= end; ++cp) {
|
|
if (!IDN) pw.println(prefix + ucd.getCode(cp)
|
|
+ "\t# "
|
|
+ (useHTML ? "(" + getUnicodeImage(cp) + ") " : "")
|
|
+ (withChar && (cp >= 0x20) ? "(" + UTF16.valueOf(cp) + ") " : "")
|
|
+ (names != null ? names.getLabel(cp) + " " : "")
|
|
+ ucd.getName(cp)
|
|
+ (useHTML ? "<br>" : ""));
|
|
else {
|
|
pw.println(prefix + Utility.hex(cp,4) + "; " + ucd.getName(cp));
|
|
}
|
|
}
|
|
} else {
|
|
if (!IDN) {
|
|
pw.println(prefix + ucd.getCode(start)
|
|
+ ((start != end) ? (".." + ucd.getCode(end)) : "")
|
|
+ "\t# "
|
|
+ (withChar && (start >= 0x20) ? " (" + UTF16.valueOf(start)
|
|
+ ((start != end) ? (".." + UTF16.valueOf(end)) : "") + ") " : "")
|
|
+ ucd.getName(start) + ((start != end) ? (".." + ucd.getName(end)) : "")
|
|
);
|
|
} else {
|
|
|
|
pw.println(prefix + Utility.hex(start,4)
|
|
+ ((start != end) ? ("-" + Utility.hex(end,4)) : "")
|
|
+ (ucd.isAssigned(start)
|
|
? "; " + ucd.getName(start) + ((start != end)
|
|
? ("-" + ucd.getName(end))
|
|
: "")
|
|
: "")
|
|
);
|
|
}
|
|
}
|
|
}
|
|
pw.println("Total: " + nf.format(set.size()));
|
|
pw.flush();
|
|
}
|
|
|
|
private static boolean isSeparateLineIDN(int cp, UCD ucd) {
|
|
if (ucd.hasComputableName(cp)) return false;
|
|
int cat = ucd.getCategory(cp);
|
|
if (cat == UCD_Types.Cn) return false;
|
|
if (ucd.getCategory(cp) == UCD_Types.Cc && !ucd.getBinaryProperty(cp, UCD_Types.White_space)) return false;
|
|
return true;
|
|
}
|
|
|
|
private static boolean isSeparateLineIDN(int start, int end, UCD ucd) {
|
|
return (isSeparateLineIDN(start, ucd) || isSeparateLineIDN(end, ucd));
|
|
}
|
|
} |