scuffed-code/tools/unicodetools/com/ibm/text/UCD/CompactName.java

273 lines
8.8 KiB
Java
Raw Normal View History

2001-08-31 00:30:17 +00:00
/**
*******************************************************************************
* Copyright (C) 1996-2001, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/CompactName.java,v $
* $Date: 2001/08/31 00:30:17 $
* $Revision: 1.2 $
*
*******************************************************************************
*/
2001-08-30 20:50:18 +00:00
package com.ibm.text.UCD;
import java.io.IOException;
import java.util.*;
import java.io.*;
import java.text.*;
/**
 * Builds a table of tokens that represent strings as pairs of smaller tokens.
 *
 * <p>A token is an {@code int} of one of two kinds:
 * <ul>
 * <li>a <em>literal</em> token (bit {@code 0x8000} set) packs up to three
 *     characters, five bits each, using the codes in {@link #compactMap};</li>
 * <li>a <em>table</em> token is an index into {@link #tokenList}; the entry
 *     holds a lead token in its upper 16 bits and a trail token in its lower
 *     16 bits, and the token's string is the concatenation of the two
 *     (with a space between them for indices at or above
 *     {@link #spacedMinimum}).</li>
 * </ul>
 *
 * <p>All state is static and unsynchronized.
 *
 * <p>NOTE(review): {@link #tokenList} has 40000 slots, but a table index of
 * 0x8000 or more has the literal bit set and would be decoded as a literal.
 * Nothing here prevents the table from growing that far - confirm the data
 * stays below 32768 tokens.
 */
public class CompactName {

    static final boolean DEBUG = false;

    /** Bit that marks a token as a packed literal rather than a table index. */
    private static final int LITERAL_FLAG = 0x8000;

    /** Mask for one five-bit character code inside a literal token. */
    private static final int CODE_MASK = 0x1F;

    /** Small smoke test: round-trips a literal, adds one word, dumps the table. */
    public static void main(String[] args) throws IOException {
        int test = tokenFromString("ABZ");
        String ss = stringFromToken(test);
        System.out.println(ss);

        CompactName.addWord("ABSOLUTEISM");

        for (int i = 0; i < CompactName.lastToken; ++i) {
            String s = CompactName.stringFromToken(i);
            System.out.println(s);
        }
    }

    /** Maps a character (0..127) to its five-bit code; unmapped characters give 0. */
    static final char[] compactMap = new char[128];

    /** Inverse of {@link #compactMap}: maps a code back to its character; unused codes give 0. */
    static final char[] compactUnmap = new char[128];

    static {
        // Code 0 is reserved for "no character"; 'A'..'Z' take 1..26, then
        // '-', '>', '<' and '*' take 27..30.
        char counter = 0;
        compactMap[0] = counter++;
        for (int i = 'A'; i <= 'Z'; ++i) {
            compactMap[i] = counter++;
        }
        compactMap['-'] = counter++;
        compactMap['>'] = counter++;
        compactMap['<'] = counter++;
        compactMap['*'] = counter++;

        compactUnmap[0] = 0;
        for (char i = 0; i < compactUnmap.length; ++i) {
            int x = compactMap[i];
            if (x != 0) compactUnmap[x] = i;
        }
    }

    // Disabled earlier draft of an escape-based encoding, kept for reference.
    /*
    static String expand(String s) {
        StringBuffer result = new StringBuffer();
        for (int i = 0; i < s.length(); ++i) {
            int m = s.charAt(i);
            if (m == 31 && i < s.length() + 1) {
                m = 31 + s.charAt(++i);
            }
            result.append(compactUnmap[m]);
        }
        return result.toString();
    }

    static String compact(String s) {
        StringBuffer result = new StringBuffer();
        for (int i = 0; i < s.length(); ++i) {
            int m = compactMap[s.charAt(i)];
            if (m >= 31) {
                result.append((char)31);
                m -= 31;
            }
            result.append(m);
        }
        return result.toString();
    }
    */

    /** Maps each registered string to its table token (an {@code Integer}). */
    static Map string_token = new HashMap();

    /** Not used by any code in this class. */
    static Map token_string = new HashMap();

    /** Table entries: lead token in the upper 16 bits, trail token in the lower 16. */
    static int[] tokenList = new int[40000];
    static final int tokenStart = 0;

    /** Number of entries of {@link #tokenList} in use; also the next index to assign. */
    static int lastToken = 0;

    /**
     * Table tokens at or above this index are rendered with a space between
     * their lead and trail parts. Set by {@link #startLines()}.
     */
    static int spacedMinimum = Integer.MAX_VALUE;

    /**
     * Tests the literal bit of a token.
     *
     * <p>The "not found" value -1 also has this bit set, so
     * {@code !isLiteral(t)} is only true for values with bit 0x8000 clear;
     * {@link #addWord(String)} relies on that.
     */
    static boolean isLiteral(int i) {
        return (i & LITERAL_FLAG) != 0;
    }

    /**
     * Appends a new table entry for {@code s} made of the given lead and trail
     * tokens and registers {@code s} in {@link #string_token}.
     *
     * @param s     the string the new token stands for
     * @param lead  token for the first part of {@code s}
     * @param trail token for the rest of {@code s}; only its low 16 bits are kept
     * @return the index of the new table entry
     * @throws IllegalArgumentException if {@code s} already has a table token
     */
    static int addTokenForString(String s, int lead, int trail) {
        Object in = string_token.get(s);
        if (in != null) throw new IllegalArgumentException();
        int value = (lead << 16) + (trail & 0xFFFF);
        int result = lastToken;
        tokenList[lastToken++] = value;

        if (DEBUG) {
            System.out.println("'" + s + "', tokenList[" + result + "] = lead: " + lead + ", trail: " + trail);
            String roundTrip = stringFromToken(result);
            if (!roundTrip.equals(s)) {
                System.out.println("\t*** No Round Trip: '" + roundTrip + "'");
            }
        }
        string_token.put(s, new Integer(result));
        return result;
    }

    /**
     * Expands a token back into its string.
     *
     * @param i a literal token or the index of an assigned table entry
     * @return the decoded string
     * @throws IllegalArgumentException if {@code i} is not a literal and is
     *         not the index of an assigned table entry
     */
    static String stringFromToken(int i) {
        String result;
        if (isLiteral(i)) {
            char first = compactUnmap[(i >> 10) & CODE_MASK];
            char second = compactUnmap[(i >> 5) & CODE_MASK];
            char third = compactUnmap[i & CODE_MASK];
            // The first character is always emitted; the others only if present.
            result = String.valueOf(first);
            if (second != 0) result += String.valueOf(second);
            if (third != 0) result += String.valueOf(third);
        } else if (i < 0 || i >= lastToken) {
            // Index lastToken is not assigned yet: reading it would yield 0 and
            // recurse on token 0, which never terminates on an empty table.
            throw new IllegalArgumentException("bad token: " + i);
        } else {
            int value = tokenList[i];
            int lead = value >>> 16;
            int trail = value & 0xFFFF;
            if (i >= spacedMinimum) result = stringFromToken(lead) + ' ' + stringFromToken(trail);
            else result = stringFromToken(lead) + stringFromToken(trail);
        }
        if (DEBUG) System.out.println("token: " + i + " => '" + result + "'");
        return result;
    }

    /**
     * Looks up the token for a string without modifying the table.
     *
     * <p>Strings of one to three characters are always encoded as a literal;
     * characters without a code in {@link #compactMap} are encoded as 0.
     * Longer strings are looked up in {@link #string_token}.
     *
     * @param s a non-empty string (an empty string fails on {@code charAt(0)})
     * @return the token, or -1 if {@code s} is longer than three characters
     *         and has not been registered
     */
    static int tokenFromString(String s) {
        if (s.length() <= 3) {
            int first = compactMap[s.charAt(0)];
            int second = compactMap[s.length() > 1 ? s.charAt(1) : 0];
            int third = compactMap[s.length() > 2 ? s.charAt(2) : 0];
            return LITERAL_FLAG + (first << 10) + (second << 5) + third;
        }
        Object in = string_token.get(s);
        if (in == null) return -1;
        return ((Integer)in).intValue();
    }

    /**
     * Returns the token for {@code s}, adding table entries as needed.
     *
     * <p>The word is split into two parts. A split where both parts already
     * have tokens is used immediately; otherwise the split with the longest
     * part that is already a table token is used and the other part is added
     * recursively; failing that, the word is cut at a multiple of three
     * characters and both halves are added recursively.
     *
     * @param s the word to add
     * @return the existing or newly created token for {@code s}
     */
    static int addWord(String s) {
        int result = tokenFromString(s);
        if (result != -1) return result;
        int bestLen = 0;
        int best_i = 0;

        int limit = s.length() - 1;

        for (int i = limit; i >= 1; --i) {
            String firstPart = s.substring(0, i);
            String lastPart = s.substring(i);
            int lead = tokenFromString(firstPart);
            int trail = tokenFromString(lastPart);

            if (lead >= 0 && trail >= 0) { // if both match, return immediately with pair
                if (DEBUG) show(s, firstPart, lastPart, "MATCH BOTH");
                return addTokenForString(s, lead, trail);
            }
            // -1 counts as "literal" here, so these branches only see parts
            // that are already table tokens.
            if (!isLiteral(lead)) {
                if (i > bestLen) {
                    bestLen = i;
                    best_i = i;
                }
            }
            if (!isLiteral(trail)) {
                int end_i = s.length() - i;
                if (end_i > bestLen) {
                    bestLen = end_i;
                    best_i = i;
                }
            }
        }
        if (bestLen > 0) { // if one matches, recurse -- and return pair
            String firstPart = s.substring(0, best_i);
            String lastPart = s.substring(best_i);
            int lead = tokenFromString(firstPart);
            int trail = tokenFromString(lastPart);
            if (lead >= 0) {
                if (DEBUG) show(s, firstPart, lastPart, "MATCH FIRST");
                return addTokenForString(s, lead, addWord(lastPart));
            } else {
                if (DEBUG) show(s, firstPart, lastPart, "MATCH SECOND");
                return addTokenForString(s, addWord(firstPart), trail);
            }
        }

        // break at multiple of 3

        best_i = ((s.length() + 1) / 6) * 3;
        String firstPart = s.substring(0, best_i);
        String lastPart = s.substring(best_i);
        if (DEBUG) show(s, firstPart, lastPart, "Fallback");
        return addTokenForString(s, addWord(firstPart), addWord(lastPart));
    }

    /** Prints one debug line describing how {@code s} was split. */
    static void show(String s, String firstPart, String lastPart, String comment) {
        System.out.println((s) + " => '" + (firstPart)
            + "' # '" + (lastPart) + "' " + comment);
    }

    /**
     * Marks the current end of the table as the start of the "line" tokens:
     * every table token added from now on is rendered with a space between
     * its two parts.
     */
    static void startLines() {
        spacedMinimum = lastToken;
    }

    /**
     * Returns the token for a space-separated line, adding table entries as
     * needed. The line is only ever split at a space, and the space itself is
     * dropped (it is restored when the token is decoded, see
     * {@link #spacedMinimum}).
     *
     * @param s the line to add
     * @return the existing or newly created token for {@code s}
     * @throws IllegalArgumentException if some part of the line has no token
     *         and contains no space at which it could be split further
     */
    static int addLine(String s) {
        int result = tokenFromString(s);
        if (result != -1) return result;
        int bestLen = 0;
        int best_i = 0;

        int limit = s.length() - 2;

        for (int i = limit; i >= 1; --i) {
            char c = s.charAt(i);
            if (c != ' ') continue;
            String firstPart = s.substring(0, i);
            String lastPart = s.substring(i+1);
            int lead = tokenFromString(firstPart);
            int trail = tokenFromString(lastPart);

            if (lead >= 0 && trail >= 0) { // if both match, return immediately with pair
                if (DEBUG) show(s, firstPart, lastPart, "MATCH BOTH");
                return addTokenForString(s, lead, trail);
            }
            // Unlike addWord, the split is chosen by part length alone,
            // whether or not either part is already known.
            if (i > bestLen) {
                bestLen = i;
                best_i = i;
            }
            int end_i = s.length() - i - 1;
            if (end_i > bestLen) {
                bestLen = end_i;
                best_i = i;
            }
        }
        if (bestLen > 0) { // if one matches, recurse -- and return pair
            String firstPart = s.substring(0, best_i);
            String lastPart = s.substring(best_i + 1);
            int lead = tokenFromString(firstPart);
            int trail = tokenFromString(lastPart);
            if (lead >= 0) {
                if (DEBUG) show(s, firstPart, lastPart, "MATCH FIRST");
                return addTokenForString(s, lead, addLine(lastPart));
            }
            if (DEBUG) show(s, firstPart, lastPart, "MATCH SECOND");
            int leadToken = addLine(firstPart);
            // The trail part may be unknown too; storing -1 would silently
            // record 0xFFFF as the trail token, so resolve it (or fail) instead.
            int trailToken = trail >= 0 ? trail : addLine(lastPart);
            return addTokenForString(s, leadToken, trailToken);
        }

        System.out.println("SHOULD HAVE MATCHED!!");
        throw new IllegalArgumentException("SHOULD HAVE MATCHED!! " + s);
    }
}