351 lines
12 KiB
Java
351 lines
12 KiB
Java
|
/**
|
||
|
*******************************************************************************
|
||
|
* Copyright (C) 1996-2001, International Business Machines Corporation and *
|
||
|
* others. All Rights Reserved. *
|
||
|
*******************************************************************************
|
||
|
*
|
||
|
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/CheckCollator.java,v $
|
||
|
* $Date: 2002/08/08 15:35:01 $
|
||
|
* $Revision: 1.1 $
|
||
|
*
|
||
|
*******************************************************************************
|
||
|
*/
|
||
|
|
||
|
// http://java.sun.com/j2se/1.3/docs/guide/intl/encoding.doc.html
|
||
|
|
||
|
package com.ibm.text.UCD;
|
||
|
|
||
|
import java.util.*;
|
||
|
import java.io.*;
|
||
|
import java.text.NumberFormat;
|
||
|
|
||
|
import com.ibm.text.utility.*;
|
||
|
import com.ibm.icu.text.UTF16;
|
||
|
import com.ibm.icu.text.UnicodeSet;
|
||
|
|
||
|
/**
|
||
|
* This is a quick and dirty program to get some idea of collation performance, comparing old Java to new stuff.
|
||
|
*/
|
||
|
abstract public class CheckCollator {
|
||
|
static final String PREFIX = "C:\\ICUInternal\\icu4c\\collation-perf-data\\TestNames_";
|
||
|
static final boolean DO_RAW = false;
|
||
|
|
||
|
static final NumberFormat nf = NumberFormat.getInstance();
|
||
|
static final NumberFormat percent = NumberFormat.getPercentInstance();
|
||
|
static {
|
||
|
nf.setMaximumFractionDigits(2);
|
||
|
}
|
||
|
|
||
|
public static void main(String[] args) throws IOException {
|
||
|
|
||
|
// later, drive off of args
|
||
|
|
||
|
// choices are: Asian, Chinese, Japanese, Japanese_h, Japanese_k, Korean, Latin, Russian, Thai
|
||
|
test(Locale.KOREAN, "Korean");
|
||
|
test(Locale.ENGLISH, "Latin");
|
||
|
test(Locale.FRENCH, "Latin");
|
||
|
test(Locale.JAPANESE, "Japanese");
|
||
|
}
|
||
|
|
||
|
public static void test(Locale loc, String name) throws IOException {
|
||
|
|
||
|
System.out.println();
|
||
|
System.out.println("Testing " + loc.getDisplayName() + ", file: " + name);
|
||
|
System.out.println();
|
||
|
|
||
|
// get test data
|
||
|
|
||
|
String fileName = PREFIX + name + ".txt";
|
||
|
|
||
|
FileInputStream fis = new FileInputStream(fileName);
|
||
|
InputStreamReader isr = new InputStreamReader(fis, "UnicodeLittle");
|
||
|
BufferedReader br = new BufferedReader(isr, 32*1024);
|
||
|
|
||
|
int counter = 0;
|
||
|
|
||
|
ArrayList list = new ArrayList();
|
||
|
while (true) {
|
||
|
String line = Utility.readDataLine(br);
|
||
|
if (line == null) break;
|
||
|
if (line.length() == 0) continue;
|
||
|
Utility.dot(counter++);
|
||
|
list.add(line);
|
||
|
}
|
||
|
System.out.println("Read " + counter + " lines in file");
|
||
|
|
||
|
int limit = 800; // put a limit on it to save time
|
||
|
|
||
|
// pump it up if there aren't very many
|
||
|
while (list.size() < limit) {
|
||
|
list.addAll(list);
|
||
|
}
|
||
|
|
||
|
int size = list.size();
|
||
|
|
||
|
|
||
|
// later, adjust these so we always get a reasonble number of tries
|
||
|
|
||
|
int extraIterations = 200;
|
||
|
if (size > limit) size = limit;
|
||
|
|
||
|
String[] tests = new String [size];
|
||
|
|
||
|
for (int i = 0; i < size; ++i) {
|
||
|
tests[i] = (String) list.get(i);
|
||
|
}
|
||
|
|
||
|
// get collators
|
||
|
|
||
|
com.ibm.icu.text.Collator newCol = com.ibm.icu.text.Collator.getInstance(loc);
|
||
|
java.text.Collator oldCol = java.text.Collator.getInstance(loc);
|
||
|
|
||
|
|
||
|
double startTime, endTime;
|
||
|
double delta, oldDelta;
|
||
|
String probe;
|
||
|
|
||
|
|
||
|
// load classes at least once before starting
|
||
|
|
||
|
newCol.compare("a", "b");
|
||
|
oldCol.compare("a", "b");
|
||
|
|
||
|
// ================================================
|
||
|
// check sort key size
|
||
|
|
||
|
int stringSize = 0, newSize = 0, oldSize = 0;
|
||
|
|
||
|
for (int i = 0; i < size; ++i) {
|
||
|
stringSize += tests[i].length() * 2;
|
||
|
byte[] newKey = newCol.getCollationKey(tests[i]).toByteArray();
|
||
|
newSize += newKey.length;
|
||
|
byte[] oldKey = oldCol.getCollationKey(tests[i]).toByteArray();
|
||
|
oldSize += oldKey.length;
|
||
|
}
|
||
|
delta = stringSize/(size + 0.0);
|
||
|
System.out.println("string size: " + nf.format(delta) + " bytes per key");
|
||
|
System.out.println();
|
||
|
|
||
|
delta = oldDelta = (oldSize/(size + 0.0));
|
||
|
System.out.println("old sortkey size: " + nf.format(delta) + " bytes per key ");
|
||
|
delta = (newSize/(size + 0.0));
|
||
|
System.out.println("new sortkey size: " + nf.format(delta) + " bytes per key " + percent.format(delta/oldDelta));
|
||
|
System.out.println();
|
||
|
|
||
|
// ================================================
|
||
|
// Sort Key: old time
|
||
|
|
||
|
// get overhead time
|
||
|
counter = 0;
|
||
|
startTime = System.currentTimeMillis();
|
||
|
|
||
|
for (int i = 0; i < size; ++i) {
|
||
|
for (int j = 0; j < size; ++j) {
|
||
|
counter++;
|
||
|
}
|
||
|
}
|
||
|
endTime = System.currentTimeMillis();
|
||
|
double overhead = (1000*(endTime - startTime) / counter);
|
||
|
System.out.println("overhead: " + nf.format((endTime - startTime) / counter) + " micros");
|
||
|
|
||
|
counter = 0;
|
||
|
startTime = System.currentTimeMillis();
|
||
|
|
||
|
for (int i = 0; i < size; ++i) {
|
||
|
probe = tests[i];
|
||
|
for (int k = 0; k < extraIterations; ++k) {
|
||
|
oldCol.getCollationKey(probe);
|
||
|
counter++;
|
||
|
}
|
||
|
}
|
||
|
endTime = System.currentTimeMillis();
|
||
|
oldDelta = delta = (1000*(endTime - startTime) / counter) - overhead;
|
||
|
System.out.println("Old sort key time: " + nf.format(delta)
|
||
|
+ " micros (" + counter + " iterations)");
|
||
|
|
||
|
// Sort Key: new time
|
||
|
|
||
|
counter = 0;
|
||
|
startTime = System.currentTimeMillis();
|
||
|
|
||
|
for (int i = 0; i < size; ++i) {
|
||
|
probe = tests[i];
|
||
|
for (int k = 0; k < extraIterations; ++k) {
|
||
|
newCol.getCollationKey(probe);
|
||
|
counter++;
|
||
|
}
|
||
|
}
|
||
|
endTime = System.currentTimeMillis();
|
||
|
delta = (1000*(endTime - startTime) / counter) - overhead;
|
||
|
System.out.println("New sort key time: " + nf.format(delta)
|
||
|
+ " micros (" + counter + " iterations) " + percent.format(delta/oldDelta));
|
||
|
System.out.println();
|
||
|
|
||
|
// ================================================
|
||
|
// Raw Compare
|
||
|
|
||
|
if (DO_RAW) {
|
||
|
// get overhead time
|
||
|
counter = 0;
|
||
|
startTime = System.currentTimeMillis();
|
||
|
int opt = 0; // to keep the compiler from optimizing out
|
||
|
|
||
|
for (int i = 0; i < size; ++i) {
|
||
|
probe = tests[i];
|
||
|
for (int j = 0; j < size; ++j) {
|
||
|
opt ^= probe.compareTo(tests[j]);
|
||
|
counter++;
|
||
|
}
|
||
|
}
|
||
|
endTime = System.currentTimeMillis();
|
||
|
overhead = (1000*(endTime - startTime) / counter);
|
||
|
System.out.println("overhead: " + nf.format((endTime - startTime) / counter) + " micros");
|
||
|
|
||
|
// Raw Compare: old time
|
||
|
|
||
|
counter = 0;
|
||
|
startTime = System.currentTimeMillis();
|
||
|
|
||
|
for (int i = 0; i < size; ++i) {
|
||
|
probe = tests[i];
|
||
|
for (int j = 0; j < size; ++j) {
|
||
|
opt ^= oldCol.compare(probe, tests[j]);
|
||
|
counter++;
|
||
|
}
|
||
|
}
|
||
|
endTime = System.currentTimeMillis();
|
||
|
oldDelta = delta = (1000*(endTime - startTime) / counter) - overhead;
|
||
|
System.out.println("Old raw compare time: " + nf.format(delta)
|
||
|
+ " micros (" + counter + " iterations)");
|
||
|
|
||
|
// Raw Compare: new time
|
||
|
|
||
|
counter = 0;
|
||
|
startTime = System.currentTimeMillis();
|
||
|
|
||
|
for (int i = 0; i < size; ++i) {
|
||
|
probe = tests[i];
|
||
|
for (int j = 0; j < size; ++j) {
|
||
|
opt ^= newCol.compare(probe, tests[j]);
|
||
|
counter++;
|
||
|
}
|
||
|
}
|
||
|
endTime = System.currentTimeMillis();
|
||
|
delta = (1000*(endTime - startTime) / counter) - overhead;
|
||
|
System.out.println("New raw compare time: " + nf.format(delta)
|
||
|
+ " micros (" + counter + " iterations) " + percent.format(delta/oldDelta));
|
||
|
System.out.println();
|
||
|
}
|
||
|
|
||
|
// ================================================
|
||
|
// Binary Search
|
||
|
// note: I don't worry about getting the binary search precisely right, since I just want to
|
||
|
// see which strings would get compared.
|
||
|
|
||
|
// overhead
|
||
|
|
||
|
int iterations = (size * extraIterations);
|
||
|
startTime = System.currentTimeMillis();
|
||
|
Arrays.sort(tests);
|
||
|
int opt2 = 0; // keep from optimizing out
|
||
|
|
||
|
for (int i = 0; i < size; ++i) {
|
||
|
probe = tests[i];
|
||
|
for (int k = 0; k < extraIterations; ++k) {
|
||
|
opt2 ^= Arrays.binarySearch(tests, probe);
|
||
|
}
|
||
|
}
|
||
|
endTime = System.currentTimeMillis();
|
||
|
overhead = delta = (1000*(endTime - startTime) / iterations);
|
||
|
System.out.println("Overhead: " + nf.format(delta)
|
||
|
+ " micros (" + iterations + " iterations)");
|
||
|
|
||
|
// old time
|
||
|
|
||
|
startTime = System.currentTimeMillis();
|
||
|
Arrays.sort(tests, oldCol);
|
||
|
|
||
|
for (int i = 0; i < size; ++i) {
|
||
|
probe = tests[i];
|
||
|
for (int k = 0; k < extraIterations; ++k) {
|
||
|
opt2 ^= Arrays.binarySearch(tests, probe, oldCol);
|
||
|
}
|
||
|
}
|
||
|
endTime = System.currentTimeMillis();
|
||
|
oldDelta = delta = (1000*(endTime - startTime) / iterations) - overhead;
|
||
|
System.out.println("Old binary search time: " + nf.format(delta)
|
||
|
+ " micros (" + iterations + " iterations)");
|
||
|
|
||
|
|
||
|
// new time
|
||
|
|
||
|
Arrays.sort(tests, newCol);
|
||
|
|
||
|
startTime = System.currentTimeMillis();
|
||
|
|
||
|
for (int i = 0; i < size; ++i) {
|
||
|
probe = tests[i];
|
||
|
for (int k = 0; k < extraIterations; ++k) {
|
||
|
opt2 ^= Arrays.binarySearch(tests, probe, newCol);
|
||
|
}
|
||
|
}
|
||
|
endTime = System.currentTimeMillis();
|
||
|
delta = (1000*(endTime - startTime) / iterations) - overhead;
|
||
|
System.out.println("New binary search time: " + nf.format(delta)
|
||
|
+ " micros (" + iterations + " iterations) " + percent.format(delta/oldDelta));
|
||
|
System.out.println();
|
||
|
|
||
|
// ================================================
|
||
|
// Sort
|
||
|
|
||
|
String[] sortTests = (String[]) tests.clone();
|
||
|
extraIterations = 5;
|
||
|
iterations = (size * extraIterations);
|
||
|
|
||
|
// overhead
|
||
|
|
||
|
startTime = System.currentTimeMillis();
|
||
|
|
||
|
for (int i = 0; i < size; ++i) {
|
||
|
for (int k = 0; k < extraIterations; ++k) {
|
||
|
System.arraycopy(tests, 0, sortTests, 0, tests.length); // copy array
|
||
|
Arrays.sort(sortTests);
|
||
|
}
|
||
|
}
|
||
|
endTime = System.currentTimeMillis();
|
||
|
overhead = delta = (1000*(endTime - startTime) / iterations);
|
||
|
System.out.println("overhead: " + nf.format(delta)
|
||
|
+ " micros (" + iterations + " iterations)");
|
||
|
|
||
|
// old time
|
||
|
|
||
|
startTime = System.currentTimeMillis();
|
||
|
|
||
|
for (int i = 0; i < size; ++i) {
|
||
|
for (int k = 0; k < extraIterations; ++k) {
|
||
|
System.arraycopy(tests, 0, sortTests, 0, tests.length); // copy array
|
||
|
Arrays.sort(sortTests, oldCol);
|
||
|
}
|
||
|
}
|
||
|
endTime = System.currentTimeMillis();
|
||
|
oldDelta = delta = (1000*(endTime - startTime) / iterations) - overhead;
|
||
|
System.out.println("Old sort time: " + nf.format(delta)
|
||
|
+ " micros (" + iterations + " iterations)");
|
||
|
|
||
|
// new time
|
||
|
|
||
|
startTime = System.currentTimeMillis();
|
||
|
|
||
|
for (int i = 0; i < size; ++i) {
|
||
|
for (int k = 0; k < extraIterations; ++k) {
|
||
|
System.arraycopy(tests, 0, sortTests, 0, tests.length); // copy array
|
||
|
Arrays.sort(sortTests, newCol);
|
||
|
}
|
||
|
}
|
||
|
endTime = System.currentTimeMillis();
|
||
|
delta = (1000*(endTime - startTime) / iterations) - overhead;
|
||
|
System.out.println("New sort time: " + nf.format(delta)
|
||
|
+ " micros (" + iterations + " iterations) " + percent.format(delta/oldDelta));
|
||
|
|
||
|
}
|
||
|
}
|