864d2ef0ba
X-SVN-Rev: 9935
331 lines
12 KiB
Java
331 lines
12 KiB
Java
|
|
/**
|
|
*******************************************************************************
|
|
* Copyright (C) 1996-2001, International Business Machines Corporation and *
|
|
* others. All Rights Reserved. *
|
|
*******************************************************************************
|
|
*
|
|
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/FastBinarySearch.java,v $
|
|
* $Date: 2002/10/01 01:43:21 $
|
|
* $Revision: 1.3 $
|
|
*
|
|
*******************************************************************************
|
|
*/
|
|
|
|
package com.ibm.text.utility;
|
|
|
|
import java.util.Random;
|
|
import java.util.Arrays;
|
|
import com.ibm.icu.text.NumberFormat;
|
|
|
|
/**
|
|
* Quick & Dirty test program for fast (unrolled) binary search
|
|
* Should use new PerfTest once that is done, although since there is no object
|
|
* creation the numbers should be fairly reliable.
|
|
*/
|
|
|
|
final public class FastBinarySearch {
|
|
|
|
/**
|
|
* Testing
|
|
*/
|
|
|
|
static public void test() {
|
|
perfTest(100, 100); // warmup
|
|
|
|
// try different combinations of data size and iterations
|
|
|
|
perfTest(10, 2000000);
|
|
perfTest(30, 500000);
|
|
perfTest(100, 200000);
|
|
perfTest(1000, 2000);
|
|
perfTest(100000, 200);
|
|
|
|
// skip the following
|
|
if (true) return;
|
|
validityTest();
|
|
}
|
|
|
|
static void perfTest(int dataSize, int iterations) {
|
|
NumberFormat percent = NumberFormat.getPercentInstance();
|
|
percent.setMaximumFractionDigits(0);
|
|
|
|
Random random = new Random(123456789L);
|
|
int[] myData = new int[dataSize];
|
|
FastBinarySearch fbs = new FastBinarySearch();
|
|
|
|
// produce test case
|
|
for (int i = 0; i < myData.length; ++i) {
|
|
myData[i] = (int) (random.nextDouble() * myData.length * 3);
|
|
}
|
|
Arrays.sort(myData, 0, myData.length);
|
|
fbs.setData(myData, myData.length);
|
|
|
|
// produce probe data
|
|
int[] probe = new int[myData.length*2];
|
|
for (int i = 0; i < probe.length; ++i) {
|
|
probe[i] = (int) (random.nextDouble() * myData.length * 3);
|
|
}
|
|
|
|
int sum = 0;
|
|
double startTime, endTime, time, baseTime;
|
|
|
|
System.out.println();
|
|
long totalIterations = iterations * probe.length;
|
|
System.out.println("Iterations = " + totalIterations + ", Data size = " + dataSize);
|
|
|
|
startTime = System.currentTimeMillis();
|
|
for (int testCount = 0; testCount < iterations; ++testCount) {
|
|
for (int i = 0; i < probe.length; ++i) {
|
|
sum += fbs.findCodePoint(i);
|
|
}
|
|
}
|
|
endTime = System.currentTimeMillis();
|
|
baseTime = time = (endTime - startTime)*1000/totalIterations;
|
|
System.out.println("Basic; time=" + time + " microsecs/call");
|
|
|
|
startTime = System.currentTimeMillis();
|
|
for (int testCount = 0; testCount < iterations; ++testCount) {
|
|
for (int i = 0; i < probe.length; ++i) {
|
|
sum += fbs.highestIndexLEQ(i);
|
|
}
|
|
}
|
|
endTime = System.currentTimeMillis();
|
|
time = (endTime - startTime)*1000/totalIterations;
|
|
System.out.println("Fast; time=" + time + " microsecs/call\t" + percent.format(time/baseTime-1));
|
|
|
|
startTime = System.currentTimeMillis();
|
|
for (int testCount = 0; testCount < iterations; ++testCount) {
|
|
for (int i = 0; i < probe.length; ++i) {
|
|
sum += fbs.highestIndexLEQ2(i);
|
|
}
|
|
}
|
|
endTime = System.currentTimeMillis();
|
|
time = (endTime - startTime)*1000/totalIterations;
|
|
System.out.println("Compact; time=" + time + " microsecs/call\t" + percent.format(time/baseTime-1));
|
|
}
|
|
|
|
|
|
static void validityTest() {
|
|
Random random = new Random(123456789L);
|
|
int[] myData = new int[50];
|
|
FastBinarySearch fbs = new FastBinarySearch();
|
|
|
|
for (int testCount = 0; testCount < 100; ++testCount) {
|
|
|
|
// produce test case
|
|
double ran = random.nextDouble();
|
|
//System.out.println(ran);
|
|
int myCount = 2+ (int) (ran * (myData.length - 2));
|
|
for (int i = 0; i < myCount; ++i) {
|
|
ran = random.nextDouble();
|
|
//System.out.println(ran);
|
|
myData[i] = (int) (ran * myData.length * 3);
|
|
}
|
|
System.out.println("Trial " + testCount + ", len: " + myCount);
|
|
Arrays.sort(myData, 0, myCount);
|
|
fbs.setData(myData, myCount);
|
|
|
|
// compare brute force & fast methods
|
|
boolean ok = true;
|
|
for (int i = -1; i < myData.length * 3 + 1; ++i) {
|
|
int brute = fbs.bruteForce(i);
|
|
int fast = fbs.highestIndexLEQ(i);
|
|
if (fast != brute) {
|
|
if (ok) {
|
|
System.out.println(fbs);
|
|
}
|
|
System.out.println("Error: probe=" + i + ", brute=" + brute + ", fast=" + fast);
|
|
fast = fbs.highestIndexLEQ(i); // do again with debugger
|
|
ok = false;
|
|
}
|
|
}
|
|
if (!ok) return;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Set the data to be scanned. It must be in sorted order.
|
|
*/
|
|
|
|
public void setData(int data[], int count) {
|
|
|
|
this.data = (int[]) data.clone(); // clone for safety
|
|
isValid = this.count == count; // isValid only depends on the count remaining the same
|
|
this.count = count;
|
|
}
|
|
|
|
/**
|
|
* Basic binary search
|
|
*/
|
|
|
|
private final int findCodePoint(int c) {
|
|
// Return the smallest i such that c < list[i]. Assume
|
|
// list[len - 1] == HIGH and that c is legal (0..HIGH-1).
|
|
if (c < data[0]) return 0;
|
|
int lo = 0;
|
|
int hi = count - 1;
|
|
// invariant: c >= list[lo]
|
|
// invariant: c < list[hi]
|
|
for (;;) {
|
|
int i = (lo + hi) >>> 1;
|
|
if (i == lo) return hi;
|
|
if (c < data[i]) {
|
|
hi = i;
|
|
} else {
|
|
lo = i;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* @return greatest index whose value is less than or equal to the searchValue.
|
|
* If there is no such index, then -1 is returned
|
|
*/
|
|
|
|
public int bruteForce(int searchValue) {
|
|
int i = count;
|
|
while (--i >= 0 && data[i] > searchValue) {}
|
|
return i;
|
|
}
|
|
|
|
/**
|
|
* @return greatest index such that data[index] <= searchValue
|
|
* If there is no such index (e.g. searchValue < data[0]), then -1 is returned
|
|
*/
|
|
|
|
public int highestIndexLEQ(int searchValue) {
|
|
|
|
if (!isValid) validate();
|
|
int temp;
|
|
|
|
// set up initial range to search. Each subrange is a power of two in length
|
|
int high = searchValue < data[topOfLow] ? topOfLow : topOfHigh;
|
|
|
|
// Completely unrolled binary search, folhighing "Programming Pearls"
|
|
// Each case deliberately falls through to the next
|
|
// Logically, data[-1] < all_search_values && data[count] > all_search_values
|
|
// although the values -1 and count are never actually touched.
|
|
|
|
// The bounds at each point are low & high,
|
|
// where low == high - delta*2
|
|
// so high - delta is the midpoint
|
|
|
|
// The invariant AFTER each line is that data[low] < searchValue <= data[high]
|
|
|
|
switch (power) {
|
|
//case 31: if (searchValue < data[temp = high-0x40000000]) high = temp; // no unsigned int in Java
|
|
case 30: if (searchValue < data[temp = high-0x20000000]) high = temp;
|
|
case 29: if (searchValue < data[temp = high-0x10000000]) high = temp;
|
|
|
|
case 28: if (searchValue < data[temp = high- 0x8000000]) high = temp;
|
|
case 27: if (searchValue < data[temp = high- 0x4000000]) high = temp;
|
|
case 26: if (searchValue < data[temp = high- 0x2000000]) high = temp;
|
|
case 25: if (searchValue < data[temp = high- 0x1000000]) high = temp;
|
|
|
|
case 24: if (searchValue < data[temp = high- 0x800000]) high = temp;
|
|
case 23: if (searchValue < data[temp = high- 0x400000]) high = temp;
|
|
case 22: if (searchValue < data[temp = high- 0x200000]) high = temp;
|
|
case 21: if (searchValue < data[temp = high- 0x100000]) high = temp;
|
|
|
|
case 20: if (searchValue < data[temp = high- 0x80000]) high = temp;
|
|
case 19: if (searchValue < data[temp = high- 0x40000]) high = temp;
|
|
case 18: if (searchValue < data[temp = high- 0x20000]) high = temp;
|
|
case 17: if (searchValue < data[temp = high- 0x10000]) high = temp;
|
|
|
|
case 16: if (searchValue < data[temp = high- 0x8000]) high = temp;
|
|
case 15: if (searchValue < data[temp = high- 0x4000]) high = temp;
|
|
case 14: if (searchValue < data[temp = high- 0x2000]) high = temp;
|
|
case 13: if (searchValue < data[temp = high- 0x1000]) high = temp;
|
|
|
|
case 12: if (searchValue < data[temp = high- 0x800]) high = temp;
|
|
case 11: if (searchValue < data[temp = high- 0x400]) high = temp;
|
|
case 10: if (searchValue < data[temp = high- 0x200]) high = temp;
|
|
case 9: if (searchValue < data[temp = high- 0x100]) high = temp;
|
|
|
|
case 8: if (searchValue < data[temp = high- 0x80]) high = temp;
|
|
case 7: if (searchValue < data[temp = high- 0x40]) high = temp;
|
|
case 6: if (searchValue < data[temp = high- 0x20]) high = temp;
|
|
case 5: if (searchValue < data[temp = high- 0x10]) high = temp;
|
|
|
|
case 4: if (searchValue < data[temp = high- 0x8]) high = temp;
|
|
case 3: if (searchValue < data[temp = high- 0x4]) high = temp;
|
|
case 2: if (searchValue < data[temp = high- 0x2]) high = temp;
|
|
case 1: if (searchValue < data[temp = high- 0x1]) high = temp;
|
|
}
|
|
if (high == topOfHigh && searchValue >= data[high]) return high;
|
|
return high-1;
|
|
}
|
|
|
|
|
|
// NOTE: on some machines the above may not be optimal, if the size of the function
|
|
// forces code out of the cache. For that case, it would be better for program in a loop, like the following
|
|
|
|
public int highestIndexLEQ2(int searchValue) {
|
|
|
|
if (!isValid) validate();
|
|
int temp;
|
|
int high = searchValue < data[topOfLow] ? topOfLow : topOfHigh;
|
|
for (int delta = deltaStart; delta != 0; delta >>= 1) {
|
|
if (searchValue < data[temp = high-delta]) high = temp;
|
|
}
|
|
if (high == topOfHigh && searchValue >= data[high]) return high;
|
|
return high-1;
|
|
}
|
|
|
|
/**
|
|
* For debugging
|
|
*/
|
|
public String toString() {
|
|
String result = "[";
|
|
for (int j = 0; j < count; ++j) {
|
|
if (j != 0) result += ", ";
|
|
result += data[j];
|
|
}
|
|
result += "]";
|
|
result += ", power: " + power;
|
|
result += ", topOfLow: " + topOfLow;
|
|
result += ", topOfHigh: " + topOfHigh;
|
|
return result;
|
|
}
|
|
|
|
|
|
// ================ Privates ================
|
|
|
|
// data
|
|
|
|
int data[];
|
|
int count;
|
|
|
|
// validate internal parameters
|
|
|
|
private void validate() {
|
|
if (count <= 1) throw new IllegalArgumentException("Array must have at least 2 elements");
|
|
|
|
// find greatest power of 2 less than or equal to count
|
|
for (power = exp2.length-1; power > 0 && exp2[power] > count; power--) {}
|
|
|
|
// determine the starting points
|
|
topOfLow = exp2[power] - 1;
|
|
topOfHigh = count - 1;
|
|
deltaStart = exp2[power-1];
|
|
isValid = true;
|
|
}
|
|
|
|
private boolean isValid = false;
|
|
private int topOfLow;
|
|
private int topOfHigh;
|
|
private int power;
|
|
private int deltaStart;
|
|
|
|
private static final int exp2[] = {
|
|
0x1, 0x2, 0x4, 0x8,
|
|
0x10, 0x20, 0x40, 0x80,
|
|
0x100, 0x200, 0x400, 0x800,
|
|
0x1000, 0x2000, 0x4000, 0x8000,
|
|
0x10000, 0x20000, 0x40000, 0x80000,
|
|
0x100000, 0x200000, 0x400000, 0x800000,
|
|
0x1000000, 0x2000000, 0x4000000, 0x8000000,
|
|
0x10000000, 0x20000000 // , 0x40000000 // no unsigned int in Java
|
|
};
|
|
} |