Added new file CharTrie
X-SVN-Rev: 4362
This commit is contained in:
parent
c23796f9c9
commit
0b8f770650
@ -5,8 +5,8 @@
|
||||
******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/Attic/ByteTrie.java,v $
|
||||
* $Date: 2001/03/08 03:04:02 $
|
||||
* $Revision: 1.1 $
|
||||
* $Date: 2001/03/28 00:01:52 $
|
||||
* $Revision: 1.2 $
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
@ -43,7 +43,7 @@ import com.ibm.text.UCharacter;
|
||||
* int thirdindex = index2[secondindex] + ch & LAST_FOUR_BITS_MASK;<br>
|
||||
* f(ch) = value[thirdindex];<br>
|
||||
* </p>
|
||||
* @version $Revision: 1.1 $
|
||||
* @version $Revision: 1.2 $
|
||||
* @author Syn Wee Quek
|
||||
*/
|
||||
public final class ByteTrie
|
||||
@ -80,7 +80,7 @@ public final class ByteTrie
|
||||
* @param index to be manipulated into corresponding trie index
|
||||
* @return trie value at index
|
||||
*/
|
||||
public int getValue(int index)
|
||||
public byte getValue(int index)
|
||||
{
|
||||
// index of the first access to the database
|
||||
int index1 = index >> STAGE_1_SHIFT_;
|
||||
@ -175,4 +175,58 @@ public final class ByteTrie
|
||||
m_stage2_ = new int[size];
|
||||
System.arraycopy(stage2, 0, m_stage2_, 0, size);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts trie to a readable format
|
||||
* @return string version of the trie
|
||||
*/
|
||||
public String toString()
|
||||
{
|
||||
int size = m_stage1_.length;
|
||||
int count = 0;
|
||||
StringBuffer result = new StringBuffer("int m_stage1_[] = {\n");
|
||||
for (int i = 0; i < size; i ++) {
|
||||
result.append("0x" + Integer.toHexString(m_stage1_[i]));
|
||||
if (i != size - 1) {
|
||||
result.append(", ");
|
||||
}
|
||||
count ++;
|
||||
if (count == 10) {
|
||||
count = 0;
|
||||
result.append("\n");
|
||||
}
|
||||
}
|
||||
result.append("\n}\n\n");
|
||||
size = m_stage2_.length;
|
||||
result.append("int m_stage2_[] = {\n");
|
||||
count = 0;
|
||||
for (int i = 0; i < size; i ++) {
|
||||
result.append("0x" + Integer.toHexString(m_stage2_[i]));
|
||||
if (i != size - 1) {
|
||||
result.append(", ");
|
||||
}
|
||||
count ++;
|
||||
if (count == 10) {
|
||||
count = 0;
|
||||
result.append("\n");
|
||||
}
|
||||
}
|
||||
result.append("\n}\n\n");
|
||||
size = m_stage3_.length;
|
||||
result.append("byte m_stage3_[] = {\n");
|
||||
count = 0;
|
||||
for (int i = 0; i < size; i ++) {
|
||||
result.append("0x" + Integer.toHexString((char)m_stage3_[i]));
|
||||
if (i != size - 1) {
|
||||
result.append(", ");
|
||||
}
|
||||
count ++;
|
||||
if (count == 10) {
|
||||
count = 0;
|
||||
result.append("\n");
|
||||
}
|
||||
}
|
||||
result.append("\n}");
|
||||
return result.toString();
|
||||
}
|
||||
}
|
@ -5,8 +5,8 @@
|
||||
******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/TrieBuilder.java,v $
|
||||
* $Date: 2001/03/08 03:04:02 $
|
||||
* $Revision: 1.1 $
|
||||
* $Date: 2001/03/28 00:01:51 $
|
||||
* $Revision: 1.2 $
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
@ -41,7 +41,7 @@ package com.ibm.util;
|
||||
* int thirdindex = index2[secondindex] + ch & LAST_SET_OF_BITS_MASK;<br>
|
||||
* f(ch) = value[thirdindex];<br>
|
||||
* </p>
|
||||
* @version $Revision: 1.1 $
|
||||
* @version $Revision: 1.2 $
|
||||
* @author Syn Wee Quek
|
||||
*/
|
||||
final class TrieBuilder
|
||||
@ -106,6 +106,64 @@ final class TrieBuilder
|
||||
return valuesize;
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes argument array and forms a compact array into the result arrays.
|
||||
* The result will be
|
||||
* <code>
|
||||
* array[index] == valuearray[indexarray[index]]
|
||||
* </code>.
|
||||
* Note : This method is generic, it only takes values from the array.
|
||||
* @param array value array to be manipulated
|
||||
* @param start index of the array to process
|
||||
* @param length of array to process.
|
||||
* @param blocksize size of each blocks existing in valuearray
|
||||
* @param indexarray result index array with length = array.length, with
|
||||
* values which indexes to valuearray.
|
||||
* @param valuearray result value array compact value array
|
||||
* @return size of valuearray
|
||||
*/
|
||||
static int build(char array[], int start, int length, int blocksize,
|
||||
int indexarray[], char valuearray[])
|
||||
{
|
||||
int valuesize = 0;
|
||||
int valueindex;
|
||||
int blockcount = 0;
|
||||
int index = 0;
|
||||
int min;
|
||||
|
||||
while (start < length) {
|
||||
// for a block of blocksize in the array
|
||||
// we try to find a similar block in valuearray
|
||||
for (valueindex = 0; valueindex < valuesize; valueindex ++) {
|
||||
// testing each block of blocksize at index valueindex in valuearray
|
||||
// if it is == to array blocks
|
||||
min = Math.min(blocksize, valuesize - valueindex);
|
||||
for (blockcount = 0; blockcount < min;blockcount ++) {
|
||||
if (array[start + blockcount] !=
|
||||
valuearray[valueindex + blockcount]) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (blockcount == blocksize || valueindex + blockcount == valuesize) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// if no similar block is found in value array
|
||||
// we populate the result arrays with data
|
||||
for (min = Math.min(blocksize, length - start); blockcount < min;
|
||||
blockcount ++) {
|
||||
valuearray[valuesize ++] = array[start + blockcount];
|
||||
}
|
||||
|
||||
indexarray[index ++] = valueindex;
|
||||
start += blocksize;
|
||||
}
|
||||
|
||||
return valuesize;
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes argument array and forms a compact array into the result arrays.
|
||||
* The result will be
|
||||
|
@ -5,8 +5,8 @@
|
||||
******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/util/Attic/ByteTrie.java,v $
|
||||
* $Date: 2001/03/08 03:04:02 $
|
||||
* $Revision: 1.1 $
|
||||
* $Date: 2001/03/28 00:01:52 $
|
||||
* $Revision: 1.2 $
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
@ -43,7 +43,7 @@ import com.ibm.text.UCharacter;
|
||||
* int thirdindex = index2[secondindex] + ch & LAST_FOUR_BITS_MASK;<br>
|
||||
* f(ch) = value[thirdindex];<br>
|
||||
* </p>
|
||||
* @version $Revision: 1.1 $
|
||||
* @version $Revision: 1.2 $
|
||||
* @author Syn Wee Quek
|
||||
*/
|
||||
public final class ByteTrie
|
||||
@ -80,7 +80,7 @@ public final class ByteTrie
|
||||
* @param index to be manipulated into corresponding trie index
|
||||
* @return trie value at index
|
||||
*/
|
||||
public int getValue(int index)
|
||||
public byte getValue(int index)
|
||||
{
|
||||
// index of the first access to the database
|
||||
int index1 = index >> STAGE_1_SHIFT_;
|
||||
@ -175,4 +175,58 @@ public final class ByteTrie
|
||||
m_stage2_ = new int[size];
|
||||
System.arraycopy(stage2, 0, m_stage2_, 0, size);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts trie to a readable format
|
||||
* @return string version of the trie
|
||||
*/
|
||||
public String toString()
|
||||
{
|
||||
int size = m_stage1_.length;
|
||||
int count = 0;
|
||||
StringBuffer result = new StringBuffer("int m_stage1_[] = {\n");
|
||||
for (int i = 0; i < size; i ++) {
|
||||
result.append("0x" + Integer.toHexString(m_stage1_[i]));
|
||||
if (i != size - 1) {
|
||||
result.append(", ");
|
||||
}
|
||||
count ++;
|
||||
if (count == 10) {
|
||||
count = 0;
|
||||
result.append("\n");
|
||||
}
|
||||
}
|
||||
result.append("\n}\n\n");
|
||||
size = m_stage2_.length;
|
||||
result.append("int m_stage2_[] = {\n");
|
||||
count = 0;
|
||||
for (int i = 0; i < size; i ++) {
|
||||
result.append("0x" + Integer.toHexString(m_stage2_[i]));
|
||||
if (i != size - 1) {
|
||||
result.append(", ");
|
||||
}
|
||||
count ++;
|
||||
if (count == 10) {
|
||||
count = 0;
|
||||
result.append("\n");
|
||||
}
|
||||
}
|
||||
result.append("\n}\n\n");
|
||||
size = m_stage3_.length;
|
||||
result.append("byte m_stage3_[] = {\n");
|
||||
count = 0;
|
||||
for (int i = 0; i < size; i ++) {
|
||||
result.append("0x" + Integer.toHexString((char)m_stage3_[i]));
|
||||
if (i != size - 1) {
|
||||
result.append(", ");
|
||||
}
|
||||
count ++;
|
||||
if (count == 10) {
|
||||
count = 0;
|
||||
result.append("\n");
|
||||
}
|
||||
}
|
||||
result.append("\n}");
|
||||
return result.toString();
|
||||
}
|
||||
}
|
232
icu4j/src/com/ibm/util/CharTrie.java
Executable file
232
icu4j/src/com/ibm/util/CharTrie.java
Executable file
@ -0,0 +1,232 @@
|
||||
/*
|
||||
******************************************************************************
|
||||
* Copyright (C) 1996-2001, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/util/Attic/CharTrie.java,v $
|
||||
* $Date: 2001/03/28 00:01:52 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.util;
|
||||
|
||||
import com.ibm.text.UCharacter;
|
||||
|
||||
/**
|
||||
* Class to manipulate and generate a trie.
|
||||
* This is useful for ICU data in primitive types.
|
||||
* Provides a compact way to store information that is indexed by Unicode
|
||||
* values, such as character properties, types, keyboard values, etc. This is
|
||||
* very useful when you have a block of Unicode data that contains significant
|
||||
* values while the rest of the Unicode data is unused in the application or
|
||||
* when you have a lot of redundance, such as where all 21,000 Han ideographs
|
||||
* have the same value. However, lookup is much faster than a hash table.
|
||||
* A trie of any primitive data type serves two purposes:
|
||||
* <UL type = round>
|
||||
* <LI>Fast access of the indexed values.
|
||||
* <LI>Smaller memory footprint.
|
||||
* </UL>
|
||||
* A trie is composed of 2 index array and value array. Combining the 2 index
|
||||
* array, we could get the indicies of Unicode characters to the value array.
|
||||
* The first index array will contain indexes corresponding to the first 11
|
||||
* bits of a 21 bit codepoint, the second index array will contain indexes
|
||||
* corresponding to the next 6 bits of the code point. The last array will
|
||||
* contain the values. Hence to access the value of a codepoint, we can use the
|
||||
* following program
|
||||
* <p>
|
||||
* int firstindex = ch >> FIRST_11_BITS_SHIFT;<br>
|
||||
* int secondindex = index1[firstindex] +
|
||||
* (ch >> NEXT_6_BITS_SHIFT) & NEXT_6_BITS_MASK;<br>
|
||||
* int thirdindex = index2[secondindex] + ch & LAST_FOUR_BITS_MASK;<br>
|
||||
* f(ch) = value[thirdindex];<br>
|
||||
* </p>
|
||||
* @version $Revision: 1.1 $
|
||||
* @author Syn Wee Quek
|
||||
*/
|
||||
public final class CharTrie
|
||||
{
|
||||
// constructors -----------------------------------------------------
|
||||
|
||||
/**
|
||||
* constructor
|
||||
* @param array of data to be populated into trie
|
||||
*/
|
||||
public CharTrie(char array[])
|
||||
{
|
||||
build(array);
|
||||
}
|
||||
|
||||
/**
|
||||
* constructor that assigns trie the argument values. Arrays are not
|
||||
* duplicated.
|
||||
* @param stage1 array of the first set of indexes
|
||||
* @param stage2 array of the second set of indexes
|
||||
* @param stage3 array of data
|
||||
*/
|
||||
public CharTrie(int stage1[], int stage2[], char stage3[])
|
||||
{
|
||||
m_stage1_ = stage1;
|
||||
m_stage2_ = stage2;
|
||||
m_stage3_ = stage3;
|
||||
}
|
||||
|
||||
// public methods ----------------------------------------------------
|
||||
|
||||
/**
|
||||
* Getting the trie data corresponding to the argument index.
|
||||
* @param index to be manipulated into corresponding trie index
|
||||
* @return trie value at index
|
||||
*/
|
||||
public char getValue(int index)
|
||||
{
|
||||
// index of the first access to the database
|
||||
int index1 = index >> STAGE_1_SHIFT_;
|
||||
// index of the second access to the database
|
||||
int index2 = m_stage1_[index1] +
|
||||
((index >> STAGE_2_SHIFT_) & STAGE_2_MASK_AFTER_SHIFT_);
|
||||
// index of the third access to the database
|
||||
int index3 = m_stage2_[index2] + (index & STAGE_3_MASK_);
|
||||
// retrieves value
|
||||
return m_stage3_[index3];
|
||||
}
|
||||
|
||||
// private data members ------------------------------------------------
|
||||
|
||||
/**
|
||||
* Stage 1 index array
|
||||
*/
|
||||
private int m_stage1_[];
|
||||
|
||||
/**
|
||||
* Stage 2 index array
|
||||
*/
|
||||
private int m_stage2_[];
|
||||
|
||||
/**
|
||||
* Stage 3 value array
|
||||
*/
|
||||
private char m_stage3_[];
|
||||
|
||||
/**
|
||||
* Stage 1 shift
|
||||
*/
|
||||
private static final int STAGE_1_SHIFT_ = 10;
|
||||
|
||||
/**
|
||||
* Stage 2 shift
|
||||
*/
|
||||
private static final int STAGE_2_SHIFT_ = 4;
|
||||
|
||||
/**
|
||||
* Stage 2 mask
|
||||
*/
|
||||
private static final int STAGE_2_MASK_AFTER_SHIFT_ = 0x3F;
|
||||
|
||||
/**
|
||||
* Stage 3 mask
|
||||
*/
|
||||
private static final int STAGE_3_MASK_ = 0xF;
|
||||
|
||||
/**
|
||||
* Number of numbers possible from a 4 bit type
|
||||
*/
|
||||
private static final int COUNT_4_BIT_ = 0x10;
|
||||
|
||||
/**
|
||||
* Number of numbers possible from a 6 bit type
|
||||
*/
|
||||
private static final int COUNT_6_BIT_ = 0x40;
|
||||
|
||||
/**
|
||||
* Number of numbers possible from the first 17 bits of a codepoint
|
||||
*/
|
||||
private static final int COUNT_CODEPOINT_FIRST_17_BIT_ = 0x110000 >> 4;
|
||||
|
||||
/**
|
||||
* Number of numbers possible from the first 11 bits of a codepoint
|
||||
*/
|
||||
private static final int COUNT_CODEPOINT_FIRST_11_BIT_ = 0x110000 >> 10;
|
||||
|
||||
// private methods -----------------------------------------------------
|
||||
|
||||
/**
|
||||
* Building the trie from a argument array.
|
||||
* Each unicode character will be used to generate data.
|
||||
* @param output file path
|
||||
*/
|
||||
private void build(char array[])
|
||||
{
|
||||
int ch = 0;
|
||||
|
||||
int stage2[] = new int[COUNT_CODEPOINT_FIRST_17_BIT_];
|
||||
char stage3[] = new char[UCharacter.MAX_VALUE >> 1];
|
||||
int size = TrieBuilder.build(array, 0, array.length, COUNT_4_BIT_, stage2,
|
||||
stage3);
|
||||
|
||||
m_stage3_ = new char[size];
|
||||
System.arraycopy(stage3, 0, m_stage3_, 0, size);
|
||||
|
||||
m_stage1_ = new int[COUNT_CODEPOINT_FIRST_11_BIT_];
|
||||
size = TrieBuilder.build(stage2, 0, stage2.length, COUNT_6_BIT_, m_stage1_,
|
||||
stage2);
|
||||
m_stage2_ = new int[size];
|
||||
System.arraycopy(stage2, 0, m_stage2_, 0, size);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts trie to a readable format
|
||||
* @return string version of the trie
|
||||
*/
|
||||
public String toString()
|
||||
{
|
||||
int size = m_stage1_.length;
|
||||
int count = 0;
|
||||
StringBuffer result = new StringBuffer("int m_stage1_[] = {\n");
|
||||
for (int i = 0; i < size; i ++) {
|
||||
result.append("0x" + Integer.toHexString(m_stage1_[i]));
|
||||
if (i != size - 1) {
|
||||
result.append(", ");
|
||||
}
|
||||
count ++;
|
||||
if (count == 10) {
|
||||
count = 0;
|
||||
result.append("\n");
|
||||
}
|
||||
}
|
||||
result.append("\n}\n\n");
|
||||
size = m_stage2_.length;
|
||||
result.append("int m_stage2_[] = {\n");
|
||||
count = 0;
|
||||
for (int i = 0; i < size; i ++) {
|
||||
result.append("0x" + Integer.toHexString(m_stage2_[i]));
|
||||
if (i != size - 1) {
|
||||
result.append(", ");
|
||||
}
|
||||
count ++;
|
||||
if (count == 10) {
|
||||
count = 0;
|
||||
result.append("\n");
|
||||
}
|
||||
}
|
||||
result.append("\n}\n\n");
|
||||
size = m_stage3_.length;
|
||||
result.append("char m_stage3_[] = {\n");
|
||||
count = 0;
|
||||
for (int i = 0; i < size; i ++) {
|
||||
result.append("0x" + Integer.toHexString(m_stage3_[i]));
|
||||
if (i != size - 1) {
|
||||
result.append(", ");
|
||||
}
|
||||
count ++;
|
||||
if (count == 10) {
|
||||
count = 0;
|
||||
result.append("\n");
|
||||
}
|
||||
}
|
||||
result.append("\n}");
|
||||
return result.toString();
|
||||
}
|
||||
}
|
@ -5,8 +5,8 @@
|
||||
******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/util/Attic/TrieBuilder.java,v $
|
||||
* $Date: 2001/03/08 03:04:02 $
|
||||
* $Revision: 1.1 $
|
||||
* $Date: 2001/03/28 00:01:51 $
|
||||
* $Revision: 1.2 $
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
@ -41,7 +41,7 @@ package com.ibm.util;
|
||||
* int thirdindex = index2[secondindex] + ch & LAST_SET_OF_BITS_MASK;<br>
|
||||
* f(ch) = value[thirdindex];<br>
|
||||
* </p>
|
||||
* @version $Revision: 1.1 $
|
||||
* @version $Revision: 1.2 $
|
||||
* @author Syn Wee Quek
|
||||
*/
|
||||
final class TrieBuilder
|
||||
@ -106,6 +106,64 @@ final class TrieBuilder
|
||||
return valuesize;
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes argument array and forms a compact array into the result arrays.
|
||||
* The result will be
|
||||
* <code>
|
||||
* array[index] == valuearray[indexarray[index]]
|
||||
* </code>.
|
||||
* Note : This method is generic, it only takes values from the array.
|
||||
* @param array value array to be manipulated
|
||||
* @param start index of the array to process
|
||||
* @param length of array to process.
|
||||
* @param blocksize size of each blocks existing in valuearray
|
||||
* @param indexarray result index array with length = array.length, with
|
||||
* values which indexes to valuearray.
|
||||
* @param valuearray result value array compact value array
|
||||
* @return size of valuearray
|
||||
*/
|
||||
static int build(char array[], int start, int length, int blocksize,
|
||||
int indexarray[], char valuearray[])
|
||||
{
|
||||
int valuesize = 0;
|
||||
int valueindex;
|
||||
int blockcount = 0;
|
||||
int index = 0;
|
||||
int min;
|
||||
|
||||
while (start < length) {
|
||||
// for a block of blocksize in the array
|
||||
// we try to find a similar block in valuearray
|
||||
for (valueindex = 0; valueindex < valuesize; valueindex ++) {
|
||||
// testing each block of blocksize at index valueindex in valuearray
|
||||
// if it is == to array blocks
|
||||
min = Math.min(blocksize, valuesize - valueindex);
|
||||
for (blockcount = 0; blockcount < min;blockcount ++) {
|
||||
if (array[start + blockcount] !=
|
||||
valuearray[valueindex + blockcount]) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (blockcount == blocksize || valueindex + blockcount == valuesize) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// if no similar block is found in value array
|
||||
// we populate the result arrays with data
|
||||
for (min = Math.min(blocksize, length - start); blockcount < min;
|
||||
blockcount ++) {
|
||||
valuearray[valuesize ++] = array[start + blockcount];
|
||||
}
|
||||
|
||||
indexarray[index ++] = valueindex;
|
||||
start += blocksize;
|
||||
}
|
||||
|
||||
return valuesize;
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes argument array and forms a compact array into the result arrays.
|
||||
* The result will be
|
||||
|
Loading…
Reference in New Issue
Block a user