ICU-4392 slightly better trie compaction by finding overlaps between blocks with multiple values in the overlap region

X-SVN-Rev: 17258
This commit is contained in:
Markus Scherer 2005-02-25 05:26:04 +00:00
parent 19380bf95b
commit a715e415bd
2 changed files with 33 additions and 43 deletions

View File

@ -1,6 +1,6 @@
/*
******************************************************************************
* Copyright (C) 1996-2004, International Business Machines Corporation and *
* Copyright (C) 1996-2005, International Business Machines Corporation and *
* others. All Rights Reserved. *
******************************************************************************
*/
@ -404,11 +404,10 @@ public class IntTrieBuilder extends TrieBuilder
* - removes blocks that are identical with earlier ones
* - overlaps adjacent blocks as much as possible (if overlap == true)
* - moves blocks in steps of the data granularity
* - moves and overlaps blocks that overlap with multiple values in the overlap region
*
* It does not
* - try to move and overlap blocks that are not already adjacent
* - try to move and overlap blocks that overlap with multiple values in
* the overlap region
* @param overlap flag
*/
private void compact(boolean overlap)
@ -429,21 +428,21 @@ public class IntTrieBuilder extends TrieBuilder
}
int newStart = DATA_BLOCK_LENGTH;
int prevEnd = newStart - 1;
int i;
for (int start = newStart; start < m_dataLength_;) {
// start: index of first entry of current block
// prevEnd: index to last entry of previous block
// newStart: index where the current block is to be moved
// (right after current end of already-compacted data)
// skip blocks that are not used
if (m_map_[start >>> SHIFT_] < 0) {
// advance start to the next block
start += DATA_BLOCK_LENGTH;
// leave prevEnd and newStart with the previous block!
// leave newStart with the previous block!
continue;
}
// search for an identical block
if (start >= overlapStart) {
int i = findSameDataBlock(m_data_, newStart, start,
i = findSameDataBlock(m_data_, newStart, start,
overlap ? DATA_GRANULARITY_ : DATA_BLOCK_LENGTH);
if (i >= 0) {
// found an identical block, set the other block's index
@ -451,27 +450,20 @@ public class IntTrieBuilder extends TrieBuilder
m_map_[start >>> SHIFT_] = i;
// advance start to the next block
start += DATA_BLOCK_LENGTH;
// leave prevEnd and newStart with the previous block!
// leave newStart with the previous block!
continue;
}
}
// see if the beginning of this block can be overlapped with the
// end of the previous block
// x: first value in the current block
int x = m_data_[start];
int i = 0;
if (x == m_data_[prevEnd] && overlap && start >= overlapStart)
{
// overlap by at least one
for (i = 1; i < DATA_BLOCK_LENGTH
&& x == m_data_[start + i]
&& x == m_data_[prevEnd - i]; ++ i)
{
}
// overlap by i, rounded down for the data block granularity
i &= ~(DATA_GRANULARITY_ - 1);
}
if(overlap && start>=overlapStart) {
/* look for maximum overlap (modulo granularity) with the previous, adjacent block */
for(i=DATA_BLOCK_LENGTH-DATA_GRANULARITY_;
i>0 && !equal_int(m_data_, newStart-i, start, i);
i-=DATA_GRANULARITY_) {}
} else {
i=0;
}
if (i > 0) {
// some overlap
m_map_[start >>> SHIFT_] = newStart - i;
@ -493,11 +485,9 @@ public class IntTrieBuilder extends TrieBuilder
newStart += DATA_BLOCK_LENGTH;
start = newStart;
}
prevEnd = newStart - 1;
}
// now adjust the index (stage 1) table
for (int i = 0; i < m_indexLength_; ++ i) {
for (i = 0; i < m_indexLength_; ++ i) {
m_index_[i] = m_map_[Math.abs(m_index_[i]) >>> SHIFT_];
}
m_dataLength_ = newStart;
@ -517,13 +507,7 @@ public class IntTrieBuilder extends TrieBuilder
dataLength -= DATA_BLOCK_LENGTH;
for (int block = 0; block <= dataLength; block += step) {
int i = 0;
for (i = 0; i < DATA_BLOCK_LENGTH; ++ i) {
if (data[block + i] != data[otherBlock + i]) {
break;
}
}
if (i == DATA_BLOCK_LENGTH) {
if(equal_int(data, block, otherBlock, DATA_BLOCK_LENGTH)) {
return block;
}
}

View File

@ -1,6 +1,6 @@
/*
******************************************************************************
* Copyright (C) 1996-2004, International Business Machines Corporation and *
* Copyright (C) 1996-2005, International Business Machines Corporation and *
* others. All Rights Reserved. *
******************************************************************************
*/
@ -193,8 +193,20 @@ public class TrieBuilder
m_isCompacted_ = table.m_isCompacted_;
}
// protected data member ----------------------------------------------
// protected functions ------------------------------------------------
/**
* Compare two sections of an array for equality.
*/
protected static final boolean equal_int(int[] array, int start1, int start2, int length) {
while(length>0 && array[start1]==array[start2]) {
++start1;
++start2;
--length;
}
return length==0;
}
/**
* Set a value in the trie index map to indicate which data block
* is referenced and which one is not.
@ -229,13 +241,7 @@ public class TrieBuilder
{
for (int block = BMP_INDEX_LENGTH_; block < indexLength;
block += SURROGATE_BLOCK_COUNT_) {
int i = 0;
for (; i < SURROGATE_BLOCK_COUNT_; ++ i) {
if (index[block + i] != index[otherBlock + i]) {
break;
}
}
if (i == SURROGATE_BLOCK_COUNT_) {
if(equal_int(index, block, otherBlock, SURROGATE_BLOCK_COUNT_)) {
return block;
}
}