scuffed-code/tools/unicodetools/com/ibm/text/utility/CompactShortArray.java

381 lines
13 KiB
Java
Raw Normal View History

2001-08-31 00:20:40 +00:00
/**
*******************************************************************************
* Copyright (C) 1996-2001, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/CompactShortArray.java,v $
* $Date: 2001/08/31 00:19:16 $
* $Revision: 1.2 $
*
*******************************************************************************
*/
2001-08-30 20:50:18 +00:00
package com.ibm.text.utility;
/*
* %W% %E%
*
* (C) Copyright Taligent, Inc. 1996 - All Rights Reserved
* (C) Copyright IBM Corp. 1996 - All Rights Reserved
*
* Portions copyright (c) 1996 Sun Microsystems, Inc. All Rights Reserved.
*
* The original version of this source code and documentation is copyrighted
* and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
* materials are provided under terms of a License Agreement between Taligent
* and Sun. This technology is protected by multiple US and International
* patents. This notice and attribution to Taligent may not be removed.
* Taligent is a registered trademark of Taligent, Inc.
*
* Permission to use, copy, modify, and distribute this software
* and its documentation for NON-COMMERCIAL purposes and without
* fee is hereby granted provided that this copyright notice
* appears in all copies. Please refer to the file "copyright.html"
* for further important copyright and licensing information.
*
* SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF
* THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
* TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
* PARTICULAR PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR
* ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
* DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
*
*/
import java.io.*;
import java.lang.*;
/**
* class CompactShortArray : use only on primitive data types
* Provides a compact way to store information that is indexed by Unicode
* values, such as character properties, types, keyboard values, etc. This
* is very useful when you have a block of Unicode data that contains
* significant values while the rest of the Unicode data is unused in the
* application, or when you have a lot of redundancy, such as where all 21,000
* Han ideographs have the same value. Even so, lookup is much faster than in a
* hash table.
* A compact array of any primitive data type serves two purposes:
* <UL type = round>
* <LI>Fast access of the indexed values.
* <LI>Smaller memory footprint.
* </UL>
* A compact array is composed of an index array and a value array. The index
* array contains the indices of Unicode characters into the value array.
* @see CompactByteArray
* @see CompactIntArray
* @see CompactCharArray
* @see CompactStringArray
* @version %I% %G%
* @author Helena Shih
*/
public final class CompactShortArray implements Serializable {
    /**
     * The total number of Unicode characters.
     */
    public static final int UNICODECOUNT = 65536;

    // Debugging switches; all compile-time constants.
    private static final int DEBUGSHOWOVERLAPLIMIT = 100;
    private static final boolean DEBUGTRACE = false;
    private static final boolean DEBUGSMALL = false;
    private static final boolean DEBUGOVERLAP = false;
    private static final int DEBUGSMALLLIMIT = 30000;

    // Layout: a character is split into a block number (high bits) and an
    // offset inside that block (low BLOCKSHIFT bits).
    private static final int BLOCKSHIFT = 7;
    private static final int BLOCKCOUNT = (1 << BLOCKSHIFT);
    private static final int INDEXSHIFT = (16 - BLOCKSHIFT);
    private static final int INDEXCOUNT = (1 << INDEXSHIFT);
    private static final int BLOCKMASK = BLOCKCOUNT - 1;

    // Mask used to read a short index entry as an unsigned 16-bit offset.
    private static final int UNSIGNED_SHORT_MASK = 0xFFFF;

    private short values[]; // char -> short (char parameterized short)
    private short indices[]; // block number -> start offset in values (unsigned)
    private boolean isCompact;

    /**
     * Default constructor for CompactShortArray, the default value of the
     * compact array is 0.
     */
    public CompactShortArray() {
        this((short) 0);
    }

    /**
     * Constructor for CompactShortArray. Creates an expanded (non-compact)
     * array in which every character maps to the given value.
     * @param defaultValue the default value of the compact array.
     */
    public CompactShortArray(short defaultValue) {
        values = new short[UNICODECOUNT];
        indices = new short[INDEXCOUNT];
        for (int ch = 0; ch < UNICODECOUNT; ++ch) {
            values[ch] = defaultValue;
        }
        for (int block = 0; block < INDEXCOUNT; ++block) {
            indices[block] = (short) (block << BLOCKSHIFT);
        }
        isCompact = false;
    }

    /**
     * Constructor for CompactShortArray. The given arrays are stored by
     * reference, not copied.
     * <p>
     * Index entries are interpreted as unsigned 16-bit offsets, exactly as
     * {@link #elementAt(char)} reads them, so offsets of 32768 and above
     * (negative when viewed as a short) are accepted. Unless the arrays
     * describe the fully expanded layout (65536 values with each block
     * pointing at its own position), the instance is marked compact, so the
     * first call to a <code>setElementAt</code> method expands it into
     * private storage before writing.
     * @param indexArray the indices of the compact array.
     * @param newValues the values of the compact array.
     * @exception IllegalArgumentException If the index is out of range.
     */
    public CompactShortArray(short indexArray[],
                             short newValues[]) throws IllegalArgumentException {
        if (indexArray.length != INDEXCOUNT) {
            throw new IllegalArgumentException("Index out of bounds.");
        }
        // Same (lenient) upper bound as before; only the signed/unsigned
        // interpretation of the entries has changed.
        final int limit = newValues.length + BLOCKCOUNT;
        boolean expandedLayout = (newValues.length == UNICODECOUNT);
        for (int block = 0; block < INDEXCOUNT; ++block) {
            int offset = indexArray[block] & UNSIGNED_SHORT_MASK;
            if (offset >= limit) {
                throw new IllegalArgumentException("Index out of bounds.");
            }
            if (offset != (block << BLOCKSHIFT)) {
                expandedLayout = false;
            }
        }
        indices = indexArray;
        values = newValues;
        // Direct writes to values[ch] are only valid in the expanded layout.
        isCompact = !expandedLayout;
    }

    /**
     * Get the mapped value of a Unicode character.
     * @param index the character to get the mapped value with
     * @return the mapped value of the given character
     */
    public short elementAt(char index) // parameterized on short
    {
        int blockStart = indices[index >> BLOCKSHIFT] & UNSIGNED_SHORT_MASK;
        return values[blockStart + (index & BLOCKMASK)];
    }

    /**
     * Set a new value for a Unicode character.
     * Set automatically expands the array if it is compacted.
     * @param index the character to set the mapped value with
     * @param value the new mapped value
     */
    public void setElementAt(char index, short value) {
        if (isCompact) {
            expand();
        }
        values[(int) index] = value;
    }

    /**
     * Set new values for a range of Unicode characters.
     * Set automatically expands the array if it is compacted.
     * @param start the starting offset of the range
     * @param end the ending offset of the range (inclusive)
     * @param value the new mapped value
     */
    public void setElementAt(char start, char end, short value) {
        if (isCompact) {
            expand();
        }
        // int counter: with end == 0xFFFF a char counter would never terminate.
        for (int ch = start; ch <= end; ++ch) {
            values[ch] = value;
        }
    }

    /**
     * Compact the array. Blocks of BLOCKCOUNT values whose contents are equal
     * to an already stored block share that block's storage. Does nothing if
     * the array is already compact.
     */
    public void compact() {
        if (isCompact) {
            return;
        }
        // tempIndex[k] is the original character whose value ends up at
        // position k of the packed array; sized for the worst case.
        char[] tempIndex = new char[UNICODECOUNT];

        // The first block is always stored as-is at position 0.
        int tempIndexCount = BLOCKCOUNT;
        for (int k = 0; k < BLOCKCOUNT; ++k) {
            tempIndex[k] = (char) k;
        }
        indices[0] = (short) 0;

        // For each successive block, find its position in the packed array.
        for (int iBlock = 1; iBlock < INDEXCOUNT; ++iBlock) {
            int block = iBlock << BLOCKSHIFT;
            if (DEBUGSMALL && block > DEBUGSMALLLIMIT) {
                break;
            }
            int firstPosition = findOverlappingPosition(block, tempIndex,
                                                        tempIndexCount);
            int newCount = firstPosition + BLOCKCOUNT;
            if (newCount > tempIndexCount) {
                // int counter: the packed table can hold up to 65536 entries,
                // which a short counter cannot represent.
                for (int k = tempIndexCount; k < newCount; ++k) {
                    tempIndex[k] = (char) (k - firstPosition + block);
                }
                tempIndexCount = newCount;
            }
            // Stored as an unsigned 16-bit offset (may look negative as a short).
            indices[iBlock] = (short) firstPosition;
        }

        // Now allocate and copy the items into the packed array.
        short[] packed = new short[tempIndexCount];
        for (int k = 0; k < tempIndexCount; ++k) {
            packed[k] = values[tempIndex[k]];
        }
        values = packed;
        isCompact = true;
    }

    /** For internal use only. Do not modify the result, the behavior of
     * modified results are undefined.
     */
    public short[] getIndexArray() {
        return indices;
    }

    /** For internal use only. Do not modify the result, the behavior of
     * modified results are undefined.
     */
    public short[] getStringArray() {
        return values;
    }

    // --------------------------------------------------------------
    // package private
    // --------------------------------------------------------------

    /**
     * Prints the index array (as unsigned values) and then the value array
     * (as signed values) to System.out, each as a brace-delimited list of
     * "(short)n" entries.
     */
    void writeArrays() {
        final int lastIndex = INDEXCOUNT - 1;
        System.out.println("{");
        for (int i = 0; i < lastIndex; i++) {
            System.out.print("(short)" + (getIndexArrayValue(i) & UNSIGNED_SHORT_MASK) + ", ");
            if (i != 0 && i % 10 == 0) {
                System.out.println();
            }
        }
        System.out.println("(short)"
                + (getIndexArrayValue(lastIndex) & UNSIGNED_SHORT_MASK)
                + " }");

        final int cnt = values.length;
        System.out.println("{");
        for (int i = 0; i < cnt - 1; i++) {
            System.out.print("(short)" + (int) getArrayValue(i) + ", ");
            if (i != 0 && i % 10 == 0) {
                System.out.println();
            }
        }
        System.out.println("(short)" + (int) getArrayValue(cnt - 1) + " }");
    }

    /**
     * Print index array : Debug only.
     * Note that, despite its name, <code>count</code> is used as the
     * exclusive end position of the printed range, not as a length.
     */
    void printIndex(short start, short count) {
        for (int i = start; i < count; ++i) {
            System.out.println(i + " -> : " + (indices[i] & UNSIGNED_SHORT_MASK));
        }
        System.out.println();
    }

    /**
     * Debug only. Prints <code>count</code> values starting at position
     * <code>start</code>; when <code>tempIndex</code> is non-null the
     * positions are first mapped through it.
     */
    void printPlainArray(int start, int count, char[] tempIndex) {
        final int end = start + count;
        for (int pos = start; pos < end; ++pos) {
            int valueIndex = (tempIndex != null) ? tempIndex[pos] : pos;
            System.out.print(" " + (int) getArrayValue(valueIndex));
        }
        System.out.println(" Range: start " + start + " , count " + count);
    }

    // --------------------------------------------------------------
    // private
    // --------------------------------------------------------------

    /**
     * Expanding takes the array back to a 65536 element array.
     * Both arrays are freshly allocated rather than rewritten in place, so
     * arrays handed to the constructor (possibly still used by the caller or
     * by another instance) are never modified by expansion.
     */
    private void expand() {
        if (!isCompact) {
            return;
        }
        short[] flatValues = new short[UNICODECOUNT];
        for (int ch = 0; ch < UNICODECOUNT; ++ch) {
            flatValues[ch] = elementAt((char) ch);
        }
        short[] identityIndex = new short[INDEXCOUNT];
        for (int block = 0; block < INDEXCOUNT; ++block) {
            identityIndex[block] = (short) (block << BLOCKSHIFT);
        }
        indices = identityIndex;
        values = flatValues;
        isCompact = false;
    }

    // # of elements in the indexed array (int: the length can be 65536)
    private int capacity() {
        return values.length;
    }

    /**
     * Returns a size estimate computed as two bytes per value entry, two
     * bytes per index entry, plus a fixed 12.
     */
    public int storage() {
        return values.length * 2 + indices.length * 2 + 12;
    }

    private short getArrayValue(int n) {
        return values[n];
    }

    private short getIndexArrayValue(int n) {
        return indices[n];
    }

    /**
     * Searches the packed layout built so far, one block-aligned position at
     * a time, for a block whose values equal those of the source block that
     * begins at <code>start</code>.
     * @return the matching position, or <code>tempIndexCount</code> if no
     *         stored block matches (i.e. the block must be appended)
     */
    private int findOverlappingPosition(int start, char[] tempIndex,
                                        int tempIndexCount) {
        if (DEBUGOVERLAP && start < DEBUGSHOWOVERLAPLIMIT) {
            printPlainArray(start, BLOCKCOUNT, null);
            printPlainArray(0, tempIndexCount, tempIndex);
        }
        int position;
        for (position = 0; position < tempIndexCount; position += BLOCKCOUNT) {
            int currentCount = BLOCKCOUNT;
            if (position + BLOCKCOUNT > tempIndexCount) {
                currentCount = tempIndexCount - position;
            }
            int matched = 0;
            while (matched < currentCount
                    && values[start + matched] == values[tempIndex[position + matched]]) {
                ++matched;
            }
            if (matched == currentCount) {
                break;
            }
        }
        if (DEBUGOVERLAP && start < DEBUGSHOWOVERLAPLIMIT) {
            for (int pad = 1; pad < position; ++pad) {
                System.out.print(" ");
            }
            printPlainArray(start, BLOCKCOUNT, null);
            System.out.println(" Found At: " + position);
        }
        return position;
    }
}