scuffed-code/tools/unicodetools/com/ibm/text/utility/CompactByteArray.java

306 lines
11 KiB
Java
Raw Normal View History

2001-08-30 20:50:18 +00:00
package com.ibm.text.utility;
/*
* %W% %E%
*
* (C) Copyright Taligent, Inc. 1996 - All Rights Reserved
* (C) Copyright IBM Corp. 1996 - All Rights Reserved
*
* Portions copyright (c) 1996 Sun Microsystems, Inc. All Rights Reserved.
*
* The original version of this source code and documentation is copyrighted
* and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
* materials are provided under terms of a License Agreement between Taligent
* and Sun. This technology is protected by multiple US and International
* patents. This notice and attribution to Taligent may not be removed.
* Taligent is a registered trademark of Taligent, Inc.
*
* Permission to use, copy, modify, and distribute this software
* and its documentation for NON-COMMERCIAL purposes and without
* fee is hereby granted provided that this copyright notice
* appears in all copies. Please refer to the file "copyright.html"
* for further important copyright and licensing information.
*
* SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF
* THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
* TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
* PARTICULAR PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR
* ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
* DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
*
*/
import java.io.*;
/**
*
* Provides a compact way to store information that is indexed by Unicode
* values, such as character properties, types, keyboard values, etc.
* only for internal use for now. Made public for discussion purposes.
*
* @see CompactIntArray
* @see CompactShortArray
* @version %I% %G%
* @author Helena Shih
*/
public final class CompactByteArray implements Serializable {
public static final int UNICODECOUNT =65536;
public CompactByteArray()
{
this((byte)0);
}
public CompactByteArray(byte defaultValue)
{
int i;
values = new byte[UNICODECOUNT];
indices = new short[INDEXCOUNT];
for (i = 0; i < UNICODECOUNT; ++i) {
values[i] = defaultValue;
}
for (i = 0; i < INDEXCOUNT; ++i) {
indices[i] = (short)(i<<BLOCKSHIFT);
}
isCompact = false;
}
public CompactByteArray(short indexArray[],
byte newValues[]) throws IllegalArgumentException
{
int i;
if (indexArray.length != INDEXCOUNT)
throw new IllegalArgumentException();
for (i = 0; i < INDEXCOUNT; ++i) {
short index = indexArray[i];
if ((index < 0) || (index >= newValues.length+BLOCKCOUNT))
throw new IllegalArgumentException();
}
indices = indexArray;
values = newValues;
isCompact = true;
}
public void writeArrays(PrintWriter output)
{
int i;
output.println("package com.ibm.text.unicode;");
output.println("import com.ibm.text.collections.*;");
output.println("public final class GeneralCategory {");
output.println(" public static byte getCategory (char ch) {");
output.println(" return compactArray.elementAt(ch);");
output.println(" }");
output.println(" static CompactByteArray compactArray;");
output.println(" static void init () {");
output.println(" short[] index = {");
for (i = 0; i < indices.length; i++) {
if (i % 8 == 0) output.println();
output.print("(short)" + (indices[i] & 0xFFFF) + ", ");
}
output.println(" };");
output.println(" byte[] data = {");
for (i = 0; i < values.length; i++) {
if (i % 8 == 0) output.println();
output.print("(byte)" + (values[i] & 0xFF) + ", ");
}
output.println(" };");
output.println(" compactArray = new CompactByteArray(index, data);");
output.println(" }");
output.println("}");
output.close();
}
public byte elementAt(char index) // parameterized on byte
{
return (values[(indices[index >>> BLOCKSHIFT] & 0xFFFF) +
(index & BLOCKMASK)]);
}
// Set automatically expands the array if it is compacted.
// parameterized on value (byte)
public void setElementAt(char index, byte value)
{
if (isCompact)
expand();
values[(int)index] = value;
}
public void setElementAt(char start, char end, byte value)
{
int i;
if (isCompact) {
expand();
}
for (i = start; i <= end; ++i) {
values[i] = value;
}
}
// Compact the array.
// The value of cycle determines how large the overlap can be.
// A cycle of 1 is the most compacted, but takes the most time to do.
// If values stored in the array tend to repeat in cycles of, say, 16,
// then using that will be faster than cycle = 1, and get almost the
// same compression. cycle is hardcoded as BLOCKCOUNT now.
public void compact()
{
if (isCompact == false) {
char[] tempIndex;
int tempIndexCount;
byte[] tempArray;
short iBlock, iIndex;
// make temp storage, larger than we need
tempIndex = new char[UNICODECOUNT];
// set up first block.
tempIndexCount = BLOCKCOUNT;
for (iIndex = 0; iIndex < BLOCKCOUNT; ++iIndex) {
tempIndex[iIndex] = (char)iIndex;
}; // endfor (iIndex = 0; .....)
indices[0] = (short)0;
// for each successive block, find out its first position
// in the compacted array
for (iBlock = 1; iBlock < INDEXCOUNT; ++iBlock) {
int newCount, firstPosition, block;
block = iBlock<<BLOCKSHIFT;
if (DEBUGSMALL) if (block > DEBUGSMALLLIMIT) break;
firstPosition = FindOverlappingPosition( block, tempIndex,
tempIndexCount );
newCount = firstPosition + BLOCKCOUNT;
if (newCount > tempIndexCount) {
for (iIndex = (short)tempIndexCount;
iIndex < newCount;
++iIndex) {
tempIndex[iIndex] = (char)
(iIndex - firstPosition + block);
} // endfor (iIndex = tempIndexCount....)
tempIndexCount = newCount;
} // endif (newCount > tempIndexCount)
indices[iBlock] = (short)firstPosition;
} // endfor (iBlock = 1.....)
// now allocate and copy the items into the array
tempArray = new byte[tempIndexCount];
for (iIndex = 0; iIndex < tempIndexCount; ++iIndex) {
tempArray[iIndex] = values[tempIndex[iIndex]];
}
values = null;
values = tempArray;
isCompact = true;
} // endif (isCompact != false)
}
// Expanded takes the array back to a 65536 element array
public void expand()
{
int i;
if (isCompact) {
byte[] tempArray;
tempArray = new byte[UNICODECOUNT];
for (i = 0; i < UNICODECOUNT; ++i) {
tempArray[i] = elementAt((char)i);
}
for (i = 0; i < INDEXCOUNT; ++i) {
indices[i] = (short)(i<<BLOCKSHIFT);
}
values = null;
values = tempArray;
isCompact = false;
}
}
// Print char Array : Debug only
public void printIndex(short start, short count)
{
int i;
for (i = start; i < count; ++i)
{
System.out.println(i + " -> : " +
(int)((indices[i] >= 0) ?
indices[i] :
indices[i] + UNICODECOUNT));
}
System.out.println();
}
public void printPlainArray(int start,int count, char[] tempIndex)
{
int iIndex;
if (tempIndex != null)
{
for (iIndex = start; iIndex < start + count; ++iIndex)
{
System.out.print(" " + (int)values[tempIndex[iIndex]]);
}
}
else
{
for (iIndex = start; iIndex < start + count; ++iIndex)
{
System.out.print(" " + (int)values[iIndex]);
}
}
System.out.println(" Range: start " + start + " , count " + count);
}
// # of elements in the indexed array
public short capacity()
{
return (short)values.length;
}
public int storage()
{
return values.length * 1 + indices.length * 2 + 12;
}
private byte[] getArray()
{
return values;
}
private int
FindOverlappingPosition(int start, char[] tempIndex, int tempIndexCount)
{
int i;
short j;
short currentCount;
if (DEBUGOVERLAP && start < DEBUGSHOWOVERLAPLIMIT) {
printPlainArray(start, BLOCKCOUNT, null);
printPlainArray(0, tempIndexCount, tempIndex);
}
for (i = 0; i < tempIndexCount; i += BLOCKCOUNT) {
currentCount = (short)BLOCKCOUNT;
if (i + BLOCKCOUNT > tempIndexCount) {
currentCount = (short)(tempIndexCount - i);
}
for (j = 0; j < currentCount; ++j) {
if (values[start + j] != values[tempIndex[i + j]]) break;
}
if (j == currentCount) break;
}
if (DEBUGOVERLAP && start < DEBUGSHOWOVERLAPLIMIT) {
for (j = 1; j < i; ++j) {
System.out.print(" ");
}
printPlainArray(start, BLOCKCOUNT, null);
System.out.println(" Found At: " + i);
}
return i;
}
private static final int DEBUGSHOWOVERLAPLIMIT = 100;
private static final boolean DEBUGTRACE = false;
private static final boolean DEBUGSMALL = false;
private static final boolean DEBUGOVERLAP = false;
private static final int DEBUGSMALLLIMIT = 30000;
private static final int BLOCKSHIFT =6;
private static final int BLOCKCOUNT =(1<<BLOCKSHIFT);
private static final int INDEXSHIFT =(16-BLOCKSHIFT);
private static final int INDEXCOUNT =(1<<INDEXSHIFT);
private static final int BLOCKMASK = BLOCKCOUNT - 1;
private byte[] values; // char -> short (char parameterized short)
private short indices[];
private boolean isCompact;
};