ICU-3984 initial commit of the collation reordering
X-SVN-Rev: 29015
This commit is contained in:
parent
5af2364f93
commit
1fac4c690b
@ -3631,6 +3631,7 @@ final class CollationParsedRuleBuilder {
|
|||||||
collator.m_isHiragana4_ = option.m_isHiragana4_;
|
collator.m_isHiragana4_ = option.m_isHiragana4_;
|
||||||
collator.setStrength(option.m_strength_);
|
collator.setStrength(option.m_strength_);
|
||||||
collator.m_variableTopValue_ = option.m_variableTopValue_;
|
collator.m_variableTopValue_ = option.m_variableTopValue_;
|
||||||
|
collator.m_scriptOrder_ = option.m_scriptOrder_;
|
||||||
collator.latinOneFailed_ = false;
|
collator.latinOneFailed_ = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7,6 +7,7 @@
|
|||||||
package com.ibm.icu.text;
|
package com.ibm.icu.text;
|
||||||
|
|
||||||
import java.text.ParseException;
|
import java.text.ParseException;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
@ -16,6 +17,7 @@ import com.ibm.icu.util.UResourceBundle;
|
|||||||
import com.ibm.icu.util.ULocale;
|
import com.ibm.icu.util.ULocale;
|
||||||
import com.ibm.icu.impl.UCharacterProperty;
|
import com.ibm.icu.impl.UCharacterProperty;
|
||||||
import com.ibm.icu.lang.UCharacter;
|
import com.ibm.icu.lang.UCharacter;
|
||||||
|
import com.ibm.icu.lang.UScript;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Class for parsing collation rules, produces a list of tokens that will be
|
* Class for parsing collation rules, produces a list of tokens that will be
|
||||||
@ -89,6 +91,14 @@ final class CollationRuleParser
|
|||||||
m_decomposition_ = collator.getDecomposition();
|
m_decomposition_ = collator.getDecomposition();
|
||||||
m_strength_ = collator.getStrength();
|
m_strength_ = collator.getStrength();
|
||||||
m_isHiragana4_ = collator.m_isHiragana4_;
|
m_isHiragana4_ = collator.m_isHiragana4_;
|
||||||
|
|
||||||
|
if(collator.m_scriptOrder_ != null){
|
||||||
|
m_scriptOrder_ = new int[collator.m_scriptOrder_.length];
|
||||||
|
for(int i = 0; i < m_scriptOrder_.length; i++){
|
||||||
|
m_scriptOrder_[i] = collator.m_scriptOrder_[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// package private data members --------------------------------------
|
// package private data members --------------------------------------
|
||||||
@ -119,6 +129,11 @@ final class CollationRuleParser
|
|||||||
* attribute for special Hiragana
|
* attribute for special Hiragana
|
||||||
*/
|
*/
|
||||||
boolean m_isHiragana4_;
|
boolean m_isHiragana4_;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* the ordering of the scripts
|
||||||
|
*/
|
||||||
|
int[] m_scriptOrder_;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -291,6 +306,14 @@ final class CollationRuleParser
|
|||||||
collator.m_defaultCaseFirst_ = m_options_.m_caseFirst_;
|
collator.m_defaultCaseFirst_ = m_options_.m_caseFirst_;
|
||||||
collator.m_defaultIsHiragana4_ = m_options_.m_isHiragana4_;
|
collator.m_defaultIsHiragana4_ = m_options_.m_isHiragana4_;
|
||||||
collator.m_defaultVariableTopValue_ = m_options_.m_variableTopValue_;
|
collator.m_defaultVariableTopValue_ = m_options_.m_variableTopValue_;
|
||||||
|
if(m_options_.m_scriptOrder_ != null) {
|
||||||
|
collator.m_defaultScriptOrder_ = new int[m_options_.m_scriptOrder_.length];
|
||||||
|
for (int i = 0; i < m_options_.m_scriptOrder_.length; i++) {
|
||||||
|
collator.m_defaultScriptOrder_[i] = m_options_.m_scriptOrder_[i];
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
collator.m_defaultScriptOrder_ = null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// private inner classes -------------------------------------------------
|
// private inner classes -------------------------------------------------
|
||||||
@ -662,7 +685,7 @@ final class CollationRuleParser
|
|||||||
RULES_OPTIONS_[15] = new TokenOption("undefined",
|
RULES_OPTIONS_[15] = new TokenOption("undefined",
|
||||||
RuleBasedCollator.Attribute.LIMIT_,
|
RuleBasedCollator.Attribute.LIMIT_,
|
||||||
null, null);
|
null, null);
|
||||||
RULES_OPTIONS_[16] = new TokenOption("scriptOrder",
|
RULES_OPTIONS_[16] = new TokenOption("reorder",
|
||||||
RuleBasedCollator.Attribute.LIMIT_,
|
RuleBasedCollator.Attribute.LIMIT_,
|
||||||
null, null);
|
null, null);
|
||||||
RULES_OPTIONS_[17] = new TokenOption("charsetname",
|
RULES_OPTIONS_[17] = new TokenOption("charsetname",
|
||||||
@ -2028,7 +2051,6 @@ final class CollationRuleParser
|
|||||||
return new UnicodeSet(source.substring(start, start+current)); //uset_openPattern(start, current);
|
return new UnicodeSet(source.substring(start, start+current)); //uset_openPattern(start, current);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/** in C, optionarg is passed by reference to function.
|
/** in C, optionarg is passed by reference to function.
|
||||||
* We use a private int to simulate this.
|
* We use a private int to simulate this.
|
||||||
*/
|
*/
|
||||||
@ -2061,6 +2083,7 @@ final class CollationRuleParser
|
|||||||
}
|
}
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reads and set collation options
|
* Reads and set collation options
|
||||||
* @return TOKEN_SUCCESS if option is set correct, 0 otherwise
|
* @return TOKEN_SUCCESS if option is set correct, 0 otherwise
|
||||||
@ -2152,6 +2175,11 @@ final class CollationRuleParser
|
|||||||
m_optionEnd_ = m_current_-1;
|
m_optionEnd_ = m_current_-1;
|
||||||
return TOKEN_SUCCESS_MASK_;
|
return TOKEN_SUCCESS_MASK_;
|
||||||
}
|
}
|
||||||
|
else if(i == 16) {
|
||||||
|
m_current_ = m_optionarg_; // skip opening brace and name
|
||||||
|
parseScriptReorder();
|
||||||
|
return TOKEN_SUCCESS_MASK_;
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
throwParseException(m_rules_, optionarg);
|
throwParseException(m_rules_, optionarg);
|
||||||
}
|
}
|
||||||
@ -2282,4 +2310,31 @@ final class CollationRuleParser
|
|||||||
}
|
}
|
||||||
return rules;
|
return rules;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void parseScriptReorder() throws ParseException{
|
||||||
|
ArrayList<Integer> tempOrder = new ArrayList<Integer>();
|
||||||
|
int end = m_rules_.indexOf(']', m_current_);
|
||||||
|
while(m_current_ < end){
|
||||||
|
// Ensure that the following token is 4 characters long
|
||||||
|
if ((end != m_current_+4) &&
|
||||||
|
(m_rules_.charAt(m_current_+4) != ' ')) {
|
||||||
|
throw new ParseException(m_rules_, m_current_);
|
||||||
|
}
|
||||||
|
int[] script = UScript.getCode(m_rules_.substring(m_current_, m_current_+4));
|
||||||
|
if (script.length > 0) {
|
||||||
|
tempOrder.add(script[0]);
|
||||||
|
} else {
|
||||||
|
throw new ParseException(m_rules_, m_current_);
|
||||||
|
}
|
||||||
|
m_current_+= 4;
|
||||||
|
while (m_current_ < end && UCharacter.isWhitespace(m_rules_.charAt(m_current_)))
|
||||||
|
{ // eat whitespace
|
||||||
|
m_current_++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
m_options_.m_scriptOrder_ = new int[tempOrder.size()];
|
||||||
|
for(int i = 0; i < tempOrder.size(); i++){
|
||||||
|
m_options_.m_scriptOrder_[i] = tempOrder.get(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -225,6 +225,18 @@ public abstract class Collator implements Comparator<Object>, Cloneable
|
|||||||
*/
|
*/
|
||||||
public final static int CANONICAL_DECOMPOSITION = 17;
|
public final static int CANONICAL_DECOMPOSITION = 17;
|
||||||
|
|
||||||
|
public final static class CollationReorderCodes {
|
||||||
|
private CollationReorderCodes() {}
|
||||||
|
|
||||||
|
public final static int SPACE = 0x1000;
|
||||||
|
public final static int FIRST = SPACE;
|
||||||
|
public final static int PUNCTUATION = 0x1001;
|
||||||
|
public final static int SYMBOL = 0x1002;
|
||||||
|
public final static int CURRENCY = 0x1003;
|
||||||
|
public final static int DIGIT = 0x1004;
|
||||||
|
public final static int LIMIT = 0x1005;
|
||||||
|
|
||||||
|
}
|
||||||
// public methods --------------------------------------------------------
|
// public methods --------------------------------------------------------
|
||||||
|
|
||||||
// public setters --------------------------------------------------------
|
// public setters --------------------------------------------------------
|
||||||
@ -314,6 +326,17 @@ public abstract class Collator implements Comparator<Object>, Cloneable
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the order for scripts to be ordered in.
|
||||||
|
* @param order the reordering of scripts
|
||||||
|
* @see #getScriptOrder
|
||||||
|
* @stable
|
||||||
|
*/
|
||||||
|
public void setScriptOrder(int... order)
|
||||||
|
{
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
// public getters --------------------------------------------------------
|
// public getters --------------------------------------------------------
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -988,6 +1011,17 @@ public abstract class Collator implements Comparator<Object>, Cloneable
|
|||||||
* @stable ICU 2.8
|
* @stable ICU 2.8
|
||||||
*/
|
*/
|
||||||
public abstract VersionInfo getUCAVersion();
|
public abstract VersionInfo getUCAVersion();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Method to retrieve the script reordering
|
||||||
|
* @see #setScriptOrder
|
||||||
|
* @return the ordering of the scripts if one has been set, null otherwise.
|
||||||
|
* @stable
|
||||||
|
*/
|
||||||
|
public int[] getScriptOrder()
|
||||||
|
{
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
// protected constructor -------------------------------------------------
|
// protected constructor -------------------------------------------------
|
||||||
|
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
/**
|
/**
|
||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
* Copyright (C) 1996-2010, International Business Machines Corporation and *
|
* Copyright (C) 1996-2010, International Business Machines Corporation and *
|
||||||
* others. All Rights Reserved. *
|
* others. All Rights Reserved. *
|
||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*/
|
*/
|
||||||
package com.ibm.icu.text;
|
package com.ibm.icu.text;
|
||||||
|
|
||||||
import java.io.BufferedInputStream;
|
import java.io.BufferedInputStream;
|
||||||
@ -18,29 +18,30 @@ import com.ibm.icu.impl.ICUResourceBundle;
|
|||||||
import com.ibm.icu.impl.IntTrie;
|
import com.ibm.icu.impl.IntTrie;
|
||||||
import com.ibm.icu.lang.UCharacter;
|
import com.ibm.icu.lang.UCharacter;
|
||||||
import com.ibm.icu.text.CollationParsedRuleBuilder.InverseUCA;
|
import com.ibm.icu.text.CollationParsedRuleBuilder.InverseUCA;
|
||||||
|
import com.ibm.icu.text.RuleBasedCollator.LeadByteConstants;
|
||||||
import com.ibm.icu.text.RuleBasedCollator.UCAConstants;
|
import com.ibm.icu.text.RuleBasedCollator.UCAConstants;
|
||||||
import com.ibm.icu.util.VersionInfo;
|
import com.ibm.icu.util.VersionInfo;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>Internal reader class for ICU data file uca.icu containing
|
* <p>Internal reader class for ICU data file uca.icu containing
|
||||||
* Unicode Collation Algorithm data.</p>
|
* Unicode Collation Algorithm data.</p>
|
||||||
* <p>This class simply reads uca.icu, authenticates that it is a valid
|
* <p>This class simply reads uca.icu, authenticates that it is a valid
|
||||||
* ICU data file and split its contents up into blocks of data for use in
|
* ICU data file and split its contents up into blocks of data for use in
|
||||||
* <a href=Collator.html>com.ibm.icu.text.Collator</a>.
|
* <a href=Collator.html>com.ibm.icu.text.Collator</a>.
|
||||||
* </p>
|
* </p>
|
||||||
* <p>uca.icu which is in big-endian format is jared together with this
|
* <p>uca.icu which is in big-endian format is jared together with this
|
||||||
* package.</p>
|
* package.</p>
|
||||||
* @author Syn Wee Quek
|
* @author Syn Wee Quek
|
||||||
* @since release 2.2, April 18 2002
|
* @since release 2.2, April 18 2002
|
||||||
*/
|
*/
|
||||||
|
|
||||||
final class CollatorReader
|
final class CollatorReader
|
||||||
{
|
{
|
||||||
static char[] read(RuleBasedCollator rbc, UCAConstants ucac) throws IOException {
|
static char[] read(RuleBasedCollator rbc, UCAConstants ucac, LeadByteConstants leadByteConstants) throws IOException {
|
||||||
InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE+"/coll/ucadata.icu");
|
InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE+"/coll/ucadata.icu");
|
||||||
BufferedInputStream b = new BufferedInputStream(i, 90000);
|
BufferedInputStream b = new BufferedInputStream(i, 90000);
|
||||||
CollatorReader reader = new CollatorReader(b);
|
CollatorReader reader = new CollatorReader(b);
|
||||||
char[] result = reader.readImp(rbc, ucac);
|
char[] result = reader.readImp(rbc, ucac, leadByteConstants);
|
||||||
b.close();
|
b.close();
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
@ -62,14 +63,23 @@ final class CollatorReader
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void initRBC(RuleBasedCollator rbc, ByteBuffer data) throws IOException {
|
static void initRBC(RuleBasedCollator rbc, ByteBuffer data) throws IOException {
|
||||||
final int MIN_BINARY_DATA_SIZE_ = (42 + 25) << 2;
|
// TODO - why? 4 extra bytes? padding in the swapper?
|
||||||
|
//final int MIN_BINARY_DATA_SIZE_ = (42 + 25) << 2;
|
||||||
|
final int MIN_BINARY_DATA_SIZE_ = 272;
|
||||||
int dataLength = data.remaining();
|
int dataLength = data.remaining();
|
||||||
// TODO: Change the rest of this class to use the ByteBuffer directly, rather than
|
// TODO: Change the rest of this class to use the ByteBuffer directly, rather than
|
||||||
// a DataInputStream, except for passing an InputStream to ICUBinary.readHeader().
|
// a DataInputStream, except for passing an InputStream to ICUBinary.readHeader().
|
||||||
// Consider changing ICUBinary to also work with a ByteBuffer.
|
// Consider changing ICUBinary to also work with a ByteBuffer.
|
||||||
CollatorReader reader = new CollatorReader(makeByteBufferInputStream(data), false);
|
CollatorReader reader = new CollatorReader(makeByteBufferInputStream(data), false);
|
||||||
if (dataLength > MIN_BINARY_DATA_SIZE_) {
|
if (dataLength > MIN_BINARY_DATA_SIZE_) {
|
||||||
reader.readImp(rbc, null);
|
// for (int i = 0; i < dataLength; i++) {
|
||||||
|
// byte b = data.get(i);
|
||||||
|
// System.out.print("0x" + (((int) 0xff & b) < 0x0f ? "0" : "") + Integer.toHexString(0xff & b) + " ");
|
||||||
|
// if (i % 16 == 0) {
|
||||||
|
// System.out.println();
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
reader.readImp(rbc, null, null);
|
||||||
} else {
|
} else {
|
||||||
reader.readHeader(rbc);
|
reader.readHeader(rbc);
|
||||||
reader.readOptions(rbc);
|
reader.readOptions(rbc);
|
||||||
@ -77,30 +87,30 @@ final class CollatorReader
|
|||||||
rbc.setWithUCATables();
|
rbc.setWithUCATables();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static InverseUCA getInverseUCA() throws IOException {
|
static InverseUCA getInverseUCA() throws IOException {
|
||||||
InverseUCA result = null;
|
InverseUCA result = null;
|
||||||
InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE+"/coll/invuca.icu");
|
InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE+"/coll/invuca.icu");
|
||||||
// try {
|
// try {
|
||||||
// String invdat = "/com/ibm/icu/impl/data/invuca.icu";
|
// String invdat = "/com/ibm/icu/impl/data/invuca.icu";
|
||||||
// InputStream i = CollationParsedRuleBuilder.class.getResourceAsStream(invdat);
|
// InputStream i = CollationParsedRuleBuilder.class.getResourceAsStream(invdat);
|
||||||
BufferedInputStream b = new BufferedInputStream(i, 110000);
|
BufferedInputStream b = new BufferedInputStream(i, 110000);
|
||||||
result = CollatorReader.readInverseUCA(b);
|
result = CollatorReader.readInverseUCA(b);
|
||||||
b.close();
|
b.close();
|
||||||
i.close();
|
i.close();
|
||||||
return result;
|
return result;
|
||||||
// } catch (Exception e) {
|
// } catch (Exception e) {
|
||||||
// throw new RuntimeException(e.getMessage());
|
// throw new RuntimeException(e.getMessage());
|
||||||
// }
|
// }
|
||||||
}
|
}
|
||||||
|
|
||||||
// protected constructor ---------------------------------------------
|
// protected constructor ---------------------------------------------
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>Protected constructor.</p>
|
* <p>Protected constructor.</p>
|
||||||
* @param inputStream ICU collator file input stream
|
* @param inputStream ICU collator file input stream
|
||||||
* @exception IOException throw if data file fails authentication
|
* @exception IOException throw if data file fails authentication
|
||||||
*/
|
*/
|
||||||
private CollatorReader(InputStream inputStream) throws IOException
|
private CollatorReader(InputStream inputStream) throws IOException
|
||||||
{
|
{
|
||||||
this(inputStream, true);
|
this(inputStream, true);
|
||||||
@ -114,40 +124,40 @@ final class CollatorReader
|
|||||||
throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
|
throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
|
||||||
}
|
}
|
||||||
m_dataInputStream_ = new DataInputStream(inputStream);
|
m_dataInputStream_ = new DataInputStream(inputStream);
|
||||||
*/
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>Protected constructor.</p>
|
* <p>Protected constructor.</p>
|
||||||
* @param inputStream ICU uprops.icu file input stream
|
* @param inputStream ICU uprops.icu file input stream
|
||||||
* @param readICUHeader flag to indicate if the ICU header has to be read
|
* @param readICUHeader flag to indicate if the ICU header has to be read
|
||||||
* @exception IOException throw if data file fails authentication
|
* @exception IOException throw if data file fails authentication
|
||||||
*/
|
*/
|
||||||
private CollatorReader(InputStream inputStream, boolean readICUHeader)
|
private CollatorReader(InputStream inputStream, boolean readICUHeader)
|
||||||
throws IOException
|
throws IOException
|
||||||
{
|
{
|
||||||
if (readICUHeader) {
|
if (readICUHeader) {
|
||||||
byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_,
|
byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_,
|
||||||
UCA_AUTHENTICATE_);
|
UCA_AUTHENTICATE_);
|
||||||
// weiv: check that we have the correct Unicode version in
|
// weiv: check that we have the correct Unicode version in
|
||||||
// binary files
|
// binary files
|
||||||
VersionInfo UCDVersion = UCharacter.getUnicodeVersion();
|
VersionInfo UCDVersion = UCharacter.getUnicodeVersion();
|
||||||
if(UnicodeVersion[0] != UCDVersion.getMajor()
|
if(UnicodeVersion[0] != UCDVersion.getMajor()
|
||||||
|| UnicodeVersion[1] != UCDVersion.getMinor()) {
|
|| UnicodeVersion[1] != UCDVersion.getMinor()) {
|
||||||
throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
|
throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
m_dataInputStream_ = new DataInputStream(inputStream);
|
m_dataInputStream_ = new DataInputStream(inputStream);
|
||||||
}
|
}
|
||||||
|
|
||||||
// protected methods -------------------------------------------------
|
// protected methods -------------------------------------------------
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Read and break up the header stream of data passed in as arguments into
|
* Read and break up the header stream of data passed in as arguments into
|
||||||
* meaningful Collator data.
|
* meaningful Collator data.
|
||||||
* @param rbc RuleBasedCollator to populate with header information
|
* @param rbc RuleBasedCollator to populate with header information
|
||||||
* @exception IOException thrown when there's a data error.
|
* @exception IOException thrown when there's a data error.
|
||||||
*/
|
*/
|
||||||
private void readHeader(RuleBasedCollator rbc) throws IOException
|
private void readHeader(RuleBasedCollator rbc) throws IOException
|
||||||
{
|
{
|
||||||
m_size_ = m_dataInputStream_.readInt();
|
m_size_ = m_dataInputStream_.readInt();
|
||||||
@ -158,11 +168,11 @@ final class CollatorReader
|
|||||||
int readcount = 8; // for size and headersize
|
int readcount = 8; // for size and headersize
|
||||||
// structure which holds values for indirect positioning and implicit
|
// structure which holds values for indirect positioning and implicit
|
||||||
// ranges
|
// ranges
|
||||||
int UCAConst = m_dataInputStream_.readInt();
|
m_UCAConstOffset_ = m_dataInputStream_.readInt();
|
||||||
readcount += 4;
|
readcount += 4;
|
||||||
// this one is needed only for UCA, to copy the appropriate
|
// this one is needed only for UCA, to copy the appropriate
|
||||||
// contractions
|
// contractions
|
||||||
m_dataInputStream_.skip(4);
|
int contractionUCACombos = m_dataInputStream_.readInt();
|
||||||
readcount += 4;
|
readcount += 4;
|
||||||
// reserved for future use
|
// reserved for future use
|
||||||
m_dataInputStream_.skipBytes(4);
|
m_dataInputStream_.skipBytes(4);
|
||||||
@ -180,7 +190,7 @@ final class CollatorReader
|
|||||||
int contractionCE = m_dataInputStream_.readInt();
|
int contractionCE = m_dataInputStream_.readInt();
|
||||||
readcount += 4;
|
readcount += 4;
|
||||||
// needed for various closures int contractionSize
|
// needed for various closures int contractionSize
|
||||||
/*int contractionSize = */m_dataInputStream_.readInt();
|
int contractionSize = m_dataInputStream_.readInt();
|
||||||
readcount += 4;
|
readcount += 4;
|
||||||
// array of last collation element in expansion
|
// array of last collation element in expansion
|
||||||
int expansionEndCE = m_dataInputStream_.readInt();
|
int expansionEndCE = m_dataInputStream_.readInt();
|
||||||
@ -190,7 +200,7 @@ final class CollatorReader
|
|||||||
int expansionEndCEMaxSize = m_dataInputStream_.readInt();
|
int expansionEndCEMaxSize = m_dataInputStream_.readInt();
|
||||||
readcount += 4;
|
readcount += 4;
|
||||||
// size of endExpansionCE int expansionEndCESize
|
// size of endExpansionCE int expansionEndCESize
|
||||||
m_dataInputStream_.skipBytes(4);
|
/*int endExpansionCECount =*/ m_dataInputStream_.readInt();
|
||||||
readcount += 4;
|
readcount += 4;
|
||||||
// hash table of unsafe code points
|
// hash table of unsafe code points
|
||||||
int unsafe = m_dataInputStream_.readInt();
|
int unsafe = m_dataInputStream_.readInt();
|
||||||
@ -199,25 +209,35 @@ final class CollatorReader
|
|||||||
int contractionEnd = m_dataInputStream_.readInt();
|
int contractionEnd = m_dataInputStream_.readInt();
|
||||||
readcount += 4;
|
readcount += 4;
|
||||||
// int CEcount = m_dataInputStream_.readInt();
|
// int CEcount = m_dataInputStream_.readInt();
|
||||||
m_dataInputStream_.skipBytes(4);
|
int contractionUCACombosSize = m_dataInputStream_.readInt();
|
||||||
readcount += 4;
|
readcount += 4;
|
||||||
// is jamoSpecial
|
// is jamoSpecial
|
||||||
rbc.m_isJamoSpecial_ = m_dataInputStream_.readBoolean();
|
rbc.m_isJamoSpecial_ = m_dataInputStream_.readBoolean();
|
||||||
readcount++;
|
readcount++;
|
||||||
// padding
|
// isBigEndian and charSetFamily
|
||||||
m_dataInputStream_.skipBytes(3);
|
m_dataInputStream_.skipBytes(2);
|
||||||
readcount += 3;
|
readcount += 2;
|
||||||
|
int contractionUCACombosWidth = m_dataInputStream_.readByte();
|
||||||
|
readcount += 1;
|
||||||
rbc.m_version_ = readVersion(m_dataInputStream_);
|
rbc.m_version_ = readVersion(m_dataInputStream_);
|
||||||
readcount += 4;
|
readcount += 4;
|
||||||
rbc.m_UCA_version_ = readVersion(m_dataInputStream_);
|
rbc.m_UCA_version_ = readVersion(m_dataInputStream_);
|
||||||
readcount += 4;
|
readcount += 4;
|
||||||
rbc.m_UCD_version_ = readVersion(m_dataInputStream_);
|
rbc.m_UCD_version_ = readVersion(m_dataInputStream_);
|
||||||
readcount += 4;
|
readcount += 4;
|
||||||
|
VersionInfo formatVersion = readVersion(m_dataInputStream_);
|
||||||
|
readcount += 4;
|
||||||
|
rbc.m_scriptToLeadBytes = m_dataInputStream_.readInt();
|
||||||
|
readcount += 4;
|
||||||
|
rbc.m_leadByteToScripts = m_dataInputStream_.readInt();
|
||||||
|
readcount += 4;
|
||||||
|
|
||||||
// byte charsetName[] = new byte[32]; // for charset CEs
|
// byte charsetName[] = new byte[32]; // for charset CEs
|
||||||
m_dataInputStream_.skipBytes(32);
|
m_dataInputStream_.skipBytes(32);
|
||||||
readcount += 32;
|
readcount += 32;
|
||||||
m_dataInputStream_.skipBytes(56); // for future use
|
|
||||||
readcount += 56;
|
m_dataInputStream_.skipBytes(44); // for future use
|
||||||
|
readcount += 44;
|
||||||
if (m_headerSize_ < readcount) {
|
if (m_headerSize_ < readcount) {
|
||||||
///CLOVER:OFF
|
///CLOVER:OFF
|
||||||
throw new IOException("Internal Error: Header size error");
|
throw new IOException("Internal Error: Header size error");
|
||||||
@ -237,16 +257,20 @@ final class CollatorReader
|
|||||||
m_expansionEndCESize_ = expansionEndCEMaxSize - expansionEndCE;
|
m_expansionEndCESize_ = expansionEndCEMaxSize - expansionEndCE;
|
||||||
m_expansionEndCEMaxSizeSize_ = unsafe - expansionEndCEMaxSize;
|
m_expansionEndCEMaxSizeSize_ = unsafe - expansionEndCEMaxSize;
|
||||||
m_unsafeSize_ = contractionEnd - unsafe;
|
m_unsafeSize_ = contractionEnd - unsafe;
|
||||||
m_UCAValuesSize_ = m_size_ - UCAConst; // UCA value, will be handled
|
//m_UCAValuesSize_ = m_size_ - UCAConst; // UCA value, will be handled later
|
||||||
// later
|
m_UCAcontractionSize_ = contractionUCACombosSize * contractionUCACombosWidth * 2;
|
||||||
|
|
||||||
// treat it as normal collator first
|
// treat it as normal collator first
|
||||||
// for normal collator there is no UCA contraction
|
// for normal collator there is no UCA contraction
|
||||||
m_contractionEndSize_ = m_size_ - contractionEnd;
|
// contractions (UChar[contractionSize] + CE[contractionSize])
|
||||||
|
int old_contractionSize_ = m_size_ - contractionEnd;
|
||||||
|
// m_contractionSize_ = contractionSize * 2 + contractionSize * 4;
|
||||||
|
m_contractionSize_ = contractionSize * 2 + contractionSize * 4;
|
||||||
|
|
||||||
rbc.m_contractionOffset_ >>= 1; // casting to ints
|
rbc.m_contractionOffset_ >>= 1; // casting to ints
|
||||||
rbc.m_expansionOffset_ >>= 2; // casting to chars
|
rbc.m_expansionOffset_ >>= 2; // casting to chars
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Read and break up the collation options passed in the stream of data and
|
* Read and break up the collation options passed in the stream of data and
|
||||||
* update the argument Collator with the results
|
* update the argument Collator with the results
|
||||||
@ -262,16 +286,19 @@ final class CollatorReader
|
|||||||
rbc.m_defaultVariableTopValue_ = m_dataInputStream_.readInt();
|
rbc.m_defaultVariableTopValue_ = m_dataInputStream_.readInt();
|
||||||
readcount += 4;
|
readcount += 4;
|
||||||
rbc.m_defaultIsFrenchCollation_ = (m_dataInputStream_.readInt()
|
rbc.m_defaultIsFrenchCollation_ = (m_dataInputStream_.readInt()
|
||||||
== RuleBasedCollator.AttributeValue.ON_);
|
== RuleBasedCollator.AttributeValue.ON_);
|
||||||
readcount += 4;
|
readcount += 4;
|
||||||
rbc.m_defaultIsAlternateHandlingShifted_
|
rbc.m_defaultIsAlternateHandlingShifted_
|
||||||
= (m_dataInputStream_.readInt() ==
|
= (m_dataInputStream_.readInt() ==
|
||||||
RuleBasedCollator.AttributeValue.SHIFTED_);
|
RuleBasedCollator.AttributeValue.SHIFTED_);
|
||||||
readcount += 4;
|
readcount += 4;
|
||||||
rbc.m_defaultCaseFirst_ = m_dataInputStream_.readInt();
|
rbc.m_defaultCaseFirst_ = m_dataInputStream_.readInt();
|
||||||
readcount += 4;
|
readcount += 4;
|
||||||
rbc.m_defaultIsCaseLevel_ = (m_dataInputStream_.readInt()
|
// rbc.m_defaultIsCaseLevel_ = (m_dataInputStream_.readInt()
|
||||||
== RuleBasedCollator.AttributeValue.ON_);
|
// == RuleBasedCollator.AttributeValue.ON_);
|
||||||
|
int defaultIsCaseLevel = m_dataInputStream_.readInt();
|
||||||
|
rbc.m_defaultIsCaseLevel_ = (defaultIsCaseLevel
|
||||||
|
== RuleBasedCollator.AttributeValue.ON_);
|
||||||
readcount += 4;
|
readcount += 4;
|
||||||
int value = m_dataInputStream_.readInt();
|
int value = m_dataInputStream_.readInt();
|
||||||
readcount += 4;
|
readcount += 4;
|
||||||
@ -285,10 +312,10 @@ final class CollatorReader
|
|||||||
rbc.m_defaultStrength_ = m_dataInputStream_.readInt();
|
rbc.m_defaultStrength_ = m_dataInputStream_.readInt();
|
||||||
readcount += 4;
|
readcount += 4;
|
||||||
rbc.m_defaultIsHiragana4_ = (m_dataInputStream_.readInt()
|
rbc.m_defaultIsHiragana4_ = (m_dataInputStream_.readInt()
|
||||||
== RuleBasedCollator.AttributeValue.ON_);
|
== RuleBasedCollator.AttributeValue.ON_);
|
||||||
readcount += 4;
|
readcount += 4;
|
||||||
rbc.m_defaultIsNumericCollation_ = (m_dataInputStream_.readInt()
|
rbc.m_defaultIsNumericCollation_ = (m_dataInputStream_.readInt()
|
||||||
== RuleBasedCollator.AttributeValue.ON_);
|
== RuleBasedCollator.AttributeValue.ON_);
|
||||||
readcount += 4;
|
readcount += 4;
|
||||||
m_dataInputStream_.skip(60); // reserved for future use
|
m_dataInputStream_.skip(60); // reserved for future use
|
||||||
readcount += 60;
|
readcount += 60;
|
||||||
@ -299,21 +326,25 @@ final class CollatorReader
|
|||||||
///CLOVER:ON
|
///CLOVER:ON
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Read and break up the stream of data passed in as arguments into
|
* Read and break up the stream of data passed in as arguments into
|
||||||
* meaningful Collator data.
|
* meaningful Collator data.
|
||||||
* @param rbc RuleBasedCollator to populate
|
* @param rbc RuleBasedCollator to populate
|
||||||
* @param UCAConst object to fill up with UCA constants if we are reading
|
* @param UCAConst object to fill up with UCA constants if we are reading
|
||||||
* the UCA collator, if not use a null
|
* the UCA collator, if not use a null
|
||||||
* @return UCAContractions array filled up with the UCA contractions if we
|
* @param leadByteConstants object for the lead byte constants if we are
* reading the UCA collator, if not use a null
|
||||||
* are reading the UCA collator
|
* @return UCAContractions array filled up with the UCA contractions if we
|
||||||
* @exception IOException thrown when there's a data error.
|
* are reading the UCA collator
|
||||||
*/
|
* @exception IOException thrown when there's a data error.
|
||||||
|
*/
|
||||||
private char[] readImp(RuleBasedCollator rbc,
|
private char[] readImp(RuleBasedCollator rbc,
|
||||||
RuleBasedCollator.UCAConstants UCAConst)
|
RuleBasedCollator.UCAConstants UCAConst,
|
||||||
throws IOException
|
RuleBasedCollator.LeadByteConstants leadByteConstants)
|
||||||
|
throws IOException
|
||||||
{
|
{
|
||||||
|
char ucaContractions[] = null; // return result
|
||||||
|
|
||||||
readHeader(rbc);
|
readHeader(rbc);
|
||||||
// header size has been checked by readHeader
|
// header size has been checked by readHeader
|
||||||
int readcount = m_headerSize_;
|
int readcount = m_headerSize_;
|
||||||
@ -328,24 +359,24 @@ final class CollatorReader
|
|||||||
readcount += (m_expansionSize_ << 2);
|
readcount += (m_expansionSize_ << 2);
|
||||||
if (m_contractionIndexSize_ > 0) {
|
if (m_contractionIndexSize_ > 0) {
|
||||||
m_contractionIndexSize_ >>= 1;
|
m_contractionIndexSize_ >>= 1;
|
||||||
rbc.m_contractionIndex_ = new char[m_contractionIndexSize_];
|
rbc.m_contractionIndex_ = new char[m_contractionIndexSize_];
|
||||||
for (int i = 0; i < m_contractionIndexSize_; i ++) {
|
for (int i = 0; i < m_contractionIndexSize_; i ++) {
|
||||||
rbc.m_contractionIndex_[i] = m_dataInputStream_.readChar();
|
rbc.m_contractionIndex_[i] = m_dataInputStream_.readChar();
|
||||||
}
|
}
|
||||||
readcount += (m_contractionIndexSize_ << 1);
|
readcount += (m_contractionIndexSize_ << 1);
|
||||||
m_contractionCESize_ >>= 2;
|
m_contractionCESize_ >>= 2;
|
||||||
rbc.m_contractionCE_ = new int[m_contractionCESize_];
|
rbc.m_contractionCE_ = new int[m_contractionCESize_];
|
||||||
for (int i = 0; i < m_contractionCESize_; i ++) {
|
for (int i = 0; i < m_contractionCESize_; i ++) {
|
||||||
rbc.m_contractionCE_[i] = m_dataInputStream_.readInt();
|
rbc.m_contractionCE_[i] = m_dataInputStream_.readInt();
|
||||||
}
|
}
|
||||||
readcount += (m_contractionCESize_ << 2);
|
readcount += (m_contractionCESize_ << 2);
|
||||||
}
|
}
|
||||||
rbc.m_trie_ = new IntTrie(m_dataInputStream_,
|
rbc.m_trie_ = new IntTrie(m_dataInputStream_,
|
||||||
RuleBasedCollator.DataManipulate.getInstance());
|
RuleBasedCollator.DataManipulate.getInstance());
|
||||||
if (!rbc.m_trie_.isLatin1Linear()) {
|
if (!rbc.m_trie_.isLatin1Linear()) {
|
||||||
throw new IOException("Data corrupted, "
|
throw new IOException("Data corrupted, "
|
||||||
+ "Collator Tries expected to have linear "
|
+ "Collator Tries expected to have linear "
|
||||||
+ "latin one data arrays");
|
+ "latin one data arrays");
|
||||||
}
|
}
|
||||||
readcount += rbc.m_trie_.getSerializedDataSize();
|
readcount += rbc.m_trie_.getSerializedDataSize();
|
||||||
m_expansionEndCESize_ >>= 2;
|
m_expansionEndCESize_ >>= 2;
|
||||||
@ -368,13 +399,16 @@ final class CollatorReader
|
|||||||
// we are reading the UCA
|
// we are reading the UCA
|
||||||
// unfortunately the UCA offset in any collator data is not 0 and
|
// unfortunately the UCA offset in any collator data is not 0 and
|
||||||
// only refers to the UCA data
|
// only refers to the UCA data
|
||||||
m_contractionEndSize_ -= m_UCAValuesSize_;
|
//m_contractionSize_ -= m_UCAValuesSize_;
|
||||||
|
m_contractionSize_ = m_UCAConstOffset_ - readcount;
|
||||||
|
} else {
|
||||||
|
m_contractionSize_ = m_size_ - readcount;
|
||||||
}
|
}
|
||||||
rbc.m_contractionEnd_ = new byte[m_contractionEndSize_];
|
rbc.m_contractionEnd_ = new byte[m_contractionSize_];
|
||||||
for (int i = 0; i < m_contractionEndSize_; i ++) {
|
for (int i = 0; i < m_contractionSize_; i ++) {
|
||||||
rbc.m_contractionEnd_[i] = m_dataInputStream_.readByte();
|
rbc.m_contractionEnd_[i] = m_dataInputStream_.readByte();
|
||||||
}
|
}
|
||||||
readcount += m_contractionEndSize_;
|
readcount += m_contractionSize_;
|
||||||
if (UCAConst != null) {
|
if (UCAConst != null) {
|
||||||
UCAConst.FIRST_TERTIARY_IGNORABLE_[0]
|
UCAConst.FIRST_TERTIARY_IGNORABLE_[0]
|
||||||
= m_dataInputStream_.readInt();
|
= m_dataInputStream_.readInt();
|
||||||
@ -383,22 +417,22 @@ final class CollatorReader
|
|||||||
= m_dataInputStream_.readInt();
|
= m_dataInputStream_.readInt();
|
||||||
readUCAConstcount += 4;
|
readUCAConstcount += 4;
|
||||||
UCAConst.LAST_TERTIARY_IGNORABLE_[0]
|
UCAConst.LAST_TERTIARY_IGNORABLE_[0]
|
||||||
= m_dataInputStream_.readInt();
|
= m_dataInputStream_.readInt();
|
||||||
readUCAConstcount += 4;
|
readUCAConstcount += 4;
|
||||||
UCAConst.LAST_TERTIARY_IGNORABLE_[1]
|
UCAConst.LAST_TERTIARY_IGNORABLE_[1]
|
||||||
= m_dataInputStream_.readInt();
|
= m_dataInputStream_.readInt();
|
||||||
readUCAConstcount += 4;
|
readUCAConstcount += 4;
|
||||||
UCAConst.FIRST_PRIMARY_IGNORABLE_[0]
|
UCAConst.FIRST_PRIMARY_IGNORABLE_[0]
|
||||||
= m_dataInputStream_.readInt();
|
= m_dataInputStream_.readInt();
|
||||||
readUCAConstcount += 4;
|
readUCAConstcount += 4;
|
||||||
UCAConst.FIRST_PRIMARY_IGNORABLE_[1]
|
UCAConst.FIRST_PRIMARY_IGNORABLE_[1]
|
||||||
= m_dataInputStream_.readInt();
|
= m_dataInputStream_.readInt();
|
||||||
readUCAConstcount += 4;
|
readUCAConstcount += 4;
|
||||||
UCAConst.FIRST_SECONDARY_IGNORABLE_[0]
|
UCAConst.FIRST_SECONDARY_IGNORABLE_[0]
|
||||||
= m_dataInputStream_.readInt();
|
= m_dataInputStream_.readInt();
|
||||||
readUCAConstcount += 4;
|
readUCAConstcount += 4;
|
||||||
UCAConst.FIRST_SECONDARY_IGNORABLE_[1]
|
UCAConst.FIRST_SECONDARY_IGNORABLE_[1]
|
||||||
= m_dataInputStream_.readInt();
|
= m_dataInputStream_.readInt();
|
||||||
readUCAConstcount += 4;
|
readUCAConstcount += 4;
|
||||||
UCAConst.LAST_SECONDARY_IGNORABLE_[0]
|
UCAConst.LAST_SECONDARY_IGNORABLE_[0]
|
||||||
= m_dataInputStream_.readInt();
|
= m_dataInputStream_.readInt();
|
||||||
@ -407,10 +441,10 @@ final class CollatorReader
|
|||||||
= m_dataInputStream_.readInt();
|
= m_dataInputStream_.readInt();
|
||||||
readUCAConstcount += 4;
|
readUCAConstcount += 4;
|
||||||
UCAConst.LAST_PRIMARY_IGNORABLE_[0]
|
UCAConst.LAST_PRIMARY_IGNORABLE_[0]
|
||||||
= m_dataInputStream_.readInt();
|
= m_dataInputStream_.readInt();
|
||||||
readUCAConstcount += 4;
|
readUCAConstcount += 4;
|
||||||
UCAConst.LAST_PRIMARY_IGNORABLE_[1]
|
UCAConst.LAST_PRIMARY_IGNORABLE_[1]
|
||||||
= m_dataInputStream_.readInt();
|
= m_dataInputStream_.readInt();
|
||||||
readUCAConstcount += 4;
|
readUCAConstcount += 4;
|
||||||
UCAConst.FIRST_VARIABLE_[0] = m_dataInputStream_.readInt();
|
UCAConst.FIRST_VARIABLE_[0] = m_dataInputStream_.readInt();
|
||||||
readUCAConstcount += 4;
|
readUCAConstcount += 4;
|
||||||
@ -462,27 +496,39 @@ final class CollatorReader
|
|||||||
readUCAConstcount += 4;
|
readUCAConstcount += 4;
|
||||||
UCAConst.PRIMARY_SPECIAL_MAX_ = m_dataInputStream_.readInt();
|
UCAConst.PRIMARY_SPECIAL_MAX_ = m_dataInputStream_.readInt();
|
||||||
readUCAConstcount += 4;
|
readUCAConstcount += 4;
|
||||||
int resultsize = (m_UCAValuesSize_ - readUCAConstcount) >> 1;
|
|
||||||
char result[] = new char[resultsize];
|
readcount += readUCAConstcount;
|
||||||
|
|
||||||
|
//int resultsize = m_UCAcontractionSize_ / 2;
|
||||||
|
int resultsize = (rbc.m_scriptToLeadBytes - readcount) / 2;
|
||||||
|
ucaContractions = new char[resultsize];
|
||||||
for (int i = 0; i < resultsize; i ++) {
|
for (int i = 0; i < resultsize; i ++) {
|
||||||
result[i] = m_dataInputStream_.readChar();
|
ucaContractions[i] = m_dataInputStream_.readChar();
|
||||||
}
|
}
|
||||||
readcount += m_UCAValuesSize_;
|
readcount += m_UCAcontractionSize_;
|
||||||
if (readcount != m_size_) {
|
|
||||||
///CLOVER:OFF
|
// if (readcount != m_size_) {
|
||||||
throw new IOException("Internal Error: Data file size error");
|
// ///CLOVER:OFF
|
||||||
///CLOVER:ON
|
// throw new IOException("Internal Error: Data file size error");
|
||||||
}
|
// ///CLOVER:ON
|
||||||
return result;
|
// }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (leadByteConstants != null)
|
||||||
|
{
|
||||||
|
readcount += m_dataInputStream_.skip(rbc.m_scriptToLeadBytes - readcount);
|
||||||
|
leadByteConstants.read(m_dataInputStream_);
|
||||||
|
readcount += leadByteConstants.getSerializedDataSize();
|
||||||
|
}
|
||||||
|
|
||||||
if (readcount != m_size_) {
|
if (readcount != m_size_) {
|
||||||
///CLOVER:OFF
|
///CLOVER:OFF
|
||||||
throw new IOException("Internal Error: Data file size error");
|
throw new IOException("Internal Error: Data file size error");
|
||||||
///CLOVER:ON
|
///CLOVER:ON
|
||||||
}
|
}
|
||||||
return null;
|
return ucaContractions;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reads in the inverse uca data
|
* Reads in the inverse uca data
|
||||||
* @param input input stream with the inverse uca data
|
* @param input input stream with the inverse uca data
|
||||||
@ -491,22 +537,22 @@ final class CollatorReader
|
|||||||
* inverse uca
|
* inverse uca
|
||||||
*/
|
*/
|
||||||
private static CollationParsedRuleBuilder.InverseUCA readInverseUCA(
|
private static CollationParsedRuleBuilder.InverseUCA readInverseUCA(
|
||||||
InputStream inputStream)
|
InputStream inputStream)
|
||||||
throws IOException
|
throws IOException
|
||||||
{
|
{
|
||||||
byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, INVERSE_UCA_DATA_FORMAT_ID_,
|
byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, INVERSE_UCA_DATA_FORMAT_ID_,
|
||||||
INVERSE_UCA_AUTHENTICATE_);
|
INVERSE_UCA_AUTHENTICATE_);
|
||||||
|
|
||||||
// weiv: check that we have the correct Unicode version in
|
// weiv: check that we have the correct Unicode version in
|
||||||
// binary files
|
// binary files
|
||||||
VersionInfo UCDVersion = UCharacter.getUnicodeVersion();
|
VersionInfo UCDVersion = UCharacter.getUnicodeVersion();
|
||||||
if(UnicodeVersion[0] != UCDVersion.getMajor()
|
if(UnicodeVersion[0] != UCDVersion.getMajor()
|
||||||
|| UnicodeVersion[1] != UCDVersion.getMinor()) {
|
|| UnicodeVersion[1] != UCDVersion.getMinor()) {
|
||||||
throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
|
throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
|
||||||
}
|
}
|
||||||
|
|
||||||
CollationParsedRuleBuilder.InverseUCA result =
|
CollationParsedRuleBuilder.InverseUCA result =
|
||||||
new CollationParsedRuleBuilder.InverseUCA();
|
new CollationParsedRuleBuilder.InverseUCA();
|
||||||
DataInputStream input = new DataInputStream(inputStream);
|
DataInputStream input = new DataInputStream(inputStream);
|
||||||
input.readInt(); // bytesize
|
input.readInt(); // bytesize
|
||||||
int tablesize = input.readInt(); // in int size
|
int tablesize = input.readInt(); // in int size
|
||||||
@ -515,11 +561,11 @@ final class CollatorReader
|
|||||||
input.readInt(); // conts in bytes
|
input.readInt(); // conts in bytes
|
||||||
result.m_UCA_version_ = readVersion(input);
|
result.m_UCA_version_ = readVersion(input);
|
||||||
input.skipBytes(8); // skip padding
|
input.skipBytes(8); // skip padding
|
||||||
|
|
||||||
int size = tablesize * 3; // one column for each strength
|
int size = tablesize * 3; // one column for each strength
|
||||||
result.m_table_ = new int[size];
|
result.m_table_ = new int[size];
|
||||||
result.m_continuations_ = new char[contsize];
|
result.m_continuations_ = new char[contsize];
|
||||||
|
|
||||||
for (int i = 0; i < size; i ++) {
|
for (int i = 0; i < size; i ++) {
|
||||||
result.m_table_[i] = input.readInt();
|
result.m_table_[i] = input.readInt();
|
||||||
}
|
}
|
||||||
@ -529,7 +575,7 @@ final class CollatorReader
|
|||||||
input.close();
|
input.close();
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reads four bytes from the input and returns a VersionInfo
|
* Reads four bytes from the input and returns a VersionInfo
|
||||||
* object. Use it to read different collator versions.
|
* object. Use it to read different collator versions.
|
||||||
@ -539,143 +585,147 @@ final class CollatorReader
|
|||||||
* @throws IOException thrown when error occurs while reading
|
* @throws IOException thrown when error occurs while reading
|
||||||
* version bytes
|
* version bytes
|
||||||
*/
|
*/
|
||||||
|
|
||||||
protected static VersionInfo readVersion(DataInputStream input)
|
protected static VersionInfo readVersion(DataInputStream input)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
byte[] version = new byte[4];
|
byte[] version = new byte[4];
|
||||||
version[0] = input.readByte();
|
version[0] = input.readByte();
|
||||||
version[1] = input.readByte();
|
version[1] = input.readByte();
|
||||||
version[2] = input.readByte();
|
version[2] = input.readByte();
|
||||||
version[3] = input.readByte();
|
version[3] = input.readByte();
|
||||||
|
|
||||||
VersionInfo result =
|
VersionInfo result =
|
||||||
VersionInfo.getInstance(
|
VersionInfo.getInstance(
|
||||||
(int)version[0], (int)version[1],
|
(int)version[0], (int)version[1],
|
||||||
(int)version[2], (int)version[3]);
|
(int)version[2], (int)version[3]);
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
// private inner class -----------------------------------------------
|
// private inner class -----------------------------------------------
|
||||||
|
|
||||||
// private variables -------------------------------------------------
|
// private variables -------------------------------------------------
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Authenticate uca data format version
|
* Authenticate uca data format version
|
||||||
*/
|
*/
|
||||||
private static final ICUBinary.Authenticate UCA_AUTHENTICATE_
|
private static final ICUBinary.Authenticate UCA_AUTHENTICATE_
|
||||||
= new ICUBinary.Authenticate() {
|
= new ICUBinary.Authenticate() {
|
||||||
public boolean isDataVersionAcceptable(byte version[])
|
public boolean isDataVersionAcceptable(byte version[])
|
||||||
{
|
{
|
||||||
return version[0] == DATA_FORMAT_VERSION_[0]
|
return version[0] == DATA_FORMAT_VERSION_[0]
|
||||||
&& version[1] >= DATA_FORMAT_VERSION_[1];
|
&& version[1] >= DATA_FORMAT_VERSION_[1];
|
||||||
// Too harsh
|
// Too harsh
|
||||||
//&& version[1] == DATA_FORMAT_VERSION_[1]
|
//&& version[1] == DATA_FORMAT_VERSION_[1]
|
||||||
//&& version[2] == DATA_FORMAT_VERSION_[2]
|
//&& version[2] == DATA_FORMAT_VERSION_[2]
|
||||||
//&& version[3] == DATA_FORMAT_VERSION_[3];
|
//&& version[3] == DATA_FORMAT_VERSION_[3];
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Authenticate uca data format version
|
* Authenticate uca data format version
|
||||||
*/
|
*/
|
||||||
private static final ICUBinary.Authenticate INVERSE_UCA_AUTHENTICATE_
|
private static final ICUBinary.Authenticate INVERSE_UCA_AUTHENTICATE_
|
||||||
= new ICUBinary.Authenticate() {
|
= new ICUBinary.Authenticate() {
|
||||||
public boolean isDataVersionAcceptable(byte version[])
|
public boolean isDataVersionAcceptable(byte version[])
|
||||||
{
|
{
|
||||||
return version[0]
|
return version[0]
|
||||||
== INVERSE_UCA_DATA_FORMAT_VERSION_[0]
|
== INVERSE_UCA_DATA_FORMAT_VERSION_[0]
|
||||||
&& version[1]
|
&& version[1]
|
||||||
>= INVERSE_UCA_DATA_FORMAT_VERSION_[1];
|
>= INVERSE_UCA_DATA_FORMAT_VERSION_[1];
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
|
||||||
* Data input stream for uca.icu
|
|
||||||
*/
|
|
||||||
private DataInputStream m_dataInputStream_;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* File format version and id that this class understands.
|
|
||||||
* No guarantees are made if a older version is used
|
|
||||||
*/
|
|
||||||
private static final byte DATA_FORMAT_VERSION_[] =
|
|
||||||
{(byte)0x2, (byte)0x2, (byte)0x0, (byte)0x0};
|
|
||||||
private static final byte DATA_FORMAT_ID_[] = {(byte)0x55, (byte)0x43,
|
|
||||||
(byte)0x6f, (byte)0x6c};
|
|
||||||
/**
|
|
||||||
* Inverse UCA file format version and id that this class understands.
|
|
||||||
* No guarantees are made if a older version is used
|
|
||||||
*/
|
|
||||||
private static final byte INVERSE_UCA_DATA_FORMAT_VERSION_[] =
|
|
||||||
{(byte)0x2, (byte)0x1, (byte)0x0, (byte)0x0};
|
|
||||||
private static final byte INVERSE_UCA_DATA_FORMAT_ID_[] = {(byte)0x49,
|
|
||||||
(byte)0x6e,
|
|
||||||
(byte)0x76,
|
|
||||||
(byte)0x43};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Wrong unicode version error string
|
|
||||||
*/
|
|
||||||
private static final String WRONG_UNICODE_VERSION_ERROR_ =
|
|
||||||
"Unicode version in binary image is not compatible with the current Unicode version";
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Size of expansion table in bytes
|
* Data input stream for uca.icu
|
||||||
*/
|
*/
|
||||||
private int m_expansionSize_;
|
private DataInputStream m_dataInputStream_;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Size of contraction index table in bytes
|
* File format version and id that this class understands.
|
||||||
|
* No guarantees are made if a older version is used
|
||||||
*/
|
*/
|
||||||
private int m_contractionIndexSize_;
|
private static final byte DATA_FORMAT_VERSION_[] =
|
||||||
|
{(byte)0x3, (byte)0x0, (byte)0x0, (byte)0x0};
|
||||||
|
private static final byte DATA_FORMAT_ID_[] = {(byte)0x55, (byte)0x43,
|
||||||
|
(byte)0x6f, (byte)0x6c};
|
||||||
/**
|
/**
|
||||||
* Size of contraction table in bytes
|
* Inverse UCA file format version and id that this class understands.
|
||||||
|
* No guarantees are made if a older version is used
|
||||||
*/
|
*/
|
||||||
private int m_contractionCESize_;
|
private static final byte INVERSE_UCA_DATA_FORMAT_VERSION_[] =
|
||||||
/*
|
{(byte)0x2, (byte)0x1, (byte)0x0, (byte)0x0};
|
||||||
* Size of the Trie in bytes
|
private static final byte INVERSE_UCA_DATA_FORMAT_ID_[] = {(byte)0x49,
|
||||||
*/
|
(byte)0x6e,
|
||||||
//private int m_trieSize_;
|
(byte)0x76,
|
||||||
|
(byte)0x43};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Size of the table that contains information about collation elements
|
* Wrong unicode version error string
|
||||||
* that end with an expansion
|
|
||||||
*/
|
*/
|
||||||
private int m_expansionEndCESize_;
|
private static final String WRONG_UNICODE_VERSION_ERROR_ =
|
||||||
/**
|
"Unicode version in binary image is not compatible with the current Unicode version";
|
||||||
* Size of the table that contains information about the maximum size of
|
|
||||||
* collation elements that end with a particular expansion CE corresponding
|
/**
|
||||||
* to the ones in expansionEndCE
|
* Size of expansion table in bytes
|
||||||
*/
|
*/
|
||||||
private int m_expansionEndCEMaxSizeSize_;
|
private int m_expansionSize_;
|
||||||
/**
|
/**
|
||||||
* Size of the option table that contains information about the collation
|
* Size of contraction index table in bytes
|
||||||
* options
|
*/
|
||||||
*/
|
private int m_contractionIndexSize_;
|
||||||
private int m_optionSize_;
|
/**
|
||||||
/**
|
* Size of contraction table in bytes
|
||||||
* Size of the whole data file minusing the ICU header
|
*/
|
||||||
*/
|
private int m_contractionCESize_;
|
||||||
private int m_size_;
|
/*
|
||||||
/**
|
* Size of the Trie in bytes
|
||||||
* Size of the collation data header
|
*/
|
||||||
*/
|
//private int m_trieSize_;
|
||||||
private int m_headerSize_;
|
/**
|
||||||
/**
|
* Size of the table that contains information about collation elements
|
||||||
* Size of the table that contains information about the "Unsafe"
|
* that end with an expansion
|
||||||
* codepoints
|
*/
|
||||||
*/
|
private int m_expansionEndCESize_;
|
||||||
private int m_unsafeSize_;
|
/**
|
||||||
/**
|
* Size of the table that contains information about the maximum size of
|
||||||
* Size of the table that contains information about codepoints that ends
|
* collation elements that end with a particular expansion CE corresponding
|
||||||
* with a contraction
|
* to the ones in expansionEndCE
|
||||||
*/
|
*/
|
||||||
private int m_contractionEndSize_;
|
private int m_expansionEndCEMaxSizeSize_;
|
||||||
/**
|
/**
|
||||||
* Size of the table that contains UCA contraction information
|
* Size of the option table that contains information about the collation
|
||||||
*/
|
* options
|
||||||
private int m_UCAValuesSize_;
|
*/
|
||||||
|
private int m_optionSize_;
|
||||||
// private methods ---------------------------------------------------
|
/**
|
||||||
|
* Size of the whole data file minusing the ICU header
|
||||||
|
*/
|
||||||
|
private int m_size_;
|
||||||
|
/**
|
||||||
|
* Size of the collation data header
|
||||||
|
*/
|
||||||
|
private int m_headerSize_;
|
||||||
|
/**
|
||||||
|
* Size of the table that contains information about the "Unsafe"
|
||||||
|
* codepoints
|
||||||
|
*/
|
||||||
|
private int m_unsafeSize_;
|
||||||
|
/**
|
||||||
|
* Size in bytes of the table that contains information about codepoints that ends
|
||||||
|
* with a contraction
|
||||||
|
*/
|
||||||
|
private int m_contractionSize_;
|
||||||
|
/**
|
||||||
|
* Size of the table that contains UCA contraction information in bytes
|
||||||
|
*/
|
||||||
|
private int m_UCAcontractionSize_;
|
||||||
|
/**
|
||||||
|
* Offset of the UCA Const
|
||||||
|
*/
|
||||||
|
private int m_UCAConstOffset_;
|
||||||
|
|
||||||
|
// private methods ---------------------------------------------------
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user