ICU-3984 collation reordering checkpoint - still needs more tests and name change from script to reorder
X-SVN-Rev: 29018
This commit is contained in:
parent
bae3e7a74a
commit
0e27c3ea13
@ -11,8 +11,10 @@ import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import com.ibm.icu.impl.ICUResourceBundle;
|
||||
import com.ibm.icu.text.Collator.ReorderCodes;
|
||||
import com.ibm.icu.util.UResourceBundle;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
import com.ibm.icu.impl.UCharacterProperty;
|
||||
@ -2311,29 +2313,51 @@ final class CollationRuleParser
|
||||
return rules;
|
||||
}
|
||||
|
||||
private void parseScriptReorder() throws ParseException{
|
||||
/* This is the data that is used for non-script reordering codes. These _must_ be kept
 * in the order in which they are to be applied as defaults, and in sync with the
 * Collator.ReorderCodes statics.
 */
|
||||
static final String ReorderingTokensArray[] = {
|
||||
"SPACE",
|
||||
"PUNCT",
|
||||
"SYMBOL",
|
||||
"CURRENCY",
|
||||
"DIGIT",
|
||||
};
|
||||
|
||||
int findReorderingEntry(String name) {
|
||||
for (int tokenIndex = 0; tokenIndex < ReorderingTokensArray.length; tokenIndex++) {
|
||||
if (name.equalsIgnoreCase(ReorderingTokensArray[tokenIndex])) {
|
||||
return tokenIndex + ReorderCodes.FIRST;
|
||||
}
|
||||
}
|
||||
return UScript.INVALID_CODE;
|
||||
}
|
||||
|
||||
private void parseScriptReorder() throws ParseException {
|
||||
ArrayList<Integer> tempOrder = new ArrayList<Integer>();
|
||||
int end = m_rules_.indexOf(']', m_current_);
|
||||
while(m_current_ < end){
|
||||
// Ensure that the following token is 4 characters long
|
||||
if ((end != m_current_+4) &&
|
||||
(m_rules_.charAt(m_current_+4) != ' ')) {
|
||||
throw new ParseException(m_rules_, m_current_);
|
||||
}
|
||||
int[] script = UScript.getCode(m_rules_.substring(m_current_, m_current_+4));
|
||||
if (script.length > 0) {
|
||||
tempOrder.add(script[0]);
|
||||
} else {
|
||||
throw new ParseException(m_rules_, m_current_);
|
||||
}
|
||||
m_current_+= 4;
|
||||
while (m_current_ < end && UCharacter.isWhitespace(m_rules_.charAt(m_current_)))
|
||||
{ // eat whitespace
|
||||
m_current_++;
|
||||
}
|
||||
}
|
||||
int end = m_rules_.indexOf(']', m_current_);
|
||||
if (end == -1) {
|
||||
return;
|
||||
}
|
||||
String tokenString = m_rules_.substring(m_current_, end);
|
||||
String[] tokens = tokenString.split("\\s+", 0);
|
||||
String token;
|
||||
for (int tokenIndex = 0; tokenIndex < tokens.length; tokenIndex++) {
|
||||
token = tokens[tokenIndex];
|
||||
int reorderCode = findReorderingEntry(token);
|
||||
if (reorderCode != UScript.INVALID_CODE) {
|
||||
tempOrder.add(reorderCode);
|
||||
} else {
|
||||
int[] reorderCodes = UScript.getCode(token);
|
||||
if (reorderCodes.length > 0) {
|
||||
tempOrder.add(reorderCodes[0]);
|
||||
} else {
|
||||
throw new ParseException(m_rules_, tokenIndex);
|
||||
}
|
||||
}
|
||||
}
|
||||
m_options_.m_scriptOrder_ = new int[tempOrder.size()];
|
||||
for(int i = 0; i < tempOrder.size(); i++){
|
||||
for(int i = 0; i < tempOrder.size(); i++) {
|
||||
m_options_.m_scriptOrder_[i] = tempOrder.get(i);
|
||||
}
|
||||
}
|
||||
|
@ -225,9 +225,7 @@ public abstract class Collator implements Comparator<Object>, Cloneable
|
||||
*/
|
||||
public final static int CANONICAL_DECOMPOSITION = 17;
|
||||
|
||||
public final static class CollationReorderCodes {
|
||||
private CollationReorderCodes() {}
|
||||
|
||||
public static interface ReorderCodes {
|
||||
public final static int SPACE = 0x1000;
|
||||
public final static int FIRST = SPACE;
|
||||
public final static int PUNCTUATION = 0x1001;
|
||||
|
@ -23,22 +23,25 @@ import com.ibm.icu.text.RuleBasedCollator.UCAConstants;
|
||||
import com.ibm.icu.util.VersionInfo;
|
||||
|
||||
/**
|
||||
* <p>Internal reader class for ICU data file uca.icu containing
|
||||
* Unicode Collation Algorithm data.</p>
|
||||
* <p>This class simply reads uca.icu, authenticates that it is a valid
|
||||
* ICU data file and split its contents up into blocks of data for use in
|
||||
* <a href=Collator.html>com.ibm.icu.text.Collator</a>.
|
||||
* </p>
|
||||
* <p>uca.icu which is in big-endian format is jared together with this
|
||||
* package.</p>
|
||||
* <p>
|
||||
* Internal reader class for ICU data file uca.icu containing Unicode Collation Algorithm data.
|
||||
* </p>
|
||||
* <p>
|
||||
* This class simply reads uca.icu, authenticates that it is a valid ICU data file and split its contents up into blocks
|
||||
* of data for use in <a href=Collator.html>com.ibm.icu.text.Collator</a>.
|
||||
* </p>
|
||||
* <p>
|
||||
* uca.icu which is in big-endian format is jared together with this package.
|
||||
* </p>
|
||||
*
|
||||
* @author Syn Wee Quek
|
||||
* @since release 2.2, April 18 2002
|
||||
*/
|
||||
|
||||
final class CollatorReader
|
||||
{
|
||||
static char[] read(RuleBasedCollator rbc, UCAConstants ucac, LeadByteConstants leadByteConstants) throws IOException {
|
||||
InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE+"/coll/ucadata.icu");
|
||||
final class CollatorReader {
|
||||
static char[] read(RuleBasedCollator rbc, UCAConstants ucac, LeadByteConstants leadByteConstants)
|
||||
throws IOException {
|
||||
InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE + "/coll/ucadata.icu");
|
||||
BufferedInputStream b = new BufferedInputStream(i, 90000);
|
||||
CollatorReader reader = new CollatorReader(b);
|
||||
char[] result = reader.readImp(rbc, ucac, leadByteConstants);
|
||||
@ -54,6 +57,7 @@ final class CollatorReader
|
||||
}
|
||||
return buf.get() & 0xff;
|
||||
}
|
||||
|
||||
public int read(byte[] bytes, int off, int len) throws IOException {
|
||||
len = Math.min(len, buf.remaining());
|
||||
buf.get(bytes, off, len);
|
||||
@ -81,60 +85,62 @@ final class CollatorReader
|
||||
|
||||
static InverseUCA getInverseUCA() throws IOException {
|
||||
InverseUCA result = null;
|
||||
InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE+"/coll/invuca.icu");
|
||||
// try {
|
||||
// String invdat = "/com/ibm/icu/impl/data/invuca.icu";
|
||||
// InputStream i = CollationParsedRuleBuilder.class.getResourceAsStream(invdat);
|
||||
InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE + "/coll/invuca.icu");
|
||||
// try {
|
||||
// String invdat = "/com/ibm/icu/impl/data/invuca.icu";
|
||||
// InputStream i = CollationParsedRuleBuilder.class.getResourceAsStream(invdat);
|
||||
BufferedInputStream b = new BufferedInputStream(i, 110000);
|
||||
result = CollatorReader.readInverseUCA(b);
|
||||
b.close();
|
||||
i.close();
|
||||
return result;
|
||||
// } catch (Exception e) {
|
||||
// throw new RuntimeException(e.getMessage());
|
||||
// }
|
||||
// } catch (Exception e) {
|
||||
// throw new RuntimeException(e.getMessage());
|
||||
// }
|
||||
}
|
||||
|
||||
// protected constructor ---------------------------------------------
|
||||
|
||||
/**
 * <p>
 * Private constructor. Delegates to the two-argument constructor with readICUHeader set to true.
 * </p>
 *
 * @param inputStream
 *            ICU collator file input stream
 * @exception IOException
 *                propagated from the two-argument constructor
 */
private CollatorReader(InputStream inputStream) throws IOException {
    this(inputStream, true);
}
|
||||
|
||||
/**
|
||||
* <p>Protected constructor.</p>
|
||||
* @param inputStream ICU uprops.icu file input stream
|
||||
* @param readICUHeader flag to indicate if the ICU header has to be read
|
||||
* @exception IOException throw if data file fails authentication
|
||||
* <p>
|
||||
* Protected constructor.
|
||||
* </p>
|
||||
*
|
||||
* @param inputStream
|
||||
* ICU uprops.icu file input stream
|
||||
* @param readICUHeader
|
||||
* flag to indicate if the ICU header has to be read
|
||||
* @exception IOException
|
||||
* throw if data file fails authentication
|
||||
*/
|
||||
private CollatorReader(InputStream inputStream, boolean readICUHeader)
|
||||
throws IOException
|
||||
{
|
||||
private CollatorReader(InputStream inputStream, boolean readICUHeader) throws IOException {
|
||||
if (readICUHeader) {
|
||||
byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_,
|
||||
UCA_AUTHENTICATE_);
|
||||
// weiv: check that we have the correct Unicode version in
|
||||
byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_, UCA_AUTHENTICATE_);
|
||||
// weiv: check that we have the correct Unicode version in
|
||||
// binary files
|
||||
VersionInfo UCDVersion = UCharacter.getUnicodeVersion();
|
||||
if(UnicodeVersion[0] != UCDVersion.getMajor()
|
||||
|| UnicodeVersion[1] != UCDVersion.getMinor()) {
|
||||
if (UnicodeVersion[0] != UCDVersion.getMajor() || UnicodeVersion[1] != UCDVersion.getMinor()) {
|
||||
throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
|
||||
}
|
||||
}
|
||||
@ -144,13 +150,14 @@ final class CollatorReader
|
||||
// protected methods -------------------------------------------------
|
||||
|
||||
/**
|
||||
* Read and break up the header stream of data passed in as arguments into
|
||||
* meaningful Collator data.
|
||||
* @param rbc RuleBasedCollator to populate with header information
|
||||
* @exception IOException thrown when there's a data error.
|
||||
* Read and break up the header stream of data passed in as arguments into meaningful Collator data.
|
||||
*
|
||||
* @param rbc
|
||||
* RuleBasedCollator to populate with header information
|
||||
* @exception IOException
|
||||
* thrown when there's a data error.
|
||||
*/
|
||||
private void readHeader(RuleBasedCollator rbc) throws IOException
|
||||
{
|
||||
private void readHeader(RuleBasedCollator rbc) throws IOException {
|
||||
m_size_ = m_dataInputStream_.readInt();
|
||||
// all the offsets are in bytes
|
||||
// to get the address add to the header address and cast properly
|
||||
@ -163,7 +170,7 @@ final class CollatorReader
|
||||
readcount += 4;
|
||||
// this one is needed only for UCA, to copy the appropriate
|
||||
// contractions
|
||||
int contractionUCACombos = m_dataInputStream_.readInt();
|
||||
/*int contractionUCACombos =*/ m_dataInputStream_.readInt();
|
||||
readcount += 4;
|
||||
// reserved for future use
|
||||
m_dataInputStream_.skipBytes(4);
|
||||
@ -191,7 +198,7 @@ final class CollatorReader
|
||||
int expansionEndCEMaxSize = m_dataInputStream_.readInt();
|
||||
readcount += 4;
|
||||
// size of endExpansionCE int expansionEndCESize
|
||||
/*int endExpansionCECount =*/ m_dataInputStream_.readInt();
|
||||
/* int endExpansionCECount = */m_dataInputStream_.readInt();
|
||||
readcount += 4;
|
||||
// hash table of unsafe code points
|
||||
int unsafe = m_dataInputStream_.readInt();
|
||||
@ -216,7 +223,7 @@ final class CollatorReader
|
||||
readcount += 4;
|
||||
rbc.m_UCD_version_ = readVersion(m_dataInputStream_);
|
||||
readcount += 4;
|
||||
VersionInfo formatVersion = readVersion(m_dataInputStream_);
|
||||
/*VersionInfo formatVersion =*/ readVersion(m_dataInputStream_);
|
||||
readcount += 4;
|
||||
rbc.m_scriptToLeadBytes = m_dataInputStream_.readInt();
|
||||
readcount += 4;
|
||||
@ -230,9 +237,9 @@ final class CollatorReader
|
||||
m_dataInputStream_.skipBytes(44); // for future use
|
||||
readcount += 44;
|
||||
if (m_headerSize_ < readcount) {
|
||||
///CLOVER:OFF
|
||||
// /CLOVER:OFF
|
||||
throw new IOException("Internal Error: Header size error");
|
||||
///CLOVER:ON
|
||||
// /CLOVER:ON
|
||||
}
|
||||
m_dataInputStream_.skipBytes(m_headerSize_ - readcount);
|
||||
|
||||
@ -244,18 +251,18 @@ final class CollatorReader
|
||||
m_expansionSize_ = rbc.m_contractionOffset_ - rbc.m_expansionOffset_;
|
||||
m_contractionIndexSize_ = contractionCE - rbc.m_contractionOffset_;
|
||||
m_contractionCESize_ = mapping - contractionCE;
|
||||
//m_trieSize_ = expansionEndCE - mapping;
|
||||
// m_trieSize_ = expansionEndCE - mapping;
|
||||
m_expansionEndCESize_ = expansionEndCEMaxSize - expansionEndCE;
|
||||
m_expansionEndCEMaxSizeSize_ = unsafe - expansionEndCEMaxSize;
|
||||
m_unsafeSize_ = contractionEnd - unsafe;
|
||||
//m_UCAValuesSize_ = m_size_ - UCAConst; // UCA value, will be handled later
|
||||
// m_UCAValuesSize_ = m_size_ - UCAConst; // UCA value, will be handled later
|
||||
m_UCAcontractionSize_ = contractionUCACombosSize * contractionUCACombosWidth * 2;
|
||||
|
||||
// treat it as normal collator first
|
||||
// for normal collator there is no UCA contraction
|
||||
// contractions (UChar[contractionSize] + CE[contractionSize])
|
||||
int old_contractionSize_ = m_size_ - contractionEnd;
|
||||
// m_contractionSize_ = contractionSize * 2 + contractionSize * 4;
|
||||
//int old_contractionSize_ = m_size_ - contractionEnd;
|
||||
// m_contractionSize_ = contractionSize * 2 + contractionSize * 4;
|
||||
m_contractionSize_ = contractionSize * 2 + contractionSize * 4;
|
||||
|
||||
rbc.m_contractionOffset_ >>= 1; // casting to ints
|
||||
@ -263,126 +270,114 @@ final class CollatorReader
|
||||
}
|
||||
|
||||
/**
|
||||
* Read and break up the collation options passed in the stream of data and
|
||||
* update the argument Collator with the results
|
||||
* Read and break up the collation options passed in the stream of data and update the argument Collator with the
|
||||
* results
|
||||
*
|
||||
* @param rbc
|
||||
* RuleBasedCollator to populate
|
||||
* @exception IOException
|
||||
* thrown when there's a data error.
|
||||
*/
|
||||
private void readOptions(RuleBasedCollator rbc) throws IOException
|
||||
{
|
||||
private void readOptions(RuleBasedCollator rbc) throws IOException {
|
||||
int readcount = 0;
|
||||
rbc.m_defaultVariableTopValue_ = m_dataInputStream_.readInt();
|
||||
readcount += 4;
|
||||
rbc.m_defaultIsFrenchCollation_ = (m_dataInputStream_.readInt()
|
||||
== RuleBasedCollator.AttributeValue.ON_);
|
||||
rbc.m_defaultIsFrenchCollation_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.ON_);
|
||||
readcount += 4;
|
||||
rbc.m_defaultIsAlternateHandlingShifted_
|
||||
= (m_dataInputStream_.readInt() ==
|
||||
RuleBasedCollator.AttributeValue.SHIFTED_);
|
||||
rbc.m_defaultIsAlternateHandlingShifted_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.SHIFTED_);
|
||||
readcount += 4;
|
||||
rbc.m_defaultCaseFirst_ = m_dataInputStream_.readInt();
|
||||
readcount += 4;
|
||||
// rbc.m_defaultIsCaseLevel_ = (m_dataInputStream_.readInt()
|
||||
// == RuleBasedCollator.AttributeValue.ON_);
|
||||
// rbc.m_defaultIsCaseLevel_ = (m_dataInputStream_.readInt()
|
||||
// == RuleBasedCollator.AttributeValue.ON_);
|
||||
int defaultIsCaseLevel = m_dataInputStream_.readInt();
|
||||
rbc.m_defaultIsCaseLevel_ = (defaultIsCaseLevel
|
||||
== RuleBasedCollator.AttributeValue.ON_);
|
||||
rbc.m_defaultIsCaseLevel_ = (defaultIsCaseLevel == RuleBasedCollator.AttributeValue.ON_);
|
||||
readcount += 4;
|
||||
int value = m_dataInputStream_.readInt();
|
||||
readcount += 4;
|
||||
if (value == RuleBasedCollator.AttributeValue.ON_) {
|
||||
value = Collator.CANONICAL_DECOMPOSITION;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
value = Collator.NO_DECOMPOSITION;
|
||||
}
|
||||
rbc.m_defaultDecomposition_ = value;
|
||||
rbc.m_defaultStrength_ = m_dataInputStream_.readInt();
|
||||
readcount += 4;
|
||||
rbc.m_defaultIsHiragana4_ = (m_dataInputStream_.readInt()
|
||||
== RuleBasedCollator.AttributeValue.ON_);
|
||||
rbc.m_defaultIsHiragana4_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.ON_);
|
||||
readcount += 4;
|
||||
rbc.m_defaultIsNumericCollation_ = (m_dataInputStream_.readInt()
|
||||
== RuleBasedCollator.AttributeValue.ON_);
|
||||
rbc.m_defaultIsNumericCollation_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.ON_);
|
||||
readcount += 4;
|
||||
m_dataInputStream_.skip(60); // reserved for future use
|
||||
readcount += 60;
|
||||
m_dataInputStream_.skipBytes(m_optionSize_ - readcount);
|
||||
if (m_optionSize_ < readcount) {
|
||||
///CLOVER:OFF
|
||||
// /CLOVER:OFF
|
||||
throw new IOException("Internal Error: Option size error");
|
||||
///CLOVER:ON
|
||||
// /CLOVER:ON
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Read and break up the stream of data passed in as arguments into
|
||||
* meaningful Collator data.
|
||||
* @param rbc RuleBasedCollator to populate
|
||||
* @param UCAConst object to fill up with UCA constants if we are reading
|
||||
* the UCA collator, if not use a null
|
||||
* @param leadByteConstants
|
||||
* @return UCAContractions array filled up with the UCA contractions if we
|
||||
* are reading the UCA collator
|
||||
* @exception IOException thrown when there's a data error.
|
||||
* Read and break up the stream of data passed in as arguments into meaningful Collator data.
|
||||
*
|
||||
* @param rbc
|
||||
* RuleBasedCollator to populate
|
||||
* @param UCAConst
|
||||
* object to fill up with UCA constants if we are reading the UCA collator, if not use a null
|
||||
* @param leadByteConstants
|
||||
* @return UCAContractions array filled up with the UCA contractions if we are reading the UCA collator
|
||||
* @exception IOException
|
||||
* thrown when there's a data error.
|
||||
*/
|
||||
private char[] readImp(RuleBasedCollator rbc,
|
||||
RuleBasedCollator.UCAConstants UCAConst,
|
||||
RuleBasedCollator.LeadByteConstants leadByteConstants)
|
||||
throws IOException
|
||||
{
|
||||
char ucaContractions[] = null; // return result
|
||||
private char[] readImp(RuleBasedCollator rbc, RuleBasedCollator.UCAConstants UCAConst,
|
||||
RuleBasedCollator.LeadByteConstants leadByteConstants) throws IOException {
|
||||
char ucaContractions[] = null; // return result
|
||||
|
||||
readHeader(rbc);
|
||||
// header size has been checked by readHeader
|
||||
int readcount = m_headerSize_;
|
||||
int readcount = m_headerSize_;
|
||||
// option size has been checked by readOptions
|
||||
readOptions(rbc);
|
||||
readcount += m_optionSize_;
|
||||
m_expansionSize_ >>= 2;
|
||||
rbc.m_expansion_ = new int[m_expansionSize_];
|
||||
for (int i = 0; i < m_expansionSize_; i ++) {
|
||||
for (int i = 0; i < m_expansionSize_; i++) {
|
||||
rbc.m_expansion_[i] = m_dataInputStream_.readInt();
|
||||
}
|
||||
readcount += (m_expansionSize_ << 2);
|
||||
if (m_contractionIndexSize_ > 0) {
|
||||
if (m_contractionIndexSize_ > 0) {
|
||||
m_contractionIndexSize_ >>= 1;
|
||||
rbc.m_contractionIndex_ = new char[m_contractionIndexSize_];
|
||||
for (int i = 0; i < m_contractionIndexSize_; i ++) {
|
||||
rbc.m_contractionIndex_[i] = m_dataInputStream_.readChar();
|
||||
rbc.m_contractionIndex_ = new char[m_contractionIndexSize_];
|
||||
for (int i = 0; i < m_contractionIndexSize_; i++) {
|
||||
rbc.m_contractionIndex_[i] = m_dataInputStream_.readChar();
|
||||
}
|
||||
readcount += (m_contractionIndexSize_ << 1);
|
||||
m_contractionCESize_ >>= 2;
|
||||
rbc.m_contractionCE_ = new int[m_contractionCESize_];
|
||||
for (int i = 0; i < m_contractionCESize_; i++) {
|
||||
rbc.m_contractionCE_[i] = m_dataInputStream_.readInt();
|
||||
}
|
||||
readcount += (m_contractionCESize_ << 2);
|
||||
}
|
||||
readcount += (m_contractionIndexSize_ << 1);
|
||||
m_contractionCESize_ >>= 2;
|
||||
rbc.m_contractionCE_ = new int[m_contractionCESize_];
|
||||
for (int i = 0; i < m_contractionCESize_; i ++) {
|
||||
rbc.m_contractionCE_[i] = m_dataInputStream_.readInt();
|
||||
}
|
||||
readcount += (m_contractionCESize_ << 2);
|
||||
}
|
||||
rbc.m_trie_ = new IntTrie(m_dataInputStream_,
|
||||
RuleBasedCollator.DataManipulate.getInstance());
|
||||
rbc.m_trie_ = new IntTrie(m_dataInputStream_, RuleBasedCollator.DataManipulate.getInstance());
|
||||
if (!rbc.m_trie_.isLatin1Linear()) {
|
||||
throw new IOException("Data corrupted, "
|
||||
+ "Collator Tries expected to have linear "
|
||||
throw new IOException("Data corrupted, " + "Collator Tries expected to have linear "
|
||||
+ "latin one data arrays");
|
||||
}
|
||||
readcount += rbc.m_trie_.getSerializedDataSize();
|
||||
m_expansionEndCESize_ >>= 2;
|
||||
rbc.m_expansionEndCE_ = new int[m_expansionEndCESize_];
|
||||
for (int i = 0; i < m_expansionEndCESize_; i ++) {
|
||||
for (int i = 0; i < m_expansionEndCESize_; i++) {
|
||||
rbc.m_expansionEndCE_[i] = m_dataInputStream_.readInt();
|
||||
}
|
||||
readcount += (m_expansionEndCESize_ << 2);
|
||||
rbc.m_expansionEndCEMaxSize_ = new byte[m_expansionEndCEMaxSizeSize_];
|
||||
for (int i = 0; i < m_expansionEndCEMaxSizeSize_; i ++) {
|
||||
for (int i = 0; i < m_expansionEndCEMaxSizeSize_; i++) {
|
||||
rbc.m_expansionEndCEMaxSize_[i] = m_dataInputStream_.readByte();
|
||||
}
|
||||
readcount += m_expansionEndCEMaxSizeSize_;
|
||||
rbc.m_unsafe_ = new byte[m_unsafeSize_];
|
||||
for (int i = 0; i < m_unsafeSize_; i ++) {
|
||||
for (int i = 0; i < m_unsafeSize_; i++) {
|
||||
rbc.m_unsafe_[i] = m_dataInputStream_.readByte();
|
||||
}
|
||||
readcount += m_unsafeSize_;
|
||||
@ -390,164 +385,148 @@ final class CollatorReader
|
||||
// we are reading the UCA
|
||||
// unfortunately the UCA offset in any collator data is not 0 and
|
||||
// only refers to the UCA data
|
||||
//m_contractionSize_ -= m_UCAValuesSize_;
|
||||
m_contractionSize_ = m_UCAConstOffset_ - readcount;
|
||||
// m_contractionSize_ -= m_UCAValuesSize_;
|
||||
m_contractionSize_ = m_UCAConstOffset_ - readcount;
|
||||
} else {
|
||||
m_contractionSize_ = m_size_ - readcount;
|
||||
}
|
||||
rbc.m_contractionEnd_ = new byte[m_contractionSize_];
|
||||
for (int i = 0; i < m_contractionSize_; i ++) {
|
||||
for (int i = 0; i < m_contractionSize_; i++) {
|
||||
rbc.m_contractionEnd_[i] = m_dataInputStream_.readByte();
|
||||
}
|
||||
readcount += m_contractionSize_;
|
||||
if (UCAConst != null) {
|
||||
UCAConst.FIRST_TERTIARY_IGNORABLE_[0]
|
||||
= m_dataInputStream_.readInt();
|
||||
UCAConst.FIRST_TERTIARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
|
||||
int readUCAConstcount = 4;
|
||||
UCAConst.FIRST_TERTIARY_IGNORABLE_[1]
|
||||
= m_dataInputStream_.readInt();
|
||||
UCAConst.FIRST_TERTIARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.LAST_TERTIARY_IGNORABLE_[0]
|
||||
= m_dataInputStream_.readInt();
|
||||
UCAConst.LAST_TERTIARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.LAST_TERTIARY_IGNORABLE_[1]
|
||||
= m_dataInputStream_.readInt();
|
||||
UCAConst.LAST_TERTIARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.FIRST_PRIMARY_IGNORABLE_[0]
|
||||
= m_dataInputStream_.readInt();
|
||||
UCAConst.FIRST_PRIMARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.FIRST_PRIMARY_IGNORABLE_[1]
|
||||
= m_dataInputStream_.readInt();
|
||||
UCAConst.FIRST_PRIMARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.FIRST_SECONDARY_IGNORABLE_[0]
|
||||
= m_dataInputStream_.readInt();
|
||||
UCAConst.FIRST_SECONDARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.FIRST_SECONDARY_IGNORABLE_[1]
|
||||
= m_dataInputStream_.readInt();
|
||||
UCAConst.FIRST_SECONDARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.LAST_SECONDARY_IGNORABLE_[0]
|
||||
= m_dataInputStream_.readInt();
|
||||
UCAConst.LAST_SECONDARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.LAST_SECONDARY_IGNORABLE_[1]
|
||||
= m_dataInputStream_.readInt();
|
||||
UCAConst.LAST_SECONDARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.LAST_PRIMARY_IGNORABLE_[0]
|
||||
= m_dataInputStream_.readInt();
|
||||
UCAConst.LAST_PRIMARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.LAST_PRIMARY_IGNORABLE_[1]
|
||||
= m_dataInputStream_.readInt();
|
||||
UCAConst.LAST_PRIMARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.FIRST_VARIABLE_[0] = m_dataInputStream_.readInt();
|
||||
UCAConst.FIRST_VARIABLE_[0] = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.FIRST_VARIABLE_[1] = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.LAST_VARIABLE_[0] = m_dataInputStream_.readInt();
|
||||
UCAConst.LAST_VARIABLE_[0] = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.LAST_VARIABLE_[1] = m_dataInputStream_.readInt();
|
||||
UCAConst.LAST_VARIABLE_[1] = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.FIRST_NON_VARIABLE_[0] = m_dataInputStream_.readInt();
|
||||
UCAConst.FIRST_NON_VARIABLE_[0] = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.FIRST_NON_VARIABLE_[1] = m_dataInputStream_.readInt();
|
||||
UCAConst.FIRST_NON_VARIABLE_[1] = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.LAST_NON_VARIABLE_[0] = m_dataInputStream_.readInt();
|
||||
UCAConst.LAST_NON_VARIABLE_[0] = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.LAST_NON_VARIABLE_[1] = m_dataInputStream_.readInt();
|
||||
UCAConst.LAST_NON_VARIABLE_[1] = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.RESET_TOP_VALUE_[0] = m_dataInputStream_.readInt();
|
||||
UCAConst.RESET_TOP_VALUE_[0] = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.RESET_TOP_VALUE_[1] = m_dataInputStream_.readInt();
|
||||
UCAConst.RESET_TOP_VALUE_[1] = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.FIRST_IMPLICIT_[0] = m_dataInputStream_.readInt();
|
||||
UCAConst.FIRST_IMPLICIT_[0] = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.FIRST_IMPLICIT_[1] = m_dataInputStream_.readInt();
|
||||
UCAConst.FIRST_IMPLICIT_[1] = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.LAST_IMPLICIT_[0] = m_dataInputStream_.readInt();
|
||||
UCAConst.LAST_IMPLICIT_[0] = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.LAST_IMPLICIT_[1] = m_dataInputStream_.readInt();
|
||||
UCAConst.LAST_IMPLICIT_[1] = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.FIRST_TRAILING_[0] = m_dataInputStream_.readInt();
|
||||
UCAConst.FIRST_TRAILING_[0] = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.FIRST_TRAILING_[1] = m_dataInputStream_.readInt();
|
||||
UCAConst.FIRST_TRAILING_[1] = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.LAST_TRAILING_[0] = m_dataInputStream_.readInt();
|
||||
UCAConst.LAST_TRAILING_[0] = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.LAST_TRAILING_[1] = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.PRIMARY_TOP_MIN_ = m_dataInputStream_.readInt();
|
||||
UCAConst.LAST_TRAILING_[1] = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.PRIMARY_IMPLICIT_MIN_ = m_dataInputStream_.readInt();
|
||||
UCAConst.PRIMARY_TOP_MIN_ = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.PRIMARY_IMPLICIT_MAX_ = m_dataInputStream_.readInt();
|
||||
UCAConst.PRIMARY_IMPLICIT_MIN_ = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.PRIMARY_TRAILING_MIN_ = m_dataInputStream_.readInt();
|
||||
UCAConst.PRIMARY_IMPLICIT_MAX_ = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.PRIMARY_TRAILING_MAX_ = m_dataInputStream_.readInt();
|
||||
UCAConst.PRIMARY_TRAILING_MIN_ = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.PRIMARY_SPECIAL_MIN_ = m_dataInputStream_.readInt();
|
||||
UCAConst.PRIMARY_TRAILING_MAX_ = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.PRIMARY_SPECIAL_MAX_ = m_dataInputStream_.readInt();
|
||||
UCAConst.PRIMARY_SPECIAL_MIN_ = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
UCAConst.PRIMARY_SPECIAL_MAX_ = m_dataInputStream_.readInt();
|
||||
readUCAConstcount += 4;
|
||||
|
||||
readcount += readUCAConstcount;
|
||||
|
||||
//int resultsize = m_UCAcontractionSize_ / 2;
|
||||
// int resultsize = m_UCAcontractionSize_ / 2;
|
||||
int resultsize = (rbc.m_scriptToLeadBytes - readcount) / 2;
|
||||
ucaContractions = new char[resultsize];
|
||||
for (int i = 0; i < resultsize; i ++) {
|
||||
for (int i = 0; i < resultsize; i++) {
|
||||
ucaContractions[i] = m_dataInputStream_.readChar();
|
||||
}
|
||||
readcount += m_UCAcontractionSize_;
|
||||
|
||||
// if (readcount != m_size_) {
|
||||
// ///CLOVER:OFF
|
||||
// throw new IOException("Internal Error: Data file size error");
|
||||
// ///CLOVER:ON
|
||||
// }
|
||||
// if (readcount != m_size_) {
|
||||
// ///CLOVER:OFF
|
||||
// throw new IOException("Internal Error: Data file size error");
|
||||
// ///CLOVER:ON
|
||||
// }
|
||||
}
|
||||
|
||||
if (leadByteConstants != null)
|
||||
{
|
||||
if (leadByteConstants != null) {
|
||||
readcount += m_dataInputStream_.skip(rbc.m_scriptToLeadBytes - readcount);
|
||||
leadByteConstants.read(m_dataInputStream_);
|
||||
readcount += leadByteConstants.getSerializedDataSize();
|
||||
}
|
||||
|
||||
if (readcount != m_size_) {
|
||||
///CLOVER:OFF
|
||||
// /CLOVER:OFF
|
||||
throw new IOException("Internal Error: Data file size error");
|
||||
///CLOVER:ON
|
||||
// /CLOVER:ON
|
||||
}
|
||||
return ucaContractions;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads in the inverse uca data
|
||||
* @param input input stream with the inverse uca data
|
||||
*
|
||||
* @param input
|
||||
* input stream with the inverse uca data
|
||||
* @return an object containing the inverse uca data
|
||||
* @exception IOException thrown when error occurs while reading the
|
||||
* inverse uca
|
||||
* @exception IOException
|
||||
* thrown when error occurs while reading the inverse uca
|
||||
*/
|
||||
private static CollationParsedRuleBuilder.InverseUCA readInverseUCA(
|
||||
InputStream inputStream)
|
||||
throws IOException
|
||||
{
|
||||
byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, INVERSE_UCA_DATA_FORMAT_ID_,
|
||||
private static CollationParsedRuleBuilder.InverseUCA readInverseUCA(InputStream inputStream) throws IOException {
|
||||
byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, INVERSE_UCA_DATA_FORMAT_ID_,
|
||||
INVERSE_UCA_AUTHENTICATE_);
|
||||
|
||||
// weiv: check that we have the correct Unicode version in
|
||||
// weiv: check that we have the correct Unicode version in
|
||||
// binary files
|
||||
VersionInfo UCDVersion = UCharacter.getUnicodeVersion();
|
||||
if(UnicodeVersion[0] != UCDVersion.getMajor()
|
||||
|| UnicodeVersion[1] != UCDVersion.getMinor()) {
|
||||
if (UnicodeVersion[0] != UCDVersion.getMajor() || UnicodeVersion[1] != UCDVersion.getMinor()) {
|
||||
throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
|
||||
}
|
||||
|
||||
CollationParsedRuleBuilder.InverseUCA result =
|
||||
new CollationParsedRuleBuilder.InverseUCA();
|
||||
DataInputStream input = new DataInputStream(inputStream);
|
||||
CollationParsedRuleBuilder.InverseUCA result = new CollationParsedRuleBuilder.InverseUCA();
|
||||
DataInputStream input = new DataInputStream(inputStream);
|
||||
input.readInt(); // bytesize
|
||||
int tablesize = input.readInt(); // in int size
|
||||
int contsize = input.readInt(); // in char size
|
||||
int contsize = input.readInt(); // in char size
|
||||
input.readInt(); // table in bytes
|
||||
input.readInt(); // conts in bytes
|
||||
result.m_UCA_version_ = readVersion(input);
|
||||
@ -557,10 +536,10 @@ final class CollatorReader
|
||||
result.m_table_ = new int[size];
|
||||
result.m_continuations_ = new char[contsize];
|
||||
|
||||
for (int i = 0; i < size; i ++) {
|
||||
for (int i = 0; i < size; i++) {
|
||||
result.m_table_[i] = input.readInt();
|
||||
}
|
||||
for (int i = 0; i < contsize; i ++) {
|
||||
for (int i = 0; i < contsize; i++) {
|
||||
result.m_continuations_[i] = input.readChar();
|
||||
}
|
||||
input.close();
|
||||
@ -568,27 +547,24 @@ final class CollatorReader
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads four bytes from the input and returns a VersionInfo
|
||||
* object. Use it to read different collator versions.
|
||||
* @param input already instantiated DataInputStream, positioned
|
||||
* at the start of four version bytes
|
||||
* Reads four bytes from the input and returns a VersionInfo object. Use it to read different collator versions.
|
||||
*
|
||||
* @param input
|
||||
* already instantiated DataInputStream, positioned at the start of four version bytes
|
||||
* @return a ready VersionInfo object
|
||||
* @throws IOException thrown when error occurs while reading
|
||||
* version bytes
|
||||
* @throws IOException
|
||||
* thrown when error occurs while reading version bytes
|
||||
*/
|
||||
|
||||
protected static VersionInfo readVersion(DataInputStream input)
|
||||
throws IOException {
|
||||
protected static VersionInfo readVersion(DataInputStream input) throws IOException {
|
||||
byte[] version = new byte[4];
|
||||
version[0] = input.readByte();
|
||||
version[1] = input.readByte();
|
||||
version[2] = input.readByte();
|
||||
version[3] = input.readByte();
|
||||
|
||||
VersionInfo result =
|
||||
VersionInfo.getInstance(
|
||||
(int)version[0], (int)version[1],
|
||||
(int)version[2], (int)version[3]);
|
||||
VersionInfo result = VersionInfo.getInstance((int) version[0], (int) version[1], (int) version[2],
|
||||
(int) version[3]);
|
||||
|
||||
return result;
|
||||
}
|
||||
@ -600,123 +576,102 @@ final class CollatorReader
|
||||
/**
|
||||
* Authenticate uca data format version
|
||||
*/
|
||||
private static final ICUBinary.Authenticate UCA_AUTHENTICATE_
|
||||
= new ICUBinary.Authenticate() {
|
||||
public boolean isDataVersionAcceptable(byte version[])
|
||||
{
|
||||
return version[0] == DATA_FORMAT_VERSION_[0]
|
||||
&& version[1] >= DATA_FORMAT_VERSION_[1];
|
||||
// Too harsh
|
||||
//&& version[1] == DATA_FORMAT_VERSION_[1]
|
||||
//&& version[2] == DATA_FORMAT_VERSION_[2]
|
||||
//&& version[3] == DATA_FORMAT_VERSION_[3];
|
||||
private static final ICUBinary.Authenticate UCA_AUTHENTICATE_ = new ICUBinary.Authenticate() {
|
||||
public boolean isDataVersionAcceptable(byte version[]) {
|
||||
return version[0] == DATA_FORMAT_VERSION_[0] && version[1] >= DATA_FORMAT_VERSION_[1];
|
||||
// Too harsh
|
||||
// && version[1] == DATA_FORMAT_VERSION_[1]
|
||||
// && version[2] == DATA_FORMAT_VERSION_[2]
|
||||
// && version[3] == DATA_FORMAT_VERSION_[3];
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Authenticate uca data format version
|
||||
*/
|
||||
private static final ICUBinary.Authenticate INVERSE_UCA_AUTHENTICATE_
|
||||
= new ICUBinary.Authenticate() {
|
||||
public boolean isDataVersionAcceptable(byte version[])
|
||||
{
|
||||
return version[0]
|
||||
== INVERSE_UCA_DATA_FORMAT_VERSION_[0]
|
||||
&& version[1]
|
||||
>= INVERSE_UCA_DATA_FORMAT_VERSION_[1];
|
||||
private static final ICUBinary.Authenticate INVERSE_UCA_AUTHENTICATE_ = new ICUBinary.Authenticate() {
|
||||
public boolean isDataVersionAcceptable(byte version[]) {
|
||||
return version[0] == INVERSE_UCA_DATA_FORMAT_VERSION_[0]
|
||||
&& version[1] >= INVERSE_UCA_DATA_FORMAT_VERSION_[1];
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Data input stream for uca.icu
 */
private DataInputStream m_dataInputStream_;

/**
 * File format version and id that this class understands. No guarantees are made if an older version is used.
 * The id bytes are the ASCII characters "UCol".
 */
private static final byte DATA_FORMAT_VERSION_[] = { (byte) 0x3, (byte) 0x0, (byte) 0x0, (byte) 0x0 };
private static final byte DATA_FORMAT_ID_[] = { (byte) 0x55, (byte) 0x43, (byte) 0x6f, (byte) 0x6c };
/**
 * Inverse UCA file format version and id that this class understands. No guarantees are made if an older
 * version is used. The id bytes are the ASCII characters "InvC".
 */
private static final byte INVERSE_UCA_DATA_FORMAT_VERSION_[] = { (byte) 0x2, (byte) 0x1, (byte) 0x0, (byte) 0x0 };
private static final byte INVERSE_UCA_DATA_FORMAT_ID_[] = { (byte) 0x49, (byte) 0x6e, (byte) 0x76, (byte) 0x43 };

/**
 * Wrong unicode version error string
 */
private static final String WRONG_UNICODE_VERSION_ERROR_ = "Unicode version in binary image is not compatible with the current Unicode version";
|
||||
|
||||
/**
|
||||
* Size of expansion table in bytes
|
||||
*/
|
||||
private int m_expansionSize_;
|
||||
/**
|
||||
* Size of contraction index table in bytes
|
||||
*/
|
||||
private int m_contractionIndexSize_;
|
||||
/**
|
||||
* Size of contraction table in bytes
|
||||
*/
|
||||
private int m_contractionCESize_;
|
||||
/*
|
||||
* Size of the Trie in bytes
|
||||
*/
|
||||
//private int m_trieSize_;
|
||||
/**
|
||||
* Size of the table that contains information about collation elements
|
||||
* that end with an expansion
|
||||
*/
|
||||
private int m_expansionEndCESize_;
|
||||
/**
|
||||
* Size of the table that contains information about the maximum size of
|
||||
* collation elements that end with a particular expansion CE corresponding
|
||||
* to the ones in expansionEndCE
|
||||
*/
|
||||
private int m_expansionEndCEMaxSizeSize_;
|
||||
/**
|
||||
* Size of the option table that contains information about the collation
|
||||
* options
|
||||
*/
|
||||
private int m_optionSize_;
|
||||
/**
|
||||
* Size of the whole data file minusing the ICU header
|
||||
*/
|
||||
private int m_size_;
|
||||
/**
|
||||
* Size of the collation data header
|
||||
*/
|
||||
private int m_headerSize_;
|
||||
/**
|
||||
* Size of the table that contains information about the "Unsafe"
|
||||
* codepoints
|
||||
*/
|
||||
private int m_unsafeSize_;
|
||||
/**
|
||||
* Size in bytes of the table that contains information about codepoints that ends
|
||||
* with a contraction
|
||||
*/
|
||||
private int m_contractionSize_;
|
||||
/**
|
||||
* Size of the table that contains UCA contraction information in bytes
|
||||
*/
|
||||
private int m_UCAcontractionSize_;
|
||||
/**
|
||||
* Offset of the UCA Const
|
||||
*/
|
||||
private int m_UCAConstOffset_;
|
||||
/**
|
||||
* Size of expansion table in bytes
|
||||
*/
|
||||
private int m_expansionSize_;
|
||||
/**
|
||||
* Size of contraction index table in bytes
|
||||
*/
|
||||
private int m_contractionIndexSize_;
|
||||
/**
|
||||
* Size of contraction table in bytes
|
||||
*/
|
||||
private int m_contractionCESize_;
|
||||
/*
|
||||
* Size of the Trie in bytes
|
||||
*/
|
||||
// private int m_trieSize_;
|
||||
/**
|
||||
* Size of the table that contains information about collation elements that end with an expansion
|
||||
*/
|
||||
private int m_expansionEndCESize_;
|
||||
/**
|
||||
* Size of the table that contains information about the maximum size of collation elements that end with a
|
||||
* particular expansion CE corresponding to the ones in expansionEndCE
|
||||
*/
|
||||
private int m_expansionEndCEMaxSizeSize_;
|
||||
/**
|
||||
* Size of the option table that contains information about the collation options
|
||||
*/
|
||||
private int m_optionSize_;
|
||||
/**
|
||||
* Size of the whole data file minusing the ICU header
|
||||
*/
|
||||
private int m_size_;
|
||||
/**
|
||||
* Size of the collation data header
|
||||
*/
|
||||
private int m_headerSize_;
|
||||
/**
|
||||
* Size of the table that contains information about the "Unsafe" codepoints
|
||||
*/
|
||||
private int m_unsafeSize_;
|
||||
/**
|
||||
* Size in bytes of the table that contains information about codepoints that ends with a contraction
|
||||
*/
|
||||
private int m_contractionSize_;
|
||||
/**
|
||||
* Size of the table that contains UCA contraction information in bytes
|
||||
*/
|
||||
private int m_UCAcontractionSize_;
|
||||
/**
|
||||
* Offset of the UCA Const
|
||||
*/
|
||||
private int m_UCAConstOffset_;
|
||||
|
||||
// private methods ---------------------------------------------------
|
||||
// private methods ---------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
|
@ -1566,18 +1566,19 @@ public final class RuleBasedCollator extends Collator {
|
||||
if (offset == 0) {
|
||||
return EMPTY_INT_ARRAY;
|
||||
}
|
||||
int[] reorderCodes;
|
||||
if ((offset & DATA_MASK_FOR_INDEX) == DATA_MASK_FOR_INDEX) {
|
||||
int[] reorderCodes = new int[1];
|
||||
reorderCodes = new int[1];
|
||||
reorderCodes[0] = offset & ~DATA_MASK_FOR_INDEX;
|
||||
}
|
||||
} else {
|
||||
|
||||
int length = readShort(this.LEAD_BYTE_TO_SCRIPTS_DATA, offset);
|
||||
offset++;
|
||||
int length = readShort(this.LEAD_BYTE_TO_SCRIPTS_DATA, offset);
|
||||
offset++;
|
||||
|
||||
int[] reorderCodes = new int[length];
|
||||
|
||||
for (int code = 0; code < length; code++, offset++) {
|
||||
reorderCodes[code] = readShort(this.LEAD_BYTE_TO_SCRIPTS_DATA, offset);
|
||||
reorderCodes = new int[length];
|
||||
for (int code = 0; code < length; code++, offset++) {
|
||||
reorderCodes[code] = readShort(this.LEAD_BYTE_TO_SCRIPTS_DATA, offset);
|
||||
}
|
||||
}
|
||||
return reorderCodes;
|
||||
}
|
||||
@ -1610,7 +1611,7 @@ public final class RuleBasedCollator extends Collator {
|
||||
}
|
||||
|
||||
private static int readShort(byte[] data, int offset) {
|
||||
return data[offset * 2] << 8 | data[offset * 2 + 1];
|
||||
return (0xff & data[offset * 2]) << 8 | (data[offset * 2 + 1] & 0xff);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1841,10 +1842,8 @@ public final class RuleBasedCollator extends Collator {
|
||||
ICUResourceBundle.ICU_COLLATION_BASE_NAME, ULocale.ENGLISH);
|
||||
iUCA_.m_rules_ = (String) rb.getObject("UCARules");
|
||||
} catch (MissingResourceException ex) {
|
||||
int i =12;
|
||||
// throw ex;
|
||||
} catch (IOException e) {
|
||||
int i =12;
|
||||
// e.printStackTrace();
|
||||
// throw new MissingResourceException(e.getMessage(),"","");
|
||||
}
|
||||
@ -2422,78 +2421,78 @@ public final class RuleBasedCollator extends Collator {
|
||||
|
||||
int p2 = (ce >>>= 16) & LAST_BYTE_MASK_; // in ints for unsigned
|
||||
int p1 = ce >>> 8; // comparison
|
||||
if (doShift) {
|
||||
if (m_utilCount4_ > 0) {
|
||||
while (m_utilCount4_ > bottomCount4) {
|
||||
m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_, (byte) (commonBottom4 + bottomCount4));
|
||||
m_utilBytesCount4_++;
|
||||
m_utilCount4_ -= bottomCount4;
|
||||
}
|
||||
m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_, (byte) (commonBottom4 + (m_utilCount4_ - 1)));
|
||||
m_utilBytesCount4_++;
|
||||
m_utilCount4_ = 0;
|
||||
}
|
||||
// dealing with a variable and we're treating them as shifted
|
||||
// This is a shifted ignorable
|
||||
if (p1 != 0) {
|
||||
// we need to check this since we could be in continuation
|
||||
m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_, (byte) p1);
|
||||
m_utilBytesCount4_++;
|
||||
}
|
||||
if (p2 != 0) {
|
||||
m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_, (byte) p2);
|
||||
m_utilBytesCount4_++;
|
||||
}
|
||||
} else {
|
||||
// Note: This code assumes that the table is well built
|
||||
// i.e. not having 0 bytes where they are not supposed to be.
|
||||
// Usually, we'll have non-zero primary1 & primary2, except
|
||||
// in cases of LatinOne and friends, when primary2 will be
|
||||
// regular and simple sortkey calc
|
||||
if (p1 != CollationElementIterator.IGNORABLE) {
|
||||
if (notIsContinuation) {
|
||||
if (leadPrimary == p1) {
|
||||
m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, (byte) p2);
|
||||
m_utilBytesCount1_++;
|
||||
} else {
|
||||
if (leadPrimary != 0) {
|
||||
m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_,
|
||||
((p1 > leadPrimary) ? BYTE_UNSHIFTED_MAX_ : BYTE_UNSHIFTED_MIN_));
|
||||
m_utilBytesCount1_++;
|
||||
}
|
||||
if (p2 == CollationElementIterator.IGNORABLE) {
|
||||
// one byter, not compressed
|
||||
m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, (byte) p1);
|
||||
m_utilBytesCount1_++;
|
||||
leadPrimary = 0;
|
||||
} else if (isCompressible(p1)) {
|
||||
// compress
|
||||
leadPrimary = p1;
|
||||
m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, (byte) p1);
|
||||
m_utilBytesCount1_++;
|
||||
m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, (byte) p2);
|
||||
m_utilBytesCount1_++;
|
||||
} else {
|
||||
leadPrimary = 0;
|
||||
m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, (byte) p1);
|
||||
m_utilBytesCount1_++;
|
||||
m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, (byte) p2);
|
||||
m_utilBytesCount1_++;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// continuation, add primary to the key, no compression
|
||||
m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, (byte) p1);
|
||||
m_utilBytesCount1_++;
|
||||
if (p2 != CollationElementIterator.IGNORABLE) {
|
||||
m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, (byte) p2);
|
||||
// second part
|
||||
m_utilBytesCount1_++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return leadPrimary;
|
||||
if (doShift) {
|
||||
if (m_utilCount4_ > 0) {
|
||||
while (m_utilCount4_ > bottomCount4) {
|
||||
m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_, (byte) (commonBottom4 + bottomCount4));
|
||||
m_utilBytesCount4_++;
|
||||
m_utilCount4_ -= bottomCount4;
|
||||
}
|
||||
m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_, (byte) (commonBottom4 + (m_utilCount4_ - 1)));
|
||||
m_utilBytesCount4_++;
|
||||
m_utilCount4_ = 0;
|
||||
}
|
||||
// dealing with a variable and we're treating them as shifted
|
||||
// This is a shifted ignorable
|
||||
if (p1 != 0) {
|
||||
// we need to check this since we could be in continuation
|
||||
m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_, (byte) p1);
|
||||
m_utilBytesCount4_++;
|
||||
}
|
||||
if (p2 != 0) {
|
||||
m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_, (byte) p2);
|
||||
m_utilBytesCount4_++;
|
||||
}
|
||||
} else {
|
||||
// Note: This code assumes that the table is well built
|
||||
// i.e. not having 0 bytes where they are not supposed to be.
|
||||
// Usually, we'll have non-zero primary1 & primary2, except
|
||||
// in cases of LatinOne and friends, when primary2 will be
|
||||
// regular and simple sortkey calc
|
||||
if (p1 != CollationElementIterator.IGNORABLE) {
|
||||
if (notIsContinuation) {
|
||||
if (leadPrimary == p1) {
|
||||
m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, (byte) p2);
|
||||
m_utilBytesCount1_++;
|
||||
} else {
|
||||
if (leadPrimary != 0) {
|
||||
m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_,
|
||||
((p1 > leadPrimary) ? BYTE_UNSHIFTED_MAX_ : BYTE_UNSHIFTED_MIN_));
|
||||
m_utilBytesCount1_++;
|
||||
}
|
||||
if (p2 == CollationElementIterator.IGNORABLE) {
|
||||
// one byter, not compressed
|
||||
m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, (byte) p1);
|
||||
m_utilBytesCount1_++;
|
||||
leadPrimary = 0;
|
||||
} else if (isCompressible(p1)) {
|
||||
// compress
|
||||
leadPrimary = p1;
|
||||
m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, (byte) p1);
|
||||
m_utilBytesCount1_++;
|
||||
m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, (byte) p2);
|
||||
m_utilBytesCount1_++;
|
||||
} else {
|
||||
leadPrimary = 0;
|
||||
m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, (byte) p1);
|
||||
m_utilBytesCount1_++;
|
||||
m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, (byte) p2);
|
||||
m_utilBytesCount1_++;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// continuation, add primary to the key, no compression
|
||||
m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, (byte) p1);
|
||||
m_utilBytesCount1_++;
|
||||
if (p2 != CollationElementIterator.IGNORABLE) {
|
||||
m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, (byte) p2);
|
||||
// second part
|
||||
m_utilBytesCount1_++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return leadPrimary;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -2746,9 +2745,6 @@ public final class RuleBasedCollator extends Collator {
|
||||
m_utilFrenchStart_ = -1;
|
||||
m_utilFrenchEnd_ = -1;
|
||||
|
||||
// scriptorder not implemented yet
|
||||
// const uint8_t *scriptOrder = coll->scriptOrder;
|
||||
|
||||
boolean doShift = false;
|
||||
boolean notIsContinuation = false;
|
||||
|
||||
@ -3784,7 +3780,7 @@ public final class RuleBasedCollator extends Collator {
|
||||
return ch;
|
||||
}
|
||||
|
||||
private static final int UCOL_REORDER_CODE_IGNORE = CollationReorderCodes.LIMIT + 1;
|
||||
private static final int UCOL_REORDER_CODE_IGNORE = ReorderCodes.LIMIT + 1;
|
||||
/**
|
||||
* Builds the lead byte permutation table
|
||||
*/
|
||||
@ -3812,13 +3808,13 @@ public final class RuleBasedCollator extends Collator {
|
||||
|
||||
// prefill the reordering codes with the leading entries
|
||||
int[] internalReorderCodes = new int[m_scriptOrder_.length + 5]; // TODO - replace 5 with the reorder codes prefix size
|
||||
for (int codeIndex = 0; codeIndex < CollationReorderCodes.LIMIT - CollationReorderCodes.FIRST; codeIndex++) {
|
||||
internalReorderCodes[codeIndex] = CollationReorderCodes.FIRST + codeIndex;
|
||||
for (int codeIndex = 0; codeIndex < ReorderCodes.LIMIT - ReorderCodes.FIRST; codeIndex++) {
|
||||
internalReorderCodes[codeIndex] = ReorderCodes.FIRST + codeIndex;
|
||||
}
|
||||
for (int codeIndex = 0; codeIndex < m_scriptOrder_.length; codeIndex++) {
|
||||
internalReorderCodes[codeIndex + (CollationReorderCodes.LIMIT - CollationReorderCodes.FIRST)] = m_scriptOrder_[codeIndex];
|
||||
if (m_scriptOrder_[codeIndex] >= CollationReorderCodes.FIRST && m_scriptOrder_[codeIndex] < CollationReorderCodes.LIMIT) {
|
||||
internalReorderCodes[m_scriptOrder_[codeIndex] - CollationReorderCodes.FIRST] = UCOL_REORDER_CODE_IGNORE;
|
||||
internalReorderCodes[codeIndex + (ReorderCodes.LIMIT - ReorderCodes.FIRST)] = m_scriptOrder_[codeIndex];
|
||||
if (m_scriptOrder_[codeIndex] >= ReorderCodes.FIRST && m_scriptOrder_[codeIndex] < ReorderCodes.LIMIT) {
|
||||
internalReorderCodes[m_scriptOrder_[codeIndex] - ReorderCodes.FIRST] = UCOL_REORDER_CODE_IGNORE;
|
||||
}
|
||||
}
|
||||
|
||||
@ -3898,6 +3894,8 @@ public final class RuleBasedCollator extends Collator {
|
||||
// for (int i = 0; i < 256; i++){
|
||||
// System.out.println(Integer.toString(i, 16) + " -> " + Integer.toString(m_scriptReorderTable_[i], 16));
|
||||
// }
|
||||
latinOneRegenTable_ = true;
|
||||
updateInternalState();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -4050,8 +4048,9 @@ public final class RuleBasedCollator extends Collator {
|
||||
|
||||
private final void addLatinOneEntry(char ch, int CE, shiftValues sh) {
|
||||
int primary1 = 0, primary2 = 0, secondary = 0, tertiary = 0;
|
||||
boolean continuation = isContinuation(CE);
|
||||
boolean reverseSecondary = false;
|
||||
if (!isContinuation(CE)) {
|
||||
if (!continuation) {
|
||||
tertiary = ((CE & m_mask3_));
|
||||
tertiary ^= m_caseSwitch_;
|
||||
reverseSecondary = true;
|
||||
@ -4066,6 +4065,9 @@ public final class RuleBasedCollator extends Collator {
|
||||
primary1 = (CE >>> 8);
|
||||
|
||||
if (primary1 != 0) {
|
||||
if (m_leadBytePermutationTable_ != null && !continuation) {
|
||||
primary1 = m_leadBytePermutationTable_[primary1];
|
||||
}
|
||||
latinOneCEs_[ch] |= (primary1 << sh.primShift);
|
||||
sh.primShift -= 8;
|
||||
}
|
||||
@ -4398,11 +4400,6 @@ public final class RuleBasedCollator extends Collator {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!isContinuation(sOrder) && m_leadBytePermutationTable_ != null) {
|
||||
sOrder = (m_leadBytePermutationTable_[((sOrder >> 24) + 256) % 256] << 24) | (sOrder & 0x00FFFFFF);
|
||||
tOrder = (m_leadBytePermutationTable_[((tOrder >> 24) + 256) % 256] << 24) | (tOrder & 0x00FFFFFF);
|
||||
}
|
||||
|
||||
if (sOrder == tOrder) { // if we have same CEs, we continue the loop
|
||||
sOrder = 0;
|
||||
tOrder = 0;
|
||||
|
@ -22,10 +22,12 @@ import com.ibm.icu.dev.test.TestFmwk;
|
||||
import com.ibm.icu.impl.ICUResourceBundle;
|
||||
import com.ibm.icu.impl.ImplicitCEGenerator;
|
||||
import com.ibm.icu.impl.Utility;
|
||||
import com.ibm.icu.lang.UScript;
|
||||
import com.ibm.icu.text.CollationElementIterator;
|
||||
import com.ibm.icu.text.CollationKey;
|
||||
import com.ibm.icu.text.CollationKey.BoundMode;
|
||||
import com.ibm.icu.text.Collator;
|
||||
import com.ibm.icu.text.Collator.ReorderCodes;
|
||||
import com.ibm.icu.text.Normalizer;
|
||||
import com.ibm.icu.text.RawCollationKey;
|
||||
import com.ibm.icu.text.RuleBasedCollator;
|
||||
@ -3065,4 +3067,404 @@ public class CollationMiscTest extends TestFmwk {
|
||||
warnln("ERROR: in creation of rule based collator");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// public void TestGreekFirstReorder(){
|
||||
// String[] testSourceCases = {
|
||||
// "\u0041",
|
||||
// "\u03b1\u0041",
|
||||
// "\u0061",
|
||||
// "\u0041\u0061",
|
||||
// "\u0391",
|
||||
// };
|
||||
//
|
||||
// String[] testTargetCases = {
|
||||
// "\u03b1",
|
||||
// "\u0041\u03b1",
|
||||
// "\u0391",
|
||||
// "\u0391\u03b1",
|
||||
// "\u0391",
|
||||
// };
|
||||
//
|
||||
// int[] results = {
|
||||
// 1,
|
||||
// -1,
|
||||
// 1,
|
||||
// 1,
|
||||
// 0
|
||||
// };
|
||||
//
|
||||
// Collator myCollation;
|
||||
// String rules = "[reorder Grek]";
|
||||
// try {
|
||||
// myCollation = new RuleBasedCollator(rules);
|
||||
// } catch (Exception e) {
|
||||
// warnln("ERROR: in creation of rule based collator");
|
||||
// return;
|
||||
// }
|
||||
// myCollation.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
|
||||
// myCollation.setStrength(Collator.TERTIARY);
|
||||
// for (int i = 0; i < testSourceCases.length; i++)
|
||||
// {
|
||||
// CollationTest.doTest(this, (RuleBasedCollator)myCollation,
|
||||
// testSourceCases[i], testTargetCases[i],
|
||||
// results[i]);
|
||||
// }
|
||||
//
|
||||
// try {
|
||||
// myCollation = new RuleBasedCollator("");
|
||||
// } catch (Exception e) {
|
||||
// warnln("ERROR: in creation of rule based collator");
|
||||
// return;
|
||||
// }
|
||||
// myCollation.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
|
||||
// myCollation.setStrength(Collator.TERTIARY);
|
||||
// myCollation.setScriptOrder(new int[]{UScript.GREEK});
|
||||
// for (int i = 0; i < testSourceCases.length; i++)
|
||||
// {
|
||||
// CollationTest.doTest(this, (RuleBasedCollator)myCollation,
|
||||
// testSourceCases[i], testTargetCases[i],
|
||||
// results[i]);
|
||||
// }
|
||||
//
|
||||
// try {
|
||||
// myCollation = new RuleBasedCollator("");
|
||||
// } catch (Exception e) {
|
||||
// warnln("ERROR: in creation of rule based collator");
|
||||
// return;
|
||||
// }
|
||||
// myCollation.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
|
||||
// myCollation.setStrength(Collator.TERTIARY);
|
||||
// myCollation.setScriptOrder(UScript.GREEK);
|
||||
// for (int i = 0; i < testSourceCases.length; i++)
|
||||
// {
|
||||
// CollationTest.doTest(this, (RuleBasedCollator)myCollation,
|
||||
// testSourceCases[i], testTargetCases[i],
|
||||
// results[i]);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// public void TestUnknownReorder(){
|
||||
// String[] testSourceCases = {
|
||||
// "\u0041",
|
||||
// "\u0041",
|
||||
// "\u0031",
|
||||
// "\u0391",
|
||||
// "\u0031",
|
||||
// };
|
||||
//
|
||||
// String[] testTargetCases = {
|
||||
// "\u03b1",
|
||||
// "\u0031",
|
||||
// "\u0391",
|
||||
// "\u099d",
|
||||
// "\u0032",
|
||||
// };
|
||||
//
|
||||
// int[] results = {
|
||||
// -1,
|
||||
// 1,
|
||||
// 1,
|
||||
// -1,
|
||||
// -1
|
||||
// };
|
||||
//
|
||||
// Collator myCollation;
|
||||
// String rules = "[reorder Latn Zzzz Zyyy]";
|
||||
// try {
|
||||
// myCollation = new RuleBasedCollator(rules);
|
||||
// } catch (Exception e) {
|
||||
// warnln("ERROR: in creation of rule based collator");
|
||||
// return;
|
||||
// }
|
||||
// myCollation.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
|
||||
// myCollation.setStrength(Collator.TERTIARY);
|
||||
// for (int i = 0; i < testSourceCases.length ; i++)
|
||||
// {
|
||||
// CollationTest.doTest(this, (RuleBasedCollator)myCollation,
|
||||
// testSourceCases[i], testTargetCases[i],
|
||||
// results[i]);
|
||||
// }
|
||||
//
|
||||
// try {
|
||||
// myCollation = new RuleBasedCollator("");
|
||||
// } catch (Exception e) {
|
||||
// warnln("ERROR: in creation of rule based collator");
|
||||
// return;
|
||||
// }
|
||||
// myCollation.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
|
||||
// myCollation.setStrength(Collator.TERTIARY);
|
||||
// myCollation.setScriptOrder(new int[]{UScript.LATIN, UScript.UNKNOWN, UScript.COMMON});
|
||||
// for (int i = 0; i < testSourceCases.length ; i++)
|
||||
// {
|
||||
// CollationTest.doTest(this, (RuleBasedCollator)myCollation,
|
||||
// testSourceCases[i], testTargetCases[i],
|
||||
// results[i]);
|
||||
// }
|
||||
//
|
||||
// try {
|
||||
// myCollation = new RuleBasedCollator("");
|
||||
// } catch (Exception e) {
|
||||
// warnln("ERROR: in creation of rule based collator");
|
||||
// return;
|
||||
// }
|
||||
// myCollation.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
|
||||
// myCollation.setStrength(Collator.TERTIARY);
|
||||
// myCollation.setScriptOrder(UScript.LATIN, UScript.UNKNOWN, UScript.COMMON);
|
||||
// for (int i = 0; i < testSourceCases.length ; i++)
|
||||
// {
|
||||
// CollationTest.doTest(this, (RuleBasedCollator)myCollation,
|
||||
// testSourceCases[i], testTargetCases[i],
|
||||
// results[i]);
|
||||
// }
|
||||
// }
|
||||
|
||||
public void TestSameLeadBytScriptReorder(){
|
||||
String[] testSourceCases = {
|
||||
"\ud800\udf31", // Gothic
|
||||
"\ud801\udc50", // Shavian
|
||||
};
|
||||
|
||||
String[] testTargetCases = {
|
||||
"\u0100", // Latin Extended-A
|
||||
"\u2c74", // Latin Extended-C
|
||||
};
|
||||
|
||||
int[] results = {
|
||||
-1,
|
||||
-1,
|
||||
};
|
||||
|
||||
int[] equivalentScriptsResult = {
|
||||
UScript.BOPOMOFO, //Bopo
|
||||
UScript.LISU, //Lisu
|
||||
UScript.LYCIAN, //Lyci
|
||||
UScript.CARIAN, //Cari
|
||||
UScript.LYDIAN, //Lydi
|
||||
UScript.YI, //Yiii
|
||||
UScript.OLD_ITALIC, //Ital
|
||||
UScript.GOTHIC, //Goth
|
||||
UScript.DESERET, //Dsrt
|
||||
UScript.SHAVIAN, //Shaw
|
||||
UScript.OSMANYA, //Osma
|
||||
UScript.LINEAR_B, //Linb
|
||||
UScript.CYPRIOT, //Cprt
|
||||
UScript.OLD_SOUTH_ARABIAN, //Sarb
|
||||
UScript.AVESTAN, //Avst
|
||||
UScript.IMPERIAL_ARAMAIC, //Armi
|
||||
UScript.INSCRIPTIONAL_PARTHIAN, //Prti
|
||||
UScript.INSCRIPTIONAL_PAHLAVI, //Phli
|
||||
UScript.UGARITIC, //Ugar
|
||||
UScript.OLD_PERSIAN, //Xpeo
|
||||
UScript.CUNEIFORM, //Xsux
|
||||
UScript.EGYPTIAN_HIEROGLYPHS //Egyp
|
||||
};
|
||||
|
||||
Collator myCollation;
|
||||
String rules = "[reorder Goth Latn]";
|
||||
try {
|
||||
myCollation = new RuleBasedCollator(rules);
|
||||
} catch (Exception e) {
|
||||
warnln("ERROR: in creation of rule based collator");
|
||||
return;
|
||||
}
|
||||
myCollation.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
|
||||
myCollation.setStrength(Collator.TERTIARY);
|
||||
for (int i = 0; i < testSourceCases.length ; i++)
|
||||
{
|
||||
CollationTest.doTest(this, (RuleBasedCollator)myCollation,
|
||||
testSourceCases[i], testTargetCases[i],
|
||||
results[i]);
|
||||
}
|
||||
|
||||
// ensure that the non-reordered and reordered collation is the same
|
||||
Collator nonReorderdCollator = RuleBasedCollator.getInstance();
|
||||
int nonReorderedResults = nonReorderdCollator.compare(testSourceCases[0], testSourceCases[1]);
|
||||
CollationTest.doTest(this, (RuleBasedCollator)myCollation,
|
||||
testSourceCases[0], testSourceCases[1], nonReorderedResults);
|
||||
|
||||
Arrays.sort(equivalentScriptsResult);
|
||||
int[] equivalentScripts = RuleBasedCollator.getScriptEquivalentsForReordering(UScript.GOTHIC);
|
||||
Arrays.sort(equivalentScripts);
|
||||
assertTrue("Script Equivalents for Reordering", Arrays.equals(equivalentScripts, equivalentScriptsResult));
|
||||
|
||||
equivalentScripts = RuleBasedCollator.getScriptEquivalentsForReordering(UScript.SHAVIAN);
|
||||
Arrays.sort(equivalentScripts);
|
||||
assertTrue("Script Equivalents for Reordering", Arrays.equals(equivalentScripts, equivalentScriptsResult));
|
||||
}
|
||||
|
||||
public void TestGreekFirstReorderCloning(){
|
||||
String[] testSourceCases = {
|
||||
"\u0041",
|
||||
"\u03b1\u0041",
|
||||
"\u0061",
|
||||
"\u0041\u0061",
|
||||
"\u0391",
|
||||
};
|
||||
|
||||
String[] testTargetCases = {
|
||||
"\u03b1",
|
||||
"\u0041\u03b1",
|
||||
"\u0391",
|
||||
"\u0391\u03b1",
|
||||
"\u0391",
|
||||
};
|
||||
|
||||
int[] results = {
|
||||
1,
|
||||
-1,
|
||||
1,
|
||||
1,
|
||||
0
|
||||
};
|
||||
|
||||
Collator originalCollation;
|
||||
Collator myCollation;
|
||||
String rules = "[reorder Grek]";
|
||||
try {
|
||||
originalCollation = new RuleBasedCollator(rules);
|
||||
} catch (Exception e) {
|
||||
warnln("ERROR: in creation of rule based collator");
|
||||
return;
|
||||
}
|
||||
try {
|
||||
myCollation = (Collator) originalCollation.clone();
|
||||
} catch (Exception e) {
|
||||
warnln("ERROR: in creation of rule based collator");
|
||||
return;
|
||||
}
|
||||
myCollation.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
|
||||
myCollation.setStrength(Collator.TERTIARY);
|
||||
for (int i = 0; i < testSourceCases.length ; i++)
|
||||
{
|
||||
CollationTest.doTest(this, (RuleBasedCollator)myCollation,
|
||||
testSourceCases[i], testTargetCases[i],
|
||||
results[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Utility function to test one collation reordering test case.
|
||||
* @param testcases Array of test cases.
|
||||
* @param n_testcases Size of the array testcases.
|
||||
* @param str_rules Array of rules. These rules should be specifying the same rule in different formats.
|
||||
* @param n_rules Size of the array str_rules.
|
||||
*/
|
||||
private void doTestOneReorderingAPITestCase(OneTestCase testCases[], int reorderTokens[])
|
||||
{
|
||||
Collator myCollation = Collator.getInstance(ULocale.ENGLISH);
|
||||
myCollation.setScriptOrder(reorderTokens);
|
||||
|
||||
for (OneTestCase testCase : testCases) {
|
||||
CollationTest.doTest(this, (RuleBasedCollator)myCollation,
|
||||
testCase.m_source_,
|
||||
testCase.m_target_,
|
||||
testCase.m_result_);
|
||||
}
|
||||
}
|
||||
|
||||
public void TestGreekFirstReorder()
|
||||
{
|
||||
String[] strRules = {
|
||||
"[reorder Grek]"
|
||||
};
|
||||
|
||||
int[] apiRules = {
|
||||
UScript.GREEK
|
||||
};
|
||||
|
||||
OneTestCase[] privateUseCharacterStrings = {
|
||||
new OneTestCase("\u0391", "\u0391", 0),
|
||||
new OneTestCase("\u0041", "\u0391", 1),
|
||||
new OneTestCase("\u03B1\u0041", "\u03B1\u0391", 1),
|
||||
new OneTestCase("\u0060", "\u0391", -1),
|
||||
new OneTestCase("\u0391", "\ue2dc", -1),
|
||||
new OneTestCase("\u0391", "\u0060", 1),
|
||||
};
|
||||
|
||||
/* Test rules creation */
|
||||
doTestCollation(privateUseCharacterStrings, strRules);
|
||||
|
||||
/* Test collation reordering API */
|
||||
doTestOneReorderingAPITestCase(privateUseCharacterStrings, apiRules);
|
||||
}
|
||||
|
||||
public void TestGreekLastReorder()
|
||||
{
|
||||
String[] strRules = {
|
||||
"[reorder Zzzz Grek]"
|
||||
};
|
||||
|
||||
int[] apiRules = {
|
||||
UScript.UNKNOWN, UScript.GREEK
|
||||
};
|
||||
|
||||
OneTestCase[] privateUseCharacterStrings = {
|
||||
new OneTestCase("\u0391", "\u0391", 0),
|
||||
new OneTestCase("\u0041", "\u0391", -1),
|
||||
new OneTestCase("\u03B1\u0041", "\u03B1\u0391", -1),
|
||||
new OneTestCase("\u0060", "\u0391", -1),
|
||||
new OneTestCase("\u0391", "\ue2dc", 1),
|
||||
};
|
||||
|
||||
/* Test rules creation */
|
||||
doTestCollation(privateUseCharacterStrings, strRules);
|
||||
|
||||
/* Test collation reordering API */
|
||||
doTestOneReorderingAPITestCase(privateUseCharacterStrings, apiRules);
|
||||
}
|
||||
|
||||
public void TestNonScriptReorder()
|
||||
{
|
||||
String[] strRules = {
|
||||
"[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
|
||||
};
|
||||
|
||||
int[] apiRules = {
|
||||
UScript.GREEK, ReorderCodes.SYMBOL, ReorderCodes.DIGIT, UScript.LATIN,
|
||||
ReorderCodes.PUNCTUATION, ReorderCodes.SPACE, UScript.UNKNOWN,
|
||||
ReorderCodes.CURRENCY
|
||||
};
|
||||
|
||||
OneTestCase[] privateUseCharacterStrings = {
|
||||
new OneTestCase("\u0391", "\u0041", -1),
|
||||
new OneTestCase("\u0041", "\u0391", 1),
|
||||
new OneTestCase("\u0060", "\u0041", -1),
|
||||
new OneTestCase("\u0060", "\u0391", 1),
|
||||
new OneTestCase("\u0024", "\u0041", 1),
|
||||
};
|
||||
|
||||
/* Test rules creation */
|
||||
doTestCollation(privateUseCharacterStrings, strRules);
|
||||
|
||||
/* Test collation reordering API */
|
||||
doTestOneReorderingAPITestCase(privateUseCharacterStrings, apiRules);
|
||||
}
|
||||
|
||||
public void TestHaniReorder()
|
||||
{
|
||||
String[] strRules = {
|
||||
"[reorder Hani]"
|
||||
};
|
||||
int[] apiRules = {
|
||||
UScript.HAN
|
||||
};
|
||||
|
||||
OneTestCase[] privateUseCharacterStrings = {
|
||||
new OneTestCase("\u4e00", "\u0041", -1),
|
||||
new OneTestCase("\u4e00", "\u0060", 1),
|
||||
new OneTestCase("\uD86D, 0xDF40", "\u0041", -1),
|
||||
new OneTestCase("\uD86D, 0xDF40", "\u0060", 1),
|
||||
new OneTestCase("\u4e00", "\uD86D\uDF40", -1),
|
||||
new OneTestCase("\ufa27", "\u0041", -1),
|
||||
new OneTestCase("\uD869\uDF00", "\u0041", -1),
|
||||
};
|
||||
|
||||
/* Test rules creation */
|
||||
doTestCollation(privateUseCharacterStrings, strRules);
|
||||
|
||||
/* Test collation reordering API */
|
||||
doTestOneReorderingAPITestCase(privateUseCharacterStrings, apiRules);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user