ICU-3515 new implicit CE generation runtime
X-SVN-Rev: 14378
This commit is contained in:
parent
c45d22e692
commit
43f7629080
@ -2210,7 +2210,7 @@ public final class CollationElementIterator
|
||||
// illegal code value, use completely ignorable!
|
||||
return IGNORABLE;
|
||||
}
|
||||
int result = getImplicitPrimary(codepoint);
|
||||
int result = RuleBasedCollator.impCEGen_.getImplicitFromCodePoint(codepoint);
|
||||
m_CEBuffer_[0] = (result & RuleBasedCollator.CE_PRIMARY_MASK_)
|
||||
| 0x00000505;
|
||||
m_CEBuffer_[1] = ((result & 0x0000FFFF) << 16) | 0x000000C0;
|
||||
@ -2906,7 +2906,7 @@ public final class CollationElementIterator
|
||||
if (!UCharacter.isLegal(codepoint)) {
|
||||
return IGNORABLE; // illegal code value, completely ignoreable!
|
||||
}
|
||||
int result = getImplicitPrimary(codepoint);
|
||||
int result = RuleBasedCollator.impCEGen_.getImplicitFromCodePoint(codepoint);
|
||||
m_CEBufferSize_ = 2;
|
||||
m_CEBufferOffset_ = 1;
|
||||
m_CEBuffer_[0] = (result & RuleBasedCollator.CE_PRIMARY_MASK_)
|
||||
@ -3008,82 +3008,82 @@ public final class CollationElementIterator
|
||||
* @param cp codepoint
|
||||
* @return left-justified primary key
|
||||
*/
|
||||
private static final int getImplicitPrimary(int cp)
|
||||
{
|
||||
cp = swapCJK(cp);
|
||||
// private static final int getImplicitPrimary(int cp)
|
||||
// {
|
||||
// cp = swapCJK(cp);
|
||||
//
|
||||
// //if (DEBUG) System.out.println("CJK swapped: " + Utility.hex(cp));
|
||||
// // we now have a range of numbers from 0 to 21FFFF.
|
||||
// // we must skip all 00, 01, 02 bytes, so most bytes have 253 values
|
||||
// // we must leave a gap of 01 between all values of the last byte, so
|
||||
// // the last byte has 126 values (3 byte case)
|
||||
// // we shift so that HAN all has the same first primary, for
|
||||
// // compression.
|
||||
// // for the 4 byte case, we make the gap as large as we can fit.
|
||||
// // Three byte forms are EC xx xx, ED xx xx, EE xx xx (with a gap of 1)
|
||||
// // Four byte forms (most supplementaries) are EF xx xx xx (with a gap
|
||||
// // of LAST2_MULTIPLIER == 21)
|
||||
//
|
||||
// int last0 = cp - RuleBasedCollator.IMPLICIT_4BYTE_BOUNDARY_;
|
||||
// if (last0 < 0) {
|
||||
// int last1 = cp / RuleBasedCollator.LAST_COUNT_;
|
||||
// last0 = cp % RuleBasedCollator.LAST_COUNT_;
|
||||
//
|
||||
// int last2 = last1 / RuleBasedCollator.OTHER_COUNT_;
|
||||
// last1 %= RuleBasedCollator.OTHER_COUNT_;
|
||||
// return RuleBasedCollator.IMPLICIT_BASE_3BYTE_ + (last2 << 24)
|
||||
// + (last1 << 16)
|
||||
// + ((last0 * RuleBasedCollator.LAST_MULTIPLIER_) << 8);
|
||||
// }
|
||||
// else {
|
||||
// int last1 = last0 / RuleBasedCollator.LAST_COUNT2_;
|
||||
// last0 %= RuleBasedCollator.LAST_COUNT2_;
|
||||
//
|
||||
// int last2 = last1 / RuleBasedCollator.OTHER_COUNT_;
|
||||
// last1 %= RuleBasedCollator.OTHER_COUNT_;
|
||||
//
|
||||
// int last3 = last2 / RuleBasedCollator.OTHER_COUNT_;
|
||||
// last2 %= RuleBasedCollator.OTHER_COUNT_;
|
||||
// return RuleBasedCollator.IMPLICIT_BASE_4BYTE_ + (last3 << 24)
|
||||
// + (last2 << 16) + (last1 << 8)
|
||||
// + (last0 * RuleBasedCollator.LAST2_MULTIPLIER_);
|
||||
// }
|
||||
// }
|
||||
|
||||
//if (DEBUG) System.out.println("CJK swapped: " + Utility.hex(cp));
|
||||
// we now have a range of numbers from 0 to 21FFFF.
|
||||
// we must skip all 00, 01, 02 bytes, so most bytes have 253 values
|
||||
// we must leave a gap of 01 between all values of the last byte, so
|
||||
// the last byte has 126 values (3 byte case)
|
||||
// we shift so that HAN all has the same first primary, for
|
||||
// compression.
|
||||
// for the 4 byte case, we make the gap as large as we can fit.
|
||||
// Three byte forms are EC xx xx, ED xx xx, EE xx xx (with a gap of 1)
|
||||
// Four byte forms (most supplementaries) are EF xx xx xx (with a gap
|
||||
// of LAST2_MULTIPLIER == 21)
|
||||
|
||||
int last0 = cp - RuleBasedCollator.IMPLICIT_4BYTE_BOUNDARY_;
|
||||
if (last0 < 0) {
|
||||
int last1 = cp / RuleBasedCollator.LAST_COUNT_;
|
||||
last0 = cp % RuleBasedCollator.LAST_COUNT_;
|
||||
|
||||
int last2 = last1 / RuleBasedCollator.OTHER_COUNT_;
|
||||
last1 %= RuleBasedCollator.OTHER_COUNT_;
|
||||
return RuleBasedCollator.IMPLICIT_BASE_3BYTE_ + (last2 << 24)
|
||||
+ (last1 << 16)
|
||||
+ ((last0 * RuleBasedCollator.LAST_MULTIPLIER_) << 8);
|
||||
}
|
||||
else {
|
||||
int last1 = last0 / RuleBasedCollator.LAST_COUNT2_;
|
||||
last0 %= RuleBasedCollator.LAST_COUNT2_;
|
||||
|
||||
int last2 = last1 / RuleBasedCollator.OTHER_COUNT_;
|
||||
last1 %= RuleBasedCollator.OTHER_COUNT_;
|
||||
|
||||
int last3 = last2 / RuleBasedCollator.OTHER_COUNT_;
|
||||
last2 %= RuleBasedCollator.OTHER_COUNT_;
|
||||
return RuleBasedCollator.IMPLICIT_BASE_4BYTE_ + (last3 << 24)
|
||||
+ (last2 << 16) + (last1 << 8)
|
||||
+ (last0 * RuleBasedCollator.LAST2_MULTIPLIER_);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Swaps CJK code points for implicit CEs (collation elements)
|
||||
* @param cp codepoint CJK
|
||||
* @return swapped result
|
||||
*/
|
||||
private static final int swapCJK(int cp)
|
||||
{
|
||||
if (cp >= CJK_BASE_) {
|
||||
if (cp < CJK_LIMIT_) {
|
||||
return cp - CJK_BASE_;
|
||||
}
|
||||
if (cp < CJK_COMPAT_USED_BASE_) {
|
||||
return cp + NON_CJK_OFFSET_;
|
||||
}
|
||||
if (cp < CJK_COMPAT_USED_LIMIT_) {
|
||||
return cp - CJK_COMPAT_USED_BASE_ + (CJK_LIMIT_ - CJK_BASE_);
|
||||
}
|
||||
if (cp < CJK_B_BASE_) {
|
||||
return cp + NON_CJK_OFFSET_;
|
||||
}
|
||||
if (cp < CJK_B_LIMIT_) {
|
||||
return cp; // non-BMP-CJK
|
||||
}
|
||||
return cp + NON_CJK_OFFSET_; // non-CJK
|
||||
}
|
||||
if (cp < CJK_A_BASE_) {
|
||||
return cp + NON_CJK_OFFSET_;
|
||||
}
|
||||
if (cp < CJK_A_LIMIT_) {
|
||||
return cp - CJK_A_BASE_ + (CJK_LIMIT_ - CJK_BASE_)
|
||||
+ (CJK_COMPAT_USED_LIMIT_ - CJK_COMPAT_USED_BASE_);
|
||||
}
|
||||
return cp + NON_CJK_OFFSET_; // non-CJK
|
||||
}
|
||||
// /**
|
||||
// * Swapping CJK characters for implicit ces
|
||||
// * @param cp codepoint CJK
|
||||
// * @return swapped result
|
||||
// */
|
||||
// private static final int swapCJK(int cp)
|
||||
// {
|
||||
// if (cp >= CJK_BASE_) {
|
||||
// if (cp < CJK_LIMIT_) {
|
||||
// return cp - CJK_BASE_;
|
||||
// }
|
||||
// if (cp < CJK_COMPAT_USED_BASE_) {
|
||||
// return cp + NON_CJK_OFFSET_;
|
||||
// }
|
||||
// if (cp < CJK_COMPAT_USED_LIMIT_) {
|
||||
// return cp - CJK_COMPAT_USED_BASE_ + (CJK_LIMIT_ - CJK_BASE_);
|
||||
// }
|
||||
// if (cp < CJK_B_BASE_) {
|
||||
// return cp + NON_CJK_OFFSET_;
|
||||
// }
|
||||
// if (cp < CJK_B_LIMIT_) {
|
||||
// return cp; // non-BMP-CJK
|
||||
// }
|
||||
// return cp + NON_CJK_OFFSET_; // non-CJK
|
||||
// }
|
||||
// if (cp < CJK_A_BASE_) {
|
||||
// return cp + NON_CJK_OFFSET_;
|
||||
// }
|
||||
// if (cp < CJK_A_LIMIT_) {
|
||||
// return cp - CJK_A_BASE_ + (CJK_LIMIT_ - CJK_BASE_)
|
||||
// + (CJK_COMPAT_USED_LIMIT_ - CJK_COMPAT_USED_BASE_);
|
||||
// }
|
||||
// return cp + NON_CJK_OFFSET_; // non-CJK
|
||||
// }
|
||||
|
||||
/**
|
||||
* Gets a character from the source string at a given offset.
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java,v $
|
||||
* $Date: 2004/01/14 21:49:20 $
|
||||
* $Revision: 1.55 $
|
||||
* $Date: 2004/01/22 06:40:38 $
|
||||
* $Revision: 1.56 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -1529,24 +1529,28 @@ public final class RuleBasedCollator extends Collator
|
||||
*/
|
||||
static final char UCA_CONTRACTIONS_[];
|
||||
/**
|
||||
* Implicit constants
|
||||
* Implicit generator
|
||||
*/
|
||||
static final int IMPLICIT_BASE_BYTE_;
|
||||
static final int IMPLICIT_LIMIT_BYTE_;
|
||||
static final int IMPLICIT_4BYTE_BOUNDARY_;
|
||||
static final int LAST_MULTIPLIER_;
|
||||
static final int LAST2_MULTIPLIER_;
|
||||
static final int IMPLICIT_BASE_3BYTE_;
|
||||
static final int IMPLICIT_BASE_4BYTE_;
|
||||
static final int BYTES_TO_AVOID_ = 3;
|
||||
static final int OTHER_COUNT_ = 256 - BYTES_TO_AVOID_;
|
||||
static final int LAST_COUNT_ = OTHER_COUNT_ / 2;
|
||||
/**
|
||||
* Room for intervening, without expanding to 5 bytes
|
||||
*/
|
||||
static final int LAST_COUNT2_ = OTHER_COUNT_ / 21;
|
||||
static final int IMPLICIT_3BYTE_COUNT_ = 1;
|
||||
|
||||
static final ImplicitCEGenerator impCEGen_;
|
||||
// /**
|
||||
// * Implicit constants
|
||||
// */
|
||||
// static final int IMPLICIT_BASE_BYTE_;
|
||||
// static final int IMPLICIT_LIMIT_BYTE_;
|
||||
// static final int IMPLICIT_4BYTE_BOUNDARY_;
|
||||
// static final int LAST_MULTIPLIER_;
|
||||
// static final int LAST2_MULTIPLIER_;
|
||||
// static final int IMPLICIT_BASE_3BYTE_;
|
||||
// static final int IMPLICIT_BASE_4BYTE_;
|
||||
// static final int BYTES_TO_AVOID_ = 3;
|
||||
// static final int OTHER_COUNT_ = 256 - BYTES_TO_AVOID_;
|
||||
// static final int LAST_COUNT_ = OTHER_COUNT_ / 2;
|
||||
// /**
|
||||
// * Room for intervening, without expanding to 5 bytes
|
||||
// */
|
||||
// static final int LAST_COUNT2_ = OTHER_COUNT_ / 21;
|
||||
// static final int IMPLICIT_3BYTE_COUNT_ = 1;
|
||||
//
|
||||
static final byte SORT_LEVEL_TERMINATOR_ = 1;
|
||||
|
||||
|
||||
@ -1566,16 +1570,17 @@ public final class RuleBasedCollator extends Collator
|
||||
b.close();
|
||||
i.close();
|
||||
// called before doing canonical closure for the UCA.
|
||||
IMPLICIT_BASE_BYTE_ = UCA_CONSTANTS_.PRIMARY_IMPLICIT_MIN_;
|
||||
// leave room for 1 3-byte and 2 4-byte forms
|
||||
IMPLICIT_LIMIT_BYTE_ = IMPLICIT_BASE_BYTE_ + 4;
|
||||
IMPLICIT_4BYTE_BOUNDARY_ = IMPLICIT_3BYTE_COUNT_ * OTHER_COUNT_
|
||||
* LAST_COUNT_;
|
||||
LAST_MULTIPLIER_ = OTHER_COUNT_ / LAST_COUNT_;
|
||||
LAST2_MULTIPLIER_ = OTHER_COUNT_ / LAST_COUNT2_;
|
||||
IMPLICIT_BASE_3BYTE_ = (IMPLICIT_BASE_BYTE_ << 24) + 0x030300;
|
||||
IMPLICIT_BASE_4BYTE_ = ((IMPLICIT_BASE_BYTE_
|
||||
+ IMPLICIT_3BYTE_COUNT_) << 24) + 0x030303;
|
||||
impCEGen_ = new ImplicitCEGenerator(UCA_CONSTANTS_.PRIMARY_IMPLICIT_MIN_, UCA_CONSTANTS_.PRIMARY_IMPLICIT_MAX_);
|
||||
// IMPLICIT_BASE_BYTE_ = UCA_CONSTANTS_.PRIMARY_IMPLICIT_MIN_;
|
||||
// // leave room for 1 3-byte and 2 4-byte forms
|
||||
// IMPLICIT_LIMIT_BYTE_ = IMPLICIT_BASE_BYTE_ + 4;
|
||||
// IMPLICIT_4BYTE_BOUNDARY_ = IMPLICIT_3BYTE_COUNT_ * OTHER_COUNT_
|
||||
// * LAST_COUNT_;
|
||||
// LAST_MULTIPLIER_ = OTHER_COUNT_ / LAST_COUNT_;
|
||||
// LAST2_MULTIPLIER_ = OTHER_COUNT_ / LAST_COUNT2_;
|
||||
// IMPLICIT_BASE_3BYTE_ = (IMPLICIT_BASE_BYTE_ << 24) + 0x030300;
|
||||
// IMPLICIT_BASE_4BYTE_ = ((IMPLICIT_BASE_BYTE_
|
||||
// + IMPLICIT_3BYTE_COUNT_) << 24) + 0x030303;
|
||||
UCA_.init();
|
||||
ResourceBundle rb = ICULocaleData.getLocaleElements(Locale.ENGLISH);
|
||||
UCA_.m_rules_ = (String)rb.getObject("%%UCARULES");
|
||||
|
Loading…
Reference in New Issue
Block a user