diff --git a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationBuilder.java b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationBuilder.java index 139e3539f8..5172d87eb3 100644 --- a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationBuilder.java +++ b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationBuilder.java @@ -124,7 +124,7 @@ public final class CollationBuilder extends CollationRuleParser.Sink { int index = findOrInsertNodeForCEs(strength); long node = nodes.elementAti(index); - // If the index is for a "weaker" tailored node, + // If the index is for a "weaker" node, // then skip backwards over this and further "weaker" nodes. while(strengthFromNode(node) > strength) { index = previousIndexFromNode(node); @@ -168,6 +168,8 @@ public final class CollationBuilder extends CollationRuleParser.Sink { if(strength >= Collator.TERTIARY) { index = findCommonNode(index, Collator.TERTIARY); } + // findCommonNode() stayed on the stronger node or moved to + // an explicit common-weight node of the reset-before strength. node = nodes.elementAti(index); if(strengthFromNode(node) == strength) { // Found a same-strength node with an explicit weight. @@ -178,90 +180,45 @@ public final class CollationBuilder extends CollationRuleParser.Sink { "reset secondary-before secondary ignorable not possible" : "reset tertiary-before completely ignorable not possible"); } - assert(weight16 >= Collation.COMMON_WEIGHT16); + assert(weight16 > Collation.BEFORE_WEIGHT16); + // Reset to just before this node. + // Insert the preceding same-level explicit weight if it is not there already. + // Which explicit weight immediately precedes this one? + weight16 = getWeight16Before(index, node, strength); + // Does this preceding weight have a node? + int previousWeight16; int previousIndex = previousIndexFromNode(node); - if(weight16 == Collation.COMMON_WEIGHT16) { - // Reset to just before this same-strength common-weight node. + for(int i = previousIndex;; i = previousIndexFromNode(node)) { + node = nodes.elementAti(i); + int previousStrength = strengthFromNode(node); + if(previousStrength < strength) { + assert(weight16 >= Collation.COMMON_WEIGHT16 || i == previousIndex); + // Either the reset element has an above-common weight and + // the parent node provides the implied common weight, + // or the reset element has a weight<=common in the node + // right after the parent, and we need to insert the preceding weight. + previousWeight16 = Collation.COMMON_WEIGHT16; + break; + } else if(previousStrength == strength && !isTailoredNode(node)) { + previousWeight16 = weight16FromNode(node); + break; + } + // Skip weaker nodes and same-level tailored nodes. + } + if(previousWeight16 == weight16) { + // The preceding weight has a node, + // maybe with following weaker or tailored nodes. + // Reset to the last of them. index = previousIndex; } else { - // A non-common weight is only possible from a root CE. - // Find the higher-level weights, which must all be explicit, - // and then find the preceding weight for this level. - long previousWeight16 = 0; - int previousWeightIndex = -1; - int i = index; - if(strength == Collator.SECONDARY) { - long p; - do { - i = previousIndexFromNode(node); - node = nodes.elementAti(i); - if(strengthFromNode(node) == Collator.SECONDARY && !isTailoredNode(node) && - previousWeightIndex < 0) { - previousWeightIndex = i; - previousWeight16 = weight16FromNode(node); - } - } while(strengthFromNode(node) > Collator.PRIMARY); - assert(!isTailoredNode(node)); - p = weight32FromNode(node); - weight16 = rootElements.getSecondaryBefore(p, weight16); - } else { - long p; - int s; - do { - i = previousIndexFromNode(node); - node = nodes.elementAti(i); - if(strengthFromNode(node) == Collator.TERTIARY && !isTailoredNode(node) && - previousWeightIndex < 0) { - previousWeightIndex = i; - previousWeight16 = weight16FromNode(node); - } - } while(strengthFromNode(node) > Collator.SECONDARY); - assert(!isTailoredNode(node)); - if(strengthFromNode(node) == Collator.SECONDARY) { - s = weight16FromNode(node); - do { - i = previousIndexFromNode(node); - node = nodes.elementAti(i); - } while(strengthFromNode(node) > Collator.PRIMARY); - assert(!isTailoredNode(node)); - } else { - assert(!nodeHasBefore2(node)); - s = Collation.COMMON_WEIGHT16; - } - p = weight32FromNode(node); - weight16 = rootElements.getTertiaryBefore(p, s, weight16); - assert((weight16 & ~Collation.ONLY_TERTIARY_MASK) == 0); - } - // Find or insert the new explicit weight before the current one. - if(previousWeightIndex >= 0 && weight16 == previousWeight16) { - // Tailor after the last node between adjacent root nodes. - index = previousIndex; - } else { - node = nodeFromWeight16(weight16) | nodeFromStrength(strength); - index = insertNodeBetween(previousIndex, index, node); - } + // Insert a node with the preceding weight, reset to that. + node = nodeFromWeight16(weight16) | nodeFromStrength(strength); + index = insertNodeBetween(previousIndex, index, node); } } else { // Found a stronger node with implied strength-common weight. - long hasBefore3 = 0; - if(strength == Collator.SECONDARY) { - assert(!nodeHasBefore2(node)); - // Move the HAS_BEFORE3 flag from the parent node - // to the new secondary common node. - hasBefore3 = node & HAS_BEFORE3; - node = (node & ~(long)HAS_BEFORE3) | HAS_BEFORE2; - } else { - assert(!nodeHasBefore3(node)); - node |= HAS_BEFORE3; - } - nodes.setElementAt(node, index); - int nextIndex = nextIndexFromNode(node); - // Insert default nodes with weights 01 and 05, reset to the 01 node. - node = nodeFromWeight16(Collation.BEFORE_WEIGHT16) | nodeFromStrength(strength); - index = insertNodeBetween(index, nextIndex, node); - node = nodeFromWeight16(Collation.COMMON_WEIGHT16) | hasBefore3 | - nodeFromStrength(strength); - insertNodeBetween(index, nextIndex, node); + int weight16 = getWeight16Before(index, node, strength); + index = findOrInsertWeakNode(index, weight16, strength); } // Strength of the temporary CE = strength of its reset position. // Code above raises an error if the before-strength is stronger. @@ -270,6 +227,52 @@ public final class CollationBuilder extends CollationRuleParser.Sink { ces[cesLength - 1] = tempCEFromIndexAndStrength(index, strength); } + /** + * Returns the secondary or tertiary weight preceding the current node's weight. + * node=nodes[index]. + */ + private int getWeight16Before(int index, long node, int level) { + assert(strengthFromNode(node) < level || !isTailoredNode(node)); + // Collect the root CE weights if this node is for a root CE. + // If it is not, then return the low non-primary boundary for a tailored CE. + int t; + if(strengthFromNode(node) == Collator.TERTIARY) { + t = weight16FromNode(node); + } else { + t = Collation.COMMON_WEIGHT16; // Stronger node with implied common weight. + } + while(strengthFromNode(node) > Collator.SECONDARY) { + index = previousIndexFromNode(node); + node = nodes.elementAti(index); + } + if(isTailoredNode(node)) { + return Collation.BEFORE_WEIGHT16; + } + int s; + if(strengthFromNode(node) == Collator.SECONDARY) { + s = weight16FromNode(node); + } else { + s = Collation.COMMON_WEIGHT16; // Stronger node with implied common weight. + } + while(strengthFromNode(node) > Collator.PRIMARY) { + index = previousIndexFromNode(node); + node = nodes.elementAti(index); + } + if(isTailoredNode(node)) { + return Collation.BEFORE_WEIGHT16; + } + // [p, s, t] is a root CE. Return the preceding weight for the requested level. + long p = weight32FromNode(node); + int weight16; + if(level == Collator.SECONDARY) { + weight16 = rootElements.getSecondaryBefore(p, s); + } else { + weight16 = rootElements.getTertiaryBefore(p, s, t); + assert((weight16 & ~Collation.ONLY_TERTIARY_MASK) == 0); + } + return weight16; + } + private long getSpecialResetPosition(CharSequence str) { assert(str.length() == 2); long ce; @@ -557,7 +560,7 @@ public final class CollationBuilder extends CollationRuleParser.Sink { // down to the requested level/strength. // Root CEs must have common=zero quaternary weights (for which we never insert any nodes). assert((ce & 0xc0) == 0); - int index = findOrInsertNodeForPrimary(ce >>> 32 ); + int index = findOrInsertNodeForPrimary(ce >>> 32); if(strength >= Collator.SECONDARY) { int lower32 = (int)ce; index = findOrInsertWeakNode(index, lower32 >>> 16, Collator.SECONDARY); @@ -619,17 +622,44 @@ public final class CollationBuilder extends CollationRuleParser.Sink { /** Finds or inserts the node for a secondary or tertiary weight. */ private int findOrInsertWeakNode(int index, int weight16, int level) { assert(0 <= index && index < nodes.size()); + assert(Collator.SECONDARY <= level && level <= Collator.TERTIARY); - assert(weight16 == 0 || weight16 >= Collation.COMMON_WEIGHT16); - // Only reset-before inserts common weights. if(weight16 == Collation.COMMON_WEIGHT16) { return findCommonNode(index, level); } + + // If this will be the first below-common weight for the parent node, + // then we will also need to insert a common weight after it. + long node = nodes.elementAti(index); + assert(strengthFromNode(node) < level); // parent node is stronger + if(weight16 != 0 && weight16 < Collation.COMMON_WEIGHT16) { + int hasThisLevelBefore = level == Collator.SECONDARY ? HAS_BEFORE2 : HAS_BEFORE3; + if((node & hasThisLevelBefore) == 0) { + // The parent node has an implied level-common weight. + long commonNode = + nodeFromWeight16(Collation.COMMON_WEIGHT16) | nodeFromStrength(level); + if(level == Collator.SECONDARY) { + // Move the HAS_BEFORE3 flag from the parent node + // to the new secondary common node. + commonNode |= node & HAS_BEFORE3; + node &= ~(long)HAS_BEFORE3; + } + nodes.setElementAt(node | hasThisLevelBefore, index); + // Insert below-common-weight node. + int nextIndex = nextIndexFromNode(node); + node = nodeFromWeight16(weight16) | nodeFromStrength(level); + index = insertNodeBetween(index, nextIndex, node); + // Insert common-weight node. + insertNodeBetween(index, nextIndex, commonNode); + // Return index of below-common-weight node. + return index; + } + } + // Find the root CE's weight for this level. // Postpone insertion if not found: // Insert the new root node before the next stronger node, // or before the next root node with the same strength and a larger weight. - long node = nodes.elementAti(index); int nextIndex; while((nextIndex = nextIndexFromNode(node)) != 0) { node = nodes.elementAti(nextIndex); @@ -709,7 +739,7 @@ public final class CollationBuilder extends CollationRuleParser.Sink { /** * Finds the node which implies or contains a common=05 weight of the given strength - * (secondary or tertiary). + * (secondary or tertiary), if the current node is stronger. * Skips weaker nodes and tailored nodes if the current node is stronger * and is followed by an explicit-common-weight node. * Always returns the input index if that node is no stronger than the given strength. @@ -728,13 +758,14 @@ public final class CollationBuilder extends CollationRuleParser.Sink { index = nextIndexFromNode(node); node = nodes.elementAti(index); assert(!isTailoredNode(node) && strengthFromNode(node) == strength && - weight16FromNode(node) == Collation.BEFORE_WEIGHT16); + weight16FromNode(node) < Collation.COMMON_WEIGHT16); // Skip to the explicit common node. do { index = nextIndexFromNode(node); node = nodes.elementAti(index); assert(strengthFromNode(node) >= strength); - } while(isTailoredNode(node) || strengthFromNode(node) > strength); + } while(isTailoredNode(node) || strengthFromNode(node) > strength || + weight16FromNode(node) < Collation.COMMON_WEIGHT16); assert(weight16FromNode(node) == Collation.COMMON_WEIGHT16); return index; } @@ -1097,6 +1128,9 @@ public final class CollationBuilder extends CollationRuleParser.Sink { CollationWeights secondaries = new CollationWeights(); CollationWeights tertiaries = new CollationWeights(); long[] nodesArray = nodes.getBuffer(); + if(DEBUG) { + System.out.println("\nCollationBuilder.makeTailoredCEs()"); + } for(int rpi = 0; rpi < rootPrimaryIndexes.size(); ++rpi) { int i = rootPrimaryIndexes.elementAti(rpi); @@ -1143,11 +1177,11 @@ public final class CollationBuilder extends CollationRuleParser.Sink { // Gap at the beginning of the tertiary CE range. t = rootElements.getTertiaryBoundary() - 0x100; tLimit = (int)rootElements.getFirstTertiaryCE() & Collation.ONLY_TERTIARY_MASK; - } else if(t == Collation.BEFORE_WEIGHT16) { - tLimit = Collation.COMMON_WEIGHT16; } else if(!pIsTailored && !sIsTailored) { // p and s are root weights. tLimit = rootElements.getTertiaryAfter(pIndex, s, t); + } else if(t == Collation.BEFORE_WEIGHT16) { + tLimit = Collation.COMMON_WEIGHT16; } else { // [p, s] is tailored. assert(t == Collation.COMMON_WEIGHT16); @@ -1185,11 +1219,11 @@ public final class CollationBuilder extends CollationRuleParser.Sink { // Gap at the beginning of the secondary CE range. s = rootElements.getSecondaryBoundary() - 0x100; sLimit = (int)(rootElements.getFirstSecondaryCE() >> 16); - } else if(s == Collation.BEFORE_WEIGHT16) { - sLimit = Collation.COMMON_WEIGHT16; } else if(!pIsTailored) { // p is a root primary. sLimit = rootElements.getSecondaryAfter(pIndex, s); + } else if(s == Collation.BEFORE_WEIGHT16) { + sLimit = Collation.COMMON_WEIGHT16; } else { // p is a tailored primary. assert(s == Collation.COMMON_WEIGHT16); @@ -1203,6 +1237,11 @@ public final class CollationBuilder extends CollationRuleParser.Sink { secondaries.initForSecondary(); if(!secondaries.allocWeights(s, sLimit, sCount)) { // C++ U_BUFFER_OVERFLOW_ERROR + if(DEBUG) { + System.out.printf("!secondaries.allocWeights(%x, %x, sCount=%d)\n", + alignWeightRight(s), alignWeightRight(sLimit), + alignWeightRight(sCount)); + } throw new UnsupportedOperationException("secondary tailoring gap too small"); } sIsTailored = true; @@ -1377,15 +1416,13 @@ public final class CollationBuilder extends CollationRuleParser.Sink { /** At most 1M nodes, limited by the 20 bits in node bit fields. */ private static final int MAX_INDEX = 0xfffff; /** - * Node bit 6 is set on a primary node if there are tailored nodes - * with secondary values below the common secondary weight (05), - * from a reset-secondary-before (&[before 2]). + * Node bit 6 is set on a primary node if there are nodes + * with secondary values below the common secondary weight (05). */ private static final int HAS_BEFORE2 = 0x40; /** - * Node bit 5 is set on a primary or secondary node if there are tailored nodes - * with tertiary values below the common tertiary weight (05), - * from a reset-tertiary-before (&[before 3]). + * Node bit 5 is set on a primary or secondary node if there are nodes + * with tertiary values below the common tertiary weight (05). */ private static final int HAS_BEFORE3 = 0x20; /** @@ -1496,15 +1533,16 @@ public final class CollationBuilder extends CollationRuleParser.Sink { * A node of a given strength normally implies "common" weights on weaker levels. * * A node with HAS_BEFORE2 must be immediately followed by - * a secondary node with BEFORE_WEIGHT16, then a secondary tailored node, + * a secondary node with an explicit below-common weight, then a secondary tailored node, * and later an explicit common-secondary node. - * (&[before 2] resets to the BEFORE_WEIGHT16 node so that + * The below-common weight can be a root weight, + * or it can be BEFORE_WEIGHT16 for tailoring before an implied common weight + * or before the lowest root weight. + * (&[before 2] resets to an explicit secondary node so that * the following addRelation(secondary) tailors right after that. * If we did not have this node and instead were to reset on the primary node, * then addRelation(secondary) would skip forward to the the COMMON_WEIGHT16 node.) * - * All secondary tailored nodes between these two explicit ones - * will be assigned lower-than-common secondary weights. * If the flag is not set, then there are no explicit secondary nodes * with the common or lower weights. * diff --git a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationRootElements.java b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationRootElements.java index bc033774ff..cf2ce93f99 100644 --- a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationRootElements.java +++ b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationRootElements.java @@ -254,7 +254,7 @@ public final class CollationRootElements { } else { index = findPrimary(p) + 1; previousSec = Collation.BEFORE_WEIGHT16; - sec = Collation.COMMON_WEIGHT16; + sec = (int)getFirstSecTerForPrimary(index) >>> 16; } assert(s >= sec); while(s > sec) { @@ -285,7 +285,7 @@ public final class CollationRootElements { } else { index = findPrimary(p) + 1; previousTer = Collation.BEFORE_WEIGHT16; - secTer = Collation.COMMON_SEC_AND_TER_CE; + secTer = getFirstSecTerForPrimary(index); } long st = ((long)s << 16) | t; while(st > secTer) { @@ -339,37 +339,54 @@ public final class CollationRootElements { /** * Returns the secondary weight after [p, s] where index=findPrimary(p) * except use index=0 for p=0. + * + *
Must return a weight for every root [p, s] as well as for every weight + * returned by getSecondaryBefore(). If p!=0 then s can be BEFORE_WEIGHT16. + * + *
Exception: [0, 0] is handled by the CollationBuilder: + * Both its lower and upper boundaries are special. */ int getSecondaryAfter(int index, int s) { + long secTer; int secLimit; if(index == 0) { // primary = 0 + assert(s != 0); index = (int)elements[IX_FIRST_SECONDARY_INDEX]; + secTer = elements[index]; // Gap at the end of the secondary CE range. secLimit = 0x10000; } else { assert(index >= (int)elements[IX_FIRST_PRIMARY_INDEX]); - ++index; + secTer = getFirstSecTerForPrimary(index + 1); + // If this is an explicit sec/ter unit, then it will be read once more. // Gap for secondaries of primary CEs. secLimit = getSecondaryBoundary(); } for(;;) { - long secTer = elements[index]; - if((secTer & SEC_TER_DELTA_FLAG) == 0) { return secLimit; } int sec = (int)(secTer >> 16); if(sec > s) { return sec; } - ++index; + secTer = elements[++index]; + if((secTer & SEC_TER_DELTA_FLAG) == 0) { return secLimit; } } } /** * Returns the tertiary weight after [p, s, t] where index=findPrimary(p) * except use index=0 for p=0. + * + *
Must return a weight for every root [p, s, t] as well as for every weight + * returned by getTertiaryBefore(). If s!=0 then t can be BEFORE_WEIGHT16. + * + *
Exception: [0, 0, 0] is handled by the CollationBuilder:
+ * Both its lower and upper boundaries are special.
*/
int getTertiaryAfter(int index, int s, int t) {
+ long secTer;
int terLimit;
if(index == 0) {
// primary = 0
if(s == 0) {
+ assert(t != 0);
index = (int)elements[IX_FIRST_TERTIARY_INDEX];
// Gap at the end of the tertiary CE range.
terLimit = 0x4000;
@@ -378,22 +395,44 @@ public final class CollationRootElements {
// Gap for tertiaries of primary/secondary CEs.
terLimit = getTertiaryBoundary();
}
+ secTer = elements[index] & ~SEC_TER_DELTA_FLAG;
} else {
assert(index >= (int)elements[IX_FIRST_PRIMARY_INDEX]);
- ++index;
+ secTer = getFirstSecTerForPrimary(index + 1);
+ // If this is an explicit sec/ter unit, then it will be read once more.
terLimit = getTertiaryBoundary();
}
long st = (((long)s & 0xffffffffL) << 16) | t;
for(;;) {
- long secTer = elements[index];
+ if(secTer > st) {
+ assert((secTer >> 16) == s);
+ return (int)secTer & 0xffff;
+ }
+ secTer = elements[++index];
// No tertiary greater than t for this primary+secondary.
if((secTer & SEC_TER_DELTA_FLAG) == 0 || (secTer >> 16) > s) { return terLimit; }
secTer &= ~SEC_TER_DELTA_FLAG;
- if(secTer > st) { return (int)secTer & 0xffff; }
- ++index;
}
}
+ /**
+ * Returns the first secondary & tertiary weights for p where index=findPrimary(p)+1.
+ */
+ private long getFirstSecTerForPrimary(int index) {
+ long secTer = elements[index];
+ if((secTer & SEC_TER_DELTA_FLAG) == 0) {
+ // No sec/ter delta.
+ return Collation.COMMON_SEC_AND_TER_CE;
+ }
+ secTer &= ~SEC_TER_DELTA_FLAG;
+ if(secTer > Collation.COMMON_SEC_AND_TER_CE) {
+ // Implied sec/ter.
+ return Collation.COMMON_SEC_AND_TER_CE;
+ }
+ // Explicit sec/ter below common/common.
+ return secTer;
+ }
+
/**
* Finds the largest index i where elements[i]<=p.
* Requires first primary<=p<0xffffff00 (PRIMARY_SENTINEL).
diff --git a/icu4j/main/shared/data/icudata.jar b/icu4j/main/shared/data/icudata.jar
index 27687f3021..6936df9268 100755
--- a/icu4j/main/shared/data/icudata.jar
+++ b/icu4j/main/shared/data/icudata.jar
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:75ed6e7d7359c336e87fbc632981e5e26fbbab5550b862fb7e00b2a5acf33078
-size 11802139
+oid sha256:d7bf72e445a207052fe2e2de0d70a989b69bc55da3df272f8e3096d6d9cb2ad0
+size 11801973
diff --git a/icu4j/main/tests/collate/src/com/ibm/icu/dev/data/collationtest.txt b/icu4j/main/tests/collate/src/com/ibm/icu/dev/data/collationtest.txt
index e4a023b576..bd779c2cac 100644
--- a/icu4j/main/tests/collate/src/com/ibm/icu/dev/data/collationtest.txt
+++ b/icu4j/main/tests/collate/src/com/ibm/icu/dev/data/collationtest.txt
@@ -2476,3 +2476,53 @@
* compare
<1 AA
<2 aą
+
+** test: tailor tertiary-after a common tertiary where there is a lower one
+# Assume that Hiragana small A has a below-common tertiary, and Hiragana A has a common one.
+# See ICU ticket 11448 & CLDR ticket 7222.
+@ rules
+&あ<<