ICU-12922 bidi explicit level 0: lift it up to resolved paragraph level except leave it at 0 for paragraph separators
X-SVN-Rev: 39741
This commit is contained in:
parent
410e3f6db5
commit
782e4ff95e
@ -1348,18 +1348,20 @@ resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
|
||||
static UBiDiDirection
|
||||
checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
|
||||
DirProp *dirProps=pBiDi->dirProps;
|
||||
DirProp dirProp;
|
||||
UBiDiLevel *levels=pBiDi->levels;
|
||||
int32_t isolateCount=0;
|
||||
|
||||
int32_t i, length=pBiDi->length;
|
||||
int32_t length=pBiDi->length;
|
||||
Flags flags=0; /* collect all directionalities in the text */
|
||||
UBiDiLevel level;
|
||||
pBiDi->isolateCount=0;
|
||||
|
||||
for(i=0; i<length; ++i) {
|
||||
level=levels[i];
|
||||
dirProp=dirProps[i];
|
||||
int32_t currentParaIndex = 0;
|
||||
int32_t currentParaLimit = pBiDi->paras[0].limit;
|
||||
int32_t currentParaLevel = pBiDi->paraLevel;
|
||||
|
||||
for(int32_t i=0; i<length; ++i) {
|
||||
UBiDiLevel level=levels[i];
|
||||
DirProp dirProp=dirProps[i];
|
||||
if(dirProp==LRI || dirProp==RLI) {
|
||||
isolateCount++;
|
||||
if(isolateCount>pBiDi->isolateCount)
|
||||
@ -1369,21 +1371,41 @@ checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
|
||||
isolateCount--;
|
||||
else if(dirProp==B)
|
||||
isolateCount=0;
|
||||
if(level&UBIDI_LEVEL_OVERRIDE) {
|
||||
|
||||
// optimized version of int32_t currentParaLevel = GET_PARALEVEL(pBiDi, i);
|
||||
if (pBiDi->defaultParaLevel != 0 &&
|
||||
i == currentParaLimit && (currentParaIndex + 1) < pBiDi->paraCount) {
|
||||
currentParaLevel = pBiDi->paras[++currentParaIndex].level;
|
||||
currentParaLimit = pBiDi->paras[currentParaIndex].limit;
|
||||
}
|
||||
|
||||
UBiDiLevel overrideFlag = level & UBIDI_LEVEL_OVERRIDE;
|
||||
level &= ~UBIDI_LEVEL_OVERRIDE;
|
||||
if (level < currentParaLevel || UBIDI_MAX_EXPLICIT_LEVEL < level) {
|
||||
if (level == 0) {
|
||||
if (dirProp == B) {
|
||||
// Paragraph separators are ok with explicit level 0.
|
||||
// Prevents reordering of paragraphs.
|
||||
} else {
|
||||
// Treat explicit level 0 as a wildcard for the paragraph level.
|
||||
// Avoid making the caller guess what the paragraph level would be.
|
||||
level = (UBiDiLevel)currentParaLevel;
|
||||
levels[i] = level | overrideFlag;
|
||||
}
|
||||
} else {
|
||||
// 1 <= level < currentParaLevel or UBIDI_MAX_EXPLICIT_LEVEL < level
|
||||
/* level out of bounds */
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return UBIDI_LTR;
|
||||
}
|
||||
}
|
||||
if (overrideFlag != 0) {
|
||||
/* keep the override flag in levels[i] but adjust the flags */
|
||||
level&=~UBIDI_LEVEL_OVERRIDE; /* make the range check below simpler */
|
||||
flags|=DIRPROP_FLAG_O(level);
|
||||
} else {
|
||||
/* set the flags */
|
||||
flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp);
|
||||
}
|
||||
if((level<GET_PARALEVEL(pBiDi, i) &&
|
||||
!((0==level)&&(dirProp==B))) ||
|
||||
(UBIDI_MAX_EXPLICIT_LEVEL<level)) {
|
||||
/* level out of bounds */
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return UBIDI_LTR;
|
||||
}
|
||||
}
|
||||
if(flags&MASK_EMBEDDING)
|
||||
flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
|
||||
|
@ -1196,11 +1196,12 @@ ubidi_setContext(UBiDi *pBiDi,
|
||||
* A level overrides the directional property of its corresponding
|
||||
* (same index) character if the level has the
|
||||
* <code>#UBIDI_LEVEL_OVERRIDE</code> bit set.<br><br>
|
||||
* Except for that bit, it must be
|
||||
* Aside from that bit, it must be
|
||||
* <code>paraLevel<=embeddingLevels[]<=UBIDI_MAX_EXPLICIT_LEVEL</code>,
|
||||
* with one exception: a level of zero may be specified for a paragraph
|
||||
* separator even if <code>paraLevel>0</code> when multiple paragraphs
|
||||
* are submitted in the same call to <code>ubidi_setPara()</code>.<br><br>
|
||||
* except that level 0 is always allowed.
|
||||
* Level 0 for a paragraph separator prevents reordering of paragraphs.
|
||||
* Level 0 for other characters is treated as a wildcard
|
||||
* and is lifted up to the resolved level of the surrounding paragraph.<br><br>
|
||||
* <strong>Caution: </strong>A copy of this pointer, not of the levels,
|
||||
* will be stored in the <code>UBiDi</code> object;
|
||||
* the <code>embeddingLevels</code> array must not be
|
||||
|
@ -90,6 +90,7 @@ static void testContext(void);
|
||||
static void doTailTest(void);
|
||||
|
||||
static void testBracketOverflow(void);
|
||||
static void TestExplicitLevel0();
|
||||
|
||||
/* new BIDI API */
|
||||
static void testReorderingMode(void);
|
||||
@ -138,6 +139,7 @@ addComplexTest(TestNode** root) {
|
||||
addTest(root, testGetBaseDirection, "complex/bidi/testGetBaseDirection");
|
||||
addTest(root, testContext, "complex/bidi/testContext");
|
||||
addTest(root, testBracketOverflow, "complex/bidi/TestBracketOverflow");
|
||||
addTest(root, &TestExplicitLevel0, "complex/bidi/TestExplicitLevel0");
|
||||
|
||||
addTest(root, doArabicShapingTest, "complex/arabic-shaping/ArabicShapingTest");
|
||||
addTest(root, doLamAlefSpecialVLTRArabicShapingTest, "complex/arabic-shaping/lamalef");
|
||||
@ -4922,3 +4924,24 @@ testBracketOverflow(void) {
|
||||
ubidi_close(bidi);
|
||||
}
|
||||
|
||||
static void TestExplicitLevel0() {
|
||||
// The following used to fail with an error, see ICU ticket #12922.
|
||||
static const UChar text[2] = { 0x202d, 0x05d0 };
|
||||
static UBiDiLevel embeddings[2] = { 0, 0 };
|
||||
UErrorCode errorCode = U_ZERO_ERROR;
|
||||
UBiDi *bidi = ubidi_open();
|
||||
ubidi_setPara(bidi, text, 2, UBIDI_DEFAULT_LTR , embeddings, &errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
log_err("ubidi_setPara() - %s", u_errorName(errorCode));
|
||||
} else {
|
||||
UBiDiLevel level0 = ubidi_getLevelAt(bidi, 0);
|
||||
UBiDiLevel level1 = ubidi_getLevelAt(bidi, 1);
|
||||
if (level0 != 1 || level1 != 1) {
|
||||
log_err("resolved levels != 1: { %d, %d }\n", level0, level1);
|
||||
}
|
||||
if (embeddings[0] != 1 || embeddings[1] != 1) {
|
||||
log_err("modified embeddings[] levels != 1: { %d, %d }\n", embeddings[0], embeddings[1]);
|
||||
}
|
||||
}
|
||||
ubidi_close(bidi);
|
||||
}
|
||||
|
@ -2674,28 +2674,29 @@ public class Bidi {
|
||||
return dirct;
|
||||
}
|
||||
|
||||
/*
|
||||
/**
|
||||
* Use a pre-specified embedding levels array:
|
||||
*
|
||||
* Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
|
||||
* <p>Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
|
||||
* ignore all explicit codes (X9),
|
||||
* and check all the preset levels.
|
||||
*
|
||||
* Recalculate the flags to have them reflect the real properties
|
||||
* <p>Recalculate the flags to have them reflect the real properties
|
||||
* after taking the explicit embeddings into account.
|
||||
*/
|
||||
private byte checkExplicitLevels() {
|
||||
byte dirProp;
|
||||
int i;
|
||||
int isolateCount = 0;
|
||||
|
||||
this.flags = 0; /* collect all directionalities in the text */
|
||||
byte level;
|
||||
this.isolateCount = 0;
|
||||
|
||||
for (i = 0; i < length; ++i) {
|
||||
level = levels[i];
|
||||
dirProp = dirProps[i];
|
||||
int currentParaIndex = 0;
|
||||
int currentParaLimit = paras_limit[0];
|
||||
byte currentParaLevel = paraLevel;
|
||||
|
||||
for (int i = 0; i < length; ++i) {
|
||||
byte level = levels[i];
|
||||
byte dirProp = dirProps[i];
|
||||
if (dirProp == LRI || dirProp == RLI) {
|
||||
isolateCount++;
|
||||
if (isolateCount > this.isolateCount)
|
||||
@ -2705,21 +2706,40 @@ public class Bidi {
|
||||
isolateCount--;
|
||||
else if (dirProp == B)
|
||||
isolateCount = 0;
|
||||
if ((level & LEVEL_OVERRIDE) != 0) {
|
||||
|
||||
// optimized version of byte currentParaLevel = GetParaLevelAt(i);
|
||||
if (defaultParaLevel != 0 &&
|
||||
i == currentParaLimit && (currentParaIndex + 1) < paraCount) {
|
||||
currentParaLevel = paras_level[++currentParaIndex];
|
||||
currentParaLimit = paras_limit[currentParaIndex];
|
||||
}
|
||||
|
||||
int overrideFlag = level & LEVEL_OVERRIDE;
|
||||
level &= ~LEVEL_OVERRIDE;
|
||||
if (level < currentParaLevel || MAX_EXPLICIT_LEVEL < level) {
|
||||
if (level == 0) {
|
||||
if (dirProp == B) {
|
||||
// Paragraph separators are ok with explicit level 0.
|
||||
// Prevents reordering of paragraphs.
|
||||
} else {
|
||||
// Treat explicit level 0 as a wildcard for the paragraph level.
|
||||
// Avoid making the caller guess what the paragraph level would be.
|
||||
level = currentParaLevel;
|
||||
levels[i] = (byte)(level | overrideFlag);
|
||||
}
|
||||
} else {
|
||||
// 1 <= level < currentParaLevel or MAX_EXPLICIT_LEVEL < level
|
||||
throw new IllegalArgumentException("level " + level +
|
||||
" out of bounds at " + i);
|
||||
}
|
||||
}
|
||||
if (overrideFlag != 0) {
|
||||
/* keep the override flag in levels[i] but adjust the flags */
|
||||
level &= ~LEVEL_OVERRIDE; /* make the range check below simpler */
|
||||
flags |= DirPropFlagO(level);
|
||||
} else {
|
||||
/* set the flags */
|
||||
flags |= DirPropFlagE(level) | DirPropFlag(dirProp);
|
||||
}
|
||||
if ((level < GetParaLevelAt(i) &&
|
||||
!((0 == level) && (dirProp == B))) ||
|
||||
(MAX_EXPLICIT_LEVEL < level)) {
|
||||
/* level out of bounds */
|
||||
throw new IllegalArgumentException("level " + level +
|
||||
" out of bounds at " + i);
|
||||
}
|
||||
}
|
||||
if ((flags & MASK_EMBEDDING) != 0)
|
||||
flags |= DirPropFlagLR(paraLevel);
|
||||
@ -3950,11 +3970,12 @@ public class Bidi {
|
||||
* A level overrides the directional property of its corresponding
|
||||
* (same index) character if the level has the
|
||||
* <code>LEVEL_OVERRIDE</code> bit set.<br><br>
|
||||
* Except for that bit, it must be
|
||||
* Aside from that bit, it must be
|
||||
* <code>paraLevel<=embeddingLevels[]<=MAX_EXPLICIT_LEVEL</code>,
|
||||
* with one exception: a level of zero may be specified for a
|
||||
* paragraph separator even if <code>paraLevel>0</code> when multiple
|
||||
* paragraphs are submitted in the same call to <code>setPara()</code>.<br><br>
|
||||
* except that level 0 is always allowed.
|
||||
* Level 0 for a paragraph separator prevents reordering of paragraphs.
|
||||
* Level 0 for other characters is treated as a wildcard
|
||||
* and is lifted up to the resolved level of the surrounding paragraph.<br><br>
|
||||
* <strong>Caution: </strong>A reference to this array, not a copy
|
||||
* of the levels, will be stored in the <code>Bidi</code> object;
|
||||
* the <code>embeddingLevels</code>
|
||||
@ -5294,13 +5315,19 @@ public class Bidi {
|
||||
|
||||
/**
|
||||
* Create Bidi from the given text, embedding, and direction information.
|
||||
* The embeddings array may be null. If present, the values represent
|
||||
* embedding level information. Negative values from -1 to -61 indicate
|
||||
* overrides at the absolute value of the level. Positive values from 1 to
|
||||
* 61 indicate embeddings. Where values are zero, the base embedding level
|
||||
* as determined by the base direction is assumed.<p>
|
||||
*
|
||||
* Note: this constructor calls setPara() internally.
|
||||
* <p>The embeddings array may be null. If present, the values represent
|
||||
* embedding level information.
|
||||
* Negative values from -1 to -{@link #MAX_EXPLICIT_LEVEL}
|
||||
* indicate overrides at the absolute value of the level.
|
||||
* Positive values from 1 to {@link #MAX_EXPLICIT_LEVEL} indicate embeddings.
|
||||
* Where values are zero, the base embedding level
|
||||
* as determined by the base direction is assumed,
|
||||
* except for paragraph separators which remain at 0 to prevent reordering of paragraphs.<p>
|
||||
*
|
||||
* <p>Note: This constructor calls setPara() internally,
|
||||
* after converting the java.text.Bidi-style embeddings with negative overrides
|
||||
* into ICU-style embeddings with bit fields for {@link #LEVEL_OVERRIDE} and the level.
|
||||
*
|
||||
* @param text an array containing the paragraph of text to process.
|
||||
* @param textStart the index into the text array of the start of the
|
||||
@ -5354,22 +5381,23 @@ public class Bidi {
|
||||
if (embeddings == null) {
|
||||
paraEmbeddings = null;
|
||||
} else {
|
||||
// Convert from java.text.Bidi embeddings to ICU setPara() levels:
|
||||
// Copy to the start of a new array and convert java.text negative overrides
|
||||
// to ICU bit-field-and-mask overrides.
|
||||
// A copy of the embeddings is always required because
|
||||
// setPara() may modify its embeddings.
|
||||
paraEmbeddings = new byte[paragraphLength];
|
||||
byte lev;
|
||||
for (int i = 0; i < paragraphLength; i++) {
|
||||
lev = embeddings[i + embStart];
|
||||
if (lev < 0) {
|
||||
lev = (byte)((- lev) | LEVEL_OVERRIDE);
|
||||
} else if (lev == 0) {
|
||||
lev = paraLvl;
|
||||
if (paraLvl > MAX_EXPLICIT_LEVEL) {
|
||||
lev &= 1;
|
||||
}
|
||||
}
|
||||
// setPara() lifts level 0 up to the resolved paragraph level.
|
||||
paraEmbeddings[i] = lev;
|
||||
}
|
||||
}
|
||||
if (textStart == 0 && embStart == 0 && paragraphLength == text.length) {
|
||||
if (textStart == 0 && paragraphLength == text.length) {
|
||||
setPara(text, paraLvl, paraEmbeddings);
|
||||
} else {
|
||||
char[] paraText = new char[paragraphLength];
|
||||
|
@ -526,7 +526,7 @@ public class TestBidi extends BidiFmwk {
|
||||
bidi.setReorderingMode(Bidi.REORDER_RUNS_ONLY);
|
||||
bidi.setPara("a \u05d0 b \u05d1 c \u05d2 d ", Bidi.LTR, null);
|
||||
assertEquals("\nWrong number of runs #4", 14, bidi.countRuns());
|
||||
|
||||
|
||||
/* test testGetBaseDirection to verify fast string direction detection function */
|
||||
/* mixed start with L */
|
||||
String mixedEnglishFirst = "\u0061\u0627\u0032\u06f3\u0061\u0034";
|
||||
@ -566,7 +566,7 @@ public class TestBidi extends BidiFmwk {
|
||||
assertEquals("\nWrong direction through fast detection #12", Bidi.NEUTRAL, Bidi.getBaseDirection(allArabicDigits));
|
||||
/* null string */
|
||||
String nullString = null;
|
||||
assertEquals("\nWrong direction through fast detection #13", Bidi.NEUTRAL, Bidi.getBaseDirection(nullString));
|
||||
assertEquals("\nWrong direction through fast detection #13", Bidi.NEUTRAL, Bidi.getBaseDirection(nullString));
|
||||
/* first L (English) others are R (Hebrew etc.) */
|
||||
String startEnglishOthersHebrew = "\u0071\u0590\u05D5\u05EA\u05F1";
|
||||
assertEquals("\nWrong direction through fast detection #14", Bidi.LTR, Bidi.getBaseDirection(startEnglishOthersHebrew));
|
||||
@ -574,4 +574,20 @@ public class TestBidi extends BidiFmwk {
|
||||
String lastHebrewOthersEnglishDigit = "\u0031\u0032\u0033\u05F1";
|
||||
assertEquals("\nWrong direction through fast detection #15", Bidi.RTL, Bidi.getBaseDirection(lastHebrewOthersEnglishDigit));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExplicitLevel0() {
|
||||
// The following used to fail with an error, see ICU ticket #12922.
|
||||
String text = "\u202d\u05d0";
|
||||
byte[] embeddings = new byte[2]; // all 0
|
||||
int flags = Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT; // 0x7e
|
||||
Bidi bidi = new Bidi(text.toCharArray(), 0, embeddings, 0, text.length(), flags);
|
||||
assertEquals("resolved level at 0", 1, bidi.getLevelAt(0));
|
||||
assertEquals("resolved level at 1", 1, bidi.getLevelAt(1));
|
||||
|
||||
flags = java.text.Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT; // -2
|
||||
java.text.Bidi jb = new java.text.Bidi(text.toCharArray(), 0, embeddings, 0, text.length(), flags);
|
||||
assertEquals("java.text resolved level at 0", 1, jb.getLevelAt(0));
|
||||
assertEquals("java.text resolved level at 1", 1, jb.getLevelAt(1));
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user