ICU-4386 update based on review comments, extend tests

X-SVN-Rev: 17197
This commit is contained in:
Doug Felt 2005-02-14 19:15:44 +00:00
parent f4b63ff7e0
commit 3120d08ecc
5 changed files with 139 additions and 71 deletions

View File

@ -1,7 +1,7 @@
/*
******************************************************************************
*
* Copyright (C) 1999-2004, International Business Machines
* Copyright (C) 1999-2005, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
@ -359,14 +359,14 @@ getDirProps(UBiDi *pBiDi) {
if((dirProp==B)&&(i<length)) { /* B not last char in text */
if(!((uchar==CR) && (text[i]==LF))) {
pBiDi->paraCount++;
}
if(isDefaultLevel) {
state=LOOKING_FOR_STRONG;
paraStart=i; /* i is index to next character */
paraDir=paraDirDefault;
/* keep the paraLevel of the first paragraph even if it
defaulted (no strong char was found) */
paraLevelStillDefault=FALSE;
if(isDefaultLevel) {
state=LOOKING_FOR_STRONG;
paraStart=i; /* i is index to next character */
paraDir=paraDirDefault;
/* keep the paraLevel of the first paragraph even if it
defaulted (no strong char was found) */
paraLevelStillDefault=FALSE;
}
}
}
}
@ -374,7 +374,7 @@ getDirProps(UBiDi *pBiDi) {
needed for absolute paraLevel. */
flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
if(pBiDi->isOrderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
flags|=DIRPROP_FLAG(L);
}
@ -490,7 +490,6 @@ resolveExplicitLevels(UBiDi *pBiDi) {
/* recalculate the flags */
flags=0;
/* since we assume that this is a single paragraph, we ignore (X8) */
for(i=0; i<length; ++i) {
dirProp=NO_CONTEXT_RTL(dirProps[i]);
switch(dirProp) {
@ -594,7 +593,7 @@ resolveExplicitLevels(UBiDi *pBiDi) {
if(flags&MASK_EMBEDDING) {
flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
}
if(pBiDi->isOrderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
flags|=DIRPROP_FLAG(L);
}
@ -1054,14 +1053,14 @@ adjustWSLevels(UBiDi *pBiDi) {
int32_t i;
if(pBiDi->flags&MASK_WS) {
UBool isOrderParagraphsLTR=pBiDi->isOrderParagraphsLTR;
UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR;
Flags flag;
i=pBiDi->trailingWSStart;
while(i>0) {
/* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
while(i>0 && (flag=DIRPROP_FLAG_NC(dirProps[--i]))&MASK_WS) {
if(isOrderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
levels[i]=0;
} else {
levels[i]=GET_PARALEVEL(pBiDi, i);
@ -1074,7 +1073,7 @@ adjustWSLevels(UBiDi *pBiDi) {
flag=DIRPROP_FLAG_NC(dirProps[--i]);
if(flag&MASK_BN_EXPLICIT) {
levels[i]=levels[i+1];
} else if(isOrderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
} else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
levels[i]=0;
break;
} else if(flag&MASK_B_S) {
@ -1312,16 +1311,16 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
}
U_CAPI void U_EXPORT2
ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool isOrderParagraphsLTR) {
ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) {
if(pBiDi!=NULL) {
pBiDi->isOrderParagraphsLTR=isOrderParagraphsLTR;
pBiDi->orderParagraphsLTR=orderParagraphsLTR;
}
}
U_CAPI UBool U_EXPORT2
ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) {
if(pBiDi!=NULL) {
return pBiDi->isOrderParagraphsLTR;
return pBiDi->orderParagraphsLTR;
} else {
return FALSE;
}

View File

@ -1,7 +1,7 @@
/*
******************************************************************************
*
* Copyright (C) 1999-2004, International Business Machines
* Copyright (C) 1999-2005, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
@ -185,7 +185,7 @@ struct UBiDi {
UBool isInverse2;
/* must block separators receive level 0? */
UBool isOrderParagraphsLTR;
UBool orderParagraphsLTR;
/* the paragraph level */
UBiDiLevel paraLevel;

View File

@ -1,7 +1,7 @@
/*
******************************************************************************
*
* Copyright (C) 1999-2003, International Business Machines
* Copyright (C) 1999-2005, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
@ -103,7 +103,7 @@ setTrailingWSStart(UBiDi *pBiDi) {
are already set to paragraph level.
Setting trailingWSStart to pBiDi->length will avoid changing the
level of B chars from 0 to paraLevel in ubidi_getLevels when
isOrderParagraphsLTR==TRUE.
orderParagraphsLTR==TRUE.
*/
if(NO_CONTEXT_RTL(dirProps[start-1])==B) {
pBiDi->trailingWSStart=start; /* currently == pBiDi->length */

View File

@ -1,7 +1,7 @@
/*
******************************************************************************
*
* Copyright (C) 1999-2004, International Business Machines
* Copyright (C) 1999-2005, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
@ -387,11 +387,11 @@ typedef enum UBiDiDirection UBiDiDirection;
/**
* Forward declaration of the <code>UBiDi</code> structure for the declaration of
* the API functions. Its fields are implementation-specific.<p>
* This structure holds information about a paragraph of text
* with BiDi-algorithm-related details, or about one line of
* This structure holds information about a paragraph (or multiple paragraphs)
* of text with BiDi-algorithm-related details, or about one line of
* such a paragraph.<p>
* Reordering can be done on a line, or on a paragraph which is
* then interpreted as one single line.
* Reordering can be done on a line, or on one or more paragraphs, each of
* which is then interpreted as one single line.
* @stable ICU 2.0
*/
struct UBiDi;
@ -402,8 +402,9 @@ typedef struct UBiDi UBiDi;
/**
* Allocate a <code>UBiDi</code> structure.
* Such an object is initially empty. It is assigned
* the BiDi properties of a paragraph by <code>ubidi_setPara()</code>
* or the BiDi properties of a line of a paragraph by
* the BiDi properties of a piece of text containing one or more paragraphs
* by <code>ubidi_setPara()</code>
* or the BiDi properties of a line within a paragraph by
* <code>ubidi_setLine()</code>.<p>
* This object can be reused for as long as it is not deallocated
* by calling <code>ubidi_close()</code>.<p>
@ -432,7 +433,7 @@ ubidi_open(void);
* and the internal structures that are associated with it will be allocated
* on demand, just like with <code>ubidi_open()</code>.
*
* @param maxLength is the maximum paragraph or line length that internal memory
* @param maxLength is the maximum text or line length that internal memory
* will be preallocated for. An attempt to associate this object with a
* longer text will fail, unless this value is 0, which leaves the allocation
* up to the implementation.
@ -542,14 +543,14 @@ ubidi_isInverse(UBiDi *pBiDi);
*
* @param pBiDi is a <code>UBiDi</code> object.
*
* @param isOrderParagraphLTR specifies whether paragraph separators (B) must
* @param orderParagraphsLTR specifies whether paragraph separators (B) must
* receive level 0, so that successive paragraphs progress from left to right.
*
* @see ubidi_setPara
* @stable ICU 3.4
*/
U_STABLE void U_EXPORT2
ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool isOrderParagraphLTR);
ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR);
/**
* Is this BiDi object set to allocate level 0 to block separators so that
@ -559,7 +560,7 @@ ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool isOrderParagraphLTR);
* @return TRUE if the BiDi object is set to allocate level 0 to block
* separators.
*
* @see ubidi_setMultiPara
* @see ubidi_orderParagraphsLTR
* @stable ICU 3.4
*/
U_STABLE UBool U_EXPORT2
@ -571,11 +572,11 @@ ubidi_isOrderParagraphsLTR(UBiDi *pBiDi);
* version 13,
* also described in The Unicode Standard, Version 4.0.<p>
*
* This function takes a single plain text paragraph with or without
* externally specified embedding levels from <i>styled</i> text
* and computes the left-right-directionality of each character.<p>
* This function takes a piece of plain text containing one or more paragraphs,
* with or without externally specified embedding levels from <i>styled</i>
* text and computes the left-right-directionality of each character.<p>
*
* If the entire paragraph consists of text of only one direction, then
* If the entire text is all of the same directionality, then
* the function may not perform all the steps described by the algorithm,
* i.e., some levels may not be the same as if all steps were performed.
* This is not relevant for unidirectional text.<br>
@ -598,9 +599,6 @@ ubidi_isOrderParagraphsLTR(UBiDi *pBiDi);
*
* @param text is a pointer to the text that the
* BiDi algorithm will be performed on
* (step (P1) of the algorithm must be performed externally if paraLevel
* is specified as <code>UBIDI_DEFAULT_LTR</code> or
* <code>UBIDI_DEFAULT_RTL</code>).
* <strong>The text must be (at least) <code>length</code> long.</strong>
* This pointer is stored in the UBiDi object and can be retrieved
* with <code>ubidi_getText()</code>.
@ -608,14 +606,15 @@ ubidi_isOrderParagraphsLTR(UBiDi *pBiDi);
* @param length is the length of the text; if <code>length==-1</code> then
* the text must be zero-terminated.
*
* @param paraLevel specifies the default level for the paragraph;
* @param paraLevel specifies the default level for the text;
* it is typically 0 (LTR) or 1 (RTL).
* If the function shall determine the paragraph level from the text,
* then <code>paraLevel</code> can be set to
* either <code>UBIDI_DEFAULT_LTR</code>
* or <code>UBIDI_DEFAULT_RTL</code>;
* if there is no strongly typed character, then
* the desired default is used (0 for LTR or 1 for RTL).
* or <code>UBIDI_DEFAULT_RTL</code>; if the text contains multiple
* paragraphs, the paragraph level shall be determined separately for
* each paragraph; if a paragraph does not include any strongly typed
* character, then the desired default is used (0 for LTR or 1 for RTL).
* Any other value between 0 and <code>UBIDI_MAX_EXPLICIT_LEVEL</code> is also valid,
* with odd levels indicating RTL.
*
@ -655,13 +654,13 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
* contain the reordering information, especially the resolved levels,
* for all the characters in a line of text. This line of text is
* specified by referring to a <code>UBiDi</code> object representing
* this information for a paragraph of text, and by specifying
* a range of indexes in this paragraph.<p>
* this information for a piece of text containing one or more paragraphs,
* and by specifying a range of indexes in this text.<p>
* In the new line object, the indexes will range from 0 to <code>limit-start-1</code>.<p>
*
* This is used after calling <code>ubidi_setPara()</code>
* for a paragraph, and after line-breaking on that paragraph.
* It is not necessary if the paragraph is treated as a single line.<p>
* for a piece of text, and after line-breaking on that text.
* It is not necessary if each paragraph is treated as a single line.<p>
*
* After line-breaking, rules (L1) and (L2) for the treatment of
* trailing WS and for reordering are performed on
@ -680,13 +679,15 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
* @param pParaBiDi is the parent paragraph object. It must have been set
* by a successful call to ubidi_setPara.
*
* @param start is the line's first index into the paragraph text.
* @param start is the line's first index into the text.
*
* @param limit is just behind the line's last index into the paragraph text
* @param limit is just behind the line's last index into the text
* (its last index +1).<br>
* It must be <code>0<=start<=limit<=</code>paragraph length.
* It must be <code>0<=start<=limit<=</code>containing paragraph limit.
* If the specified line crosses a paragraph boundary, the function
* will terminate with error code U_ILLEGAL_ARGUMENT_ERROR.
*
* @param pLineBiDi is the object that will now represent a line of the paragraph.
* @param pLineBiDi is the object that will now represent a line of the text.
*
* @param pErrorCode must be a valid pointer to an error code value.
*
@ -769,7 +770,7 @@ U_STABLE int32_t U_EXPORT2
ubidi_countParagraphs(UBiDi *pBiDi);
/**
* Get a paragraph, given a position within the paragraph.
* Get a paragraph, given a position within the text.
* This function returns information about a paragraph.<p>
*
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
@ -777,8 +778,8 @@ ubidi_countParagraphs(UBiDi *pBiDi);
* @param charIndex is the index of a character within the text, in the
* range <code>[0..ubidi_getLength(pBiDi)-1]</code>.
*
* @param pParaStart will receive the index of the first character in
* the paragraph.
* @param pParaStart will receive the index of the first character of the
* paragraph in the text.
* This pointer can be <code>NULL</code> if this
* value is not necessary.
*
@ -813,8 +814,8 @@ ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, int32_t *pParaStart,
* @param paraIndex is the number of the paragraph, in the
* range <code>[0..ubidi_countParagraphs(pBiDi)-1]</code>.
*
* @param pParaStart will receive the index of the first character in
* the paragraph.
* @param pParaStart will receive the index of the first character of the
* paragraph in the text.
* This pointer can be <code>NULL</code> if this
* value is not necessary.
*
@ -1180,9 +1181,9 @@ ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length);
/**
* Take a <code>UBiDi</code> object containing the reordering
* information for one paragraph or line of text as set by
* <code>ubidi_setPara()</code> or <code>ubidi_setLine()</code> and
* write a reordered string to the destination buffer.
* information for a piece of text (one or more paragraphs) set by
* <code>ubidi_setPara()</code> or for a line of text set by <code>ubidi_setLine()</code>
* and write a reordered string to the destination buffer.
*
* This function preserves the integrity of characters with multiple
* code units and (optionally) modifier letters.

View File

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2004, International Business Machines Corporation and
* Copyright (c) 1997-2005, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/* file name: cbiditst.cpp
@ -739,15 +739,18 @@ static void TestMultipleParagraphs(void) {
0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23};
static const char* const text2 = "\\u05d0 1-2\\u001c\\u0630 1-2\\u001c1-2";
static const UBiDiLevel levels2[] = {1,1,2,2,2,0, 1,1,2,1,2,0, 2,2,2};
UBiDiLevel gotLevel;
const UBiDiLevel* gotLevels;
UBool isOrderParagraphsLTR;
UBool orderParagraphsLTR;
UChar src[MAXLEN];
UErrorCode errorCode=U_ZERO_ERROR;
UBiDi* pBidi=ubidi_open();
UBiDi* pLine;
int32_t srcSize, count, paraStart, paraLimit, paraIndex, length;
int i, k;
int i, j, k;
u_unescape(text, src, MAXLEN);
srcSize=u_strlen(src);
ubidi_setPara(pBidi, src, srcSize, UBIDI_LTR, NULL, &errorCode);
@ -840,7 +843,7 @@ static void TestMultipleParagraphs(void) {
i, k, u_errorName(errorCode));
errorCode=U_ZERO_ERROR;
}
/* check level of block separator at end of paragraph when isOrderParagraphsLTR==FALSE */
/* check level of block separator at end of paragraph when orderParagraphsLTR==FALSE */
ubidi_setPara(pBidi, src, srcSize, UBIDI_RTL, NULL, &errorCode);
/* get levels through para Bidi block */
gotLevels=ubidi_getLevels(pBidi, &errorCode);
@ -881,16 +884,16 @@ static void TestMultipleParagraphs(void) {
"level of separator=%d expected=%d\n",
paraIndex, paraStart, paraLimit, gotLevel, UBIDI_RTL, gotLevels[length-1], UBIDI_RTL);
}
isOrderParagraphsLTR=ubidi_isOrderParagraphsLTR(pBidi);
if (isOrderParagraphsLTR) {
log_err("Found isOrderParagraphsLTR=%d expected=%d\n", isOrderParagraphsLTR, FALSE);
orderParagraphsLTR=ubidi_isOrderParagraphsLTR(pBidi);
if (orderParagraphsLTR) {
log_err("Found orderParagraphsLTR=%d expected=%d\n", orderParagraphsLTR, FALSE);
}
ubidi_orderParagraphsLTR(pBidi, TRUE);
isOrderParagraphsLTR=ubidi_isOrderParagraphsLTR(pBidi);
if (!isOrderParagraphsLTR) {
log_err("Found isOrderParagraphsLTR=%d expected=%d\n", isOrderParagraphsLTR, TRUE);
orderParagraphsLTR=ubidi_isOrderParagraphsLTR(pBidi);
if (!orderParagraphsLTR) {
log_err("Found orderParagraphsLTR=%d expected=%d\n", orderParagraphsLTR, TRUE);
}
/* check level of block separator at end of paragraph when isOrderParagraphsLTR==TRUE */
/* check level of block separator at end of paragraph when orderParagraphsLTR==TRUE */
ubidi_setPara(pBidi, src, srcSize, UBIDI_RTL, NULL, &errorCode);
/* get levels through para Bidi block */
gotLevels=ubidi_getLevels(pBidi, &errorCode);
@ -918,6 +921,71 @@ static void TestMultipleParagraphs(void) {
}
log_verbose("\n");
}
/* test that the concatenation of separate invocations of the bidi code
* on each individual paragraph in order matches the levels array that
* results from invoking bidi once over the entire multiparagraph text
* (with orderParagraphsLTR false, of course)
*/
u_unescape(text, src, MAXLEN); /* restore original content */
srcSize=u_strlen(src);
ubidi_orderParagraphsLTR(pBidi, FALSE);
ubidi_setPara(pBidi, src, srcSize, UBIDI_DEFAULT_RTL, NULL, &errorCode);
gotLevels=ubidi_getLevels(pBidi, &errorCode);
for (i=0; i<paraCount; i++) {
/* use pLine for individual paragraphs */
paraStart = paraBounds[i];
length = paraBounds[i+1] - paraStart;
ubidi_setPara(pLine, src+paraStart, length, UBIDI_DEFAULT_RTL, NULL, &errorCode);
for (j=0; j<length; j++) {
if ((k=ubidi_getLevelAt(pLine, j)) != (gotLevel=gotLevels[paraStart+j])) {
log_err("Checking paragraph concatenation: for paragraph=%d, "
"char=%d(%04x), level=%d, expected=%d\n",
i, j, src[paraStart+j], k, gotLevel);
}
}
}
/* ensure that leading numerics in a paragraph are not treated as Arabic
numerals because of Arabic text in a preceding paragraph
*/
u_unescape(text2, src, MAXLEN);
srcSize=u_strlen(src);
ubidi_orderParagraphsLTR(pBidi, TRUE);
ubidi_setPara(pBidi, src, srcSize, UBIDI_RTL, NULL, &errorCode);
gotLevels=ubidi_getLevels(pBidi, &errorCode);
for (i=0; i<srcSize; i++) {
if (gotLevels[i]!=levels2[i]) {
log_err("Checking leading numerics: for char %d(%04x), level=%d, expected=%d\n",
i, src[i], gotLevels[i], levels2[i]);
}
}
/* check handling of whitespace before end of paragraph separator when
* orderParagraphsLTR==TRUE, when last paragraph has, and lacks, a terminating B
*/
memset(src, ' ', MAXLEN);
srcSize = 5;
ubidi_orderParagraphsLTR(pBidi, TRUE);
for (i=0x001c; i<=0x0020; i+=(0x0020-0x001c)) {
src[4]=i; /* with and without terminating B */
for (j=0x0041; j<=0x05d0; j+=(0x05d0-0x0041)) {
src[0]=j; /* leading 'A' or Alef */
for (gotLevel=4; gotLevel<=5; gotLevel++) {
/* test even and odd paraLevel */
ubidi_setPara(pBidi, src, srcSize, gotLevel, NULL, &errorCode);
gotLevels=ubidi_getLevels(pBidi, &errorCode);
for (k=1; k<=3; k++) {
if (gotLevels[k]!=gotLevel) {
log_err("Checking trailing spaces: for leading_char=%04x, "
"last_char=%04x, index=%d, level=%d, expected=%d\n",
src[0], src[4], k, gotLevels[k], gotLevel);
}
}
}
}
}
ubidi_close(pLine);
ubidi_close(pBidi);
}