From 8f37e17383103c3be4e9258a6fbddef936ba8003 Mon Sep 17 00:00:00 2001 From: Matitiahu Allouche Date: Sun, 23 Jan 2005 13:06:41 +0000 Subject: [PATCH] ICU-4386 add multiple paragraph support to ubidi APIs X-SVN-Rev: 17155 --- icu4c/source/common/ubidi.c | 365 ++++++++++++++++++++------ icu4c/source/common/ubidiimp.h | 64 ++++- icu4c/source/common/ubidiln.c | 91 ++++--- icu4c/source/common/unicode/ubidi.h | 188 ++++++++++--- icu4c/source/test/cintltst/cbiditst.c | 220 +++++++++++++++- 5 files changed, 757 insertions(+), 171 deletions(-) diff --git a/icu4c/source/common/ubidi.c b/icu4c/source/common/ubidi.c index 8e1cf8335d..4c888dcca2 100644 --- a/icu4c/source/common/ubidi.c +++ b/icu4c/source/common/ubidi.c @@ -1,4 +1,4 @@ -/* +/* ****************************************************************************** * * Copyright (C) 1999-2004, International Business Machines @@ -53,7 +53,7 @@ * that look at immediately surrounding types. * * As a related topic, this implementation does not remove Boundary Neutral - * types from the input, but ignores them whereever this is relevant. + * types from the input, but ignores them wherever this is relevant. * For example, the loop for the resolution of the weak types reads * types until it finds a non-BN. * Also, explicit embedding codes are neither changed into BN nor removed. 
@@ -120,7 +120,7 @@ static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) }; /* UBiDi object management -------------------------------------------------- */ U_CAPI UBiDi * U_EXPORT2 -ubidi_open(void) +ubidi_open(void) { UErrorCode errorCode=U_ZERO_ERROR; return ubidi_openSized(0, 0, &errorCode); @@ -236,6 +236,7 @@ ubidi_getMemory(void **pMemory, int32_t *pSize, UBool mayAllocate, int32_t sizeN U_CAPI void U_EXPORT2 ubidi_close(UBiDi *pBiDi) { if(pBiDi!=NULL) { + pBiDi->pParaBiDi=NULL; /* in case one tries to reuse this block */ if(pBiDi->dirPropsMemory!=NULL) { uprv_free(pBiDi->dirPropsMemory); } @@ -272,66 +273,109 @@ ubidi_isInverse(UBiDi *pBiDi) { /* * Get the directional properties for the text, * calculate the flags bit-set, and - * determine the partagraph level if necessary. + * determine the paragraph level if necessary. */ static void -getDirProps(UBiDi *pBiDi, const UChar *text) { +getDirProps(UBiDi *pBiDi) { + const UChar *text=pBiDi->text; DirProp *dirProps=pBiDi->dirPropsMemory; /* pBiDi->dirProps is const */ int32_t i=0, i0, i1, length=pBiDi->length; Flags flags=0; /* collect all directionalities in the text */ UChar32 uchar; - DirProp dirProp; + DirProp dirProp, paraDirDefault; + UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel); + UBool paraLevelStillDefault; /* flag for real value not set */ - if(IS_DEFAULT_LEVEL(pBiDi->paraLevel)) { - /* determine the paragraph level (P2..P3) */ - for(;;) { - i0=i; /* index of first code unit */ - UTF_NEXT_CHAR(text, i, length, uchar); - i1=i-1; /* index of last code unit, gets the directional property */ - flags|=DIRPROP_FLAG(dirProps[i1]=dirProp=ubidi_getClass(pBiDi->bdp, uchar)); - if(i1>i0) { /* set previous code units' properties to BN */ - flags|=DIRPROP_FLAG(BN); - do { - dirProps[--i1]=BN; - } while(i1>i0); - } + typedef enum { + NOT_CONTEXTUAL, /* 0: not contextual paraLevel */ + LOOKING_FOR_STRONG, /* 1: looking for first strong char */ + FOUND_STRONG_CHAR /* 2: found first strong 
char */ + } State; + State state; + int32_t paraStart; /* index of first char in paragraph */ + DirProp paraDir; /* == CONTEXT_RTL within paragraphs + starting with strong R char */ - if(dirProp==L) { - pBiDi->paraLevel=0; - break; - } else if(dirProp==R || dirProp==AL) { - pBiDi->paraLevel=1; - break; - } else if(i>=length) { - /* - * see comment in ubidi.h: - * the DEFAULT_XXX values are designed so that - * their bit 0 alone yields the intended default - */ - pBiDi->paraLevel&=1; - break; - } - } + if(isDefaultLevel) { + paraDirDefault=pBiDi->paraLevel&1 ? CONTEXT_RTL : 0; + state=LOOKING_FOR_STRONG; + paraStart=0; + paraDir=paraDirDefault; + pBiDi->paraLevel&=1; /* set to default */ + paraLevelStillDefault=TRUE; } else { - flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); + state=NOT_CONTEXTUAL; + paraDir=0; } - - /* get the rest of the directional properties and the flags bits */ - while(ibdp, uchar)); + flags|=DIRPROP_FLAG(dirProp=ubidi_getClass(pBiDi->bdp, uchar)); + dirProps[i1]=dirProp|paraDir; if(i1>i0) { /* set previous code units' properties to BN */ flags|=DIRPROP_FLAG(BN); do { - dirProps[--i1]=BN; + dirProps[--i1]=BN|paraDir; } while(i1>i0); } + if(state==LOOKING_FOR_STRONG) { + if(dirProp==L) { + state=FOUND_STRONG_CHAR; + if(paraLevelStillDefault) { + paraLevelStillDefault=FALSE; + pBiDi->paraLevel=0; + } + if(paraDir) { + paraDir=0; + for(i1=paraStart; i1<=i; i1++) { + dirProps[i1]&=~CONTEXT_RTL; + } + } + continue; + } + if(dirProp==R || dirProp==AL) { + state=FOUND_STRONG_CHAR; + if(paraLevelStillDefault) { + paraLevelStillDefault=FALSE; + pBiDi->paraLevel=1; + } + if(paraDir==0) { + paraDir=CONTEXT_RTL; + for(i1=paraStart; i1<=i; i1++) { + dirProps[i1]|=CONTEXT_RTL; + } + } + continue; + } + } + if((dirProp==B)&&(iparaCount++; + } + if(isDefaultLevel) { + state=LOOKING_FOR_STRONG; + paraStart=i; /* i is index to next character */ + paraDir=paraDirDefault; + /* keep the paraLevel of the first paragraph even if it + defaulted (no strong char was found) 
*/ + paraLevelStillDefault=FALSE; + } + } } - if(flags&MASK_EMBEDDING) { - flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); + /* The following line does nothing new for contextual paraLevel, but is + needed for absolute paraLevel. */ + flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); + + if(pBiDi->isOrderParagraphsLTR && (flags&DIRPROP_FLAG(B))) { + flags|=DIRPROP_FLAG(L); } pBiDi->flags=flags; @@ -408,13 +452,15 @@ static UBiDiDirection resolveExplicitLevels(UBiDi *pBiDi) { const DirProp *dirProps=pBiDi->dirProps; UBiDiLevel *levels=pBiDi->levels; - + const UChar *text=pBiDi->text; + int32_t i=0, length=pBiDi->length; Flags flags=pBiDi->flags; /* collect all directionalities in the text */ DirProp dirProp; - UBiDiLevel level=pBiDi->paraLevel; + UBiDiLevel level=GET_PARALEVEL(pBiDi, 0); UBiDiDirection direction; + int32_t paraIndex=0; /* determine if the text is mixed-directional or single-directional */ direction=directionFromFlags(flags); @@ -422,9 +468,11 @@ resolveExplicitLevels(UBiDi *pBiDi) { /* we may not need to resolve any explicit levels */ if(direction!=UBIDI_MIXED) { /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */ - } else if(!(flags&MASK_EXPLICIT) || pBiDi->isInverse) { + } else if((pBiDi->paraCount==1) && + (!(flags&MASK_EXPLICIT) || pBiDi->isInverse)) { /* mixed, but all characters are at the same embedding level */ /* or we are in "inverse BiDi" */ + /* and we don't have contextual multiple paragraphs with some B char */ /* set all levels to the paragraph level */ for(i=0; iparaLevel; + level=GET_PARALEVEL(pBiDi, i); + if((i+1)paras[paraIndex++]=i+1; + } + } flags|=DIRPROP_FLAG(B); break; case BN: @@ -541,6 +594,9 @@ resolveExplicitLevels(UBiDi *pBiDi) { if(flags&MASK_EMBEDDING) { flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); } + if(pBiDi->isOrderParagraphsLTR && (flags&DIRPROP_FLAG(B))) { + flags|=DIRPROP_FLAG(L); + } /* subsequently, ignore the explicit codes and BN (X9) */ @@ -564,27 +620,38 @@ resolveExplicitLevels(UBiDi *pBiDi) { 
static UBiDiDirection checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { const DirProp *dirProps=pBiDi->dirProps; + DirProp dirProp; UBiDiLevel *levels=pBiDi->levels; - + const UChar *text=pBiDi->text; + int32_t i, length=pBiDi->length; Flags flags=0; /* collect all directionalities in the text */ - UBiDiLevel level, paraLevel=pBiDi->paraLevel; + UBiDiLevel level; + uint32_t paraIndex=0; for(i=0; iparas[paraIndex++]=i+1; + } + } } if(flags&MASK_EMBEDDING) { flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); @@ -642,7 +709,7 @@ resolveImplicitLevels(UBiDi *pBiDi, /* initialize: current at sor, next at start (it is startisInverse) { @@ -669,7 +736,7 @@ resolveImplicitLevels(UBiDi *pBiDi, */ while(DIRPROP_FLAG(nextDirProp)&MASK_BN_EXPLICIT) { if(++nextdefaultParaLevel&&((i+1)=0) { /* @@ -980,24 +1054,31 @@ adjustWSLevels(UBiDi *pBiDi) { int32_t i; if(pBiDi->flags&MASK_WS) { - UBiDiLevel paraLevel=pBiDi->paraLevel; + UBool isOrderParagraphsLTR=pBiDi->isOrderParagraphsLTR; Flags flag; i=pBiDi->trailingWSStart; while(i>0) { /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */ - while(i>0 && DIRPROP_FLAG(dirProps[--i])&MASK_WS) { - levels[i]=paraLevel; + while(i>0 && (flag=DIRPROP_FLAG_NC(dirProps[--i]))&MASK_WS) { + if(isOrderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) { + levels[i]=0; + } else { + levels[i]=GET_PARALEVEL(pBiDi, i); + } } /* reset BN to the next character's paraLevel until B/S, which restarts above loop */ /* here, i+1 is guaranteed to be 0) { - flag=DIRPROP_FLAG(dirProps[--i]); + flag=DIRPROP_FLAG_NC(dirProps[--i]); if(flag&MASK_BN_EXPLICIT) { levels[i]=levels[i+1]; + } else if(isOrderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) { + levels[i]=0; + break; } else if(flag&MASK_B_S) { - levels[i]=paraLevel; + levels[i]=GET_PARALEVEL(pBiDi, i); break; } } @@ -1029,6 +1110,7 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, } /* initialize the UBiDi structure */ + pBiDi->pParaBiDi=NULL; /* mark unfinished setPara */ 
pBiDi->text=text; pBiDi->length=length; pBiDi->paraLevel=paraLevel; @@ -1039,6 +1121,19 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, pBiDi->levels=NULL; pBiDi->runs=NULL; + /* initialize paras for single paragraph */ + pBiDi->paraCount=1; + pBiDi->paras=pBiDi->simpleParas; + pBiDi->simpleParas[0]=length; + /* + * Save the original paraLevel if contextual; otherwise, set to 0. + */ + if(IS_DEFAULT_LEVEL(paraLevel)) { + pBiDi->defaultParaLevel=paraLevel; + } else { + pBiDi->defaultParaLevel=0; + } + if(length==0) { /* * For an empty paragraph, create a UBiDi object with the paraLevel and @@ -1047,6 +1142,7 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, */ if(IS_DEFAULT_LEVEL(paraLevel)) { pBiDi->paraLevel&=1; + pBiDi->defaultParaLevel=0; } if(paraLevel&1) { pBiDi->flags=DIRPROP_FLAG(R); @@ -1057,6 +1153,7 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, } pBiDi->runCount=0; + pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */ return; } @@ -1065,15 +1162,25 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, /* * Get the directional properties, * the flags bit-set, and - * determine the partagraph level if necessary. + * determine the paragraph level if necessary. */ if(getDirPropsMemory(pBiDi, length)) { pBiDi->dirProps=pBiDi->dirPropsMemory; - getDirProps(pBiDi, text); + getDirProps(pBiDi); } else { *pErrorCode=U_MEMORY_ALLOCATION_ERROR; return; } + /* allocate paras memory */ + if(pBiDi->paraCount>1) { + if(getInitialParasMemory(pBiDi, pBiDi->paraCount)) { + pBiDi->paras=pBiDi->parasMemory; + pBiDi->paras[pBiDi->paraCount-1]=length; + } else { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + } /* are explicit levels specified? 
*/ if(embeddingLevels==NULL) { @@ -1086,7 +1193,7 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, return; } } else { - /* set BN for all explicit codes, check that all levels are paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */ + /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */ pBiDi->levels=embeddingLevels; direction=checkExplicitLevels(pBiDi, pErrorCode); if(U_FAILURE(*pErrorCode)) { @@ -1128,8 +1235,8 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, */ if(embeddingLevels==NULL && !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) { resolveImplicitLevels(pBiDi, 0, length, - GET_LR_FROM_LEVEL(pBiDi->paraLevel), - GET_LR_FROM_LEVEL(pBiDi->paraLevel)); + GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)), + GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1))); } else { /* sor, eor: start and end types of same-level-run */ UBiDiLevel *levels=pBiDi->levels; @@ -1138,7 +1245,7 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, DirProp sor, eor; /* determine the first sor and set eor to it because of the loop body (sor=eor there) */ - level=pBiDi->paraLevel; + level=GET_PARALEVEL(pBiDi, 0); nextLevel=levels[0]; if(level0) && (NO_CONTEXT_RTL(pBiDi->dirProps[start-1])==B)) { + /* except if this is a new paragraph, then set sor = para level */ + sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start)); + } else { + sor=eor; + } /* search for the limit of this run */ while(++limitparaLevel; + nextLevel=GET_PARALEVEL(pBiDi, length-1); } /* determine eor from max(level, nextLevel); sor is last run's eor */ @@ -1196,11 +1308,28 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, } break; } + pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */ +} + +U_CAPI void U_EXPORT2 +ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool isOrderParagraphsLTR) { + if(pBiDi!=NULL) { + pBiDi->isOrderParagraphsLTR=isOrderParagraphsLTR; + } +} + +U_CAPI UBool U_EXPORT2 +ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) { + 
if(pBiDi!=NULL) { + return pBiDi->isOrderParagraphsLTR; + } else { + return FALSE; + } } U_CAPI UBiDiDirection U_EXPORT2 ubidi_getDirection(const UBiDi *pBiDi) { - if(pBiDi!=NULL) { + if(IS_VALID_PARA_OR_LINE(pBiDi)) { return pBiDi->direction; } else { return UBIDI_LTR; @@ -1209,7 +1338,7 @@ ubidi_getDirection(const UBiDi *pBiDi) { U_CAPI const UChar * U_EXPORT2 ubidi_getText(const UBiDi *pBiDi) { - if(pBiDi!=NULL) { + if(IS_VALID_PARA_OR_LINE(pBiDi)) { return pBiDi->text; } else { return NULL; @@ -1218,22 +1347,87 @@ ubidi_getText(const UBiDi *pBiDi) { U_CAPI int32_t U_EXPORT2 ubidi_getLength(const UBiDi *pBiDi) { - if(pBiDi!=NULL) { + if(IS_VALID_PARA_OR_LINE(pBiDi)) { return pBiDi->length; } else { return 0; } } +/* paragraphs API functions ------------------------------------------------- */ + U_CAPI UBiDiLevel U_EXPORT2 ubidi_getParaLevel(const UBiDi *pBiDi) { - if(pBiDi!=NULL) { + if(IS_VALID_PARA_OR_LINE(pBiDi)) { return pBiDi->paraLevel; } else { return 0; } } +U_CAPI int32_t U_EXPORT2 +ubidi_countParagraphs(UBiDi *pBiDi) { + if(!IS_VALID_PARA_OR_LINE(pBiDi)) { + return 0; + } else { + return pBiDi->paraCount; + } +} + +U_STABLE void U_EXPORT2 +ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex, + int32_t *pParaStart, int32_t *pParaLimit, + UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) { + int32_t paraStart; + + /* check the argument values */ + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return; + } else if( !IS_VALID_PARA_OR_LINE(pBiDi) || /* no valid setPara/setLine */ + paraIndex<0 || paraIndex>=pBiDi->paraCount ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */ + if(paraIndex) { + paraStart=pBiDi->paras[paraIndex-1]; + } else { + paraStart=0; + } + if(pParaStart!=NULL) { + *pParaStart=paraStart; + } + if(pParaLimit!=NULL) { + *pParaLimit=pBiDi->paras[paraIndex]; + } + if(pParaLevel!=NULL) { + *pParaLevel=GET_PARALEVEL(pBiDi, paraStart); + } + return; +} + 
+U_STABLE int32_t U_EXPORT2 +ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, + int32_t *pParaStart, int32_t *pParaLimit, + UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) { + uint32_t paraIndex; + + /* check the argument values */ + /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */ + if( !IS_VALID_PARA_OR_LINE(pBiDi)) {/* no valid setPara/setLine */ + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return -1; + } + pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */ + if( charIndex<0 || charIndex>=pBiDi->length ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return -1; + } + for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex]; paraIndex++); + ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode); + return paraIndex; +} + /* statetable prototype ----------------------------------------------------- */ /* @@ -1319,7 +1513,7 @@ resolveImplicitLevels(BiDi *pBiDi, i=next; do { if(++next=0xfe) + +/* + * The following bit is ORed to the property of characters in paragraphs + * with contextual RTL direction when paraLevel is contextual. + */ +#define CONTEXT_RTL 0x80 +#define NO_CONTEXT_RTL(dir) ((dir)&~CONTEXT_RTL) +/* + * The following is a variant of DIRPROP_FLAG which ignores the CONTEXT_RTL bit. + */ +#define DIRPROP_FLAG_NC(dir) (1UL<<(NO_CONTEXT_RTL(dir))) + +#define GET_PARALEVEL(ubidi, index) \ + (UBiDiLevel)((ubidi)->defaultParaLevel ? 
(ubidi)->dirProps[index]>>7 \ + : (ubidi)->paraLevel) + +/* Paragraph type for multiple paragraph support --- -------------------- */ +typedef int32_t Para; + +#define CR 0x000D +#define LF 0x000A /* Run structure for reordering --------------------------------------------- */ typedef struct Run { int32_t logicalStart, /* first character of the run; b31 indicates even/odd level */ - visualLimit; /* last visual position of the run +1 */ + visualLimit; /* last visual position of the run +1 */ } Run; /* in a Run, logicalStart will get this bit set if the run level is odd */ @@ -129,6 +150,12 @@ ubidi_getRuns(UBiDi *pBiDi); /* UBiDi structure ----------------------------------------------------------- */ struct UBiDi { + /* pointer to parent paragraph object (pointer to self if this object is + * a paragraph object); set to NULL in a newly opened object; set to a + * real value after a successful execution of ubidi_setPara or ubidi_setLine + */ + const UBiDi * pParaBiDi; + UBiDiProps *bdp; /* alias pointer to the current text */ @@ -138,11 +165,12 @@ struct UBiDi { int32_t length; /* memory sizes in bytes */ - int32_t dirPropsSize, levelsSize, runsSize; + int32_t dirPropsSize, levelsSize, parasSize, runsSize; /* allocated memory */ DirProp *dirPropsMemory; UBiDiLevel *levelsMemory; + Para *parasMemory; Run *runsMemory; /* indicators for whether memory may be allocated after ubidi_open() */ @@ -154,9 +182,19 @@ struct UBiDi { /* are we performing an approximation of the "inverse BiDi" algorithm? */ UBool isInverse; + UBool isInverse2; + + /* must block separators receive level 0? 
*/ + UBool isOrderParagraphsLTR; /* the paragraph level */ UBiDiLevel paraLevel; + /* original paraLevel when contextual */ + /* must be one of UBIDI_DEFAULT_xxx or 0 if not contextual */ + UBiDiLevel defaultParaLevel; + + /* the following is set in ubidi_setPara, used in processPropertySeq */ + const struct ImpTabPair * pImpTabPair; /* pointer to levels state table pair */ /* the overall paragraph or line directionality - see UBiDiDirection */ UBiDiDirection direction; @@ -168,6 +206,14 @@ struct UBiDi { /* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */ int32_t trailingWSStart; + /* fields for paragraph handling */ + int32_t paraCount; /* set in getDirProps() */ + Para *paras; /* limits of paragraphs, filled in + ResolveExplicitLevels() or CheckExplicitLevels() */ + + /* for single paragraph text, we only need a tiny array of paras (no malloc()) */ + Para simpleParas[1]; + /* fields for line reordering */ int32_t runCount; /* ==-1: runs not set up yet */ Run *runs; @@ -176,6 +222,10 @@ struct UBiDi { Run simpleRuns[1]; }; +#define IS_VALID_PARA(x) ((x) && ((x)->pParaBiDi==(x))) +#define IS_VALID_LINE(x) ((x) && ((x)->pParaBiDi) && ((x)->pParaBiDi->pParaBiDi==(x)->pParaBiDi)) +#define IS_VALID_PARA_OR_LINE(x) ((x) && ((x)->pParaBiDi==(x) || (((x)->pParaBiDi) && (x)->pParaBiDi->pParaBiDi==(x)->pParaBiDi))) + /* helper function to (re)allocate memory if allowed */ U_CFUNC UBool ubidi_getMemory(void **pMemory, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded); @@ -202,6 +252,10 @@ ubidi_getMemory(void **pMemory, int32_t *pSize, UBool mayAllocate, int32_t sizeN ubidi_getMemory((void **)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \ TRUE, (length)) +#define getInitialParasMemory(pBiDi, length) \ + ubidi_getMemory((void **)&(pBiDi)->parasMemory, &(pBiDi)->parasSize, \ + TRUE, (length)*sizeof(Para)) + #define getInitialRunsMemory(pBiDi, length) \ ubidi_getMemory((void **)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \ TRUE, 
(length)*sizeof(Run)) diff --git a/icu4c/source/common/ubidiln.c b/icu4c/source/common/ubidiln.c index 6731cf2a86..1671e78c28 100644 --- a/icu4c/source/common/ubidiln.c +++ b/icu4c/source/common/ubidiln.c @@ -1,4 +1,4 @@ -/* +/* ****************************************************************************** * * Copyright (C) 1999-2003, International Business Machines @@ -33,8 +33,8 @@ * text in a single paragraph or in a line of a single paragraph * which has already been processed according to * the Unicode 3.0 BiDi algorithm as defined in - * http://www.unicode.org/unicode/reports/tr9/ , version 5, - * also described in The Unicode Standard, Version 3.0 . + * http://www.unicode.org/unicode/reports/tr9/ , version 13, + * also described in The Unicode Standard, Version 4.0.1 . * * This means that there is a UBiDi object with a levels * and a dirProps array. @@ -86,6 +86,9 @@ * * To make subsequent operations easier, we also include the run * before the WS if it is at the paraLevel - we merge the two here. + * + * This function is called only from ubidi_setLine(), so pBiDi->paraLevel is + * set correctly for the line even when contextual multiple paragraphs. */ static void setTrailingWSStart(UBiDi *pBiDi) { @@ -96,8 +99,18 @@ setTrailingWSStart(UBiDi *pBiDi) { int32_t start=pBiDi->length; UBiDiLevel paraLevel=pBiDi->paraLevel; + /* If the line is terminated by a block separator, all preceding WS etc... + are already set to paragraph level. + Setting trailingWSStart to pBidi->length will avoid changing the + level of B chars from 0 to paraLevel in ubidi_getLevels when + isOrderParagraphsLTR==TRUE. 
+ */ + if(NO_CONTEXT_RTL(dirProps[start-1])==B) { + pBiDi->trailingWSStart=start; /* currently == pBiDi->length */ + return; + } /* go backwards across all WS, BN, explicit codes */ - while(start>0 && DIRPROP_FLAG(dirProps[start-1])&MASK_WS) { + while(start>0 && DIRPROP_FLAG_NC(dirProps[start-1])&MASK_WS) { --start; } @@ -121,19 +134,24 @@ ubidi_setLine(const UBiDi *pParaBiDi, /* check the argument values */ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { return; - } else if(pParaBiDi==NULL || pLineBiDi==NULL) { + } else if(!IS_VALID_PARA(pParaBiDi) || pLineBiDi==NULL) { *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return; } else if(start<0 || start>limit || limit>pParaBiDi->length) { *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return; + } else if(ubidi_getParagraph(pParaBiDi, start, NULL, NULL, NULL, pErrorCode) != + ubidi_getParagraph(pParaBiDi, limit-1, NULL, NULL, NULL, pErrorCode)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; } /* set the values in pLineBiDi from its pParaBiDi parent */ + pLineBiDi->pParaBiDi=NULL; /* mark unfinished setLine */ pLineBiDi->text=pParaBiDi->text+start; length=pLineBiDi->length=limit-start; - pLineBiDi->paraLevel=pParaBiDi->paraLevel; - + pLineBiDi->paraLevel=GET_PARALEVEL(pParaBiDi, start); + pLineBiDi->paraCount=pParaBiDi->paraCount; pLineBiDi->runs=NULL; pLineBiDi->flags=0; @@ -222,16 +240,17 @@ ubidi_setLine(const UBiDi *pParaBiDi, pLineBiDi->dirProps=NULL; pLineBiDi->levels=NULL; } + pLineBiDi->pParaBiDi=pParaBiDi; /* mark successful setLine */ return; } U_CAPI UBiDiLevel U_EXPORT2 ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex) { /* return paraLevel if in the trailing WS run, otherwise the real level */ - if(pBiDi==NULL || charIndex<0 || pBiDi->length<=charIndex) { + if(!IS_VALID_PARA_OR_LINE(pBiDi) || charIndex<0 || pBiDi->length<=charIndex) { return 0; } else if(pBiDi->direction!=UBIDI_MIXED || charIndex>=pBiDi->trailingWSStart) { - return pBiDi->paraLevel; + return GET_PARALEVEL(pBiDi, charIndex); } else { return 
pBiDi->levels[charIndex]; } @@ -243,7 +262,7 @@ ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { return NULL; - } else if(pBiDi==NULL || (length=pBiDi->length)<=0) { + } else if(!IS_VALID_PARA_OR_LINE(pBiDi) || (length=pBiDi->length)<=0) { *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return NULL; } @@ -267,6 +286,8 @@ ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { if(start>0 && levels!=pBiDi->levels) { uprv_memcpy(levels, pBiDi->levels, start); } + /* pBiDi->paraLevel is ok even if contextual multiple paragraphs, + since pBidi is a line object */ uprv_memset(levels+start, pBiDi->paraLevel, length-start); /* this new levels array is set for the line and reflects the WS run */ @@ -284,7 +305,8 @@ ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalStart, int32_t *pLogicalLimit, UBiDiLevel *pLevel) { int32_t length; - if(pBiDi==NULL || logicalStart<0 || (length=pBiDi->length)<=logicalStart) { + if(!IS_VALID_PARA_OR_LINE(pBiDi) || logicalStart<0 || + (length=pBiDi->length)<=logicalStart) { return; } @@ -293,7 +315,7 @@ ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalStart, *pLogicalLimit=length; } if(pLevel!=NULL) { - *pLevel=pBiDi->paraLevel; + *pLevel=GET_PARALEVEL(pBiDi, logicalStart); } } else { UBiDiLevel *levels=pBiDi->levels; @@ -318,7 +340,8 @@ U_CAPI int32_t U_EXPORT2 ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode) { if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { return -1; - } else if(pBiDi==NULL || (pBiDi->runCount<0 && !ubidi_getRuns(pBiDi))) { + } else if(!IS_VALID_PARA_OR_LINE(pBiDi) || + (pBiDi->runCount<0 && !ubidi_getRuns(pBiDi))) { *pErrorCode=U_MEMORY_ALLOCATION_ERROR; return -1; } else { @@ -329,7 +352,7 @@ ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode) { U_CAPI UBiDiDirection U_EXPORT2 ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex, int32_t *pLogicalStart, int32_t *pLength) { - if( pBiDi==NULL || runIndex<0 || + if( !IS_VALID_PARA_OR_LINE(pBiDi) || runIndex<0 || 
(pBiDi->runCount==-1 && !ubidi_getRuns(pBiDi)) || runIndex>=pBiDi->runCount ) { @@ -398,10 +421,9 @@ getSingleRun(UBiDi *pBiDi, UBiDiLevel level) { */ static void reorderLine(UBiDi *pBiDi, UBiDiLevel minLevel, UBiDiLevel maxLevel) { - Run *runs; + Run *runs, tempRun; UBiDiLevel *levels; - int32_t firstRun, endRun, limitRun, runCount, - temp; + int32_t firstRun, endRun, limitRun, runCount; /* nothing to do? */ if(maxLevel<=(minLevel|1)) { @@ -444,13 +466,9 @@ reorderLine(UBiDi *pBiDi, UBiDiLevel minLevel, UBiDiLevel maxLevel) { /* Swap the entire sequence of runs from firstRun to limitRun-1. */ endRun=limitRun-1; while(firstRundirection!=UBIDI_MIXED) { /* simple, single-run case - this covers length==0 */ + /* pBiDi->paraLevel is ok even for contextual multiple paragraphs */ getSingleRun(pBiDi, pBiDi->paraLevel); } else /* UBIDI_MIXED, length>0 */ { /* mixed directionality */ @@ -521,7 +536,7 @@ ubidi_getRuns(UBiDi *pBiDi) { limit=pBiDi->trailingWSStart; if(limit==0) { /* there is only WS on this line */ - getSingleRun(pBiDi, pBiDi->paraLevel); + getSingleRun(pBiDi, GET_PARALEVEL(pBiDi, 0)); } else { UBiDiLevel *levels=pBiDi->levels; int32_t i, runCount; @@ -550,7 +565,7 @@ ubidi_getRuns(UBiDi *pBiDi) { int32_t runIndex, start; UBiDiLevel minLevel=UBIDI_MAX_EXPLICIT_LEVEL+1, maxLevel=0; - /* now, count a (non-mergable) WS run */ + /* now, count a (non-mergeable) WS run */ if(limitparaLevel is ok even + if contextual multiple paragraphs. */ if(pBiDi->paraLevelparaLevel; } @@ -616,6 +633,8 @@ ubidi_getRuns(UBiDi *pBiDi) { /* Set the "odd" bit for the trailing WS run. */ /* For a RTL paragraph, it will be the *first* run in visual order. */ + /* For the trailing WS run, pBiDi->paraLevel is ok even if + contextual multiple paragraphs. */ if(runIndexparaLevel & 1) != 0)? 
0 : runIndex; @@ -796,7 +815,7 @@ U_CAPI int32_t U_EXPORT2 ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode) { if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { return 0; - } else if(pBiDi==NULL) { + } else if(!IS_VALID_PARA_OR_LINE(pBiDi)) { *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return 0; } else if(logicalIndex<0 || pBiDi->length<=logicalIndex) { @@ -841,7 +860,7 @@ U_CAPI int32_t U_EXPORT2 ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode) { if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { return 0; - } else if(pBiDi==NULL) { + } else if(!IS_VALID_PARA_OR_LINE(pBiDi)) { *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return 0; } else if(visualIndex<0 || pBiDi->length<=visualIndex) { @@ -869,7 +888,7 @@ ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode) /* binary search for the run */ int32_t begin=0, limit=runCount; - /* the middle if() will guaranteed find the run, we don't need a loop limit */ + /* the middle if() is guaranteed to find the run, we don't need a loop limit */ for(;;) { i=(begin+limit)/2; if(visualIndex>=runs[i].visualLimit) { diff --git a/icu4c/source/common/unicode/ubidi.h b/icu4c/source/common/unicode/ubidi.h index a0528d21c6..94f97e710b 100644 --- a/icu4c/source/common/unicode/ubidi.h +++ b/icu4c/source/common/unicode/ubidi.h @@ -407,8 +407,8 @@ typedef struct UBiDi UBiDi; * ubidi_setLine().

* This object can be reused for as long as it is not deallocated * by calling ubidi_close().

- * ubidi_set() will allocate additional memory for - * internal structures as necessary. + * ubidi_setPara() and ubidi_setLine() will allocate + * additional memory for internal structures as necessary. * * @return An empty UBiDi object. * @stable ICU 2.0 @@ -445,8 +445,7 @@ ubidi_open(void); * The number of runs depends on the actual text and maybe anywhere between * 1 and maxLength. It is typically small.

* - * @param pErrorCode must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. + * @param pErrorCode must be a valid pointer to an error code value. * * @return An empty UBiDi object with preallocated memory. * @stable ICU 2.0 @@ -520,14 +519,52 @@ ubidi_setInverse(UBiDi *pBiDi, UBool isInverse); * Is this BiDi object set to perform the inverse BiDi algorithm? * * @param pBiDi is a UBiDi object. - * @return TRUE if the BiDi object set to perform the inverse BiDi algorithm + * @return TRUE if the BiDi object is set to perform the inverse BiDi algorithm * * @see ubidi_setInverse * @stable ICU 2.0 */ + U_STABLE UBool U_EXPORT2 ubidi_isInverse(UBiDi *pBiDi); +/** + * Specify whether block separators must be allocated level zero, + * so that successive paragraphs will progress from left to right. + * This function must be called before ubidi_setPara(). + * Paragraph separators (B) may appear in the text. Setting them to level zero + * means that all paragraph separators (including one possibly appearing + * in the last text position) are kept in the reordered text after the text + * that they follow in the source text. + * When this feature is not enabled, a paragraph separator at the last + * position of the text before reordering will go to the first position + * of the reordered text when the paragraph level is odd. + * + * @param pBiDi is a UBiDi object. + * + * @param isOrderParagraphLTR specifies whether paragraph separators (B) must + * receive level 0, so that successive paragraphs progress from left to right. + * + * @see ubidi_setPara + * @stable ICU 3.4 + */ +U_STABLE void U_EXPORT2 +ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool isOrderParagraphLTR); + +/** + * Is this BiDi object set to allocate level 0 to block separators so that + * successive paragraphs progress from left to right? + * + * @param pBiDi is a UBiDi object. 
+ * @return TRUE if the BiDi object is set to allocate level 0 to block + * separators. + * + * @see ubidi_orderParagraphsLTR + * @stable ICU 3.4 + */ +U_STABLE UBool U_EXPORT2 +ubidi_isOrderParagraphsLTR(UBiDi *pBiDi); + /** * Perform the Unicode BiDi algorithm. It is defined in the * Unicode Standard Anned #9, @@ -547,16 +584,23 @@ ubidi_isInverse(UBiDi *pBiDi); * the algorithm. This implementation may set all resolved levels to * the same value in such a case.

* - * The text must be externally split into separate paragraphs (rule P1). - * Paragraph separators (B) should appear at most at the very end. + * The text can be composed of multiple paragraphs. Occurrence of a block + * separator in the text terminates a paragraph, and whatever comes next starts + * a new paragraph. The exception to this rule is when a Carriage Return (CR) + * is followed by a Line Feed (LF). Both CR and LF are block separators, but + * in that case, the pair of characters is considered as terminating the + * preceding paragraph, and a new paragraph will be started by a character + * coming after the LF. * * @param pBiDi A UBiDi object allocated with ubidi_open() * which will be set to contain the reordering information, * especially the resolved levels for all the characters in text. * - * @param text is a pointer to the single-paragraph text that the + * @param text is a pointer to the text that the * BiDi algorithm will be performed on - * (step (P1) of the algorithm is performed externally). + * (step (P1) of the algorithm must be performed externally if paraLevel + * is specified as UBIDI_DEFAULT_LTR or + * UBIDI_DEFAULT_RTL). * The text must be (at least) length long. * This pointer is stored in the UBiDi object and can be retrieved * with ubidi_getText(). @@ -581,7 +625,10 @@ ubidi_isInverse(UBiDi *pBiDi); * (same index) character if the level has the * UBIDI_LEVEL_OVERRIDE bit set.

* Except for that bit, it must be - * paraLevel<=embeddingLevels[]<=UBIDI_MAX_EXPLICIT_LEVEL.

+ * paraLevel<=embeddingLevels[]<=UBIDI_MAX_EXPLICIT_LEVEL, + * with one exception: a level of zero may be specified for a paragraph + * separator even if paraLevel>0 when multiple paragraphs + * are submitted in the same call to ubidi_setPara().

* Caution: A copy of this pointer, not of the levels, * will be stored in the UBiDi object; * the embeddingLevels array must not be @@ -595,8 +642,7 @@ ubidi_isInverse(UBiDi *pBiDi); * The embeddingLevels array must be * at least length long. * - * @param pErrorCode must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. + * @param pErrorCode must be a valid pointer to an error code value. * @stable ICU 2.0 */ U_STABLE void U_EXPORT2 @@ -631,7 +677,8 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, * and start is added to it so that it points to the beginning of the * line for this object. * - * @param pParaBiDi is the parent paragraph object. + * @param pParaBiDi is the parent paragraph object. It must have been set + * by a successful call to ubidi_setPara. * * @param start is the line's first index into the paragraph text. * @@ -641,8 +688,7 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, * * @param pLineBiDi is the object that will now represent a line of the paragraph. * - * @param pErrorCode must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. + * @param pErrorCode must be a valid pointer to an error code value. * * @see ubidi_setPara * @stable ICU 2.0 @@ -698,14 +744,97 @@ ubidi_getLength(const UBiDi *pBiDi); * * @param pBiDi is the paragraph or line UBiDi object. * - * @return The paragraph level. + * @return The paragraph level. If there are multiple paragraphs, their + * level may vary if the required paraLevel is UBIDI_DEFAULT_LTR or + * UBIDI_DEFAULT_RTL. In that case, the level of the first paragraph + * is returned. * * @see UBiDiLevel + * @see ubidi_getParagraph + * @see ubidi_getParagraphByIndex * @stable ICU 2.0 */ U_STABLE UBiDiLevel U_EXPORT2 ubidi_getParaLevel(const UBiDi *pBiDi); +/** + * Get the number of paragraphs. + * + * @param pBiDi is the paragraph or line UBiDi object. 
+ * + * @return The number of paragraphs. + * @stable ICU 3.4 + */ +U_STABLE int32_t U_EXPORT2 +ubidi_countParagraphs(UBiDi *pBiDi); + +/** + * Get a paragraph, given a position within the paragraph. + * This function returns information about a paragraph.

+ * + * @param pBiDi is the paragraph or line UBiDi object. + * + * @param charIndex is the index of a character within the text, in the + * range [0..ubidi_getLength(pBiDi)-1]. + * + * @param pParaStart will receive the index of the first character in + * the paragraph. + * This pointer can be NULL if this + * value is not necessary. + * + * @param pParaLimit will receive the limit of the paragraph. + * The l-value that you point to here may be the + * same expression (variable) as the one for + * charIndex. + * This pointer can be NULL if this + * value is not necessary. + * + * @param pParaLevel will receive the level of the paragraph. + * This pointer can be NULL if this + * value is not necessary. + * + * @param pErrorCode must be a valid pointer to an error code value. + * + * @return The index of the paragraph containing the specified position. + * @stable ICU 3.4 + */ +U_STABLE int32_t U_EXPORT2 +ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, int32_t *pParaStart, + int32_t *pParaLimit, UBiDiLevel *pParaLevel, + UErrorCode *pErrorCode); + +/** + * Get a paragraph, given the index of this paragraph. + * + * This function returns information about a paragraph.

+ * + * @param pBiDi is the paragraph UBiDi object. + * + * @param paraIndex is the number of the paragraph, in the + * range [0..ubidi_countParagraphs(pBiDi)-1]. + * + * @param pParaStart will receive the index of the first character in + * the paragraph. + * This pointer can be NULL if this + * value is not necessary. + * + * @param pParaLimit will receive the limit of the paragraph. + * This pointer can be NULL if this + * value is not necessary. + * + * @param pParaLevel will receive the level of the paragraph. + * This pointer can be NULL if this + * value is not necessary. + * + * @param pErrorCode must be a valid pointer to an error code value. + * + * @stable ICU 3.4 + */ +U_STABLE void U_EXPORT2 +ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex, + int32_t *pParaStart, int32_t *pParaLimit, + UBiDiLevel *pParaLevel, UErrorCode *pErrorCode); + /** * Get the level for one character. * @@ -727,10 +856,10 @@ ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex); * Note that this function may allocate memory under some * circumstances, unlike ubidi_getLevelAt(). * - * @param pBiDi is the paragraph or line UBiDi object. + * @param pBiDi is the paragraph or line UBiDi object, whose + * text length must be strictly positive. * - * @param pErrorCode must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. + * @param pErrorCode must be a valid pointer to an error code value. * * @return The levels array for the text, * or NULL if an error occurs. @@ -777,8 +906,7 @@ ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalStart, * * @param pBiDi is the paragraph or line UBiDi object. * - * @param pErrorCode must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. + * @param pErrorCode must be a valid pointer to an error code value. * * @return The number of runs. 
* @stable ICU 2.0 @@ -855,8 +983,7 @@ ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex, * * @param logicalIndex is the index of a character in the text. * - * @param pErrorCode must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. + * @param pErrorCode must be a valid pointer to an error code value. * * @return The visual position of this character. * @@ -879,8 +1006,7 @@ ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode) * * @param visualIndex is the visual position of a character. * - * @param pErrorCode must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. + * @param pErrorCode must be a valid pointer to an error code value. * * @return The index of this character in the text. * @@ -902,8 +1028,7 @@ ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode) * The array does not need to be initialized.

* The index map will result in indexMap[logicalIndex]==visualIndex.

* - * @param pErrorCode must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. + * @param pErrorCode must be a valid pointer to an error code value. * * @see ubidi_getVisualMap * @see ubidi_getVisualIndex @@ -923,8 +1048,7 @@ ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode); * The array does not need to be initialized.

* The index map will result in indexMap[visualIndex]==logicalIndex.

* - * @param pErrorCode must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. + * @param pErrorCode must be a valid pointer to an error code value. * * @see ubidi_getLogicalMap * @see ubidi_getLogicalIndex @@ -1117,8 +1241,7 @@ ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length); * #UBIDI_OUTPUT_REVERSE, * #UBIDI_REMOVE_BIDI_CONTROLS * - * @param pErrorCode must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. + * @param pErrorCode must be a valid pointer to an error code value. * * @return The length of the output string. * @stable ICU 2.0 @@ -1170,8 +1293,7 @@ ubidi_writeReordered(UBiDi *pBiDi, * how the reordered text is written. * See the options parameter in ubidi_writeReordered(). * - * @param pErrorCode must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. + * @param pErrorCode must be a valid pointer to an error code value. * * @return The length of the output string. 
* @stable ICU 2.0 diff --git a/icu4c/source/test/cintltst/cbiditst.c b/icu4c/source/test/cintltst/cbiditst.c index 718dc33f7b..fbca8f91f4 100644 --- a/icu4c/source/test/cintltst/cbiditst.c +++ b/icu4c/source/test/cintltst/cbiditst.c @@ -69,6 +69,8 @@ static void TestReorder(void); static void TestFailureRecovery(void); +static void TestMultipleParagraphs(void); + /* helpers ------------------------------------------------------------------ */ static const char *levelString="..............................................................."; @@ -93,6 +95,7 @@ addComplexTest(TestNode** root) { addTest(root, doInverseBiDiTest, "complex/bidi/inverse"); addTest(root, TestReorder,"complex/bidi/TestReorder"); addTest(root, TestFailureRecovery,"complex/bidi/TestFailureRecovery"); + addTest(root, TestMultipleParagraphs,"complex/bidi/multipleParagraphs"); addTest(root, doArabicShapingTest, "complex/arabic-shaping/ArabicShapingTest"); addTest(root, doLamAlefSpecialVLTRArabicShapingTest, "complex/arabic-shaping/lamalef"); addTest(root, doTashkeelSpecialVLTRArabicShapingTest, "complex/arabic-shaping/tashkeel"); @@ -296,8 +299,6 @@ static void TestReorder(){ log_err("ubidi_writeReordered() destSize and srcSize do not match\n"); }else if(uprv_strncmp(visualOrder[i],chars,destSize)!=0){ log_err("ubidi_writeReordered() did not give expected results. Expected: %s Got: %s At Index: %d\n",visualOrder[i],chars,i); - - } free(src); free(dest); @@ -332,10 +333,7 @@ static void TestReorder(){ log_err("ubidi_writeReordered() destSize and srcSize do not match\n"); }else if(uprv_strncmp(visualOrder1[i],chars,destSize)!=0){ log_err("ubidi_writeReordered() did not give expected results for UBIDI_DO_MIRRORING+UBIDI_OUTPUT_REVERSE. 
Expected: %s Got: %s At Index: %d\n",visualOrder1[i],chars,i); - - } - free(src); free(dest); } @@ -371,10 +369,7 @@ static void TestReorder(){ }else*/ if(uprv_strncmp(visualOrder2[i],chars,destSize)!=0){ log_err("ubidi_writeReordered() did not give expected results for UBIDI_INSERT_LRM_FOR_NUMERIC+UBIDI_OUTPUT_REVERSE. Expected: %s Got: %s At Index: %d\n",visualOrder2[i],chars,i); - - } - free(src); free(dest); } @@ -410,10 +405,7 @@ static void TestReorder(){ log_err("ubidi_writeReordered() destSize and srcSize do not match. Dest Size = %d Source Size = %d\n",destSize,srcSize ); }else if(uprv_strncmp(visualOrder3[i],chars,destSize)!=0){ log_err("ubidi_writeReordered() did not give expected results for UBIDI_OUTPUT_REVERSE. Expected: %s Got: %s At Index: %d\n",visualOrder3[i],chars,i); - - } - free(src); free(dest); } @@ -450,7 +442,6 @@ static void TestReorder(){ }else*/ if(uprv_strncmp(visualOrder4[i],chars,destSize)!=0){ log_err("ubidi_writeReordered() did not give expected results for UBIDI_DO_MIRRORING+UBIDI_REMOVE_BIDI_CONTROLS. 
Expected: %s Got: %s At Index: %d\n",visualOrder4[i],chars,i); } - free(src); free(dest); } @@ -726,6 +717,211 @@ static void TestFailureRecovery(void) { } } +static void TestMultipleParagraphs(void) { + #define MAXLEN 100 + static const char* const text = "__ABC\\u001c" /* Para #0 offset 0 */ + "__\\u05d0DE\\u001c" /* 1 6 */ + "__123\\u001c" /* 2 12 */ + "\\u000d\\u000a" /* 3 18 */ + "FG\\u000d" /* 4 20 */ + "\\u000d" /* 5 23 */ + "HI\\u000d\\u000a" /* 6 24 */ + "\\u000d\\u000a" /* 7 28 */ + "\\u000a" /* 8 30 */ + "\\u000a" /* 9 31 */ + "JK\\u001c"; /* 10 32 */ + static const int32_t paraCount=11; + static const int32_t paraBounds[]={0, 6, 12, 18, 20, 23, 24, 28, 30, 31, 32, 35}; + static const UBiDiLevel paraLevels[]={UBIDI_LTR, UBIDI_RTL, UBIDI_DEFAULT_LTR, UBIDI_DEFAULT_RTL, 22, 23}; + static const UBiDiLevel multiLevels[6][11] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23}; + UBiDiLevel gotLevel; + const UBiDiLevel* gotLevels; + UBool isOrderParagraphsLTR; + UChar src[MAXLEN]; + UErrorCode errorCode=U_ZERO_ERROR; + UBiDi* pBidi=ubidi_open(); + UBiDi* pLine; + int32_t srcSize, count, paraStart, paraLimit, paraIndex, length; + int i, k; + u_unescape(text, src, MAXLEN); + srcSize=u_strlen(src); + ubidi_setPara(pBidi, src, srcSize, UBIDI_LTR, NULL, &errorCode); + if(U_FAILURE(errorCode)){ + log_err("ubidi_setPara failed, paraLevel=%d, errorCode %s\n", + UBIDI_LTR, u_errorName(errorCode)); + ubidi_close(pBidi); + return; + } + /* check paragraph count and boundaries */ + if (paraCount!=(count=ubidi_countParagraphs(pBidi))) { + log_err("ubidi_countParagraphs returned %d, should be %d\n", + count, paraCount); + } + for (i=0; i