ICU-3660 add support for prologue and epilogue

X-SVN-Rev: 29596
This commit is contained in:
Matitiahu Allouche 2011-03-10 12:47:13 +00:00
parent 4d5351b108
commit cd4a7ba1c6
4 changed files with 373 additions and 13 deletions

View File

@ -1,7 +1,7 @@
/*
******************************************************************************
*
* Copyright (C) 1999-2010, International Business Machines
* Copyright (C) 1999-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
@ -332,7 +332,7 @@ int32_t length){
int32_t i;
UChar32 uchar;
UCharDirection dir;
if( text==NULL || length<-1 ){
return UBIDI_NEUTRAL;
}
@ -355,6 +355,31 @@ int32_t length){
/* perform (P2)..(P3) ------------------------------------------------------- */
static DirProp
firstL_R_AL(UBiDi *pBiDi) {
    /*
     * Scan the prologue from its start and report the first strong class
     * (L, R or AL) found after the last B; ON is returned when no strong
     * class follows the last B (or when the prologue has none at all).
     */
    const UChar *prologue=pBiDi->prologue;
    int32_t proLength=pBiDi->proLength;
    int32_t pos=0;
    DirProp strong=ON;

    while(pos<proLength) {
        UChar32 c;
        DirProp prop;
        /* U16_NEXT advances pos past the code point it reads */
        U16_NEXT(prologue, pos, proLength, c);
        prop=(DirProp)ubidi_getCustomizedClass(pBiDi, c);
        if(prop==B) {
            /* a B discards whatever strong class was remembered so far */
            strong=ON;
        } else if(strong==ON && (prop==L || prop==R || prop==AL)) {
            /* only the first strong class after a B is kept */
            strong=prop;
        }
    }
    return strong;
}
/*
* Get the directional properties for the text,
* calculate the flags bit-set, and
@ -397,10 +422,17 @@ getDirProps(UBiDi *pBiDi) {
lastStrongLTR=0;
}
if(isDefaultLevel) {
DirProp lastStrong;
paraDirDefault=pBiDi->paraLevel&1 ? CONTEXT_RTL : 0;
paraDir=paraDirDefault;
lastStrongDir=paraDirDefault;
state=LOOKING_FOR_STRONG;
if(pBiDi->proLength>0 &&
(lastStrong=firstL_R_AL(pBiDi))!=ON) {
paraDir=(lastStrong==L) ? 0 : CONTEXT_RTL;
state=FOUND_STRONG_CHAR;
} else {
paraDir=paraDirDefault;
state=LOOKING_FOR_STRONG;
}
lastStrongDir=paraDir;
} else {
state=NOT_CONTEXTUAL;
paraDir=0;
@ -1405,6 +1437,59 @@ processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
}
}
static DirProp
lastL_R_AL(UBiDi *pBiDi) {
    /*
     * Walk the prologue backwards and report the last strong class as
     * DirProp_L (for L) or DirProp_R (for R or AL). The search gives up
     * with DirProp_ON when a B is met first or the prologue is exhausted.
     */
    const UChar *prologue=pBiDi->prologue;
    int32_t pos=pBiDi->proLength;

    while(pos>0) {
        UChar32 c;
        DirProp prop;
        /* U16_PREV moves pos back to the start of the code point it reads */
        U16_PREV(prologue, 0, pos, c);
        prop=(DirProp)ubidi_getCustomizedClass(pBiDi, c);
        if(prop==L) {
            return DirProp_L;
        } else if(prop==R || prop==AL) {
            return DirProp_R;
        } else if(prop==B) {
            /* nothing before a B is taken into account */
            break;
        }
    }
    return DirProp_ON;
}
static DirProp
firstL_R_AL_EN_AN(UBiDi *pBiDi) {
    /*
     * Scan the epilogue from its start and report the first strong class
     * or digit: DirProp_L for L, DirProp_R for R or AL, DirProp_EN for EN,
     * DirProp_AN for AN. DirProp_ON is returned when none is found.
     */
    const UChar *epilogue=pBiDi->epilogue;
    int32_t epiLength=pBiDi->epiLength;
    int32_t pos=0;

    while(pos<epiLength) {
        UChar32 c;
        DirProp prop;
        /* U16_NEXT advances pos past the code point it reads */
        U16_NEXT(epilogue, pos, epiLength, c);
        prop=(DirProp)ubidi_getCustomizedClass(pBiDi, c);
        if(prop==L) {
            return DirProp_L;
        } else if(prop==R || prop==AL) {
            return DirProp_R;
        } else if(prop==EN) {
            return DirProp_EN;
        } else if(prop==AN) {
            return DirProp_AN;
        }
    }
    return DirProp_ON;
}
static void
resolveImplicitLevels(UBiDi *pBiDi,
int32_t start, int32_t limit,
@ -1439,6 +1524,12 @@ resolveImplicitLevels(UBiDi *pBiDi,
levState.runLevel=pBiDi->levels[start];
levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1];
levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1];
if(start==0 && pBiDi->proLength>0) {
DirProp lastStrong=lastL_R_AL(pBiDi);
if(lastStrong!=DirProp_ON) {
sor=lastStrong;
}
}
processPropertySeq(pBiDi, &levState, sor, start, start);
/* initialize for property state table */
if(NO_CONTEXT_RTL(dirProps[start])==NSM) {
@ -1516,6 +1607,12 @@ resolveImplicitLevels(UBiDi *pBiDi,
}
}
/* flush possible pending sequence, e.g. ON */
if(limit==pBiDi->length && pBiDi->epiLength>0) {
DirProp firstStrong=firstL_R_AL_EN_AN(pBiDi);
if(firstStrong!=DirProp_ON) {
eor=firstStrong;
}
}
processPropertySeq(pBiDi, &levState, eor, limit, limit);
}
@ -1566,6 +1663,40 @@ adjustWSLevels(UBiDi *pBiDi) {
}
}
U_DRAFT void U_EXPORT2
ubidi_setContext(UBiDi *pBiDi,
                 const UChar *prologue, int32_t proLength,
                 const UChar *epilogue, int32_t epiLength,
                 UErrorCode *pErrorCode) {
    /*
     * Record the prologue and epilogue to be used as context.
     * A length of -1 means the corresponding text is zero-terminated and
     * its length is measured here with u_strlen().
     */

    /* NOTE(review): presumably returns at once when pErrorCode is NULL or
       already holds a failure - confirm against the macro definition */
    RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);

    /* reject a missing object, a length below -1,
       and a NULL text paired with a non-zero length */
    if(pBiDi==NULL ||
       proLength<-1 || (proLength!=0 && prologue==NULL) ||
       epiLength<-1 || (epiLength!=0 && epilogue==NULL)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    /* only the pointers are stored, the text itself is not copied */
    pBiDi->prologue=prologue;
    pBiDi->proLength= proLength==-1 ? u_strlen(prologue) : proLength;
    pBiDi->epilogue=epilogue;
    pBiDi->epiLength= epiLength==-1 ? u_strlen(epilogue) : epiLength;
}
static void
setParaSuccess(UBiDi *pBiDi) {
    /* drop the context lengths so that the last context is forgotten */
    pBiDi->epiLength=0;
    pBiDi->proLength=0;
    /* pointing pParaBiDi at the object itself marks a successful setPara */
    pBiDi->pParaBiDi=pBiDi;
}
/* smaller of two values; each argument may be evaluated more than once */
#define BIDI_MIN(x, y)   ((y)>(x) ? (x) : (y))
/* absolute value; the argument may be evaluated more than once */
#define BIDI_ABS(x)      ((x)<0 ? (-(x)) : (x))
static void
@ -1823,7 +1954,7 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
pBiDi->runCount=0;
pBiDi->paraCount=0;
pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */
setParaSuccess(pBiDi); /* mark successful setPara */
return;
}
@ -2056,7 +2187,7 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
} else {
pBiDi->resultLength += pBiDi->insertPoints.size;
}
pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */
setParaSuccess(pBiDi); /* mark successful setPara */
}
U_CAPI void U_EXPORT2

View File

@ -1,7 +1,7 @@
/*
******************************************************************************
*
* Copyright (C) 1999-2007, International Business Machines
* Copyright (C) 1999-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
@ -257,6 +257,12 @@ struct UBiDi {
/* must be one of UBIDI_DEFAULT_xxx or 0 if not contextual */
UBiDiLevel defaultParaLevel;
/* context data */
const UChar *prologue;
int32_t proLength;
const UChar *epilogue;
int32_t epiLength;
/* the following is set in ubidi_setPara, used in processPropertySeq */
const struct ImpTabPair * pImpTabPair; /* pointer to levels state table pair */

View File

@ -1,7 +1,7 @@
/*
******************************************************************************
*
* Copyright (C) 1999-2010, International Business Machines
* Copyright (C) 1999-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
@ -1044,6 +1044,96 @@ ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions);
U_STABLE uint32_t U_EXPORT2
ubidi_getReorderingOptions(UBiDi *pBiDi);
/**
* Set the context before a call to ubidi_setPara().<p>
*
* ubidi_setPara() computes the left-right directionality for a given piece
* of text which is supplied as one of its arguments. Sometimes this piece
* of text (the "main text") should be considered in context, because text
* appearing before ("prologue") and/or after ("epilogue") the main text
* may affect the result of this computation.<p>
*
* This function specifies the prologue and/or the epilogue for the next
* call to ubidi_setPara(). The characters specified as prologue and
* epilogue should not be modified by the calling program until the call
* to ubidi_setPara() has returned. If successive calls to ubidi_setPara()
* all need specification of a context, ubidi_setContext() must be called
* before each call to ubidi_setPara(). In other words, a context is not
* "remembered" after the following successful call to ubidi_setPara().<p>
*
* If a call to ubidi_setPara() specifies UBIDI_DEFAULT_LTR or
* UBIDI_DEFAULT_RTL as paraLevel and is preceded by a call to
* ubidi_setContext() which specifies a prologue, the paragraph level will
* be computed taking into consideration the text in the prologue.<p>
*
* When ubidi_setPara() is called without a previous call to
* ubidi_setContext(), the main text is handled as if preceded and followed
* by strong directional characters at the current paragraph level.
* Calling ubidi_setContext() with specification of a prologue will change
* this behavior by handling the main text as if preceded by the last
* strong character appearing in the prologue, if any.
* Calling ubidi_setContext() with specification of an epilogue will change
* the behavior of ubidi_setPara() by handling the main text as if followed
* by the first strong character or digit appearing in the epilogue, if any.<p>
*
* Note 1: if <code>ubidi_setContext</code> is called repeatedly without
* calling <code>ubidi_setPara</code>, the earlier calls have no effect,
* only the last call will be remembered for the next call to
* <code>ubidi_setPara</code>.<p>
*
* Note 2: calling <code>ubidi_setContext(pBiDi, NULL, 0, NULL, 0, &errorCode)</code>
* cancels any previous setting of non-empty prologue or epilogue.
* The next call to <code>ubidi_setPara()</code> will process no
* prologue or epilogue.<p>
*
* Note 3: users must be aware that even after setting the context
* before a call to ubidi_setPara() to perform e.g. a logical to visual
* transformation, the resulting string may not be identical to what it
* would have been if all the text, including prologue and epilogue, had
* been processed together.<br>
* Example (upper case letters represent RTL characters):<br>
* &nbsp;&nbsp;prologue = "<code>abc DE</code>"<br>
* &nbsp;&nbsp;epilogue = none<br>
* &nbsp;&nbsp;main text = "<code>FGH xyz</code>"<br>
* &nbsp;&nbsp;paraLevel = UBIDI_LTR<br>
* &nbsp;&nbsp;display without prologue = "<code>HGF xyz</code>"
* ("HGF" is adjacent to "xyz")<br>
* &nbsp;&nbsp;display with prologue = "<code>abc HGFED xyz</code>"
* ("HGF" is not adjacent to "xyz")<br>
*
* @param pBiDi is a paragraph <code>UBiDi</code> object.
*
* @param prologue is a pointer to the text which precedes the text that
* will be specified in a coming call to ubidi_setPara().
* If there is no prologue to consider, then <code>proLength</code>
* must be zero and this pointer can be NULL.
* The text is not copied; only the pointer is stored.
*
* @param proLength is the length of the prologue; if <code>proLength==-1</code>
* then the prologue must be zero-terminated.
* Otherwise proLength must be >= 0. If <code>proLength==0</code>, it means
* that there is no prologue to consider.
*
* @param epilogue is a pointer to the text which follows the text that
* will be specified in a coming call to ubidi_setPara().
* If there is no epilogue to consider, then <code>epiLength</code>
* must be zero and this pointer can be NULL.
* The text is not copied; only the pointer is stored.
*
* @param epiLength is the length of the epilogue; if <code>epiLength==-1</code>
* then the epilogue must be zero-terminated.
* Otherwise epiLength must be >= 0. If <code>epiLength==0</code>, it means
* that there is no epilogue to consider.
*
* @param pErrorCode must be a valid pointer to an error code value.
* It is set to U_ILLEGAL_ARGUMENT_ERROR if <code>pBiDi</code> is NULL,
* if either length is less than -1, or if a text pointer is NULL while
* its length is not zero.
*
* @see ubidi_setPara
* @draft ICU 4.8
*/
U_DRAFT void U_EXPORT2
ubidi_setContext(UBiDi *pBiDi,
const UChar *prologue, int32_t proLength,
const UChar *epilogue, int32_t epiLength,
UErrorCode *pErrorCode);
/**
* Perform the Unicode Bidi algorithm. It is defined in the
* <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,

View File

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2010, International Business Machines Corporation and
* Copyright (c) 1997-2011, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/* file name: cbiditst.cpp
@ -74,6 +74,8 @@ static void testMultipleParagraphs(void);
static void testGetBaseDirection(void);
static void testContext(void);
/* new BIDI API */
static void testReorderingMode(void);
static void testReorderRunsOnly(void);
@ -118,13 +120,14 @@ addComplexTest(TestNode** root) {
addTest(root, testReorderRunsOnly, "complex/bidi/TestReorderRunsOnly");
addTest(root, testStreaming, "complex/bidi/TestStreaming");
addTest(root, testClassOverride, "complex/bidi/TestClassOverride");
addTest(root, testGetBaseDirection, "complex/bidi/testGetBaseDirection");
addTest(root, testContext, "complex/bidi/testContext");
addTest(root, doArabicShapingTest, "complex/arabic-shaping/ArabicShapingTest");
addTest(root, doLamAlefSpecialVLTRArabicShapingTest, "complex/arabic-shaping/lamalef");
addTest(root, doTashkeelSpecialVLTRArabicShapingTest, "complex/arabic-shaping/tashkeel");
addTest(root, doLOGICALArabicDeShapingTest, "complex/arabic-shaping/unshaping");
addTest(root, doArabicShapingTestForBug5421, "complex/arabic-shaping/bug-5421");
addTest(root, testGetBaseDirection, "complex/bidi/testGetBaseDirection");
}
static void
@ -340,6 +343,7 @@ static int pseudoToU16(const int length, const char * input, UChar * output)
}
for (i = 0; i < length; i++)
output[i] = pseudoToUChar[(uint8_t)input[i]];
output[length] = 0;
return length;
}
@ -1210,8 +1214,8 @@ static void testGetBaseDirection(void) {
for(i=0; i<LENGTHOF(testCases); ++i) {
dir = ubidi_getBaseDirection(testCases[i].s, testCases[i].length );
log_verbose("Testing case %d\tReceived dir %d\n", i, dir);
if (dir != expectedDir[i])
log_err("\nFailed getBaseDirection case %d Expected %d \tReceived %d\n",
if (dir != expectedDir[i])
log_err("\nFailed getBaseDirection case %d Expected %d \tReceived %d\n",
i, expectedDir[i], dir);
}
@ -4045,3 +4049,132 @@ checkMaps(UBiDi *pBiDi, int32_t stringIndex, const char *src, const char *dest,
return testOK;
}
/*
 * Check that an API call reported U_ILLEGAL_ARGUMENT_ERROR.
 * Returns TRUE when *rc holds that code; otherwise logs an error that
 * includes message and the name of the code actually found, and returns FALSE.
 */
static UBool
assertIllegalArgument(const char* message, UErrorCode* rc) {
    if (*rc == U_ILLEGAL_ARGUMENT_ERROR) {
        return TRUE;
    }
    log_err("%s() failed with error %s.\n", message, myErrorName(*rc));
    return FALSE;
}
/*
 * One ubidi_setContext() test case.
 * The text members point at string literals, so they are declared as
 * pointers to const: the test only reads them.
 */
typedef struct {
    const char* prologue;   /* text preceding the main text, may be "" */
    const char* source;     /* main text passed to ubidi_setPara() */
    const char* epilogue;   /* text following the main text, may be "" */
    const char* expected;   /* expected output of ubidi_writeReordered() */
    UBiDiLevel paraLevel;   /* paragraph level passed to ubidi_setPara() */
} contextCase;
/*
 * Test cases for testContext(). Each entry lists, in order:
 * prologue, main text, epilogue, expected reordered output, paragraph level.
 * All strings use the test's pseudo-Bidi notation and are converted with
 * pseudoToU16() before use.
 * NOTE(review): presumably upper-case letters stand for RTL characters and
 * '|' for a paragraph separator - confirm against the pseudoToUChar table.
 * Cases 00-09 use UBIDI_LTR, 10-19 UBIDI_RTL and 20-25 UBIDI_DEFAULT_LTR.
 */
static const contextCase contextData[] = {
/*00*/ {"", "", "", "", UBIDI_LTR},
/*01*/ {"", ".-=JKL-+*", "", ".-=LKJ-+*", UBIDI_LTR},
/*02*/ {" ", ".-=JKL-+*", " ", ".-=LKJ-+*", UBIDI_LTR},
/*03*/ {"a", ".-=JKL-+*", "b", ".-=LKJ-+*", UBIDI_LTR},
/*04*/ {"D", ".-=JKL-+*", "", "LKJ=-.-+*", UBIDI_LTR},
/*05*/ {"", ".-=JKL-+*", " D", ".-=*+-LKJ", UBIDI_LTR},
/*06*/ {"", ".-=JKL-+*", " 2", ".-=*+-LKJ", UBIDI_LTR},
/*07*/ {"", ".-=JKL-+*", " 7", ".-=*+-LKJ", UBIDI_LTR},
/*08*/ {" G 1", ".-=JKL-+*", " H", "*+-LKJ=-.", UBIDI_LTR},
/*09*/ {"7", ".-=JKL-+*", " H", ".-=*+-LKJ", UBIDI_LTR},
/*10*/ {"", ".-=abc-+*", "", "*+-abc=-.", UBIDI_RTL},
/*11*/ {" ", ".-=abc-+*", " ", "*+-abc=-.", UBIDI_RTL},
/*12*/ {"D", ".-=abc-+*", "G", "*+-abc=-.", UBIDI_RTL},
/*13*/ {"x", ".-=abc-+*", "", "*+-.-=abc", UBIDI_RTL},
/*14*/ {"", ".-=abc-+*", " y", "abc-+*=-.", UBIDI_RTL},
/*15*/ {"", ".-=abc-+*", " 2", "abc-+*=-.", UBIDI_RTL},
/*16*/ {" x 1", ".-=abc-+*", " 2", ".-=abc-+*", UBIDI_RTL},
/*17*/ {" x 7", ".-=abc-+*", " 8", "*+-.-=abc", UBIDI_RTL},
/*18*/ {"x|", ".-=abc-+*", " 8", "*+-abc=-.", UBIDI_RTL},
/*19*/ {"G|y", ".-=abc-+*", " 8", "*+-.-=abc", UBIDI_RTL},
/*20*/ {"", ".-=", "", ".-=", UBIDI_DEFAULT_LTR},
/*21*/ {"D", ".-=", "", "=-.", UBIDI_DEFAULT_LTR},
/*22*/ {"G", ".-=", "", "=-.", UBIDI_DEFAULT_LTR},
/*23*/ {"xG", ".-=", "", ".-=", UBIDI_DEFAULT_LTR},
/*24*/ {"x|G", ".-=", "", "=-.", UBIDI_DEFAULT_LTR},
/*25*/ {"x|G", ".-=|-+*", "", "=-.|-+*", UBIDI_DEFAULT_LTR},
};
/* number of entries in contextData */
#define CONTEXT_COUNT LENGTHOF(contextData)
/*
 * Test ubidi_setContext().
 * First checks that invalid arguments are rejected with
 * U_ILLEGAL_ARGUMENT_ERROR, then runs every entry of contextData:
 * the context is set, the main text is processed with ubidi_setPara(),
 * and the output of ubidi_writeReordered() is compared with the expected
 * string. Every failing step is logged and clears testOK.
 */
static void
testContext(void) {
    UChar prologue[MAXLEN], epilogue[MAXLEN], src[MAXLEN], dest[MAXLEN];
    char destChars[MAXLEN];
    UBiDi *pBiDi = NULL;
    UErrorCode rc;
    int32_t proLength, epiLength, srcLen, destLen, tc;
    contextCase cc;
    UBool testOK = TRUE;

    log_verbose("\nEntering TestContext \n\n");

    /* test null BiDi object */
    rc = U_ZERO_ERROR;
    ubidi_setContext(pBiDi, NULL, 0, NULL, 0, &rc);
    testOK &= assertIllegalArgument("Error when BiDi object is null", &rc);

    pBiDi = getBiDiObject();
    ubidi_orderParagraphsLTR(pBiDi, TRUE);

    /* test proLength < -1 */
    rc = U_ZERO_ERROR;
    ubidi_setContext(pBiDi, NULL, -2, NULL, 0, &rc);
    testOK &= assertIllegalArgument("Error when proLength < -1", &rc);
    /* test epiLength < -1 */
    rc = U_ZERO_ERROR;
    ubidi_setContext(pBiDi, NULL, 0, NULL, -2, &rc);
    testOK &= assertIllegalArgument("Error when epiLength < -1", &rc);
    /* test prologue == NULL */
    rc = U_ZERO_ERROR;
    ubidi_setContext(pBiDi, NULL, 3, NULL, 0, &rc);
    testOK &= assertIllegalArgument("Prologue is NULL", &rc);
    /* test epilogue == NULL */
    rc = U_ZERO_ERROR;
    ubidi_setContext(pBiDi, NULL, 0, NULL, 4, &rc);
    testOK &= assertIllegalArgument("Epilogue is NULL", &rc);

    for (tc = 0; tc < CONTEXT_COUNT; tc++) {
        cc = contextData[tc];
        proLength = (int32_t)strlen(cc.prologue);
        pseudoToU16(proLength, cc.prologue, prologue);
        epiLength = (int32_t)strlen(cc.epilogue);
        pseudoToU16(epiLength, cc.epilogue, epilogue);
        /* in the call below, prologue and epilogue are swapped to show
           that the next call will override this call */
        rc = U_ZERO_ERROR;
        ubidi_setContext(pBiDi, epilogue, epiLength, prologue, proLength, &rc);
        testOK &= assertSuccessful("swapped ubidi_setContext", &rc);
        /* lengths of -1 make ubidi_setContext() measure the
           zero-terminated buffers itself */
        ubidi_setContext(pBiDi, prologue, -1, epilogue, -1, &rc);
        testOK &= assertSuccessful("regular ubidi_setContext", &rc);
        srcLen = (int32_t)strlen(cc.source);
        pseudoToU16(srcLen, cc.source, src);
        ubidi_setPara(pBiDi, src, srcLen, cc.paraLevel, NULL, &rc);
        testOK &= assertSuccessful("ubidi_setPara", &rc);
        destLen = ubidi_writeReordered(pBiDi, dest, MAXLEN, UBIDI_DO_MIRRORING, &rc);
        /* fold this result into testOK like the other steps, so that a
           failure here cannot be followed by the "Context test OK" message */
        testOK &= assertSuccessful("ubidi_writeReordered", &rc);
        u16ToPseudo(destLen, dest, destChars);
        if (uprv_strcmp(cc.expected, destChars)) {
            char formatChars[MAXLEN];
            log_err("\nActual and expected output mismatch on case %d.\n"
                    "%20s %s\n%20s %s\n%20s %s\n%20s %s\n%20s %s\n%20s %s\n%20s %d\n%20s %u\n%20s %d\n",
                    tc,
                    "Prologue:", cc.prologue,
                    "Input:", cc.source,
                    "Epilogue:", cc.epilogue,
                    "Expected output:", cc.expected,
                    "Actual output:", destChars,
                    "Levels:", formatLevels(pBiDi, formatChars),
                    "Reordering mode:", ubidi_getReorderingMode(pBiDi),
                    "Paragraph level:", ubidi_getParaLevel(pBiDi),
                    "Reordering option:", ubidi_getReorderingOptions(pBiDi));
            testOK = FALSE;
        }
    }
    if (testOK) {
        log_verbose("\nContext test OK\n");
    }
    ubidi_close(pBiDi);

    log_verbose("\nExiting TestContext \n\n");
}