/********************************************************************
 * COPYRIGHT:
 * Copyright (c) 1997-1999, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************/
/*   file name:  cbiditst.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 1999sep27
*   created by: Markus W. Scherer
*/

#include "cintltst.h"
#include "unicode/utypes.h"
#include "unicode/uchar.h"
#include "unicode/ustring.h"
#include "unicode/ubidi.h"
#include "unicode/ushape.h"
#include "cmemory.h"
#include "cbiditst.h"

/* number of elements of a true array (not valid for pointers) */
#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))

/* prototypes ---------------------------------------------------------------*/

static void
doBiDiTest(void);

static void
doTests(UBiDi *pBiDi, UBiDi *pLine);

static void
doTest(UBiDi *pBiDi, int testNumber, BiDiTestData *test, UTextOffset lineStart);

static void
testReordering(UBiDi *pBiDi, int testNumber);

static void
doInverseBiDiTest(void);

static void
testManyInverseBiDi(UBiDi *pBiDi, UBiDiLevel direction);

static void
testInverseBiDi(UBiDi *pBiDi, const UChar *src, int32_t srcLength, UBiDiLevel direction, UErrorCode *pErrorCode);

static void
testWriteReverse(void);

static void
doArabicShapingTest(void);

/* helpers ------------------------------------------------------------------ */

static const char *levelString="...............................................................";

static UChar *
getStringFromDirProps(const uint8_t *dirProps, UTextOffset length);

static void
printUnicode(const UChar *s, int32_t length, const UBiDiLevel *levels);

/* regression tests ---------------------------------------------------------*/

/*
 * Register the tests of this file in the test tree at root:
 * doBiDiTest as "complex/bidi", doInverseBiDiTest as "complex/invbidi" and
 * doArabicShapingTest as "complex/arabic-shaping".
 */
extern void
addComplexTest(TestNode** root) {
    addTest(root, doBiDiTest, "complex/bidi");
    addTest(root, doInverseBiDiTest, "complex/invbidi");
    addTest(root, doArabicShapingTest, "complex/arabic-shaping");
}

/*
 * Entry point for "complex/bidi".
 * Opens one UBiDi object with ubidi_openSized(MAX_STRING_LENGTH, 0, ...) and
 * a second one with ubidi_open(), hands both to doTests(), and closes
 * whichever of them was opened. A NULL result from either open call is
 * logged as an error and doTests() is not run.
 */
static void
doBiDiTest() {
    UBiDi *pBiDi, *pLine=NULL;
    UErrorCode errorCode=U_ZERO_ERROR;

    log_verbose("*** bidi regression test ***\n");
    pBiDi=ubidi_openSized(MAX_STRING_LENGTH, 0, &errorCode);
    if(pBiDi!=NULL) {
        pLine=ubidi_open();
        if(pLine!=NULL) {
            doTests(pBiDi, pLine);
        } else {
            log_err("ubidi_open() returned NULL, out of memory\n");
        }
    } else {
        log_err("ubidi_openSized() returned NULL, errorCode %s\n", myErrorName(errorCode));
    }

    /* pLine was initialized to NULL, so this is safe even if pBiDi failed to open */
    if(pLine!=NULL) {
        ubidi_close(pLine);
    }
    if(pBiDi!=NULL) {
        ubidi_close(pBiDi);
    }

    log_verbose("*** bidi regression test finished ***\n");
}

/*
 * NOTE(review): the run of text below, from doTests() up to the start of
 * testWriteReverse(), is damaged in this copy of the file. Everything between
 * a '<' and the following '>' appears to have been dropped, for example
 * "for(i=0; itext+lineStart;". Loop conditions, several function headers and
 * part of a string literal are missing, so the text is reproduced unchanged;
 * it should be restored from the original source rather than edited here.
 */
static void doTests(UBiDi *pBiDi, UBiDi *pLine) { int i; UChar *s; UErrorCode errorCode; UTextOffset lineStart; UBiDiLevel paraLevel; for(i=0; itext+lineStart; const UBiDiLevel *levels=test->levels; const uint8_t *visualMap=test->visualMap; UTextOffset i, len=ubidi_getLength(pBiDi), logicalIndex, runCount; UErrorCode errorCode=U_ZERO_ERROR; UBiDiLevel level, level2; testReordering(pBiDi, testNumber); for(i=0; i0) { log_verbose(","); } log_verbose(" %d", ubidi_getLevelAt(pBiDi, i)); } log_verbose("\n--reordered:"); for(i=0; i0) { log_verbose(","); } log_verbose(" %d", ubidi_getVisualIndex(pBiDi, i, &errorCode)); } log_verbose("\n"); if(test->direction!=ubidi_getDirection(pBiDi)) { log_err("ubidi_getDirection(tests[%d]): wrong direction %d\n", testNumber, ubidi_getDirection(pBiDi)); } if(test->resultLevel!=ubidi_getParaLevel(pBiDi)) { log_err("ubidi_getParaLevel(tests[%d]): wrong paragraph level %d\n", testNumber, ubidi_getParaLevel(pBiDi)); } for(i=0; i0); } else { logicalStart+=runLength; /* logicalLimit */ do { /* RTL */ visualMap4[visualIndex++]=--logicalStart; } while(--runLength>0); } } /* print all the maps */ log_verbose("logical maps:\n"); for(i=0; ilogical->visual did not roundtrip the text;\n" " turn on verbose mode to see details\n"); } }

static void
testWriteReverse() {
    /* U+064e and U+0650 are combining marks (Mn) */
    static const UChar forward[]={
        0x200f, 0x627, 0x64e, 0x650, 0x20, 0x28, 0x31, 0x29
    }, reverseKeepCombining[]={
        0x29, 0x31, 0x28, 0x20, 0x627,
0x64e, 0x650, 0x200f }, reverseRemoveControlsKeepCombiningDoMirror[]={ 0x28, 0x31, 0x29, 0x20, 0x627, 0x64e, 0x650 }; static UChar reverse[10]; UErrorCode errorCode; int32_t length; /* test ubidi_writeReverse() with "interesting" options */ errorCode=U_ZERO_ERROR; length=ubidi_writeReverse(forward, LENGTHOF(forward), reverse, LENGTHOF(reverse), UBIDI_KEEP_BASE_COMBINING, &errorCode); if(U_FAILURE(errorCode) || length!=LENGTHOF(reverseKeepCombining) || uprv_memcmp(reverse, reverseKeepCombining, length*U_SIZEOF_UCHAR)!=0) { log_err("failure in ubidi_writeReverse(UBIDI_KEEP_BASE_COMBINING): length=%d (should be %d), error code %s\n", length, LENGTHOF(reverseKeepCombining), u_errorName(errorCode)); } uprv_memset(reverse, 0xa5, LENGTHOF(reverse)*U_SIZEOF_UCHAR); errorCode=U_ZERO_ERROR; length=ubidi_writeReverse(forward, LENGTHOF(forward), reverse, LENGTHOF(reverse), UBIDI_REMOVE_BIDI_CONTROLS|UBIDI_DO_MIRRORING|UBIDI_KEEP_BASE_COMBINING, &errorCode); if(U_FAILURE(errorCode) || length!=LENGTHOF(reverseRemoveControlsKeepCombiningDoMirror) || uprv_memcmp(reverse, reverseRemoveControlsKeepCombiningDoMirror, length*U_SIZEOF_UCHAR)!=0) { log_err("failure in ubidi_writeReverse(UBIDI_REMOVE_BIDI_CONTROLS|UBIDI_DO_MIRRORING|UBIDI_KEEP_BASE_COMBINING):\n" " length=%d (should be %d), error code %s\n", length, LENGTHOF(reverseRemoveControlsKeepCombiningDoMirror), u_errorName(errorCode)); } } /* arabic shaping ----------------------------------------------------------- */ static void doArabicShapingTest() { static const UChar source[]={ 0x31, /* en:1 */ 0x627, /* arabic:alef */ 0x32, /* en:2 */ 0x6f3, /* an:3 */ 0x61, /* latin:a */ 0x34, /* en:4 */ 0 }, en2an[]={ 0x661, 0x627, 0x662, 0x6f3, 0x61, 0x664, 0 }, an2en[]={ 0x31, 0x627, 0x32, 0x33, 0x61, 0x34, 0 }, logical_alen2an_init_lr[]={ 0x31, 0x627, 0x662, 0x6f3, 0x61, 0x34, 0 }, logical_alen2an_init_al[]={ 0x6f1, 0x627, 0x6f2, 0x6f3, 0x61, 0x34, 0 }, reverse_alen2an_init_lr[]={ 0x661, 0x627, 0x32, 0x6f3, 0x61, 0x34, 0 }, 
reverse_alen2an_init_al[]={ 0x6f1, 0x627, 0x32, 0x6f3, 0x61, 0x6f4, 0 }; UChar dest[8]; UErrorCode errorCode; int32_t length; /* test number shaping */ /* european->arabic */ errorCode=U_ZERO_ERROR; length=u_shapeArabic(source, LENGTHOF(source), dest, LENGTHOF(dest), U_SHAPE_DIGITS_EN2AN|U_SHAPE_DIGIT_TYPE_AN, &errorCode); if(U_FAILURE(errorCode) || length!=LENGTHOF(source) || uprv_memcmp(dest, en2an, length*U_SIZEOF_UCHAR)!=0) { log_err("failure in u_shapeArabic(en2an)\n"); } /* arabic->european */ errorCode=U_ZERO_ERROR; length=u_shapeArabic(source, -1, dest, LENGTHOF(dest), U_SHAPE_DIGITS_AN2EN|U_SHAPE_DIGIT_TYPE_AN_EXTENDED, &errorCode); if(U_FAILURE(errorCode) || length!=u_strlen(source) || uprv_memcmp(dest, an2en, length*U_SIZEOF_UCHAR)!=0) { log_err("failure in u_shapeArabic(an2en)\n"); } /* european->arabic with context, logical order, initial state not AL */ errorCode=U_ZERO_ERROR; length=u_shapeArabic(source, LENGTHOF(source), dest, LENGTHOF(dest), U_SHAPE_DIGITS_ALEN2AN_INIT_LR|U_SHAPE_DIGIT_TYPE_AN, &errorCode); if(U_FAILURE(errorCode) || length!=LENGTHOF(source) || uprv_memcmp(dest, logical_alen2an_init_lr, length*U_SIZEOF_UCHAR)!=0) { log_err("failure in u_shapeArabic(logical_alen2an_init_lr)\n"); } /* european->arabic with context, logical order, initial state AL */ errorCode=U_ZERO_ERROR; length=u_shapeArabic(source, LENGTHOF(source), dest, LENGTHOF(dest), U_SHAPE_DIGITS_ALEN2AN_INIT_AL|U_SHAPE_DIGIT_TYPE_AN_EXTENDED, &errorCode); if(U_FAILURE(errorCode) || length!=LENGTHOF(source) || uprv_memcmp(dest, logical_alen2an_init_al, length*U_SIZEOF_UCHAR)!=0) { log_err("failure in u_shapeArabic(logical_alen2an_init_al)\n"); } /* european->arabic with context, reverse order, initial state not AL */ errorCode=U_ZERO_ERROR; length=u_shapeArabic(source, LENGTHOF(source), dest, LENGTHOF(dest), U_SHAPE_DIGITS_ALEN2AN_INIT_LR|U_SHAPE_DIGIT_TYPE_AN|U_SHAPE_TEXT_DIRECTION_VISUAL_LTR, &errorCode); if(U_FAILURE(errorCode) || length!=LENGTHOF(source) || 
uprv_memcmp(dest, reverse_alen2an_init_lr, length*U_SIZEOF_UCHAR)!=0) { log_err("failure in u_shapeArabic(reverse_alen2an_init_lr)\n"); } /* european->arabic with context, reverse order, initial state AL */ errorCode=U_ZERO_ERROR; length=u_shapeArabic(source, LENGTHOF(source), dest, LENGTHOF(dest), U_SHAPE_DIGITS_ALEN2AN_INIT_AL|U_SHAPE_DIGIT_TYPE_AN_EXTENDED|U_SHAPE_TEXT_DIRECTION_VISUAL_LTR, &errorCode); if(U_FAILURE(errorCode) || length!=LENGTHOF(source) || uprv_memcmp(dest, reverse_alen2an_init_al, length*U_SIZEOF_UCHAR)!=0) { log_err("failure in u_shapeArabic(reverse_alen2an_init_al)\n"); } /* test noop */ errorCode=U_ZERO_ERROR; length=u_shapeArabic(source, LENGTHOF(source), dest, LENGTHOF(dest), 0, &errorCode); if(U_FAILURE(errorCode) || length!=LENGTHOF(source) || uprv_memcmp(dest, source, length*U_SIZEOF_UCHAR)!=0) { log_err("failure in u_shapeArabic(noop)\n"); } errorCode=U_ZERO_ERROR; length=u_shapeArabic(source, 0, dest, LENGTHOF(dest), U_SHAPE_DIGITS_EN2AN|U_SHAPE_DIGIT_TYPE_AN, &errorCode); if(U_FAILURE(errorCode) || length!=0) { log_err("failure in u_shapeArabic(en2an, sourceLength=0), returned %d/%s\n", u_errorName(errorCode), LENGTHOF(source)); } /* preflight digit shaping */ errorCode=U_ZERO_ERROR; length=u_shapeArabic(source, LENGTHOF(source), NULL, 0, U_SHAPE_DIGITS_EN2AN|U_SHAPE_DIGIT_TYPE_AN, &errorCode); if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=LENGTHOF(source)) { log_err("failure in u_shapeArabic(en2an preflighting), returned %d/%s instead of %d/U_BUFFER_OVERFLOW_ERROR\n", length, u_errorName(errorCode), LENGTHOF(source)); } /* test illegal arguments */ errorCode=U_ZERO_ERROR; length=u_shapeArabic(NULL, LENGTHOF(source), dest, LENGTHOF(dest), U_SHAPE_DIGITS_EN2AN|U_SHAPE_DIGIT_TYPE_AN, &errorCode); if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { log_err("failure in u_shapeArabic(source=NULL), returned %s instead of U_ILLEGAL_ARGUMENT_ERROR\n", u_errorName(errorCode)); } errorCode=U_ZERO_ERROR; length=u_shapeArabic(source, -2, dest, 
LENGTHOF(dest), U_SHAPE_DIGITS_EN2AN|U_SHAPE_DIGIT_TYPE_AN, &errorCode); if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { log_err("failure in u_shapeArabic(sourceLength=-2), returned %s instead of U_ILLEGAL_ARGUMENT_ERROR\n", u_errorName(errorCode)); } errorCode=U_ZERO_ERROR; length=u_shapeArabic(source, LENGTHOF(source), NULL, LENGTHOF(dest), U_SHAPE_DIGITS_EN2AN|U_SHAPE_DIGIT_TYPE_AN, &errorCode); if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { log_err("failure in u_shapeArabic(dest=NULL), returned %s instead of U_ILLEGAL_ARGUMENT_ERROR\n", u_errorName(errorCode)); } errorCode=U_ZERO_ERROR; length=u_shapeArabic(source, LENGTHOF(source), dest, -1, U_SHAPE_DIGITS_EN2AN|U_SHAPE_DIGIT_TYPE_AN, &errorCode); if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { log_err("failure in u_shapeArabic(destSize=-1), returned %s instead of U_ILLEGAL_ARGUMENT_ERROR\n", u_errorName(errorCode)); } errorCode=U_ZERO_ERROR; length=u_shapeArabic(source, LENGTHOF(source), dest, LENGTHOF(dest), U_SHAPE_LENGTH_RESERVED|U_SHAPE_DIGITS_EN2AN|U_SHAPE_DIGIT_TYPE_AN, &errorCode); if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { log_err("failure in u_shapeArabic(U_SHAPE_LENGTH_RESERVED), returned %s instead of U_ILLEGAL_ARGUMENT_ERROR\n", u_errorName(errorCode)); } errorCode=U_ZERO_ERROR; length=u_shapeArabic(source, LENGTHOF(source), dest, LENGTHOF(dest), U_SHAPE_LETTERS_RESERVED|U_SHAPE_DIGITS_EN2AN|U_SHAPE_DIGIT_TYPE_AN, &errorCode); if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { log_err("failure in u_shapeArabic(U_SHAPE_LETTERS_RESERVED), returned %s instead of U_ILLEGAL_ARGUMENT_ERROR\n", u_errorName(errorCode)); } errorCode=U_ZERO_ERROR; length=u_shapeArabic(source, LENGTHOF(source), dest, LENGTHOF(dest), U_SHAPE_DIGITS_RESERVED|U_SHAPE_DIGIT_TYPE_AN, &errorCode); if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { log_err("failure in u_shapeArabic(U_SHAPE_DIGITS_RESERVED), returned %s instead of U_ILLEGAL_ARGUMENT_ERROR\n", u_errorName(errorCode)); } errorCode=U_ZERO_ERROR; length=u_shapeArabic(source, LENGTHOF(source), dest, 
LENGTHOF(dest), U_SHAPE_DIGITS_EN2AN|U_SHAPE_DIGIT_TYPE_RESERVED, &errorCode); if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { log_err("failure in u_shapeArabic(U_SHAPE_DIGIT_TYPE_RESERVED), returned %s instead of U_ILLEGAL_ARGUMENT_ERROR\n", u_errorName(errorCode)); } errorCode=U_ZERO_ERROR; length=u_shapeArabic(source, LENGTHOF(source), (UChar *)(source+2), LENGTHOF(dest), /* overlap source and destination */ U_SHAPE_DIGITS_EN2AN|U_SHAPE_DIGIT_TYPE_AN, &errorCode); if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { log_err("failure in u_shapeArabic(U_SHAPE_DIGIT_TYPE_RESERVED), returned %s instead of U_ILLEGAL_ARGUMENT_ERROR\n", u_errorName(errorCode)); } /* test that letter shaping sets "unsupported" */ errorCode=U_ZERO_ERROR; length=u_shapeArabic(source, LENGTHOF(source), dest, LENGTHOF(dest), U_SHAPE_LETTERS_SHAPE, &errorCode); if(errorCode!=U_UNSUPPORTED_ERROR) { log_err("u_shapeArabic(shape letters) does not return U_UNSUPPORTED_ERROR but %s\n", u_errorName(errorCode)); } } /* helpers ------------------------------------------------------------------ */ /* return a string with characters according to the desired directional properties */ static UChar * getStringFromDirProps(const uint8_t *dirProps, UTextOffset length) { static UChar s[MAX_STRING_LENGTH]; UTextOffset i; /* this part would have to be modified for UTF-x */ for(i=0; i