ICU-10647 fixes to UBA implementation to make it behave exactly like the reference implementations even for all the known weird cases.
X-SVN-Rev: 34934
This commit is contained in:
parent
281cbf5106
commit
11bda31f78
File diff suppressed because it is too large
Load Diff
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
******************************************************************************
|
******************************************************************************
|
||||||
*
|
*
|
||||||
* Copyright (C) 1999-2013, International Business Machines
|
* Copyright (C) 1999-2014, International Business Machines
|
||||||
* Corporation and others. All Rights Reserved.
|
* Corporation and others. All Rights Reserved.
|
||||||
*
|
*
|
||||||
******************************************************************************
|
******************************************************************************
|
||||||
@ -17,7 +17,7 @@
|
|||||||
#ifndef UBIDIIMP_H
|
#ifndef UBIDIIMP_H
|
||||||
#define UBIDIIMP_H
|
#define UBIDIIMP_H
|
||||||
|
|
||||||
/* set import/export definitions */
|
/* set import/export definitions */
|
||||||
#ifdef U_COMMON_IMPLEMENTATION
|
#ifdef U_COMMON_IMPLEMENTATION
|
||||||
|
|
||||||
#include "unicode/utypes.h"
|
#include "unicode/utypes.h"
|
||||||
@ -57,24 +57,24 @@ enum {
|
|||||||
LRI=U_LEFT_TO_RIGHT_ISOLATE, /* 20 */
|
LRI=U_LEFT_TO_RIGHT_ISOLATE, /* 20 */
|
||||||
RLI=U_RIGHT_TO_LEFT_ISOLATE, /* 21 */
|
RLI=U_RIGHT_TO_LEFT_ISOLATE, /* 21 */
|
||||||
PDI=U_POP_DIRECTIONAL_ISOLATE, /* 22 */
|
PDI=U_POP_DIRECTIONAL_ISOLATE, /* 22 */
|
||||||
ENL, /* 23 */
|
ENL, /* EN after W7 */ /* 23 */
|
||||||
ENR, /* 24 */
|
ENR, /* EN not subject to W7 */ /* 24 */
|
||||||
dirPropCount
|
dirPropCount
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/* Sometimes, bit values are more appropriate
|
||||||
* Sometimes, bit values are more appropriate
|
to deal with directionality properties.
|
||||||
* to deal with directionality properties.
|
Abbreviations in these macro names refer to names
|
||||||
* Abbreviations in these macro names refer to names
|
used in the BiDi algorithm.
|
||||||
* used in the BiDi algorithm.
|
*/
|
||||||
*/
|
|
||||||
#define DIRPROP_FLAG(dir) (1UL<<(dir))
|
#define DIRPROP_FLAG(dir) (1UL<<(dir))
|
||||||
|
/*
 * PURE_DIRPROP(prop) clears bits 5..7 (mask 0xE0) of prop and leaves all
 * other bits unchanged.
 * NOTE(review): this line originally ended in a run of '?' characters that
 * was part of the macro's replacement text, so every expansion was a syntax
 * error. Presumably the author was unsure about the mask value -- confirm
 * that 0xE0 is intended.
 */
#define PURE_DIRPROP(prop) ((prop)&~0xE0)
|
||||||
|
|
||||||
/* special flag for multiple runs from explicit embedding codes */
|
/* special flag for multiple runs from explicit embedding codes */
|
||||||
#define DIRPROP_FLAG_MULTI_RUNS (1UL<<31)
|
#define DIRPROP_FLAG_MULTI_RUNS (1UL<<31)
|
||||||
|
|
||||||
/* are there any characters that are LTR or RTL? */
|
/* are there any characters that are LTR or RTL? */
|
||||||
#define MASK_LTR (DIRPROP_FLAG(L)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN)|DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO)|DIRPROP_FLAG(LRI))
|
#define MASK_LTR (DIRPROP_FLAG(L)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(ENL)|DIRPROP_FLAG(ENR)|DIRPROP_FLAG(AN)|DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO)|DIRPROP_FLAG(LRI))
|
||||||
#define MASK_RTL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO)|DIRPROP_FLAG(RLI))
|
#define MASK_RTL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO)|DIRPROP_FLAG(RLI))
|
||||||
#define MASK_R_AL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL))
|
#define MASK_R_AL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL))
|
||||||
#define MASK_STRONG_EN_AN (DIRPROP_FLAG(L)|DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN))
|
#define MASK_STRONG_EN_AN (DIRPROP_FLAG(L)|DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN))
|
||||||
@ -97,9 +97,9 @@ enum {
|
|||||||
#define MASK_POSSIBLE_N (DIRPROP_FLAG(ON)|DIRPROP_FLAG(CS)|DIRPROP_FLAG(ES)|DIRPROP_FLAG(ET)|MASK_WS)
|
#define MASK_POSSIBLE_N (DIRPROP_FLAG(ON)|DIRPROP_FLAG(CS)|DIRPROP_FLAG(ES)|DIRPROP_FLAG(ET)|MASK_WS)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* These types may be changed to "e",
|
* These types may be changed to "e",
|
||||||
* the embedding type (L or R) of the run,
|
* the embedding type (L or R) of the run,
|
||||||
* in the BiDi algorithm (N2)
|
* in the BiDi algorithm (N2)
|
||||||
*/
|
*/
|
||||||
#define MASK_EMBEDDING (DIRPROP_FLAG(NSM)|MASK_POSSIBLE_N)
|
#define MASK_EMBEDDING (DIRPROP_FLAG(NSM)|MASK_POSSIBLE_N)
|
||||||
|
|
||||||
@ -109,17 +109,8 @@ enum {
|
|||||||
#define IS_DEFAULT_LEVEL(level) ((level)>=0xfe)
|
#define IS_DEFAULT_LEVEL(level) ((level)>=0xfe)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The following bit is ORed to the property of directional control
|
* The following bit is used for the directional isolate status.
|
||||||
* characters which are ignored: unmatched PDF or PDI; LRx, RLx or FSI
|
* Stack entries corresponding to isolate sequences are greater than ISOLATE.
|
||||||
* which would exceed the maximum explicit bidi level.
|
|
||||||
*/
|
|
||||||
#define IGNORE_CC 0x40
|
|
||||||
|
|
||||||
#define PURE_DIRPROP(prop) ((prop)&~IGNORE_CC)
|
|
||||||
|
|
||||||
/*
|
|
||||||
* The following bit is used for the directional isolate status.
|
|
||||||
* Stack entries corresponding to isolate sequences are greater than ISOLATE.
|
|
||||||
*/
|
*/
|
||||||
#define ISOLATE 0x0100
|
#define ISOLATE 0x0100
|
||||||
|
|
||||||
@ -168,14 +159,13 @@ typedef struct Opening {
|
|||||||
} Opening;
|
} Opening;
|
||||||
|
|
||||||
typedef struct IsoRun {
|
typedef struct IsoRun {
|
||||||
int32_t lastStrongPos; /* position of last strong char found in this run */
|
int32_t contextPos; /* position of char determining context */
|
||||||
int32_t contextPos; /* position of last char defining context */
|
|
||||||
uint16_t start; /* index of first opening entry for this run */
|
uint16_t start; /* index of first opening entry for this run */
|
||||||
uint16_t limit; /* index after last opening entry for this run */
|
uint16_t limit; /* index after last opening entry for this run */
|
||||||
UBiDiLevel level; /* level of this run */
|
UBiDiLevel level; /* level of this run */
|
||||||
DirProp lastStrong; /* bidi class of last strong char found in this run */
|
DirProp lastStrong; /* bidi class of last strong char found in this run */
|
||||||
|
DirProp lastBase; /* bidi class of last base char found in this run */
|
||||||
UBiDiDirection contextDir; /* L or R to use as context for following openings */
|
UBiDiDirection contextDir; /* L or R to use as context for following openings */
|
||||||
uint8_t filler; /* to complete a nice multiple of 4 chars */
|
|
||||||
} IsoRun;
|
} IsoRun;
|
||||||
|
|
||||||
typedef struct BracketData {
|
typedef struct BracketData {
|
||||||
@ -192,6 +182,7 @@ typedef struct BracketData {
|
|||||||
} BracketData;
|
} BracketData;
|
||||||
|
|
||||||
typedef struct Isolate {
|
typedef struct Isolate {
|
||||||
|
int32_t startON;
|
||||||
int32_t start1;
|
int32_t start1;
|
||||||
int16_t stateImp;
|
int16_t stateImp;
|
||||||
int16_t state;
|
int16_t state;
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
******************************************************************************
|
******************************************************************************
|
||||||
*
|
*
|
||||||
* Copyright (C) 1999-2013, International Business Machines
|
* Copyright (C) 1999-2014, International Business Machines
|
||||||
* Corporation and others. All Rights Reserved.
|
* Corporation and others. All Rights Reserved.
|
||||||
*
|
*
|
||||||
******************************************************************************
|
******************************************************************************
|
||||||
@ -110,7 +110,7 @@ setTrailingWSStart(UBiDi *pBiDi) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
/* go backwards across all WS, BN, explicit codes */
|
/* go backwards across all WS, BN, explicit codes */
|
||||||
while(start>0 && DIRPROP_FLAG(PURE_DIRPROP(dirProps[start-1]))&MASK_WS) {
|
while(start>0 && DIRPROP_FLAG(dirProps[start-1])&MASK_WS) {
|
||||||
--start;
|
--start;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user