98ab23527c
X-SVN-Rev: 19750
318 lines
11 KiB
C
318 lines
11 KiB
C
/*
******************************************************************************
*
*   Copyright (C) 1999-2006, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*   file name:  ubidiimp.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 1999aug06
*   created by: Markus W. Scherer
*/
|
|
|
|
#ifndef UBIDIIMP_H
|
|
#define UBIDIIMP_H
|
|
|
|
/* set import/export definitions */
|
|
#ifdef U_COMMON_IMPLEMENTATION
|
|
|
|
#include "unicode/utypes.h"
|
|
#include "unicode/uchar.h"
|
|
#include "ubidi_props.h"
|
|
|
|
/* miscellaneous definitions ---------------------------------------------- */
|
|
|
|
/* One directional property per text unit, stored in 8 bits (see CONTEXT_RTL for the top bit). */
typedef uint8_t DirProp;
/* Bit set of directional properties; individual bits are built with DIRPROP_FLAG(). */
typedef uint32_t Flags;
|
|
|
|
/* Comparing the description of the BiDi algorithm with this implementation
|
|
is easier with the same names for the BiDi types in the code as there.
|
|
See UCharDirection in uchar.h .
|
|
*/
|
|
enum {
|
|
L= U_LEFT_TO_RIGHT,
|
|
R= U_RIGHT_TO_LEFT,
|
|
EN= U_EUROPEAN_NUMBER,
|
|
ES= U_EUROPEAN_NUMBER_SEPARATOR,
|
|
ET= U_EUROPEAN_NUMBER_TERMINATOR,
|
|
AN= U_ARABIC_NUMBER,
|
|
CS= U_COMMON_NUMBER_SEPARATOR,
|
|
B= U_BLOCK_SEPARATOR,
|
|
S= U_SEGMENT_SEPARATOR,
|
|
WS= U_WHITE_SPACE_NEUTRAL,
|
|
ON= U_OTHER_NEUTRAL,
|
|
LRE=U_LEFT_TO_RIGHT_EMBEDDING,
|
|
LRO=U_LEFT_TO_RIGHT_OVERRIDE,
|
|
AL= U_RIGHT_TO_LEFT_ARABIC,
|
|
RLE=U_RIGHT_TO_LEFT_EMBEDDING,
|
|
RLO=U_RIGHT_TO_LEFT_OVERRIDE,
|
|
PDF=U_POP_DIRECTIONAL_FORMAT,
|
|
NSM=U_DIR_NON_SPACING_MARK,
|
|
BN= U_BOUNDARY_NEUTRAL,
|
|
dirPropCount
|
|
};
|
|
|
|
/*
 * Sometimes, bit values are more appropriate
 * to deal with directionality properties.
 * Abbreviations in these macro names refer to names
 * used in the BiDi algorithm.
 *
 * DIRPROP_FLAG(dir) yields an unsigned long with only bit number dir set.
 */
#define DIRPROP_FLAG(dir) (1UL<<(dir))

/* special flag for multiple runs from explicit embedding codes */
#define DIRPROP_FLAG_MULTI_RUNS (1UL<<31)

/* are there any characters that are LTR or RTL? */
#define MASK_LTR (DIRPROP_FLAG(L)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN)|DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO))
#define MASK_RTL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO))

/* explicit embedding codes */
#define MASK_LRX (DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO))
#define MASK_RLX (DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO))
#define MASK_OVERRIDE (DIRPROP_FLAG(LRO)|DIRPROP_FLAG(RLO))

#define MASK_EXPLICIT (MASK_LRX|MASK_RLX|DIRPROP_FLAG(PDF))
#define MASK_BN_EXPLICIT (DIRPROP_FLAG(BN)|MASK_EXPLICIT)

/* paragraph and segment separators */
#define MASK_B_S (DIRPROP_FLAG(B)|DIRPROP_FLAG(S))

/* all types that are counted as White Space or Neutral in some steps */
#define MASK_WS (MASK_B_S|DIRPROP_FLAG(WS)|MASK_BN_EXPLICIT)
#define MASK_N (DIRPROP_FLAG(ON)|MASK_WS)

/* all types that are included in a sequence of European Terminators for (W5) */
#define MASK_ET_NSM_BN (DIRPROP_FLAG(ET)|DIRPROP_FLAG(NSM)|MASK_BN_EXPLICIT)

/* types that are neutrals or could become neutrals in (Wn) */
#define MASK_POSSIBLE_N (DIRPROP_FLAG(CS)|DIRPROP_FLAG(ES)|DIRPROP_FLAG(ET)|MASK_N)

/*
 * These types may be changed to "e",
 * the embedding type (L or R) of the run,
 * in the BiDi algorithm (N2)
 */
#define MASK_EMBEDDING (DIRPROP_FLAG(NSM)|MASK_POSSIBLE_N)
|
|
|
|
/*
 * the dirProp's L and R are defined to 0 and 1 values in UCharDirection,
 * so the lowest bit of a level (even/odd) can be used directly as a DirProp
 */
#define GET_LR_FROM_LEVEL(level) ((DirProp)((level)&1))

/*
 * Nonzero for level values of 0xfe and above.
 * NOTE(review): presumably these are the UBIDI_DEFAULT_xxx pseudo-levels
 * declared in ubidi.h - confirm there.
 */
#define IS_DEFAULT_LEVEL(level) ((level)>=0xfe)
|
|
|
|
/*
 * The following bit is ORed to the property of characters in paragraphs
 * with contextual RTL direction when paraLevel is contextual.
 */
#define CONTEXT_RTL 0x80
/* dir with the CONTEXT_RTL bit cleared */
#define NO_CONTEXT_RTL(dir) ((dir)&~CONTEXT_RTL)
/*
 * The following is a variant of DIRPROP_FLAG which ignores the CONTEXT_RTL bit.
 */
#define DIRPROP_FLAG_NC(dir) (1UL<<(NO_CONTEXT_RTL(dir)))

/*
 * Paragraph level that applies at position index of the text of ubidi.
 * If ubidi->defaultParaLevel is nonzero, the level is bit 7 (CONTEXT_RTL)
 * of dirProps[index], i.e. 0 or 1; otherwise it is ubidi->paraLevel.
 * Note that ubidi is evaluated more than once.
 */
#define GET_PARALEVEL(ubidi, index) \
            ((UBiDiLevel)((ubidi)->defaultParaLevel ? (ubidi)->dirProps[(index)]>>7 \
                                                    : (ubidi)->paraLevel))
|
|
|
|
/* Paragraph type for multiple paragraph support ---------------------------- */
/* one entry of the UBiDi paras array, which holds the limits of paragraphs */
typedef int32_t Para;

/* code points of carriage return and line feed */
#define CR  0x000D
#define LF  0x000A
|
|
|
|
/* Run structure for reordering --------------------------------------------- */

typedef struct Run {
    int32_t logicalStart;   /* first character of the run; b31 indicates even/odd level */
    int32_t visualLimit;    /* last visual position of the run +1 */
} Run;
|
|
|
|
/* in a Run, logicalStart will get this bit set if the run level is odd */
#define INDEX_ODD_BIT (1UL<<31)

/*
 * Helpers for combining a logical index with the odd/even bit of a level.
 *
 * All macro arguments are parenthesized so that expression arguments such as
 * "a|b" or "level&1" are not split up by operator precedence.
 * The odd bit is produced with an unsigned shift: only the lowest bit of
 * level survives the shift by 31, and shifting a signed 1 into the sign bit
 * would be undefined behavior.
 *
 * MAKE_INDEX_ODD_PAIR    - index with bit 31 taken from the low bit of level
 * ADD_ODD_BIT_FROM_LEVEL - ORs that bit into the lvalue x
 * REMOVE_ODD_BIT         - clears bit 31 in the lvalue x
 */
#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|(int32_t)((uint32_t)(level)<<31))
#define ADD_ODD_BIT_FROM_LEVEL(x, level)  ((x)|=(int32_t)((uint32_t)(level)<<31))
#define REMOVE_ODD_BIT(x)                 ((x)&=~INDEX_ODD_BIT)

/*
 * GET_INDEX   - x without bit 31; the result has the type of INDEX_ODD_BIT
 *               (unsigned long) and is meant to be stored back into int32_t
 * GET_ODD_BIT - bit 31 of x as 0 or 1
 * IS_ODD_RUN / IS_EVEN_RUN - test whether bit 31 of x is set / clear
 */
#define GET_INDEX(x)   ((x)&~INDEX_ODD_BIT)
#define GET_ODD_BIT(x) ((uint32_t)(x)>>31)
#define IS_ODD_RUN(x)  (((x)&INDEX_ODD_BIT)!=0)
#define IS_EVEN_RUN(x) (((x)&INDEX_ODD_BIT)==0)
|
|
|
|
U_CFUNC UBool
|
|
ubidi_getRuns(UBiDi *pBiDi);
|
|
|
|
/* InsertPoints structure for noting where to put BiDi marks ---------------- */
|
|
|
|
typedef struct Point {
|
|
int32_t pos; /* position in text */
|
|
UChar c; /* UChar to insert */
|
|
char where; /* BEFORE or AFTER */
|
|
char filler; /* pack to 8 bytes */
|
|
} Point;
|
|
|
|
typedef struct InsertPoints {
|
|
int32_t capacity; /* number of points allocated */
|
|
int32_t size; /* number of points used */
|
|
int32_t confirmed; /* number of points confirmed */
|
|
UErrorCode errorCode; /* for eventual memory shortage */
|
|
Point *points; /* pointer to array of points */
|
|
} InsertPoints;
|
|
|
|
|
|
/* UBiDi structure ----------------------------------------------------------- */
|
|
|
|
struct UBiDi {
|
|
/* pointer to parent paragraph object (pointer to self if this object is
|
|
* a paragraph object); set to NULL in a newly opened object; set to a
|
|
* real value after a successful execution of ubidi_setPara or ubidi_setLine
|
|
*/
|
|
const UBiDi * pParaBiDi;
|
|
|
|
const UBiDiProps *bdp;
|
|
|
|
/* alias pointer to the current text */
|
|
const UChar *text;
|
|
|
|
/* length of the current text */
|
|
int32_t originalLength;
|
|
|
|
/* if the UBIDI_OPTION_STREAMING option is set, this is the length
|
|
* of text actually processed by ubidi_setPara, which may be shorter than
|
|
* the original length.
|
|
* Otherwise, it is identical to the original length.
|
|
*/
|
|
int32_t length;
|
|
|
|
/* if the UBIDI_OPTION_REMOVE_CONTROLS option is set, and/or
|
|
* marks are allowed to be inserted in one of the reordering mode, the
|
|
* length of the result string may be different from the processed length.
|
|
*/
|
|
int32_t resultLength;
|
|
|
|
/* memory sizes in bytes */
|
|
int32_t dirPropsSize, levelsSize, parasSize, runsSize;
|
|
|
|
/* allocated memory */
|
|
DirProp *dirPropsMemory;
|
|
UBiDiLevel *levelsMemory;
|
|
Para *parasMemory;
|
|
Run *runsMemory;
|
|
|
|
/* indicators for whether memory may be allocated after ubidi_open() */
|
|
UBool mayAllocateText, mayAllocateRuns;
|
|
|
|
/* arrays with one value per text-character */
|
|
const DirProp *dirProps;
|
|
UBiDiLevel *levels;
|
|
|
|
/* are we performing an approximation of the "inverse BiDi" algorithm? */
|
|
UBool isInverse;
|
|
|
|
/* are we using the basic algorithm or its variation? */
|
|
UBiDiReorderingMode reorderingMode;
|
|
|
|
/* UBIDI_REORDER_xxx values must be ordered so that all the regular
|
|
* logical to visual modes come first, and all inverse BiDi modes
|
|
* come last.
|
|
*/
|
|
#define UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL UBIDI_REORDER_NUMBERS_SPECIAL
|
|
|
|
/* bitmask for reordering options */
|
|
uint16_t reorderingOptions;
|
|
|
|
/* must block separators receive level 0? */
|
|
UBool orderParagraphsLTR;
|
|
|
|
/* the paragraph level */
|
|
UBiDiLevel paraLevel;
|
|
/* original paraLevel when contextual */
|
|
/* must be one of UBIDI_DEFAULT_xxx or 0 if not contextual */
|
|
UBiDiLevel defaultParaLevel;
|
|
|
|
/* the following is set in ubidi_setPara, used in processPropertySeq */
|
|
const struct ImpTabPair * pImpTabPair; /* pointer to levels state table pair */
|
|
|
|
/* the overall paragraph or line directionality - see UBiDiDirection */
|
|
UBiDiDirection direction;
|
|
|
|
/* flags is a bit set for which directional properties are in the text */
|
|
Flags flags;
|
|
|
|
/* characters after trailingWSStart are WS and are */
|
|
/* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */
|
|
int32_t trailingWSStart;
|
|
|
|
/* fields for paragraph handling */
|
|
int32_t paraCount; /* set in getDirProps() */
|
|
Para *paras; /* limits of paragraphs, filled in
|
|
ResolveExplicitLevels() or CheckExplicitLevels() */
|
|
|
|
/* for single paragraph text, we only need a tiny array of paras (no malloc()) */
|
|
Para simpleParas[1];
|
|
|
|
/* fields for line reordering */
|
|
int32_t runCount; /* ==-1: runs not set up yet */
|
|
Run *runs;
|
|
|
|
/* for non-mixed text, we only need a tiny array of runs (no malloc()) */
|
|
Run simpleRuns[1];
|
|
|
|
/* for inverse Bidi with insertion of directional marks */
|
|
InsertPoints insertPoints;
|
|
|
|
/* for option UBIDI_OPTION_REMOVE_CONTROLS */
|
|
int32_t countBiDiControls;
|
|
|
|
/* for Bidi class callback */
|
|
UBiDiClassCallback *fnClassCallback; /* action pointer */
|
|
const void *coClassCallback; /* context pointer */
|
|
};
|
|
|
|
/*
 * Validity checks for a UBiDi pointer x, based on its pParaBiDi field.
 * IS_VALID_PARA:         x is non-NULL and is its own parent.
 * IS_VALID_LINE:         x is non-NULL and its parent is non-NULL and is
 *                        its own parent (this also holds for a valid
 *                        paragraph object itself).
 * IS_VALID_PARA_OR_LINE: either of the above.
 * x is evaluated several times, so it must not have side effects.
 */
#define IS_VALID_PARA(x) ((x) && ((x)->pParaBiDi==(x)))
#define IS_VALID_LINE(x) ((x) && ((x)->pParaBiDi) && ((x)->pParaBiDi->pParaBiDi==(x)->pParaBiDi))
#define IS_VALID_PARA_OR_LINE(x) ((x) && ((x)->pParaBiDi==(x) || (((x)->pParaBiDi) && (x)->pParaBiDi->pParaBiDi==(x)->pParaBiDi)))
|
|
|
|
/* helper function to (re)allocate memory if allowed */
|
|
U_CFUNC UBool
|
|
ubidi_getMemory(void **pMemory, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded);
|
|
|
|
/*
 * helper macros for each allocated array in UBiDi
 *
 * Each macro forwards to ubidi_getMemory() with the matching xxxMemory and
 * xxxSize fields of pBiDi and the matching mayAllocateXxx flag.
 * For dirProps and levels the requested size is length itself;
 * for runs it is length*sizeof(Run).
 */
#define getDirPropsMemory(pBiDi, length) \
        ubidi_getMemory((void **)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \
                        (pBiDi)->mayAllocateText, (length))

#define getLevelsMemory(pBiDi, length) \
        ubidi_getMemory((void **)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \
                        (pBiDi)->mayAllocateText, (length))

#define getRunsMemory(pBiDi, length) \
        ubidi_getMemory((void **)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \
                        (pBiDi)->mayAllocateRuns, (length)*sizeof(Run))
|
|
|
|
/*
 * additional macros used by ubidi_open() - always allow allocation
 *
 * Same as the helper macros for each array, except that TRUE is passed as
 * the mayAllocate argument of ubidi_getMemory().
 * For paras and runs the requested size is length times the element size.
 */
#define getInitialDirPropsMemory(pBiDi, length) \
        ubidi_getMemory((void **)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \
                        TRUE, (length))

#define getInitialLevelsMemory(pBiDi, length) \
        ubidi_getMemory((void **)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \
                        TRUE, (length))

#define getInitialParasMemory(pBiDi, length) \
        ubidi_getMemory((void **)&(pBiDi)->parasMemory, &(pBiDi)->parasSize, \
                        TRUE, (length)*sizeof(Para))

#define getInitialRunsMemory(pBiDi, length) \
        ubidi_getMemory((void **)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \
                        TRUE, (length)*sizeof(Run))
|
|
|
|
#endif
|
|
|
|
#endif
|