move the default text breaking algorithm impl from HarfBuzz to Qt
There are several reasons to do this: * text breaking is not a shaper's job; * since the text breaking rules are bound to a specific Unicode version, updating Qt's internal Unicode data would require updating the data in HB as well; * it makes porting to HarfBuzz-NG somewhat easier. Change-Id: I0bbf8e8a343bc074696f4ddf2ae4e7fa32a61629 Reviewed-by: Lars Knoll <lars.knoll@nokia.com>
This commit is contained in:
parent
8c0048a377
commit
cbfdec6603
67
src/3rdparty/harfbuzz/src/harfbuzz-external.h
vendored
67
src/3rdparty/harfbuzz/src/harfbuzz-external.h
vendored
@ -34,23 +34,7 @@ HB_BEGIN_HEADER
|
||||
They need to be provided by the application/library
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
see http://www.unicode.org/reports/tr14/tr14-19.html
|
||||
we don't use the XX, AI and CB properties and map them to AL instead.
|
||||
as we don't support any EBCDIC-based OSes, NL is ignored and mapped to AL as well.
|
||||
*/
|
||||
/*
 * Line break classes, named after the two-letter property values of UAX #14.
 * Enumerators are numbered consecutively from 0 in the order listed; do not
 * reorder them, since the numeric order is part of the contract with code
 * that compares or indexes by class.
 */
typedef enum {
    /* classes with pair-wise break rules */
    HB_LineBreak_OP = 0,
    HB_LineBreak_CL,
    HB_LineBreak_QU,
    HB_LineBreak_GL,
    HB_LineBreak_NS,
    HB_LineBreak_EX,
    HB_LineBreak_SY,
    HB_LineBreak_IS,
    HB_LineBreak_PR,
    HB_LineBreak_PO,
    HB_LineBreak_NU,
    HB_LineBreak_AL,
    HB_LineBreak_ID,
    HB_LineBreak_IN,
    HB_LineBreak_HY,
    HB_LineBreak_BA,
    HB_LineBreak_BB,
    HB_LineBreak_B2,
    HB_LineBreak_ZW,
    HB_LineBreak_CM,
    HB_LineBreak_WJ,
    HB_LineBreak_H2,
    HB_LineBreak_H3,
    HB_LineBreak_JL,
    HB_LineBreak_JV,
    HB_LineBreak_JT,
    /* complex-context and surrogate classes */
    HB_LineBreak_SA,
    HB_LineBreak_SG,
    /* space and hard line terminators */
    HB_LineBreak_SP,
    HB_LineBreak_CR,
    HB_LineBreak_LF,
    HB_LineBreak_BK
} HB_LineBreakClass;
|
||||
|
||||
typedef enum
|
||||
typedef enum
|
||||
{
|
||||
HB_Mark_NonSpacing, /* Mn */
|
||||
HB_Mark_SpacingCombining, /* Mc */
|
||||
@ -90,55 +74,6 @@ typedef enum
|
||||
HB_Symbol_Other /* So */
|
||||
} HB_CharCategory;
|
||||
|
||||
/*
 * Grapheme cluster break classes. Values are consecutive from 0 in the order
 * listed; the order must stay stable because the values are used as indices.
 */
typedef enum {
    HB_Grapheme_Other   = 0,
    HB_Grapheme_CR      = 1,
    HB_Grapheme_LF      = 2,
    HB_Grapheme_Control = 3,
    HB_Grapheme_Extend  = 4,
    HB_Grapheme_L       = 5,
    HB_Grapheme_V       = 6,
    HB_Grapheme_T       = 7,
    HB_Grapheme_LV      = 8,
    HB_Grapheme_LVT     = 9
} HB_GraphemeClass;
|
||||
|
||||
|
||||
/*
 * Word break classes. Values are consecutive from 0 in the order listed; the
 * order must stay stable because the values are used as indices.
 */
typedef enum {
    HB_Word_Other        = 0,
    HB_Word_Format       = 1,
    HB_Word_Katakana     = 2,
    HB_Word_ALetter      = 3,
    HB_Word_MidLetter    = 4,
    HB_Word_MidNum       = 5,
    HB_Word_Numeric      = 6,
    HB_Word_ExtendNumLet = 7
} HB_WordClass;
|
||||
|
||||
|
||||
/*
 * Sentence break classes. Values are consecutive from 0 in the order listed;
 * the order must stay stable because the values are used as indices.
 */
typedef enum {
    HB_Sentence_Other   = 0,
    HB_Sentence_Sep     = 1,
    HB_Sentence_Format  = 2,
    HB_Sentence_Sp      = 3,
    HB_Sentence_Lower   = 4,
    HB_Sentence_Upper   = 5,
    HB_Sentence_OLetter = 6,
    HB_Sentence_Numeric = 7,
    HB_Sentence_ATerm   = 8,
    HB_Sentence_STerm   = 9,
    HB_Sentence_Close   = 10
} HB_SentenceClass;
|
||||
|
||||
HB_GraphemeClass HB_GetGraphemeClass(HB_UChar32 ch);
|
||||
HB_WordClass HB_GetWordClass(HB_UChar32 ch);
|
||||
HB_SentenceClass HB_GetSentenceClass(HB_UChar32 ch);
|
||||
HB_LineBreakClass HB_GetLineBreakClass(HB_UChar32 ch);
|
||||
|
||||
void HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *grapheme, HB_LineBreakClass *lineBreak);
|
||||
void HB_GetUnicodeCharProperties(HB_UChar32 ch, HB_CharCategory *category, int *combiningClass);
|
||||
HB_CharCategory HB_GetUnicodeCharCategory(HB_UChar32 ch);
|
||||
int HB_GetUnicodeCharCombiningClass(HB_UChar32 ch);
|
||||
|
340
src/3rdparty/harfbuzz/src/harfbuzz-shaper.cpp
vendored
340
src/3rdparty/harfbuzz/src/harfbuzz-shaper.cpp
vendored
@ -32,205 +32,6 @@
|
||||
#define HB_MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
#define HB_MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// The line break algorithm. See http://www.unicode.org/reports/tr14/tr14-13.html
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------
|
||||
|
||||
/* The Unicode algorithm does in our opinion allow line breaks at some
|
||||
places they shouldn't be allowed. The following changes were thus
|
||||
made in comparison to the Unicode reference:
|
||||
|
||||
EX->AL from DB to IB
|
||||
SY->AL from DB to IB
|
||||
SY->PO from DB to IB
|
||||
SY->PR from DB to IB
|
||||
SY->OP from DB to IB
|
||||
AL->PR from DB to IB
|
||||
AL->PO from DB to IB
|
||||
PR->PR from DB to IB
|
||||
PO->PO from DB to IB
|
||||
PR->PO from DB to IB
|
||||
PO->PR from DB to IB
|
||||
HY->PO from DB to IB
|
||||
HY->PR from DB to IB
|
||||
HY->OP from DB to IB
|
||||
NU->EX from PB to IB
|
||||
EX->PO from DB to IB
|
||||
*/
|
||||
|
||||
// The following line break classes are not treated by the table:
|
||||
// AI, BK, CB, CR, LF, NL, SA, SG, SP, XX
|
||||
|
||||
// Outcome stored in each cell of the pair table. The two-letter abbreviation
// in each comment is the name used for that outcome inside the table.
enum break_class {
    // the first 4 values have to agree with the enum in QCharAttributes
    ProhibitedBreak          = 0, // PB
    DirectBreak              = 1, // DB
    IndirectBreak            = 2, // IB
    CombiningIndirectBreak   = 3, // CI
    CombiningProhibitedBreak = 4  // CP
};
|
||||
#define DB DirectBreak
|
||||
#define IB IndirectBreak
|
||||
#define CI CombiningIndirectBreak
|
||||
#define CP CombiningProhibitedBreak
|
||||
#define PB ProhibitedBreak
|
||||
|
||||
static const hb_uint8 breakTable[HB_LineBreak_JT+1][HB_LineBreak_JT+1] =
|
||||
{
|
||||
/* OP CL QU GL NS EX SY IS PR PO NU AL ID IN HY BA BB B2 ZW CM WJ H2 H3 JL JV JT */
|
||||
/* OP */ { PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, CP, PB, PB, PB, PB, PB, PB },
|
||||
/* CL */ { DB, PB, IB, IB, PB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* QU */ { PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
|
||||
/* GL */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
|
||||
/* NS */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* EX */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* SY */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* IS */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* PR */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, DB, IB, IB, DB, DB, PB, CI, PB, IB, IB, IB, IB, IB },
|
||||
/* PO */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* NU */ { IB, PB, IB, IB, IB, IB, PB, PB, IB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* AL */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* ID */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* IN */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* HY */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* BA */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* BB */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
|
||||
/* B2 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, PB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* ZW */ { DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, PB, DB, DB, DB, DB, DB, DB, DB },
|
||||
/* CM */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* WJ */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
|
||||
/* H2 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, IB, IB },
|
||||
/* H3 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, IB },
|
||||
/* JL */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, IB, IB, IB, IB, DB },
|
||||
/* JV */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, IB, IB },
|
||||
/* JT */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, IB }
|
||||
};
|
||||
#undef DB
|
||||
#undef IB
|
||||
#undef CI
|
||||
#undef CP
|
||||
#undef PB
|
||||
|
||||
static const hb_uint8 graphemeTable[HB_Grapheme_LVT + 1][HB_Grapheme_LVT + 1] =
|
||||
{
|
||||
// Other, CR, LF, Control,Extend,L, V, T, LV, LVT
|
||||
{ true , true , true , true , true , true , true , true , true , true }, // Other,
|
||||
{ true , true , true , true , true , true , true , true , true , true }, // CR,
|
||||
{ true , false, true , true , true , true , true , true , true , true }, // LF,
|
||||
{ true , true , true , true , true , true , true , true , true , true }, // Control,
|
||||
{ false, true , true , true , false, false, false, false, false, false }, // Extend,
|
||||
{ true , true , true , true , true , false, true , true , true , true }, // L,
|
||||
{ true , true , true , true , true , false, false, true , false, true }, // V,
|
||||
{ true , true , true , true , true , true , false, false, false, false }, // T,
|
||||
{ true , true , true , true , true , false, true , true , true , true }, // LV,
|
||||
{ true , true , true , true , true , false, true , true , true , true }, // LVT
|
||||
};
|
||||
|
||||
static void calcLineBreaks(const HB_UChar16 *uc, hb_uint32 len, HB_CharAttributes *charAttributes)
|
||||
{
|
||||
if (!len)
|
||||
return;
|
||||
|
||||
// ##### can this fail if the first char is a surrogate?
|
||||
HB_LineBreakClass cls;
|
||||
HB_GraphemeClass grapheme;
|
||||
HB_GetGraphemeAndLineBreakClass(*uc, &grapheme, &cls);
|
||||
// handle case where input starts with an LF
|
||||
if (cls == HB_LineBreak_LF)
|
||||
cls = HB_LineBreak_BK;
|
||||
|
||||
charAttributes[0].whiteSpace = (cls == HB_LineBreak_SP || cls == HB_LineBreak_BK);
|
||||
charAttributes[0].charStop = true;
|
||||
|
||||
int lcls = cls;
|
||||
for (hb_uint32 i = 1; i < len; ++i) {
|
||||
charAttributes[i].whiteSpace = false;
|
||||
charAttributes[i].charStop = true;
|
||||
|
||||
HB_UChar32 code = uc[i];
|
||||
HB_GraphemeClass ngrapheme;
|
||||
HB_LineBreakClass ncls;
|
||||
HB_GetGraphemeAndLineBreakClass(code, &ngrapheme, &ncls);
|
||||
charAttributes[i].charStop = graphemeTable[ngrapheme][grapheme];
|
||||
// handle surrogates
|
||||
if (ncls == HB_LineBreak_SG) {
|
||||
if (HB_IsHighSurrogate(uc[i]) && i < len - 1 && HB_IsLowSurrogate(uc[i+1])) {
|
||||
continue;
|
||||
} else if (HB_IsLowSurrogate(uc[i]) && HB_IsHighSurrogate(uc[i-1])) {
|
||||
code = HB_SurrogateToUcs4(uc[i-1], uc[i]);
|
||||
HB_GetGraphemeAndLineBreakClass(code, &ngrapheme, &ncls);
|
||||
charAttributes[i].charStop = false;
|
||||
} else {
|
||||
ncls = HB_LineBreak_AL;
|
||||
}
|
||||
}
|
||||
|
||||
// set white space and char stop flag
|
||||
if (ncls >= HB_LineBreak_SP)
|
||||
charAttributes[i].whiteSpace = true;
|
||||
|
||||
HB_LineBreakType lineBreakType = HB_NoBreak;
|
||||
if (cls >= HB_LineBreak_LF) {
|
||||
lineBreakType = HB_ForcedBreak;
|
||||
} else if(cls == HB_LineBreak_CR) {
|
||||
lineBreakType = (ncls == HB_LineBreak_LF) ? HB_NoBreak : HB_ForcedBreak;
|
||||
}
|
||||
|
||||
if (ncls == HB_LineBreak_SP)
|
||||
goto next_no_cls_update;
|
||||
if (ncls >= HB_LineBreak_CR)
|
||||
goto next;
|
||||
|
||||
{
|
||||
int tcls = ncls;
|
||||
// for south east asian chars that require a complex (dictionary analysis), the unicode
|
||||
// standard recommends to treat them as AL. thai_attributes and other attribute methods that
|
||||
// do dictionary analysis can override
|
||||
if (tcls >= HB_LineBreak_SA)
|
||||
tcls = HB_LineBreak_AL;
|
||||
if (cls >= HB_LineBreak_SA)
|
||||
cls = HB_LineBreak_AL;
|
||||
|
||||
int brk = breakTable[cls][tcls];
|
||||
switch (brk) {
|
||||
case DirectBreak:
|
||||
lineBreakType = HB_Break;
|
||||
if (uc[i-1] == 0xad) // soft hyphen
|
||||
lineBreakType = HB_SoftHyphen;
|
||||
break;
|
||||
case IndirectBreak:
|
||||
lineBreakType = (lcls == HB_LineBreak_SP) ? HB_Break : HB_NoBreak;
|
||||
break;
|
||||
case CombiningIndirectBreak:
|
||||
lineBreakType = HB_NoBreak;
|
||||
if (lcls == HB_LineBreak_SP){
|
||||
if (i > 1)
|
||||
charAttributes[i-2].lineBreakType = HB_Break;
|
||||
} else {
|
||||
goto next_no_cls_update;
|
||||
}
|
||||
break;
|
||||
case CombiningProhibitedBreak:
|
||||
lineBreakType = HB_NoBreak;
|
||||
if (lcls != HB_LineBreak_SP)
|
||||
goto next_no_cls_update;
|
||||
case ProhibitedBreak:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
next:
|
||||
cls = ncls;
|
||||
next_no_cls_update:
|
||||
lcls = ncls;
|
||||
grapheme = ngrapheme;
|
||||
charAttributes[i-1].lineBreakType = lineBreakType;
|
||||
}
|
||||
charAttributes[len-1].lineBreakType = HB_ForcedBreak;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Basic processing
|
||||
@ -679,13 +480,12 @@ const HB_ScriptEngine HB_ScriptEngines[] = {
|
||||
{ HB_ArabicShape, 0}
|
||||
};
|
||||
|
||||
void HB_GetCharAttributes(const HB_UChar16 *string, hb_uint32 stringLength,
|
||||
const HB_ScriptItem *items, hb_uint32 numItems,
|
||||
HB_CharAttributes *attributes)
|
||||
void HB_GetTailoredCharAttributes(const HB_UChar16 *string, hb_uint32 stringLength,
|
||||
const HB_ScriptItem *items, hb_uint32 numItems,
|
||||
HB_CharAttributes *attributes)
|
||||
{
|
||||
memset(attributes, 0, stringLength * sizeof(HB_CharAttributes));
|
||||
calcLineBreaks(string, stringLength, attributes);
|
||||
|
||||
if (stringLength == 0)
|
||||
return;
|
||||
for (hb_uint32 i = 0; i < numItems; ++i) {
|
||||
HB_Script script = items[i].script;
|
||||
if (script == HB_Script_Inherited)
|
||||
@ -698,136 +498,6 @@ void HB_GetCharAttributes(const HB_UChar16 *string, hb_uint32 stringLength,
|
||||
}
|
||||
|
||||
|
||||
// Verdict for a pair of adjacent word classes.
enum BreakRule {
    NoBreak = 0,
    Break   = 1,
    Middle  = 2
};
|
||||
|
||||
static const hb_uint8 wordbreakTable[HB_Word_ExtendNumLet + 1][HB_Word_ExtendNumLet + 1] = {
|
||||
// Other Format Katakana ALetter MidLetter MidNum Numeric ExtendNumLet
|
||||
{ Break, Break, Break, Break, Break, Break, Break, Break }, // Other
|
||||
{ Break, Break, Break, Break, Break, Break, Break, Break }, // Format
|
||||
{ Break, Break, NoBreak, Break, Break, Break, Break, NoBreak }, // Katakana
|
||||
{ Break, Break, Break, NoBreak, Middle, Break, NoBreak, NoBreak }, // ALetter
|
||||
{ Break, Break, Break, Break, Break, Break, Break, Break }, // MidLetter
|
||||
{ Break, Break, Break, Break, Break, Break, Break, Break }, // MidNum
|
||||
{ Break, Break, Break, NoBreak, Break, Middle, NoBreak, NoBreak }, // Numeric
|
||||
{ Break, Break, NoBreak, NoBreak, Break, Break, NoBreak, NoBreak }, // ExtendNumLet
|
||||
};
|
||||
|
||||
void HB_GetWordBoundaries(const HB_UChar16 *string, hb_uint32 stringLength,
|
||||
const HB_ScriptItem * /*items*/, hb_uint32 /*numItems*/,
|
||||
HB_CharAttributes *attributes)
|
||||
{
|
||||
if (stringLength == 0)
|
||||
return;
|
||||
unsigned int brk = HB_GetWordClass(string[0]);
|
||||
attributes[0].wordBoundary = true;
|
||||
for (hb_uint32 i = 1; i < stringLength; ++i) {
|
||||
if (!attributes[i].charStop) {
|
||||
attributes[i].wordBoundary = false;
|
||||
continue;
|
||||
}
|
||||
hb_uint32 nbrk = HB_GetWordClass(string[i]);
|
||||
if (nbrk == HB_Word_Format) {
|
||||
attributes[i].wordBoundary = (HB_GetSentenceClass(string[i-1]) == HB_Sentence_Sep);
|
||||
continue;
|
||||
}
|
||||
BreakRule rule = (BreakRule)wordbreakTable[brk][nbrk];
|
||||
if (rule == Middle) {
|
||||
rule = Break;
|
||||
hb_uint32 lookahead = i + 1;
|
||||
while (lookahead < stringLength) {
|
||||
hb_uint32 testbrk = HB_GetWordClass(string[lookahead]);
|
||||
if (testbrk == HB_Word_Format && HB_GetSentenceClass(string[lookahead]) != HB_Sentence_Sep) {
|
||||
++lookahead;
|
||||
continue;
|
||||
}
|
||||
if (testbrk == brk) {
|
||||
rule = NoBreak;
|
||||
while (i < lookahead)
|
||||
attributes[i++].wordBoundary = false;
|
||||
nbrk = testbrk;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
attributes[i].wordBoundary = (rule == Break);
|
||||
brk = nbrk;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// States of the sentence-break state machine. Values are consecutive from 0
// in the order listed.
enum SentenceBreakStates {
    SB_Initial = 0,
    SB_Upper   = 1,
    SB_UpATerm = 2,
    SB_ATerm   = 3,
    SB_ATermC  = 4,
    SB_ACS     = 5,
    SB_STerm   = 6,
    SB_STermC  = 7,
    SB_SCS     = 8,
    SB_BAfter  = 9,
    SB_Break   = 10,
    SB_Look    = 11
};
|
||||
|
||||
static const hb_uint8 sentenceBreakTable[HB_Sentence_Close + 1][HB_Sentence_Close + 1] = {
|
||||
// Other Sep Format Sp Lower Upper OLetter Numeric ATerm STerm Close
|
||||
{ SB_Initial, SB_BAfter , SB_Initial, SB_Initial, SB_Initial, SB_Upper , SB_Initial, SB_Initial, SB_ATerm , SB_STerm , SB_Initial }, // SB_Initial,
|
||||
{ SB_Initial, SB_BAfter , SB_Upper , SB_Initial, SB_Initial, SB_Upper , SB_Initial, SB_Initial, SB_UpATerm, SB_STerm , SB_Initial }, // SB_Upper
|
||||
|
||||
{ SB_Look , SB_BAfter , SB_UpATerm, SB_ACS , SB_Initial, SB_Upper , SB_Break , SB_Initial, SB_ATerm , SB_STerm , SB_ATermC }, // SB_UpATerm
|
||||
{ SB_Look , SB_BAfter , SB_ATerm , SB_ACS , SB_Initial, SB_Break , SB_Break , SB_Initial, SB_ATerm , SB_STerm , SB_ATermC }, // SB_ATerm
|
||||
{ SB_Look , SB_BAfter , SB_ATermC , SB_ACS , SB_Initial, SB_Break , SB_Break , SB_Look , SB_ATerm , SB_STerm , SB_ATermC }, // SB_ATermC,
|
||||
{ SB_Look , SB_BAfter , SB_ACS , SB_ACS , SB_Initial, SB_Break , SB_Break , SB_Look , SB_ATerm , SB_STerm , SB_Look }, // SB_ACS,
|
||||
|
||||
{ SB_Break , SB_BAfter , SB_STerm , SB_SCS , SB_Break , SB_Break , SB_Break , SB_Break , SB_ATerm , SB_STerm , SB_STermC }, // SB_STerm,
|
||||
{ SB_Break , SB_BAfter , SB_STermC , SB_SCS , SB_Break , SB_Break , SB_Break , SB_Break , SB_ATerm , SB_STerm , SB_STermC }, // SB_STermC,
|
||||
{ SB_Break , SB_BAfter , SB_SCS , SB_SCS , SB_Break , SB_Break , SB_Break , SB_Break , SB_ATerm , SB_STerm , SB_Break }, // SB_SCS,
|
||||
{ SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break }, // SB_BAfter,
|
||||
};
|
||||
|
||||
void HB_GetSentenceBoundaries(const HB_UChar16 *string, hb_uint32 stringLength,
|
||||
const HB_ScriptItem * /*items*/, hb_uint32 /*numItems*/,
|
||||
HB_CharAttributes *attributes)
|
||||
{
|
||||
if (stringLength == 0)
|
||||
return;
|
||||
hb_uint32 brk = sentenceBreakTable[SB_Initial][HB_GetSentenceClass(string[0])];
|
||||
attributes[0].sentenceBoundary = true;
|
||||
for (hb_uint32 i = 1; i < stringLength; ++i) {
|
||||
if (!attributes[i].charStop) {
|
||||
attributes[i].sentenceBoundary = false;
|
||||
continue;
|
||||
}
|
||||
brk = sentenceBreakTable[brk][HB_GetSentenceClass(string[i])];
|
||||
if (brk == SB_Look) {
|
||||
brk = SB_Break;
|
||||
hb_uint32 lookahead = i + 1;
|
||||
while (lookahead < stringLength) {
|
||||
hb_uint32 sbrk = HB_GetSentenceClass(string[lookahead]);
|
||||
if (sbrk != HB_Sentence_Other && sbrk != HB_Sentence_Numeric && sbrk != HB_Sentence_Close) {
|
||||
break;
|
||||
} else if (sbrk == HB_Sentence_Lower) {
|
||||
brk = SB_Initial;
|
||||
break;
|
||||
}
|
||||
++lookahead;
|
||||
}
|
||||
if (brk == SB_Initial) {
|
||||
while (i < lookahead)
|
||||
attributes[i++].sentenceBoundary = false;
|
||||
}
|
||||
}
|
||||
if (brk == SB_Break) {
|
||||
attributes[i].sentenceBoundary = true;
|
||||
brk = sentenceBreakTable[SB_Initial][HB_GetSentenceClass(string[i])];
|
||||
} else {
|
||||
attributes[i].sentenceBoundary = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static inline char *tag_to_string(HB_UInt tag)
|
||||
{
|
||||
static char string[5];
|
||||
|
16
src/3rdparty/harfbuzz/src/harfbuzz-shaper.h
vendored
16
src/3rdparty/harfbuzz/src/harfbuzz-shaper.h
vendored
@ -143,19 +143,9 @@ typedef struct {
|
||||
hb_bitfield unused :2;
|
||||
} HB_CharAttributes;
|
||||
|
||||
void HB_GetCharAttributes(const HB_UChar16 *string, hb_uint32 stringLength,
|
||||
const HB_ScriptItem *items, hb_uint32 numItems,
|
||||
HB_CharAttributes *attributes);
|
||||
|
||||
/* requires HB_GetCharAttributes to be called before */
|
||||
void HB_GetWordBoundaries(const HB_UChar16 *string, hb_uint32 stringLength,
|
||||
const HB_ScriptItem *items, hb_uint32 numItems,
|
||||
HB_CharAttributes *attributes);
|
||||
|
||||
/* requires HB_GetCharAttributes to be called before */
|
||||
void HB_GetSentenceBoundaries(const HB_UChar16 *string, hb_uint32 stringLength,
|
||||
const HB_ScriptItem *items, hb_uint32 numItems,
|
||||
HB_CharAttributes *attributes);
|
||||
void HB_GetTailoredCharAttributes(const HB_UChar16 *string, hb_uint32 stringLength,
|
||||
const HB_ScriptItem *items, hb_uint32 numItems,
|
||||
HB_CharAttributes *attributes);
|
||||
|
||||
|
||||
typedef enum {
|
||||
|
2
src/3rdparty/harfbuzz/tests/Makefile.am
vendored
2
src/3rdparty/harfbuzz/tests/Makefile.am
vendored
@ -2,6 +2,6 @@
|
||||
SUBDIRS =
|
||||
|
||||
if QT
|
||||
SUBDIRS += linebreaking shaping
|
||||
SUBDIRS += shaping
|
||||
endif
|
||||
|
||||
|
@ -1,4 +0,0 @@
|
||||
.deps
|
||||
linebreaking
|
||||
*.moc
|
||||
*.o
|
@ -1,12 +0,0 @@
|
||||
|
||||
check_PROGRAMS = linebreaking
|
||||
|
||||
linebreaking_SOURCES = main.cpp harfbuzz-qt.cpp
|
||||
linebreaking_LDADD = $(QT_GUI_LIBS) $(QT_QTEST_LIBS) ../../src/libharfbuzz-1.la
|
||||
|
||||
main.o: main.moc
|
||||
|
||||
main.moc: $(srcdir)/main.cpp
|
||||
$(QT_MOC) -o main.moc $(srcdir)/main.cpp
|
||||
|
||||
INCLUDES = -I$(top_srcdir)/src $(FREETYPE_CFLAGS) $(QT_GUI_CFLAGS) $(QT_QTEST_CFLAGS)
|
230
src/3rdparty/harfbuzz/tests/linebreaking/main.cpp
vendored
230
src/3rdparty/harfbuzz/tests/linebreaking/main.cpp
vendored
@ -1,230 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
|
||||
*
|
||||
* This is part of HarfBuzz, an OpenType Layout engine library.
|
||||
*
|
||||
* Permission is hereby granted, without written agreement and without
|
||||
* license or royalty fees, to use, copy, modify, and distribute this
|
||||
* software and its documentation for any purpose, provided that the
|
||||
* above copyright notice and the following two paragraphs appear in
|
||||
* all copies of this software.
|
||||
*
|
||||
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
|
||||
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
|
||||
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
|
||||
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
|
||||
* DAMAGE.
|
||||
*
|
||||
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
|
||||
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
|
||||
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
|
||||
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
|
||||
*/
|
||||
|
||||
/*
|
||||
!!!!!! Warning !!!!!
|
||||
Please don't save this file in emacs. It contains utf8 text sequences emacs will
|
||||
silently convert to a series of question marks.
|
||||
*/
|
||||
#include <QtTest/QtTest>
|
||||
#include <QtCore/qdebug.h>
|
||||
|
||||
#include <harfbuzz-shaper.h>
|
||||
|
||||
static QVector<HB_CharAttributes> getCharAttributes(const QString &str, HB_Script script = HB_Script_Common)
|
||||
{
|
||||
QVector<HB_CharAttributes> attrs(str.length());
|
||||
HB_ScriptItem item;
|
||||
item.pos = 0;
|
||||
item.length = str.length();
|
||||
item.script = script;
|
||||
HB_GetCharAttributes(str.utf16(), str.length(),
|
||||
&item, 1,
|
||||
attrs.data());
|
||||
return attrs;
|
||||
}
|
||||
|
||||
class tst_CharAttributes : public QObject
|
||||
{
|
||||
Q_OBJECT
|
||||
|
||||
public:
|
||||
tst_CharAttributes();
|
||||
virtual ~tst_CharAttributes();
|
||||
|
||||
public slots:
|
||||
void init();
|
||||
void cleanup();
|
||||
private slots:
|
||||
void lineBreaking();
|
||||
void charWordStopOnLineSeparator();
|
||||
void charStopForSurrogatePairs();
|
||||
void thaiWordBreak();
|
||||
};
|
||||
|
||||
|
||||
tst_CharAttributes::tst_CharAttributes()
|
||||
{
|
||||
}
|
||||
|
||||
tst_CharAttributes::~tst_CharAttributes()
|
||||
{
|
||||
}
|
||||
|
||||
void tst_CharAttributes::init()
|
||||
{
|
||||
}
|
||||
|
||||
void tst_CharAttributes::cleanup()
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
void tst_CharAttributes::lineBreaking()
|
||||
{
|
||||
struct Breaks {
|
||||
const char *utf8;
|
||||
uchar breaks[32];
|
||||
};
|
||||
Breaks brks[] = {
|
||||
{ "11", { false, 0xff } },
|
||||
{ "aa", { false, 0xff } },
|
||||
{ "++", { false, 0xff } },
|
||||
{ "--", { false, 0xff } },
|
||||
{ "((", { false, 0xff } },
|
||||
{ "))", { false, 0xff } },
|
||||
{ "..", { false, 0xff } },
|
||||
{ "\"\"", { false, 0xff } },
|
||||
{ "$$", { false, 0xff } },
|
||||
{ "!!", { false, 0xff } },
|
||||
{ "??", { false, 0xff } },
|
||||
{ ",,", { false, 0xff } },
|
||||
|
||||
{ ")()", { true, false, 0xff } },
|
||||
{ "?!?", { false, false, 0xff } },
|
||||
{ ".,.", { false, false, 0xff } },
|
||||
{ "+-+", { false, false, 0xff } },
|
||||
{ "+=+", { false, false, 0xff } },
|
||||
{ "+(+", { false, false, 0xff } },
|
||||
{ "+)+", { false, false, 0xff } },
|
||||
|
||||
{ "a b", { false, true, 0xff } },
|
||||
{ "a(b", { false, false, 0xff } },
|
||||
{ "a)b", { false, false, 0xff } },
|
||||
{ "a-b", { false, true, 0xff } },
|
||||
{ "a.b", { false, false, 0xff } },
|
||||
{ "a+b", { false, false, 0xff } },
|
||||
{ "a?b", { false, false, 0xff } },
|
||||
{ "a!b", { false, false, 0xff } },
|
||||
{ "a$b", { false, false, 0xff } },
|
||||
{ "a,b", { false, false, 0xff } },
|
||||
{ "a/b", { false, false, 0xff } },
|
||||
{ "1/2", { false, false, 0xff } },
|
||||
{ "./.", { false, false, 0xff } },
|
||||
{ ",/,", { false, false, 0xff } },
|
||||
{ "!/!", { false, false, 0xff } },
|
||||
{ "\\/\\", { false, false, 0xff } },
|
||||
{ "1 2", { false, true, 0xff } },
|
||||
{ "1(2", { false, false, 0xff } },
|
||||
{ "1)2", { false, false, 0xff } },
|
||||
{ "1-2", { false, false, 0xff } },
|
||||
{ "1.2", { false, false, 0xff } },
|
||||
{ "1+2", { false, false, 0xff } },
|
||||
{ "1?2", { false, true, 0xff } },
|
||||
{ "1!2", { false, true, 0xff } },
|
||||
{ "1$2", { false, false, 0xff } },
|
||||
{ "1,2", { false, false, 0xff } },
|
||||
{ "1/2", { false, false, 0xff } },
|
||||
{ "\330\260\331\216\331\204\331\220\331\203\331\216", { false, false, false, false, false, 0xff } },
|
||||
{ "\330\247\331\204\331\205 \330\247\331\204\331\205", { false, false, false, true, false, false, 0xff } },
|
||||
{ "1#2", { false, false, 0xff } },
|
||||
{ "!#!", { false, false, 0xff } },
|
||||
{ 0, {} }
|
||||
};
|
||||
Breaks *b = brks;
|
||||
while (b->utf8) {
|
||||
QString str = QString::fromUtf8(b->utf8);
|
||||
|
||||
QVector<HB_CharAttributes> attrs = getCharAttributes(str);
|
||||
|
||||
int i;
|
||||
for (i = 0; i < (int)str.length() - 1; ++i) {
|
||||
QVERIFY(b->breaks[i] != 0xff);
|
||||
if ( (attrs[i].lineBreakType != HB_NoBreak) != (bool)b->breaks[i] ) {
|
||||
qDebug("test case \"%s\" failed at char %d; break type: %d", b->utf8, i, attrs[i].lineBreakType);
|
||||
QCOMPARE( (attrs[i].lineBreakType != HB_NoBreak), (bool)b->breaks[i] );
|
||||
}
|
||||
}
|
||||
QVERIFY(attrs[i].lineBreakType == HB_ForcedBreak);
|
||||
QCOMPARE(b->breaks[i], (uchar)0xff);
|
||||
++b;
|
||||
}
|
||||
}
|
||||
|
||||
void tst_CharAttributes::charWordStopOnLineSeparator()
|
||||
{
|
||||
const QChar lineSeparator(QChar::LineSeparator);
|
||||
QString txt;
|
||||
txt.append(lineSeparator);
|
||||
txt.append(lineSeparator);
|
||||
QVector<HB_CharAttributes> attrs = getCharAttributes(txt);
|
||||
QVERIFY(attrs[1].charStop);
|
||||
}
|
||||
|
||||
void tst_CharAttributes::charStopForSurrogatePairs()
|
||||
{
|
||||
QString txt;
|
||||
txt.append("a");
|
||||
txt.append(0xd87e);
|
||||
txt.append(0xdc25);
|
||||
txt.append("b");
|
||||
QVector<HB_CharAttributes> attrs = getCharAttributes(txt);
|
||||
QVERIFY(attrs[0].charStop);
|
||||
QVERIFY(attrs[1].charStop);
|
||||
QVERIFY(!attrs[2].charStop);
|
||||
QVERIFY(attrs[3].charStop);
|
||||
}
|
||||
|
||||
void tst_CharAttributes::thaiWordBreak()
|
||||
{
|
||||
// สวัสดีครับ นี่เป็นการงทดสอบตัวเอ
|
||||
QTextCodec *codec = QTextCodec::codecForMib(2259);
|
||||
QString txt = codec->toUnicode(QByteArray("\xca\xc7\xd1\xca\xb4\xd5\xa4\xc3\xd1\xba\x20\xb9\xd5\xe8\xe0\xbb\xe7\xb9\xa1\xd2\xc3\xb7\xb4\xca\xcd\xba\xb5\xd1\xc7\xe0\xcd\xa7"));
|
||||
|
||||
|
||||
QCOMPARE(txt.length(), 32);
|
||||
QVector<HB_CharAttributes> attrs = getCharAttributes(txt, HB_Script_Thai);
|
||||
QVERIFY(attrs[0].lineBreakType == HB_NoBreak);
|
||||
QVERIFY(attrs[1].lineBreakType == HB_NoBreak);
|
||||
QVERIFY(attrs[2].lineBreakType == HB_NoBreak);
|
||||
QVERIFY(attrs[3].lineBreakType == HB_NoBreak);
|
||||
QVERIFY(attrs[4].lineBreakType == HB_NoBreak);
|
||||
QVERIFY(attrs[5].lineBreakType == HB_Break);
|
||||
QVERIFY(attrs[6].lineBreakType == HB_NoBreak);
|
||||
QVERIFY(attrs[7].lineBreakType == HB_NoBreak);
|
||||
QVERIFY(attrs[8].lineBreakType == HB_NoBreak);
|
||||
QVERIFY(attrs[9].lineBreakType == HB_NoBreak);
|
||||
QVERIFY(attrs[10].lineBreakType == HB_Break);
|
||||
QVERIFY(attrs[11].lineBreakType == HB_NoBreak);
|
||||
QVERIFY(attrs[12].lineBreakType == HB_NoBreak);
|
||||
QVERIFY(attrs[13].lineBreakType == HB_Break);
|
||||
QVERIFY(attrs[14].lineBreakType == HB_NoBreak);
|
||||
QVERIFY(attrs[15].lineBreakType == HB_NoBreak);
|
||||
QVERIFY(attrs[16].lineBreakType == HB_NoBreak);
|
||||
QVERIFY(attrs[17].lineBreakType == HB_Break);
|
||||
QVERIFY(attrs[18].lineBreakType == HB_NoBreak);
|
||||
QVERIFY(attrs[19].lineBreakType == HB_NoBreak);
|
||||
QVERIFY(attrs[20].lineBreakType == HB_Break);
|
||||
QVERIFY(attrs[21].lineBreakType == HB_NoBreak);
|
||||
QVERIFY(attrs[22].lineBreakType == HB_NoBreak);
|
||||
QVERIFY(attrs[23].lineBreakType == HB_NoBreak);
|
||||
QVERIFY(attrs[24].lineBreakType == HB_NoBreak);
|
||||
QVERIFY(attrs[25].lineBreakType == HB_Break);
|
||||
QVERIFY(attrs[26].lineBreakType == HB_NoBreak);
|
||||
for (int i = 27; i < 32; ++i)
|
||||
QVERIFY(attrs[i].lineBreakType == HB_NoBreak);
|
||||
}
|
||||
|
||||
QTEST_MAIN(tst_CharAttributes)
|
||||
#include "main.moc"
|
@ -1,7 +1,7 @@
|
||||
|
||||
check_PROGRAMS = shaping
|
||||
|
||||
shaping_SOURCES = main.cpp ../linebreaking/harfbuzz-qt.cpp
|
||||
shaping_SOURCES = main.cpp harfbuzz-qt.cpp
|
||||
shaping_LDADD = $(QT_GUI_LIBS) $(QT_QTEST_LIBS) ../../src/libharfbuzz-1.la
|
||||
|
||||
main.o: main.moc
|
||||
|
@ -23,21 +23,11 @@
|
||||
*/
|
||||
|
||||
#include <harfbuzz-external.h>
|
||||
#include <Qt/private/qunicodetables_p.h>
|
||||
#include <QChar>
|
||||
#include <QLibrary>
|
||||
#include <QTextCodec>
|
||||
|
||||
extern "C" {
|
||||
|
||||
HB_LineBreakClass HB_GetLineBreakClass(HB_UChar32 ch)
|
||||
{
|
||||
#if QT_VERSION >= 0x040300
|
||||
return (HB_LineBreakClass)QUnicodeTables::lineBreakClass(ch);
|
||||
#else
|
||||
#error "This test currently requires Qt >= 4.3"
|
||||
#endif
|
||||
}
|
||||
|
||||
void HB_GetUnicodeCharProperties(HB_UChar32 ch, HB_CharCategory *category, int *combiningClass)
|
||||
{
|
||||
*category = (HB_CharCategory)QChar::category(ch);
|
||||
@ -59,26 +49,6 @@ HB_UChar16 HB_GetMirroredChar(HB_UChar16 ch)
|
||||
return QChar::mirroredChar(ch);
|
||||
}
|
||||
|
||||
/* Returns the word break class stored in Qt's Unicode property table for ch. */
HB_WordClass HB_GetWordClass(HB_UChar32 ch)
{
    return static_cast<HB_WordClass>(QUnicodeTables::properties(ch)->wordBreak);
}
|
||||
|
||||
|
||||
// Returns the sentenceBreak field of ch's Unicode properties record,
// converted to HarfBuzz's HB_SentenceClass.
HB_SentenceClass HB_GetSentenceClass(HB_UChar32 ch)
{
    const QUnicodeTables::Properties *const properties = QUnicodeTables::properties(ch);
    return static_cast<HB_SentenceClass>(properties->sentenceBreak);
}
|
||||
|
||||
// Looks up ch's Unicode properties once and stores its grapheme break class
// in *grapheme and its line break class in *lineBreak (written in that order).
void HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *grapheme, HB_LineBreakClass *lineBreak)
{
    const QUnicodeTables::Properties *const properties = QUnicodeTables::properties(ch);
    *grapheme = static_cast<HB_GraphemeClass>(properties->graphemeBreak);
    *lineBreak = static_cast<HB_LineBreakClass>(properties->line_break_class);
}
|
||||
|
||||
void (*HB_Library_Resolve(const char *library, int version, const char *symbol))()
|
||||
{
|
||||
return QLibrary::resolve(library, version, symbol);
|
@ -39,47 +39,15 @@
|
||||
**
|
||||
****************************************************************************/
|
||||
|
||||
#include "qharfbuzz_p.h"
|
||||
|
||||
#include "qunicodetables_p.h"
|
||||
#include "qlibrary.h"
|
||||
#include "qtextcodec.h"
|
||||
|
||||
#include "qharfbuzz_p.h"
|
||||
|
||||
QT_USE_NAMESPACE
|
||||
|
||||
extern "C" {
|
||||
|
||||
// Returns the graphemeBreak field of ch's Unicode properties record,
// converted to HarfBuzz's HB_GraphemeClass.
HB_GraphemeClass HB_GetGraphemeClass(HB_UChar32 ch)
{
    const QUnicodeTables::Properties *const properties = QUnicodeTables::properties(ch);
    return static_cast<HB_GraphemeClass>(properties->graphemeBreak);
}
|
||||
|
||||
// Returns the wordBreak field of ch's Unicode properties record,
// converted to HarfBuzz's HB_WordClass.
HB_WordClass HB_GetWordClass(HB_UChar32 ch)
{
    const QUnicodeTables::Properties *const properties = QUnicodeTables::properties(ch);
    return static_cast<HB_WordClass>(properties->wordBreak);
}
|
||||
|
||||
// Returns the sentenceBreak field of ch's Unicode properties record,
// converted to HarfBuzz's HB_SentenceClass.
HB_SentenceClass HB_GetSentenceClass(HB_UChar32 ch)
{
    const QUnicodeTables::Properties *const properties = QUnicodeTables::properties(ch);
    return static_cast<HB_SentenceClass>(properties->sentenceBreak);
}
|
||||
|
||||
// Returns the line break class that Qt's Unicode tables report for ch,
// converted to HarfBuzz's HB_LineBreakClass.
HB_LineBreakClass HB_GetLineBreakClass(HB_UChar32 ch)
{
    const int lineBreak = QUnicodeTables::lineBreakClass(ch);
    return static_cast<HB_LineBreakClass>(lineBreak);
}
|
||||
|
||||
|
||||
// Looks up ch's Unicode properties once and stores its grapheme break class
// in *grapheme and its line break class in *lineBreak (written in that order).
void HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *grapheme, HB_LineBreakClass *lineBreak)
{
    const QUnicodeTables::Properties *const properties = QUnicodeTables::properties(ch);
    *grapheme = static_cast<HB_GraphemeClass>(properties->graphemeBreak);
    *lineBreak = static_cast<HB_LineBreakClass>(properties->line_break_class);
}
|
||||
|
||||
void HB_GetUnicodeCharProperties(HB_UChar32 ch, HB_CharCategory *category, int *combiningClass)
|
||||
{
|
||||
const QUnicodeTables::Properties *prop = QUnicodeTables::properties(ch);
|
||||
@ -135,11 +103,4 @@ void qHBFreeFace(HB_Face face)
|
||||
HB_FreeFace(face);
|
||||
}
|
||||
|
||||
// Thin forwarder: passes every argument unchanged to HarfBuzz's
// HB_GetCharAttributes(), which fills the attributes array.
void qGetCharAttributes(const HB_UChar16 *string, hb_uint32 stringLength,
|
||||
const HB_ScriptItem *items, hb_uint32 numItems,
|
||||
HB_CharAttributes *attributes)
|
||||
{
|
||||
HB_GetCharAttributes(string, stringLength, items, numItems, attributes);
|
||||
}
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
@ -58,11 +58,6 @@
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
// temporary forward until all the textengine code has been moved to QtCore
|
||||
Q_CORE_EXPORT void qGetCharAttributes(const HB_UChar16 *string, hb_uint32 stringLength,
|
||||
const HB_ScriptItem *items, hb_uint32 numItems,
|
||||
HB_CharAttributes *attributes);
|
||||
|
||||
Q_CORE_EXPORT HB_Bool qShapeItem(HB_ShaperItem *item);
|
||||
|
||||
// ### temporary
|
||||
|
@ -40,9 +40,9 @@
|
||||
****************************************************************************/
|
||||
#include <QtCore/qtextboundaryfinder.h>
|
||||
#include <QtCore/qvarlengtharray.h>
|
||||
|
||||
#include <private/qunicodetables_p.h>
|
||||
#include <qdebug.h>
|
||||
#include "private/qharfbuzz_p.h"
|
||||
#include <private/qunicodetools_p.h>
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
@ -93,11 +93,12 @@ static void init(QTextBoundaryFinder::BoundaryType type, const QChar *chars, int
|
||||
scriptItems.append(item);
|
||||
}
|
||||
|
||||
qGetCharAttributes(string, length, scriptItems.data(), scriptItems.count(), attributes);
|
||||
QCharAttributeOptions options = 0;
|
||||
if (type == QTextBoundaryFinder::Word)
|
||||
HB_GetWordBoundaries(string, length, scriptItems.data(), scriptItems.count(), attributes);
|
||||
options |= GetWordBreaks;
|
||||
else if (type == QTextBoundaryFinder::Sentence)
|
||||
HB_GetSentenceBoundaries(string, length, scriptItems.data(), scriptItems.count(), attributes);
|
||||
options |= GetSentenceBreaks;
|
||||
qGetCharAttributes(string, length, scriptItems.data(), scriptItems.count(), attributes, options);
|
||||
}
|
||||
|
||||
/*!
|
||||
|
398
src/corelib/tools/qunicodetools.cpp
Normal file
398
src/corelib/tools/qunicodetools.cpp
Normal file
@ -0,0 +1,398 @@
|
||||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies).
|
||||
** Contact: http://www.qt-project.org/
|
||||
**
|
||||
** This file is part of the QtCore module of the Qt Toolkit.
|
||||
**
|
||||
** $QT_BEGIN_LICENSE:LGPL$
|
||||
** GNU Lesser General Public License Usage
|
||||
** This file may be used under the terms of the GNU Lesser General Public
|
||||
** License version 2.1 as published by the Free Software Foundation and
|
||||
** appearing in the file LICENSE.LGPL included in the packaging of this
|
||||
** file. Please review the following information to ensure the GNU Lesser
|
||||
** General Public License version 2.1 requirements will be met:
|
||||
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
||||
**
|
||||
** In addition, as a special exception, Nokia gives you certain additional
|
||||
** rights. These rights are described in the Nokia Qt LGPL Exception
|
||||
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
||||
**
|
||||
** GNU General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU General
|
||||
** Public License version 3.0 as published by the Free Software Foundation
|
||||
** and appearing in the file LICENSE.GPL included in the packaging of this
|
||||
** file. Please review the following information to ensure the GNU General
|
||||
** Public License version 3.0 requirements will be met:
|
||||
** http://www.gnu.org/copyleft/gpl.html.
|
||||
**
|
||||
** Other Usage
|
||||
** Alternatively, this file may be used in accordance with the terms and
|
||||
** conditions contained in a signed written agreement between you and Nokia.
|
||||
**
|
||||
**
|
||||
**
|
||||
**
|
||||
**
|
||||
**
|
||||
** $QT_END_LICENSE$
|
||||
**
|
||||
****************************************************************************/
|
||||
|
||||
#include "qunicodetools_p.h"
|
||||
|
||||
#include "qunicodetables_p.h"
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// The line breaking algorithm. See http://www.unicode.org/reports/tr14/tr14-19.html
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// The text boundaries determination algorithm. See http://www.unicode.org/reports/tr29/tr29-11.html
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------
|
||||
|
||||
namespace {
|
||||
|
||||
/* The Unicode algorithm does in our opinion allow line breaks at some
|
||||
places they shouldn't be allowed. The following changes were thus
|
||||
made in comparison to the Unicode reference:
|
||||
|
||||
EX->AL from DB to IB
|
||||
SY->AL from DB to IB
|
||||
SY->PO from DB to IB
|
||||
SY->PR from DB to IB
|
||||
SY->OP from DB to IB
|
||||
AL->PR from DB to IB
|
||||
AL->PO from DB to IB
|
||||
PR->PR from DB to IB
|
||||
PO->PO from DB to IB
|
||||
PR->PO from DB to IB
|
||||
PO->PR from DB to IB
|
||||
HY->PO from DB to IB
|
||||
HY->PR from DB to IB
|
||||
HY->OP from DB to IB
|
||||
NU->EX from PB to IB
|
||||
EX->PO from DB to IB
|
||||
*/
|
||||
|
||||
// The following line break classes are not treated by the table:
|
||||
// AI, BK, CB, CR, LF, NL, SA, SG, SP, XX
|
||||
|
||||
// Values stored in lineBreakTable for a pair of adjacent line break classes.
// calcGraphemeAndLineBreaks() switches on these values to decide the
// lineBreakType between two characters.
enum LineBreakRule {
|
||||
ProhibitedBreak, // PB in table
|
||||
DirectBreak, // DB in table
|
||||
IndirectBreak, // IB in table
|
||||
CombiningIndirectBreak, // CI in table
|
||||
CombiningProhibitedBreak // CP in table
|
||||
};
|
||||
// Two-letter shorthands for the LineBreakRule values; they are only defined
// while the table below is spelled out and are #undef'ed right after it.
#define DB DirectBreak
|
||||
#define IB IndirectBreak
|
||||
#define CI CombiningIndirectBreak
|
||||
#define CP CombiningProhibitedBreak
|
||||
#define PB ProhibitedBreak
|
||||
// Pair table for line breaking. calcGraphemeAndLineBreaks() reads it as
// lineBreakTable[cls][tcls]: the row is the class carried over from the
// preceding text, the column is the class of the current character.
// Both dimensions cover the classes OP..JT only.
static const uchar lineBreakTable[QUnicodeTables::LineBreak_JT + 1][QUnicodeTables::LineBreak_JT + 1] = {
|
||||
/* OP CL QU GL NS EX SY IS PR PO NU AL ID IN HY BA BB B2 ZW CM WJ H2 H3 JL JV JT */
|
||||
/* OP */ { PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, CP, PB, PB, PB, PB, PB, PB },
|
||||
/* CL */ { DB, PB, IB, IB, PB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* QU */ { PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
|
||||
/* GL */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
|
||||
/* NS */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* EX */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* SY */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* IS */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* PR */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, DB, IB, IB, DB, DB, PB, CI, PB, IB, IB, IB, IB, IB },
|
||||
/* PO */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* NU */ { IB, PB, IB, IB, IB, IB, PB, PB, IB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* AL */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* ID */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* IN */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* HY */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* BA */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* BB */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
|
||||
/* B2 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, PB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* ZW */ { DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, PB, DB, DB, DB, DB, DB, DB, DB },
|
||||
/* CM */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* WJ */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
|
||||
/* H2 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, IB, IB },
|
||||
/* H3 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, IB },
|
||||
/* JL */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, IB, IB, IB, IB, DB },
|
||||
/* JV */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, IB, IB },
|
||||
/* JT */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, IB }
|
||||
};
|
||||
#undef DB
|
||||
#undef IB
|
||||
#undef CI
|
||||
#undef CP
|
||||
#undef PB
|
||||
|
||||
// Grapheme boundary table. calcGraphemeAndLineBreaks() reads it as
// graphemeBreakTable[ngrapheme][grapheme]: the row is the grapheme class of
// the current character, the column is the class of the one before it.
// The entry is stored as the current character's charStop flag.
static const uchar graphemeBreakTable[QUnicodeTables::GraphemeBreakLVT + 1][QUnicodeTables::GraphemeBreakLVT + 1] = {
|
||||
// Other, CR, LF, Control, Extend, L, V, T, LV, LVT
|
||||
{ true , true , true , true , true , true , true , true , true , true }, // Other,
|
||||
{ true , true , true , true , true , true , true , true , true , true }, // CR,
|
||||
{ true , false, true , true , true , true , true , true , true , true }, // LF,
|
||||
{ true , true , true , true , true , true , true , true , true , true }, // Control,
|
||||
{ false, true , true , true , false, false, false, false, false, false }, // Extend,
|
||||
{ true , true , true , true , true , false, true , true , true , true }, // L,
|
||||
{ true , true , true , true , true , false, false, true , false, true }, // V,
|
||||
{ true , true , true , true , true , true , false, false, false, false }, // T,
|
||||
{ true , true , true , true , true , false, true , true , true , true }, // LV,
|
||||
{ true , true , true , true , true , false, true , true , true , true }, // LVT
|
||||
};
|
||||
|
||||
static void calcGraphemeAndLineBreaks(const ushort *string, quint32 len, HB_CharAttributes *attributes)
|
||||
{
|
||||
// ##### can this fail if the first char is a surrogate?
|
||||
const QUnicodeTables::Properties *prop = QUnicodeTables::properties(string[0]);
|
||||
QUnicodeTables::GraphemeBreak grapheme = (QUnicodeTables::GraphemeBreak) prop->graphemeBreak;
|
||||
QUnicodeTables::LineBreakClass cls = (QUnicodeTables::LineBreakClass) prop->line_break_class;
|
||||
// handle case where input starts with an LF
|
||||
if (cls == QUnicodeTables::LineBreak_LF)
|
||||
cls = QUnicodeTables::LineBreak_BK;
|
||||
|
||||
attributes[0].whiteSpace = (cls == QUnicodeTables::LineBreak_SP || cls == QUnicodeTables::LineBreak_BK);
|
||||
attributes[0].charStop = true;
|
||||
|
||||
int lcls = cls;
|
||||
for (quint32 i = 1; i < len; ++i) {
|
||||
attributes[i].whiteSpace = false;
|
||||
attributes[i].charStop = true;
|
||||
|
||||
uint ucs4 = string[i];
|
||||
prop = QUnicodeTables::properties(ucs4);
|
||||
QUnicodeTables::GraphemeBreak ngrapheme = (QUnicodeTables::GraphemeBreak) prop->graphemeBreak;
|
||||
QUnicodeTables::LineBreakClass ncls = (QUnicodeTables::LineBreakClass) prop->line_break_class;
|
||||
attributes[i].charStop = graphemeBreakTable[ngrapheme][grapheme];
|
||||
// handle surrogates
|
||||
if (ncls == QUnicodeTables::LineBreak_SG) {
|
||||
if (QChar::isHighSurrogate(string[i]) && i < len - 1 && QChar::isLowSurrogate(string[i+1])) {
|
||||
continue;
|
||||
} else if (QChar::isLowSurrogate(string[i]) && QChar::isHighSurrogate(string[i-1])) {
|
||||
ucs4 = QChar::surrogateToUcs4(string[i-1], string[i]);
|
||||
prop = QUnicodeTables::properties(ucs4);
|
||||
ngrapheme = (QUnicodeTables::GraphemeBreak) prop->graphemeBreak;
|
||||
ncls = (QUnicodeTables::LineBreakClass) prop->line_break_class;
|
||||
attributes[i].charStop = false;
|
||||
} else {
|
||||
ncls = QUnicodeTables::LineBreak_AL;
|
||||
}
|
||||
}
|
||||
|
||||
// set white space and char stop flag
|
||||
if (ncls >= QUnicodeTables::LineBreak_SP)
|
||||
attributes[i].whiteSpace = true;
|
||||
|
||||
HB_LineBreakType lineBreakType = HB_NoBreak;
|
||||
if (cls >= QUnicodeTables::LineBreak_LF) {
|
||||
lineBreakType = HB_ForcedBreak;
|
||||
} else if (cls == QUnicodeTables::LineBreak_CR) {
|
||||
lineBreakType = (ncls == QUnicodeTables::LineBreak_LF) ? HB_NoBreak : HB_ForcedBreak;
|
||||
}
|
||||
|
||||
if (ncls == QUnicodeTables::LineBreak_SP)
|
||||
goto next_no_cls_update;
|
||||
if (ncls >= QUnicodeTables::LineBreak_CR)
|
||||
goto next;
|
||||
|
||||
{
|
||||
int tcls = ncls;
|
||||
// for south east asian chars that require a complex (dictionary analysis), the unicode
|
||||
// standard recommends to treat them as AL. thai_attributes and other attribute methods that
|
||||
// do dictionary analysis can override
|
||||
if (tcls >= QUnicodeTables::LineBreak_SA)
|
||||
tcls = QUnicodeTables::LineBreak_AL;
|
||||
if (cls >= QUnicodeTables::LineBreak_SA)
|
||||
cls = QUnicodeTables::LineBreak_AL;
|
||||
|
||||
int brk = lineBreakTable[cls][tcls];
|
||||
switch (brk) {
|
||||
case DirectBreak:
|
||||
lineBreakType = HB_Break;
|
||||
if (string[i-1] == 0xad) // soft hyphen
|
||||
lineBreakType = HB_SoftHyphen;
|
||||
break;
|
||||
case IndirectBreak:
|
||||
lineBreakType = (lcls == QUnicodeTables::LineBreak_SP) ? HB_Break : HB_NoBreak;
|
||||
break;
|
||||
case CombiningIndirectBreak:
|
||||
lineBreakType = HB_NoBreak;
|
||||
if (lcls == QUnicodeTables::LineBreak_SP){
|
||||
if (i > 1)
|
||||
attributes[i-2].lineBreakType = HB_Break;
|
||||
} else {
|
||||
goto next_no_cls_update;
|
||||
}
|
||||
break;
|
||||
case CombiningProhibitedBreak:
|
||||
lineBreakType = HB_NoBreak;
|
||||
if (lcls != QUnicodeTables::LineBreak_SP)
|
||||
goto next_no_cls_update;
|
||||
case ProhibitedBreak:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
next:
|
||||
cls = ncls;
|
||||
next_no_cls_update:
|
||||
lcls = ncls;
|
||||
grapheme = ngrapheme;
|
||||
attributes[i-1].lineBreakType = lineBreakType;
|
||||
}
|
||||
attributes[len-1].lineBreakType = HB_ForcedBreak;
|
||||
}
|
||||
|
||||
|
||||
// Values stored in wordBreakTable. In calcWordBreaks(), Middle starts a
// lookahead over the following characters; without a match it is treated
// as Break.
enum WordBreakRule { NoBreak = 0, Break = 1, Middle = 2 };
|
||||
|
||||
// Word boundary table. calcWordBreaks() reads it as wordBreakTable[brk][nbrk]:
// the row (named in the trailing comment) is the word break class of the
// preceding character, the column is the class of the current character.
static const uchar wordBreakTable[QUnicodeTables::WordBreakExtendNumLet + 1][QUnicodeTables::WordBreakExtendNumLet + 1] = {
|
||||
// Other Format Katakana ALetter MidLetter MidNum Numeric ExtendNumLet
|
||||
{ Break , Break , Break , Break , Break , Break , Break , Break }, // Other
|
||||
{ Break , Break , Break , Break , Break , Break , Break , Break }, // Format
|
||||
{ Break , Break , NoBreak, Break , Break , Break , Break , NoBreak }, // Katakana
|
||||
{ Break , Break , Break , NoBreak, Middle , Break , NoBreak, NoBreak }, // ALetter
|
||||
{ Break , Break , Break , Break , Break , Break , Break , Break }, // MidLetter
|
||||
{ Break , Break , Break , Break , Break , Break , Break , Break }, // MidNum
|
||||
{ Break , Break , Break , NoBreak, Break , Middle , NoBreak, NoBreak }, // Numeric
|
||||
{ Break , Break , NoBreak, NoBreak, Break , Break , NoBreak, NoBreak }, // ExtendNumLet
|
||||
};
|
||||
|
||||
static void calcWordBreaks(const ushort *string, quint32 len, HB_CharAttributes *attributes)
|
||||
{
|
||||
quint32 brk = QUnicodeTables::wordBreakClass(string[0]);
|
||||
|
||||
attributes[0].wordBoundary = true;
|
||||
|
||||
for (quint32 i = 1; i < len; ++i) {
|
||||
if (!attributes[i].charStop) {
|
||||
attributes[i].wordBoundary = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
quint32 nbrk = QUnicodeTables::wordBreakClass(string[i]);
|
||||
if (nbrk == QUnicodeTables::WordBreakFormat) {
|
||||
attributes[i].wordBoundary = (QUnicodeTables::sentenceBreakClass(string[i-1]) == QUnicodeTables::SentenceBreakSep);
|
||||
continue;
|
||||
}
|
||||
|
||||
WordBreakRule rule = (WordBreakRule)wordBreakTable[brk][nbrk];
|
||||
if (rule == Middle) {
|
||||
rule = Break;
|
||||
quint32 lookahead = i + 1;
|
||||
while (lookahead < len) {
|
||||
quint32 testbrk = QUnicodeTables::wordBreakClass(string[lookahead]);
|
||||
if (testbrk == QUnicodeTables::WordBreakFormat
|
||||
&& QUnicodeTables::sentenceBreakClass(string[lookahead]) != QUnicodeTables::SentenceBreakSep) {
|
||||
++lookahead;
|
||||
continue;
|
||||
}
|
||||
if (testbrk == brk) {
|
||||
rule = NoBreak;
|
||||
while (i < lookahead)
|
||||
attributes[i++].wordBoundary = false;
|
||||
nbrk = testbrk;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
attributes[i].wordBoundary = (rule == Break);
|
||||
brk = nbrk;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// States of the sentence break state machine run by calcSentenceBreaks().
// SB_Initial..SB_BAfter have rows in sentenceBreakTable and carry over from
// one character to the next. SB_Break and SB_Lookup only appear as table
// results: SB_Break marks a boundary, SB_Lookup triggers a lookahead.
enum SentenceBreakState {
|
||||
SB_Initial,
|
||||
SB_Upper,
|
||||
SB_UpATerm,
|
||||
SB_ATerm,
|
||||
SB_ATermC,
|
||||
SB_ACS,
|
||||
SB_STerm,
|
||||
SB_STermC,
|
||||
SB_SCS,
|
||||
SB_BAfter,
|
||||
SB_Break,
|
||||
SB_Lookup
|
||||
};
|
||||
|
||||
// Transition table of the sentence break state machine. calcSentenceBreaks()
// reads it as sentenceBreakTable[state][sentenceBreakClass] to get the next
// state. Initializers are given for the rows SB_Initial..SB_BAfter only; the
// array is declared with SB_Lookup + 1 rows, so the last two rows are
// zero-initialized.
static const uchar sentenceBreakTable[SB_Lookup + 1][QUnicodeTables::SentenceBreakClose + 1] = {
|
||||
// Other Sep Format Sp Lower Upper OLetter Numeric ATerm STerm Close
|
||||
{ SB_Initial, SB_BAfter , SB_Initial, SB_Initial, SB_Initial, SB_Upper , SB_Initial, SB_Initial, SB_ATerm , SB_STerm , SB_Initial }, // SB_Initial,
|
||||
{ SB_Initial, SB_BAfter , SB_Upper , SB_Initial, SB_Initial, SB_Upper , SB_Initial, SB_Initial, SB_UpATerm, SB_STerm , SB_Initial }, // SB_Upper
|
||||
|
||||
{ SB_Lookup , SB_BAfter , SB_UpATerm, SB_ACS , SB_Initial, SB_Upper , SB_Break , SB_Initial, SB_ATerm , SB_STerm , SB_ATermC }, // SB_UpATerm
|
||||
{ SB_Lookup , SB_BAfter , SB_ATerm , SB_ACS , SB_Initial, SB_Break , SB_Break , SB_Initial, SB_ATerm , SB_STerm , SB_ATermC }, // SB_ATerm
|
||||
{ SB_Lookup , SB_BAfter , SB_ATermC , SB_ACS , SB_Initial, SB_Break , SB_Break , SB_Lookup , SB_ATerm , SB_STerm , SB_ATermC }, // SB_ATermC,
|
||||
{ SB_Lookup , SB_BAfter , SB_ACS , SB_ACS , SB_Initial, SB_Break , SB_Break , SB_Lookup , SB_ATerm , SB_STerm , SB_Lookup }, // SB_ACS,
|
||||
|
||||
{ SB_Break , SB_BAfter , SB_STerm , SB_SCS , SB_Break , SB_Break , SB_Break , SB_Break , SB_ATerm , SB_STerm , SB_STermC }, // SB_STerm,
|
||||
{ SB_Break , SB_BAfter , SB_STermC , SB_SCS , SB_Break , SB_Break , SB_Break , SB_Break , SB_ATerm , SB_STerm , SB_STermC }, // SB_STermC,
|
||||
{ SB_Break , SB_BAfter , SB_SCS , SB_SCS , SB_Break , SB_Break , SB_Break , SB_Break , SB_ATerm , SB_STerm , SB_Break }, // SB_SCS,
|
||||
{ SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break }, // SB_BAfter,
|
||||
};
|
||||
|
||||
static void calcSentenceBreaks(const ushort *string, quint32 len, HB_CharAttributes *attributes)
|
||||
{
|
||||
quint32 brk = sentenceBreakTable[SB_Initial][QUnicodeTables::sentenceBreakClass(string[0])];
|
||||
attributes[0].sentenceBoundary = true;
|
||||
for (quint32 i = 1; i < len; ++i) {
|
||||
if (!attributes[i].charStop) {
|
||||
attributes[i].sentenceBoundary = false;
|
||||
continue;
|
||||
}
|
||||
brk = sentenceBreakTable[brk][QUnicodeTables::sentenceBreakClass(string[i])];
|
||||
if (brk == SB_Lookup) {
|
||||
brk = SB_Break;
|
||||
quint32 lookahead = i + 1;
|
||||
while (lookahead < len) {
|
||||
quint32 sbrk = QUnicodeTables::sentenceBreakClass(string[lookahead]);
|
||||
if (sbrk != QUnicodeTables::SentenceBreakOther
|
||||
&& sbrk != QUnicodeTables::SentenceBreakNumeric
|
||||
&& sbrk != QUnicodeTables::SentenceBreakClose) {
|
||||
break;
|
||||
} else if (sbrk == QUnicodeTables::SentenceBreakLower) {
|
||||
brk = SB_Initial;
|
||||
break;
|
||||
}
|
||||
++lookahead;
|
||||
}
|
||||
if (brk == SB_Initial) {
|
||||
while (i < lookahead)
|
||||
attributes[i++].sentenceBoundary = false;
|
||||
}
|
||||
}
|
||||
if (brk == SB_Break) {
|
||||
attributes[i].sentenceBoundary = true;
|
||||
brk = sentenceBreakTable[SB_Initial][QUnicodeTables::sentenceBreakClass(string[i])];
|
||||
} else {
|
||||
attributes[i].sentenceBoundary = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
|
||||
// Computes the character attributes for the length UTF-16 units in string.
//
// Does nothing when length <= 0. Otherwise the attributes array is zeroed,
// grapheme and line break information is always computed, and word and
// sentence boundaries are computed only when the matching flag is set in
// options. Finally HB_GetTailoredCharAttributes() is called with the same
// text, script items and attributes array.
Q_CORE_EXPORT void qGetCharAttributes(const ushort *string, int length,
                                      const HB_ScriptItem *items, int numItems,
                                      HB_CharAttributes *attributes, QCharAttributeOptions options)
{
    if (length <= 0)
        return;

    const size_t byteCount = length * sizeof(HB_CharAttributes);
    memset(attributes, 0, byteCount);

    // word and sentence passes read the charStop flags set by this pass
    calcGraphemeAndLineBreaks(string, length, attributes);

    const bool wantWordBreaks = options.testFlag(GetWordBreaks);
    const bool wantSentenceBreaks = options.testFlag(GetSentenceBreaks);
    if (wantWordBreaks)
        calcWordBreaks(string, length, attributes);
    if (wantSentenceBreaks)
        calcSentenceBreaks(string, length, attributes);

    HB_GetTailoredCharAttributes(string, length, items, numItems, attributes);
}
|
||||
|
||||
QT_END_NAMESPACE
|
76
src/corelib/tools/qunicodetools_p.h
Normal file
76
src/corelib/tools/qunicodetools_p.h
Normal file
@ -0,0 +1,76 @@
|
||||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies).
|
||||
** Contact: http://www.qt-project.org/
|
||||
**
|
||||
** This file is part of the QtCore module of the Qt Toolkit.
|
||||
**
|
||||
** $QT_BEGIN_LICENSE:LGPL$
|
||||
** GNU Lesser General Public License Usage
|
||||
** This file may be used under the terms of the GNU Lesser General Public
|
||||
** License version 2.1 as published by the Free Software Foundation and
|
||||
** appearing in the file LICENSE.LGPL included in the packaging of this
|
||||
** file. Please review the following information to ensure the GNU Lesser
|
||||
** General Public License version 2.1 requirements will be met:
|
||||
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
||||
**
|
||||
** In addition, as a special exception, Nokia gives you certain additional
|
||||
** rights. These rights are described in the Nokia Qt LGPL Exception
|
||||
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
||||
**
|
||||
** GNU General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU General
|
||||
** Public License version 3.0 as published by the Free Software Foundation
|
||||
** and appearing in the file LICENSE.GPL included in the packaging of this
|
||||
** file. Please review the following information to ensure the GNU General
|
||||
** Public License version 3.0 requirements will be met:
|
||||
** http://www.gnu.org/copyleft/gpl.html.
|
||||
**
|
||||
** Other Usage
|
||||
** Alternatively, this file may be used in accordance with the terms and
|
||||
** conditions contained in a signed written agreement between you and Nokia.
|
||||
**
|
||||
**
|
||||
**
|
||||
**
|
||||
**
|
||||
**
|
||||
** $QT_END_LICENSE$
|
||||
**
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef QUNICODETOOLS_P_H
|
||||
#define QUNICODETOOLS_P_H
|
||||
|
||||
//
|
||||
// W A R N I N G
|
||||
// -------------
|
||||
//
|
||||
// This file is not part of the Qt API. It exists for the convenience
|
||||
// of other Qt classes. This header file may change from version to
|
||||
// version without notice, or even be removed.
|
||||
//
|
||||
// We mean it.
|
||||
//
|
||||
|
||||
#include <QtCore/qglobal.h>
|
||||
#include <harfbuzz-shaper.h>
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
Q_DECLARE_TYPEINFO(HB_CharAttributes, Q_PRIMITIVE_TYPE);
|
||||
Q_DECLARE_TYPEINFO(HB_ScriptItem, Q_PRIMITIVE_TYPE);
|
||||
|
||||
// Optional passes of qGetCharAttributes(). The values are distinct bits so
// that they can be combined in a QCharAttributeOptions flag set.
enum QCharAttributeOption {
|
||||
GetWordBreaks = 1,
|
||||
GetSentenceBreaks = 2
|
||||
};
|
||||
Q_DECLARE_FLAGS(QCharAttributeOptions, QCharAttributeOption)
|
||||
|
||||
Q_CORE_EXPORT void qGetCharAttributes(const ushort *string, int length,
|
||||
const HB_ScriptItem *items, int numItems,
|
||||
HB_CharAttributes *attributes, QCharAttributeOptions options = QFlag(0));
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
||||
#endif // QUNICODETOOLS_P_H
|
@ -55,6 +55,7 @@ HEADERS += \
|
||||
tools/qtimeline.h \
|
||||
tools/qelapsedtimer.h \
|
||||
tools/qunicodetables_p.h \
|
||||
tools/qunicodetools_p.h \
|
||||
tools/qvarlengtharray.h \
|
||||
tools/qvector.h
|
||||
|
||||
@ -92,6 +93,7 @@ SOURCES += \
|
||||
tools/qstringlist.cpp \
|
||||
tools/qtextboundaryfinder.cpp \
|
||||
tools/qtimeline.cpp \
|
||||
tools/qunicodetools.cpp \
|
||||
tools/qvector.cpp \
|
||||
tools/qvsnprintf.cpp
|
||||
|
||||
|
@ -52,6 +52,7 @@
|
||||
#include "qfontengine_p.h"
|
||||
#include "qstring.h"
|
||||
#include <private/qunicodetables_p.h>
|
||||
#include <private/qunicodetools_p.h>
|
||||
#include "qtextdocument_p.h"
|
||||
#include "qrawfont.h"
|
||||
#include "qrawfont_p.h"
|
||||
|
@ -42,6 +42,7 @@
|
||||
#include <QtTest/QtTest>
|
||||
|
||||
#include <qtextboundaryfinder.h>
|
||||
#include <qtextcodec.h>
|
||||
#include <qfile.h>
|
||||
#include <qdebug.h>
|
||||
|
||||
@ -61,6 +62,7 @@ private slots:
|
||||
void toNextBoundary();
|
||||
void toPreviousBoundary_data();
|
||||
void toPreviousBoundary();
|
||||
void thaiLineBreak();
|
||||
};
|
||||
|
||||
void tst_QTextBoundaryFinder::init()
|
||||
@ -382,7 +384,95 @@ void tst_QTextBoundaryFinder::toPreviousBoundary()
|
||||
QCOMPARE(boundaries, foundBoundaries);
|
||||
}
|
||||
|
||||
#include <qlibrary.h>
|
||||
|
||||
#define LIBTHAI_MAJOR 0
|
||||
typedef int (*th_brk_def) (const unsigned char*, int*, size_t);
|
||||
static th_brk_def th_brk = 0;
|
||||
|
||||
static bool init_libthai()
|
||||
{
|
||||
#if !defined(QT_NO_LIBRARY)
|
||||
static bool triedResolve = false;
|
||||
if (!triedResolve) {
|
||||
th_brk = (th_brk_def) QLibrary::resolve("thai", (int)LIBTHAI_MAJOR, "th_brk");
|
||||
triedResolve = true;
|
||||
}
|
||||
#endif
|
||||
return th_brk != 0;
|
||||
}
|
||||
|
||||
void tst_QTextBoundaryFinder::thaiLineBreak()
|
||||
{
|
||||
if (!init_libthai())
|
||||
QSKIP("This test requires libThai-0.1.1x to be installed.");
|
||||
#if 0
|
||||
// สวัสดีครับ นี่เป็นการงทดสอบตัวเอ
|
||||
QTextCodec *codec = QTextCodec::codecForMib(2259);
|
||||
QString text = codec->toUnicode(QByteArray("\xca\xc7\xd1\xca\xb4\xd5\xa4\xc3\xd1\xba\x20\xb9\xd5\xe8\xe0\xbb\xe7\xb9\xa1\xd2\xc3\xb7\xb4\xca\xcd\xba\xb5\xd1\xc7\xe0\xcd\xa7"));
|
||||
QCOMPARE(text.length(), 32);
|
||||
|
||||
QTextBoundaryFinder finder(QTextBoundaryFinder::Line, text);
|
||||
finder.setPosition(0);
|
||||
QVERIFY(finder.isAtBoundary());
|
||||
finder.setPosition(1);
|
||||
QVERIFY(!finder.isAtBoundary());
|
||||
finder.setPosition(2);
|
||||
QVERIFY(!finder.isAtBoundary());
|
||||
finder.setPosition(3);
|
||||
QVERIFY(!finder.isAtBoundary());
|
||||
finder.setPosition(4);
|
||||
QVERIFY(!finder.isAtBoundary());
|
||||
finder.setPosition(5);
|
||||
QVERIFY(!finder.isAtBoundary());
|
||||
finder.setPosition(6);
|
||||
QVERIFY(finder.isAtBoundary());
|
||||
finder.setPosition(7);
|
||||
QVERIFY(finder.isAtBoundary());
|
||||
finder.setPosition(8);
|
||||
QVERIFY(!finder.isAtBoundary());
|
||||
finder.setPosition(9);
|
||||
QVERIFY(!finder.isAtBoundary());
|
||||
finder.setPosition(10);
|
||||
QVERIFY(!finder.isAtBoundary());
|
||||
finder.setPosition(11);
|
||||
QVERIFY(finder.isAtBoundary());
|
||||
finder.setPosition(12);
|
||||
QVERIFY(!finder.isAtBoundary());
|
||||
finder.setPosition(13);
|
||||
QVERIFY(!finder.isAtBoundary());
|
||||
finder.setPosition(14);
|
||||
QVERIFY(finder.isAtBoundary());
|
||||
finder.setPosition(15);
|
||||
QVERIFY(!finder.isAtBoundary());
|
||||
finder.setPosition(16);
|
||||
QVERIFY(!finder.isAtBoundary());
|
||||
finder.setPosition(17);
|
||||
QVERIFY(!finder.isAtBoundary());
|
||||
finder.setPosition(18);
|
||||
QVERIFY(finder.isAtBoundary());
|
||||
finder.setPosition(19);
|
||||
QVERIFY(!finder.isAtBoundary());
|
||||
finder.setPosition(20);
|
||||
QVERIFY(finder.isAtBoundary());
|
||||
finder.setPosition(21);
|
||||
QVERIFY(finder.isAtBoundary());
|
||||
finder.setPosition(22);
|
||||
QVERIFY(!finder.isAtBoundary());
|
||||
finder.setPosition(23);
|
||||
QVERIFY(!finder.isAtBoundary());
|
||||
finder.setPosition(24);
|
||||
QVERIFY(!finder.isAtBoundary());
|
||||
finder.setPosition(25);
|
||||
QVERIFY(finder.isAtBoundary());
|
||||
finder.setPosition(26);
|
||||
QVERIFY(finder.isAtBoundary());
|
||||
for (int i = 27; i < 32; ++i) {
|
||||
finder.setPosition(i);
|
||||
QVERIFY(!finder.isAtBoundary());
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
QTEST_MAIN(tst_QTextBoundaryFinder)
|
||||
|
Loading…
Reference in New Issue
Block a user