diff --git a/src/3rdparty/harfbuzz/src/harfbuzz-external.h b/src/3rdparty/harfbuzz/src/harfbuzz-external.h index 1f7ae1c902..8daf8a69e9 100644 --- a/src/3rdparty/harfbuzz/src/harfbuzz-external.h +++ b/src/3rdparty/harfbuzz/src/harfbuzz-external.h @@ -34,23 +34,7 @@ HB_BEGIN_HEADER They need to be provided by the application/library */ - -/* - see http://www.unicode.org/reports/tr14/tr14-19.html - we don't use the XX, AI and CB properties and map them to AL instead. - as we don't support any EBDIC based OS'es, NL is ignored and mapped to AL as well. -*/ -typedef enum { - HB_LineBreak_OP, HB_LineBreak_CL, HB_LineBreak_QU, HB_LineBreak_GL, HB_LineBreak_NS, - HB_LineBreak_EX, HB_LineBreak_SY, HB_LineBreak_IS, HB_LineBreak_PR, HB_LineBreak_PO, - HB_LineBreak_NU, HB_LineBreak_AL, HB_LineBreak_ID, HB_LineBreak_IN, HB_LineBreak_HY, - HB_LineBreak_BA, HB_LineBreak_BB, HB_LineBreak_B2, HB_LineBreak_ZW, HB_LineBreak_CM, - HB_LineBreak_WJ, HB_LineBreak_H2, HB_LineBreak_H3, HB_LineBreak_JL, HB_LineBreak_JV, - HB_LineBreak_JT, HB_LineBreak_SA, HB_LineBreak_SG, - HB_LineBreak_SP, HB_LineBreak_CR, HB_LineBreak_LF, HB_LineBreak_BK -} HB_LineBreakClass; - -typedef enum +typedef enum { HB_Mark_NonSpacing, /* Mn */ HB_Mark_SpacingCombining, /* Mc */ @@ -90,55 +74,6 @@ typedef enum HB_Symbol_Other /* So */ } HB_CharCategory; -typedef enum -{ - HB_Grapheme_Other, - HB_Grapheme_CR, - HB_Grapheme_LF, - HB_Grapheme_Control, - HB_Grapheme_Extend, - HB_Grapheme_L, - HB_Grapheme_V, - HB_Grapheme_T, - HB_Grapheme_LV, - HB_Grapheme_LVT -} HB_GraphemeClass; - - -typedef enum -{ - HB_Word_Other, - HB_Word_Format, - HB_Word_Katakana, - HB_Word_ALetter, - HB_Word_MidLetter, - HB_Word_MidNum, - HB_Word_Numeric, - HB_Word_ExtendNumLet -} HB_WordClass; - - -typedef enum -{ - HB_Sentence_Other, - HB_Sentence_Sep, - HB_Sentence_Format, - HB_Sentence_Sp, - HB_Sentence_Lower, - HB_Sentence_Upper, - HB_Sentence_OLetter, - HB_Sentence_Numeric, - HB_Sentence_ATerm, - HB_Sentence_STerm, - HB_Sentence_Close -} 
HB_SentenceClass; - -HB_GraphemeClass HB_GetGraphemeClass(HB_UChar32 ch); -HB_WordClass HB_GetWordClass(HB_UChar32 ch); -HB_SentenceClass HB_GetSentenceClass(HB_UChar32 ch); -HB_LineBreakClass HB_GetLineBreakClass(HB_UChar32 ch); - -void HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *grapheme, HB_LineBreakClass *lineBreak); void HB_GetUnicodeCharProperties(HB_UChar32 ch, HB_CharCategory *category, int *combiningClass); HB_CharCategory HB_GetUnicodeCharCategory(HB_UChar32 ch); int HB_GetUnicodeCharCombiningClass(HB_UChar32 ch); diff --git a/src/3rdparty/harfbuzz/src/harfbuzz-shaper.cpp b/src/3rdparty/harfbuzz/src/harfbuzz-shaper.cpp index f6900325bc..2e1b5322d2 100644 --- a/src/3rdparty/harfbuzz/src/harfbuzz-shaper.cpp +++ b/src/3rdparty/harfbuzz/src/harfbuzz-shaper.cpp @@ -32,205 +32,6 @@ #define HB_MIN(a, b) ((a) < (b) ? (a) : (b)) #define HB_MAX(a, b) ((a) > (b) ? (a) : (b)) -// ----------------------------------------------------------------------------------------------------- -// -// The line break algorithm. See http://www.unicode.org/reports/tr14/tr14-13.html -// -// ----------------------------------------------------------------------------------------------------- - -/* The Unicode algorithm does in our opinion allow line breaks at some - places they shouldn't be allowed. 
The following changes were thus - made in comparison to the Unicode reference: - - EX->AL from DB to IB - SY->AL from DB to IB - SY->PO from DB to IB - SY->PR from DB to IB - SY->OP from DB to IB - AL->PR from DB to IB - AL->PO from DB to IB - PR->PR from DB to IB - PO->PO from DB to IB - PR->PO from DB to IB - PO->PR from DB to IB - HY->PO from DB to IB - HY->PR from DB to IB - HY->OP from DB to IB - NU->EX from PB to IB - EX->PO from DB to IB -*/ - -// The following line break classes are not treated by the table: -// AI, BK, CB, CR, LF, NL, SA, SG, SP, XX - -enum break_class { - // the first 4 values have to agree with the enum in QCharAttributes - ProhibitedBreak, // PB in table - DirectBreak, // DB in table - IndirectBreak, // IB in table - CombiningIndirectBreak, // CI in table - CombiningProhibitedBreak // CP in table -}; -#define DB DirectBreak -#define IB IndirectBreak -#define CI CombiningIndirectBreak -#define CP CombiningProhibitedBreak -#define PB ProhibitedBreak - -static const hb_uint8 breakTable[HB_LineBreak_JT+1][HB_LineBreak_JT+1] = -{ -/* OP CL QU GL NS EX SY IS PR PO NU AL ID IN HY BA BB B2 ZW CM WJ H2 H3 JL JV JT */ -/* OP */ { PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, CP, PB, PB, PB, PB, PB, PB }, -/* CL */ { DB, PB, IB, IB, PB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB }, -/* QU */ { PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB }, -/* GL */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB }, -/* NS */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB }, -/* EX */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB }, -/* SY */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB }, -/* IS */ { DB, 
PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB }, -/* PR */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, DB, IB, IB, DB, DB, PB, CI, PB, IB, IB, IB, IB, IB }, -/* PO */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB }, -/* NU */ { IB, PB, IB, IB, IB, IB, PB, PB, IB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB }, -/* AL */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB }, -/* ID */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB }, -/* IN */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB }, -/* HY */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB }, -/* BA */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB }, -/* BB */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB }, -/* B2 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, PB, PB, CI, PB, DB, DB, DB, DB, DB }, -/* ZW */ { DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, PB, DB, DB, DB, DB, DB, DB, DB }, -/* CM */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB }, -/* WJ */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB }, -/* H2 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, IB, IB }, -/* H3 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, IB }, -/* JL */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, IB, IB, IB, IB, DB }, -/* JV */ 
{ DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, IB, IB }, -/* JT */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, IB } -}; -#undef DB -#undef IB -#undef CI -#undef CP -#undef PB - -static const hb_uint8 graphemeTable[HB_Grapheme_LVT + 1][HB_Grapheme_LVT + 1] = -{ -// Other, CR, LF, Control,Extend,L, V, T, LV, LVT - { true , true , true , true , true , true , true , true , true , true }, // Other, - { true , true , true , true , true , true , true , true , true , true }, // CR, - { true , false, true , true , true , true , true , true , true , true }, // LF, - { true , true , true , true , true , true , true , true , true , true }, // Control, - { false, true , true , true , false, false, false, false, false, false }, // Extend, - { true , true , true , true , true , false, true , true , true , true }, // L, - { true , true , true , true , true , false, false, true , false, true }, // V, - { true , true , true , true , true , true , false, false, false, false }, // T, - { true , true , true , true , true , false, true , true , true , true }, // LV, - { true , true , true , true , true , false, true , true , true , true }, // LVT -}; - -static void calcLineBreaks(const HB_UChar16 *uc, hb_uint32 len, HB_CharAttributes *charAttributes) -{ - if (!len) - return; - - // ##### can this fail if the first char is a surrogate? 
- HB_LineBreakClass cls; - HB_GraphemeClass grapheme; - HB_GetGraphemeAndLineBreakClass(*uc, &grapheme, &cls); - // handle case where input starts with an LF - if (cls == HB_LineBreak_LF) - cls = HB_LineBreak_BK; - - charAttributes[0].whiteSpace = (cls == HB_LineBreak_SP || cls == HB_LineBreak_BK); - charAttributes[0].charStop = true; - - int lcls = cls; - for (hb_uint32 i = 1; i < len; ++i) { - charAttributes[i].whiteSpace = false; - charAttributes[i].charStop = true; - - HB_UChar32 code = uc[i]; - HB_GraphemeClass ngrapheme; - HB_LineBreakClass ncls; - HB_GetGraphemeAndLineBreakClass(code, &ngrapheme, &ncls); - charAttributes[i].charStop = graphemeTable[ngrapheme][grapheme]; - // handle surrogates - if (ncls == HB_LineBreak_SG) { - if (HB_IsHighSurrogate(uc[i]) && i < len - 1 && HB_IsLowSurrogate(uc[i+1])) { - continue; - } else if (HB_IsLowSurrogate(uc[i]) && HB_IsHighSurrogate(uc[i-1])) { - code = HB_SurrogateToUcs4(uc[i-1], uc[i]); - HB_GetGraphemeAndLineBreakClass(code, &ngrapheme, &ncls); - charAttributes[i].charStop = false; - } else { - ncls = HB_LineBreak_AL; - } - } - - // set white space and char stop flag - if (ncls >= HB_LineBreak_SP) - charAttributes[i].whiteSpace = true; - - HB_LineBreakType lineBreakType = HB_NoBreak; - if (cls >= HB_LineBreak_LF) { - lineBreakType = HB_ForcedBreak; - } else if(cls == HB_LineBreak_CR) { - lineBreakType = (ncls == HB_LineBreak_LF) ? HB_NoBreak : HB_ForcedBreak; - } - - if (ncls == HB_LineBreak_SP) - goto next_no_cls_update; - if (ncls >= HB_LineBreak_CR) - goto next; - - { - int tcls = ncls; - // for south east asian chars that require a complex (dictionary analysis), the unicode - // standard recommends to treat them as AL. 
thai_attributes and other attribute methods that - // do dictionary analysis can override - if (tcls >= HB_LineBreak_SA) - tcls = HB_LineBreak_AL; - if (cls >= HB_LineBreak_SA) - cls = HB_LineBreak_AL; - - int brk = breakTable[cls][tcls]; - switch (brk) { - case DirectBreak: - lineBreakType = HB_Break; - if (uc[i-1] == 0xad) // soft hyphen - lineBreakType = HB_SoftHyphen; - break; - case IndirectBreak: - lineBreakType = (lcls == HB_LineBreak_SP) ? HB_Break : HB_NoBreak; - break; - case CombiningIndirectBreak: - lineBreakType = HB_NoBreak; - if (lcls == HB_LineBreak_SP){ - if (i > 1) - charAttributes[i-2].lineBreakType = HB_Break; - } else { - goto next_no_cls_update; - } - break; - case CombiningProhibitedBreak: - lineBreakType = HB_NoBreak; - if (lcls != HB_LineBreak_SP) - goto next_no_cls_update; - case ProhibitedBreak: - default: - break; - } - } - next: - cls = ncls; - next_no_cls_update: - lcls = ncls; - grapheme = ngrapheme; - charAttributes[i-1].lineBreakType = lineBreakType; - } - charAttributes[len-1].lineBreakType = HB_ForcedBreak; -} - // -------------------------------------------------------------------------------------------------------------------------------------------- // // Basic processing @@ -679,13 +480,12 @@ const HB_ScriptEngine HB_ScriptEngines[] = { { HB_ArabicShape, 0} }; -void HB_GetCharAttributes(const HB_UChar16 *string, hb_uint32 stringLength, - const HB_ScriptItem *items, hb_uint32 numItems, - HB_CharAttributes *attributes) +void HB_GetTailoredCharAttributes(const HB_UChar16 *string, hb_uint32 stringLength, + const HB_ScriptItem *items, hb_uint32 numItems, + HB_CharAttributes *attributes) { - memset(attributes, 0, stringLength * sizeof(HB_CharAttributes)); - calcLineBreaks(string, stringLength, attributes); - + if (stringLength == 0) + return; for (hb_uint32 i = 0; i < numItems; ++i) { HB_Script script = items[i].script; if (script == HB_Script_Inherited) @@ -698,136 +498,6 @@ void HB_GetCharAttributes(const HB_UChar16 *string, 
hb_uint32 stringLength, } -enum BreakRule { NoBreak = 0, Break = 1, Middle = 2 }; - -static const hb_uint8 wordbreakTable[HB_Word_ExtendNumLet + 1][HB_Word_ExtendNumLet + 1] = { -// Other Format Katakana ALetter MidLetter MidNum Numeric ExtendNumLet - { Break, Break, Break, Break, Break, Break, Break, Break }, // Other - { Break, Break, Break, Break, Break, Break, Break, Break }, // Format - { Break, Break, NoBreak, Break, Break, Break, Break, NoBreak }, // Katakana - { Break, Break, Break, NoBreak, Middle, Break, NoBreak, NoBreak }, // ALetter - { Break, Break, Break, Break, Break, Break, Break, Break }, // MidLetter - { Break, Break, Break, Break, Break, Break, Break, Break }, // MidNum - { Break, Break, Break, NoBreak, Break, Middle, NoBreak, NoBreak }, // Numeric - { Break, Break, NoBreak, NoBreak, Break, Break, NoBreak, NoBreak }, // ExtendNumLet -}; - -void HB_GetWordBoundaries(const HB_UChar16 *string, hb_uint32 stringLength, - const HB_ScriptItem * /*items*/, hb_uint32 /*numItems*/, - HB_CharAttributes *attributes) -{ - if (stringLength == 0) - return; - unsigned int brk = HB_GetWordClass(string[0]); - attributes[0].wordBoundary = true; - for (hb_uint32 i = 1; i < stringLength; ++i) { - if (!attributes[i].charStop) { - attributes[i].wordBoundary = false; - continue; - } - hb_uint32 nbrk = HB_GetWordClass(string[i]); - if (nbrk == HB_Word_Format) { - attributes[i].wordBoundary = (HB_GetSentenceClass(string[i-1]) == HB_Sentence_Sep); - continue; - } - BreakRule rule = (BreakRule)wordbreakTable[brk][nbrk]; - if (rule == Middle) { - rule = Break; - hb_uint32 lookahead = i + 1; - while (lookahead < stringLength) { - hb_uint32 testbrk = HB_GetWordClass(string[lookahead]); - if (testbrk == HB_Word_Format && HB_GetSentenceClass(string[lookahead]) != HB_Sentence_Sep) { - ++lookahead; - continue; - } - if (testbrk == brk) { - rule = NoBreak; - while (i < lookahead) - attributes[i++].wordBoundary = false; - nbrk = testbrk; - } - break; - } - } - 
attributes[i].wordBoundary = (rule == Break); - brk = nbrk; - } -} - - -enum SentenceBreakStates { - SB_Initial, - SB_Upper, - SB_UpATerm, - SB_ATerm, - SB_ATermC, - SB_ACS, - SB_STerm, - SB_STermC, - SB_SCS, - SB_BAfter, - SB_Break, - SB_Look -}; - -static const hb_uint8 sentenceBreakTable[HB_Sentence_Close + 1][HB_Sentence_Close + 1] = { -// Other Sep Format Sp Lower Upper OLetter Numeric ATerm STerm Close - { SB_Initial, SB_BAfter , SB_Initial, SB_Initial, SB_Initial, SB_Upper , SB_Initial, SB_Initial, SB_ATerm , SB_STerm , SB_Initial }, // SB_Initial, - { SB_Initial, SB_BAfter , SB_Upper , SB_Initial, SB_Initial, SB_Upper , SB_Initial, SB_Initial, SB_UpATerm, SB_STerm , SB_Initial }, // SB_Upper - - { SB_Look , SB_BAfter , SB_UpATerm, SB_ACS , SB_Initial, SB_Upper , SB_Break , SB_Initial, SB_ATerm , SB_STerm , SB_ATermC }, // SB_UpATerm - { SB_Look , SB_BAfter , SB_ATerm , SB_ACS , SB_Initial, SB_Break , SB_Break , SB_Initial, SB_ATerm , SB_STerm , SB_ATermC }, // SB_ATerm - { SB_Look , SB_BAfter , SB_ATermC , SB_ACS , SB_Initial, SB_Break , SB_Break , SB_Look , SB_ATerm , SB_STerm , SB_ATermC }, // SB_ATermC, - { SB_Look , SB_BAfter , SB_ACS , SB_ACS , SB_Initial, SB_Break , SB_Break , SB_Look , SB_ATerm , SB_STerm , SB_Look }, // SB_ACS, - - { SB_Break , SB_BAfter , SB_STerm , SB_SCS , SB_Break , SB_Break , SB_Break , SB_Break , SB_ATerm , SB_STerm , SB_STermC }, // SB_STerm, - { SB_Break , SB_BAfter , SB_STermC , SB_SCS , SB_Break , SB_Break , SB_Break , SB_Break , SB_ATerm , SB_STerm , SB_STermC }, // SB_STermC, - { SB_Break , SB_BAfter , SB_SCS , SB_SCS , SB_Break , SB_Break , SB_Break , SB_Break , SB_ATerm , SB_STerm , SB_Break }, // SB_SCS, - { SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break }, // SB_BAfter, -}; - -void HB_GetSentenceBoundaries(const HB_UChar16 *string, hb_uint32 stringLength, - const HB_ScriptItem * /*items*/, hb_uint32 /*numItems*/, - HB_CharAttributes *attributes) 
-{ - if (stringLength == 0) - return; - hb_uint32 brk = sentenceBreakTable[SB_Initial][HB_GetSentenceClass(string[0])]; - attributes[0].sentenceBoundary = true; - for (hb_uint32 i = 1; i < stringLength; ++i) { - if (!attributes[i].charStop) { - attributes[i].sentenceBoundary = false; - continue; - } - brk = sentenceBreakTable[brk][HB_GetSentenceClass(string[i])]; - if (brk == SB_Look) { - brk = SB_Break; - hb_uint32 lookahead = i + 1; - while (lookahead < stringLength) { - hb_uint32 sbrk = HB_GetSentenceClass(string[lookahead]); - if (sbrk != HB_Sentence_Other && sbrk != HB_Sentence_Numeric && sbrk != HB_Sentence_Close) { - break; - } else if (sbrk == HB_Sentence_Lower) { - brk = SB_Initial; - break; - } - ++lookahead; - } - if (brk == SB_Initial) { - while (i < lookahead) - attributes[i++].sentenceBoundary = false; - } - } - if (brk == SB_Break) { - attributes[i].sentenceBoundary = true; - brk = sentenceBreakTable[SB_Initial][HB_GetSentenceClass(string[i])]; - } else { - attributes[i].sentenceBoundary = false; - } - } -} - - static inline char *tag_to_string(HB_UInt tag) { static char string[5]; diff --git a/src/3rdparty/harfbuzz/src/harfbuzz-shaper.h b/src/3rdparty/harfbuzz/src/harfbuzz-shaper.h index f225a86525..6dfcdd20a7 100644 --- a/src/3rdparty/harfbuzz/src/harfbuzz-shaper.h +++ b/src/3rdparty/harfbuzz/src/harfbuzz-shaper.h @@ -143,19 +143,9 @@ typedef struct { hb_bitfield unused :2; } HB_CharAttributes; -void HB_GetCharAttributes(const HB_UChar16 *string, hb_uint32 stringLength, - const HB_ScriptItem *items, hb_uint32 numItems, - HB_CharAttributes *attributes); - -/* requires HB_GetCharAttributes to be called before */ -void HB_GetWordBoundaries(const HB_UChar16 *string, hb_uint32 stringLength, - const HB_ScriptItem *items, hb_uint32 numItems, - HB_CharAttributes *attributes); - -/* requires HB_GetCharAttributes to be called before */ -void HB_GetSentenceBoundaries(const HB_UChar16 *string, hb_uint32 stringLength, - const HB_ScriptItem *items, hb_uint32 
numItems, - HB_CharAttributes *attributes); +void HB_GetTailoredCharAttributes(const HB_UChar16 *string, hb_uint32 stringLength, + const HB_ScriptItem *items, hb_uint32 numItems, + HB_CharAttributes *attributes); typedef enum { diff --git a/src/3rdparty/harfbuzz/tests/Makefile.am b/src/3rdparty/harfbuzz/tests/Makefile.am index febf8909fa..9196ad730f 100644 --- a/src/3rdparty/harfbuzz/tests/Makefile.am +++ b/src/3rdparty/harfbuzz/tests/Makefile.am @@ -2,6 +2,6 @@ SUBDIRS = if QT -SUBDIRS += linebreaking shaping +SUBDIRS += shaping endif diff --git a/src/3rdparty/harfbuzz/tests/linebreaking/.gitignore b/src/3rdparty/harfbuzz/tests/linebreaking/.gitignore deleted file mode 100644 index 81e019d5b9..0000000000 --- a/src/3rdparty/harfbuzz/tests/linebreaking/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -.deps -linebreaking -*.moc -*.o diff --git a/src/3rdparty/harfbuzz/tests/linebreaking/Makefile.am b/src/3rdparty/harfbuzz/tests/linebreaking/Makefile.am deleted file mode 100644 index b710896d6f..0000000000 --- a/src/3rdparty/harfbuzz/tests/linebreaking/Makefile.am +++ /dev/null @@ -1,12 +0,0 @@ - -check_PROGRAMS = linebreaking - -linebreaking_SOURCES = main.cpp harfbuzz-qt.cpp -linebreaking_LDADD = $(QT_GUI_LIBS) $(QT_QTEST_LIBS) ../../src/libharfbuzz-1.la - -main.o: main.moc - -main.moc: $(srcdir)/main.cpp - $(QT_MOC) -o main.moc $(srcdir)/main.cpp - -INCLUDES = -I$(top_srcdir)/src $(FREETYPE_CFLAGS) $(QT_GUI_CFLAGS) $(QT_QTEST_CFLAGS) diff --git a/src/3rdparty/harfbuzz/tests/linebreaking/main.cpp b/src/3rdparty/harfbuzz/tests/linebreaking/main.cpp deleted file mode 100644 index 3b2734ac02..0000000000 --- a/src/3rdparty/harfbuzz/tests/linebreaking/main.cpp +++ /dev/null @@ -1,230 +0,0 @@ -/* - * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies) - * - * This is part of HarfBuzz, an OpenType Layout engine library. 
- * - * Permission is hereby granted, without written agreement and without - * license or royalty fees, to use, copy, modify, and distribute this - * software and its documentation for any purpose, provided that the - * above copyright notice and the following two paragraphs appear in - * all copies of this software. - * - * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR - * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES - * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN - * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH - * DAMAGE. - * - * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, - * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS - * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO - * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. - */ - -/* - !!!!!! Warning !!!!! - Please don't save this file in emacs. It contains utf8 text sequences emacs will - silently convert to a series of question marks. 
- */ -#include -#include - -#include - -static QVector getCharAttributes(const QString &str, HB_Script script = HB_Script_Common) -{ - QVector attrs(str.length()); - HB_ScriptItem item; - item.pos = 0; - item.length = str.length(); - item.script = script; - HB_GetCharAttributes(str.utf16(), str.length(), - &item, 1, - attrs.data()); - return attrs; -} - -class tst_CharAttributes : public QObject -{ - Q_OBJECT - -public: - tst_CharAttributes(); - virtual ~tst_CharAttributes(); - -public slots: - void init(); - void cleanup(); -private slots: - void lineBreaking(); - void charWordStopOnLineSeparator(); - void charStopForSurrogatePairs(); - void thaiWordBreak(); -}; - - -tst_CharAttributes::tst_CharAttributes() -{ -} - -tst_CharAttributes::~tst_CharAttributes() -{ -} - -void tst_CharAttributes::init() -{ -} - -void tst_CharAttributes::cleanup() -{ -} - - -void tst_CharAttributes::lineBreaking() -{ - struct Breaks { - const char *utf8; - uchar breaks[32]; - }; - Breaks brks[] = { - { "11", { false, 0xff } }, - { "aa", { false, 0xff } }, - { "++", { false, 0xff } }, - { "--", { false, 0xff } }, - { "((", { false, 0xff } }, - { "))", { false, 0xff } }, - { "..", { false, 0xff } }, - { "\"\"", { false, 0xff } }, - { "$$", { false, 0xff } }, - { "!!", { false, 0xff } }, - { "??", { false, 0xff } }, - { ",,", { false, 0xff } }, - - { ")()", { true, false, 0xff } }, - { "?!?", { false, false, 0xff } }, - { ".,.", { false, false, 0xff } }, - { "+-+", { false, false, 0xff } }, - { "+=+", { false, false, 0xff } }, - { "+(+", { false, false, 0xff } }, - { "+)+", { false, false, 0xff } }, - - { "a b", { false, true, 0xff } }, - { "a(b", { false, false, 0xff } }, - { "a)b", { false, false, 0xff } }, - { "a-b", { false, true, 0xff } }, - { "a.b", { false, false, 0xff } }, - { "a+b", { false, false, 0xff } }, - { "a?b", { false, false, 0xff } }, - { "a!b", { false, false, 0xff } }, - { "a$b", { false, false, 0xff } }, - { "a,b", { false, false, 0xff } }, - { "a/b", { false, false, 
0xff } }, - { "1/2", { false, false, 0xff } }, - { "./.", { false, false, 0xff } }, - { ",/,", { false, false, 0xff } }, - { "!/!", { false, false, 0xff } }, - { "\\/\\", { false, false, 0xff } }, - { "1 2", { false, true, 0xff } }, - { "1(2", { false, false, 0xff } }, - { "1)2", { false, false, 0xff } }, - { "1-2", { false, false, 0xff } }, - { "1.2", { false, false, 0xff } }, - { "1+2", { false, false, 0xff } }, - { "1?2", { false, true, 0xff } }, - { "1!2", { false, true, 0xff } }, - { "1$2", { false, false, 0xff } }, - { "1,2", { false, false, 0xff } }, - { "1/2", { false, false, 0xff } }, - { "\330\260\331\216\331\204\331\220\331\203\331\216", { false, false, false, false, false, 0xff } }, - { "\330\247\331\204\331\205 \330\247\331\204\331\205", { false, false, false, true, false, false, 0xff } }, - { "1#2", { false, false, 0xff } }, - { "!#!", { false, false, 0xff } }, - { 0, {} } - }; - Breaks *b = brks; - while (b->utf8) { - QString str = QString::fromUtf8(b->utf8); - - QVector attrs = getCharAttributes(str); - - int i; - for (i = 0; i < (int)str.length() - 1; ++i) { - QVERIFY(b->breaks[i] != 0xff); - if ( (attrs[i].lineBreakType != HB_NoBreak) != (bool)b->breaks[i] ) { - qDebug("test case \"%s\" failed at char %d; break type: %d", b->utf8, i, attrs[i].lineBreakType); - QCOMPARE( (attrs[i].lineBreakType != HB_NoBreak), (bool)b->breaks[i] ); - } - } - QVERIFY(attrs[i].lineBreakType == HB_ForcedBreak); - QCOMPARE(b->breaks[i], (uchar)0xff); - ++b; - } -} - -void tst_CharAttributes::charWordStopOnLineSeparator() -{ - const QChar lineSeparator(QChar::LineSeparator); - QString txt; - txt.append(lineSeparator); - txt.append(lineSeparator); - QVector attrs = getCharAttributes(txt); - QVERIFY(attrs[1].charStop); -} - -void tst_CharAttributes::charStopForSurrogatePairs() -{ - QString txt; - txt.append("a"); - txt.append(0xd87e); - txt.append(0xdc25); - txt.append("b"); - QVector attrs = getCharAttributes(txt); - QVERIFY(attrs[0].charStop); - 
QVERIFY(attrs[1].charStop); - QVERIFY(!attrs[2].charStop); - QVERIFY(attrs[3].charStop); -} - -void tst_CharAttributes::thaiWordBreak() -{ - // สวัสดีครับ นี่เป็นการงทดสอบตัวเอ - QTextCodec *codec = QTextCodec::codecForMib(2259); - QString txt = codec->toUnicode(QByteArray("\xca\xc7\xd1\xca\xb4\xd5\xa4\xc3\xd1\xba\x20\xb9\xd5\xe8\xe0\xbb\xe7\xb9\xa1\xd2\xc3\xb7\xb4\xca\xcd\xba\xb5\xd1\xc7\xe0\xcd\xa7")); - - - QCOMPARE(txt.length(), 32); - QVector attrs = getCharAttributes(txt, HB_Script_Thai); - QVERIFY(attrs[0].lineBreakType == HB_NoBreak); - QVERIFY(attrs[1].lineBreakType == HB_NoBreak); - QVERIFY(attrs[2].lineBreakType == HB_NoBreak); - QVERIFY(attrs[3].lineBreakType == HB_NoBreak); - QVERIFY(attrs[4].lineBreakType == HB_NoBreak); - QVERIFY(attrs[5].lineBreakType == HB_Break); - QVERIFY(attrs[6].lineBreakType == HB_NoBreak); - QVERIFY(attrs[7].lineBreakType == HB_NoBreak); - QVERIFY(attrs[8].lineBreakType == HB_NoBreak); - QVERIFY(attrs[9].lineBreakType == HB_NoBreak); - QVERIFY(attrs[10].lineBreakType == HB_Break); - QVERIFY(attrs[11].lineBreakType == HB_NoBreak); - QVERIFY(attrs[12].lineBreakType == HB_NoBreak); - QVERIFY(attrs[13].lineBreakType == HB_Break); - QVERIFY(attrs[14].lineBreakType == HB_NoBreak); - QVERIFY(attrs[15].lineBreakType == HB_NoBreak); - QVERIFY(attrs[16].lineBreakType == HB_NoBreak); - QVERIFY(attrs[17].lineBreakType == HB_Break); - QVERIFY(attrs[18].lineBreakType == HB_NoBreak); - QVERIFY(attrs[19].lineBreakType == HB_NoBreak); - QVERIFY(attrs[20].lineBreakType == HB_Break); - QVERIFY(attrs[21].lineBreakType == HB_NoBreak); - QVERIFY(attrs[22].lineBreakType == HB_NoBreak); - QVERIFY(attrs[23].lineBreakType == HB_NoBreak); - QVERIFY(attrs[24].lineBreakType == HB_NoBreak); - QVERIFY(attrs[25].lineBreakType == HB_Break); - QVERIFY(attrs[26].lineBreakType == HB_NoBreak); - for (int i = 27; i < 32; ++i) - QVERIFY(attrs[i].lineBreakType == HB_NoBreak); -} - -QTEST_MAIN(tst_CharAttributes) -#include "main.moc" diff --git 
a/src/3rdparty/harfbuzz/tests/shaping/Makefile.am b/src/3rdparty/harfbuzz/tests/shaping/Makefile.am index 31c6db73b4..5a7da0abf3 100644 --- a/src/3rdparty/harfbuzz/tests/shaping/Makefile.am +++ b/src/3rdparty/harfbuzz/tests/shaping/Makefile.am @@ -1,7 +1,7 @@ check_PROGRAMS = shaping -shaping_SOURCES = main.cpp ../linebreaking/harfbuzz-qt.cpp +shaping_SOURCES = main.cpp harfbuzz-qt.cpp shaping_LDADD = $(QT_GUI_LIBS) $(QT_QTEST_LIBS) ../../src/libharfbuzz-1.la main.o: main.moc diff --git a/src/3rdparty/harfbuzz/tests/linebreaking/harfbuzz-qt.cpp b/src/3rdparty/harfbuzz/tests/shaping/harfbuzz-qt.cpp similarity index 65% rename from src/3rdparty/harfbuzz/tests/linebreaking/harfbuzz-qt.cpp rename to src/3rdparty/harfbuzz/tests/shaping/harfbuzz-qt.cpp index 2c261639ad..924a97da27 100644 --- a/src/3rdparty/harfbuzz/tests/linebreaking/harfbuzz-qt.cpp +++ b/src/3rdparty/harfbuzz/tests/shaping/harfbuzz-qt.cpp @@ -23,21 +23,11 @@ */ #include -#include +#include #include -#include extern "C" { -HB_LineBreakClass HB_GetLineBreakClass(HB_UChar32 ch) -{ -#if QT_VERSION >= 0x040300 - return (HB_LineBreakClass)QUnicodeTables::lineBreakClass(ch); -#else -#error "This test currently requires Qt >= 4.3" -#endif -} - void HB_GetUnicodeCharProperties(HB_UChar32 ch, HB_CharCategory *category, int *combiningClass) { *category = (HB_CharCategory)QChar::category(ch); @@ -59,26 +49,6 @@ HB_UChar16 HB_GetMirroredChar(HB_UChar16 ch) return QChar::mirroredChar(ch); } -HB_WordClass HB_GetWordClass(HB_UChar32 ch) -{ - const QUnicodeTables::Properties *prop = QUnicodeTables::properties(ch); - return (HB_WordClass) prop->wordBreak; -} - - -HB_SentenceClass HB_GetSentenceClass(HB_UChar32 ch) -{ - const QUnicodeTables::Properties *prop = QUnicodeTables::properties(ch); - return (HB_SentenceClass) prop->sentenceBreak; -} - -void HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *grapheme, HB_LineBreakClass *lineBreak) -{ - const QUnicodeTables::Properties *prop = 
QUnicodeTables::properties(ch); - *grapheme = (HB_GraphemeClass) prop->graphemeBreak; - *lineBreak = (HB_LineBreakClass) prop->line_break_class; -} - void (*HB_Library_Resolve(const char *library, int version, const char *symbol))() { return QLibrary::resolve(library, version, symbol); diff --git a/src/corelib/tools/qharfbuzz.cpp b/src/corelib/tools/qharfbuzz.cpp index 11126b814d..97b6c1e8bb 100644 --- a/src/corelib/tools/qharfbuzz.cpp +++ b/src/corelib/tools/qharfbuzz.cpp @@ -39,47 +39,15 @@ ** ****************************************************************************/ +#include "qharfbuzz_p.h" + #include "qunicodetables_p.h" #include "qlibrary.h" -#include "qtextcodec.h" - -#include "qharfbuzz_p.h" QT_USE_NAMESPACE extern "C" { -HB_GraphemeClass HB_GetGraphemeClass(HB_UChar32 ch) -{ - const QUnicodeTables::Properties *prop = QUnicodeTables::properties(ch); - return (HB_GraphemeClass) prop->graphemeBreak; -} - -HB_WordClass HB_GetWordClass(HB_UChar32 ch) -{ - const QUnicodeTables::Properties *prop = QUnicodeTables::properties(ch); - return (HB_WordClass) prop->wordBreak; -} - -HB_SentenceClass HB_GetSentenceClass(HB_UChar32 ch) -{ - const QUnicodeTables::Properties *prop = QUnicodeTables::properties(ch); - return (HB_SentenceClass) prop->sentenceBreak; -} - -HB_LineBreakClass HB_GetLineBreakClass(HB_UChar32 ch) -{ - return (HB_LineBreakClass)QUnicodeTables::lineBreakClass(ch); -} - - -void HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *grapheme, HB_LineBreakClass *lineBreak) -{ - const QUnicodeTables::Properties *prop = QUnicodeTables::properties(ch); - *grapheme = (HB_GraphemeClass) prop->graphemeBreak; - *lineBreak = (HB_LineBreakClass) prop->line_break_class; -} - void HB_GetUnicodeCharProperties(HB_UChar32 ch, HB_CharCategory *category, int *combiningClass) { const QUnicodeTables::Properties *prop = QUnicodeTables::properties(ch); @@ -135,11 +103,4 @@ void qHBFreeFace(HB_Face face) HB_FreeFace(face); } -void qGetCharAttributes(const 
HB_UChar16 *string, hb_uint32 stringLength, - const HB_ScriptItem *items, hb_uint32 numItems, - HB_CharAttributes *attributes) -{ - HB_GetCharAttributes(string, stringLength, items, numItems, attributes); -} - QT_END_NAMESPACE diff --git a/src/corelib/tools/qharfbuzz_p.h b/src/corelib/tools/qharfbuzz_p.h index 3cef3a55dd..72d5bda77f 100644 --- a/src/corelib/tools/qharfbuzz_p.h +++ b/src/corelib/tools/qharfbuzz_p.h @@ -58,11 +58,6 @@ QT_BEGIN_NAMESPACE -// temporary forward until all the textengine code has been moved to QtCore -Q_CORE_EXPORT void qGetCharAttributes(const HB_UChar16 *string, hb_uint32 stringLength, - const HB_ScriptItem *items, hb_uint32 numItems, - HB_CharAttributes *attributes); - Q_CORE_EXPORT HB_Bool qShapeItem(HB_ShaperItem *item); // ### temporary diff --git a/src/corelib/tools/qtextboundaryfinder.cpp b/src/corelib/tools/qtextboundaryfinder.cpp index 20ee954efd..c6c88ea8aa 100644 --- a/src/corelib/tools/qtextboundaryfinder.cpp +++ b/src/corelib/tools/qtextboundaryfinder.cpp @@ -40,9 +40,9 @@ ****************************************************************************/ #include #include + #include -#include -#include "private/qharfbuzz_p.h" +#include QT_BEGIN_NAMESPACE @@ -93,11 +93,12 @@ static void init(QTextBoundaryFinder::BoundaryType type, const QChar *chars, int scriptItems.append(item); } - qGetCharAttributes(string, length, scriptItems.data(), scriptItems.count(), attributes); + QCharAttributeOptions options = 0; if (type == QTextBoundaryFinder::Word) - HB_GetWordBoundaries(string, length, scriptItems.data(), scriptItems.count(), attributes); + options |= GetWordBreaks; else if (type == QTextBoundaryFinder::Sentence) - HB_GetSentenceBoundaries(string, length, scriptItems.data(), scriptItems.count(), attributes); + options |= GetSentenceBreaks; + qGetCharAttributes(string, length, scriptItems.data(), scriptItems.count(), attributes, options); } /*! 
diff --git a/src/corelib/tools/qunicodetools.cpp b/src/corelib/tools/qunicodetools.cpp new file mode 100644 index 0000000000..814eba771a --- /dev/null +++ b/src/corelib/tools/qunicodetools.cpp @@ -0,0 +1,398 @@ +/**************************************************************************** +** +** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies). +** Contact: http://www.qt-project.org/ +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** GNU Lesser General Public License Usage +** This file may be used under the terms of the GNU Lesser General Public +** License version 2.1 as published by the Free Software Foundation and +** appearing in the file LICENSE.LGPL included in the packaging of this +** file. Please review the following information to ensure the GNU Lesser +** General Public License version 2.1 requirements will be met: +** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain additional +** rights. These rights are described in the Nokia Qt LGPL Exception +** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU General +** Public License version 3.0 as published by the Free Software Foundation +** and appearing in the file LICENSE.GPL included in the packaging of this +** file. Please review the following information to ensure the GNU General +** Public License version 3.0 requirements will be met: +** http://www.gnu.org/copyleft/gpl.html. +** +** Other Usage +** Alternatively, this file may be used in accordance with the terms and +** conditions contained in a signed written agreement between you and Nokia. 
+** +** +** +** +** +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include "qunicodetools_p.h" + +#include "qunicodetables_p.h" + +QT_BEGIN_NAMESPACE + +// ----------------------------------------------------------------------------------------------------- +// +// The line breaking algorithm. See http://www.unicode.org/reports/tr14/tr14-19.html +// +// ----------------------------------------------------------------------------------------------------- +// +// The text boundaries determination algorithm. See http://www.unicode.org/reports/tr29/tr29-11.html +// +// ----------------------------------------------------------------------------------------------------- + +namespace { + +/* The Unicode algorithm does in our opinion allow line breaks at some + places they shouldn't be allowed. The following changes were thus + made in comparison to the Unicode reference: + + EX->AL from DB to IB + SY->AL from DB to IB + SY->PO from DB to IB + SY->PR from DB to IB + SY->OP from DB to IB + AL->PR from DB to IB + AL->PO from DB to IB + PR->PR from DB to IB + PO->PO from DB to IB + PR->PO from DB to IB + PO->PR from DB to IB + HY->PO from DB to IB + HY->PR from DB to IB + HY->OP from DB to IB + NU->EX from PB to IB + EX->PO from DB to IB +*/ + +// The following line break classes are not treated by the table: +// AI, BK, CB, CR, LF, NL, SA, SG, SP, XX + +enum LineBreakRule { + ProhibitedBreak, // PB in table + DirectBreak, // DB in table + IndirectBreak, // IB in table + CombiningIndirectBreak, // CI in table + CombiningProhibitedBreak // CP in table +}; +#define DB DirectBreak +#define IB IndirectBreak +#define CI CombiningIndirectBreak +#define CP CombiningProhibitedBreak +#define PB ProhibitedBreak +static const uchar lineBreakTable[QUnicodeTables::LineBreak_JT + 1][QUnicodeTables::LineBreak_JT + 1] = { +/* OP CL QU GL NS EX SY IS PR PO NU AL ID IN HY BA BB B2 ZW CM WJ H2 H3 JL JV JT */ +/* OP */ { PB, PB, 
PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, CP, PB, PB, PB, PB, PB, PB }, +/* CL */ { DB, PB, IB, IB, PB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB }, +/* QU */ { PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB }, +/* GL */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB }, +/* NS */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB }, +/* EX */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB }, +/* SY */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB }, +/* IS */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB }, +/* PR */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, DB, IB, IB, DB, DB, PB, CI, PB, IB, IB, IB, IB, IB }, +/* PO */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB }, +/* NU */ { IB, PB, IB, IB, IB, IB, PB, PB, IB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB }, +/* AL */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB }, +/* ID */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB }, +/* IN */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB }, +/* HY */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB }, +/* BA */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB }, +/* BB */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB }, +/* B2 */ { 
DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, PB, PB, CI, PB, DB, DB, DB, DB, DB }, +/* ZW */ { DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, PB, DB, DB, DB, DB, DB, DB, DB }, +/* CM */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB }, +/* WJ */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB }, +/* H2 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, IB, IB }, +/* H3 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, IB }, +/* JL */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, IB, IB, IB, IB, DB }, +/* JV */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, IB, IB }, +/* JT */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, IB } +}; +#undef DB +#undef IB +#undef CI +#undef CP +#undef PB + +static const uchar graphemeBreakTable[QUnicodeTables::GraphemeBreakLVT + 1][QUnicodeTables::GraphemeBreakLVT + 1] = { +// Other, CR, LF, Control, Extend, L, V, T, LV, LVT + { true , true , true , true , true , true , true , true , true , true }, // Other, + { true , true , true , true , true , true , true , true , true , true }, // CR, + { true , false, true , true , true , true , true , true , true , true }, // LF, + { true , true , true , true , true , true , true , true , true , true }, // Control, + { false, true , true , true , false, false, false, false, false, false }, // Extend, + { true , true , true , true , true , false, true , true , true , true }, // L, + { true , true , true , true , true , false, false, true , false, true }, // V, + { true , true , true , true , true , true , false, false, false, false }, // T, + { true , true , true , true , true , false, true 
, true , true , true }, // LV,
+    { true , true , true , true , true , false, true , true , true , true }, // LVT
+};
+
+// Fills whiteSpace, charStop and lineBreakType for every UTF-16 code unit in
+// string[0..len-1].
+//  - charStop is looked up in graphemeBreakTable[current][previous].
+//  - attributes[i-1].lineBreakType describes the break opportunity between
+//    code units i-1 and i; it comes from lineBreakTable plus the explicit
+//    handling of the SP/CR/LF/BK/SA/SG classes below.
+//  - the last entry always gets HB_ForcedBreak.
+// Requires len >= 1: string[0] and attributes[len-1] are accessed
+// unconditionally.
+static void calcGraphemeAndLineBreaks(const ushort *string, quint32 len, HB_CharAttributes *attributes)
+{
+    // ##### can this fail if the first char is a surrogate?
+    const QUnicodeTables::Properties *prop = QUnicodeTables::properties(string[0]);
+    QUnicodeTables::GraphemeBreak grapheme = (QUnicodeTables::GraphemeBreak) prop->graphemeBreak;
+    QUnicodeTables::LineBreakClass cls = (QUnicodeTables::LineBreakClass) prop->line_break_class;
+    // handle case where input starts with an LF
+    if (cls == QUnicodeTables::LineBreak_LF)
+        cls = QUnicodeTables::LineBreak_BK;
+
+    attributes[0].whiteSpace = (cls == QUnicodeTables::LineBreak_SP || cls == QUnicodeTables::LineBreak_BK);
+    attributes[0].charStop = true;
+
+    // cls  : class used as the row index into lineBreakTable (not updated for
+    //        SP and for some combining cases, see the labels at the loop end)
+    // lcls : class of the immediately preceding code unit, always updated
+    int lcls = cls;
+    for (quint32 i = 1; i < len; ++i) {
+        attributes[i].whiteSpace = false;
+        attributes[i].charStop = true;
+
+        uint ucs4 = string[i];
+        prop = QUnicodeTables::properties(ucs4);
+        QUnicodeTables::GraphemeBreak ngrapheme = (QUnicodeTables::GraphemeBreak) prop->graphemeBreak;
+        QUnicodeTables::LineBreakClass ncls = (QUnicodeTables::LineBreakClass) prop->line_break_class;
+        attributes[i].charStop = graphemeBreakTable[ngrapheme][grapheme];
+        // handle surrogates
+        if (ncls == QUnicodeTables::LineBreak_SG) {
+            if (QChar::isHighSurrogate(string[i]) && i < len - 1 && QChar::isLowSurrogate(string[i+1])) {
+                // Valid pair: classify it when the low surrogate is reached.
+                // NOTE(review): this continue skips the bookkeeping at the
+                // bottom of the loop, so attributes[i-1].lineBreakType is not
+                // assigned on this iteration; the break type computed for the
+                // pair on the next iteration is stored in this high
+                // surrogate's slot. Confirm that consumers expect this.
+                continue;
+            } else if (QChar::isLowSurrogate(string[i]) && QChar::isHighSurrogate(string[i-1])) {
+                // Second half of a valid pair: use the properties of the
+                // combined code point and forbid a char stop inside the pair.
+                ucs4 = QChar::surrogateToUcs4(string[i-1], string[i]);
+                prop = QUnicodeTables::properties(ucs4);
+                ngrapheme = (QUnicodeTables::GraphemeBreak) prop->graphemeBreak;
+                ncls = (QUnicodeTables::LineBreakClass) prop->line_break_class;
+                attributes[i].charStop = false;
+            } else {
+                // Unpaired surrogate: treat it as AL.
+                ncls = QUnicodeTables::LineBreak_AL;
+            }
+        }
+
+        // set white space and char stop flag
+        // (relies on SP and every class ordered after it being white space
+        // or a line terminator - TODO confirm against the enum order in
+        // QUnicodeTables::LineBreakClass)
+        if (ncls >= QUnicodeTables::LineBreak_SP)
+            attributes[i].whiteSpace = true;
+
+        // Default result for the position between i-1 and i. A previous class
+        // ordered at or after LF forces a break; CR forces one unless it is
+        // followed by LF.
+        HB_LineBreakType lineBreakType = HB_NoBreak;
+        if (cls >= QUnicodeTables::LineBreak_LF) {
+            lineBreakType = HB_ForcedBreak;
+        } else if (cls == QUnicodeTables::LineBreak_CR) {
+            lineBreakType = (ncls == QUnicodeTables::LineBreak_LF) ? HB_NoBreak : HB_ForcedBreak;
+        }
+
+        // SP is remembered in lcls only; cls keeps the class before the space.
+        if (ncls == QUnicodeTables::LineBreak_SP)
+            goto next_no_cls_update;
+        // Classes ordered at or after CR are not looked up in the table.
+        if (ncls >= QUnicodeTables::LineBreak_CR)
+            goto next;
+
+        {
+            int tcls = ncls;
+            // for south east asian chars that require a complex (dictionary analysis), the unicode
+            // standard recommends to treat them as AL. thai_attributes and other attribute methods that
+            // do dictionary analysis can override
+            // (the mapping also keeps both indices within the bounds of
+            // lineBreakTable, which is sized LineBreak_JT + 1)
+            if (tcls >= QUnicodeTables::LineBreak_SA)
+                tcls = QUnicodeTables::LineBreak_AL;
+            if (cls >= QUnicodeTables::LineBreak_SA)
+                cls = QUnicodeTables::LineBreak_AL;
+
+            int brk = lineBreakTable[cls][tcls];
+            switch (brk) {
+            case DirectBreak:
+                lineBreakType = HB_Break;
+                if (string[i-1] == 0xad) // soft hyphen
+                    lineBreakType = HB_SoftHyphen;
+                break;
+            case IndirectBreak:
+                // break only if a space separates the two characters
+                lineBreakType = (lcls == QUnicodeTables::LineBreak_SP) ? HB_Break : HB_NoBreak;
+                break;
+            case CombiningIndirectBreak:
+                lineBreakType = HB_NoBreak;
+                if (lcls == QUnicodeTables::LineBreak_SP){
+                    // the opportunity is recorded two positions back instead
+                    if (i > 1)
+                        attributes[i-2].lineBreakType = HB_Break;
+                } else {
+                    // cls is left unchanged for the next lookup
+                    goto next_no_cls_update;
+                }
+                break;
+            case CombiningProhibitedBreak:
+                lineBreakType = HB_NoBreak;
+                if (lcls != QUnicodeTables::LineBreak_SP)
+                    goto next_no_cls_update;
+                // deliberate fall-through
+            case ProhibitedBreak:
+            default:
+                break;
+            }
+        }
+    next:
+        cls = ncls;
+    next_no_cls_update:
+        lcls = ncls;
+        grapheme = ngrapheme;
+        attributes[i-1].lineBreakType = lineBreakType;
+    }
+    attributes[len-1].lineBreakType = HB_ForcedBreak;
+}
+
+
+// Outcome of a wordBreakTable lookup. Middle means the decision depends on
+// the character that follows (resolved by the lookahead in calcWordBreaks).
+enum WordBreakRule { NoBreak = 0, Break = 1, Middle = 2 };
+
+// Indexed as wordBreakTable[class of previous character][class of current
+// character]; see the use in calcWordBreaks.
+static const uchar wordBreakTable[QUnicodeTables::WordBreakExtendNumLet + 1][QUnicodeTables::WordBreakExtendNumLet + 1] = {
+// Other Format Katakana ALetter MidLetter MidNum Numeric ExtendNumLet
+    { Break , Break , Break , Break , Break , Break , Break , Break }, // Other
+    { Break , Break , Break , Break , Break , Break , Break , Break }, // Format
+    { Break , Break , NoBreak, Break , Break , Break , Break , NoBreak }, // Katakana
+    { Break , Break , Break , NoBreak, Middle , Break , NoBreak, NoBreak }, // ALetter
+    { Break , Break , Break , Break , Break , Break , Break , Break }, // MidLetter
+    { Break , Break , Break , Break , Break , Break , Break , Break }, // MidNum
+    { Break , Break , Break , NoBreak, Break , Middle , NoBreak, NoBreak }, // Numeric
+    { Break , Break , NoBreak, NoBreak, Break , Break , NoBreak, NoBreak }, // ExtendNumLet
+};
+
+// Sets wordBoundary for every code unit of string[0..len-1] from
+// wordBreakTable. Reads attributes[i].charStop, so the grapheme pass
+// (calcGraphemeAndLineBreaks) has to run first; positions that are not char
+// stops never become word boundaries. Requires len >= 1.
+static void calcWordBreaks(const ushort *string, quint32 len, HB_CharAttributes *attributes)
+{
+    quint32 brk = QUnicodeTables::wordBreakClass(string[0]);
+
+    attributes[0].wordBoundary = true;
+
+    for (quint32 i = 1; i < len; ++i) {
+        if (!attributes[i].charStop) {
+            attributes[i].wordBoundary = false;
+            continue;
+        }
+
+        quint32 nbrk = QUnicodeTables::wordBreakClass(string[i]);
+        if (nbrk == QUnicodeTables::WordBreakFormat) {
+            // A Format character is a boundary only directly after a Sep
+            // character. brk is not updated, so the next lookup still uses
+            // the class seen before the Format character.
+            attributes[i].wordBoundary = (QUnicodeTables::sentenceBreakClass(string[i-1]) == QUnicodeTables::SentenceBreakSep);
+            continue;
+        }
+
+        WordBreakRule rule = (WordBreakRule)wordBreakTable[brk][nbrk];
+        if (rule == Middle) {
+            // Break by default; suppress it only when the next character,
+            // ignoring Format characters that are not Sep, has the same
+            // class as the character before the current one.
+            rule = Break;
+            quint32 lookahead = i + 1;
+            while (lookahead < len) {
+                quint32 testbrk = QUnicodeTables::wordBreakClass(string[lookahead]);
+                if (testbrk == QUnicodeTables::WordBreakFormat
+                    && QUnicodeTables::sentenceBreakClass(string[lookahead]) != QUnicodeTables::SentenceBreakSep) {
+                    ++lookahead;
+                    continue;
+                }
+                if (testbrk == brk) {
+                    rule = NoBreak;
+                    // Clear everything up to the matching character and
+                    // resume the outer loop from there (i is advanced).
+                    while (i < lookahead)
+                        attributes[i++].wordBoundary = false;
+                    nbrk = testbrk;
+                }
+                break;
+            }
+        }
+        attributes[i].wordBoundary = (rule == Break);
+        brk = nbrk;
+    }
+}
+
+
+// States of the sentence-break state machine. SB_Break and SB_Lookup are
+// results of a transition that calcSentenceBreaks resolves immediately.
+enum SentenceBreakState {
+    SB_Initial,
+    SB_Upper,
+    SB_UpATerm,
+    SB_ATerm,
+    SB_ATermC,
+    SB_ACS,
+    SB_STerm,
+    SB_STermC,
+    SB_SCS,
+    SB_BAfter,
+    SB_Break,
+    SB_Lookup
+};
+
+// Transition table: sentenceBreakTable[current state][sentence-break class of
+// the next character] gives the next state.
+static const uchar sentenceBreakTable[SB_Lookup + 1][QUnicodeTables::SentenceBreakClose + 1] = {
+// Other Sep Format Sp Lower Upper OLetter Numeric ATerm STerm Close
+    { SB_Initial, SB_BAfter , SB_Initial, SB_Initial, SB_Initial, SB_Upper , SB_Initial, SB_Initial, SB_ATerm , SB_STerm , SB_Initial }, // SB_Initial,
+    { SB_Initial, SB_BAfter , SB_Upper , SB_Initial, SB_Initial, SB_Upper , SB_Initial, SB_Initial, SB_UpATerm, SB_STerm , SB_Initial }, // SB_Upper
+
+    { SB_Lookup , SB_BAfter , SB_UpATerm, SB_ACS , SB_Initial, SB_Upper , SB_Break , SB_Initial, SB_ATerm , SB_STerm , SB_ATermC }, // SB_UpATerm
+    { SB_Lookup , SB_BAfter , SB_ATerm , SB_ACS , SB_Initial, SB_Break , SB_Break , SB_Initial, SB_ATerm , SB_STerm , SB_ATermC }, // SB_ATerm
+    { SB_Lookup , SB_BAfter , SB_ATermC , SB_ACS , SB_Initial, SB_Break , SB_Break , SB_Lookup , SB_ATerm , SB_STerm , SB_ATermC }, // SB_ATermC,
+    { SB_Lookup , SB_BAfter , SB_ACS , SB_ACS , SB_Initial, SB_Break , SB_Break , SB_Lookup , SB_ATerm , SB_STerm , SB_Lookup }, // SB_ACS,
+
+    { SB_Break ,
SB_BAfter , SB_STerm , SB_SCS , SB_Break , SB_Break , SB_Break , SB_Break , SB_ATerm , SB_STerm , SB_STermC }, // SB_STerm,
+    { SB_Break , SB_BAfter , SB_STermC , SB_SCS , SB_Break , SB_Break , SB_Break , SB_Break , SB_ATerm , SB_STerm , SB_STermC }, // SB_STermC,
+    { SB_Break , SB_BAfter , SB_SCS , SB_SCS , SB_Break , SB_Break , SB_Break , SB_Break , SB_ATerm , SB_STerm , SB_Break }, // SB_SCS,
+    { SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break }, // SB_BAfter,
+};
+
+// Sets sentenceBoundary for every code unit of string[0..len-1] by running
+// the sentenceBreakTable state machine over the sentence-break class of each
+// code unit.
+//  - attributes[0] is always a boundary.
+//  - Positions whose charStop flag is unset never become boundaries and do
+//    not advance the state machine.
+//  - SB_Break and SB_Lookup are pseudo-states: both are resolved inside the
+//    loop before brk is used as a row index again, which is why
+//    sentenceBreakTable has no initialised rows for them.
+// Requires len >= 1: string[0] and attributes[0] are accessed unconditionally.
+static void calcSentenceBreaks(const ushort *string, quint32 len, HB_CharAttributes *attributes)
+{
+    quint32 brk = sentenceBreakTable[SB_Initial][QUnicodeTables::sentenceBreakClass(string[0])];
+    attributes[0].sentenceBoundary = true;
+    for (quint32 i = 1; i < len; ++i) {
+        if (!attributes[i].charStop) {
+            attributes[i].sentenceBoundary = false;
+            continue;
+        }
+        brk = sentenceBreakTable[brk][QUnicodeTables::sentenceBreakClass(string[i])];
+        if (brk == SB_Lookup) {
+            // Break here unless, after skipping Other/Numeric/Close
+            // characters, the next character is a Lower letter.
+            brk = SB_Break;
+            quint32 lookahead = i + 1;
+            while (lookahead < len) {
+                quint32 sbrk = QUnicodeTables::sentenceBreakClass(string[lookahead]);
+                // Lower must be tested first: it is not one of the skippable
+                // classes, so testing "not skippable" first (as the code did
+                // before) stopped the scan without ever reaching this branch
+                // and every lookup ended in a break.
+                if (sbrk == QUnicodeTables::SentenceBreakLower) {
+                    brk = SB_Initial;
+                    break;
+                }
+                if (sbrk != QUnicodeTables::SentenceBreakOther
+                    && sbrk != QUnicodeTables::SentenceBreakNumeric
+                    && sbrk != QUnicodeTables::SentenceBreakClose) {
+                    break;
+                }
+                ++lookahead;
+            }
+            if (brk == SB_Initial) {
+                // No break anywhere up to the Lower letter; the outer loop
+                // resumes at that letter (i is advanced to lookahead).
+                while (i < lookahead)
+                    attributes[i++].sentenceBoundary = false;
+            }
+        }
+        if (brk == SB_Break) {
+            attributes[i].sentenceBoundary = true;
+            // Restart the state machine with the character that begins the
+            // new sentence.
+            brk = sentenceBreakTable[SB_Initial][QUnicodeTables::sentenceBreakClass(string[i])];
+        } else {
+            attributes[i].sentenceBoundary = false;
+        }
+    }
+}
+
+} // namespace
+
+
+// Fills attributes[0..length-1] for the UTF-16 text in string. The array is
+// zeroed first; grapheme and line-break information is always computed, word
+// and sentence boundaries only when GetWordBreaks / GetSentenceBreaks is set
+// in options. HB_GetTailoredCharAttributes() is called last with items and
+// numItems. Does nothing when length <= 0.
+Q_CORE_EXPORT void qGetCharAttributes(const ushort *string, int length,
+                                      const HB_ScriptItem *items, int numItems,
+                                      HB_CharAttributes *attributes, QCharAttributeOptions options)
+{
+    if (length <= 0)
+        return;
+
+
memset(attributes, 0, length * sizeof(HB_CharAttributes)); + + calcGraphemeAndLineBreaks(string, length, attributes); + if (options & GetWordBreaks) + calcWordBreaks(string, length, attributes); + if (options & GetSentenceBreaks) + calcSentenceBreaks(string, length, attributes); + + HB_GetTailoredCharAttributes(string, length, items, numItems, attributes); +} + +QT_END_NAMESPACE diff --git a/src/corelib/tools/qunicodetools_p.h b/src/corelib/tools/qunicodetools_p.h new file mode 100644 index 0000000000..f546481aa9 --- /dev/null +++ b/src/corelib/tools/qunicodetools_p.h @@ -0,0 +1,76 @@ +/**************************************************************************** +** +** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies). +** Contact: http://www.qt-project.org/ +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** GNU Lesser General Public License Usage +** This file may be used under the terms of the GNU Lesser General Public +** License version 2.1 as published by the Free Software Foundation and +** appearing in the file LICENSE.LGPL included in the packaging of this +** file. Please review the following information to ensure the GNU Lesser +** General Public License version 2.1 requirements will be met: +** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain additional +** rights. These rights are described in the Nokia Qt LGPL Exception +** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU General +** Public License version 3.0 as published by the Free Software Foundation +** and appearing in the file LICENSE.GPL included in the packaging of this +** file. 
Please review the following information to ensure the GNU General +** Public License version 3.0 requirements will be met: +** http://www.gnu.org/copyleft/gpl.html. +** +** Other Usage +** Alternatively, this file may be used in accordance with the terms and +** conditions contained in a signed written agreement between you and Nokia. +** +** +** +** +** +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#ifndef QUNICODETOOLS_P_H +#define QUNICODETOOLS_P_H + +// +// W A R N I N G +// ------------- +// +// This file is not part of the Qt API. It exists for the convenience +// of other Qt classes. This header file may change from version to +// version without notice, or even be removed. +// +// We mean it. +// + +#include +#include + +QT_BEGIN_NAMESPACE + +Q_DECLARE_TYPEINFO(HB_CharAttributes, Q_PRIMITIVE_TYPE); +Q_DECLARE_TYPEINFO(HB_ScriptItem, Q_PRIMITIVE_TYPE); + +enum QCharAttributeOption { + GetWordBreaks = 1, + GetSentenceBreaks = 2 +}; +Q_DECLARE_FLAGS(QCharAttributeOptions, QCharAttributeOption) + +Q_CORE_EXPORT void qGetCharAttributes(const ushort *string, int length, + const HB_ScriptItem *items, int numItems, + HB_CharAttributes *attributes, QCharAttributeOptions options = QFlag(0)); + +QT_END_NAMESPACE + +#endif // QUNICODETOOLS_P_H diff --git a/src/corelib/tools/tools.pri b/src/corelib/tools/tools.pri index ec02454c67..386db9665b 100644 --- a/src/corelib/tools/tools.pri +++ b/src/corelib/tools/tools.pri @@ -55,6 +55,7 @@ HEADERS += \ tools/qtimeline.h \ tools/qelapsedtimer.h \ tools/qunicodetables_p.h \ + tools/qunicodetools_p.h \ tools/qvarlengtharray.h \ tools/qvector.h @@ -92,6 +93,7 @@ SOURCES += \ tools/qstringlist.cpp \ tools/qtextboundaryfinder.cpp \ tools/qtimeline.cpp \ + tools/qunicodetools.cpp \ tools/qvector.cpp \ tools/qvsnprintf.cpp diff --git a/src/gui/text/qtextengine.cpp b/src/gui/text/qtextengine.cpp index 20f85d02aa..5dd0cb8c6c 100644 --- a/src/gui/text/qtextengine.cpp +++ 
b/src/gui/text/qtextengine.cpp @@ -52,6 +52,7 @@ #include "qfontengine_p.h" #include "qstring.h" #include +#include #include "qtextdocument_p.h" #include "qrawfont.h" #include "qrawfont_p.h" diff --git a/tests/auto/corelib/tools/qtextboundaryfinder/tst_qtextboundaryfinder.cpp b/tests/auto/corelib/tools/qtextboundaryfinder/tst_qtextboundaryfinder.cpp index b8ae709fd0..e6f33eb756 100644 --- a/tests/auto/corelib/tools/qtextboundaryfinder/tst_qtextboundaryfinder.cpp +++ b/tests/auto/corelib/tools/qtextboundaryfinder/tst_qtextboundaryfinder.cpp @@ -42,6 +42,7 @@ #include #include +#include #include #include @@ -61,6 +62,7 @@ private slots: void toNextBoundary(); void toPreviousBoundary_data(); void toPreviousBoundary(); + void thaiLineBreak(); }; void tst_QTextBoundaryFinder::init() @@ -382,7 +384,95 @@ void tst_QTextBoundaryFinder::toPreviousBoundary() QCOMPARE(boundaries, foundBoundaries); } +#include +#define LIBTHAI_MAJOR 0 +typedef int (*th_brk_def) (const unsigned char*, int*, size_t); +static th_brk_def th_brk = 0; + +static bool init_libthai() +{ +#if !defined(QT_NO_LIBRARY) + static bool triedResolve = false; + if (!triedResolve) { + th_brk = (th_brk_def) QLibrary::resolve("thai", (int)LIBTHAI_MAJOR, "th_brk"); + triedResolve = true; + } +#endif + return th_brk != 0; +} + +void tst_QTextBoundaryFinder::thaiLineBreak() +{ + if (!init_libthai()) + QSKIP("This test requires libThai-0.1.1x to be installed."); +#if 0 + // สวัสดีครับ นี่เป็นการงทดสอบตัวเอ + QTextCodec *codec = QTextCodec::codecForMib(2259); + QString text = codec->toUnicode(QByteArray("\xca\xc7\xd1\xca\xb4\xd5\xa4\xc3\xd1\xba\x20\xb9\xd5\xe8\xe0\xbb\xe7\xb9\xa1\xd2\xc3\xb7\xb4\xca\xcd\xba\xb5\xd1\xc7\xe0\xcd\xa7")); + QCOMPARE(text.length(), 32); + + QTextBoundaryFinder finder(QTextBoundaryFinder::Line, text); + finder.setPosition(0); + QVERIFY(finder.isAtBoundary()); + finder.setPosition(1); + QVERIFY(!finder.isAtBoundary()); + finder.setPosition(2); + QVERIFY(!finder.isAtBoundary()); + 
finder.setPosition(3); + QVERIFY(!finder.isAtBoundary()); + finder.setPosition(4); + QVERIFY(!finder.isAtBoundary()); + finder.setPosition(5); + QVERIFY(!finder.isAtBoundary()); + finder.setPosition(6); + QVERIFY(finder.isAtBoundary()); + finder.setPosition(7); + QVERIFY(finder.isAtBoundary()); + finder.setPosition(8); + QVERIFY(!finder.isAtBoundary()); + finder.setPosition(9); + QVERIFY(!finder.isAtBoundary()); + finder.setPosition(10); + QVERIFY(!finder.isAtBoundary()); + finder.setPosition(11); + QVERIFY(finder.isAtBoundary()); + finder.setPosition(12); + QVERIFY(!finder.isAtBoundary()); + finder.setPosition(13); + QVERIFY(!finder.isAtBoundary()); + finder.setPosition(14); + QVERIFY(finder.isAtBoundary()); + finder.setPosition(15); + QVERIFY(!finder.isAtBoundary()); + finder.setPosition(16); + QVERIFY(!finder.isAtBoundary()); + finder.setPosition(17); + QVERIFY(!finder.isAtBoundary()); + finder.setPosition(18); + QVERIFY(finder.isAtBoundary()); + finder.setPosition(19); + QVERIFY(!finder.isAtBoundary()); + finder.setPosition(20); + QVERIFY(finder.isAtBoundary()); + finder.setPosition(21); + QVERIFY(finder.isAtBoundary()); + finder.setPosition(22); + QVERIFY(!finder.isAtBoundary()); + finder.setPosition(23); + QVERIFY(!finder.isAtBoundary()); + finder.setPosition(24); + QVERIFY(!finder.isAtBoundary()); + finder.setPosition(25); + QVERIFY(finder.isAtBoundary()); + finder.setPosition(26); + QVERIFY(finder.isAtBoundary()); + for (int i = 27; i < 32; ++i) { + finder.setPosition(i); + QVERIFY(!finder.isAtBoundary()); + } +#endif +} QTEST_MAIN(tst_QTextBoundaryFinder)