move the default text breaking algorithm impl from HarfBuzz to Qt

There are several reasons to do this: * text breaking is not a shaper's job; * since the text breaking rules are bound to a specific Unicode version, updating Qt's internal Unicode data would require updating the data in HB as well; * it makes porting to HarfBuzz-NG somewhat easier. Change-Id: I0bbf8e8a343bc074696f4ddf2ae4e7fa32a61629 Reviewed-by: Lars Knoll <lars.knoll@nokia.com>
2012-05-10 10:31:16 +03:00 · 2012-05-10 10:31:16 +03:00 · cbfdec6603
commit cbfdec6603
parent 8c0048a377
17 changed files with 587 additions and 744 deletions
--- a/src/3rdparty/harfbuzz/src/harfbuzz-external.h
+++ b/src/3rdparty/harfbuzz/src/harfbuzz-external.h
@ -34,23 +34,7 @@ HB_BEGIN_HEADER
   They need to be provided by the application/library
 */

-
-/*
- see http://www.unicode.org/reports/tr14/tr14-19.html
- we don't use the XX, AI and CB properties and map them to AL instead.
- as we don't support any EBDIC based OS'es, NL is ignored and mapped to AL as well.
-*/
-typedef enum {
-    HB_LineBreak_OP, HB_LineBreak_CL, HB_LineBreak_QU, HB_LineBreak_GL, HB_LineBreak_NS,
-    HB_LineBreak_EX, HB_LineBreak_SY, HB_LineBreak_IS, HB_LineBreak_PR, HB_LineBreak_PO,
-    HB_LineBreak_NU, HB_LineBreak_AL, HB_LineBreak_ID, HB_LineBreak_IN, HB_LineBreak_HY,
-    HB_LineBreak_BA, HB_LineBreak_BB, HB_LineBreak_B2, HB_LineBreak_ZW, HB_LineBreak_CM,
-    HB_LineBreak_WJ, HB_LineBreak_H2, HB_LineBreak_H3, HB_LineBreak_JL, HB_LineBreak_JV,
-    HB_LineBreak_JT, HB_LineBreak_SA, HB_LineBreak_SG,
-    HB_LineBreak_SP, HB_LineBreak_CR, HB_LineBreak_LF, HB_LineBreak_BK
-} HB_LineBreakClass;
-
-typedef enum 
+typedef enum
 {
    HB_Mark_NonSpacing,          /*   Mn */
    HB_Mark_SpacingCombining,    /*   Mc */
@ -90,55 +74,6 @@ typedef enum
    HB_Symbol_Other              /*   So */
 } HB_CharCategory;

-typedef enum
-{
-    HB_Grapheme_Other, 
-    HB_Grapheme_CR,
-    HB_Grapheme_LF,
-    HB_Grapheme_Control,
-    HB_Grapheme_Extend,
-    HB_Grapheme_L, 
-    HB_Grapheme_V, 
-    HB_Grapheme_T, 
-    HB_Grapheme_LV, 
-    HB_Grapheme_LVT
-} HB_GraphemeClass;
-
-
-typedef enum
-{
-    HB_Word_Other,
-    HB_Word_Format,
-    HB_Word_Katakana,
-    HB_Word_ALetter,
-    HB_Word_MidLetter,
-    HB_Word_MidNum,
-    HB_Word_Numeric,
-    HB_Word_ExtendNumLet
-} HB_WordClass;
-
-
-typedef enum
-{
-    HB_Sentence_Other,
-    HB_Sentence_Sep,
-    HB_Sentence_Format,
-    HB_Sentence_Sp,
-    HB_Sentence_Lower,
-    HB_Sentence_Upper,
-    HB_Sentence_OLetter,
-    HB_Sentence_Numeric,
-    HB_Sentence_ATerm,
-    HB_Sentence_STerm,
-    HB_Sentence_Close
-} HB_SentenceClass;
-
-HB_GraphemeClass HB_GetGraphemeClass(HB_UChar32 ch);
-HB_WordClass HB_GetWordClass(HB_UChar32 ch);
-HB_SentenceClass HB_GetSentenceClass(HB_UChar32 ch);
-HB_LineBreakClass HB_GetLineBreakClass(HB_UChar32 ch);
-
-void HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *grapheme, HB_LineBreakClass *lineBreak);
 void HB_GetUnicodeCharProperties(HB_UChar32 ch, HB_CharCategory *category, int *combiningClass);
 HB_CharCategory HB_GetUnicodeCharCategory(HB_UChar32 ch);
 int HB_GetUnicodeCharCombiningClass(HB_UChar32 ch);
--- a/src/3rdparty/harfbuzz/src/harfbuzz-shaper.cpp
+++ b/src/3rdparty/harfbuzz/src/harfbuzz-shaper.cpp
@ -32,205 +32,6 @@
 #define HB_MIN(a, b) ((a) < (b) ? (a) : (b))
 #define HB_MAX(a, b) ((a) > (b) ? (a) : (b))

-// -----------------------------------------------------------------------------------------------------
-//
-// The line break algorithm. See http://www.unicode.org/reports/tr14/tr14-13.html
-//
-// -----------------------------------------------------------------------------------------------------
-
-/* The Unicode algorithm does in our opinion allow line breaks at some
-   places they shouldn't be allowed. The following changes were thus
-   made in comparison to the Unicode reference:
-
-   EX->AL from DB to IB
-   SY->AL from DB to IB
-   SY->PO from DB to IB
-   SY->PR from DB to IB
-   SY->OP from DB to IB
-   AL->PR from DB to IB
-   AL->PO from DB to IB
-   PR->PR from DB to IB
-   PO->PO from DB to IB
-   PR->PO from DB to IB
-   PO->PR from DB to IB
-   HY->PO from DB to IB
-   HY->PR from DB to IB
-   HY->OP from DB to IB
-   NU->EX from PB to IB
-   EX->PO from DB to IB
-*/
-
-// The following line break classes are not treated by the table:
-//  AI, BK, CB, CR, LF, NL, SA, SG, SP, XX
-
-enum break_class {
-    // the first 4 values have to agree with the enum in QCharAttributes
-    ProhibitedBreak,            // PB in table
-    DirectBreak,                // DB in table
-    IndirectBreak,              // IB in table
-    CombiningIndirectBreak,     // CI in table
-    CombiningProhibitedBreak    // CP in table
-};
-#define DB DirectBreak
-#define IB IndirectBreak
-#define CI CombiningIndirectBreak
-#define CP CombiningProhibitedBreak
-#define PB ProhibitedBreak
-
-static const hb_uint8 breakTable[HB_LineBreak_JT+1][HB_LineBreak_JT+1] =
-{
-/*          OP  CL  QU  GL  NS  EX  SY  IS  PR  PO  NU  AL  ID  IN  HY  BA  BB  B2  ZW  CM  WJ  H2  H3  JL  JV  JT */
-/* OP */ { PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, CP, PB, PB, PB, PB, PB, PB },
-/* CL */ { DB, PB, IB, IB, PB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
-/* QU */ { PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
-/* GL */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
-/* NS */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
-/* EX */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
-/* SY */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
-/* IS */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
-/* PR */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, DB, IB, IB, DB, DB, PB, CI, PB, IB, IB, IB, IB, IB },
-/* PO */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
-/* NU */ { IB, PB, IB, IB, IB, IB, PB, PB, IB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
-/* AL */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
-/* ID */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
-/* IN */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
-/* HY */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
-/* BA */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
-/* BB */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
-/* B2 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, PB, PB, CI, PB, DB, DB, DB, DB, DB },
-/* ZW */ { DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, PB, DB, DB, DB, DB, DB, DB, DB },
-/* CM */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
-/* WJ */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
-/* H2 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, IB, IB },
-/* H3 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, IB },
-/* JL */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, IB, IB, IB, IB, DB },
-/* JV */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, IB, IB },
-/* JT */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, IB }
-};
-#undef DB
-#undef IB
-#undef CI
-#undef CP
-#undef PB
-
-static const hb_uint8 graphemeTable[HB_Grapheme_LVT + 1][HB_Grapheme_LVT + 1] =
-{
-//      Other, CR,    LF,    Control,Extend,L,    V,     T,     LV,    LVT
-    { true , true , true , true , true , true , true , true , true , true  }, // Other, 
-    { true , true , true , true , true , true , true , true , true , true  }, // CR,
-    { true , false, true , true , true , true , true , true , true , true  }, // LF,
-    { true , true , true , true , true , true , true , true , true , true  }, // Control,
-    { false, true , true , true , false, false, false, false, false, false }, // Extend,
-    { true , true , true , true , true , false, true , true , true , true  }, // L, 
-    { true , true , true , true , true , false, false, true , false, true  }, // V, 
-    { true , true , true , true , true , true , false, false, false, false }, // T, 
-    { true , true , true , true , true , false, true , true , true , true  }, // LV, 
-    { true , true , true , true , true , false, true , true , true , true  }, // LVT
-};
-    
-static void calcLineBreaks(const HB_UChar16 *uc, hb_uint32 len, HB_CharAttributes *charAttributes)
-{
-    if (!len)
-        return;
-
-    // ##### can this fail if the first char is a surrogate?
-    HB_LineBreakClass cls;
-    HB_GraphemeClass grapheme;
-    HB_GetGraphemeAndLineBreakClass(*uc, &grapheme, &cls);
-    // handle case where input starts with an LF
-    if (cls == HB_LineBreak_LF)
-        cls = HB_LineBreak_BK;
-
-    charAttributes[0].whiteSpace = (cls == HB_LineBreak_SP || cls == HB_LineBreak_BK);
-    charAttributes[0].charStop = true;
-
-    int lcls = cls;
-    for (hb_uint32 i = 1; i < len; ++i) {
-        charAttributes[i].whiteSpace = false;
-        charAttributes[i].charStop = true;
-
-        HB_UChar32 code = uc[i];
-        HB_GraphemeClass ngrapheme;
-        HB_LineBreakClass ncls;
-        HB_GetGraphemeAndLineBreakClass(code, &ngrapheme, &ncls);
-        charAttributes[i].charStop = graphemeTable[ngrapheme][grapheme];
-        // handle surrogates
-        if (ncls == HB_LineBreak_SG) {
-            if (HB_IsHighSurrogate(uc[i]) && i < len - 1 && HB_IsLowSurrogate(uc[i+1])) {
-                continue;
-            } else if (HB_IsLowSurrogate(uc[i]) && HB_IsHighSurrogate(uc[i-1])) {
-                code = HB_SurrogateToUcs4(uc[i-1], uc[i]);
-                HB_GetGraphemeAndLineBreakClass(code, &ngrapheme, &ncls);
-                charAttributes[i].charStop = false;
-            } else {
-                ncls = HB_LineBreak_AL;
-            }
-        }
-
-        // set white space and char stop flag
-        if (ncls >= HB_LineBreak_SP)
-            charAttributes[i].whiteSpace = true;
-
-        HB_LineBreakType lineBreakType = HB_NoBreak;
-        if (cls >= HB_LineBreak_LF) {
-            lineBreakType = HB_ForcedBreak;
-        } else if(cls == HB_LineBreak_CR) {
-            lineBreakType = (ncls == HB_LineBreak_LF) ? HB_NoBreak : HB_ForcedBreak;
-        }
-
-        if (ncls == HB_LineBreak_SP)
-            goto next_no_cls_update;
-        if (ncls >= HB_LineBreak_CR)
-            goto next;
-
-        {
-            int tcls = ncls;
-            // for south east asian chars that require a complex (dictionary analysis), the unicode
-            // standard recommends to treat them as AL. thai_attributes and other attribute methods that
-            // do dictionary analysis can override
-            if (tcls >= HB_LineBreak_SA)
-                tcls = HB_LineBreak_AL;
-            if (cls >= HB_LineBreak_SA)
-                cls = HB_LineBreak_AL;
-
-            int brk = breakTable[cls][tcls];
-            switch (brk) {
-            case DirectBreak:
-                lineBreakType = HB_Break;
-                if (uc[i-1] == 0xad) // soft hyphen
-                    lineBreakType = HB_SoftHyphen;
-                break;
-            case IndirectBreak:
-                lineBreakType = (lcls == HB_LineBreak_SP) ? HB_Break : HB_NoBreak;
-                break;
-            case CombiningIndirectBreak:
-                lineBreakType = HB_NoBreak;
-                if (lcls == HB_LineBreak_SP){
-                    if (i > 1)
-                        charAttributes[i-2].lineBreakType = HB_Break;
-                } else {
-                    goto next_no_cls_update;
-                }
-                break;
-            case CombiningProhibitedBreak:
-                lineBreakType = HB_NoBreak;
-                if (lcls != HB_LineBreak_SP)
-                    goto next_no_cls_update;
-            case ProhibitedBreak:
-            default:
-                break;
-            }
-        }
-    next:
-        cls = ncls;
-    next_no_cls_update:
-        lcls = ncls;
-        grapheme = ngrapheme;
-        charAttributes[i-1].lineBreakType = lineBreakType;
-    }
-    charAttributes[len-1].lineBreakType = HB_ForcedBreak;
-}
-
 // --------------------------------------------------------------------------------------------------------------------------------------------
 //
 // Basic processing
@ -679,13 +480,12 @@ const HB_ScriptEngine HB_ScriptEngines[] = {
    { HB_ArabicShape, 0}
 };

-void HB_GetCharAttributes(const HB_UChar16 *string, hb_uint32 stringLength,
-                          const HB_ScriptItem *items, hb_uint32 numItems,
-                          HB_CharAttributes *attributes)
+void HB_GetTailoredCharAttributes(const HB_UChar16 *string, hb_uint32 stringLength,
+                                  const HB_ScriptItem *items, hb_uint32 numItems,
+                                  HB_CharAttributes *attributes)
 {
-    memset(attributes, 0, stringLength * sizeof(HB_CharAttributes));
-    calcLineBreaks(string, stringLength, attributes);
-
+    if (stringLength == 0)
+        return;
    for (hb_uint32 i = 0; i < numItems; ++i) {
        HB_Script script = items[i].script;
        if (script == HB_Script_Inherited)
@ -698,136 +498,6 @@ void HB_GetCharAttributes(const HB_UChar16 *string, hb_uint32 stringLength,
 }


-enum BreakRule { NoBreak = 0, Break = 1, Middle = 2 };
-
-static const hb_uint8 wordbreakTable[HB_Word_ExtendNumLet + 1][HB_Word_ExtendNumLet + 1] = {
-//        Other    Format   Katakana ALetter  MidLetter MidNum  Numeric  ExtendNumLet
-    {   Break,   Break,   Break,   Break,   Break,   Break,   Break,   Break }, // Other
-    {   Break,   Break,   Break,   Break,   Break,   Break,   Break,   Break }, // Format 
-    {   Break,   Break, NoBreak,   Break,   Break,   Break,   Break, NoBreak }, // Katakana
-    {   Break,   Break,   Break, NoBreak,  Middle,   Break, NoBreak, NoBreak }, // ALetter
-    {   Break,   Break,   Break,   Break,   Break,   Break,   Break,   Break }, // MidLetter
-    {   Break,   Break,   Break,   Break,   Break,   Break,   Break,   Break }, // MidNum
-    {   Break,   Break,   Break, NoBreak,   Break,  Middle, NoBreak, NoBreak }, // Numeric
-    {   Break,   Break, NoBreak, NoBreak,   Break,   Break, NoBreak, NoBreak }, // ExtendNumLet
-};
-
-void HB_GetWordBoundaries(const HB_UChar16 *string, hb_uint32 stringLength,
-                          const HB_ScriptItem * /*items*/, hb_uint32 /*numItems*/,
-                          HB_CharAttributes *attributes)
-{
-    if (stringLength == 0)
-        return;
-    unsigned int brk = HB_GetWordClass(string[0]);
-    attributes[0].wordBoundary = true;
-    for (hb_uint32 i = 1; i < stringLength; ++i) {
-        if (!attributes[i].charStop) {
-            attributes[i].wordBoundary = false;
-            continue;
-        }
-        hb_uint32 nbrk = HB_GetWordClass(string[i]);
-        if (nbrk == HB_Word_Format) {
-            attributes[i].wordBoundary = (HB_GetSentenceClass(string[i-1]) == HB_Sentence_Sep);
-            continue;
-        }
-        BreakRule rule = (BreakRule)wordbreakTable[brk][nbrk];
-        if (rule == Middle) {
-            rule = Break;
-            hb_uint32 lookahead = i + 1;
-            while (lookahead < stringLength) {
-                hb_uint32 testbrk = HB_GetWordClass(string[lookahead]);
-                if (testbrk == HB_Word_Format && HB_GetSentenceClass(string[lookahead]) != HB_Sentence_Sep) {
-                    ++lookahead;
-                    continue;
-                }
-                if (testbrk == brk) {
-                    rule = NoBreak;
-                    while (i < lookahead)
-                        attributes[i++].wordBoundary = false;
-                    nbrk = testbrk;
-                }
-                break;
-            }
-        }
-        attributes[i].wordBoundary = (rule == Break);
-        brk = nbrk;
-    }
-}
-
-
-enum SentenceBreakStates {
-    SB_Initial,
-    SB_Upper,
-    SB_UpATerm, 
-    SB_ATerm,
-    SB_ATermC, 
-    SB_ACS, 
-    SB_STerm, 
-    SB_STermC, 
-    SB_SCS,
-    SB_BAfter, 
-    SB_Break,
-    SB_Look
-};
-
-static const hb_uint8 sentenceBreakTable[HB_Sentence_Close + 1][HB_Sentence_Close + 1] = {
-//        Other       Sep         Format      Sp          Lower       Upper       OLetter     Numeric     ATerm       STerm       Close
-      { SB_Initial, SB_BAfter , SB_Initial, SB_Initial, SB_Initial, SB_Upper  , SB_Initial, SB_Initial, SB_ATerm  , SB_STerm  , SB_Initial }, // SB_Initial,
-      { SB_Initial, SB_BAfter , SB_Upper  , SB_Initial, SB_Initial, SB_Upper  , SB_Initial, SB_Initial, SB_UpATerm, SB_STerm  , SB_Initial }, // SB_Upper
-      
-      { SB_Look   , SB_BAfter , SB_UpATerm, SB_ACS    , SB_Initial, SB_Upper  , SB_Break  , SB_Initial, SB_ATerm  , SB_STerm  , SB_ATermC  }, // SB_UpATerm
-      { SB_Look   , SB_BAfter , SB_ATerm  , SB_ACS    , SB_Initial, SB_Break  , SB_Break  , SB_Initial, SB_ATerm  , SB_STerm  , SB_ATermC  }, // SB_ATerm
-      { SB_Look   , SB_BAfter , SB_ATermC , SB_ACS    , SB_Initial, SB_Break  , SB_Break  , SB_Look   , SB_ATerm  , SB_STerm  , SB_ATermC  }, // SB_ATermC,
-      { SB_Look   , SB_BAfter , SB_ACS    , SB_ACS    , SB_Initial, SB_Break  , SB_Break  , SB_Look   , SB_ATerm  , SB_STerm  , SB_Look    }, // SB_ACS,
-      
-      { SB_Break  , SB_BAfter , SB_STerm  , SB_SCS    , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_ATerm  , SB_STerm  , SB_STermC  }, // SB_STerm,
-      { SB_Break  , SB_BAfter , SB_STermC , SB_SCS    , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_ATerm  , SB_STerm  , SB_STermC  }, // SB_STermC,
-      { SB_Break  , SB_BAfter , SB_SCS    , SB_SCS    , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_ATerm  , SB_STerm  , SB_Break   }, // SB_SCS,
-      { SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_Break   }, // SB_BAfter,
-};
-
-void HB_GetSentenceBoundaries(const HB_UChar16 *string, hb_uint32 stringLength,
-                              const HB_ScriptItem * /*items*/, hb_uint32 /*numItems*/,
-                              HB_CharAttributes *attributes)
-{
-    if (stringLength == 0)
-        return;
-    hb_uint32 brk = sentenceBreakTable[SB_Initial][HB_GetSentenceClass(string[0])];
-    attributes[0].sentenceBoundary = true;
-    for (hb_uint32 i = 1; i < stringLength; ++i) {
-        if (!attributes[i].charStop) {
-            attributes[i].sentenceBoundary = false;
-            continue;
-        }
-        brk = sentenceBreakTable[brk][HB_GetSentenceClass(string[i])];
-        if (brk == SB_Look) {
-            brk = SB_Break;
-            hb_uint32 lookahead = i + 1;
-            while (lookahead < stringLength) {
-                hb_uint32 sbrk = HB_GetSentenceClass(string[lookahead]);
-                if (sbrk != HB_Sentence_Other && sbrk != HB_Sentence_Numeric && sbrk != HB_Sentence_Close) {
-                    break;
-                } else if (sbrk == HB_Sentence_Lower) {
-                    brk = SB_Initial;
-                    break;
-                }
-                ++lookahead;
-            }
-            if (brk == SB_Initial) {
-                while (i < lookahead)
-                    attributes[i++].sentenceBoundary = false;
-            }
-        }
-        if (brk == SB_Break) {
-            attributes[i].sentenceBoundary = true;
-            brk = sentenceBreakTable[SB_Initial][HB_GetSentenceClass(string[i])];
-        } else {
-            attributes[i].sentenceBoundary = false;
-        }
-    }
-}
-
-
 static inline char *tag_to_string(HB_UInt tag)
 {
    static char string[5];
--- a/src/3rdparty/harfbuzz/src/harfbuzz-shaper.h
+++ b/src/3rdparty/harfbuzz/src/harfbuzz-shaper.h
@ -143,19 +143,9 @@ typedef struct {
    hb_bitfield unused                  :2;
 } HB_CharAttributes;

-void HB_GetCharAttributes(const HB_UChar16 *string, hb_uint32 stringLength,
-                          const HB_ScriptItem *items, hb_uint32 numItems,
-                          HB_CharAttributes *attributes);
-
-/* requires HB_GetCharAttributes to be called before */
-void HB_GetWordBoundaries(const HB_UChar16 *string, hb_uint32 stringLength,
-                          const HB_ScriptItem *items, hb_uint32 numItems,
-                          HB_CharAttributes *attributes);
-
-/* requires HB_GetCharAttributes to be called before */
-void HB_GetSentenceBoundaries(const HB_UChar16 *string, hb_uint32 stringLength,
-                              const HB_ScriptItem *items, hb_uint32 numItems,
-                              HB_CharAttributes *attributes);
+void HB_GetTailoredCharAttributes(const HB_UChar16 *string, hb_uint32 stringLength,
+                                  const HB_ScriptItem *items, hb_uint32 numItems,
+                                  HB_CharAttributes *attributes);


 typedef enum {
--- a/src/3rdparty/harfbuzz/tests/Makefile.am
+++ b/src/3rdparty/harfbuzz/tests/Makefile.am
@ -2,6 +2,6 @@
 SUBDIRS =

 if QT
-SUBDIRS += linebreaking shaping
+SUBDIRS += shaping
 endif

--- a/src/3rdparty/harfbuzz/tests/linebreaking/.gitignore
+++ b/src/3rdparty/harfbuzz/tests/linebreaking/.gitignore
@ -1,4 +0,0 @@
-.deps
-linebreaking
-*.moc
-*.o
--- a/src/3rdparty/harfbuzz/tests/linebreaking/Makefile.am
+++ b/src/3rdparty/harfbuzz/tests/linebreaking/Makefile.am
@ -1,12 +0,0 @@
-
-check_PROGRAMS = linebreaking
-
-linebreaking_SOURCES = main.cpp harfbuzz-qt.cpp
-linebreaking_LDADD = $(QT_GUI_LIBS) $(QT_QTEST_LIBS) ../../src/libharfbuzz-1.la
-
-main.o: main.moc
-
-main.moc: $(srcdir)/main.cpp
-	$(QT_MOC) -o main.moc $(srcdir)/main.cpp
-
-INCLUDES = -I$(top_srcdir)/src $(FREETYPE_CFLAGS) $(QT_GUI_CFLAGS) $(QT_QTEST_CFLAGS)
--- a/src/3rdparty/harfbuzz/tests/linebreaking/main.cpp
+++ b/src/3rdparty/harfbuzz/tests/linebreaking/main.cpp
@ -1,230 +0,0 @@
-/*
- * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
- *
- * This is part of HarfBuzz, an OpenType Layout engine library.
- *
- * Permission is hereby granted, without written agreement and without
- * license or royalty fees, to use, copy, modify, and distribute this
- * software and its documentation for any purpose, provided that the
- * above copyright notice and the following two paragraphs appear in
- * all copies of this software.
- *
- * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
- * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
- * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
- * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- *
- * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
- * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
- * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
- * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
- */
-
-/*
-    !!!!!! Warning !!!!!
-    Please don't save this file in emacs. It contains utf8 text sequences emacs will
-    silently convert to a series of question marks.
- */
-#include <QtTest/QtTest>
-#include <QtCore/qdebug.h>
-
-#include <harfbuzz-shaper.h>
-
-static QVector<HB_CharAttributes> getCharAttributes(const QString &str, HB_Script script = HB_Script_Common)
-{
-    QVector<HB_CharAttributes> attrs(str.length());
-    HB_ScriptItem item;
-    item.pos = 0;
-    item.length = str.length();
-    item.script = script;
-    HB_GetCharAttributes(str.utf16(), str.length(),
-                         &item, 1,
-                         attrs.data());
-    return attrs;
-}
-
-class tst_CharAttributes : public QObject
-{
-    Q_OBJECT
-
-public:
-    tst_CharAttributes();
-    virtual ~tst_CharAttributes();
-
-public slots:
-    void init();
-    void cleanup();
-private slots:
-    void lineBreaking();
-    void charWordStopOnLineSeparator();
-    void charStopForSurrogatePairs();
-    void thaiWordBreak();
-};
-
-
-tst_CharAttributes::tst_CharAttributes()
-{
-}
-
-tst_CharAttributes::~tst_CharAttributes()
-{
-}
-
-void tst_CharAttributes::init()
-{
-}
-
-void tst_CharAttributes::cleanup()
-{
-}
-
-
-void tst_CharAttributes::lineBreaking()
-{
-    struct Breaks {
-	const char *utf8;
-	uchar breaks[32];
-    };
-    Breaks brks[] = {
-	{ "11", { false, 0xff } },
-	{ "aa", { false, 0xff } },
-	{ "++", { false, 0xff } },
-	{ "--", { false, 0xff } },
-	{ "((", { false, 0xff } },
-	{ "))", { false, 0xff } },
-	{ "..", { false, 0xff } },
-	{ "\"\"", { false, 0xff } },
-	{ "$$", { false, 0xff } },
-	{ "!!", { false, 0xff } },
-	{ "??", { false, 0xff } },
-	{ ",,", { false, 0xff } },
-
-	{ ")()", { true, false, 0xff } },
-	{ "?!?", { false, false, 0xff } },
-	{ ".,.", { false, false, 0xff } },
-	{ "+-+", { false, false, 0xff } },
-	{ "+=+", { false, false, 0xff } },
-	{ "+(+", { false, false, 0xff } },
-	{ "+)+", { false, false, 0xff } },
-
-	{ "a b", { false, true, 0xff } },
-	{ "a(b", { false, false, 0xff } },
-	{ "a)b", { false, false, 0xff } },
-	{ "a-b", { false, true, 0xff } },
-	{ "a.b", { false, false, 0xff } },
-	{ "a+b", { false, false, 0xff } },
-	{ "a?b", { false, false, 0xff } },
-	{ "a!b", { false, false, 0xff } },
-	{ "a$b", { false, false, 0xff } },
-	{ "a,b", { false, false, 0xff } },
-	{ "a/b", { false, false, 0xff } },
-	{ "1/2", { false, false, 0xff } },
-	{ "./.", { false, false, 0xff } },
-	{ ",/,", { false, false, 0xff } },
-	{ "!/!", { false, false, 0xff } },
-	{ "\\/\\", { false, false, 0xff } },
-	{ "1 2", { false, true, 0xff } },
-	{ "1(2", { false, false, 0xff } },
-	{ "1)2", { false, false, 0xff } },
-	{ "1-2", { false, false, 0xff } },
-	{ "1.2", { false, false, 0xff } },
-	{ "1+2", { false, false, 0xff } },
-	{ "1?2", { false, true, 0xff } },
-	{ "1!2", { false, true, 0xff } },
-	{ "1$2", { false, false, 0xff } },
-	{ "1,2", { false, false, 0xff } },
-	{ "1/2", { false, false, 0xff } },
-	{ "\330\260\331\216\331\204\331\220\331\203\331\216", { false, false, false, false, false, 0xff } },
-	{ "\330\247\331\204\331\205 \330\247\331\204\331\205", { false, false, false, true, false, false, 0xff } },
-	{ "1#2", { false, false, 0xff } },
-	{ "!#!", { false, false, 0xff } },
-	{ 0, {} }
-    };
-    Breaks *b = brks;
-    while (b->utf8) {
-        QString str = QString::fromUtf8(b->utf8);
-
-        QVector<HB_CharAttributes> attrs = getCharAttributes(str);
-
-        int i;
-        for (i = 0; i < (int)str.length() - 1; ++i) {
-            QVERIFY(b->breaks[i] != 0xff);
-            if ( (attrs[i].lineBreakType != HB_NoBreak) != (bool)b->breaks[i] ) {
-                qDebug("test case \"%s\" failed at char %d; break type: %d", b->utf8, i, attrs[i].lineBreakType);
-                QCOMPARE( (attrs[i].lineBreakType != HB_NoBreak), (bool)b->breaks[i] );
-            }
-        }
-        QVERIFY(attrs[i].lineBreakType == HB_ForcedBreak);
-        QCOMPARE(b->breaks[i], (uchar)0xff);
-        ++b;
-    }
-}
-
-void tst_CharAttributes::charWordStopOnLineSeparator()
-{
-    const QChar lineSeparator(QChar::LineSeparator);
-    QString txt;
-    txt.append(lineSeparator);
-    txt.append(lineSeparator);
-    QVector<HB_CharAttributes> attrs = getCharAttributes(txt);
-    QVERIFY(attrs[1].charStop);
-}
-
-void tst_CharAttributes::charStopForSurrogatePairs()
-{
-    QString txt;
-    txt.append("a");
-    txt.append(0xd87e);
-    txt.append(0xdc25);
-    txt.append("b");
-    QVector<HB_CharAttributes> attrs = getCharAttributes(txt);
-    QVERIFY(attrs[0].charStop);
-    QVERIFY(attrs[1].charStop);
-    QVERIFY(!attrs[2].charStop);
-    QVERIFY(attrs[3].charStop);
-}
-
-void tst_CharAttributes::thaiWordBreak()
-{
-    // สวัสดีครับ นี่เป็นการงทดสอบตัวเอ
-    QTextCodec *codec = QTextCodec::codecForMib(2259);
-    QString txt = codec->toUnicode(QByteArray("\xca\xc7\xd1\xca\xb4\xd5\xa4\xc3\xd1\xba\x20\xb9\xd5\xe8\xe0\xbb\xe7\xb9\xa1\xd2\xc3\xb7\xb4\xca\xcd\xba\xb5\xd1\xc7\xe0\xcd\xa7"));
-
-
-    QCOMPARE(txt.length(), 32);
-    QVector<HB_CharAttributes> attrs = getCharAttributes(txt, HB_Script_Thai);
-    QVERIFY(attrs[0].lineBreakType == HB_NoBreak);
-    QVERIFY(attrs[1].lineBreakType == HB_NoBreak);
-    QVERIFY(attrs[2].lineBreakType == HB_NoBreak);
-    QVERIFY(attrs[3].lineBreakType == HB_NoBreak);
-    QVERIFY(attrs[4].lineBreakType == HB_NoBreak);
-    QVERIFY(attrs[5].lineBreakType == HB_Break);
-    QVERIFY(attrs[6].lineBreakType == HB_NoBreak);
-    QVERIFY(attrs[7].lineBreakType == HB_NoBreak);
-    QVERIFY(attrs[8].lineBreakType == HB_NoBreak);
-    QVERIFY(attrs[9].lineBreakType == HB_NoBreak);
-    QVERIFY(attrs[10].lineBreakType == HB_Break);
-    QVERIFY(attrs[11].lineBreakType == HB_NoBreak);
-    QVERIFY(attrs[12].lineBreakType == HB_NoBreak);
-    QVERIFY(attrs[13].lineBreakType == HB_Break);
-    QVERIFY(attrs[14].lineBreakType == HB_NoBreak);
-    QVERIFY(attrs[15].lineBreakType == HB_NoBreak);
-    QVERIFY(attrs[16].lineBreakType == HB_NoBreak);
-    QVERIFY(attrs[17].lineBreakType == HB_Break);
-    QVERIFY(attrs[18].lineBreakType == HB_NoBreak);
-    QVERIFY(attrs[19].lineBreakType == HB_NoBreak);
-    QVERIFY(attrs[20].lineBreakType == HB_Break);
-    QVERIFY(attrs[21].lineBreakType == HB_NoBreak);
-    QVERIFY(attrs[22].lineBreakType == HB_NoBreak);
-    QVERIFY(attrs[23].lineBreakType == HB_NoBreak);
-    QVERIFY(attrs[24].lineBreakType == HB_NoBreak);
-    QVERIFY(attrs[25].lineBreakType == HB_Break);
-    QVERIFY(attrs[26].lineBreakType == HB_NoBreak);
-    for (int i = 27; i < 32; ++i)
-        QVERIFY(attrs[i].lineBreakType == HB_NoBreak);
-}
-
-QTEST_MAIN(tst_CharAttributes)
-#include "main.moc"
--- a/src/3rdparty/harfbuzz/tests/shaping/Makefile.am
+++ b/src/3rdparty/harfbuzz/tests/shaping/Makefile.am
@ -1,7 +1,7 @@

 check_PROGRAMS = shaping

-shaping_SOURCES = main.cpp ../linebreaking/harfbuzz-qt.cpp
+shaping_SOURCES = main.cpp harfbuzz-qt.cpp
 shaping_LDADD = $(QT_GUI_LIBS) $(QT_QTEST_LIBS) ../../src/libharfbuzz-1.la

 main.o: main.moc
--- a/src/3rdparty/harfbuzz/tests/linebreaking/harfbuzz-qt.cpp
+++ b/src/3rdparty/harfbuzz/tests/linebreaking/harfbuzz-qt.cpp
@ -23,21 +23,11 @@
 */

 #include <harfbuzz-external.h>
-#include <Qt/private/qunicodetables_p.h>
+#include <QChar>
 #include <QLibrary>
-#include <QTextCodec>

 extern "C" {

-HB_LineBreakClass HB_GetLineBreakClass(HB_UChar32 ch)
-{
-#if QT_VERSION >= 0x040300
-    return (HB_LineBreakClass)QUnicodeTables::lineBreakClass(ch);
-#else
-#error "This test currently requires Qt >= 4.3"
-#endif
-}
-
 void HB_GetUnicodeCharProperties(HB_UChar32 ch, HB_CharCategory *category, int *combiningClass)
 {
    *category = (HB_CharCategory)QChar::category(ch);
@ -59,26 +49,6 @@ HB_UChar16 HB_GetMirroredChar(HB_UChar16 ch)
    return QChar::mirroredChar(ch);
 }

-HB_WordClass HB_GetWordClass(HB_UChar32 ch)
-{
-    const QUnicodeTables::Properties *prop = QUnicodeTables::properties(ch);
-    return (HB_WordClass) prop->wordBreak;
-}
-
-
-HB_SentenceClass HB_GetSentenceClass(HB_UChar32 ch)
-{
-    const QUnicodeTables::Properties *prop = QUnicodeTables::properties(ch);
-    return (HB_SentenceClass) prop->sentenceBreak;
-}
-
-void HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *grapheme, HB_LineBreakClass *lineBreak)
-{
-    const QUnicodeTables::Properties *prop = QUnicodeTables::properties(ch);
-    *grapheme = (HB_GraphemeClass) prop->graphemeBreak;
-    *lineBreak = (HB_LineBreakClass) prop->line_break_class;
-}
-
 void (*HB_Library_Resolve(const char *library, int version, const char *symbol))()
 {
    return QLibrary::resolve(library, version, symbol);
--- a/src/corelib/tools/qharfbuzz.cpp
+++ b/src/corelib/tools/qharfbuzz.cpp
@ -39,47 +39,15 @@
 **
 ****************************************************************************/

+#include "qharfbuzz_p.h"
+
 #include "qunicodetables_p.h"
 #include "qlibrary.h"
-#include "qtextcodec.h"
-
-#include "qharfbuzz_p.h"

 QT_USE_NAMESPACE

 extern "C" {

-HB_GraphemeClass HB_GetGraphemeClass(HB_UChar32 ch)
-{
-    const QUnicodeTables::Properties *prop = QUnicodeTables::properties(ch);
-    return (HB_GraphemeClass) prop->graphemeBreak;
-}
-
-HB_WordClass HB_GetWordClass(HB_UChar32 ch)
-{
-    const QUnicodeTables::Properties *prop = QUnicodeTables::properties(ch);
-    return (HB_WordClass) prop->wordBreak;
-}
-
-HB_SentenceClass HB_GetSentenceClass(HB_UChar32 ch)
-{
-    const QUnicodeTables::Properties *prop = QUnicodeTables::properties(ch);
-    return (HB_SentenceClass) prop->sentenceBreak;
-}
-
-HB_LineBreakClass HB_GetLineBreakClass(HB_UChar32 ch)
-{
-    return (HB_LineBreakClass)QUnicodeTables::lineBreakClass(ch);
-}
-
-
-void HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *grapheme, HB_LineBreakClass *lineBreak)
-{
-    const QUnicodeTables::Properties *prop = QUnicodeTables::properties(ch);
-    *grapheme = (HB_GraphemeClass) prop->graphemeBreak;
-    *lineBreak = (HB_LineBreakClass) prop->line_break_class;
-}
-
 void HB_GetUnicodeCharProperties(HB_UChar32 ch, HB_CharCategory *category, int *combiningClass)
 {
    const QUnicodeTables::Properties *prop = QUnicodeTables::properties(ch);
@ -135,11 +103,4 @@ void qHBFreeFace(HB_Face face)
    HB_FreeFace(face);
 }

-void qGetCharAttributes(const HB_UChar16 *string, hb_uint32 stringLength,
-                        const HB_ScriptItem *items, hb_uint32 numItems,
-                        HB_CharAttributes *attributes)
-{
-    HB_GetCharAttributes(string, stringLength, items, numItems, attributes);
-}
-
 QT_END_NAMESPACE
--- a/src/corelib/tools/qharfbuzz_p.h
+++ b/src/corelib/tools/qharfbuzz_p.h
@ -58,11 +58,6 @@

 QT_BEGIN_NAMESPACE

-// temporary forward until all the textengine code has been moved to QtCore
-Q_CORE_EXPORT void qGetCharAttributes(const HB_UChar16 *string, hb_uint32 stringLength,
-                                      const HB_ScriptItem *items, hb_uint32 numItems,
-                                      HB_CharAttributes *attributes);
-
 Q_CORE_EXPORT HB_Bool qShapeItem(HB_ShaperItem *item);

 // ### temporary
--- a/src/corelib/tools/qtextboundaryfinder.cpp
+++ b/src/corelib/tools/qtextboundaryfinder.cpp
@ -40,9 +40,9 @@
 ****************************************************************************/
 #include <QtCore/qtextboundaryfinder.h>
 #include <QtCore/qvarlengtharray.h>
+
 #include <private/qunicodetables_p.h>
-#include <qdebug.h>
-#include "private/qharfbuzz_p.h"
+#include <private/qunicodetools_p.h>

 QT_BEGIN_NAMESPACE

@ -93,11 +93,12 @@ static void init(QTextBoundaryFinder::BoundaryType type, const QChar *chars, int
        scriptItems.append(item);
    }

-    qGetCharAttributes(string, length, scriptItems.data(), scriptItems.count(), attributes);
+    QCharAttributeOptions options = 0;
    if (type == QTextBoundaryFinder::Word)
-        HB_GetWordBoundaries(string, length, scriptItems.data(), scriptItems.count(), attributes);
+        options |= GetWordBreaks;
    else if (type == QTextBoundaryFinder::Sentence)
-        HB_GetSentenceBoundaries(string, length, scriptItems.data(), scriptItems.count(), attributes);
+        options |= GetSentenceBreaks;
+    qGetCharAttributes(string, length, scriptItems.data(), scriptItems.count(), attributes, options);
 }

 /*! 
--- a/src/corelib/tools/qunicodetools.cpp
+++ b/src/corelib/tools/qunicodetools.cpp
@ -0,0 +1,398 @@
+/****************************************************************************
+**
+** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: http://www.qt-project.org/
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** GNU Lesser General Public License Usage
+** This file may be used under the terms of the GNU Lesser General Public
+** License version 2.1 as published by the Free Software Foundation and
+** appearing in the file LICENSE.LGPL included in the packaging of this
+** file. Please review the following information to ensure the GNU Lesser
+** General Public License version 2.1 requirements will be met:
+** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU General
+** Public License version 3.0 as published by the Free Software Foundation
+** and appearing in the file LICENSE.GPL included in the packaging of this
+** file. Please review the following information to ensure the GNU General
+** Public License version 3.0 requirements will be met:
+** http://www.gnu.org/copyleft/gpl.html.
+**
+** Other Usage
+** Alternatively, this file may be used in accordance with the terms and
+** conditions contained in a signed written agreement between you and Nokia.
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "qunicodetools_p.h"
+
+#include "qunicodetables_p.h"
+
+QT_BEGIN_NAMESPACE
+
+// -----------------------------------------------------------------------------------------------------
+//
+// The line breaking algorithm. See http://www.unicode.org/reports/tr14/tr14-19.html
+//
+// -----------------------------------------------------------------------------------------------------
+//
+// The text boundaries determination algorithm. See http://www.unicode.org/reports/tr29/tr29-11.html
+//
+// -----------------------------------------------------------------------------------------------------
+
+namespace {
+
+/* In our opinion, the Unicode algorithm allows line breaks in some
+   places where they should not be allowed. The following changes were
+   therefore made relative to the Unicode reference:
+
+   EX->AL from DB to IB
+   SY->AL from DB to IB
+   SY->PO from DB to IB
+   SY->PR from DB to IB
+   SY->OP from DB to IB
+   AL->PR from DB to IB
+   AL->PO from DB to IB
+   PR->PR from DB to IB
+   PO->PO from DB to IB
+   PR->PO from DB to IB
+   PO->PR from DB to IB
+   HY->PO from DB to IB
+   HY->PR from DB to IB
+   HY->OP from DB to IB
+   NU->EX from PB to IB
+   EX->PO from DB to IB
+*/
+
+// The following line break classes are not treated by the table:
+//  AI, BK, CB, CR, LF, NL, SA, SG, SP, XX
+
+// Actions stored in lineBreakTable. As applied by calcGraphemeAndLineBreaks():
+//  - ProhibitedBreak:          no break opportunity.
+//  - DirectBreak:              break opportunity (reported as a soft hyphen when
+//                              the preceding code unit is U+00AD).
+//  - IndirectBreak:            break opportunity only if the immediately preceding
+//                              code unit was a space (SP).
+//  - CombiningIndirectBreak /
+//    CombiningProhibitedBreak: no break before the current character; unless it
+//                              follows a space, the current character also leaves
+//                              the tracked "previous class" unchanged.
+enum LineBreakRule {
+    ProhibitedBreak,            // PB in table
+    DirectBreak,                // DB in table
+    IndirectBreak,              // IB in table
+    CombiningIndirectBreak,     // CI in table
+    CombiningProhibitedBreak    // CP in table
+};
+// Short aliases used only to keep the table below readable; undefined right after it.
+#define DB DirectBreak
+#define IB IndirectBreak
+#define CI CombiningIndirectBreak
+#define CP CombiningProhibitedBreak
+#define PB ProhibitedBreak
+// Pair table, looked up as lineBreakTable[cls][tcls]: the row is the line break
+// class tracked for the text before the position, the column is the class of the
+// current character. Only classes up to and including LineBreak_JT have entries;
+// callers must map or special-case every other class before indexing.
+static const uchar lineBreakTable[QUnicodeTables::LineBreak_JT + 1][QUnicodeTables::LineBreak_JT + 1] = {
+/*         OP  CL  QU  GL  NS  EX  SY  IS  PR  PO  NU  AL  ID  IN  HY  BA  BB  B2  ZW  CM  WJ  H2  H3  JL  JV  JT */
+/* OP */ { PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, CP, PB, PB, PB, PB, PB, PB },
+/* CL */ { DB, PB, IB, IB, PB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
+/* QU */ { PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
+/* GL */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
+/* NS */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
+/* EX */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
+/* SY */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
+/* IS */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
+/* PR */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, DB, IB, IB, DB, DB, PB, CI, PB, IB, IB, IB, IB, IB },
+/* PO */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
+/* NU */ { IB, PB, IB, IB, IB, IB, PB, PB, IB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
+/* AL */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
+/* ID */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
+/* IN */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
+/* HY */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
+/* BA */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
+/* BB */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
+/* B2 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, PB, PB, CI, PB, DB, DB, DB, DB, DB },
+/* ZW */ { DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, PB, DB, DB, DB, DB, DB, DB, DB },
+/* CM */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
+/* WJ */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
+/* H2 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, IB, IB },
+/* H3 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, IB },
+/* JL */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, IB, IB, IB, IB, DB },
+/* JV */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, IB, IB },
+/* JT */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, IB }
+};
+#undef DB
+#undef IB
+#undef CI
+#undef CP
+#undef PB
+
+// Grapheme cluster boundary table. Note the index order used by
+// calcGraphemeAndLineBreaks(): graphemeBreakTable[current][previous], i.e. the ROW
+// (trailing comment) is the class of the current character and the COLUMN (header
+// comment) is the class of the character before it. A true entry means the current
+// character starts a new grapheme (charStop); false means it continues the
+// previous one (e.g. row LF / column CR keeps a CR LF pair together).
+static const uchar graphemeBreakTable[QUnicodeTables::GraphemeBreakLVT + 1][QUnicodeTables::GraphemeBreakLVT + 1] = {
+//    Other, CR,    LF,  Control, Extend, L,     V,     T,     LV,    LVT
+    { true , true , true , true , true , true , true , true , true , true  }, // Other,
+    { true , true , true , true , true , true , true , true , true , true  }, // CR,
+    { true , false, true , true , true , true , true , true , true , true  }, // LF,
+    { true , true , true , true , true , true , true , true , true , true  }, // Control,
+    { false, true , true , true , false, false, false, false, false, false }, // Extend,
+    { true , true , true , true , true , false, true , true , true , true  }, // L,
+    { true , true , true , true , true , false, false, true , false, true  }, // V,
+    { true , true , true , true , true , true , false, false, false, false }, // T,
+    { true , true , true , true , true , false, true , true , true , true  }, // LV,
+    { true , true , true , true , true , false, true , true , true , true  }, // LVT
+};
+
+// Fills in whiteSpace, charStop and lineBreakType for the UTF-16 text
+// string[0..len-1].
+//
+// - charStop marks the start of a grapheme cluster (graphemeBreakTable); the low
+//   half of a valid surrogate pair is never a char stop.
+// - lineBreakType is stored on the code unit BEFORE the break position:
+//   attributes[i-1].lineBreakType describes the opportunity between i-1 and i.
+//   The last code unit always gets HB_ForcedBreak.
+//
+// len must be at least 1: string[0] is read and attributes[len-1] is written
+// unconditionally.
+static void calcGraphemeAndLineBreaks(const ushort *string, quint32 len, HB_CharAttributes *attributes)
+{
+    // ##### can this fail if the first char is a surrogate?
+    const QUnicodeTables::Properties *prop = QUnicodeTables::properties(string[0]);
+    QUnicodeTables::GraphemeBreak grapheme = (QUnicodeTables::GraphemeBreak) prop->graphemeBreak;
+    QUnicodeTables::LineBreakClass cls = (QUnicodeTables::LineBreakClass) prop->line_break_class;
+    // handle case where input starts with an LF
+    if (cls == QUnicodeTables::LineBreak_LF)
+        cls = QUnicodeTables::LineBreak_BK;
+
+    attributes[0].whiteSpace = (cls == QUnicodeTables::LineBreak_SP || cls == QUnicodeTables::LineBreak_BK);
+    attributes[0].charStop = true;
+
+    // Two "previous class" trackers are kept:
+    //   cls  - the class used as the row of lineBreakTable; NOT updated by spaces
+    //          or by combining marks that attach to the preceding character.
+    //   lcls - the class of the immediately preceding code unit, used to detect
+    //          "was the last thing a space?".
+    int lcls = cls;
+    for (quint32 i = 1; i < len; ++i) {
+        attributes[i].whiteSpace = false;
+        attributes[i].charStop = true;
+
+        uint ucs4 = string[i];
+        prop = QUnicodeTables::properties(ucs4);
+        QUnicodeTables::GraphemeBreak ngrapheme = (QUnicodeTables::GraphemeBreak) prop->graphemeBreak;
+        QUnicodeTables::LineBreakClass ncls = (QUnicodeTables::LineBreakClass) prop->line_break_class;
+        // Row = current character, column = previous character.
+        attributes[i].charStop = graphemeBreakTable[ngrapheme][grapheme];
+        // handle surrogates
+        if (ncls == QUnicodeTables::LineBreak_SG) {
+            if (QChar::isHighSurrogate(string[i]) && i < len - 1 && QChar::isLowSurrogate(string[i+1])) {
+                // High half of a valid pair: defer all work to the low half. This
+                // skips the state updates at the bottom of the loop, so cls, lcls,
+                // grapheme and attributes[i-1].lineBreakType are left untouched.
+                continue;
+            } else if (QChar::isLowSurrogate(string[i]) && QChar::isHighSurrogate(string[i-1])) {
+                // Low half of a valid pair: classify the combined code point and
+                // forbid a cursor stop in the middle of the pair.
+                ucs4 = QChar::surrogateToUcs4(string[i-1], string[i]);
+                prop = QUnicodeTables::properties(ucs4);
+                ngrapheme = (QUnicodeTables::GraphemeBreak) prop->graphemeBreak;
+                ncls = (QUnicodeTables::LineBreakClass) prop->line_break_class;
+                attributes[i].charStop = false;
+            } else {
+                // Unpaired surrogate: treat it like an ordinary alphabetic character.
+                ncls = QUnicodeTables::LineBreak_AL;
+            }
+        }
+
+        // set white space and char stop flag
+        // NOTE(review): the >= comparisons in this function rely on the enumerator
+        // order of QUnicodeTables::LineBreakClass (declared outside this file);
+        // presumably ..., JT, SA, SG, SP, CR, LF, BK - confirm before reordering.
+        if (ncls >= QUnicodeTables::LineBreak_SP)
+            attributes[i].whiteSpace = true;
+
+        // Mandatory breaks after the previous character: always after LF/BK, and
+        // after CR unless the current character is the LF of a CR LF pair.
+        HB_LineBreakType lineBreakType = HB_NoBreak;
+        if (cls >= QUnicodeTables::LineBreak_LF) {
+            lineBreakType = HB_ForcedBreak;
+        } else if (cls == QUnicodeTables::LineBreak_CR) {
+            lineBreakType = (ncls == QUnicodeTables::LineBreak_LF) ? HB_NoBreak : HB_ForcedBreak;
+        }
+
+        // A space never goes through the pair table and does not replace cls; it is
+        // only remembered through lcls.
+        if (ncls == QUnicodeTables::LineBreak_SP)
+            goto next_no_cls_update;
+        // CR/LF/BK bypass the pair table too, but do become the new cls.
+        if (ncls >= QUnicodeTables::LineBreak_CR)
+            goto next;
+
+        // The extra scope keeps the gotos above from jumping over these
+        // initialised locals.
+        {
+            int tcls = ncls;
+            // for south east asian chars that require a complex (dictionary analysis), the unicode
+            // standard recommends to treat them as AL. thai_attributes and other attribute methods that
+            // do dictionary analysis can override
+            if (tcls >= QUnicodeTables::LineBreak_SA)
+                tcls = QUnicodeTables::LineBreak_AL;
+            // Also folds every class beyond the table range held in cls into AL, so
+            // the lookup below stays within bounds.
+            if (cls >= QUnicodeTables::LineBreak_SA)
+                cls = QUnicodeTables::LineBreak_AL;
+
+            int brk = lineBreakTable[cls][tcls];
+            switch (brk) {
+            case DirectBreak:
+                lineBreakType = HB_Break;
+                if (string[i-1] == 0xad) // soft hyphen
+                    lineBreakType = HB_SoftHyphen;
+                break;
+            case IndirectBreak:
+                // Break only if a space immediately precedes the current character.
+                lineBreakType = (lcls == QUnicodeTables::LineBreak_SP) ? HB_Break : HB_NoBreak;
+                break;
+            case CombiningIndirectBreak:
+                lineBreakType = HB_NoBreak;
+                if (lcls == QUnicodeTables::LineBreak_SP){
+                    // Mark after a space: the opportunity is moved in front of that
+                    // space, onto the code unit two positions back.
+                    if (i > 1)
+                        attributes[i-2].lineBreakType = HB_Break;
+                } else {
+                    // Mark attached to a base character: keep the base's class in cls.
+                    goto next_no_cls_update;
+                }
+                break;
+            case CombiningProhibitedBreak:
+                lineBreakType = HB_NoBreak;
+                if (lcls != QUnicodeTables::LineBreak_SP)
+                    goto next_no_cls_update;
+                // intentional fall through: after a space the mark becomes the new cls
+            case ProhibitedBreak:
+            default:
+                break;
+            }
+        }
+    next:
+        cls = ncls;
+    next_no_cls_update:
+        lcls = ncls;
+        grapheme = ngrapheme;
+        // The decision about the gap before character i is stored on character i-1.
+        attributes[i-1].lineBreakType = lineBreakType;
+    }
+    attributes[len-1].lineBreakType = HB_ForcedBreak;
+}
+
+
+// Actions stored in wordBreakTable. Middle means "undecided": calcWordBreaks()
+// resolves it to Break or NoBreak by looking at the characters that follow.
+enum WordBreakRule { NoBreak = 0, Break = 1, Middle = 2 };
+
+// Looked up as wordBreakTable[brk][nbrk]: the row (trailing comment) is the word
+// break class tracked for the preceding text, the column (header comment) is the
+// class of the current character.
+static const uchar wordBreakTable[QUnicodeTables::WordBreakExtendNumLet + 1][QUnicodeTables::WordBreakExtendNumLet + 1] = {
+//    Other    Format   Katakana ALetter  MidLetter MidNum  Numeric  ExtendNumLet
+    { Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break   }, // Other
+    { Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break   }, // Format
+    { Break  , Break  , NoBreak, Break  , Break  , Break  , Break  , NoBreak }, // Katakana
+    { Break  , Break  , Break  , NoBreak, Middle , Break  , NoBreak, NoBreak }, // ALetter
+    { Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break   }, // MidLetter
+    { Break  , Break  , Break  , Break  , Break  , Break  , Break  , Break   }, // MidNum
+    { Break  , Break  , Break  , NoBreak, Break  , Middle , NoBreak, NoBreak }, // Numeric
+    { Break  , Break  , NoBreak, NoBreak, Break  , Break  , NoBreak, NoBreak }, // ExtendNumLet
+};
+
+// Sets attributes[i].wordBoundary for every code unit of string[0..len-1].
+//
+// Must run after charStop has been computed: a position that is not a char stop
+// is never a word boundary. Position 0 is always a boundary. len must be at
+// least 1, because string[0] is read unconditionally.
+static void calcWordBreaks(const ushort *string, quint32 len, HB_CharAttributes *attributes)
+{
+    // brk is the word break class carried over from the preceding text.
+    quint32 brk = QUnicodeTables::wordBreakClass(string[0]);
+
+    attributes[0].wordBoundary = true;
+
+    for (quint32 i = 1; i < len; ++i) {
+        if (!attributes[i].charStop) {
+            attributes[i].wordBoundary = false;
+            continue;
+        }
+
+        quint32 nbrk = QUnicodeTables::wordBreakClass(string[i]);
+        if (nbrk == QUnicodeTables::WordBreakFormat) {
+            // A Format character only starts a word when it directly follows a
+            // sentence separator; either way it does not replace brk.
+            attributes[i].wordBoundary = (QUnicodeTables::sentenceBreakClass(string[i-1]) == QUnicodeTables::SentenceBreakSep);
+            continue;
+        }
+
+        WordBreakRule rule = (WordBreakRule)wordBreakTable[brk][nbrk];
+        if (rule == Middle) {
+            // Undecided: the current character joins the word only if the next
+            // significant character has the same class as the text before it.
+            // Default to a break and scan forward.
+            rule = Break;
+            quint32 lookahead = i + 1;
+            while (lookahead < len) {
+                quint32 testbrk = QUnicodeTables::wordBreakClass(string[lookahead]);
+                // Skip Format characters (other than sentence separators).
+                if (testbrk == QUnicodeTables::WordBreakFormat
+                    && QUnicodeTables::sentenceBreakClass(string[lookahead]) != QUnicodeTables::SentenceBreakSep) {
+                    ++lookahead;
+                    continue;
+                }
+                if (testbrk == brk) {
+                    rule = NoBreak;
+                    // Clear everything up to the lookahead position and advance the
+                    // outer loop index to it; position lookahead itself is cleared
+                    // by the assignment after this loop.
+                    while (i < lookahead)
+                        attributes[i++].wordBoundary = false;
+                    nbrk = testbrk;
+                }
+                break;
+            }
+        }
+        attributes[i].wordBoundary = (rule == Break);
+        brk = nbrk;
+    }
+}
+
+
+// States of the sentence boundary state machine driven by calcSentenceBreaks().
+// SB_Break and SB_Lookup are not resting states: they are results that
+// calcSentenceBreaks() handles immediately (SB_Break = boundary here, SB_Lookup =
+// decide by scanning ahead).
+enum SentenceBreakState {
+    SB_Initial,
+    SB_Upper,
+    SB_UpATerm,
+    SB_ATerm,
+    SB_ATermC,
+    SB_ACS,
+    SB_STerm,
+    SB_STermC,
+    SB_SCS,
+    SB_BAfter,
+    SB_Break,
+    SB_Lookup
+};
+
+// Transition table: sentenceBreakTable[current state][sentence break class of the
+// current character] yields the next state. The array is declared with
+// SB_Lookup + 1 rows, but only the ten rows SB_Initial..SB_BAfter are spelled out;
+// the rows for SB_Break and SB_Lookup are implicitly zero-filled (i.e. SB_Initial).
+static const uchar sentenceBreakTable[SB_Lookup + 1][QUnicodeTables::SentenceBreakClose + 1] = {
+//      Other       Sep         Format      Sp          Lower       Upper       OLetter     Numeric     ATerm       STerm       Close
+    { SB_Initial, SB_BAfter , SB_Initial, SB_Initial, SB_Initial, SB_Upper  , SB_Initial, SB_Initial, SB_ATerm  , SB_STerm  , SB_Initial }, // SB_Initial,
+    { SB_Initial, SB_BAfter , SB_Upper  , SB_Initial, SB_Initial, SB_Upper  , SB_Initial, SB_Initial, SB_UpATerm, SB_STerm  , SB_Initial }, // SB_Upper
+
+    { SB_Lookup , SB_BAfter , SB_UpATerm, SB_ACS    , SB_Initial, SB_Upper  , SB_Break  , SB_Initial, SB_ATerm  , SB_STerm  , SB_ATermC  }, // SB_UpATerm
+    { SB_Lookup , SB_BAfter , SB_ATerm  , SB_ACS    , SB_Initial, SB_Break  , SB_Break  , SB_Initial, SB_ATerm  , SB_STerm  , SB_ATermC  }, // SB_ATerm
+    { SB_Lookup , SB_BAfter , SB_ATermC , SB_ACS    , SB_Initial, SB_Break  , SB_Break  , SB_Lookup , SB_ATerm  , SB_STerm  , SB_ATermC  }, // SB_ATermC,
+    { SB_Lookup , SB_BAfter , SB_ACS    , SB_ACS    , SB_Initial, SB_Break  , SB_Break  , SB_Lookup , SB_ATerm  , SB_STerm  , SB_Lookup  }, // SB_ACS,
+
+    { SB_Break  , SB_BAfter , SB_STerm  , SB_SCS    , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_ATerm  , SB_STerm  , SB_STermC  }, // SB_STerm,
+    { SB_Break  , SB_BAfter , SB_STermC , SB_SCS    , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_ATerm  , SB_STerm  , SB_STermC  }, // SB_STermC,
+    { SB_Break  , SB_BAfter , SB_SCS    , SB_SCS    , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_ATerm  , SB_STerm  , SB_Break   }, // SB_SCS,
+    { SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_Break  , SB_Break   }, // SB_BAfter,
+};
+
+// Sets attributes[i].sentenceBoundary for every code unit of string[0..len-1] by
+// running the sentenceBreakTable state machine over the text.
+//
+// Must run after charStop has been computed: a position that is not a char stop
+// is never a sentence boundary. Position 0 is always a boundary. len must be at
+// least 1, because string[0] is read unconditionally.
+static void calcSentenceBreaks(const ushort *string, quint32 len, HB_CharAttributes *attributes)
+{
+    quint32 brk = sentenceBreakTable[SB_Initial][QUnicodeTables::sentenceBreakClass(string[0])];
+    attributes[0].sentenceBoundary = true;
+    for (quint32 i = 1; i < len; ++i) {
+        if (!attributes[i].charStop) {
+            attributes[i].sentenceBoundary = false;
+            continue;
+        }
+        brk = sentenceBreakTable[brk][QUnicodeTables::sentenceBreakClass(string[i])];
+        if (brk == SB_Lookup) {
+            // Undecided after a possible sentence end: assume a break, unless the
+            // scan below reaches a lower-case letter while skipping only
+            // Other/Numeric/Close characters (the sentence then simply continues).
+            brk = SB_Break;
+            quint32 lookahead = i + 1;
+            while (lookahead < len) {
+                quint32 sbrk = QUnicodeTables::sentenceBreakClass(string[lookahead]);
+                // The Lower test has to come first. Lower also satisfies the
+                // generic "stop scanning" condition below, so testing it second
+                // made this branch unreachable and the lookahead could never
+                // suppress a break.
+                if (sbrk == QUnicodeTables::SentenceBreakLower) {
+                    brk = SB_Initial;
+                    break;
+                }
+                if (sbrk != QUnicodeTables::SentenceBreakOther
+                    && sbrk != QUnicodeTables::SentenceBreakNumeric
+                    && sbrk != QUnicodeTables::SentenceBreakClose) {
+                    break;
+                }
+                ++lookahead;
+            }
+            if (brk == SB_Initial) {
+                // No boundary anywhere in the scanned span; advance the outer loop
+                // index to the lower-case letter, which is cleared further down.
+                while (i < lookahead)
+                    attributes[i++].sentenceBoundary = false;
+            }
+        }
+        if (brk == SB_Break) {
+            attributes[i].sentenceBoundary = true;
+            // The current character starts the next sentence: restart the state
+            // machine from it.
+            brk = sentenceBreakTable[SB_Initial][QUnicodeTables::sentenceBreakClass(string[i])];
+        } else {
+            attributes[i].sentenceBoundary = false;
+        }
+    }
+}
+
+} // namespace
+
+
+// Computes the default character attributes for string[0..length-1] into
+// attributes[0..length-1]. Grapheme and line break information is always
+// produced; word and sentence boundaries only when requested through options.
+// Does nothing when length is not positive.
+Q_CORE_EXPORT void qGetCharAttributes(const ushort *string, int length,
+                                      const HB_ScriptItem *items, int numItems,
+                                      HB_CharAttributes *attributes, QCharAttributeOptions options)
+{
+    // An empty (or negative-length) run leaves the attribute buffer untouched.
+    if (length < 1)
+        return;
+
+    // Every pass below starts from an all-zero attribute array.
+    memset(attributes, 0, sizeof(HB_CharAttributes) * length);
+
+    // This pass has to come first: the word and sentence passes consult charStop.
+    calcGraphemeAndLineBreaks(string, length, attributes);
+
+    if (options.testFlag(GetWordBreaks))
+        calcWordBreaks(string, length, attributes);
+
+    if (options.testFlag(GetSentenceBreaks))
+        calcSentenceBreaks(string, length, attributes);
+
+    // Finally hand the results to HarfBuzz, which may adjust them per script item.
+    HB_GetTailoredCharAttributes(string, length, items, numItems, attributes);
+}
+
+QT_END_NAMESPACE
--- a/src/corelib/tools/qunicodetools_p.h
+++ b/src/corelib/tools/qunicodetools_p.h
@ -0,0 +1,76 @@
+/****************************************************************************
+**
+** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: http://www.qt-project.org/
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** GNU Lesser General Public License Usage
+** This file may be used under the terms of the GNU Lesser General Public
+** License version 2.1 as published by the Free Software Foundation and
+** appearing in the file LICENSE.LGPL included in the packaging of this
+** file. Please review the following information to ensure the GNU Lesser
+** General Public License version 2.1 requirements will be met:
+** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU General
+** Public License version 3.0 as published by the Free Software Foundation
+** and appearing in the file LICENSE.GPL included in the packaging of this
+** file. Please review the following information to ensure the GNU General
+** Public License version 3.0 requirements will be met:
+** http://www.gnu.org/copyleft/gpl.html.
+**
+** Other Usage
+** Alternatively, this file may be used in accordance with the terms and
+** conditions contained in a signed written agreement between you and Nokia.
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#ifndef QUNICODETOOLS_P_H
+#define QUNICODETOOLS_P_H
+
+//
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the Qt API.  It exists for the convenience
+// of other Qt classes.  This header file may change from version to
+// version without notice, or even be removed.
+//
+// We mean it.
+//
+
+#include <QtCore/qglobal.h>
+#include <harfbuzz-shaper.h>
+
+QT_BEGIN_NAMESPACE
+
+Q_DECLARE_TYPEINFO(HB_CharAttributes, Q_PRIMITIVE_TYPE);
+Q_DECLARE_TYPEINFO(HB_ScriptItem, Q_PRIMITIVE_TYPE);
+
+// Optional analyses for qGetCharAttributes(). The values are distinct bits and
+// can be OR-ed together; grapheme and line break attributes are computed
+// regardless of these options.
+enum QCharAttributeOption {
+    GetWordBreaks = 1,      // also fill in HB_CharAttributes::wordBoundary
+    GetSentenceBreaks = 2   // also fill in HB_CharAttributes::sentenceBoundary
+};
+Q_DECLARE_FLAGS(QCharAttributeOptions, QCharAttributeOption)
+
+// Computes character attributes for the UTF-16 text string[0..length-1] and
+// stores them in attributes[0..length-1]; the array is zeroed first. Grapheme
+// and line break information is always computed, word and sentence boundaries
+// only when the matching option is set. items/numItems are passed on to
+// HB_GetTailoredCharAttributes(). The call is a no-op when length <= 0.
+Q_CORE_EXPORT void qGetCharAttributes(const ushort *string, int length,
+                                      const HB_ScriptItem *items, int numItems,
+                                      HB_CharAttributes *attributes, QCharAttributeOptions options = QFlag(0));
+
+QT_END_NAMESPACE
+
+#endif // QUNICODETOOLS_P_H
--- a/src/corelib/tools/tools.pri
+++ b/src/corelib/tools/tools.pri
@ -55,6 +55,7 @@ HEADERS +=  \
        tools/qtimeline.h \
        tools/qelapsedtimer.h \
        tools/qunicodetables_p.h \
+        tools/qunicodetools_p.h \
        tools/qvarlengtharray.h \
        tools/qvector.h

@ -92,6 +93,7 @@ SOURCES += \
        tools/qstringlist.cpp \
        tools/qtextboundaryfinder.cpp \
        tools/qtimeline.cpp \
+        tools/qunicodetools.cpp \
        tools/qvector.cpp \
        tools/qvsnprintf.cpp

--- a/src/gui/text/qtextengine.cpp
+++ b/src/gui/text/qtextengine.cpp
@ -52,6 +52,7 @@
 #include "qfontengine_p.h"
 #include "qstring.h"
 #include <private/qunicodetables_p.h>
+#include <private/qunicodetools_p.h>
 #include "qtextdocument_p.h"
 #include "qrawfont.h"
 #include "qrawfont_p.h"
--- a/tests/auto/corelib/tools/qtextboundaryfinder/tst_qtextboundaryfinder.cpp
+++ b/tests/auto/corelib/tools/qtextboundaryfinder/tst_qtextboundaryfinder.cpp
@ -42,6 +42,7 @@
 #include <QtTest/QtTest>

 #include <qtextboundaryfinder.h>
+#include <qtextcodec.h>
 #include <qfile.h>
 #include <qdebug.h>

@ -61,6 +62,7 @@ private slots:
    void toNextBoundary();
    void toPreviousBoundary_data();
    void toPreviousBoundary();
+    void thaiLineBreak();
 };

 void tst_QTextBoundaryFinder::init()
@ -382,7 +384,95 @@ void tst_QTextBoundaryFinder::toPreviousBoundary()
    QCOMPARE(boundaries, foundBoundaries);
 }

+#include <qlibrary.h>

+#define LIBTHAI_MAJOR   0
+typedef int (*th_brk_def) (const unsigned char*, int*, size_t);
+static th_brk_def th_brk = 0;
+
+// Resolves th_brk from the "thai" library on first use and reports whether the
+// symbol is available. The lookup is attempted at most once; without QLibrary
+// support it is never attempted and the result is always false.
+static bool init_libthai()
+{
+#ifndef QT_NO_LIBRARY
+    static bool resolveAttempted = false;
+    if (resolveAttempted)
+        return th_brk != 0;
+
+    resolveAttempted = true;
+    th_brk = (th_brk_def) QLibrary::resolve("thai", (int)LIBTHAI_MAJOR, "th_brk");
+#endif
+    return th_brk != 0;
+}
+
+// Checks the line break positions reported for a Thai sample sentence.
+// Skipped when libthai cannot be loaded. NOTE: the verification itself is
+// currently compiled out (#if 0), so with libthai present this slot passes
+// without checking anything.
+void tst_QTextBoundaryFinder::thaiLineBreak()
+{
+    if (!init_libthai())
+        QSKIP("This test requires libThai-0.1.1x to be installed.");
+#if 0
+    // สวัสดีครับ นี่เป็นการงทดสอบตัวเอ
+    QTextCodec *codec = QTextCodec::codecForMib(2259);
+    // codecForMib() returns 0 when the codec is not available.
+    QVERIFY(codec);
+    QString text = codec->toUnicode(QByteArray("\xca\xc7\xd1\xca\xb4\xd5\xa4\xc3\xd1\xba\x20\xb9\xd5\xe8\xe0\xbb\xe7\xb9\xa1\xd2\xc3\xb7\xb4\xca\xcd\xba\xb5\xd1\xc7\xe0\xcd\xa7"));
+    QCOMPARE(text.length(), 32);
+
+    // Expected isAtBoundary() result for each position 0..31.
+    static const bool expectedBoundary[32] = {
+        true,  false, false, false, false, false, true,  true,   //  0 ..  7
+        false, false, false, true,  false, false, true,  false,  //  8 .. 15
+        false, false, true,  false, true,  true,  false, false,  // 16 .. 23
+        false, true,  true,  false, false, false, false, false   // 24 .. 31
+    };
+
+    QTextBoundaryFinder finder(QTextBoundaryFinder::Line, text);
+    for (int i = 0; i < 32; ++i) {
+        finder.setPosition(i);
+        QVERIFY2(finder.isAtBoundary() == expectedBoundary[i],
+                 qPrintable(QString::fromLatin1("unexpected line boundary state at position %1").arg(i)));
+    }
+#endif
+}


 QTEST_MAIN(tst_QTextBoundaryFinder)