move the default text breaking algorithm impl from HarfBuzz to Qt

there are several reasons to do this:
* text breaking is not a shaper's job;
* since the text breaking rules are bound to a specific Unicode version,
  updating Qt's internal unicode data would require updating the data in HB as well;
* makes porting to HarfBuzz-NG somewhat easier

Change-Id: I0bbf8e8a343bc074696f4ddf2ae4e7fa32a61629
Reviewed-by: Lars Knoll <lars.knoll@nokia.com>
This commit is contained in:
Konstantin Ritt 2012-05-10 10:31:16 +03:00 committed by Qt by Nokia
parent 8c0048a377
commit cbfdec6603
17 changed files with 587 additions and 744 deletions

View File

@ -34,23 +34,7 @@ HB_BEGIN_HEADER
They need to be provided by the application/library
*/
/*
see http://www.unicode.org/reports/tr14/tr14-19.html
we don't use the XX, AI and CB properties and map them to AL instead.
as we don't support any EBCDIC based OS'es, NL is ignored and mapped to AL as well.
*/
/*
Line break classes as seen by the line breaking code.
The declaration order is significant: the line breaking code sizes its pair
table with HB_LineBreak_JT + 1 and tests classes with >= comparisons against
HB_LineBreak_SA, HB_LineBreak_SP, HB_LineBreak_CR and HB_LineBreak_LF.
NOTE(review): values are produced by a plain cast from Qt's line break class,
so the order presumably has to stay in sync with the Qt enum - confirm.
*/
typedef enum {
HB_LineBreak_OP, HB_LineBreak_CL, HB_LineBreak_QU, HB_LineBreak_GL, HB_LineBreak_NS,
HB_LineBreak_EX, HB_LineBreak_SY, HB_LineBreak_IS, HB_LineBreak_PR, HB_LineBreak_PO,
HB_LineBreak_NU, HB_LineBreak_AL, HB_LineBreak_ID, HB_LineBreak_IN, HB_LineBreak_HY,
HB_LineBreak_BA, HB_LineBreak_BB, HB_LineBreak_B2, HB_LineBreak_ZW, HB_LineBreak_CM,
HB_LineBreak_WJ, HB_LineBreak_H2, HB_LineBreak_H3, HB_LineBreak_JL, HB_LineBreak_JV,
HB_LineBreak_JT, HB_LineBreak_SA, HB_LineBreak_SG,
HB_LineBreak_SP, HB_LineBreak_CR, HB_LineBreak_LF, HB_LineBreak_BK
} HB_LineBreakClass;
typedef enum
typedef enum
{
HB_Mark_NonSpacing, /* Mn */
HB_Mark_SpacingCombining, /* Mc */
@ -90,55 +74,6 @@ typedef enum
HB_Symbol_Other /* So */
} HB_CharCategory;
/*
Grapheme cluster break classes.
The values are used directly as row/column indices of the grapheme pair table
(sized with HB_Grapheme_LVT + 1), so HB_Grapheme_LVT must stay the last entry.
NOTE(review): values are filled in by casting Qt's graphemeBreak property;
presumably the order must match the Qt enum - confirm.
*/
typedef enum
{
HB_Grapheme_Other,
HB_Grapheme_CR,
HB_Grapheme_LF,
HB_Grapheme_Control,
HB_Grapheme_Extend,
HB_Grapheme_L,
HB_Grapheme_V,
HB_Grapheme_T,
HB_Grapheme_LV,
HB_Grapheme_LVT
} HB_GraphemeClass;
/*
Word break classes.
The values are used directly as row/column indices of the word break pair
table (sized with HB_Word_ExtendNumLet + 1), so HB_Word_ExtendNumLet must stay
the last entry.
NOTE(review): values are filled in by casting Qt's wordBreak property;
presumably the order must match the Qt enum - confirm.
*/
typedef enum
{
HB_Word_Other,
HB_Word_Format,
HB_Word_Katakana,
HB_Word_ALetter,
HB_Word_MidLetter,
HB_Word_MidNum,
HB_Word_Numeric,
HB_Word_ExtendNumLet
} HB_WordClass;
/*
Sentence break classes.
The values are used directly as column indices of the sentence break state
table (sized with HB_Sentence_Close + 1), so HB_Sentence_Close must stay the
last entry.
NOTE(review): values are filled in by casting Qt's sentenceBreak property;
presumably the order must match the Qt enum - confirm.
*/
typedef enum
{
HB_Sentence_Other,
HB_Sentence_Sep,
HB_Sentence_Format,
HB_Sentence_Sp,
HB_Sentence_Lower,
HB_Sentence_Upper,
HB_Sentence_OLetter,
HB_Sentence_Numeric,
HB_Sentence_ATerm,
HB_Sentence_STerm,
HB_Sentence_Close
} HB_SentenceClass;
HB_GraphemeClass HB_GetGraphemeClass(HB_UChar32 ch);
HB_WordClass HB_GetWordClass(HB_UChar32 ch);
HB_SentenceClass HB_GetSentenceClass(HB_UChar32 ch);
HB_LineBreakClass HB_GetLineBreakClass(HB_UChar32 ch);
void HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *grapheme, HB_LineBreakClass *lineBreak);
void HB_GetUnicodeCharProperties(HB_UChar32 ch, HB_CharCategory *category, int *combiningClass);
HB_CharCategory HB_GetUnicodeCharCategory(HB_UChar32 ch);
int HB_GetUnicodeCharCombiningClass(HB_UChar32 ch);

View File

@ -32,205 +32,6 @@
#define HB_MIN(a, b) ((a) < (b) ? (a) : (b))
#define HB_MAX(a, b) ((a) > (b) ? (a) : (b))
// -----------------------------------------------------------------------------------------------------
//
// The line break algorithm. See http://www.unicode.org/reports/tr14/tr14-13.html
//
// -----------------------------------------------------------------------------------------------------
/* The Unicode algorithm does in our opinion allow line breaks at some
places they shouldn't be allowed. The following changes were thus
made in comparison to the Unicode reference:
EX->AL from DB to IB
SY->AL from DB to IB
SY->PO from DB to IB
SY->PR from DB to IB
SY->OP from DB to IB
AL->PR from DB to IB
AL->PO from DB to IB
PR->PR from DB to IB
PO->PO from DB to IB
PR->PO from DB to IB
PO->PR from DB to IB
HY->PO from DB to IB
HY->PR from DB to IB
HY->OP from DB to IB
NU->EX from PB to IB
EX->PO from DB to IB
*/
// The following line break classes are not treated by the table:
// AI, BK, CB, CR, LF, NL, SA, SG, SP, XX
// Actions stored in the line break pair table for a (preceding class, current class) pair.
// The two Combining* actions only occur in the CM (combining mark) column of that table.
enum break_class {
// the first 4 values have to agree with the enum in QCharAttributes
ProhibitedBreak, // PB in table
DirectBreak, // DB in table
IndirectBreak, // IB in table
CombiningIndirectBreak, // CI in table
CombiningProhibitedBreak // CP in table
};
#define DB DirectBreak
#define IB IndirectBreak
#define CI CombiningIndirectBreak
#define CP CombiningProhibitedBreak
#define PB ProhibitedBreak
// Line break pair table, consulted as breakTable[cls][tcls]:
// the row (label on the left) is the class carried over from the preceding text,
// the column (label in the header line) is the class of the current character.
// Each entry is one of the break_class actions, abbreviated PB/DB/IB/CI/CP.
// Only the classes OP..JT are covered; the remaining classes are handled by the caller.
static const hb_uint8 breakTable[HB_LineBreak_JT+1][HB_LineBreak_JT+1] =
{
/* OP CL QU GL NS EX SY IS PR PO NU AL ID IN HY BA BB B2 ZW CM WJ H2 H3 JL JV JT */
/* OP */ { PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, CP, PB, PB, PB, PB, PB, PB },
/* CL */ { DB, PB, IB, IB, PB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
/* QU */ { PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
/* GL */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
/* NS */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
/* EX */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
/* SY */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
/* IS */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
/* PR */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, DB, IB, IB, DB, DB, PB, CI, PB, IB, IB, IB, IB, IB },
/* PO */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
/* NU */ { IB, PB, IB, IB, IB, IB, PB, PB, IB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
/* AL */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
/* ID */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
/* IN */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
/* HY */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
/* BA */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
/* BB */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
/* B2 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, PB, PB, CI, PB, DB, DB, DB, DB, DB },
/* ZW */ { DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, PB, DB, DB, DB, DB, DB, DB, DB },
/* CM */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
/* WJ */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
/* H2 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, IB, IB },
/* H3 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, IB },
/* JL */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, IB, IB, IB, IB, DB },
/* JV */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, IB, IB },
/* JT */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, IB }
};
#undef DB
#undef IB
#undef CI
#undef CP
#undef PB
// Grapheme cluster pair table, consulted as graphemeTable[ngrapheme][grapheme]:
// the row (label on the right) is the class of the CURRENT character,
// the column (label in the header line) is the class of the PRECEDING character.
// The entry becomes the charStop flag of the current character, i.e. false means
// the current character continues the cluster started earlier.
static const hb_uint8 graphemeTable[HB_Grapheme_LVT + 1][HB_Grapheme_LVT + 1] =
{
// Other, CR, LF, Control,Extend,L, V, T, LV, LVT
{ true , true , true , true , true , true , true , true , true , true }, // Other,
{ true , true , true , true , true , true , true , true , true , true }, // CR,
{ true , false, true , true , true , true , true , true , true , true }, // LF,
{ true , true , true , true , true , true , true , true , true , true }, // Control,
{ false, true , true , true , false, false, false, false, false, false }, // Extend,
{ true , true , true , true , true , false, true , true , true , true }, // L,
{ true , true , true , true , true , false, false, true , false, true }, // V,
{ true , true , true , true , true , true , false, false, false, false }, // T,
{ true , true , true , true , true , false, true , true , true , true }, // LV,
{ true , true , true , true , true , false, true , true , true , true }, // LVT
};
// Computes the default per-code-unit attributes for the UTF-16 text uc[0..len):
// whiteSpace, charStop and lineBreakType are written into charAttributes.
//
// The break type decided while looking at position i is stored at position i-1,
// i.e. it is recorded on the character that precedes the candidate break; the
// last character always receives HB_ForcedBreak.
//
// Two "previous class" variables are tracked:
//   cls  - the class used as the pair table row; it is not refreshed for spaces
//          nor for combining marks that attach to the preceding character
//   lcls - the class of the character immediately before the current one
static void calcLineBreaks(const HB_UChar16 *uc, hb_uint32 len, HB_CharAttributes *charAttributes)
{
if (!len)
return;
// ##### can this fail if the first char is a surrogate?
HB_LineBreakClass cls;
HB_GraphemeClass grapheme;
HB_GetGraphemeAndLineBreakClass(*uc, &grapheme, &cls);
// handle case where input starts with an LF
if (cls == HB_LineBreak_LF)
cls = HB_LineBreak_BK;
charAttributes[0].whiteSpace = (cls == HB_LineBreak_SP || cls == HB_LineBreak_BK);
charAttributes[0].charStop = true;
int lcls = cls;
for (hb_uint32 i = 1; i < len; ++i) {
charAttributes[i].whiteSpace = false;
charAttributes[i].charStop = true;
HB_UChar32 code = uc[i];
HB_GraphemeClass ngrapheme;
HB_LineBreakClass ncls;
HB_GetGraphemeAndLineBreakClass(code, &ngrapheme, &ncls);
// row = current class, column = previous class
charAttributes[i].charStop = graphemeTable[ngrapheme][grapheme];
// handle surrogates
if (ncls == HB_LineBreak_SG) {
if (HB_IsHighSurrogate(uc[i]) && i < len - 1 && HB_IsLowSurrogate(uc[i+1])) {
// first half of a valid pair: the pair is classified when its low surrogate is
// reached; this skips the bookkeeping at the bottom of the loop, so nothing is
// written for position i-1 in this iteration
continue;
} else if (HB_IsLowSurrogate(uc[i]) && HB_IsHighSurrogate(uc[i-1])) {
// second half of a valid pair: classify the decoded code point and never
// allow a cursor stop between the two halves
code = HB_SurrogateToUcs4(uc[i-1], uc[i]);
HB_GetGraphemeAndLineBreakClass(code, &ngrapheme, &ncls);
charAttributes[i].charStop = false;
} else {
// unpaired surrogate: treat like an ordinary alphabetic character
ncls = HB_LineBreak_AL;
}
}
// set white space and char stop flag
// (SP, CR, LF and BK are the last enumerators, hence the >= test)
if (ncls >= HB_LineBreak_SP)
charAttributes[i].whiteSpace = true;
HB_LineBreakType lineBreakType = HB_NoBreak;
// previous character was LF or BK: always break; after CR break unless an LF follows
if (cls >= HB_LineBreak_LF) {
lineBreakType = HB_ForcedBreak;
} else if(cls == HB_LineBreak_CR) {
lineBreakType = (ncls == HB_LineBreak_LF) ? HB_NoBreak : HB_ForcedBreak;
}
// a space never becomes the table row: keep cls, only lcls records it
if (ncls == HB_LineBreak_SP)
goto next_no_cls_update;
// CR, LF and BK are not part of the pair table
if (ncls >= HB_LineBreak_CR)
goto next;
{
int tcls = ncls;
// for south east asian chars that require a complex (dictionary analysis), the unicode
// standard recommends to treat them as AL. thai_attributes and other attribute methods that
// do dictionary analysis can override
if (tcls >= HB_LineBreak_SA)
tcls = HB_LineBreak_AL;
if (cls >= HB_LineBreak_SA)
cls = HB_LineBreak_AL;
int brk = breakTable[cls][tcls];
switch (brk) {
case DirectBreak:
lineBreakType = HB_Break;
if (uc[i-1] == 0xad) // soft hyphen
lineBreakType = HB_SoftHyphen;
break;
case IndirectBreak:
// break only if at least one space separates the two characters
lineBreakType = (lcls == HB_LineBreak_SP) ? HB_Break : HB_NoBreak;
break;
case CombiningIndirectBreak:
lineBreakType = HB_NoBreak;
if (lcls == HB_LineBreak_SP){
// mark follows a space: record the break on the character before that space
if (i > 1)
charAttributes[i-2].lineBreakType = HB_Break;
} else {
// mark attaches to the preceding character: keep that character's class in cls
goto next_no_cls_update;
}
break;
case CombiningProhibitedBreak:
lineBreakType = HB_NoBreak;
if (lcls != HB_LineBreak_SP)
goto next_no_cls_update;
// otherwise falls through to the no-op cases below
case ProhibitedBreak:
default:
break;
}
}
next:
cls = ncls;
next_no_cls_update:
lcls = ncls;
grapheme = ngrapheme;
charAttributes[i-1].lineBreakType = lineBreakType;
}
charAttributes[len-1].lineBreakType = HB_ForcedBreak;
}
// --------------------------------------------------------------------------------------------------------------------------------------------
//
// Basic processing
@ -679,13 +480,12 @@ const HB_ScriptEngine HB_ScriptEngines[] = {
{ HB_ArabicShape, 0}
};
void HB_GetCharAttributes(const HB_UChar16 *string, hb_uint32 stringLength,
const HB_ScriptItem *items, hb_uint32 numItems,
HB_CharAttributes *attributes)
void HB_GetTailoredCharAttributes(const HB_UChar16 *string, hb_uint32 stringLength,
const HB_ScriptItem *items, hb_uint32 numItems,
HB_CharAttributes *attributes)
{
memset(attributes, 0, stringLength * sizeof(HB_CharAttributes));
calcLineBreaks(string, stringLength, attributes);
if (stringLength == 0)
return;
for (hb_uint32 i = 0; i < numItems; ++i) {
HB_Script script = items[i].script;
if (script == HB_Script_Inherited)
@ -698,136 +498,6 @@ void HB_GetCharAttributes(const HB_UChar16 *string, hb_uint32 stringLength,
}
// Outcome of a word break table lookup. Middle means the decision depends on the
// character that follows the current one (the caller resolves it by looking ahead).
enum BreakRule { NoBreak = 0, Break = 1, Middle = 2 };
// Word break pair table, consulted as wordbreakTable[brk][nbrk]:
// the row (label on the right) is the class of the preceding character,
// the column (label in the header line) is the class of the current character.
static const hb_uint8 wordbreakTable[HB_Word_ExtendNumLet + 1][HB_Word_ExtendNumLet + 1] = {
// Other Format Katakana ALetter MidLetter MidNum Numeric ExtendNumLet
{ Break, Break, Break, Break, Break, Break, Break, Break }, // Other
{ Break, Break, Break, Break, Break, Break, Break, Break }, // Format
{ Break, Break, NoBreak, Break, Break, Break, Break, NoBreak }, // Katakana
{ Break, Break, Break, NoBreak, Middle, Break, NoBreak, NoBreak }, // ALetter
{ Break, Break, Break, Break, Break, Break, Break, Break }, // MidLetter
{ Break, Break, Break, Break, Break, Break, Break, Break }, // MidNum
{ Break, Break, Break, NoBreak, Break, Middle, NoBreak, NoBreak }, // Numeric
{ Break, Break, NoBreak, NoBreak, Break, Break, NoBreak, NoBreak }, // ExtendNumLet
};
// Sets attributes[i].wordBoundary for every code unit of string[0..stringLength).
// Reads attributes[i].charStop, so the character attributes must have been computed
// before this is called. The items/numItems parameters are unused.
// Position 0 is always a word boundary; positions that are not a charStop never are.
void HB_GetWordBoundaries(const HB_UChar16 *string, hb_uint32 stringLength,
const HB_ScriptItem * /*items*/, hb_uint32 /*numItems*/,
HB_CharAttributes *attributes)
{
if (stringLength == 0)
return;
// brk holds the word class of the last character that took part in a table lookup
unsigned int brk = HB_GetWordClass(string[0]);
attributes[0].wordBoundary = true;
for (hb_uint32 i = 1; i < stringLength; ++i) {
if (!attributes[i].charStop) {
attributes[i].wordBoundary = false;
continue;
}
hb_uint32 nbrk = HB_GetWordClass(string[i]);
if (nbrk == HB_Word_Format) {
// Format characters do not update brk; a boundary is only placed before one
// when the preceding character is a sentence separator
attributes[i].wordBoundary = (HB_GetSentenceClass(string[i-1]) == HB_Sentence_Sep);
continue;
}
BreakRule rule = (BreakRule)wordbreakTable[brk][nbrk];
if (rule == Middle) {
// default to a break; it is cancelled only if the next relevant character has the
// same class as the one before the current character
rule = Break;
hb_uint32 lookahead = i + 1;
while (lookahead < stringLength) {
hb_uint32 testbrk = HB_GetWordClass(string[lookahead]);
if (testbrk == HB_Word_Format && HB_GetSentenceClass(string[lookahead]) != HB_Sentence_Sep) {
++lookahead;
continue;
}
if (testbrk == brk) {
rule = NoBreak;
// clear the boundary flag on everything up to the lookahead position and
// resume the outer loop from there (i is advanced on purpose)
while (i < lookahead)
attributes[i++].wordBoundary = false;
nbrk = testbrk;
}
break;
}
}
attributes[i].wordBoundary = (rule == Break);
brk = nbrk;
}
}
// States of the sentence break state machine.
// SB_Initial..SB_BAfter are the row indices of sentenceBreakTable.
// SB_Break and SB_Look only occur as table entries: SB_Break places a sentence
// boundary at the current character, SB_Look asks the caller to look ahead
// before deciding.
enum SentenceBreakStates {
SB_Initial,
SB_Upper,
SB_UpATerm,
SB_ATerm,
SB_ATermC,
SB_ACS,
SB_STerm,
SB_STermC,
SB_SCS,
SB_BAfter,
SB_Break,
SB_Look
};
// Sentence break transition table, consulted as sentenceBreakTable[state][class]:
// the row (label on the right) is the current SentenceBreakStates value,
// the column (label in the header line) is the HB_SentenceClass of the next character.
// NOTE(review): the row dimension is declared with HB_Sentence_Close + 1 (11) although
// rows are states and only the ten rows SB_Initial..SB_BAfter are initialised; the
// extra row is zero-filled and looks unintended - SB_BAfter + 1 would match the data.
static const hb_uint8 sentenceBreakTable[HB_Sentence_Close + 1][HB_Sentence_Close + 1] = {
// Other Sep Format Sp Lower Upper OLetter Numeric ATerm STerm Close
{ SB_Initial, SB_BAfter , SB_Initial, SB_Initial, SB_Initial, SB_Upper , SB_Initial, SB_Initial, SB_ATerm , SB_STerm , SB_Initial }, // SB_Initial,
{ SB_Initial, SB_BAfter , SB_Upper , SB_Initial, SB_Initial, SB_Upper , SB_Initial, SB_Initial, SB_UpATerm, SB_STerm , SB_Initial }, // SB_Upper
{ SB_Look , SB_BAfter , SB_UpATerm, SB_ACS , SB_Initial, SB_Upper , SB_Break , SB_Initial, SB_ATerm , SB_STerm , SB_ATermC }, // SB_UpATerm
{ SB_Look , SB_BAfter , SB_ATerm , SB_ACS , SB_Initial, SB_Break , SB_Break , SB_Initial, SB_ATerm , SB_STerm , SB_ATermC }, // SB_ATerm
{ SB_Look , SB_BAfter , SB_ATermC , SB_ACS , SB_Initial, SB_Break , SB_Break , SB_Look , SB_ATerm , SB_STerm , SB_ATermC }, // SB_ATermC,
{ SB_Look , SB_BAfter , SB_ACS , SB_ACS , SB_Initial, SB_Break , SB_Break , SB_Look , SB_ATerm , SB_STerm , SB_Look }, // SB_ACS,
{ SB_Break , SB_BAfter , SB_STerm , SB_SCS , SB_Break , SB_Break , SB_Break , SB_Break , SB_ATerm , SB_STerm , SB_STermC }, // SB_STerm,
{ SB_Break , SB_BAfter , SB_STermC , SB_SCS , SB_Break , SB_Break , SB_Break , SB_Break , SB_ATerm , SB_STerm , SB_STermC }, // SB_STermC,
{ SB_Break , SB_BAfter , SB_SCS , SB_SCS , SB_Break , SB_Break , SB_Break , SB_Break , SB_ATerm , SB_STerm , SB_Break }, // SB_SCS,
{ SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break }, // SB_BAfter,
};
// Sets attributes[i].sentenceBoundary for every code unit of string[0..stringLength).
// Reads attributes[i].charStop, so the character attributes must have been computed
// before this is called. The items/numItems parameters are unused.
// Position 0 is always a sentence boundary; positions that are not a charStop never are.
//
// The text is run through the sentenceBreakTable state machine. When the table yields
// SB_Look, the characters after the current one are inspected: the pending break is
// cancelled if, after skipping Other/Numeric/Close characters, a Lower character is found.
void HB_GetSentenceBoundaries(const HB_UChar16 *string, hb_uint32 stringLength,
                              const HB_ScriptItem * /*items*/, hb_uint32 /*numItems*/,
                              HB_CharAttributes *attributes)
{
    if (stringLength == 0)
        return;

    hb_uint32 brk = sentenceBreakTable[SB_Initial][HB_GetSentenceClass(string[0])];
    attributes[0].sentenceBoundary = true;

    for (hb_uint32 i = 1; i < stringLength; ++i) {
        if (!attributes[i].charStop) {
            attributes[i].sentenceBoundary = false;
            continue;
        }

        brk = sentenceBreakTable[brk][HB_GetSentenceClass(string[i])];
        if (brk == SB_Look) {
            // break unless the lookahead finds a lowercase letter
            brk = SB_Break;
            hb_uint32 lookahead = i + 1;
            while (lookahead < stringLength) {
                hb_uint32 sbrk = HB_GetSentenceClass(string[lookahead]);
                // The Lower test has to come first: Lower is neither Other, Numeric nor
                // Close, so testing the "stop scanning" condition first made this branch
                // unreachable and the break could never be cancelled.
                if (sbrk == HB_Sentence_Lower) {
                    brk = SB_Initial;
                    break;
                }
                if (sbrk != HB_Sentence_Other && sbrk != HB_Sentence_Numeric && sbrk != HB_Sentence_Close)
                    break;
                ++lookahead;
            }
            if (brk == SB_Initial) {
                // no boundary anywhere up to the lowercase letter; resume the outer
                // loop from that letter (i is advanced on purpose)
                while (i < lookahead)
                    attributes[i++].sentenceBoundary = false;
            }
        }

        if (brk == SB_Break) {
            attributes[i].sentenceBoundary = true;
            // the current character starts the new sentence: restart the state machine on it
            brk = sentenceBreakTable[SB_Initial][HB_GetSentenceClass(string[i])];
        } else {
            attributes[i].sentenceBoundary = false;
        }
    }
}
static inline char *tag_to_string(HB_UInt tag)
{
static char string[5];

View File

@ -143,19 +143,9 @@ typedef struct {
hb_bitfield unused :2;
} HB_CharAttributes;
void HB_GetCharAttributes(const HB_UChar16 *string, hb_uint32 stringLength,
const HB_ScriptItem *items, hb_uint32 numItems,
HB_CharAttributes *attributes);
/* requires HB_GetCharAttributes to be called before */
void HB_GetWordBoundaries(const HB_UChar16 *string, hb_uint32 stringLength,
const HB_ScriptItem *items, hb_uint32 numItems,
HB_CharAttributes *attributes);
/* requires HB_GetCharAttributes to be called before */
void HB_GetSentenceBoundaries(const HB_UChar16 *string, hb_uint32 stringLength,
const HB_ScriptItem *items, hb_uint32 numItems,
HB_CharAttributes *attributes);
void HB_GetTailoredCharAttributes(const HB_UChar16 *string, hb_uint32 stringLength,
const HB_ScriptItem *items, hb_uint32 numItems,
HB_CharAttributes *attributes);
typedef enum {

View File

@ -2,6 +2,6 @@
SUBDIRS =
if QT
SUBDIRS += linebreaking shaping
SUBDIRS += shaping
endif

View File

@ -1,4 +0,0 @@
.deps
linebreaking
*.moc
*.o

View File

@ -1,12 +0,0 @@
check_PROGRAMS = linebreaking
linebreaking_SOURCES = main.cpp harfbuzz-qt.cpp
linebreaking_LDADD = $(QT_GUI_LIBS) $(QT_QTEST_LIBS) ../../src/libharfbuzz-1.la
main.o: main.moc
main.moc: $(srcdir)/main.cpp
$(QT_MOC) -o main.moc $(srcdir)/main.cpp
INCLUDES = -I$(top_srcdir)/src $(FREETYPE_CFLAGS) $(QT_GUI_CFLAGS) $(QT_QTEST_CFLAGS)

View File

@ -1,230 +0,0 @@
/*
* Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
*
* This is part of HarfBuzz, an OpenType Layout engine library.
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and its documentation for any purpose, provided that the
* above copyright notice and the following two paragraphs appear in
* all copies of this software.
*
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*/
/*
!!!!!! Warning !!!!!
Please don't save this file in emacs. It contains utf8 text sequences emacs will
silently convert to a series of question marks.
*/
#include <QtTest/QtTest>
#include <QtCore/qdebug.h>
#include <harfbuzz-shaper.h>
// Test helper: runs HB_GetCharAttributes over the whole of str, described as a
// single script item of the given script, and returns one attribute entry per
// UTF-16 code unit.
static QVector<HB_CharAttributes> getCharAttributes(const QString &str, HB_Script script = HB_Script_Common)
{
    const int codeUnits = str.length();

    // one item spanning the entire string
    HB_ScriptItem wholeString;
    wholeString.pos = 0;
    wholeString.length = codeUnits;
    wholeString.script = script;

    QVector<HB_CharAttributes> result(codeUnits);
    HB_GetCharAttributes(str.utf16(), codeUnits, &wholeString, 1, result.data());
    return result;
}
// QtTest test object (run through QTEST_MAIN at the end of this file) that checks
// the character attributes computed by HB_GetCharAttributes: line break types and
// character stops.
class tst_CharAttributes : public QObject
{
Q_OBJECT
public:
tst_CharAttributes();
virtual ~tst_CharAttributes();
public slots:
// both are defined with empty bodies in this file
void init();
void cleanup();
private slots:
// the individual test functions
void lineBreaking();
void charWordStopOnLineSeparator();
void charStopForSurrogatePairs();
void thaiWordBreak();
};
// The test object carries no state: constructor, destructor, init() and cleanup()
// all have empty bodies.
tst_CharAttributes::tst_CharAttributes()
{
}
tst_CharAttributes::~tst_CharAttributes()
{
}
void tst_CharAttributes::init()
{
}
void tst_CharAttributes::cleanup()
{
}
// Table-driven check of the line break types computed for short strings.
void tst_CharAttributes::lineBreaking()
{
// One test case: the UTF-8 input and, per UTF-16 code unit of the decoded string,
// whether a break opportunity is expected after that unit. The entry for the last
// code unit must be the 0xff terminator (the last unit is checked for HB_ForcedBreak).
struct Breaks {
const char *utf8;
uchar breaks[32];
};
Breaks brks[] = {
{ "11", { false, 0xff } },
{ "aa", { false, 0xff } },
{ "++", { false, 0xff } },
{ "--", { false, 0xff } },
{ "((", { false, 0xff } },
{ "))", { false, 0xff } },
{ "..", { false, 0xff } },
{ "\"\"", { false, 0xff } },
{ "$$", { false, 0xff } },
{ "!!", { false, 0xff } },
{ "??", { false, 0xff } },
{ ",,", { false, 0xff } },
{ ")()", { true, false, 0xff } },
{ "?!?", { false, false, 0xff } },
{ ".,.", { false, false, 0xff } },
{ "+-+", { false, false, 0xff } },
{ "+=+", { false, false, 0xff } },
{ "+(+", { false, false, 0xff } },
{ "+)+", { false, false, 0xff } },
{ "a b", { false, true, 0xff } },
{ "a(b", { false, false, 0xff } },
{ "a)b", { false, false, 0xff } },
{ "a-b", { false, true, 0xff } },
{ "a.b", { false, false, 0xff } },
{ "a+b", { false, false, 0xff } },
{ "a?b", { false, false, 0xff } },
{ "a!b", { false, false, 0xff } },
{ "a$b", { false, false, 0xff } },
{ "a,b", { false, false, 0xff } },
{ "a/b", { false, false, 0xff } },
{ "1/2", { false, false, 0xff } },
{ "./.", { false, false, 0xff } },
{ ",/,", { false, false, 0xff } },
{ "!/!", { false, false, 0xff } },
{ "\\/\\", { false, false, 0xff } },
{ "1 2", { false, true, 0xff } },
{ "1(2", { false, false, 0xff } },
{ "1)2", { false, false, 0xff } },
{ "1-2", { false, false, 0xff } },
{ "1.2", { false, false, 0xff } },
{ "1+2", { false, false, 0xff } },
{ "1?2", { false, true, 0xff } },
{ "1!2", { false, true, 0xff } },
{ "1$2", { false, false, 0xff } },
{ "1,2", { false, false, 0xff } },
{ "1/2", { false, false, 0xff } },
{ "\330\260\331\216\331\204\331\220\331\203\331\216", { false, false, false, false, false, 0xff } },
{ "\330\247\331\204\331\205 \330\247\331\204\331\205", { false, false, false, true, false, false, 0xff } },
{ "1#2", { false, false, 0xff } },
{ "!#!", { false, false, 0xff } },
// sentinel: a null utf8 pointer ends the list
{ 0, {} }
};
Breaks *b = brks;
while (b->utf8) {
QString str = QString::fromUtf8(b->utf8);
QVector<HB_CharAttributes> attrs = getCharAttributes(str);
int i;
for (i = 0; i < (int)str.length() - 1; ++i) {
// the expectation list must not end before the string does
QVERIFY(b->breaks[i] != 0xff);
// only "break or no break" is compared, not the exact break type
if ( (attrs[i].lineBreakType != HB_NoBreak) != (bool)b->breaks[i] ) {
qDebug("test case \"%s\" failed at char %d; break type: %d", b->utf8, i, attrs[i].lineBreakType);
QCOMPARE( (attrs[i].lineBreakType != HB_NoBreak), (bool)b->breaks[i] );
}
}
// i is now the index of the last code unit
QVERIFY(attrs[i].lineBreakType == HB_ForcedBreak);
QCOMPARE(b->breaks[i], (uchar)0xff);
++b;
}
}
void tst_CharAttributes::charWordStopOnLineSeparator()
{
const QChar lineSeparator(QChar::LineSeparator);
QString txt;
txt.append(lineSeparator);
txt.append(lineSeparator);
QVector<HB_CharAttributes> attrs = getCharAttributes(txt);
QVERIFY(attrs[1].charStop);
}
// A surrogate pair must be treated as one character: a stop is allowed before its
// high surrogate but not between the two halves.
void tst_CharAttributes::charStopForSurrogatePairs()
{
QString txt;
txt.append("a");
// 0xd87e / 0xdc25 are a high and a low surrogate (together they encode U+2F825)
txt.append(0xd87e);
txt.append(0xdc25);
txt.append("b");
QVector<HB_CharAttributes> attrs = getCharAttributes(txt);
QVERIFY(attrs[0].charStop);
QVERIFY(attrs[1].charStop);
// index 2 is the low surrogate
QVERIFY(!attrs[2].charStop);
QVERIFY(attrs[3].charStop);
}
void tst_CharAttributes::thaiWordBreak()
{
// สวัสดีครับ นี่เป็นการงทดสอบตัวเอ
QTextCodec *codec = QTextCodec::codecForMib(2259);
QString txt = codec->toUnicode(QByteArray("\xca\xc7\xd1\xca\xb4\xd5\xa4\xc3\xd1\xba\x20\xb9\xd5\xe8\xe0\xbb\xe7\xb9\xa1\xd2\xc3\xb7\xb4\xca\xcd\xba\xb5\xd1\xc7\xe0\xcd\xa7"));
QCOMPARE(txt.length(), 32);
QVector<HB_CharAttributes> attrs = getCharAttributes(txt, HB_Script_Thai);
QVERIFY(attrs[0].lineBreakType == HB_NoBreak);
QVERIFY(attrs[1].lineBreakType == HB_NoBreak);
QVERIFY(attrs[2].lineBreakType == HB_NoBreak);
QVERIFY(attrs[3].lineBreakType == HB_NoBreak);
QVERIFY(attrs[4].lineBreakType == HB_NoBreak);
QVERIFY(attrs[5].lineBreakType == HB_Break);
QVERIFY(attrs[6].lineBreakType == HB_NoBreak);
QVERIFY(attrs[7].lineBreakType == HB_NoBreak);
QVERIFY(attrs[8].lineBreakType == HB_NoBreak);
QVERIFY(attrs[9].lineBreakType == HB_NoBreak);
QVERIFY(attrs[10].lineBreakType == HB_Break);
QVERIFY(attrs[11].lineBreakType == HB_NoBreak);
QVERIFY(attrs[12].lineBreakType == HB_NoBreak);
QVERIFY(attrs[13].lineBreakType == HB_Break);
QVERIFY(attrs[14].lineBreakType == HB_NoBreak);
QVERIFY(attrs[15].lineBreakType == HB_NoBreak);
QVERIFY(attrs[16].lineBreakType == HB_NoBreak);
QVERIFY(attrs[17].lineBreakType == HB_Break);
QVERIFY(attrs[18].lineBreakType == HB_NoBreak);
QVERIFY(attrs[19].lineBreakType == HB_NoBreak);
QVERIFY(attrs[20].lineBreakType == HB_Break);
QVERIFY(attrs[21].lineBreakType == HB_NoBreak);
QVERIFY(attrs[22].lineBreakType == HB_NoBreak);
QVERIFY(attrs[23].lineBreakType == HB_NoBreak);
QVERIFY(attrs[24].lineBreakType == HB_NoBreak);
QVERIFY(attrs[25].lineBreakType == HB_Break);
QVERIFY(attrs[26].lineBreakType == HB_NoBreak);
for (int i = 27; i < 32; ++i)
QVERIFY(attrs[i].lineBreakType == HB_NoBreak);
}
QTEST_MAIN(tst_CharAttributes)
#include "main.moc"

View File

@ -1,7 +1,7 @@
check_PROGRAMS = shaping
shaping_SOURCES = main.cpp ../linebreaking/harfbuzz-qt.cpp
shaping_SOURCES = main.cpp harfbuzz-qt.cpp
shaping_LDADD = $(QT_GUI_LIBS) $(QT_QTEST_LIBS) ../../src/libharfbuzz-1.la
main.o: main.moc

View File

@ -23,21 +23,11 @@
*/
#include <harfbuzz-external.h>
#include <Qt/private/qunicodetables_p.h>
#include <QChar>
#include <QLibrary>
#include <QTextCodec>
extern "C" {
// Test-harness implementation of the HarfBuzz callback: returns Qt's line break
// class for ch, converted by a plain cast. Compilation is rejected for Qt
// versions older than 4.3.
HB_LineBreakClass HB_GetLineBreakClass(HB_UChar32 ch)
{
#if QT_VERSION >= 0x040300
return (HB_LineBreakClass)QUnicodeTables::lineBreakClass(ch);
#else
#error "This test currently requires Qt >= 4.3"
#endif
}
void HB_GetUnicodeCharProperties(HB_UChar32 ch, HB_CharCategory *category, int *combiningClass)
{
*category = (HB_CharCategory)QChar::category(ch);
@ -59,26 +49,6 @@ HB_UChar16 HB_GetMirroredChar(HB_UChar16 ch)
return QChar::mirroredChar(ch);
}
// HarfBuzz callback: word break class of ch, taken from Qt's Unicode property table.
HB_WordClass HB_GetWordClass(HB_UChar32 ch)
{
    return static_cast<HB_WordClass>(QUnicodeTables::properties(ch)->wordBreak);
}
// HarfBuzz callback: sentence break class of ch, taken from Qt's Unicode property table.
HB_SentenceClass HB_GetSentenceClass(HB_UChar32 ch)
{
    return static_cast<HB_SentenceClass>(QUnicodeTables::properties(ch)->sentenceBreak);
}
// HarfBuzz callback: fetches the grapheme break class and the line break class of
// ch with a single property table lookup and stores them through the out pointers.
void HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *grapheme, HB_LineBreakClass *lineBreak)
{
    const QUnicodeTables::Properties *const entry = QUnicodeTables::properties(ch);
    *grapheme = static_cast<HB_GraphemeClass>(entry->graphemeBreak);
    *lineBreak = static_cast<HB_LineBreakClass>(entry->line_break_class);
}
void (*HB_Library_Resolve(const char *library, int version, const char *symbol))()
{
return QLibrary::resolve(library, version, symbol);

View File

@ -39,47 +39,15 @@
**
****************************************************************************/
#include "qharfbuzz_p.h"
#include "qunicodetables_p.h"
#include "qlibrary.h"
#include "qtextcodec.h"
#include "qharfbuzz_p.h"
QT_USE_NAMESPACE
extern "C" {
// HarfBuzz callback: grapheme break class of ch, taken from Qt's Unicode property table.
HB_GraphemeClass HB_GetGraphemeClass(HB_UChar32 ch)
{
    return static_cast<HB_GraphemeClass>(QUnicodeTables::properties(ch)->graphemeBreak);
}
// HarfBuzz callback: word break class of ch, taken from Qt's Unicode property table.
HB_WordClass HB_GetWordClass(HB_UChar32 ch)
{
    const QUnicodeTables::Properties *const entry = QUnicodeTables::properties(ch);
    const HB_WordClass wordClass = static_cast<HB_WordClass>(entry->wordBreak);
    return wordClass;
}
// HarfBuzz callback: sentence break class of ch, taken from Qt's Unicode property table.
HB_SentenceClass HB_GetSentenceClass(HB_UChar32 ch)
{
    const QUnicodeTables::Properties *const entry = QUnicodeTables::properties(ch);
    const HB_SentenceClass sentenceClass = static_cast<HB_SentenceClass>(entry->sentenceBreak);
    return sentenceClass;
}
// HarfBuzz callback: line break class of ch as reported by Qt's Unicode tables.
HB_LineBreakClass HB_GetLineBreakClass(HB_UChar32 ch)
{
    return static_cast<HB_LineBreakClass>(QUnicodeTables::lineBreakClass(ch));
}
// HarfBuzz callback: reports both the grapheme break class and the line break
// class of ch through the out pointers, using one property table lookup.
void HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *grapheme, HB_LineBreakClass *lineBreak)
{
    const QUnicodeTables::Properties *const entry = QUnicodeTables::properties(ch);
    const HB_GraphemeClass graphemeClass = static_cast<HB_GraphemeClass>(entry->graphemeBreak);
    const HB_LineBreakClass lineBreakClass = static_cast<HB_LineBreakClass>(entry->line_break_class);
    *grapheme = graphemeClass;
    *lineBreak = lineBreakClass;
}
void HB_GetUnicodeCharProperties(HB_UChar32 ch, HB_CharCategory *category, int *combiningClass)
{
const QUnicodeTables::Properties *prop = QUnicodeTables::properties(ch);
@ -135,11 +103,4 @@ void qHBFreeFace(HB_Face face)
HB_FreeFace(face);
}
// Thin forwarder: passes all arguments unchanged to HB_GetCharAttributes.
void qGetCharAttributes(const HB_UChar16 *string, hb_uint32 stringLength,
const HB_ScriptItem *items, hb_uint32 numItems,
HB_CharAttributes *attributes)
{
HB_GetCharAttributes(string, stringLength, items, numItems, attributes);
}
QT_END_NAMESPACE

View File

@ -58,11 +58,6 @@
QT_BEGIN_NAMESPACE
// temporary forward until all the textengine code has been moved to QtCore
Q_CORE_EXPORT void qGetCharAttributes(const HB_UChar16 *string, hb_uint32 stringLength,
const HB_ScriptItem *items, hb_uint32 numItems,
HB_CharAttributes *attributes);
Q_CORE_EXPORT HB_Bool qShapeItem(HB_ShaperItem *item);
// ### temporary

View File

@ -40,9 +40,9 @@
****************************************************************************/
#include <QtCore/qtextboundaryfinder.h>
#include <QtCore/qvarlengtharray.h>
#include <private/qunicodetables_p.h>
#include <qdebug.h>
#include "private/qharfbuzz_p.h"
#include <private/qunicodetools_p.h>
QT_BEGIN_NAMESPACE
@ -93,11 +93,12 @@ static void init(QTextBoundaryFinder::BoundaryType type, const QChar *chars, int
scriptItems.append(item);
}
qGetCharAttributes(string, length, scriptItems.data(), scriptItems.count(), attributes);
QCharAttributeOptions options = 0;
if (type == QTextBoundaryFinder::Word)
HB_GetWordBoundaries(string, length, scriptItems.data(), scriptItems.count(), attributes);
options |= GetWordBreaks;
else if (type == QTextBoundaryFinder::Sentence)
HB_GetSentenceBoundaries(string, length, scriptItems.data(), scriptItems.count(), attributes);
options |= GetSentenceBreaks;
qGetCharAttributes(string, length, scriptItems.data(), scriptItems.count(), attributes, options);
}
/*!

View File

@ -0,0 +1,398 @@
/****************************************************************************
**
** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies).
** Contact: http://www.qt-project.org/
**
** This file is part of the QtCore module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** GNU Lesser General Public License Usage
** This file may be used under the terms of the GNU Lesser General Public
** License version 2.1 as published by the Free Software Foundation and
** appearing in the file LICENSE.LGPL included in the packaging of this
** file. Please review the following information to ensure the GNU Lesser
** General Public License version 2.1 requirements will be met:
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** In addition, as a special exception, Nokia gives you certain additional
** rights. These rights are described in the Nokia Qt LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU General
** Public License version 3.0 as published by the Free Software Foundation
** and appearing in the file LICENSE.GPL included in the packaging of this
** file. Please review the following information to ensure the GNU General
** Public License version 3.0 requirements will be met:
** http://www.gnu.org/copyleft/gpl.html.
**
** Other Usage
** Alternatively, this file may be used in accordance with the terms and
** conditions contained in a signed written agreement between you and Nokia.
**
**
**
**
**
**
** $QT_END_LICENSE$
**
****************************************************************************/
#include "qunicodetools_p.h"
#include "qunicodetables_p.h"
QT_BEGIN_NAMESPACE
// -----------------------------------------------------------------------------------------------------
//
// The line breaking algorithm. See http://www.unicode.org/reports/tr14/tr14-19.html
//
// -----------------------------------------------------------------------------------------------------
//
// The text boundaries determination algorithm. See http://www.unicode.org/reports/tr29/tr29-11.html
//
// -----------------------------------------------------------------------------------------------------
namespace {
/* In our opinion, the Unicode algorithm allows line breaks in some
places where they should not be allowed. The following changes were
therefore made relative to the Unicode reference:
EX->AL from DB to IB
SY->AL from DB to IB
SY->PO from DB to IB
SY->PR from DB to IB
SY->OP from DB to IB
AL->PR from DB to IB
AL->PO from DB to IB
PR->PR from DB to IB
PO->PO from DB to IB
PR->PO from DB to IB
PO->PR from DB to IB
HY->PO from DB to IB
HY->PR from DB to IB
HY->OP from DB to IB
NU->EX from PB to IB
EX->PO from DB to IB
*/
// The following line break classes are not treated by the table:
// AI, BK, CB, CR, LF, NL, SA, SG, SP, XX
// Outcome stored in each cell of the line break pair table. The two-letter
// abbreviation in each comment is the alias used when spelling out the table.
enum LineBreakRule {
    ProhibitedBreak          = 0, // PB
    DirectBreak              = 1, // DB
    IndirectBreak            = 2, // IB
    CombiningIndirectBreak   = 3, // CI
    CombiningProhibitedBreak = 4  // CP
};
// Pair table for the line breaking algorithm.
//
// calcGraphemeAndLineBreaks() looks it up as lineBreakTable[cls][tcls]: the row
// is the line break class carried over from the preceding text and the column
// is the class of the character currently being examined. Each cell holds a
// LineBreakRule value stored as uchar.
//
// The two-letter macros below only exist to keep the table readable; they are
// undefined again right after the table.
#define DB DirectBreak
#define IB IndirectBreak
#define CI CombiningIndirectBreak
#define CP CombiningProhibitedBreak
#define PB ProhibitedBreak
static const uchar lineBreakTable[QUnicodeTables::LineBreak_JT + 1][QUnicodeTables::LineBreak_JT + 1] = {
/* column order: OP CL QU GL NS EX SY IS PR PO NU AL ID IN HY BA BB B2 ZW CM WJ H2 H3 JL JV JT */
/* OP */ { PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, CP, PB, PB, PB, PB, PB, PB },
/* CL */ { DB, PB, IB, IB, PB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
/* QU */ { PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
/* GL */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
/* NS */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
/* EX */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
/* SY */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
/* IS */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
/* PR */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, DB, IB, IB, DB, DB, PB, CI, PB, IB, IB, IB, IB, IB },
/* PO */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
/* NU */ { IB, PB, IB, IB, IB, IB, PB, PB, IB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
/* AL */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
/* ID */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
/* IN */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
/* HY */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
/* BA */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
/* BB */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
/* B2 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, PB, PB, CI, PB, DB, DB, DB, DB, DB },
/* ZW */ { DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, PB, DB, DB, DB, DB, DB, DB, DB },
/* CM */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
/* WJ */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
/* H2 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, IB, IB },
/* H3 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, IB },
/* JL */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, IB, IB, IB, IB, DB },
/* JV */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, IB, IB },
/* JT */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, IB }
};
#undef DB
#undef IB
#undef CI
#undef CP
#undef PB
// Grapheme cluster boundary table.
//
// calcGraphemeAndLineBreaks() reads it as graphemeBreakTable[ngrapheme][grapheme]
// and stores the result in charStop: the row (labelled at the end of each line)
// is the grapheme break class of the current code unit, the column is the class
// of the preceding one. true means the current code unit starts a new grapheme;
// for example the LF row holds false in the CR column, so CR LF is kept together.
static const uchar graphemeBreakTable[QUnicodeTables::GraphemeBreakLVT + 1][QUnicodeTables::GraphemeBreakLVT + 1] = {
// Other, CR, LF, Control, Extend, L, V, T, LV, LVT
{ true , true , true , true , true , true , true , true , true , true }, // Other,
{ true , true , true , true , true , true , true , true , true , true }, // CR,
{ true , false, true , true , true , true , true , true , true , true }, // LF,
{ true , true , true , true , true , true , true , true , true , true }, // Control,
{ false, true , true , true , false, false, false, false, false, false }, // Extend,
{ true , true , true , true , true , false, true , true , true , true }, // L,
{ true , true , true , true , true , false, false, true , false, true }, // V,
{ true , true , true , true , true , true , false, false, false, false }, // T,
{ true , true , true , true , true , false, true , true , true , true }, // LV,
{ true , true , true , true , true , false, true , true , true , true }, // LVT
};
// Fills in the whiteSpace, charStop and lineBreakType members of attributes[]
// for the len UTF-16 code units in string.
//
// string[0] and attributes[len-1] are accessed unconditionally, so the caller
// must pass len >= 1. The line break type for code unit i-1 is written while
// code unit i is being processed; the last code unit always gets HB_ForcedBreak.
static void calcGraphemeAndLineBreaks(const ushort *string, quint32 len, HB_CharAttributes *attributes)
{
// ##### can this fail if the first char is a surrogate?
const QUnicodeTables::Properties *prop = QUnicodeTables::properties(string[0]);
QUnicodeTables::GraphemeBreak grapheme = (QUnicodeTables::GraphemeBreak) prop->graphemeBreak;
QUnicodeTables::LineBreakClass cls = (QUnicodeTables::LineBreakClass) prop->line_break_class;
// handle case where input starts with an LF
if (cls == QUnicodeTables::LineBreak_LF)
cls = QUnicodeTables::LineBreak_BK;
attributes[0].whiteSpace = (cls == QUnicodeTables::LineBreak_SP || cls == QUnicodeTables::LineBreak_BK);
attributes[0].charStop = true;
// lcls is the class of the code unit processed immediately before the current
// one; cls is the class used as the table row and is only refreshed at the
// "next" label, so it is left alone for spaces and attached combining marks.
int lcls = cls;
for (quint32 i = 1; i < len; ++i) {
attributes[i].whiteSpace = false;
attributes[i].charStop = true;
uint ucs4 = string[i];
prop = QUnicodeTables::properties(ucs4);
QUnicodeTables::GraphemeBreak ngrapheme = (QUnicodeTables::GraphemeBreak) prop->graphemeBreak;
QUnicodeTables::LineBreakClass ncls = (QUnicodeTables::LineBreakClass) prop->line_break_class;
// row = current code unit, column = previous code unit
attributes[i].charStop = graphemeBreakTable[ngrapheme][grapheme];
// handle surrogates
if (ncls == QUnicodeTables::LineBreak_SG) {
if (QChar::isHighSurrogate(string[i]) && i < len - 1 && QChar::isLowSurrogate(string[i+1])) {
// first half of a valid pair: defer all work to the low surrogate;
// lcls, grapheme and attributes[i-1].lineBreakType are not touched here
continue;
} else if (QChar::isLowSurrogate(string[i]) && QChar::isHighSurrogate(string[i-1])) {
// second half of a valid pair: classify the decoded code point and
// make sure the pair is never split
ucs4 = QChar::surrogateToUcs4(string[i-1], string[i]);
prop = QUnicodeTables::properties(ucs4);
ngrapheme = (QUnicodeTables::GraphemeBreak) prop->graphemeBreak;
ncls = (QUnicodeTables::LineBreakClass) prop->line_break_class;
attributes[i].charStop = false;
} else {
// unpaired surrogate: treat it like an ordinary alphabetic character
ncls = QUnicodeTables::LineBreak_AL;
}
}
// set white space and char stop flag
// NOTE(review): relies on the enumerator order of QUnicodeTables::LineBreakClass;
// presumably SP, CR, LF and BK are the last values - confirm against the enum.
if (ncls >= QUnicodeTables::LineBreak_SP)
attributes[i].whiteSpace = true;
HB_LineBreakType lineBreakType = HB_NoBreak;
// a forced break follows any class ordered at or after LF, and follows CR
// unless the CR is directly followed by LF
if (cls >= QUnicodeTables::LineBreak_LF) {
lineBreakType = HB_ForcedBreak;
} else if (cls == QUnicodeTables::LineBreak_CR) {
lineBreakType = (ncls == QUnicodeTables::LineBreak_LF) ? HB_NoBreak : HB_ForcedBreak;
}
// a space never becomes the table row: keep cls and only remember it in lcls
if (ncls == QUnicodeTables::LineBreak_SP)
goto next_no_cls_update;
// classes ordered at or after CR are not covered by the pair table
if (ncls >= QUnicodeTables::LineBreak_CR)
goto next;
{
int tcls = ncls;
// For South East Asian characters that require complex (dictionary-based)
// analysis, the Unicode standard recommends treating them as AL.
// thai_attributes and other attribute methods that do dictionary
// analysis can override the result.
if (tcls >= QUnicodeTables::LineBreak_SA)
tcls = QUnicodeTables::LineBreak_AL;
if (cls >= QUnicodeTables::LineBreak_SA)
cls = QUnicodeTables::LineBreak_AL;
int brk = lineBreakTable[cls][tcls];
switch (brk) {
case DirectBreak:
lineBreakType = HB_Break;
if (string[i-1] == 0xad) // soft hyphen
lineBreakType = HB_SoftHyphen;
break;
case IndirectBreak:
// a break is only allowed if a space separates the two characters
lineBreakType = (lcls == QUnicodeTables::LineBreak_SP) ? HB_Break : HB_NoBreak;
break;
case CombiningIndirectBreak:
lineBreakType = HB_NoBreak;
if (lcls == QUnicodeTables::LineBreak_SP){
// the break opportunity is recorded on the code unit before the space
if (i > 1)
attributes[i-2].lineBreakType = HB_Break;
} else {
// not preceded by a space: the table row (cls) stays unchanged
goto next_no_cls_update;
}
break;
case CombiningProhibitedBreak:
lineBreakType = HB_NoBreak;
if (lcls != QUnicodeTables::LineBreak_SP)
goto next_no_cls_update;
// fall through
case ProhibitedBreak:
default:
break;
}
}
next:
cls = ncls;
next_no_cls_update:
lcls = ncls;
grapheme = ngrapheme;
// the computed break type describes the code unit preceding the current one
attributes[i-1].lineBreakType = lineBreakType;
}
attributes[len-1].lineBreakType = HB_ForcedBreak;
}
// Cell values of the word break pair table.
enum WordBreakRule {
    NoBreak = 0,
    Break   = 1,
    Middle  = 2
};
// Word boundary pair table.
//
// calcWordBreaks() reads it as wordBreakTable[brk][nbrk]: the row (labelled at
// the end of each line) is the word break class carried over from the preceding
// text, the column is the class of the current code unit. A Middle cell makes
// calcWordBreaks() look ahead before deciding between Break and NoBreak.
static const uchar wordBreakTable[QUnicodeTables::WordBreakExtendNumLet + 1][QUnicodeTables::WordBreakExtendNumLet + 1] = {
// Other Format Katakana ALetter MidLetter MidNum Numeric ExtendNumLet
{ Break , Break , Break , Break , Break , Break , Break , Break }, // Other
{ Break , Break , Break , Break , Break , Break , Break , Break }, // Format
{ Break , Break , NoBreak, Break , Break , Break , Break , NoBreak }, // Katakana
{ Break , Break , Break , NoBreak, Middle , Break , NoBreak, NoBreak }, // ALetter
{ Break , Break , Break , Break , Break , Break , Break , Break }, // MidLetter
{ Break , Break , Break , Break , Break , Break , Break , Break }, // MidNum
{ Break , Break , Break , NoBreak, Break , Middle , NoBreak, NoBreak }, // Numeric
{ Break , Break , NoBreak, NoBreak, Break , Break , NoBreak, NoBreak }, // ExtendNumLet
};
static void calcWordBreaks(const ushort *string, quint32 len, HB_CharAttributes *attributes)
{
quint32 brk = QUnicodeTables::wordBreakClass(string[0]);
attributes[0].wordBoundary = true;
for (quint32 i = 1; i < len; ++i) {
if (!attributes[i].charStop) {
attributes[i].wordBoundary = false;
continue;
}
quint32 nbrk = QUnicodeTables::wordBreakClass(string[i]);
if (nbrk == QUnicodeTables::WordBreakFormat) {
attributes[i].wordBoundary = (QUnicodeTables::sentenceBreakClass(string[i-1]) == QUnicodeTables::SentenceBreakSep);
continue;
}
WordBreakRule rule = (WordBreakRule)wordBreakTable[brk][nbrk];
if (rule == Middle) {
rule = Break;
quint32 lookahead = i + 1;
while (lookahead < len) {
quint32 testbrk = QUnicodeTables::wordBreakClass(string[lookahead]);
if (testbrk == QUnicodeTables::WordBreakFormat
&& QUnicodeTables::sentenceBreakClass(string[lookahead]) != QUnicodeTables::SentenceBreakSep) {
++lookahead;
continue;
}
if (testbrk == brk) {
rule = NoBreak;
while (i < lookahead)
attributes[i++].wordBoundary = false;
nbrk = testbrk;
}
break;
}
}
attributes[i].wordBoundary = (rule == Break);
brk = nbrk;
}
}
// States of the sentence boundary state machine. The values index the rows of
// the transition table, so their order must not change. SB_Break and SB_Lookup
// are transient results of a transition rather than states with their own row.
enum SentenceBreakState {
    SB_Initial = 0,
    SB_Upper   = 1,
    SB_UpATerm = 2,
    SB_ATerm   = 3,
    SB_ATermC  = 4,
    SB_ACS     = 5,
    SB_STerm   = 6,
    SB_STermC  = 7,
    SB_SCS     = 8,
    SB_BAfter  = 9,
    SB_Break   = 10,
    SB_Lookup  = 11
};
// Transition table of the sentence boundary state machine.
//
// calcSentenceBreaks() reads it as sentenceBreakTable[state][class]: the row
// (labelled at the end of each line) is the current SentenceBreakState, the
// column is the sentence break class of the code unit being consumed, and the
// cell is the next state.
//
// Only the rows SB_Initial .. SB_BAfter are spelled out. The array also has
// rows for SB_Break and SB_Lookup, which are left zero-initialized;
// calcSentenceBreaks() replaces both of those states before its next lookup.
static const uchar sentenceBreakTable[SB_Lookup + 1][QUnicodeTables::SentenceBreakClose + 1] = {
// Other Sep Format Sp Lower Upper OLetter Numeric ATerm STerm Close
{ SB_Initial, SB_BAfter , SB_Initial, SB_Initial, SB_Initial, SB_Upper , SB_Initial, SB_Initial, SB_ATerm , SB_STerm , SB_Initial }, // SB_Initial,
{ SB_Initial, SB_BAfter , SB_Upper , SB_Initial, SB_Initial, SB_Upper , SB_Initial, SB_Initial, SB_UpATerm, SB_STerm , SB_Initial }, // SB_Upper
{ SB_Lookup , SB_BAfter , SB_UpATerm, SB_ACS , SB_Initial, SB_Upper , SB_Break , SB_Initial, SB_ATerm , SB_STerm , SB_ATermC }, // SB_UpATerm
{ SB_Lookup , SB_BAfter , SB_ATerm , SB_ACS , SB_Initial, SB_Break , SB_Break , SB_Initial, SB_ATerm , SB_STerm , SB_ATermC }, // SB_ATerm
{ SB_Lookup , SB_BAfter , SB_ATermC , SB_ACS , SB_Initial, SB_Break , SB_Break , SB_Lookup , SB_ATerm , SB_STerm , SB_ATermC }, // SB_ATermC,
{ SB_Lookup , SB_BAfter , SB_ACS , SB_ACS , SB_Initial, SB_Break , SB_Break , SB_Lookup , SB_ATerm , SB_STerm , SB_Lookup }, // SB_ACS,
{ SB_Break , SB_BAfter , SB_STerm , SB_SCS , SB_Break , SB_Break , SB_Break , SB_Break , SB_ATerm , SB_STerm , SB_STermC }, // SB_STerm,
{ SB_Break , SB_BAfter , SB_STermC , SB_SCS , SB_Break , SB_Break , SB_Break , SB_Break , SB_ATerm , SB_STerm , SB_STermC }, // SB_STermC,
{ SB_Break , SB_BAfter , SB_SCS , SB_SCS , SB_Break , SB_Break , SB_Break , SB_Break , SB_ATerm , SB_STerm , SB_Break }, // SB_SCS,
{ SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break }, // SB_BAfter,
};
// Sets attributes[i].sentenceBoundary for each of the len UTF-16 code units in
// string by running the sentenceBreakTable state machine over the text.
//
// The charStop flags must already be filled in: code units that do not start a
// grapheme are never sentence boundaries and do not advance the state machine.
// Requires len >= 1.
static void calcSentenceBreaks(const ushort *string, quint32 len, HB_CharAttributes *attributes)
{
    quint32 brk = sentenceBreakTable[SB_Initial][QUnicodeTables::sentenceBreakClass(string[0])];
    attributes[0].sentenceBoundary = true;
    for (quint32 i = 1; i < len; ++i) {
        if (!attributes[i].charStop) {
            attributes[i].sentenceBoundary = false;
            continue;
        }
        brk = sentenceBreakTable[brk][QUnicodeTables::sentenceBreakClass(string[i])];
        if (brk == SB_Lookup) {
            // The table cannot decide on its own: break here unless a lowercase
            // letter follows, with only Other/Numeric/Close in between (SB8).
            brk = SB_Break;
            quint32 lookahead = i + 1;
            while (lookahead < len) {
                quint32 sbrk = QUnicodeTables::sentenceBreakClass(string[lookahead]);
                // Lower must be tested first: it is not one of the skippable
                // classes, so testing it after the check below would never
                // be reached.
                if (sbrk == QUnicodeTables::SentenceBreakLower) {
                    brk = SB_Initial;
                    break;
                }
                if (sbrk != QUnicodeTables::SentenceBreakOther
                    && sbrk != QUnicodeTables::SentenceBreakNumeric
                    && sbrk != QUnicodeTables::SentenceBreakClose) {
                    break;
                }
                ++lookahead;
            }
            if (brk == SB_Initial) {
                // No break: everything up to the lowercase letter stays inside
                // the current sentence; i ends up on the lowercase letter.
                while (i < lookahead)
                    attributes[i++].sentenceBoundary = false;
            }
        }
        if (brk == SB_Break) {
            attributes[i].sentenceBoundary = true;
            // restart the state machine with the first character of the new sentence
            brk = sentenceBreakTable[SB_Initial][QUnicodeTables::sentenceBreakClass(string[i])];
        } else {
            attributes[i].sentenceBoundary = false;
        }
    }
}
} // namespace
// Computes the text boundary attributes for the length UTF-16 code units in
// string and writes them to attributes[0 .. length-1]. Does nothing when
// length is not positive.
//
// Grapheme and line break information is always computed; word and sentence
// boundaries only when requested through options. The result is finally handed
// to HB_GetTailoredCharAttributes() together with the script items.
Q_CORE_EXPORT void qGetCharAttributes(const ushort *string, int length,
                                      const HB_ScriptItem *items, int numItems,
                                      HB_CharAttributes *attributes, QCharAttributeOptions options)
{
    if (length > 0) {
        // start from a clean slate so that flags not set below read as zero
        memset(attributes, 0, length * sizeof(HB_CharAttributes));

        calcGraphemeAndLineBreaks(string, length, attributes);

        if (options & GetWordBreaks) {
            calcWordBreaks(string, length, attributes);
        }
        if (options & GetSentenceBreaks) {
            calcSentenceBreaks(string, length, attributes);
        }

        HB_GetTailoredCharAttributes(string, length, items, numItems, attributes);
    }
}
QT_END_NAMESPACE

View File

@ -0,0 +1,76 @@
/****************************************************************************
**
** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies).
** Contact: http://www.qt-project.org/
**
** This file is part of the QtCore module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** GNU Lesser General Public License Usage
** This file may be used under the terms of the GNU Lesser General Public
** License version 2.1 as published by the Free Software Foundation and
** appearing in the file LICENSE.LGPL included in the packaging of this
** file. Please review the following information to ensure the GNU Lesser
** General Public License version 2.1 requirements will be met:
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** In addition, as a special exception, Nokia gives you certain additional
** rights. These rights are described in the Nokia Qt LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU General
** Public License version 3.0 as published by the Free Software Foundation
** and appearing in the file LICENSE.GPL included in the packaging of this
** file. Please review the following information to ensure the GNU General
** Public License version 3.0 requirements will be met:
** http://www.gnu.org/copyleft/gpl.html.
**
** Other Usage
** Alternatively, this file may be used in accordance with the terms and
** conditions contained in a signed written agreement between you and Nokia.
**
**
**
**
**
**
** $QT_END_LICENSE$
**
****************************************************************************/
#ifndef QUNICODETOOLS_P_H
#define QUNICODETOOLS_P_H
//
// W A R N I N G
// -------------
//
// This file is not part of the Qt API. It exists for the convenience
// of other Qt classes. This header file may change from version to
// version without notice, or even be removed.
//
// We mean it.
//
#include <QtCore/qglobal.h>
#include <harfbuzz-shaper.h>
QT_BEGIN_NAMESPACE
Q_DECLARE_TYPEINFO(HB_CharAttributes, Q_PRIMITIVE_TYPE);
Q_DECLARE_TYPEINFO(HB_ScriptItem, Q_PRIMITIVE_TYPE);
// Optional analyses that qGetCharAttributes() performs on request.
// The values are distinct bits so that they can be combined.
enum QCharAttributeOption {
    GetWordBreaks     = 0x1,
    GetSentenceBreaks = 0x2
};
// Flags type holding a combination of QCharAttributeOption values.
Q_DECLARE_FLAGS(QCharAttributeOptions, QCharAttributeOption)
// Computes text boundary attributes for the length UTF-16 code units in string
// and stores them in attributes, which must have room for length entries.
// items/numItems describe the script runs of the text. options selects the
// optional word and sentence boundary analysis; by default neither is done.
Q_CORE_EXPORT void qGetCharAttributes(const ushort *string, int length,
const HB_ScriptItem *items, int numItems,
HB_CharAttributes *attributes, QCharAttributeOptions options = QFlag(0));
QT_END_NAMESPACE
#endif // QUNICODETOOLS_P_H

View File

@ -55,6 +55,7 @@ HEADERS += \
tools/qtimeline.h \
tools/qelapsedtimer.h \
tools/qunicodetables_p.h \
tools/qunicodetools_p.h \
tools/qvarlengtharray.h \
tools/qvector.h
@ -92,6 +93,7 @@ SOURCES += \
tools/qstringlist.cpp \
tools/qtextboundaryfinder.cpp \
tools/qtimeline.cpp \
tools/qunicodetools.cpp \
tools/qvector.cpp \
tools/qvsnprintf.cpp

View File

@ -52,6 +52,7 @@
#include "qfontengine_p.h"
#include "qstring.h"
#include <private/qunicodetables_p.h>
#include <private/qunicodetools_p.h>
#include "qtextdocument_p.h"
#include "qrawfont.h"
#include "qrawfont_p.h"

View File

@ -42,6 +42,7 @@
#include <QtTest/QtTest>
#include <qtextboundaryfinder.h>
#include <qtextcodec.h>
#include <qfile.h>
#include <qdebug.h>
@ -61,6 +62,7 @@ private slots:
void toNextBoundary();
void toPreviousBoundary_data();
void toPreviousBoundary();
void thaiLineBreak();
};
void tst_QTextBoundaryFinder::init()
@ -382,7 +384,95 @@ void tst_QTextBoundaryFinder::toPreviousBoundary()
QCOMPARE(boundaries, foundBoundaries);
}
#include <qlibrary.h>

#define LIBTHAI_MAJOR 0

// Function pointer type for the "th_brk" symbol resolved from the "thai" library.
// NOTE(review): presumably mirrors libthai's th_brk() prototype - confirm against thai/thbrk.h.
typedef int (*th_brk_def) (const unsigned char*, int*, size_t);
static th_brk_def th_brk = 0;

// Tries, at most once per process, to resolve th_brk from the "thai" library.
// Returns true if the symbol is available. Without QLibrary support nothing is
// resolved and the function always returns false.
static bool init_libthai()
{
#ifndef QT_NO_LIBRARY
    static bool resolveAttempted = false;
    if (resolveAttempted)
        return th_brk != 0;

    resolveAttempted = true;
    th_brk = (th_brk_def) QLibrary::resolve("thai", static_cast<int>(LIBTHAI_MAJOR), "th_brk");
#endif
    return th_brk != 0;
}
// Line break test for Thai text. Skipped when libthai cannot be loaded.
// The actual expectations are currently compiled out (#if 0).
void tst_QTextBoundaryFinder::thaiLineBreak()
{
    const bool haveLibThai = init_libthai();
    if (!haveLibThai)
        QSKIP("This test requires libThai-0.1.1x to be installed.");
#if 0
    // สวัสดีครับ นี่เป็นการงทดสอบตัวเอ
    QTextCodec *codec = QTextCodec::codecForMib(2259);
    QString text = codec->toUnicode(QByteArray("\xca\xc7\xd1\xca\xb4\xd5\xa4\xc3\xd1\xba\x20\xb9\xd5\xe8\xe0\xbb\xe7\xb9\xa1\xd2\xc3\xb7\xb4\xca\xcd\xba\xb5\xd1\xc7\xe0\xcd\xa7"));
    QCOMPARE(text.length(), 32);

    QTextBoundaryFinder finder(QTextBoundaryFinder::Line, text);

    // Positions that are expected to be line boundaries, in ascending order;
    // every other position in [0, 32) must not be a boundary.
    static const int boundaryPositions[] = { 0, 6, 7, 11, 14, 18, 20, 21, 25, 26 };
    const int boundaryCount = int(sizeof(boundaryPositions) / sizeof(boundaryPositions[0]));

    int nextBoundary = 0;
    for (int pos = 0; pos < 32; ++pos) {
        finder.setPosition(pos);
        if (nextBoundary < boundaryCount && boundaryPositions[nextBoundary] == pos) {
            ++nextBoundary;
            QVERIFY(finder.isAtBoundary());
        } else {
            QVERIFY(!finder.isAtBoundary());
        }
    }
#endif
}
QTEST_MAIN(tst_QTextBoundaryFinder)