ICU-10688 branch, work in progress.
X-SVN-Rev: 40686
This commit is contained in:
parent
c67d9d0a4a
commit
ca7b62180e
@ -59,58 +59,47 @@ LanguageBreakFactory::~LanguageBreakFactory() {
|
|||||||
******************************************************************
|
******************************************************************
|
||||||
*/
|
*/
|
||||||
|
|
||||||
UnhandledEngine::UnhandledEngine(UErrorCode &/*status*/) {
|
UnhandledEngine::UnhandledEngine(UErrorCode &status) : fHandled(nullptr) {
|
||||||
for (int32_t i = 0; i < UPRV_LENGTHOF(fHandled); ++i) {
|
(void)status;
|
||||||
fHandled[i] = 0;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
UnhandledEngine::~UnhandledEngine() {
|
UnhandledEngine::~UnhandledEngine() {
|
||||||
for (int32_t i = 0; i < UPRV_LENGTHOF(fHandled); ++i) {
|
delete fHandled;
|
||||||
if (fHandled[i] != 0) {
|
fHandled = nullptr;
|
||||||
delete fHandled[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
UBool
|
UBool
|
||||||
UnhandledEngine::handles(UChar32 c, int32_t breakType) const {
|
UnhandledEngine::handles(UChar32 c) const {
|
||||||
return (breakType >= 0 && breakType < UPRV_LENGTHOF(fHandled)
|
return fHandled && fHandled->contains(c);
|
||||||
&& fHandled[breakType] != 0 && fHandled[breakType]->contains(c));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t
|
int32_t
|
||||||
UnhandledEngine::findBreaks( UText *text,
|
UnhandledEngine::findBreaks( UText *text,
|
||||||
int32_t /* startPos */,
|
int32_t /* startPos */,
|
||||||
int32_t endPos,
|
int32_t endPos,
|
||||||
int32_t breakType,
|
|
||||||
UVector32 &/*foundBreaks*/ ) const {
|
UVector32 &/*foundBreaks*/ ) const {
|
||||||
if (breakType >= 0 && breakType < UPRV_LENGTHOF(fHandled)) {
|
UChar32 c = utext_current32(text);
|
||||||
UChar32 c = utext_current32(text);
|
while((int32_t)utext_getNativeIndex(text) < endPos && fHandled->contains(c)) {
|
||||||
while((int32_t)utext_getNativeIndex(text) < endPos && fHandled[breakType]->contains(c)) {
|
utext_next32(text); // TODO: recast loop to work with post-increment operations.
|
||||||
utext_next32(text); // TODO: recast loop to work with post-increment operations.
|
c = utext_current32(text);
|
||||||
c = utext_current32(text);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
UnhandledEngine::handleCharacter(UChar32 c, int32_t breakType) {
|
UnhandledEngine::handleCharacter(UChar32 c) {
|
||||||
if (breakType >= 0 && breakType < UPRV_LENGTHOF(fHandled)) {
|
if (fHandled == nullptr) {
|
||||||
if (fHandled[breakType] == 0) {
|
fHandled = new UnicodeSet();
|
||||||
fHandled[breakType] = new UnicodeSet();
|
if (fHandled == nullptr) {
|
||||||
if (fHandled[breakType] == 0) {
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!fHandled[breakType]->contains(c)) {
|
if (!fHandled->contains(c)) {
|
||||||
UErrorCode status = U_ZERO_ERROR;
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
// Apply the entire script of the character.
|
// Apply the entire script of the character.
|
||||||
int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT);
|
int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT);
|
||||||
fHandled[breakType]->applyIntPropertyValue(UCHAR_SCRIPT, script, status);
|
fHandled->applyIntPropertyValue(UCHAR_SCRIPT, script, status);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -138,7 +127,7 @@ U_NAMESPACE_BEGIN
|
|||||||
static UMutex gBreakEngineMutex = U_MUTEX_INITIALIZER;
|
static UMutex gBreakEngineMutex = U_MUTEX_INITIALIZER;
|
||||||
|
|
||||||
const LanguageBreakEngine *
|
const LanguageBreakEngine *
|
||||||
ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) {
|
ICULanguageBreakFactory::getEngineFor(UChar32 c) {
|
||||||
const LanguageBreakEngine *lbe = NULL;
|
const LanguageBreakEngine *lbe = NULL;
|
||||||
UErrorCode status = U_ZERO_ERROR;
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
|
|
||||||
@ -156,14 +145,14 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) {
|
|||||||
int32_t i = fEngines->size();
|
int32_t i = fEngines->size();
|
||||||
while (--i >= 0) {
|
while (--i >= 0) {
|
||||||
lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
|
lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
|
||||||
if (lbe != NULL && lbe->handles(c, breakType)) {
|
if (lbe != NULL && lbe->handles(c)) {
|
||||||
return lbe;
|
return lbe;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// We didn't find an engine. Create one.
|
// We didn't find an engine. Create one.
|
||||||
lbe = loadEngineFor(c, breakType);
|
lbe = loadEngineFor(c);
|
||||||
if (lbe != NULL) {
|
if (lbe != NULL) {
|
||||||
fEngines->push((void *)lbe, status);
|
fEngines->push((void *)lbe, status);
|
||||||
}
|
}
|
||||||
@ -171,11 +160,11 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const LanguageBreakEngine *
|
const LanguageBreakEngine *
|
||||||
ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) {
|
ICULanguageBreakFactory::loadEngineFor(UChar32 c) {
|
||||||
UErrorCode status = U_ZERO_ERROR;
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
UScriptCode code = uscript_getScript(c, &status);
|
UScriptCode code = uscript_getScript(c, &status);
|
||||||
if (U_SUCCESS(status)) {
|
if (U_SUCCESS(status)) {
|
||||||
DictionaryMatcher *m = loadDictionaryMatcherFor(code, breakType);
|
DictionaryMatcher *m = loadDictionaryMatcherFor(code);
|
||||||
if (m != NULL) {
|
if (m != NULL) {
|
||||||
const LanguageBreakEngine *engine = NULL;
|
const LanguageBreakEngine *engine = NULL;
|
||||||
switch(code) {
|
switch(code) {
|
||||||
@ -236,7 +225,7 @@ ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
DictionaryMatcher *
|
DictionaryMatcher *
|
||||||
ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script, int32_t /* brkType */) {
|
ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) {
|
||||||
UErrorCode status = U_ZERO_ERROR;
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
// open root from brkitr tree.
|
// open root from brkitr tree.
|
||||||
UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status);
|
UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status);
|
||||||
|
@ -54,11 +54,10 @@ class LanguageBreakEngine : public UMemory {
|
|||||||
* a particular kind of break.</p>
|
* a particular kind of break.</p>
|
||||||
*
|
*
|
||||||
* @param c A character which begins a run that the engine might handle
|
* @param c A character which begins a run that the engine might handle
|
||||||
* @param breakType The type of text break which the caller wants to determine
|
|
||||||
* @return TRUE if this engine handles the particular character and break
|
* @return TRUE if this engine handles the particular character and break
|
||||||
* type.
|
* type.
|
||||||
*/
|
*/
|
||||||
virtual UBool handles(UChar32 c, int32_t breakType) const = 0;
|
virtual UBool handles(UChar32 c) const = 0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>Find any breaks within a run in the supplied text.</p>
|
* <p>Find any breaks within a run in the supplied text.</p>
|
||||||
@ -68,14 +67,12 @@ class LanguageBreakEngine : public UMemory {
|
|||||||
* is capable of handling.
|
* is capable of handling.
|
||||||
* @param startPos The start of the run within the supplied text.
|
* @param startPos The start of the run within the supplied text.
|
||||||
* @param endPos The end of the run within the supplied text.
|
* @param endPos The end of the run within the supplied text.
|
||||||
* @param breakType The type of break desired, or -1.
|
|
||||||
* @param foundBreaks A Vector of int32_t to receive the breaks.
|
* @param foundBreaks A Vector of int32_t to receive the breaks.
|
||||||
* @return The number of breaks found.
|
* @return The number of breaks found.
|
||||||
*/
|
*/
|
||||||
virtual int32_t findBreaks( UText *text,
|
virtual int32_t findBreaks( UText *text,
|
||||||
int32_t startPos,
|
int32_t startPos,
|
||||||
int32_t endPos,
|
int32_t endPos,
|
||||||
int32_t breakType,
|
|
||||||
UVector32 &foundBreaks ) const = 0;
|
UVector32 &foundBreaks ) const = 0;
|
||||||
|
|
||||||
};
|
};
|
||||||
@ -125,11 +122,9 @@ class LanguageBreakFactory : public UMemory {
|
|||||||
*
|
*
|
||||||
* @param c A character that begins a run for which a LanguageBreakEngine is
|
* @param c A character that begins a run for which a LanguageBreakEngine is
|
||||||
* sought.
|
* sought.
|
||||||
* @param breakType The kind of text break for which a LanguageBreakEngine is
|
|
||||||
* sought.
|
|
||||||
* @return A LanguageBreakEngine with the desired characteristics, or 0.
|
* @return A LanguageBreakEngine with the desired characteristics, or 0.
|
||||||
*/
|
*/
|
||||||
virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType) = 0;
|
virtual const LanguageBreakEngine *getEngineFor(UChar32 c) = 0;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -152,11 +147,11 @@ class UnhandledEngine : public LanguageBreakEngine {
|
|||||||
private:
|
private:
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The sets of characters handled, for each break type
|
* The sets of characters handled.
|
||||||
* @internal
|
* @internal
|
||||||
*/
|
*/
|
||||||
|
|
||||||
UnicodeSet *fHandled[4];
|
UnicodeSet *fHandled;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
@ -176,11 +171,10 @@ class UnhandledEngine : public LanguageBreakEngine {
|
|||||||
* a particular kind of break.</p>
|
* a particular kind of break.</p>
|
||||||
*
|
*
|
||||||
* @param c A character which begins a run that the engine might handle
|
* @param c A character which begins a run that the engine might handle
|
||||||
* @param breakType The type of text break which the caller wants to determine
|
|
||||||
* @return TRUE if this engine handles the particular character and break
|
* @return TRUE if this engine handles the particular character and break
|
||||||
* type.
|
* type.
|
||||||
*/
|
*/
|
||||||
virtual UBool handles(UChar32 c, int32_t breakType) const;
|
virtual UBool handles(UChar32 c) const;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>Find any breaks within a run in the supplied text.</p>
|
* <p>Find any breaks within a run in the supplied text.</p>
|
||||||
@ -190,23 +184,20 @@ class UnhandledEngine : public LanguageBreakEngine {
|
|||||||
* is capable of handling.
|
* is capable of handling.
|
||||||
* @param startPos The start of the run within the supplied text.
|
* @param startPos The start of the run within the supplied text.
|
||||||
* @param endPos The end of the run within the supplied text.
|
* @param endPos The end of the run within the supplied text.
|
||||||
* @param breakType The type of break desired, or -1.
|
|
||||||
* @param foundBreaks An allocated C array of the breaks found, if any
|
* @param foundBreaks An allocated C array of the breaks found, if any
|
||||||
* @return The number of breaks found.
|
* @return The number of breaks found.
|
||||||
*/
|
*/
|
||||||
virtual int32_t findBreaks( UText *text,
|
virtual int32_t findBreaks( UText *text,
|
||||||
int32_t startPos,
|
int32_t startPos,
|
||||||
int32_t endPos,
|
int32_t endPos,
|
||||||
int32_t breakType,
|
|
||||||
UVector32 &foundBreaks ) const;
|
UVector32 &foundBreaks ) const;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>Tell the engine to handle a particular character and break type.</p>
|
* <p>Tell the engine to handle a particular character and break type.</p>
|
||||||
*
|
*
|
||||||
* @param c A character which the engine should handle
|
* @param c A character which the engine should handle
|
||||||
* @param breakType The type of text break for which the engine should handle c
|
|
||||||
*/
|
*/
|
||||||
virtual void handleCharacter(UChar32 c, int32_t breakType);
|
virtual void handleCharacter(UChar32 c);
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -250,11 +241,9 @@ class ICULanguageBreakFactory : public LanguageBreakFactory {
|
|||||||
*
|
*
|
||||||
* @param c A character that begins a run for which a LanguageBreakEngine is
|
* @param c A character that begins a run for which a LanguageBreakEngine is
|
||||||
* sought.
|
* sought.
|
||||||
* @param breakType The kind of text break for which a LanguageBreakEngine is
|
|
||||||
* sought.
|
|
||||||
* @return A LanguageBreakEngine with the desired characteristics, or 0.
|
* @return A LanguageBreakEngine with the desired characteristics, or 0.
|
||||||
*/
|
*/
|
||||||
virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType);
|
virtual const LanguageBreakEngine *getEngineFor(UChar32 c);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
/**
|
/**
|
||||||
@ -263,21 +252,17 @@ protected:
|
|||||||
*
|
*
|
||||||
* @param c A character that begins a run for which a LanguageBreakEngine is
|
* @param c A character that begins a run for which a LanguageBreakEngine is
|
||||||
* sought.
|
* sought.
|
||||||
* @param breakType The kind of text break for which a LanguageBreakEngine is
|
|
||||||
* sought.
|
|
||||||
* @return A LanguageBreakEngine with the desired characteristics, or 0.
|
* @return A LanguageBreakEngine with the desired characteristics, or 0.
|
||||||
*/
|
*/
|
||||||
virtual const LanguageBreakEngine *loadEngineFor(UChar32 c, int32_t breakType);
|
virtual const LanguageBreakEngine *loadEngineFor(UChar32 c);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>Create a DictionaryMatcher for the specified script and break type.</p>
|
* <p>Create a DictionaryMatcher for the specified script and break type.</p>
|
||||||
* @param script An ISO 15924 script code that identifies the dictionary to be
|
* @param script An ISO 15924 script code that identifies the dictionary to be
|
||||||
* created.
|
* created.
|
||||||
* @param breakType The kind of text break for which a dictionary is
|
|
||||||
* sought.
|
|
||||||
* @return A DictionaryMatcher with the desired characteristics, or NULL.
|
* @return A DictionaryMatcher with the desired characteristics, or NULL.
|
||||||
*/
|
*/
|
||||||
virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script, int32_t breakType);
|
virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script);
|
||||||
};
|
};
|
||||||
|
|
||||||
U_NAMESPACE_END
|
U_NAMESPACE_END
|
||||||
|
@ -52,7 +52,7 @@ U_NAMESPACE_BEGIN
|
|||||||
// -------------------------------------
|
// -------------------------------------
|
||||||
|
|
||||||
BreakIterator*
|
BreakIterator*
|
||||||
BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode &status)
|
BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &status)
|
||||||
{
|
{
|
||||||
char fnbuff[256];
|
char fnbuff[256];
|
||||||
char ext[4]={'\0'};
|
char ext[4]={'\0'};
|
||||||
@ -121,7 +121,6 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind,
|
|||||||
U_LOCALE_BASED(locBased, *(BreakIterator*)result);
|
U_LOCALE_BASED(locBased, *(BreakIterator*)result);
|
||||||
locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status),
|
locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status),
|
||||||
actualLocale.data());
|
actualLocale.data());
|
||||||
result->setBreakType(kind);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ures_close(b);
|
ures_close(b);
|
||||||
@ -413,10 +412,10 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
|
|||||||
BreakIterator *result = NULL;
|
BreakIterator *result = NULL;
|
||||||
switch (kind) {
|
switch (kind) {
|
||||||
case UBRK_CHARACTER:
|
case UBRK_CHARACTER:
|
||||||
result = BreakIterator::buildInstance(loc, "grapheme", kind, status);
|
result = BreakIterator::buildInstance(loc, "grapheme", status);
|
||||||
break;
|
break;
|
||||||
case UBRK_WORD:
|
case UBRK_WORD:
|
||||||
result = BreakIterator::buildInstance(loc, "word", kind, status);
|
result = BreakIterator::buildInstance(loc, "word", status);
|
||||||
break;
|
break;
|
||||||
case UBRK_LINE:
|
case UBRK_LINE:
|
||||||
uprv_strcpy(lbType, "line");
|
uprv_strcpy(lbType, "line");
|
||||||
@ -429,10 +428,10 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
|
|||||||
uprv_strcat(lbType, lbKeyValue);
|
uprv_strcat(lbType, lbKeyValue);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
result = BreakIterator::buildInstance(loc, lbType, kind, status);
|
result = BreakIterator::buildInstance(loc, lbType, status);
|
||||||
break;
|
break;
|
||||||
case UBRK_SENTENCE:
|
case UBRK_SENTENCE:
|
||||||
result = BreakIterator::buildInstance(loc, "sentence", kind, status);
|
result = BreakIterator::buildInstance(loc, "sentence", status);
|
||||||
#if !UCONFIG_NO_FILTERED_BREAK_ITERATION
|
#if !UCONFIG_NO_FILTERED_BREAK_ITERATION
|
||||||
{
|
{
|
||||||
char ssKeyValue[kKeyValueLenMax] = {0};
|
char ssKeyValue[kKeyValueLenMax] = {0};
|
||||||
@ -449,7 +448,7 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
|
|||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
case UBRK_TITLE:
|
case UBRK_TITLE:
|
||||||
result = BreakIterator::buildInstance(loc, "title", kind, status);
|
result = BreakIterator::buildInstance(loc, "title", status);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||||
|
@ -37,9 +37,8 @@ DictionaryBreakEngine::~DictionaryBreakEngine() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
UBool
|
UBool
|
||||||
DictionaryBreakEngine::handles(UChar32 c, int32_t breakType) const {
|
DictionaryBreakEngine::handles(UChar32 c) const {
|
||||||
return (breakType >= 0 && breakType < 32 && (((uint32_t)1 << breakType) & fTypes)
|
return fSet.contains(c);
|
||||||
&& fSet.contains(c));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t
|
int32_t
|
||||||
|
@ -42,27 +42,12 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
|
|||||||
|
|
||||||
UnicodeSet fSet;
|
UnicodeSet fSet;
|
||||||
|
|
||||||
/**
|
|
||||||
* The set of break types handled by this engine
|
|
||||||
* @internal
|
|
||||||
*/
|
|
||||||
|
|
||||||
uint32_t fTypes;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* <p>Default constructor.</p>
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
DictionaryBreakEngine();
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>Constructor setting the break types handled.</p>
|
* <p>Constructor </p>
|
||||||
*
|
|
||||||
* @param breakTypes A bitmap of types handled by the engine.
|
|
||||||
*/
|
*/
|
||||||
DictionaryBreakEngine( uint32_t breakTypes );
|
DictionaryBreakEngine();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>Virtual destructor.</p>
|
* <p>Virtual destructor.</p>
|
||||||
@ -78,7 +63,7 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
|
|||||||
* @return TRUE if this engine handles the particular character and break
|
* @return TRUE if this engine handles the particular character and break
|
||||||
* type.
|
* type.
|
||||||
*/
|
*/
|
||||||
virtual UBool handles( UChar32 c, int32_t breakType ) const;
|
virtual UBool handles(UChar32 c) const;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>Find any breaks within a run in the supplied text.</p>
|
* <p>Find any breaks within a run in the supplied text.</p>
|
||||||
@ -88,14 +73,12 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
|
|||||||
* that starts from the first character in the range.
|
* that starts from the first character in the range.
|
||||||
* @param startPos The start of the run within the supplied text.
|
* @param startPos The start of the run within the supplied text.
|
||||||
* @param endPos The end of the run within the supplied text.
|
* @param endPos The end of the run within the supplied text.
|
||||||
* @param breakType The type of break desired, or -1.
|
|
||||||
* @param foundBreaks vector of int32_t to receive the break positions
|
* @param foundBreaks vector of int32_t to receive the break positions
|
||||||
* @return The number of breaks found.
|
* @return The number of breaks found.
|
||||||
*/
|
*/
|
||||||
virtual int32_t findBreaks( UText *text,
|
virtual int32_t findBreaks( UText *text,
|
||||||
int32_t startPos,
|
int32_t startPos,
|
||||||
int32_t endPos,
|
int32_t endPos,
|
||||||
int32_t breakType,
|
|
||||||
UVector32 &foundBreaks ) const;
|
UVector32 &foundBreaks ) const;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
@ -217,7 +217,6 @@ RuleBasedBreakIterator::operator=(const RuleBasedBreakIterator& that) {
|
|||||||
}
|
}
|
||||||
BreakIterator::operator=(that);
|
BreakIterator::operator=(that);
|
||||||
|
|
||||||
fBreakType = that.fBreakType;
|
|
||||||
if (fLanguageBreakEngines != NULL) {
|
if (fLanguageBreakEngines != NULL) {
|
||||||
delete fLanguageBreakEngines;
|
delete fLanguageBreakEngines;
|
||||||
fLanguageBreakEngines = NULL; // Just rebuild for now
|
fLanguageBreakEngines = NULL; // Just rebuild for now
|
||||||
@ -278,11 +277,6 @@ void RuleBasedBreakIterator::init(UErrorCode &status) {
|
|||||||
fRuleStatusIndex = 0;
|
fRuleStatusIndex = 0;
|
||||||
fDone = false;
|
fDone = false;
|
||||||
fDictionaryCharCount = 0;
|
fDictionaryCharCount = 0;
|
||||||
fBreakType = UBRK_WORD; // Defaulting BreakType to word gives reasonable
|
|
||||||
// dictionary behavior for Break Iterators that are
|
|
||||||
// built from rules. Even better would be the ability to
|
|
||||||
// declare the type in the rules.
|
|
||||||
|
|
||||||
fLanguageBreakEngines = NULL;
|
fLanguageBreakEngines = NULL;
|
||||||
fUnhandledBreakEngine = NULL;
|
fUnhandledBreakEngine = NULL;
|
||||||
fBreakCache = NULL;
|
fBreakCache = NULL;
|
||||||
@ -1290,14 +1284,14 @@ RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) {
|
|||||||
int32_t i = fLanguageBreakEngines->size();
|
int32_t i = fLanguageBreakEngines->size();
|
||||||
while (--i >= 0) {
|
while (--i >= 0) {
|
||||||
lbe = (const LanguageBreakEngine *)(fLanguageBreakEngines->elementAt(i));
|
lbe = (const LanguageBreakEngine *)(fLanguageBreakEngines->elementAt(i));
|
||||||
if (lbe->handles(c, fBreakType)) {
|
if (lbe->handles(c)) {
|
||||||
return lbe;
|
return lbe;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// No existing dictionary took the character. See if a factory wants to
|
// No existing dictionary took the character. See if a factory wants to
|
||||||
// give us a new LanguageBreakEngine for this character.
|
// give us a new LanguageBreakEngine for this character.
|
||||||
lbe = getLanguageBreakEngineFromFactory(c, fBreakType);
|
lbe = getLanguageBreakEngineFromFactory(c);
|
||||||
|
|
||||||
// If we got one, use it and push it on our stack.
|
// If we got one, use it and push it on our stack.
|
||||||
if (lbe != NULL) {
|
if (lbe != NULL) {
|
||||||
@ -1327,21 +1321,11 @@ RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) {
|
|||||||
|
|
||||||
// Tell the reject engine about the character; at its discretion, it may
|
// Tell the reject engine about the character; at its discretion, it may
|
||||||
// add more than just the one character.
|
// add more than just the one character.
|
||||||
fUnhandledBreakEngine->handleCharacter(c, fBreakType);
|
fUnhandledBreakEngine->handleCharacter(c);
|
||||||
|
|
||||||
return fUnhandledBreakEngine;
|
return fUnhandledBreakEngine;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*int32_t RuleBasedBreakIterator::getBreakType() const {
|
|
||||||
return fBreakType;
|
|
||||||
}*/
|
|
||||||
|
|
||||||
void RuleBasedBreakIterator::setBreakType(int32_t type) {
|
|
||||||
fBreakType = type;
|
|
||||||
}
|
|
||||||
|
|
||||||
void RuleBasedBreakIterator::dumpCache() {
|
void RuleBasedBreakIterator::dumpCache() {
|
||||||
fBreakCache->dumpCache();
|
fBreakCache->dumpCache();
|
||||||
}
|
}
|
||||||
|
@ -168,7 +168,7 @@ void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPo
|
|||||||
// Ask the language object if there are any breaks. It will add them to the cache and
|
// Ask the language object if there are any breaks. It will add them to the cache and
|
||||||
// leave the text pointer on the other side of its range, ready to search for the next one.
|
// leave the text pointer on the other side of its range, ready to search for the next one.
|
||||||
if (lbe != NULL) {
|
if (lbe != NULL) {
|
||||||
foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBI->fBreakType, *fBreaks);
|
foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, *fBreaks);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reload the loop variables for the next go-round
|
// Reload the loop variables for the next go-round
|
||||||
|
@ -616,7 +616,7 @@ public:
|
|||||||
virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0;
|
virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static BreakIterator* buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode& status);
|
static BreakIterator* buildInstance(const Locale& loc, const char *type, UErrorCode& status);
|
||||||
static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status);
|
static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status);
|
||||||
static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status);
|
static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status);
|
||||||
|
|
||||||
|
@ -149,13 +149,6 @@ private:
|
|||||||
*/
|
*/
|
||||||
UnhandledEngine *fUnhandledBreakEngine;
|
UnhandledEngine *fUnhandledBreakEngine;
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* The type of the break iterator, or -1 if it has not been set.
|
|
||||||
* @internal
|
|
||||||
*/
|
|
||||||
int32_t fBreakType;
|
|
||||||
|
|
||||||
//=======================================================================
|
//=======================================================================
|
||||||
// constructors
|
// constructors
|
||||||
//=======================================================================
|
//=======================================================================
|
||||||
@ -645,12 +638,6 @@ private:
|
|||||||
*/
|
*/
|
||||||
void reset(void);
|
void reset(void);
|
||||||
|
|
||||||
/**
|
|
||||||
* Set the type of the break iterator.
|
|
||||||
* @internal
|
|
||||||
*/
|
|
||||||
void setBreakType(int32_t type);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Common initialization function, used by constructors and bufferClone.
|
* Common initialization function, used by constructors and bufferClone.
|
||||||
* @internal
|
* @internal
|
||||||
|
Loading…
Reference in New Issue
Block a user