From 505ae092f09411bb52c46288dd752fc66537183b Mon Sep 17 00:00:00 2001 From: Deborah Goldsmith Date: Wed, 2 May 2007 23:07:12 +0000 Subject: [PATCH] ICU-5695 Reorganize ICULanguageBreakFactory for more flexibility X-SVN-Rev: 21484 --- icu4c/source/common/brkeng.cpp | 183 ++++++++++++++++++++++----------- icu4c/source/common/brkeng.h | 35 ++++++- 2 files changed, 152 insertions(+), 66 deletions(-) diff --git a/icu4c/source/common/brkeng.cpp b/icu4c/source/common/brkeng.cpp index e6c4226785..6b32d25c9c 100644 --- a/icu4c/source/common/brkeng.cpp +++ b/icu4c/source/common/brkeng.cpp @@ -19,6 +19,7 @@ #include "unicode/udata.h" #include "unicode/putil.h" #include "unicode/ustring.h" +#include "unicode/uscript.h" #include "uvector.h" #include "umutex.h" #include "uresimp.h" @@ -137,82 +138,60 @@ U_NAMESPACE_BEGIN const LanguageBreakEngine * ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) { UBool needsInit; + int32_t i; + const LanguageBreakEngine *lbe = NULL; UErrorCode status = U_ZERO_ERROR; - UMTX_CHECK(NULL, (UBool)(fEngines == NULL), needsInit); + + umtx_lock(NULL); + needsInit = (UBool)(fEngines == NULL); + if (!needsInit) { + i = fEngines->size(); + while (--i >= 0) { + lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i)); + if (lbe != NULL && lbe->handles(c, breakType)) { + break; + } + lbe = NULL; + } + } + umtx_unlock(NULL); + + if (lbe != NULL) { + return lbe; + } if (needsInit) { UStack *engines = new UStack(_deleteEngine, NULL, status); if (U_SUCCESS(status) && engines == NULL) { status = U_MEMORY_ALLOCATION_ERROR; } - // TODO: add locale parameter, check "dictionaries" in locale - // TODO: generalize once we can figure out how to parameterize engines - // instead of having different subclasses. Right now it needs to check - // for the key of each particular subclass. - - // Open root from brkitr tree. 
- char dictnbuff[256]; - char ext[4]={'\0'}; - - UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status); - b = ures_getByKeyWithFallback(b, "dictionaries", b, &status); - b = ures_getByKeyWithFallback(b, "Thai", b, &status); - int32_t dictnlength = 0; - const UChar *dictfname = ures_getString(b, &dictnlength, &status); - if (U_SUCCESS(status) && (size_t)dictnlength >= sizeof(dictnbuff)) { - dictnlength = 0; - status = U_BUFFER_OVERFLOW_ERROR; - } - if (U_SUCCESS(status) && dictfname) { - UChar* extStart=u_strchr(dictfname, 0x002e); - int len = 0; - if(extStart!=NULL){ - len = extStart-dictfname; - u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff - u_UCharsToChars(dictfname, dictnbuff, len); - } - dictnbuff[len]=0; // nul terminate - } - ures_close(b); - UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext, dictnbuff, &status); - if (U_SUCCESS(status)) { - const CompactTrieDictionary *dict = new CompactTrieDictionary( - file, status); - if (U_SUCCESS(status) && dict == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - } - if (U_FAILURE(status)) { - delete dict; - dict = NULL; - } - const ThaiBreakEngine *thai = new ThaiBreakEngine(dict, status); - if (thai == NULL) { - delete dict; - if (U_SUCCESS(status)) { - status = U_MEMORY_ALLOCATION_ERROR; - } - } - if (U_SUCCESS(status)) { - engines->push((void *)thai, status); - } - else { - delete thai; - } - } - umtx_lock(NULL); - if (fEngines == NULL) { - fEngines = engines; + else if (U_FAILURE(status)) { + delete engines; engines = NULL; } - umtx_unlock(NULL); - delete engines; + else { + umtx_lock(NULL); + if (fEngines == NULL) { + fEngines = engines; + engines = NULL; + } + umtx_unlock(NULL); + delete engines; + } } if (fEngines == NULL) { return NULL; } - int32_t i = fEngines->size(); - const LanguageBreakEngine *lbe = NULL; + + // We didn't find an engine the first time through, or there was no + // stack. Create an engine. 
+ const LanguageBreakEngine *newlbe = loadEngineFor(c, breakType); + + // Now get the lock, and see if someone else has created it in the + // meantime + umtx_lock(NULL); + i = fEngines->size(); while (--i >= 0) { lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i)); if (lbe != NULL && lbe->handles(c, breakType)) { @@ -220,9 +199,89 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) { } lbe = NULL; } + if (lbe == NULL && newlbe != NULL) { + fEngines->push((void *)newlbe, status); + lbe = newlbe; + newlbe = NULL; + } + umtx_unlock(NULL); + + delete newlbe; + return lbe; } +const LanguageBreakEngine * +ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) { + UErrorCode status = U_ZERO_ERROR; + UScriptCode code = uscript_getScript(c, &status); + if (U_SUCCESS(status)) { + const CompactTrieDictionary *dict = loadDictionaryFor(code, breakType); + if (dict != NULL) { + const LanguageBreakEngine *engine = NULL; + switch(code) { + case USCRIPT_THAI: + engine = new ThaiBreakEngine(dict, status); + break; + default: + break; + } + if (engine == NULL) { + delete dict; + } + else if (U_FAILURE(status)) { + delete engine; + engine = NULL; + } + return engine; + } + } + return NULL; +} + +const CompactTrieDictionary * +ICULanguageBreakFactory::loadDictionaryFor(UScriptCode script, int32_t breakType) { + UErrorCode status = U_ZERO_ERROR; + // Open root from brkitr tree. 
+ char dictnbuff[256]; + char ext[4]={'\0'}; + + UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status); + b = ures_getByKeyWithFallback(b, "dictionaries", b, &status); + b = ures_getByKeyWithFallback(b, uscript_getShortName(script), b, &status); + int32_t dictnlength = 0; + const UChar *dictfname = ures_getString(b, &dictnlength, &status); + if (U_SUCCESS(status) && (size_t)dictnlength >= sizeof(dictnbuff)) { + dictnlength = 0; + status = U_BUFFER_OVERFLOW_ERROR; + } + if (U_SUCCESS(status) && dictfname) { + UChar* extStart=u_strchr(dictfname, 0x002e); + int len = 0; + if(extStart!=NULL){ + len = extStart-dictfname; + u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff + u_UCharsToChars(dictfname, dictnbuff, len); + } + dictnbuff[len]=0; // nul terminate + } + ures_close(b); + UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext, dictnbuff, &status); + if (U_SUCCESS(status)) { + const CompactTrieDictionary *dict = new CompactTrieDictionary( + file, status); + if (U_SUCCESS(status) && dict == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + } + if (U_FAILURE(status)) { + delete dict; + dict = NULL; + } + return dict; + } + return NULL; +} + U_NAMESPACE_END #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ diff --git a/icu4c/source/common/brkeng.h b/icu4c/source/common/brkeng.h index afc70dc3ea..618b2aeea5 100644 --- a/icu4c/source/common/brkeng.h +++ b/icu4c/source/common/brkeng.h @@ -1,8 +1,8 @@ /** - ******************************************************************************* - * Copyright (C) 2006, International Business Machines Corporation and others. * - * All Rights Reserved. * - ******************************************************************************* + ************************************************************************************ + * Copyright (C) 2006-2007, International Business Machines Corporation and others. * + * All Rights Reserved. 
* + ************************************************************************************ */ #ifndef BRKENG_H @@ -11,11 +11,13 @@ #include "unicode/utypes.h" #include "unicode/uobject.h" #include "unicode/utext.h" +#include "unicode/uscript.h" U_NAMESPACE_BEGIN class UnicodeSet; class UStack; +class CompactTrieDictionary; /******************************************************************* * LanguageBreakEngine @@ -257,6 +259,31 @@ class ICULanguageBreakFactory : public LanguageBreakFactory { */ virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType); + protected: + + /** + *

<p>Create a LanguageBreakEngine for the set of characters to which + * the supplied character belongs, for the specified break type.</p>

+ * + * @param c A character that begins a run for which a LanguageBreakEngine is + * sought. + * @param breakType The kind of text break for which a LanguageBreakEngine is + * sought. + * @return A LanguageBreakEngine with the desired characteristics, or 0. + */ + virtual const LanguageBreakEngine *loadEngineFor(UChar32 c, int32_t breakType); + + /** + *

<p>Create a CompactTrieDictionary for the specified script and break type.</p>

+ * + * @param script An ISO 15924 script code that identifies the dictionary to be + * created. + * @param breakType The kind of text break for which a dictionary is + * sought. + * @return A CompactTrieDictionary with the desired characteristics, or 0. + */ + virtual const CompactTrieDictionary *loadDictionaryFor(UScriptCode script, int32_t breakType); + }; U_NAMESPACE_END