[*] i hate this shit lib

This commit is contained in:
Reece Wilson 2022-09-08 22:15:13 +01:00
parent 02aa1a021c
commit 7910db6d3d
1165 changed files with 33560 additions and 28943 deletions

0
build_bin/genbrk Executable file → Normal file
View File

0
build_bin/genccode Executable file → Normal file
View File

0
build_bin/gencfu Executable file → Normal file
View File

0
build_bin/gencmn Executable file → Normal file
View File

0
build_bin/gencnval Executable file → Normal file
View File

0
build_bin/gendict Executable file → Normal file
View File

0
build_bin/gennorm2 Executable file → Normal file
View File

0
build_bin/genrb Executable file → Normal file
View File

0
build_bin/gensprep Executable file → Normal file
View File

0
build_bin/icupkg Executable file → Normal file
View File

0
build_bin/makeconv Executable file → Normal file
View File

0
build_bin/pkgdata Executable file → Normal file
View File

View File

@ -1 +0,0 @@
libicudata.so.68.1

1
build_lib/libicudata.so Normal file
View File

@ -0,0 +1 @@
libicudata.so.68.1

View File

@ -1 +0,0 @@
libicudata.so.68.1

View File

@ -0,0 +1 @@
libicudata.so.68.1

0
build_lib/libicudata.so.68.1 Executable file → Normal file
View File

View File

@ -1 +0,0 @@
libicui18n.so.68.1

1
build_lib/libicui18n.so Normal file
View File

@ -0,0 +1 @@
libicui18n.so.68.1

View File

@ -1 +0,0 @@
libicui18n.so.68.1

View File

@ -0,0 +1 @@
libicui18n.so.68.1

0
build_lib/libicui18n.so.68.1 Executable file → Normal file
View File

View File

@ -1 +0,0 @@
libicutu.so.68.1

1
build_lib/libicutu.so Normal file
View File

@ -0,0 +1 @@
libicutu.so.68.1

View File

@ -1 +0,0 @@
libicutu.so.68.1

1
build_lib/libicutu.so.68 Normal file
View File

@ -0,0 +1 @@
libicutu.so.68.1

0
build_lib/libicutu.so.68.1 Executable file → Normal file
View File

View File

@ -1 +0,0 @@
libicuuc.so.68.1

1
build_lib/libicuuc.so Normal file
View File

@ -0,0 +1 @@
libicuuc.so.68.1

View File

@ -1 +0,0 @@
libicuuc.so.68.1

1
build_lib/libicuuc.so.68 Normal file
View File

@ -0,0 +1 @@
libicuuc.so.68.1

0
build_lib/libicuuc.so.68.1 Executable file → Normal file
View File

1213
common/BUILD.bazel Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,207 +0,0 @@
# Copyright (C) 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
#******************************************************************************
#
# Copyright (C) 1999-2016, International Business Machines
# Corporation and others. All Rights Reserved.
#
#******************************************************************************
## Makefile.in for ICU - icuuc.so
## Stephen F. Booth
## Source directory information
srcdir = .
top_srcdir = ..
top_builddir = ..
## All the flags and other definitions are included here.
include $(top_builddir)/icudefs.mk
## Build directory information
subdir = common
# for service hook
LOCALSVC_CPP=localsvc.cpp
SVC_HOOK_INC=$(top_builddir)/common/svchook.mk
## Extra files to remove for 'make clean'
CLEANFILES = *~ $(DEPS) $(IMPORT_LIB) $(MIDDLE_IMPORT_LIB) $(FINAL_IMPORT_LIB) $(SVC_HOOK_INC)
## Target information
TARGET_STUBNAME=$(COMMON_STUBNAME)
ifneq ($(ENABLE_STATIC),)
TARGET = $(LIBDIR)/$(LIBSICU)$(TARGET_STUBNAME)$(ICULIBSUFFIX).$(A)
endif
ifneq ($(ENABLE_SHARED),)
SO_TARGET = $(LIBDIR)/$(LIBICU)$(TARGET_STUBNAME)$(ICULIBSUFFIX).$(SO)
ALL_SO_TARGETS = $(SO_TARGET) $(MIDDLE_SO_TARGET) $(FINAL_SO_TARGET) $(SHARED_OBJECT)
ifeq ($(ENABLE_SO_VERSION_DATA),1)
SO_VERSION_DATA = common.res
endif
ifeq ($(OS390BATCH),1)
BATCH_TARGET = $(BATCH_COMMON_TARGET)
BATCH_LIBS = $(BATCH_LIBICUDT) -lm
endif # OS390BATCH
endif # ENABLE_SHARED
ALL_TARGETS = $(TARGET) $(ALL_SO_TARGETS) $(BATCH_TARGET)
DYNAMICCPPFLAGS = $(SHAREDLIBCPPFLAGS)
DYNAMICCFLAGS = $(SHAREDLIBCFLAGS)
DYNAMICCXXFLAGS = $(SHAREDLIBCXXFLAGS)
CFLAGS += $(LIBCFLAGS)
CXXFLAGS += $(LIBCXXFLAGS)
ifeq ($(OS390BATCH),1)
CFLAGS += -WI
CXXFLAGS += -WI
endif
CPPFLAGS += -I$(srcdir) $(LIBCPPFLAGS) $(CPPFLAGSICUUC)
# we want DEFS here
DEFS += -DU_COMMON_IMPLEMENTATION
LDFLAGS += $(LDFLAGSICUUC)
# for plugin configuration
CPPFLAGS += "-DDEFAULT_ICU_PLUGINS=\"$(libdir)/icu\" "
# for icu data location
ifeq ($(PKGDATA_MODE),common)
CPPFLAGS += "-DU_ICU_DATA_DEFAULT_DIR=\"$(ICUDATA_DIR)\""
endif
# $(LIBICUDT) is either stub data or the real DLL common data.
LIBS = $(LIBICUDT) $(DEFAULT_LIBS)
SOURCES = $(shell cat $(srcdir)/sources.txt)
OBJECTS = $(SOURCES:.cpp=.o)
## Header files to install
HEADERS = $(srcdir)/unicode/*.h
STATIC_OBJECTS = $(OBJECTS:.o=.$(STATIC_O))
DEPS = $(OBJECTS:.o=.d)
-include Makefile.local
-include $(SVC_HOOK_INC)
## List of phony targets
.PHONY : all all-local install install-local clean clean-local \
distclean distclean-local install-library install-headers dist \
dist-local check check-local check-exhaustive
## Clear suffix list
.SUFFIXES :
## List of standard targets
all: all-local
install: install-local
clean: clean-local
distclean : distclean-local
dist: dist-local
check: all check-local
check-exhaustive: check
all-local: $(ALL_TARGETS)
install-local: install-headers install-library
install-library: all-local
$(MKINSTALLDIRS) $(DESTDIR)$(libdir)
ifneq ($(ENABLE_STATIC),)
$(INSTALL-L) $(TARGET) $(DESTDIR)$(libdir)
endif
ifneq ($(ENABLE_SHARED),)
# For MinGW, do we want the DLL to go in the bin location?
ifeq ($(MINGW_MOVEDLLSTOBINDIR),YES)
$(MKINSTALLDIRS) $(DESTDIR)$(bindir)
$(INSTALL-L) $(FINAL_SO_TARGET) $(DESTDIR)$(bindir)
else
$(INSTALL-L) $(FINAL_SO_TARGET) $(DESTDIR)$(libdir)
ifneq ($(FINAL_SO_TARGET),$(SO_TARGET))
cd $(DESTDIR)$(libdir) && $(RM) $(notdir $(SO_TARGET)) && ln -s $(notdir $(FINAL_SO_TARGET)) $(notdir $(SO_TARGET))
ifneq ($(FINAL_SO_TARGET),$(MIDDLE_SO_TARGET))
cd $(DESTDIR)$(libdir) && $(RM) $(notdir $(MIDDLE_SO_TARGET)) && ln -s $(notdir $(FINAL_SO_TARGET)) $(notdir $(MIDDLE_SO_TARGET))
endif
endif
endif
ifneq ($(IMPORT_LIB_EXT),)
$(INSTALL-L) $(FINAL_IMPORT_LIB) $(DESTDIR)$(libdir)
ifneq ($(IMPORT_LIB),$(FINAL_IMPORT_LIB))
cd $(DESTDIR)$(libdir) && $(RM) $(notdir $(IMPORT_LIB)) && ln -s $(notdir $(FINAL_IMPORT_LIB)) $(notdir $(IMPORT_LIB))
endif
ifneq ($(MIDDLE_IMPORT_LIB),$(FINAL_IMPORT_LIB))
cd $(DESTDIR)$(libdir) && $(RM) $(notdir $(MIDDLE_IMPORT_LIB)) && ln -s $(notdir $(FINAL_IMPORT_LIB)) $(notdir $(MIDDLE_IMPORT_LIB))
endif
endif
endif
$(SVC_HOOK_INC):
@echo generating $@
@-test -f $(top_srcdir)/common/$(LOCALSVC_CPP) && ( echo "have $(LOCALSVC_CPP) - U_LOCAL_SERVICE_HOOK=1" ; \
echo 'CPPFLAGS +=-DU_LOCAL_SERVICE_HOOK=1' > $@ ; \
echo 'OBJECTS += $(LOCALSVC_CPP:%.cpp=%.o)' >> $@ \
) ; true
@echo "# Autogenerated by Makefile" >> $@
install-headers:
$(MKINSTALLDIRS) $(DESTDIR)$(includedir)/unicode
@for file in $(HEADERS); do \
echo "$(INSTALL_DATA) $$file $(DESTDIR)$(includedir)/unicode"; \
$(INSTALL_DATA) $$file $(DESTDIR)$(includedir)/unicode || exit; \
done
dist-local:
clean-local:
test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
$(RMV) $(OBJECTS) $(STATIC_OBJECTS) $(ALL_TARGETS) $(SO_VERSION_DATA)
distclean-local: clean-local
$(RMV) Makefile icucfg.h $(SVC_HOOK_INC)
check-local:
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status $(SVC_HOOK_INC)
cd $(top_builddir) \
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
ifneq ($(ENABLE_STATIC),)
$(TARGET): $(STATIC_OBJECTS)
$(AR) $(ARFLAGS) $(AR_OUTOPT)$@ $^
$(RANLIB) $@
endif
ifneq ($(ENABLE_SHARED),)
$(SHARED_OBJECT): $(OBJECTS) $(SO_VERSION_DATA)
$(SHLIB.cc) $(LD_SONAME) $(OUTOPT)$@ $^ $(LIBS)
ifeq ($(ENABLE_RPATH),YES)
ifneq ($(wildcard $(libdir)/$(MIDDLE_SO_TARGET)),)
$(warning RPATH warning: --enable-rpath means test programs may use existing $(libdir)/$(MIDDLE_SO_TARGET))
endif
endif
ifeq ($(OS390BATCH),1)
$(BATCH_TARGET):$(OBJECTS)
$(SHLIB.cc) $(LD_SONAME) $(OUTOPT)$@ $^ $(BATCH_LIBS)
endif # OS390BATCH
endif # ENABLE_SHARED
ifeq (,$(MAKECMDGOALS))
-include $(DEPS)
else
ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
-include $(DEPS)
endif
endif

View File

@ -37,23 +37,23 @@ Appendable::appendString(const UChar *s, int32_t length) {
UChar c;
while((c=*s++)!=0) {
if(!appendCodeUnit(c)) {
return FALSE;
return false;
}
}
} else if(length>0) {
const UChar *limit=s+length;
do {
if(!appendCodeUnit(*s++)) {
return FALSE;
return false;
}
} while(s<limit);
}
return TRUE;
return true;
}
UBool
Appendable::reserveAppendCapacity(int32_t /*appendCapacity*/) {
return TRUE;
return true;
}
UChar *

View File

@ -1,4 +0,0 @@
appendable.o appendable.d : appendable.cpp unicode/utypes.h unicode/umachine.h \
unicode/ptypes.h unicode/platform.h unicode/uconfig.h \
unicode/uvernum.h unicode/urename.h unicode/uversion.h \
unicode/appendable.h unicode/uobject.h unicode/utf16.h unicode/utf.h

View File

@ -309,9 +309,9 @@ BMPSet::contains(UChar32 c) const {
// surrogate or supplementary code point
return containsSlow(c, list4kStarts[0xd], list4kStarts[0x11]);
} else {
// Out-of-range code points get FALSE, consistent with long-standing
// Out-of-range code points get false, consistent with long-standing
// behavior of UnicodeSet::contains(c).
return FALSE;
return false;
}
}

View File

@ -1,9 +0,0 @@
bmpset.o bmpset.d : bmpset.cpp unicode/utypes.h unicode/umachine.h unicode/ptypes.h \
unicode/platform.h unicode/uconfig.h unicode/uvernum.h \
unicode/urename.h unicode/uversion.h unicode/uniset.h unicode/ucpmap.h \
unicode/unifilt.h unicode/unifunct.h unicode/uobject.h \
unicode/unimatch.h unicode/unistr.h unicode/char16ptr.h unicode/rep.h \
unicode/std_string.h unicode/stringpiece.h unicode/bytestream.h \
unicode/uset.h unicode/uchar.h unicode/stringoptions.h \
unicode/localpointer.h unicode/utf8.h unicode/utf.h unicode/utf16.h \
cmemory.h bmpset.h uassert.h

View File

@ -25,6 +25,7 @@
#include "brkeng.h"
#include "cmemory.h"
#include "dictbe.h"
#include "lstmbe.h"
#include "charstr.h"
#include "dictionarydata.h"
#include "mutex.h"
@ -77,7 +78,10 @@ int32_t
UnhandledEngine::findBreaks( UText *text,
int32_t /* startPos */,
int32_t endPos,
UVector32 &/*foundBreaks*/ ) const {
UVector32 &/*foundBreaks*/,
UBool /* isPhraseBreaking */,
UErrorCode &status) const {
if (U_FAILURE(status)) return 0;
UChar32 c = utext_current32(text);
while((int32_t)utext_getNativeIndex(text) < endPos && fHandled->contains(c)) {
utext_next32(text); // TODO: recast loop to work with post-increment operations.
@ -132,14 +136,13 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c) {
static UMutex gBreakEngineMutex;
Mutex m(&gBreakEngineMutex);
if (fEngines == NULL) {
UStack *engines = new UStack(_deleteEngine, NULL, status);
if (U_FAILURE(status) || engines == NULL) {
if (fEngines == nullptr) {
LocalPointer<UStack> engines(new UStack(_deleteEngine, nullptr, status), status);
if (U_FAILURE(status) ) {
// Note: no way to return error code to caller.
delete engines;
return NULL;
return nullptr;
}
fEngines = engines;
fEngines = engines.orphan();
} else {
int32_t i = fEngines->size();
while (--i >= 0) {
@ -152,10 +155,10 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c) {
// We didn't find an engine. Create one.
lbe = loadEngineFor(c);
if (lbe != NULL) {
if (lbe != nullptr) {
fEngines->push((void *)lbe, status);
}
return lbe;
return U_SUCCESS(status) ? lbe : nullptr;
}
const LanguageBreakEngine *
@ -163,9 +166,26 @@ ICULanguageBreakFactory::loadEngineFor(UChar32 c) {
UErrorCode status = U_ZERO_ERROR;
UScriptCode code = uscript_getScript(c, &status);
if (U_SUCCESS(status)) {
const LanguageBreakEngine *engine = nullptr;
// Try to use LSTM first
const LSTMData *data = CreateLSTMDataForScript(code, status);
if (U_SUCCESS(status)) {
if (data != nullptr) {
engine = CreateLSTMBreakEngine(code, data, status);
if (U_SUCCESS(status) && engine != nullptr) {
return engine;
}
if (engine != nullptr) {
delete engine;
engine = nullptr;
} else {
DeleteLSTMData(data);
}
}
}
status = U_ZERO_ERROR; // fallback to dictionary based
DictionaryMatcher *m = loadDictionaryMatcherFor(code);
if (m != NULL) {
const LanguageBreakEngine *engine = NULL;
switch(code) {
case USCRIPT_THAI:
engine = new ThaiBreakEngine(m, status);
@ -241,10 +261,10 @@ ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) {
const UChar *extStart = u_memrchr(dictfname, 0x002e, dictnlength); // last dot
if (extStart != NULL) {
int32_t len = (int32_t)(extStart - dictfname);
ext.appendInvariantChars(UnicodeString(FALSE, extStart + 1, dictnlength - len - 1), status);
ext.appendInvariantChars(UnicodeString(false, extStart + 1, dictnlength - len - 1), status);
dictnlength = len;
}
dictnbuf.appendInvariantChars(UnicodeString(FALSE, dictfname, dictnlength), status);
dictnbuf.appendInvariantChars(UnicodeString(false, dictfname, dictnlength), status);
ures_close(b);
UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext.data(), dictnbuf.data(), &status);

View File

@ -1,15 +0,0 @@
brkeng.o brkeng.d : brkeng.cpp unicode/utypes.h unicode/umachine.h unicode/ptypes.h \
unicode/platform.h unicode/uconfig.h unicode/uvernum.h \
unicode/urename.h unicode/uversion.h unicode/uchar.h \
unicode/stringoptions.h unicode/ucpmap.h unicode/uniset.h \
unicode/unifilt.h unicode/unifunct.h unicode/uobject.h \
unicode/unimatch.h unicode/unistr.h unicode/char16ptr.h unicode/rep.h \
unicode/std_string.h unicode/stringpiece.h unicode/bytestream.h \
unicode/uset.h unicode/localpointer.h unicode/chariter.h \
unicode/ures.h unicode/uloc.h unicode/uenum.h unicode/udata.h \
unicode/putil.h unicode/ustring.h unicode/uiter.h unicode/uscript.h \
unicode/ucharstrie.h unicode/ustringtrie.h unicode/bytestrie.h \
brkeng.h unicode/utext.h cmemory.h dictbe.h uvectr32.h uhash.h \
uelement.h uassert.h charstr.h dictionarydata.h udataswp.h mutex.h \
umutex.h unicode/uclean.h putilimp.h uvector.h uarrsort.h uresimp.h \
uresdata.h resource.h restrace.h ubrkimpl.h

View File

@ -68,12 +68,15 @@ class LanguageBreakEngine : public UMemory {
* @param startPos The start of the run within the supplied text.
* @param endPos The end of the run within the supplied text.
* @param foundBreaks A Vector of int32_t to receive the breaks.
* @param status Information on any errors encountered.
* @return The number of breaks found.
*/
virtual int32_t findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
UVector32 &foundBreaks ) const = 0;
UVector32 &foundBreaks,
UBool isPhraseBreaking,
UErrorCode &status) const = 0;
};
@ -174,7 +177,7 @@ class UnhandledEngine : public LanguageBreakEngine {
* @return true if this engine handles the particular character and break
* type.
*/
virtual UBool handles(UChar32 c) const;
virtual UBool handles(UChar32 c) const override;
/**
* <p>Find any breaks within a run in the supplied text.</p>
@ -185,12 +188,15 @@ class UnhandledEngine : public LanguageBreakEngine {
* @param startPos The start of the run within the supplied text.
* @param endPos The end of the run within the supplied text.
* @param foundBreaks An allocated C array of the breaks found, if any
* @param status Information on any errors encountered.
* @return The number of breaks found.
*/
virtual int32_t findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
UVector32 &foundBreaks ) const;
UVector32 &foundBreaks,
UBool isPhraseBreaking,
UErrorCode &status) const override;
/**
* <p>Tell the engine to handle a particular character and break type.</p>
@ -243,7 +249,7 @@ class ICULanguageBreakFactory : public LanguageBreakFactory {
* sought.
* @return A LanguageBreakEngine with the desired characteristics, or 0.
*/
virtual const LanguageBreakEngine *getEngineFor(UChar32 c);
virtual const LanguageBreakEngine *getEngineFor(UChar32 c) override;
protected:
/**

View File

@ -30,6 +30,7 @@
#include "unicode/ures.h"
#include "unicode/ustring.h"
#include "unicode/filteredbrk.h"
#include "bytesinkutil.h"
#include "ucln_cmn.h"
#include "cstring.h"
#include "umutex.h"
@ -115,7 +116,7 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st
}
// Create a RuleBasedBreakIterator
result = new RuleBasedBreakIterator(file, status);
result = new RuleBasedBreakIterator(file, uprv_strstr(type, "phrase") != NULL, status);
// If there is a result, set the valid locale and actual locale, and the kind
if (U_SUCCESS(status) && result != NULL) {
@ -234,7 +235,7 @@ class ICUBreakIteratorFactory : public ICUResourceBundleFactory {
public:
virtual ~ICUBreakIteratorFactory();
protected:
virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const {
virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const override {
return BreakIterator::makeInstance(loc, kind, status);
}
};
@ -254,11 +255,11 @@ public:
virtual ~ICUBreakIteratorService();
virtual UObject* cloneInstance(UObject* instance) const {
virtual UObject* cloneInstance(UObject* instance) const override {
return ((BreakIterator*)instance)->clone();
}
virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const {
virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const override {
LocaleKey& lkey = (LocaleKey&)key;
int32_t kind = lkey.kind();
Locale loc;
@ -266,7 +267,7 @@ public:
return BreakIterator::makeInstance(loc, kind, status);
}
virtual UBool isDefault() const {
virtual UBool isDefault() const override {
return countFactories() == 1;
}
};
@ -278,7 +279,7 @@ ICUBreakIteratorService::~ICUBreakIteratorService() {}
// defined in ucln_cmn.h
U_NAMESPACE_END
static icu::UInitOnce gInitOnceBrkiter = U_INITONCE_INITIALIZER;
static icu::UInitOnce gInitOnceBrkiter {};
static icu::ICULocaleService* gService = NULL;
@ -295,7 +296,7 @@ static UBool U_CALLCONV breakiterator_cleanup(void) {
}
gInitOnceBrkiter.reset();
#endif
return TRUE;
return true;
}
U_CDECL_END
U_NAMESPACE_BEGIN
@ -346,7 +347,7 @@ BreakIterator::unregister(URegistryKey key, UErrorCode& status)
}
status = U_MEMORY_ALLOCATION_ERROR;
}
return FALSE;
return false;
}
// -------------------------------------
@ -408,7 +409,6 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
if (U_FAILURE(status)) {
return NULL;
}
char lbType[kKeyValueLenMax];
BreakIterator *result = NULL;
switch (kind) {
@ -428,18 +428,29 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
break;
case UBRK_LINE:
{
char lb_lw[kKeyValueLenMax];
UTRACE_ENTRY(UTRACE_UBRK_CREATE_LINE);
uprv_strcpy(lbType, "line");
char lbKeyValue[kKeyValueLenMax] = {0};
uprv_strcpy(lb_lw, "line");
UErrorCode kvStatus = U_ZERO_ERROR;
int32_t kLen = loc.getKeywordValue("lb", lbKeyValue, kKeyValueLenMax, kvStatus);
if (U_SUCCESS(kvStatus) && kLen > 0 && (uprv_strcmp(lbKeyValue,"strict")==0 || uprv_strcmp(lbKeyValue,"normal")==0 || uprv_strcmp(lbKeyValue,"loose")==0)) {
uprv_strcat(lbType, "_");
uprv_strcat(lbType, lbKeyValue);
CharString value;
CharStringByteSink valueSink(&value);
loc.getKeywordValue("lb", valueSink, kvStatus);
if (U_SUCCESS(kvStatus) && (value == "strict" || value == "normal" || value == "loose")) {
uprv_strcat(lb_lw, "_");
uprv_strcat(lb_lw, value.data());
}
result = BreakIterator::buildInstance(loc, lbType, status);
// lw=phrase is only supported in Japanese.
if (uprv_strcmp(loc.getLanguage(), "ja") == 0) {
value.clear();
loc.getKeywordValue("lw", valueSink, kvStatus);
if (U_SUCCESS(kvStatus) && value == "phrase") {
uprv_strcat(lb_lw, "_");
uprv_strcat(lb_lw, value.data());
}
}
result = BreakIterator::buildInstance(loc, lb_lw, status);
UTRACE_DATA1(UTRACE_INFO, "lb=%s", lbKeyValue);
UTRACE_DATA1(UTRACE_INFO, "lb_lw=%s", lb_lw);
UTRACE_EXIT_STATUS(status);
}
break;

View File

@ -1,16 +0,0 @@
brkiter.o brkiter.d : brkiter.cpp unicode/utypes.h unicode/umachine.h \
unicode/ptypes.h unicode/platform.h unicode/uconfig.h \
unicode/uvernum.h unicode/urename.h unicode/uversion.h unicode/rbbi.h \
unicode/brkiter.h unicode/uobject.h unicode/unistr.h \
unicode/char16ptr.h unicode/rep.h unicode/std_string.h \
unicode/stringpiece.h unicode/bytestream.h unicode/chariter.h \
unicode/locid.h unicode/localpointer.h unicode/strenum.h \
unicode/putil.h unicode/uloc.h unicode/uenum.h unicode/ubrk.h \
unicode/utext.h unicode/uchar.h unicode/stringoptions.h \
unicode/ucpmap.h unicode/parseerr.h unicode/umisc.h unicode/udata.h \
unicode/schriter.h unicode/uchriter.h unicode/ures.h unicode/ustring.h \
unicode/uiter.h unicode/filteredbrk.h ucln_cmn.h ucln.h cstring.h \
cmemory.h umutex.h unicode/uclean.h putilimp.h servloc.h hash.h \
uhash.h uelement.h uvector.h uarrsort.h serv.h servnotf.h mutex.h \
locutil.h locbased.h uresimp.h uresdata.h udataswp.h resource.h \
restrace.h uassert.h ubrkimpl.h utracimp.h unicode/utrace.h charstr.h

View File

@ -20,7 +20,7 @@ U_NAMESPACE_BEGIN
UBool
ByteSinkUtil::appendChange(int32_t length, const char16_t *s16, int32_t s16Length,
ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return FALSE; }
if (U_FAILURE(errorCode)) { return false; }
char scratch[200];
int32_t s8Length = 0;
for (int32_t i = 0; i < s16Length;) {
@ -44,7 +44,7 @@ ByteSinkUtil::appendChange(int32_t length, const char16_t *s16, int32_t s16Lengt
}
if (j > (INT32_MAX - s8Length)) {
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return FALSE;
return false;
}
sink.Append(buffer, j);
s8Length += j;
@ -52,17 +52,17 @@ ByteSinkUtil::appendChange(int32_t length, const char16_t *s16, int32_t s16Lengt
if (edits != nullptr) {
edits->addReplace(length, s8Length);
}
return TRUE;
return true;
}
UBool
ByteSinkUtil::appendChange(const uint8_t *s, const uint8_t *limit,
const char16_t *s16, int32_t s16Length,
ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return FALSE; }
if (U_FAILURE(errorCode)) { return false; }
if ((limit - s) > INT32_MAX) {
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return FALSE;
return false;
}
return appendChange((int32_t)(limit - s), s16, s16Length, sink, edits, errorCode);
}
@ -109,16 +109,16 @@ UBool
ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit,
ByteSink &sink, uint32_t options, Edits *edits,
UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return FALSE; }
if (U_FAILURE(errorCode)) { return false; }
if ((limit - s) > INT32_MAX) {
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return FALSE;
return false;
}
int32_t length = (int32_t)(limit - s);
if (length > 0) {
appendNonEmptyUnchanged(s, length, sink, options, edits);
}
return TRUE;
return true;
}
CharStringByteSink::CharStringByteSink(CharString* dest) : dest_(*dest) {

View File

@ -1,8 +0,0 @@
bytesinkutil.o bytesinkutil.d : bytesinkutil.cpp unicode/utypes.h unicode/umachine.h \
unicode/ptypes.h unicode/platform.h unicode/uconfig.h \
unicode/uvernum.h unicode/urename.h unicode/uversion.h \
unicode/bytestream.h unicode/uobject.h unicode/std_string.h \
unicode/edits.h unicode/stringoptions.h unicode/utf8.h unicode/utf.h \
unicode/utf16.h bytesinkutil.h cmemory.h unicode/localpointer.h \
uassert.h charstr.h unicode/unistr.h unicode/char16ptr.h unicode/rep.h \
unicode/stringpiece.h

View File

@ -4,6 +4,9 @@
// bytesinkutil.h
// created: 2017sep14 Markus W. Scherer
#ifndef BYTESINKUTIL_H
#define BYTESINKUTIL_H
#include "unicode/utypes.h"
#include "unicode/bytestream.h"
#include "unicode/edits.h"
@ -81,3 +84,5 @@ private:
};
U_NAMESPACE_END
#endif //BYTESINKUTIL_H

View File

@ -30,14 +30,14 @@ void ByteSink::Flush() {}
CheckedArrayByteSink::CheckedArrayByteSink(char* outbuf, int32_t capacity)
: outbuf_(outbuf), capacity_(capacity < 0 ? 0 : capacity),
size_(0), appended_(0), overflowed_(FALSE) {
size_(0), appended_(0), overflowed_(false) {
}
CheckedArrayByteSink::~CheckedArrayByteSink() {}
CheckedArrayByteSink& CheckedArrayByteSink::Reset() {
size_ = appended_ = 0;
overflowed_ = FALSE;
overflowed_ = false;
return *this;
}
@ -48,14 +48,14 @@ void CheckedArrayByteSink::Append(const char* bytes, int32_t n) {
if (n > (INT32_MAX - appended_)) {
// TODO: Report as integer overflow, not merely buffer overflow.
appended_ = INT32_MAX;
overflowed_ = TRUE;
overflowed_ = true;
return;
}
appended_ += n;
int32_t available = capacity_ - size_;
if (n > available) {
n = available;
overflowed_ = TRUE;
overflowed_ = true;
}
if (n > 0 && bytes != (outbuf_ + size_)) {
uprv_memcpy(outbuf_ + size_, bytes, n);

View File

@ -1,5 +0,0 @@
bytestream.o bytestream.d : bytestream.cpp unicode/utypes.h unicode/umachine.h \
unicode/ptypes.h unicode/platform.h unicode/uconfig.h \
unicode/uvernum.h unicode/urename.h unicode/uversion.h \
unicode/bytestream.h unicode/uobject.h unicode/std_string.h cmemory.h \
unicode/localpointer.h

View File

@ -337,13 +337,13 @@ BytesTrie::findUniqueValueFromBranch(const uint8_t *pos, int32_t length,
}
} else {
uniqueValue=value;
haveUniqueValue=TRUE;
haveUniqueValue=true;
}
} else {
if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) {
return NULL;
}
haveUniqueValue=TRUE;
haveUniqueValue=true;
}
} while(--length>1);
return pos+1; // ignore the last comparison byte
@ -359,9 +359,9 @@ BytesTrie::findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &u
}
pos=findUniqueValueFromBranch(pos, node+1, haveUniqueValue, uniqueValue);
if(pos==NULL) {
return FALSE;
return false;
}
haveUniqueValue=TRUE;
haveUniqueValue=true;
} else if(node<kMinValueLead) {
// linear-match node
pos+=node-kMinLinearMatch+1; // Ignore the match bytes.
@ -370,14 +370,14 @@ BytesTrie::findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &u
int32_t value=readValue(pos, node>>1);
if(haveUniqueValue) {
if(value!=uniqueValue) {
return FALSE;
return false;
}
} else {
uniqueValue=value;
haveUniqueValue=TRUE;
haveUniqueValue=true;
}
if(isFinal) {
return TRUE;
return true;
}
pos=skipValue(pos, node);
}

View File

@ -1,6 +0,0 @@
bytestrie.o bytestrie.d : bytestrie.cpp unicode/utypes.h unicode/umachine.h \
unicode/ptypes.h unicode/platform.h unicode/uconfig.h \
unicode/uvernum.h unicode/urename.h unicode/uversion.h \
unicode/bytestream.h unicode/uobject.h unicode/std_string.h \
unicode/bytestrie.h unicode/stringpiece.h unicode/ustringtrie.h \
cmemory.h unicode/localpointer.h uassert.h

View File

@ -231,7 +231,7 @@ BytesTrieBuilder::buildBytes(UStringTrieBuildOption buildOption, UErrorCode &err
}
uprv_sortArray(elements, elementsLength, (int32_t)sizeof(BytesTrieElement),
compareElementStrings, strings,
FALSE, // need not be a stable sort
false, // need not be a stable sort
&errorCode);
if(U_FAILURE(errorCode)) {
return;
@ -343,13 +343,13 @@ BytesTrieBuilder::BTLinearMatchNode::BTLinearMatchNode(const char *bytes, int32_
static_cast<uint32_t>(hash)*37u + static_cast<uint32_t>(ustr_hashCharsN(bytes, len)));
}
UBool
bool
BytesTrieBuilder::BTLinearMatchNode::operator==(const Node &other) const {
if(this==&other) {
return TRUE;
return true;
}
if(!LinearMatchNode::operator==(other)) {
return FALSE;
return false;
}
const BTLinearMatchNode &o=(const BTLinearMatchNode &)other;
return 0==uprv_memcmp(s, o.s, length);
@ -375,7 +375,7 @@ BytesTrieBuilder::createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t le
UBool
BytesTrieBuilder::ensureCapacity(int32_t length) {
if(bytes==NULL) {
return FALSE; // previous memory allocation had failed
return false; // previous memory allocation had failed
}
if(length>bytesCapacity) {
int32_t newCapacity=bytesCapacity;
@ -388,7 +388,7 @@ BytesTrieBuilder::ensureCapacity(int32_t length) {
uprv_free(bytes);
bytes=NULL;
bytesCapacity=0;
return FALSE;
return false;
}
uprv_memcpy(newBytes+(newCapacity-bytesLength),
bytes+(bytesCapacity-bytesLength), bytesLength);
@ -396,7 +396,7 @@ BytesTrieBuilder::ensureCapacity(int32_t length) {
bytes=newBytes;
bytesCapacity=newCapacity;
}
return TRUE;
return true;
}
int32_t
@ -463,7 +463,7 @@ int32_t
BytesTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) {
int32_t offset=write(node);
if(hasValue) {
offset=writeValueAndFinal(value, FALSE);
offset=writeValueAndFinal(value, false);
}
return offset;
}
@ -474,31 +474,39 @@ BytesTrieBuilder::writeDeltaTo(int32_t jumpTarget) {
U_ASSERT(i>=0);
if(i<=BytesTrie::kMaxOneByteDelta) {
return write(i);
} else {
char intBytes[5];
return write(intBytes, internalEncodeDelta(i, intBytes));
}
char intBytes[5];
int32_t length;
}
int32_t
BytesTrieBuilder::internalEncodeDelta(int32_t i, char intBytes[]) {
U_ASSERT(i>=0);
if(i<=BytesTrie::kMaxOneByteDelta) {
intBytes[0]=(char)i;
return 1;
}
int32_t length=1;
if(i<=BytesTrie::kMaxTwoByteDelta) {
intBytes[0]=(char)(BytesTrie::kMinTwoByteDeltaLead+(i>>8));
length=1;
} else {
if(i<=BytesTrie::kMaxThreeByteDelta) {
intBytes[0]=(char)(BytesTrie::kMinThreeByteDeltaLead+(i>>16));
length=2;
} else {
if(i<=0xffffff) {
intBytes[0]=(char)BytesTrie::kFourByteDeltaLead;
length=3;
} else {
intBytes[0]=(char)BytesTrie::kFiveByteDeltaLead;
intBytes[1]=(char)(i>>24);
length=4;
length=2;
}
intBytes[1]=(char)(i>>16);
intBytes[length++]=(char)(i>>16);
}
intBytes[1]=(char)(i>>8);
intBytes[length++]=(char)(i>>8);
}
intBytes[length++]=(char)i;
return write(intBytes, length);
return length;
}
U_NAMESPACE_END

View File

@ -1,10 +0,0 @@
bytestriebuilder.o bytestriebuilder.d : bytestriebuilder.cpp unicode/utypes.h \
unicode/umachine.h unicode/ptypes.h unicode/platform.h \
unicode/uconfig.h unicode/uvernum.h unicode/urename.h \
unicode/uversion.h unicode/bytestrie.h unicode/stringpiece.h \
unicode/uobject.h unicode/std_string.h unicode/ustringtrie.h \
unicode/bytestriebuilder.h unicode/stringtriebuilder.h charstr.h \
unicode/unistr.h unicode/char16ptr.h unicode/rep.h \
unicode/bytestream.h cmemory.h unicode/localpointer.h uhash.h \
uelement.h uarrsort.h uassert.h ustr_imp.h unicode/utf8.h \
unicode/utf.h

View File

@ -101,12 +101,12 @@ BytesTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty();
UBool
BytesTrie::Iterator::next(UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return FALSE;
return false;
}
const uint8_t *pos=pos_;
if(pos==NULL) {
if(stack_->isEmpty()) {
return FALSE;
return false;
}
// Pop the state off the stack and continue with the next outbound edge of
// the branch node.
@ -119,7 +119,7 @@ BytesTrie::Iterator::next(UErrorCode &errorCode) {
if(length>1) {
pos=branchNext(pos, length, errorCode);
if(pos==NULL) {
return TRUE; // Reached a final value.
return true; // Reached a final value.
}
} else {
str_->append((char)*pos++, errorCode);
@ -141,7 +141,7 @@ BytesTrie::Iterator::next(UErrorCode &errorCode) {
} else {
pos_=skipValue(pos, node);
}
return TRUE;
return true;
}
if(maxLength_>0 && str_->length()==maxLength_) {
return truncateAndStop();
@ -152,7 +152,7 @@ BytesTrie::Iterator::next(UErrorCode &errorCode) {
}
pos=branchNext(pos, node+1, errorCode);
if(pos==NULL) {
return TRUE; // Reached a final value.
return true; // Reached a final value.
}
} else {
// Linear-match node, append length bytes to str_.
@ -177,7 +177,7 @@ UBool
BytesTrie::Iterator::truncateAndStop() {
pos_=NULL;
value_=-1; // no real value for str
return TRUE;
return true;
}
// Branch node, needs to take the first outbound edge and push state for the rest.

View File

@ -1,8 +0,0 @@
bytestrieiterator.o bytestrieiterator.d : bytestrieiterator.cpp unicode/utypes.h \
unicode/umachine.h unicode/ptypes.h unicode/platform.h \
unicode/uconfig.h unicode/uvernum.h unicode/urename.h \
unicode/uversion.h unicode/bytestrie.h unicode/stringpiece.h \
unicode/uobject.h unicode/std_string.h unicode/ustringtrie.h charstr.h \
unicode/unistr.h unicode/char16ptr.h unicode/rep.h \
unicode/bytestream.h cmemory.h unicode/localpointer.h uvectr32.h \
uhash.h uelement.h uassert.h

View File

@ -119,7 +119,7 @@ UnicodeString CanonicalIterator::getSource() {
* Resets the iterator so that one can start again from the beginning.
*/
void CanonicalIterator::reset() {
done = FALSE;
done = false;
for (int i = 0; i < current_length; ++i) {
current[i] = 0;
}
@ -151,7 +151,7 @@ UnicodeString CanonicalIterator::next() {
for (i = current_length - 1; ; --i) {
if (i < 0) {
done = TRUE;
done = true;
break;
}
current[i]++;
@ -176,7 +176,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
if(U_FAILURE(status)) {
return;
}
done = FALSE;
done = false;
cleanPieces();
@ -208,10 +208,10 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
goto CleanPartialInitialization;
}
// i should initialy be the number of code units at the
// i should initially be the number of code units at the
// start of the string
i = U16_LENGTH(source.char32At(0));
//int32_t i = 1;
// int32_t i = 1;
// find the segments
// This code iterates through the source string and
// extracts segments that end up on a codepoint that
@ -494,7 +494,7 @@ Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const UCh
/**
* See if the decomposition of cp2 is at segment starting at segmentPos
* (with canonical rearrangment!)
* (with canonical rearrangement!)
* If so, take the remainder, and return the equivalents
*/
Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, const UChar *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) {
@ -521,7 +521,7 @@ Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, con
int32_t decompLen=decompString.length();
// See if it matches the start of segment (at segmentPos)
UBool ok = FALSE;
UBool ok = false;
UChar32 cp;
int32_t decompPos = 0;
UChar32 decompCp;
@ -537,7 +537,7 @@ Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, con
if (decompPos == decompLen) { // done, have all decomp characters!
temp.append(segment+i, segLen-i);
ok = TRUE;
ok = true;
break;
}
U16_NEXT(decomp, decompPos, decompLen, decompCp);

View File

@ -1,13 +0,0 @@
caniter.o caniter.d : caniter.cpp unicode/utypes.h unicode/umachine.h \
unicode/ptypes.h unicode/platform.h unicode/uconfig.h \
unicode/uvernum.h unicode/urename.h unicode/uversion.h \
unicode/caniter.h unicode/uobject.h unicode/unistr.h \
unicode/char16ptr.h unicode/rep.h unicode/std_string.h \
unicode/stringpiece.h unicode/bytestream.h unicode/normalizer2.h \
unicode/uniset.h unicode/ucpmap.h unicode/unifilt.h unicode/unifunct.h \
unicode/unimatch.h unicode/uset.h unicode/uchar.h \
unicode/stringoptions.h unicode/localpointer.h unicode/unorm2.h \
unicode/usetiter.h unicode/ustring.h unicode/putil.h unicode/uiter.h \
unicode/utf16.h unicode/utf.h cmemory.h hash.h uhash.h uelement.h \
normalizer2impl.h unicode/ucptrie.h unicode/utf8.h unicode/unorm.h \
mutex.h umutex.h unicode/uclean.h putilimp.h udataswp.h uset_imp.h

View File

@ -14,6 +14,7 @@
#include "unicode/uscript.h"
#include "unicode/uset.h"
#include "cmemory.h"
#include "emojiprops.h"
#include "mutex.h"
#include "normalizer2impl.h"
#include "uassert.h"
@ -35,11 +36,11 @@ namespace {
UBool U_CALLCONV characterproperties_cleanup();
constexpr int32_t NUM_INCLUSIONS = UPROPS_SRC_COUNT + UCHAR_INT_LIMIT - UCHAR_INT_START;
constexpr int32_t NUM_INCLUSIONS = UPROPS_SRC_COUNT + (UCHAR_INT_LIMIT - UCHAR_INT_START);
struct Inclusion {
UnicodeSet *fSet = nullptr;
UInitOnce fInitOnce = U_INITONCE_INITIALIZER;
UInitOnce fInitOnce {};
};
Inclusion gInclusions[NUM_INCLUSIONS]; // cached getInclusions()
@ -84,7 +85,7 @@ UBool U_CALLCONV characterproperties_cleanup() {
ucptrie_close(reinterpret_cast<UCPTrie *>(maps[i]));
maps[i] = nullptr;
}
return TRUE;
return true;
}
void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode) {
@ -170,6 +171,13 @@ void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode) {
case UPROPS_SRC_VO:
uprops_addPropertyStarts((UPropertySource)src, &sa, &errorCode);
break;
case UPROPS_SRC_EMOJI: {
const icu::EmojiProps *ep = icu::EmojiProps::getSingleton(errorCode);
if (U_SUCCESS(errorCode)) {
ep->addPropertyStarts(&sa, errorCode);
}
break;
}
default:
errorCode = U_INTERNAL_PROGRAM_ERROR;
break;
@ -202,7 +210,7 @@ const UnicodeSet *getInclusionsForSource(UPropertySource src, UErrorCode &errorC
void U_CALLCONV initIntPropInclusion(UProperty prop, UErrorCode &errorCode) {
// This function is invoked only via umtx_initOnce().
U_ASSERT(UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT);
int32_t inclIndex = UPROPS_SRC_COUNT + prop - UCHAR_INT_START;
int32_t inclIndex = UPROPS_SRC_COUNT + (prop - UCHAR_INT_START);
U_ASSERT(gInclusions[inclIndex].fSet == nullptr);
UPropertySource src = uprops_getSource(prop);
const UnicodeSet *incl = getInclusionsForSource(src, errorCode);
@ -247,7 +255,7 @@ const UnicodeSet *CharacterProperties::getInclusionsForProperty(
UProperty prop, UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return nullptr; }
if (UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT) {
int32_t inclIndex = UPROPS_SRC_COUNT + prop - UCHAR_INT_START;
int32_t inclIndex = UPROPS_SRC_COUNT + (prop - UCHAR_INT_START);
Inclusion &i = gInclusions[inclIndex];
umtx_initOnce(i.fInitOnce, &initIntPropInclusion, prop, errorCode);
return i.fSet;
@ -268,6 +276,26 @@ UnicodeSet *makeSet(UProperty property, UErrorCode &errorCode) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return nullptr;
}
if (UCHAR_BASIC_EMOJI <= property && property <= UCHAR_RGI_EMOJI) {
// property of strings
const icu::EmojiProps *ep = icu::EmojiProps::getSingleton(errorCode);
if (U_FAILURE(errorCode)) { return nullptr; }
USetAdder sa = {
(USet *)set.getAlias(),
_set_add,
_set_addRange,
_set_addString,
nullptr, // don't need remove()
nullptr // don't need removeRange()
};
ep->addStrings(&sa, property, errorCode);
if (property != UCHAR_BASIC_EMOJI && property != UCHAR_RGI_EMOJI) {
// property of _only_ strings
set->freeze();
return set.orphan();
}
}
const UnicodeSet *inclusions =
icu::CharacterProperties::getInclusionsForProperty(property, errorCode);
if (U_FAILURE(errorCode)) { return nullptr; }

View File

@ -1,14 +0,0 @@
characterproperties.o characterproperties.d : characterproperties.cpp unicode/utypes.h \
unicode/umachine.h unicode/ptypes.h unicode/platform.h \
unicode/uconfig.h unicode/uvernum.h unicode/urename.h \
unicode/uversion.h unicode/localpointer.h unicode/uchar.h \
unicode/stringoptions.h unicode/ucpmap.h unicode/ucptrie.h \
unicode/utf8.h unicode/utf.h unicode/umutablecptrie.h unicode/uniset.h \
unicode/unifilt.h unicode/unifunct.h unicode/uobject.h \
unicode/unimatch.h unicode/unistr.h unicode/char16ptr.h unicode/rep.h \
unicode/std_string.h unicode/stringpiece.h unicode/bytestream.h \
unicode/uset.h unicode/uscript.h cmemory.h mutex.h umutex.h \
unicode/uclean.h putilimp.h unicode/putil.h normalizer2impl.h \
unicode/normalizer2.h unicode/unorm2.h unicode/unorm.h unicode/uiter.h \
unicode/utf16.h udataswp.h uset_imp.h uassert.h ubidi_props.h ucase.h \
utrie2.h ucln_cmn.h ucln.h uprops.h

View File

@ -1,6 +0,0 @@
chariter.o chariter.d : chariter.cpp unicode/chariter.h unicode/utypes.h \
unicode/umachine.h unicode/ptypes.h unicode/platform.h \
unicode/uconfig.h unicode/uvernum.h unicode/urename.h \
unicode/uversion.h unicode/uobject.h unicode/unistr.h \
unicode/char16ptr.h unicode/rep.h unicode/std_string.h \
unicode/stringpiece.h unicode/bytestream.h

View File

@ -14,6 +14,8 @@
* created by: Markus W. Scherer
*/
#include <cstdlib>
#include "unicode/utypes.h"
#include "unicode/putil.h"
#include "charstr.h"
@ -141,6 +143,38 @@ CharString &CharString::append(const char *s, int32_t sLength, UErrorCode &error
return *this;
}
/**
 * Appends the decimal text of number to this string.
 * A leading '-' is written for negative values; zero is written as "0".
 * Returns *this; stops early and returns as soon as status reports a failure.
 */
CharString &CharString::appendNumber(int32_t number, UErrorCode &status) {
    // Emit the sign first; the digit loop below works on per-digit magnitudes.
    if (number < 0) {
        this->append('-', status);
        if (U_FAILURE(status)) {
            return *this;
        }
    }

    // Zero has no digits for the loop to peel off, so write it directly.
    if (number == 0) {
        this->append('0', status);
        return *this;
    }

    // Append the digits least-significant first. The remainder carries the
    // sign of number, so the absolute value is taken per digit instead of
    // negating number as a whole.
    int32_t digitCount = 0;
    for (; number != 0; number /= 10) {
        int32_t digit = number % 10;
        this->append(std::abs(digit) + '0', status);
        ++digitCount;
        if (U_FAILURE(status)) {
            return *this;
        }
    }

    // The digits now sit at the tail in reverse order; swap them in place
    // from both ends toward the middle.
    int32_t lo = this->length() - digitCount;
    int32_t hi = this->length() - 1;
    for (; lo < hi; ++lo, --hi) {
        std::swap(this->data()[lo], this->data()[hi]);
    }

    return *this;
}
char *CharString::getAppendBuffer(int32_t minCapacity,
int32_t desiredCapacityHint,
int32_t &resultCapacity,
@ -186,7 +220,7 @@ UBool CharString::ensureCapacity(int32_t capacity,
int32_t desiredCapacityHint,
UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return FALSE;
return false;
}
if(capacity>buffer.getCapacity()) {
if(desiredCapacityHint==0) {
@ -196,10 +230,10 @@ UBool CharString::ensureCapacity(int32_t capacity,
buffer.resize(capacity, len+1)==NULL
) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
return FALSE;
return false;
}
}
return TRUE;
return true;
}
CharString &CharString::appendPathPart(StringPiece s, UErrorCode &errorCode) {

View File

@ -1,7 +0,0 @@
charstr.o charstr.d : charstr.cpp unicode/utypes.h unicode/umachine.h \
unicode/ptypes.h unicode/platform.h unicode/uconfig.h \
unicode/uvernum.h unicode/urename.h unicode/uversion.h unicode/putil.h \
charstr.h unicode/unistr.h unicode/char16ptr.h unicode/rep.h \
unicode/uobject.h unicode/std_string.h unicode/stringpiece.h \
unicode/bytestream.h cmemory.h unicode/localpointer.h cstring.h \
uinvchar.h ustr_imp.h unicode/utf8.h unicode/utf.h

View File

@ -127,6 +127,9 @@ public:
return append(s.data(), s.length(), errorCode);
}
CharString &append(const char *s, int32_t sLength, UErrorCode &status);
CharString &appendNumber(int32_t number, UErrorCode &status);
/**
* Returns a writable buffer for appending and writes the buffer's capacity to
* resultCapacity. Guarantees resultCapacity>=minCapacity if U_SUCCESS().
@ -174,8 +177,8 @@ private:
UBool ensureCapacity(int32_t capacity, int32_t desiredCapacityHint, UErrorCode &errorCode);
CharString(const CharString &other); // forbid copying of this class
CharString &operator=(const CharString &other); // forbid copying of this class
CharString(const CharString &other) = delete; // forbid copying of this class
CharString &operator=(const CharString &other) = delete; // forbid copying of this class
/**
* Returns U_FILE_ALT_SEP_CHAR if found in string, and U_FILE_SEP_CHAR is not found.

View File

@ -134,5 +134,5 @@ U_CFUNC UBool cmemory_cleanup(void) {
pAlloc = NULL;
pRealloc = NULL;
pFree = NULL;
return TRUE;
return true;
}

View File

@ -1,5 +0,0 @@
cmemory.o cmemory.d : cmemory.cpp unicode/uclean.h unicode/utypes.h \
unicode/umachine.h unicode/ptypes.h unicode/platform.h \
unicode/uconfig.h unicode/uvernum.h unicode/urename.h \
unicode/uversion.h cmemory.h unicode/localpointer.h unicode/uobject.h \
putilimp.h unicode/putil.h uassert.h

View File

@ -31,14 +31,63 @@
#include <stddef.h>
#include <string.h>
#include "unicode/localpointer.h"
#include "uassert.h"
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
#include <stdio.h>
#endif
#define uprv_memcpy(dst, src, size) U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size)
#define uprv_memmove(dst, src, size) U_STANDARD_CPP_NAMESPACE memmove(dst, src, size)
// uprv_memcpy and uprv_memmove
// Statement-like wrappers around memcpy/memmove. Each one first runs
// U_ASSERT(dst != NULL) and U_ASSERT(src != NULL), then forwards dst, src and
// size unchanged to the standard function.
// The clang and GCC variants additionally wrap the two asserts in a
// diagnostic push/pop that ignores -Waddress, so passing an address that can
// never be NULL does not produce a warning. All other compilers get the same
// asserts without the pragmas.
// Note: dst and src each appear twice in the expansion (once in the assert,
// once in the call).
#if defined(__clang__)
#define uprv_memcpy(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
/* Suppress warnings about addresses that will never be NULL */ \
_Pragma("clang diagnostic push") \
_Pragma("clang diagnostic ignored \"-Waddress\"") \
U_ASSERT(dst != NULL); \
U_ASSERT(src != NULL); \
_Pragma("clang diagnostic pop") \
U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size); \
} UPRV_BLOCK_MACRO_END
#define uprv_memmove(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
/* Suppress warnings about addresses that will never be NULL */ \
_Pragma("clang diagnostic push") \
_Pragma("clang diagnostic ignored \"-Waddress\"") \
U_ASSERT(dst != NULL); \
U_ASSERT(src != NULL); \
_Pragma("clang diagnostic pop") \
U_STANDARD_CPP_NAMESPACE memmove(dst, src, size); \
} UPRV_BLOCK_MACRO_END
#elif defined(__GNUC__)
#define uprv_memcpy(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
/* Suppress warnings about addresses that will never be NULL */ \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Waddress\"") \
U_ASSERT(dst != NULL); \
U_ASSERT(src != NULL); \
_Pragma("GCC diagnostic pop") \
U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size); \
} UPRV_BLOCK_MACRO_END
#define uprv_memmove(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
/* Suppress warnings about addresses that will never be NULL */ \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Waddress\"") \
U_ASSERT(dst != NULL); \
U_ASSERT(src != NULL); \
_Pragma("GCC diagnostic pop") \
U_STANDARD_CPP_NAMESPACE memmove(dst, src, size); \
} UPRV_BLOCK_MACRO_END
#else
#define uprv_memcpy(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
U_ASSERT(dst != NULL); \
U_ASSERT(src != NULL); \
U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size); \
} UPRV_BLOCK_MACRO_END
#define uprv_memmove(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
U_ASSERT(dst != NULL); \
U_ASSERT(src != NULL); \
U_STANDARD_CPP_NAMESPACE memmove(dst, src, size); \
} UPRV_BLOCK_MACRO_END
#endif
/**
* \def UPRV_LENGTHOF

View File

@ -58,7 +58,7 @@
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
</ClCompile>
<Link>
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc68d.dll</OutputFile>
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc$(IcuMajorVersion)d.dll</OutputFile>
<ProgramDatabaseFile>.\..\..\$(IcuLibOutputDir)\icuucd.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\$(IcuLibOutputDir)\icuucd.lib</ImportLibrary>
</Link>
@ -70,7 +70,7 @@
<FunctionLevelLinking>true</FunctionLevelLinking>
</ClCompile>
<Link>
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc68.dll</OutputFile>
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc$(IcuMajorVersion).dll</OutputFile>
<ProgramDatabaseFile>.\..\..\$(IcuLibOutputDir)\icuuc.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\$(IcuLibOutputDir)\icuuc.lib</ImportLibrary>
</Link>
@ -87,6 +87,7 @@
<ClCompile Include="brkeng.cpp" />
<ClCompile Include="brkiter.cpp" />
<ClCompile Include="dictbe.cpp" />
<ClCompile Include="lstmbe.cpp" />
<ClCompile Include="pluralmap.cpp" />
<ClCompile Include="rbbi.cpp" />
<ClCompile Include="rbbidata.cpp" />
@ -203,6 +204,7 @@
<ClCompile Include="ucase.cpp" />
<ClCompile Include="uchar.cpp" />
<ClCompile Include="characterproperties.cpp" />
<ClCompile Include="emojiprops.cpp" />
<ClCompile Include="unames.cpp" />
<ClCompile Include="unifiedcache.cpp" />
<ClCompile Include="unifilt.cpp" />
@ -279,6 +281,7 @@
<ClInclude Include="ubidiimp.h" />
<ClInclude Include="brkeng.h" />
<ClInclude Include="dictbe.h" />
<ClInclude Include="lstmbe.h" />
<ClInclude Include="rbbidata.h" />
<ClInclude Include="rbbinode.h" />
<ClInclude Include="rbbirb.h" />
@ -363,6 +366,7 @@
<ClInclude Include="patternprops.h" />
<ClInclude Include="propname.h" />
<ClInclude Include="ruleiter.h" />
<ClInclude Include="emojiprops.h" />
<ClInclude Include="ucase.h" />
<ClInclude Include="ulayout_props.h" />
<ClInclude Include="unisetspan.h" />

View File

@ -73,6 +73,9 @@
<ClCompile Include="dictbe.cpp">
<Filter>break iteration</Filter>
</ClCompile>
<ClCompile Include="lstmbe.cpp">
<Filter>break iteration</Filter>
</ClCompile>
<ClCompile Include="rbbi.cpp">
<Filter>break iteration</Filter>
</ClCompile>
@ -409,6 +412,9 @@
<ClCompile Include="characterproperties.cpp">
<Filter>properties &amp; sets</Filter>
</ClCompile>
<ClCompile Include="emojiprops.cpp">
<Filter>properties &amp; sets</Filter>
</ClCompile>
<ClCompile Include="propname.cpp">
<Filter>properties &amp; sets</Filter>
</ClCompile>
@ -651,6 +657,9 @@
<ClInclude Include="dictbe.h">
<Filter>break iteration</Filter>
</ClInclude>
<ClInclude Include="lstmbe.h">
<Filter>break iteration</Filter>
</ClInclude>
<ClInclude Include="rbbidata.h">
<Filter>break iteration</Filter>
</ClInclude>
@ -888,6 +897,9 @@
<ClInclude Include="ruleiter.h">
<Filter>properties &amp; sets</Filter>
</ClInclude>
<ClInclude Include="emojiprops.h">
<Filter>properties &amp; sets</Filter>
</ClInclude>
<ClInclude Include="ucase.h">
<Filter>properties &amp; sets</Filter>
</ClInclude>

View File

@ -125,7 +125,7 @@
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<AdditionalDependencies>vccorlib.lib;msvcrt.lib;vcruntime.lib;%(AdditionalDependencies)</AdditionalDependencies>
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc68.dll</OutputFile>
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc$(IcuMajorVersion).dll</OutputFile>
<ProgramDatabaseFile>.\..\..\$(IcuLibOutputDir)\icuuc.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\$(IcuLibOutputDir)\icuuc.lib</ImportLibrary>
</Link>
@ -148,7 +148,7 @@
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>vccorlibd.lib;msvcrtd.lib;vcruntimed.lib;%(AdditionalDependencies)</AdditionalDependencies>
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc68d.dll</OutputFile>
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc$(IcuMajorVersion)d.dll</OutputFile>
<ProgramDatabaseFile>.\..\..\$(IcuLibOutputDir)\icuucd.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\$(IcuLibOutputDir)\icuucd.lib</ImportLibrary>
</Link>
@ -221,6 +221,7 @@
<ClCompile Include="brkeng.cpp" />
<ClCompile Include="brkiter.cpp" />
<ClCompile Include="dictbe.cpp" />
<ClCompile Include="lstmbe.cpp" />
<ClCompile Include="pluralmap.cpp" />
<ClCompile Include="rbbi.cpp" />
<ClCompile Include="rbbidata.cpp" />
@ -337,6 +338,7 @@
<ClCompile Include="ucase.cpp" />
<ClCompile Include="uchar.cpp" />
<ClCompile Include="characterproperties.cpp" />
<ClCompile Include="emojiprops.cpp" />
<ClCompile Include="unames.cpp" />
<ClCompile Include="unifiedcache.cpp" />
<ClCompile Include="unifilt.cpp" />
@ -414,6 +416,7 @@
<ClInclude Include="ubidiimp.h" />
<ClInclude Include="brkeng.h" />
<ClInclude Include="dictbe.h" />
<ClInclude Include="lstmbe.h" />
<ClInclude Include="rbbidata.h" />
<ClInclude Include="rbbinode.h" />
<ClInclude Include="rbbirb.h" />
@ -498,6 +501,7 @@
<ClInclude Include="patternprops.h" />
<ClInclude Include="propname.h" />
<ClInclude Include="ruleiter.h" />
<ClInclude Include="emojiprops.h" />
<ClInclude Include="ucase.h" />
<ClInclude Include="ulayout_props.h" />
<ClInclude Include="unisetspan.h" />

View File

@ -1,6 +0,0 @@
cstr.o cstr.d : cstr.cpp unicode/utypes.h unicode/umachine.h unicode/ptypes.h \
unicode/platform.h unicode/uconfig.h unicode/uvernum.h \
unicode/urename.h unicode/uversion.h unicode/putil.h unicode/unistr.h \
unicode/char16ptr.h unicode/rep.h unicode/uobject.h \
unicode/std_string.h unicode/stringpiece.h unicode/bytestream.h cstr.h \
charstr.h cmemory.h unicode/localpointer.h uinvchar.h

View File

@ -28,7 +28,7 @@
* default code page conversion, which will do the best job possible,
* but may be lossy, depending on the platform.
*
* If no other conversion is available, use invariant conversion and substitue
* If no other conversion is available, use invariant conversion and substitute
* '?' for non-invariant characters.
*
* Example Usage:
@ -51,8 +51,8 @@ class U_COMMON_API CStr : public UMemory {
private:
CharString s;
CStr(const CStr &other); // Forbid copying of this class.
CStr &operator =(const CStr &other); // Forbid assignment.
CStr(const CStr &other) = delete; // Forbid copying of this class.
CStr &operator =(const CStr &other) = delete; // Forbid assignment.
};
U_NAMESPACE_END

View File

@ -1,4 +0,0 @@
cstring.o cstring.d : cstring.cpp unicode/utypes.h unicode/umachine.h \
unicode/ptypes.h unicode/platform.h unicode/uconfig.h \
unicode/uvernum.h unicode/urename.h unicode/uversion.h cmemory.h \
unicode/localpointer.h unicode/uobject.h cstring.h uassert.h

View File

@ -1,3 +0,0 @@
cwchar.o cwchar.d : cwchar.cpp unicode/utypes.h unicode/umachine.h unicode/ptypes.h \
unicode/platform.h unicode/uconfig.h unicode/uvernum.h \
unicode/urename.h unicode/uversion.h

View File

@ -17,7 +17,10 @@
#include "dictbe.h"
#include "unicode/uniset.h"
#include "unicode/chariter.h"
#include "unicode/resbund.h"
#include "unicode/ubrk.h"
#include "unicode/usetiter.h"
#include "ubrkimpl.h"
#include "utracimp.h"
#include "uvectr32.h"
#include "uvector.h"
@ -47,7 +50,10 @@ int32_t
DictionaryBreakEngine::findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
UVector32 &foundBreaks ) const {
UVector32 &foundBreaks,
UBool isPhraseBreaking,
UErrorCode& status) const {
if (U_FAILURE(status)) return 0;
(void)startPos; // TODO: remove this param?
int32_t result = 0;
@ -66,7 +72,7 @@ DictionaryBreakEngine::findBreaks( UText *text,
}
rangeStart = start;
rangeEnd = current;
result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks);
result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks, isPhraseBreaking, status);
utext_setNativeIndex(text, current);
return result;
@ -113,7 +119,7 @@ public:
// Select the currently marked candidate, point after it in the text, and invalidate self
int32_t acceptMarked( UText *text );
// Back up from the current candidate to the next shorter one; return TRUE if that exists
// Back up from the current candidate to the next shorter one; return true if that exists
// and point the text after it
UBool backUp( UText *text );
@ -159,9 +165,9 @@ UBool
PossibleWord::backUp( UText *text ) {
if (current > 0) {
utext_setNativeIndex(text, offset + cuLengths[--current]);
return TRUE;
return true;
}
return FALSE;
return false;
}
/*
@ -179,7 +185,7 @@ static const int32_t THAI_ROOT_COMBINE_THRESHOLD = 3;
// dictionary word, with a preceding word
static const int32_t THAI_PREFIX_COMBINE_THRESHOLD = 3;
// Ellision character
// Elision character
static const int32_t THAI_PAIYANNOI = 0x0E2F;
// Repeat character
@ -197,13 +203,13 @@ ThaiBreakEngine::ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode
{
UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Thai");
fThaiWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]]"), status);
UnicodeSet thaiWordSet(UnicodeString(u"[[:Thai:]&[:LineBreak=SA:]]"), status);
if (U_SUCCESS(status)) {
setCharacters(fThaiWordSet);
setCharacters(thaiWordSet);
}
fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]&[:M:]]"), status);
fMarkSet.applyPattern(UnicodeString(u"[[:Thai:]&[:LineBreak=SA:]&[:M:]]"), status);
fMarkSet.add(0x0020);
fEndWordSet = fThaiWordSet;
fEndWordSet = thaiWordSet;
fEndWordSet.remove(0x0E31); // MAI HAN-AKAT
fEndWordSet.remove(0x0E40, 0x0E44); // SARA E through SARA AI MAIMALAI
fBeginWordSet.add(0x0E01, 0x0E2E); // KO KAI through HO NOKHUK
@ -227,7 +233,10 @@ int32_t
ThaiBreakEngine::divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UVector32 &foundBreaks ) const {
UVector32 &foundBreaks,
UBool /* isPhraseBreaking */,
UErrorCode& status) const {
if (U_FAILURE(status)) return 0;
utext_setNativeIndex(text, rangeStart);
utext_moveIndex32(text, THAI_MIN_WORD_SPAN);
if (utext_getNativeIndex(text) >= rangeEnd) {
@ -240,7 +249,6 @@ ThaiBreakEngine::divideUpDictionaryRange( UText *text,
int32_t cpWordLength = 0; // Word Length in Code Points.
int32_t cuWordLength = 0; // Word length in code units (UText native indexing)
int32_t current;
UErrorCode status = U_ZERO_ERROR;
PossibleWord words[THAI_LOOKAHEAD];
utext_setNativeIndex(text, rangeStart);
@ -265,13 +273,9 @@ ThaiBreakEngine::divideUpDictionaryRange( UText *text,
goto foundBest;
}
do {
int32_t wordsMatched = 1;
if (words[(wordsFound + 1) % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
if (wordsMatched < 2) {
// Followed by another dictionary word; mark first word as a good candidate
words[wordsFound%THAI_LOOKAHEAD].markCurrent();
wordsMatched = 2;
}
// Followed by another dictionary word; mark first word as a good candidate
words[wordsFound%THAI_LOOKAHEAD].markCurrent();
// If we're already at the end of the range, we're done
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
@ -442,13 +446,13 @@ LaoBreakEngine::LaoBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &s
{
UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Laoo");
fLaoWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Laoo:]&[:LineBreak=SA:]]"), status);
UnicodeSet laoWordSet(UnicodeString(u"[[:Laoo:]&[:LineBreak=SA:]]"), status);
if (U_SUCCESS(status)) {
setCharacters(fLaoWordSet);
setCharacters(laoWordSet);
}
fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Laoo:]&[:LineBreak=SA:]&[:M:]]"), status);
fMarkSet.applyPattern(UnicodeString(u"[[:Laoo:]&[:LineBreak=SA:]&[:M:]]"), status);
fMarkSet.add(0x0020);
fEndWordSet = fLaoWordSet;
fEndWordSet = laoWordSet;
fEndWordSet.remove(0x0EC0, 0x0EC4); // prefix vowels
fBeginWordSet.add(0x0E81, 0x0EAE); // basic consonants (including holes for corresponding Thai characters)
fBeginWordSet.add(0x0EDC, 0x0EDD); // digraph consonants (no Thai equivalent)
@ -469,7 +473,10 @@ int32_t
LaoBreakEngine::divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UVector32 &foundBreaks ) const {
UVector32 &foundBreaks,
UBool /* isPhraseBreaking */,
UErrorCode& status) const {
if (U_FAILURE(status)) return 0;
if ((rangeEnd - rangeStart) < LAO_MIN_WORD_SPAN) {
return 0; // Not enough characters for two words
}
@ -478,11 +485,10 @@ LaoBreakEngine::divideUpDictionaryRange( UText *text,
int32_t cpWordLength = 0;
int32_t cuWordLength = 0;
int32_t current;
UErrorCode status = U_ZERO_ERROR;
PossibleWord words[LAO_LOOKAHEAD];
utext_setNativeIndex(text, rangeStart);
while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
cuWordLength = 0;
cpWordLength = 0;
@ -503,13 +509,9 @@ LaoBreakEngine::divideUpDictionaryRange( UText *text,
goto foundBest;
}
do {
int32_t wordsMatched = 1;
if (words[(wordsFound + 1) % LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
if (wordsMatched < 2) {
// Followed by another dictionary word; mark first word as a good candidate
words[wordsFound%LAO_LOOKAHEAD].markCurrent();
wordsMatched = 2;
}
// Followed by another dictionary word; mark first word as a good candidate
words[wordsFound%LAO_LOOKAHEAD].markCurrent();
// If we're already at the end of the range, we're done
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
@ -535,7 +537,7 @@ foundBest:
}
// We come here after having either found a word or not. We look ahead to the
// next word. If it's not a dictionary word, we will combine it withe the word we
// next word. If it's not a dictionary word, we will combine it with the word we
// just found (if there is one), but only if the preceding word does not exceed
// the threshold.
// The text iterator should now be positioned at the end of the word we found.
@ -641,14 +643,13 @@ BurmeseBreakEngine::BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErro
{
UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Mymr");
fBurmeseWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Mymr:]&[:LineBreak=SA:]]"), status);
if (U_SUCCESS(status)) {
setCharacters(fBurmeseWordSet);
}
fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Mymr:]&[:LineBreak=SA:]&[:M:]]"), status);
fMarkSet.add(0x0020);
fEndWordSet = fBurmeseWordSet;
fBeginWordSet.add(0x1000, 0x102A); // basic consonants and independent vowels
fEndWordSet.applyPattern(UnicodeString(u"[[:Mymr:]&[:LineBreak=SA:]]"), status);
fMarkSet.applyPattern(UnicodeString(u"[[:Mymr:]&[:LineBreak=SA:]&[:M:]]"), status);
fMarkSet.add(0x0020);
if (U_SUCCESS(status)) {
setCharacters(fEndWordSet);
}
// Compact for caching.
fMarkSet.compact();
@ -665,7 +666,10 @@ int32_t
BurmeseBreakEngine::divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UVector32 &foundBreaks ) const {
UVector32 &foundBreaks,
UBool /* isPhraseBreaking */,
UErrorCode& status ) const {
if (U_FAILURE(status)) return 0;
if ((rangeEnd - rangeStart) < BURMESE_MIN_WORD_SPAN) {
return 0; // Not enough characters for two words
}
@ -674,11 +678,10 @@ BurmeseBreakEngine::divideUpDictionaryRange( UText *text,
int32_t cpWordLength = 0;
int32_t cuWordLength = 0;
int32_t current;
UErrorCode status = U_ZERO_ERROR;
PossibleWord words[BURMESE_LOOKAHEAD];
utext_setNativeIndex(text, rangeStart);
while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
cuWordLength = 0;
cpWordLength = 0;
@ -699,13 +702,9 @@ BurmeseBreakEngine::divideUpDictionaryRange( UText *text,
goto foundBest;
}
do {
int32_t wordsMatched = 1;
if (words[(wordsFound + 1) % BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
if (wordsMatched < 2) {
// Followed by another dictionary word; mark first word as a good candidate
words[wordsFound%BURMESE_LOOKAHEAD].markCurrent();
wordsMatched = 2;
}
// Followed by another dictionary word; mark first word as a good candidate
words[wordsFound%BURMESE_LOOKAHEAD].markCurrent();
// If we're already at the end of the range, we're done
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
@ -731,7 +730,7 @@ foundBest:
}
// We come here after having either found a word or not. We look ahead to the
// next word. If it's not a dictionary word, we will combine it withe the word we
// next word. If it's not a dictionary word, we will combine it with the word we
// just found (if there is one), but only if the preceding word does not exceed
// the threshold.
// The text iterator should now be positioned at the end of the word we found.
@ -837,13 +836,13 @@ KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCod
{
UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Khmr");
fKhmerWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]]"), status);
UnicodeSet khmerWordSet(UnicodeString(u"[[:Khmr:]&[:LineBreak=SA:]]"), status);
if (U_SUCCESS(status)) {
setCharacters(fKhmerWordSet);
setCharacters(khmerWordSet);
}
fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]&[:M:]]"), status);
fMarkSet.applyPattern(UnicodeString(u"[[:Khmr:]&[:LineBreak=SA:]&[:M:]]"), status);
fMarkSet.add(0x0020);
fEndWordSet = fKhmerWordSet;
fEndWordSet = khmerWordSet;
fBeginWordSet.add(0x1780, 0x17B3);
//fBeginWordSet.add(0x17A3, 0x17A4); // deprecated vowels
//fEndWordSet.remove(0x17A5, 0x17A9); // Khmer independent vowels that can't end a word
@ -873,7 +872,10 @@ int32_t
KhmerBreakEngine::divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UVector32 &foundBreaks ) const {
UVector32 &foundBreaks,
UBool /* isPhraseBreaking */,
UErrorCode& status ) const {
if (U_FAILURE(status)) return 0;
if ((rangeEnd - rangeStart) < KHMER_MIN_WORD_SPAN) {
return 0; // Not enough characters for two words
}
@ -882,7 +884,6 @@ KhmerBreakEngine::divideUpDictionaryRange( UText *text,
int32_t cpWordLength = 0;
int32_t cuWordLength = 0;
int32_t current;
UErrorCode status = U_ZERO_ERROR;
PossibleWord words[KHMER_LOOKAHEAD];
utext_setNativeIndex(text, rangeStart);
@ -908,13 +909,9 @@ KhmerBreakEngine::divideUpDictionaryRange( UText *text,
goto foundBest;
}
do {
int32_t wordsMatched = 1;
if (words[(wordsFound + 1) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
if (wordsMatched < 2) {
// Followed by another dictionary word; mark first word as a good candidate
words[wordsFound % KHMER_LOOKAHEAD].markCurrent();
wordsMatched = 2;
}
// Followed by another dictionary word; mark first word as a good candidate
words[wordsFound % KHMER_LOOKAHEAD].markCurrent();
// If we're already at the end of the range, we're done
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
@ -1060,25 +1057,27 @@ CjkBreakEngine::CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType
: DictionaryBreakEngine(), fDictionary(adoptDictionary) {
UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Hani");
// Korean dictionary only includes Hangul syllables
fHangulWordSet.applyPattern(UNICODE_STRING_SIMPLE("[\\uac00-\\ud7a3]"), status);
fHanWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Han:]"), status);
fKatakanaWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Katakana:]\\uff9e\\uff9f]"), status);
fHiraganaWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Hiragana:]"), status);
nfkcNorm2 = Normalizer2::getNFKCInstance(status);
// Korean dictionary only includes Hangul syllables
fHangulWordSet.applyPattern(UnicodeString(u"[\\uac00-\\ud7a3]"), status);
fHangulWordSet.compact();
// Digits, open punctuation and Alphabetic characters.
fDigitOrOpenPunctuationOrAlphabetSet.applyPattern(
UnicodeString(u"[[:Nd:][:Pi:][:Ps:][:Alphabetic:]]"), status);
fDigitOrOpenPunctuationOrAlphabetSet.compact();
fClosePunctuationSet.applyPattern(UnicodeString(u"[[:Pc:][:Pd:][:Pe:][:Pf:][:Po:]]"), status);
fClosePunctuationSet.compact();
if (U_SUCCESS(status)) {
// handle Korean and Japanese/Chinese using different dictionaries
if (type == kKorean) {
// handle Korean and Japanese/Chinese using different dictionaries
if (type == kKorean) {
if (U_SUCCESS(status)) {
setCharacters(fHangulWordSet);
} else { //Chinese and Japanese
UnicodeSet cjSet;
cjSet.addAll(fHanWordSet);
cjSet.addAll(fKatakanaWordSet);
cjSet.addAll(fHiraganaWordSet);
cjSet.add(0xFF70); // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
cjSet.add(0x30FC); // KATAKANA-HIRAGANA PROLONGED SOUND MARK
}
} else { //Chinese and Japanese
UnicodeSet cjSet(UnicodeString(u"[[:Han:][:Hiragana:][:Katakana:]\\u30fc\\uff70\\uff9e\\uff9f]"), status);
if (U_SUCCESS(status)) {
setCharacters(cjSet);
initJapanesePhraseParameter(status);
}
}
UTRACE_EXIT_STATUS(status);
@ -1106,14 +1105,12 @@ static inline bool isKatakana(UChar32 value) {
(value >= 0xFF66 && value <= 0xFF9f);
}
// Function for accessing internal utext flags.
// Replicates an internal UText function.
static inline int32_t utext_i32_flag(int32_t bitIndex) {
return (int32_t)1 << bitIndex;
}
/*
* @param text A UText representing the text
@ -1126,7 +1123,10 @@ int32_t
CjkBreakEngine::divideUpDictionaryRange( UText *inText,
int32_t rangeStart,
int32_t rangeEnd,
UVector32 &foundBreaks ) const {
UVector32 &foundBreaks,
UBool isPhraseBreaking,
UErrorCode& status) const {
if (U_FAILURE(status)) return 0;
if (rangeStart >= rangeEnd) {
return 0;
}
@ -1138,9 +1138,6 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
// If NULL then mapping is 1:1
LocalPointer<UVector32> inputMap;
UErrorCode status = U_ZERO_ERROR;
// if UText has the input string as one contiguous UTF-16 chunk
if ((inText->providerProperties & utext_i32_flag(UTEXT_PROVIDER_STABLE_CHUNKS)) &&
inText->chunkNativeStart <= rangeStart &&
@ -1149,7 +1146,7 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
// Input UText is in one contiguous UTF-16 chunk.
// Use Read-only aliasing UnicodeString.
inString.setTo(FALSE,
inString.setTo(false,
inText->chunkContents + rangeStart - inText->chunkNativeStart,
rangeEnd - rangeStart);
} else {
@ -1358,6 +1355,31 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
if ((uint32_t)bestSnlp.elementAti(numCodePts) == kuint32max) {
t_boundary.addElement(numCodePts, status);
numBreaks++;
} else if (isPhraseBreaking) {
t_boundary.addElement(numCodePts, status);
if(U_SUCCESS(status)) {
numBreaks++;
int32_t prevIdx = numCodePts;
int32_t codeUnitIdx = -1;
int32_t prevCodeUnitIdx = -1;
int32_t length = -1;
for (int32_t i = prev.elementAti(numCodePts); i > 0; i = prev.elementAti(i)) {
codeUnitIdx = inString.moveIndex32(0, i);
prevCodeUnitIdx = inString.moveIndex32(0, prevIdx);
// Calculate the length by using the code unit.
length = prevCodeUnitIdx - codeUnitIdx;
prevIdx = i;
// Keep the breakpoint if the pattern is not in the fSkipSet and continuous Katakana
// characters don't occur.
if (!fSkipSet.containsKey(inString.tempSubString(codeUnitIdx, length))
&& (!isKatakana(inString.char32At(inString.moveIndex32(codeUnitIdx, -1)))
|| !isKatakana(inString.char32At(codeUnitIdx)))) {
t_boundary.addElement(i, status);
numBreaks++;
}
}
}
} else {
for (int32_t i = numCodePts; i > 0; i = prev.elementAti(i)) {
t_boundary.addElement(i, status);
@ -1378,7 +1400,8 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
// while reversing t_boundary and pushing values to foundBreaks.
int32_t prevCPPos = -1;
int32_t prevUTextPos = -1;
for (int32_t i = numBreaks-1; i >= 0; i--) {
int32_t correctedNumBreaks = 0;
for (int32_t i = numBreaks - 1; i >= 0; i--) {
int32_t cpPos = t_boundary.elementAti(i);
U_ASSERT(cpPos > prevCPPos);
int32_t utextPos = inputMap.isValid() ? inputMap->elementAti(cpPos) : cpPos + rangeStart;
@ -1386,7 +1409,15 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
if (utextPos > prevUTextPos) {
// Boundaries are added to foundBreaks output in ascending order.
U_ASSERT(foundBreaks.size() == 0 || foundBreaks.peeki() < utextPos);
foundBreaks.push(utextPos, status);
// In phrase breaking, there has to be a breakpoint between Cj character and close
// punctuation.
// E.g.[携帯電話]正しい選択 -> [携帯▁電話]▁正しい▁選択 -> breakpoint between ] and 正
if (utextPos != rangeStart
|| (isPhraseBreaking && utextPos > 0
&& fClosePunctuationSet.contains(utext_char32At(inText, utextPos - 1)))) {
foundBreaks.push(utextPos, status);
correctedNumBreaks++;
}
} else {
// Normalization expanded the input text, the dictionary found a boundary
// within the expansion, giving two boundaries with the same index in the
@ -1398,9 +1429,52 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
}
(void)prevCPPos; // suppress compiler warnings about unused variable
UChar32 nextChar = utext_char32At(inText, rangeEnd);
if (!foundBreaks.isEmpty() && foundBreaks.peeki() == rangeEnd) {
// In phrase breaking, there has to be a breakpoint between Cj character and
// the number/open punctuation.
// E.g. る文字「そうだ、京都」->る▁文字▁「そうだ、▁京都」-> breakpoint between 字 and「
// E.g. 乗車率90%程度だろうか -> 乗車▁率▁90%▁程度だろうか -> breakpoint between 率 and 9
// E.g. しかもロゴがUnicode! -> しかも▁ロゴが▁Unicode!-> breakpoint between が and U
if (isPhraseBreaking) {
if (!fDigitOrOpenPunctuationOrAlphabetSet.contains(nextChar)) {
foundBreaks.popi();
correctedNumBreaks--;
}
} else {
foundBreaks.popi();
correctedNumBreaks--;
}
}
// inString goes out of scope
// inputMap goes out of scope
return numBreaks;
return correctedNumBreaks;
}
// Fills fSkipSet with the entries consulted by the phrase-breaking path:
// first the "extensions" strings from the "ja" break-iterator bundle, then
// every single Hiragana code point. Both helpers report through `error`.
void CjkBreakEngine::initJapanesePhraseParameter(UErrorCode& error) {
loadJapaneseExtensions(error);
loadHiragana(error);
}
// Adds every string of the "extensions" resource in the "ja" break-iterator
// bundle to fSkipSet (stored with the value 1).
// Nothing is read if the bundle cannot be opened; iteration stops as soon as
// `error` reports a failure.
void CjkBreakEngine::loadJapaneseExtensions(UErrorCode& error) {
    ResourceBundle ja(U_ICUDATA_BRKITR, "ja", error);
    if (U_FAILURE(error)) {
        return;
    }
    ResourceBundle extensions = ja.get("extensions", error);
    for (; U_SUCCESS(error) && extensions.hasNext(); ) {
        fSkipSet.puti(extensions.getNextString(error), 1, error);
    }
}
// Adds each code point matched by [:Hiragana:] to fSkipSet as a
// one-code-point string key (stored with the value 1).
void CjkBreakEngine::loadHiragana(UErrorCode& error) {
    UnicodeSet hiragana(UnicodeString(u"[:Hiragana:]"), error);
    hiragana.compact();
    for (UnicodeSetIterator it(hiragana); it.next(); ) {
        fSkipSet.puti(UnicodeString(it.getCodepoint()), 1, error);
    }
}
#endif

View File

@ -1,14 +0,0 @@
dictbe.o dictbe.d : dictbe.cpp unicode/utypes.h unicode/umachine.h unicode/ptypes.h \
unicode/platform.h unicode/uconfig.h unicode/uvernum.h \
unicode/urename.h unicode/uversion.h brkeng.h unicode/uobject.h \
unicode/utext.h unicode/uchar.h unicode/stringoptions.h \
unicode/ucpmap.h unicode/localpointer.h unicode/rep.h unicode/unistr.h \
unicode/char16ptr.h unicode/std_string.h unicode/stringpiece.h \
unicode/bytestream.h unicode/chariter.h unicode/uscript.h dictbe.h \
unicode/uniset.h unicode/unifilt.h unicode/unifunct.h \
unicode/unimatch.h unicode/uset.h uvectr32.h uhash.h cmemory.h \
uelement.h uassert.h unicode/ubrk.h unicode/uloc.h unicode/uenum.h \
unicode/parseerr.h utracimp.h unicode/utrace.h uvector.h uarrsort.h \
unicode/normlzr.h unicode/normalizer2.h unicode/unorm2.h \
unicode/unorm.h unicode/uiter.h dictionarydata.h unicode/udata.h \
udataswp.h unicode/ustringtrie.h

View File

@ -15,6 +15,7 @@
#include "unicode/utext.h"
#include "brkeng.h"
#include "hash.h"
#include "uvectr32.h"
U_NAMESPACE_BEGIN
@ -62,23 +63,26 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
* @return true if this engine handles the particular character and break
* type.
*/
virtual UBool handles(UChar32 c) const;
virtual UBool handles(UChar32 c) const override;
/**
* <p>Find any breaks within a run in the supplied text.</p>
*
* @param text A UText representing the text. The iterator is left at
* the end of the run of characters which the engine is capable of handling
* the end of the run of characters which the engine is capable of handling
* that starts from the first character in the range.
* @param startPos The start of the run within the supplied text.
* @param endPos The end of the run within the supplied text.
* @param foundBreaks vector of int32_t to receive the break positions
* @param status Information on any errors encountered.
* @return The number of breaks found.
*/
virtual int32_t findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
UVector32 &foundBreaks ) const;
UVector32 &foundBreaks,
UBool isPhraseBreaking,
UErrorCode& status ) const override;
protected:
@ -96,12 +100,15 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
* @param rangeStart The start of the range of dictionary characters
* @param rangeEnd The end of the range of dictionary characters
* @param foundBreaks Output of C array of int32_t break positions, or 0
* @param status Information on any errors encountered.
* @return The number of breaks found
*/
virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UVector32 &foundBreaks ) const = 0;
UVector32 &foundBreaks,
UBool isPhraseBreaking,
UErrorCode& status) const = 0;
};
@ -123,7 +130,6 @@ class ThaiBreakEngine : public DictionaryBreakEngine {
* @internal
*/
UnicodeSet fThaiWordSet;
UnicodeSet fEndWordSet;
UnicodeSet fBeginWordSet;
UnicodeSet fSuffixSet;
@ -153,12 +159,15 @@ class ThaiBreakEngine : public DictionaryBreakEngine {
* @param rangeStart The start of the range of dictionary characters
* @param rangeEnd The end of the range of dictionary characters
* @param foundBreaks Output of C array of int32_t break positions, or 0
* @param status Information on any errors encountered.
* @return The number of breaks found
*/
virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UVector32 &foundBreaks ) const;
UVector32 &foundBreaks,
UBool isPhraseBreaking,
UErrorCode& status) const override;
};
@ -180,7 +189,6 @@ class LaoBreakEngine : public DictionaryBreakEngine {
* @internal
*/
UnicodeSet fLaoWordSet;
UnicodeSet fEndWordSet;
UnicodeSet fBeginWordSet;
UnicodeSet fMarkSet;
@ -209,127 +217,134 @@ class LaoBreakEngine : public DictionaryBreakEngine {
* @param rangeStart The start of the range of dictionary characters
* @param rangeEnd The end of the range of dictionary characters
* @param foundBreaks Output of C array of int32_t break positions, or 0
* @param status Information on any errors encountered.
* @return The number of breaks found
*/
virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UVector32 &foundBreaks ) const;
UVector32 &foundBreaks,
UBool isPhraseBreaking,
UErrorCode& status) const override;
};
/*******************************************************************
* BurmeseBreakEngine
*/
/**
* <p>BurmeseBreakEngine is a kind of DictionaryBreakEngine that uses a
* DictionaryMatcher and heuristics to determine Burmese-specific breaks.</p>
*
* <p>After it is constructed a BurmeseBreakEngine may be shared between
* threads without synchronization.</p>
*/
class BurmeseBreakEngine : public DictionaryBreakEngine {
private:
/**
* The set of characters handled by this engine
* @internal
*/
UnicodeSet fEndWordSet;
UnicodeSet fBeginWordSet;
UnicodeSet fMarkSet;
DictionaryMatcher *fDictionary;
public:
/**
* <p>Default constructor.</p>
*
* @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
* engine is deleted.
*/
BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
/**
* <p>Virtual destructor.</p>
*/
virtual ~BurmeseBreakEngine();
protected:
/**
* <p>Divide up a range of known dictionary characters.</p>
*
* @param text A UText representing the text
* @param rangeStart The start of the range of dictionary characters
* @param rangeEnd The end of the range of dictionary characters
* @param foundBreaks Output of C array of int32_t break positions, or 0
* @param status Information on any errors encountered.
* @return The number of breaks found
*/
virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UVector32 &foundBreaks,
UBool isPhraseBreaking,
UErrorCode& status) const override;
};
/*******************************************************************
* KhmerBreakEngine
*/
/**
* <p>KhmerBreakEngine is a kind of DictionaryBreakEngine that uses a
* DictionaryMatcher and heuristics to determine Khmer-specific breaks.</p>
*
* <p>After it is constructed a KhmerBreakEngine may be shared between
* threads without synchronization.</p>
*/
class KhmerBreakEngine : public DictionaryBreakEngine {
private:
/**
* The set of characters handled by this engine
* @internal
*/
UnicodeSet fEndWordSet;
UnicodeSet fBeginWordSet;
UnicodeSet fMarkSet;
DictionaryMatcher *fDictionary;
public:
/**
* <p>Default constructor.</p>
*
* @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
* engine is deleted.
*/
KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
/**
* <p>Virtual destructor.</p>
*/
virtual ~KhmerBreakEngine();
protected:
/**
* <p>Divide up a range of known dictionary characters.</p>
*
* @param text A UText representing the text
* @param rangeStart The start of the range of dictionary characters
* @param rangeEnd The end of the range of dictionary characters
* @param foundBreaks Output of C array of int32_t break positions, or 0
* @param status Information on any errors encountered.
* @return The number of breaks found
*/
virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UVector32 &foundBreaks,
UBool isPhraseBreaking,
UErrorCode& status) const override;
};
/*******************************************************************
* BurmeseBreakEngine
*/
/**
* <p>BurmeseBreakEngine is a kind of DictionaryBreakEngine that uses a
* DictionaryMatcher and heuristics to determine Burmese-specific breaks.</p>
*
* <p>After it is constructed a BurmeseBreakEngine may be shared between
* threads without synchronization.</p>
*/
class BurmeseBreakEngine : public DictionaryBreakEngine {
private:
/**
* The set of characters handled by this engine
* @internal
*/
UnicodeSet fBurmeseWordSet;
UnicodeSet fEndWordSet;
UnicodeSet fBeginWordSet;
UnicodeSet fMarkSet;
DictionaryMatcher *fDictionary;
public:
/**
* <p>Default constructor.</p>
*
* @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
* engine is deleted.
*/
BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
/**
* <p>Virtual destructor.</p>
*/
virtual ~BurmeseBreakEngine();
protected:
/**
* <p>Divide up a range of known dictionary characters.</p>
*
* @param text A UText representing the text
* @param rangeStart The start of the range of dictionary characters
* @param rangeEnd The end of the range of dictionary characters
* @param foundBreaks Output of C array of int32_t break positions, or 0
* @return The number of breaks found
*/
virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UVector32 &foundBreaks ) const;
};
/*******************************************************************
* KhmerBreakEngine
*/
/**
* <p>KhmerBreakEngine is a kind of DictionaryBreakEngine that uses a
* DictionaryMatcher and heuristics to determine Khmer-specific breaks.</p>
*
* <p>After it is constructed a KhmerBreakEngine may be shared between
* threads without synchronization.</p>
*/
class KhmerBreakEngine : public DictionaryBreakEngine {
private:
/**
* The set of characters handled by this engine
* @internal
*/
UnicodeSet fKhmerWordSet;
UnicodeSet fEndWordSet;
UnicodeSet fBeginWordSet;
UnicodeSet fMarkSet;
DictionaryMatcher *fDictionary;
public:
/**
* <p>Default constructor.</p>
*
* @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
* engine is deleted.
*/
KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
/**
* <p>Virtual destructor.</p>
*/
virtual ~KhmerBreakEngine();
protected:
/**
* <p>Divide up a range of known dictionary characters.</p>
*
* @param text A UText representing the text
* @param rangeStart The start of the range of dictionary characters
* @param rangeEnd The end of the range of dictionary characters
* @param foundBreaks Output of C array of int32_t break positions, or 0
* @return The number of breaks found
*/
virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UVector32 &foundBreaks ) const;
};
#if !UCONFIG_NO_NORMALIZATION
/*******************************************************************
@ -354,13 +369,22 @@ class CjkBreakEngine : public DictionaryBreakEngine {
* @internal
*/
UnicodeSet fHangulWordSet;
UnicodeSet fHanWordSet;
UnicodeSet fKatakanaWordSet;
UnicodeSet fHiraganaWordSet;
UnicodeSet fDigitOrOpenPunctuationOrAlphabetSet;
UnicodeSet fClosePunctuationSet;
DictionaryMatcher *fDictionary;
const Normalizer2 *nfkcNorm2;
private:
// Load Japanese extensions.
void loadJapaneseExtensions(UErrorCode& error);
// Load Japanese Hiragana.
void loadHiragana(UErrorCode& error);
// Initialize fSkipSet by loading Japanese Hiragana and extensions.
void initJapanesePhraseParameter(UErrorCode& error);
Hashtable fSkipSet;
public:
/**
@ -385,12 +409,15 @@ class CjkBreakEngine : public DictionaryBreakEngine {
* @param rangeStart The start of the range of dictionary characters
* @param rangeEnd The end of the range of dictionary characters
* @param foundBreaks Output of C array of int32_t break positions, or 0
* @param status Information on any errors encountered.
* @return The number of breaks found
*/
virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UVector32 &foundBreaks ) const;
UVector32 &foundBreaks,
UBool isPhraseBreaking,
UErrorCode& status) const override;
};

View File

@ -1,9 +0,0 @@
dictionarydata.o dictionarydata.d : dictionarydata.cpp dictionarydata.h unicode/utypes.h \
unicode/umachine.h unicode/ptypes.h unicode/platform.h \
unicode/uconfig.h unicode/uvernum.h unicode/urename.h \
unicode/uversion.h unicode/utext.h unicode/uchar.h \
unicode/stringoptions.h unicode/ucpmap.h unicode/localpointer.h \
unicode/rep.h unicode/uobject.h unicode/unistr.h unicode/char16ptr.h \
unicode/std_string.h unicode/stringpiece.h unicode/bytestream.h \
unicode/chariter.h unicode/udata.h udataswp.h unicode/ustringtrie.h \
unicode/ucharstrie.h unicode/bytestrie.h cmemory.h

View File

@ -107,8 +107,8 @@ public:
virtual ~UCharsDictionaryMatcher();
virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit,
int32_t *lengths, int32_t *cpLengths, int32_t *values,
int32_t *prefix) const;
virtual int32_t getType() const;
int32_t *prefix) const override;
virtual int32_t getType() const override;
private:
const UChar *characters;
UDataMemory *file;
@ -125,8 +125,8 @@ public:
virtual ~BytesDictionaryMatcher();
virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit,
int32_t *lengths, int32_t *cpLengths, int32_t *values,
int32_t *prefix) const;
virtual int32_t getType() const;
int32_t *prefix) const override;
virtual int32_t getType() const override;
private:
UChar32 transform(UChar32 c) const;
@ -159,7 +159,7 @@ udict_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *out
* Constants are defined in the DictionaryData class.
*
* For the data structure of BytesTrie & UCharsTrie see
* http://site.icu-project.org/design/struct/tries
* https://icu.unicode.org/design/struct/tries
* and the bytestrie.h and ucharstrie.h header files.
*
* int32_t indexes[indexesLength]; -- indexesLength=indexes[IX_STRING_TRIE_OFFSET]/4;

View File

@ -53,7 +53,7 @@ DateInterval::clone() const {
}
UBool
bool
DateInterval::operator==(const DateInterval& other) const {
return ( fromDate == other.fromDate && toDate == other.toDate );
}

View File

@ -1,4 +0,0 @@
dtintrv.o dtintrv.d : dtintrv.cpp unicode/dtintrv.h unicode/utypes.h \
unicode/umachine.h unicode/ptypes.h unicode/platform.h \
unicode/uconfig.h unicode/uvernum.h unicode/urename.h \
unicode/uversion.h unicode/uobject.h

View File

@ -86,6 +86,7 @@ Edits &Edits::moveArray(Edits &src) U_NOEXCEPT {
}
Edits &Edits::operator=(const Edits &other) {
if (this == &other) { return *this; } // self-assignment: no-op
length = other.length;
delta = other.delta;
numChanges = other.numChanges;
@ -220,7 +221,7 @@ UBool Edits::growArray() {
// Not U_BUFFER_OVERFLOW_ERROR because that could be confused on a string transform API
// with a result-string-buffer overflow.
errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
return FALSE;
return false;
} else if (capacity >= (INT32_MAX / 2)) {
newCapacity = INT32_MAX;
} else {
@ -229,25 +230,25 @@ UBool Edits::growArray() {
// Grow by at least 5 units so that a maximal change record will fit.
if ((newCapacity - capacity) < 5) {
errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
return FALSE;
return false;
}
uint16_t *newArray = (uint16_t *)uprv_malloc((size_t)newCapacity * 2);
if (newArray == NULL) {
errorCode_ = U_MEMORY_ALLOCATION_ERROR;
return FALSE;
return false;
}
uprv_memcpy(newArray, array, (size_t)length * 2);
releaseArray();
array = newArray;
capacity = newCapacity;
return TRUE;
return true;
}
UBool Edits::copyErrorTo(UErrorCode &outErrorCode) const {
if (U_FAILURE(outErrorCode)) { return TRUE; }
if (U_SUCCESS(errorCode_)) { return FALSE; }
if (U_FAILURE(outErrorCode)) { return true; }
if (U_SUCCESS(errorCode_)) { return false; }
outErrorCode = errorCode_;
return TRUE;
return true;
}
Edits &Edits::mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode) {
@ -256,7 +257,7 @@ Edits &Edits::mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &error
// Parallel iteration over both Edits.
Iterator abIter = ab.getFineIterator();
Iterator bcIter = bc.getFineIterator();
UBool abHasNext = TRUE, bcHasNext = TRUE;
UBool abHasNext = true, bcHasNext = true;
// Copy iterator state into local variables, so that we can modify and subdivide spans.
// ab old & new length, bc old & new length
int32_t aLength = 0, ab_bLength = 0, bc_bLength = 0, cLength = 0;
@ -399,7 +400,7 @@ Edits &Edits::mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &error
Edits::Iterator::Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs) :
array(a), index(0), length(len), remaining(0),
onlyChanges_(oc), coarse(crs),
dir(0), changed(FALSE), oldLength_(0), newLength_(0),
dir(0), changed(false), oldLength_(0), newLength_(0),
srcIndex(0), replIndex(0), destIndex(0) {}
int32_t Edits::Iterator::readLength(int32_t head) {
@ -440,16 +441,16 @@ void Edits::Iterator::updatePreviousIndexes() {
UBool Edits::Iterator::noNext() {
// No change before or beyond the string.
dir = 0;
changed = FALSE;
changed = false;
oldLength_ = newLength_ = 0;
return FALSE;
return false;
}
UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
// Forward iteration: Update the string indexes to the limit of the current span,
// and post-increment-read array units to assemble a new span.
// Leaves the array index one after the last unit of that span.
if (U_FAILURE(errorCode)) { return FALSE; }
if (U_FAILURE(errorCode)) { return false; }
// We have an errorCode in case we need to start guarding against integer overflows.
// It is also convenient for caller loops if we bail out when an error was set elsewhere.
if (dir > 0) {
@ -463,7 +464,7 @@ UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
// Stay on the current one of a sequence of compressed changes.
++index; // next() rests on the index after the sequence unit.
dir = 1;
return TRUE;
return true;
}
}
dir = 1;
@ -472,7 +473,7 @@ UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
// Fine-grained iterator: Continue a sequence of compressed changes.
if (remaining > 1) {
--remaining;
return TRUE;
return true;
}
remaining = 0;
}
@ -482,7 +483,7 @@ UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
int32_t u = array[index++];
if (u <= MAX_UNCHANGED) {
// Combine adjacent unchanged ranges.
changed = FALSE;
changed = false;
oldLength_ = u + 1;
while (index < length && (u = array[index]) <= MAX_UNCHANGED) {
++index;
@ -497,10 +498,10 @@ UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
// already fetched u > MAX_UNCHANGED at index
++index;
} else {
return TRUE;
return true;
}
}
changed = TRUE;
changed = true;
if (u <= MAX_SHORT_CHANGE) {
int32_t oldLen = u >> 12;
int32_t newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH;
@ -515,14 +516,14 @@ UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
if (num > 1) {
remaining = num; // This is the first of two or more changes.
}
return TRUE;
return true;
}
} else {
U_ASSERT(u <= 0x7fff);
oldLength_ = readLength((u >> 6) & 0x3f);
newLength_ = readLength(u & 0x3f);
if (!coarse) {
return TRUE;
return true;
}
}
// Combine adjacent changes.
@ -538,14 +539,14 @@ UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
newLength_ += readLength(u & 0x3f);
}
}
return TRUE;
return true;
}
UBool Edits::Iterator::previous(UErrorCode &errorCode) {
// Backward iteration: Pre-decrement-read array units to assemble a new span,
// then update the string indexes to the start of that span.
// Leaves the array index on the head unit of that span.
if (U_FAILURE(errorCode)) { return FALSE; }
if (U_FAILURE(errorCode)) { return false; }
// We have an errorCode in case we need to start guarding against integer overflows.
// It is also convenient for caller loops if we bail out when an error was set elsewhere.
if (dir >= 0) {
@ -558,7 +559,7 @@ UBool Edits::Iterator::previous(UErrorCode &errorCode) {
// Stay on the current one of a sequence of compressed changes.
--index; // previous() rests on the sequence unit.
dir = -1;
return TRUE;
return true;
}
updateNextIndexes();
}
@ -571,7 +572,7 @@ UBool Edits::Iterator::previous(UErrorCode &errorCode) {
if (remaining <= (u & SHORT_CHANGE_NUM_MASK)) {
++remaining;
updatePreviousIndexes();
return TRUE;
return true;
}
remaining = 0;
}
@ -581,7 +582,7 @@ UBool Edits::Iterator::previous(UErrorCode &errorCode) {
int32_t u = array[--index];
if (u <= MAX_UNCHANGED) {
// Combine adjacent unchanged ranges.
changed = FALSE;
changed = false;
oldLength_ = u + 1;
while (index > 0 && (u = array[index - 1]) <= MAX_UNCHANGED) {
--index;
@ -590,9 +591,9 @@ UBool Edits::Iterator::previous(UErrorCode &errorCode) {
newLength_ = oldLength_;
// No need to handle onlyChanges as long as previous() is called only from findIndex().
updatePreviousIndexes();
return TRUE;
return true;
}
changed = TRUE;
changed = true;
if (u <= MAX_SHORT_CHANGE) {
int32_t oldLen = u >> 12;
int32_t newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH;
@ -608,7 +609,7 @@ UBool Edits::Iterator::previous(UErrorCode &errorCode) {
remaining = 1; // This is the last of two or more changes.
}
updatePreviousIndexes();
return TRUE;
return true;
}
} else {
if (u <= 0x7fff) {
@ -628,7 +629,7 @@ UBool Edits::Iterator::previous(UErrorCode &errorCode) {
}
if (!coarse) {
updatePreviousIndexes();
return TRUE;
return true;
}
}
// Combine adjacent changes.
@ -647,7 +648,7 @@ UBool Edits::Iterator::previous(UErrorCode &errorCode) {
}
}
updatePreviousIndexes();
return TRUE;
return true;
}
int32_t Edits::Iterator::findIndex(int32_t i, UBool findSource, UErrorCode &errorCode) {
@ -704,7 +705,7 @@ int32_t Edits::Iterator::findIndex(int32_t i, UBool findSource, UErrorCode &erro
// The index is in the current span.
return 0;
}
while (next(FALSE, errorCode)) {
while (next(false, errorCode)) {
if (findSource) {
spanStart = srcIndex;
spanLength = oldLength_;
@ -738,7 +739,7 @@ int32_t Edits::Iterator::findIndex(int32_t i, UBool findSource, UErrorCode &erro
}
int32_t Edits::Iterator::destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode) {
int32_t where = findIndex(i, TRUE, errorCode);
int32_t where = findIndex(i, true, errorCode);
if (where < 0) {
// Error or before the string.
return 0;
@ -757,7 +758,7 @@ int32_t Edits::Iterator::destinationIndexFromSourceIndex(int32_t i, UErrorCode &
}
int32_t Edits::Iterator::sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode) {
int32_t where = findIndex(i, FALSE, errorCode);
int32_t where = findIndex(i, false, errorCode);
if (where < 0) {
// Error or before the string.
return 0;

View File

@ -1,6 +0,0 @@
edits.o edits.d : edits.cpp unicode/edits.h unicode/utypes.h unicode/umachine.h \
unicode/ptypes.h unicode/platform.h unicode/uconfig.h \
unicode/uvernum.h unicode/urename.h unicode/uversion.h \
unicode/uobject.h unicode/unistr.h unicode/char16ptr.h unicode/rep.h \
unicode/std_string.h unicode/stringpiece.h unicode/bytestream.h \
cmemory.h unicode/localpointer.h uassert.h util.h

220
common/emojiprops.cpp Normal file
View File

@ -0,0 +1,220 @@
// © 2021 and later: Unicode, Inc. and others.
// License & terms of use: https://www.unicode.org/copyright.html
// emojiprops.cpp
// created: 2021sep04 Markus W. Scherer
#include "unicode/utypes.h"
#include "unicode/uchar.h"
#include "unicode/ucharstrie.h"
#include "unicode/ucptrie.h"
#include "unicode/udata.h"
#include "unicode/ustringtrie.h"
#include "unicode/utf16.h"
#include "emojiprops.h"
#include "ucln.h"
#include "ucln_cmn.h"
#include "umutex.h"
#include "uset_imp.h"
U_NAMESPACE_BEGIN
namespace {
// Shared EmojiProps instance; allocated in initSingleton() and deleted in
// emojiprops_cleanup(). Stays nullptr if loading failed.
EmojiProps *singleton = nullptr;
// One-time-initialization guard passed to umtx_initOnce() in
// EmojiProps::getSingleton(); reset again by emojiprops_cleanup().
icu::UInitOnce emojiInitOnce {};
// Cleanup callback registered via ucln_common_registerCleanup() in
// initSingleton(). Deletes the shared instance and resets the init-once
// guard so that a later getSingleton() call can create it again.
// Always returns true.
UBool U_CALLCONV emojiprops_cleanup() {
delete singleton;
singleton = nullptr;
emojiInitOnce.reset();
return true;
}
// Init-once callback: creates the shared EmojiProps instance.
// Does nothing if errorCode already reports a failure. On allocation failure
// sets U_MEMORY_ALLOCATION_ERROR; if the constructor reported a failure the
// half-built instance is discarded. The cleanup hook is registered whenever
// construction was attempted.
void U_CALLCONV initSingleton(UErrorCode &errorCode) {
    if (U_SUCCESS(errorCode)) {
        singleton = new EmojiProps(errorCode);
        if (singleton != nullptr) {
            if (U_FAILURE(errorCode)) {
                delete singleton;
                singleton = nullptr;
            }
        } else {
            errorCode = U_MEMORY_ALLOCATION_ERROR;
        }
        ucln_common_registerCleanup(UCLN_COMMON_EMOJIPROPS, emojiprops_cleanup);
    }
}
// TODO: turn this into a shared helper function
// Data-header check: the header must be at least 20 bytes, match this
// platform's endianness and charset family, carry the expected 4-unit
// dataFormat signature, have exactly the given major format version and at
// least the given minor format version.
UBool udata_isAcceptableMajorMinor(
        const UDataInfo &info, const UChar *dataFormat, uint8_t major, uint8_t minor) {
    if (info.size < 20) {
        return false;
    }
    if (info.isBigEndian != U_IS_BIG_ENDIAN || info.charsetFamily != U_CHARSET_FAMILY) {
        return false;
    }
    for (int32_t k = 0; k < 4; ++k) {
        if (info.dataFormat[k] != dataFormat[k]) {
            return false;
        }
    }
    // Major version must match exactly; a newer minor version is acceptable.
    return info.formatVersion[0] == major && info.formatVersion[1] >= minor;
}
} // namespace
// Releases what load() acquired: the data memory returned by
// udata_openChoice() and the code point trie opened with
// ucptrie_openFromBinary().
EmojiProps::~EmojiProps() {
udata_close(memory);
ucptrie_close(cpTrie);
}
// Returns the shared EmojiProps instance, creating it on first use via the
// init-once guard. Returns nullptr without doing anything if errorCode
// already reports a failure on entry; otherwise returns whatever
// initSingleton() left in `singleton` (nullptr if initialization failed).
const EmojiProps *
EmojiProps::getSingleton(UErrorCode &errorCode) {
    if (U_SUCCESS(errorCode)) {
        umtx_initOnce(emojiInitOnce, &initSingleton, errorCode);
        return singleton;
    }
    return nullptr;
}
// Acceptance callback passed to udata_openChoice() in load(): accepts data
// with the "Emoj" format signature, format major version 1 and any minor
// version >= 0. The context, type and name arguments are ignored.
UBool U_CALLCONV
EmojiProps::isAcceptable(void * /*context*/, const char * /*type*/, const char * /*name*/,
                         const UDataInfo *pInfo) {
    constexpr uint8_t kFormatMajor = 1;
    constexpr uint8_t kFormatMinor = 0;
    return udata_isAcceptableMajorMinor(*pInfo, u"Emoj", kFormatMajor, kFormatMinor);
}
// Opens the "uemoji" data item and wires up the code point trie and the
// string-trie pointers from it.
// Sets errorCode (and returns early) if the data cannot be opened, if it has
// too few indexes (U_INVALID_FORMAT_ERROR), or if the trie cannot be opened.
void
EmojiProps::load(UErrorCode &errorCode) {
    memory = udata_openChoice(nullptr, "icu", "uemoji", isAcceptable, this, &errorCode);
    if (U_FAILURE(errorCode)) { return; }

    const uint8_t *dataBytes = static_cast<const uint8_t *>(udata_getMemory(memory));
    const int32_t *indexes = reinterpret_cast<const int32_t *>(dataBytes);

    // The first index, divided by 4, is used as the number of int32_t indexes.
    const int32_t indexCount = indexes[IX_CPTRIE_OFFSET] / 4;
    if (indexCount <= IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET) {
        errorCode = U_INVALID_FORMAT_ERROR;  // Not enough indexes.
        return;
    }

    // The code point trie occupies the bytes between two consecutive offsets.
    const int32_t trieStart = indexes[IX_CPTRIE_OFFSET];
    const int32_t trieLimit = indexes[IX_CPTRIE_OFFSET + 1];
    cpTrie = ucptrie_openFromBinary(UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_8,
                                    dataBytes + trieStart, trieLimit - trieStart,
                                    nullptr, &errorCode);
    if (U_FAILURE(errorCode)) {
        return;
    }

    int32_t slot = IX_BASIC_EMOJI_TRIE_OFFSET;
    while (slot <= IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET) {
        const int32_t start = indexes[slot];
        const int32_t limit = indexes[slot + 1];
        // Set/leave nullptr if there is no UCharsTrie.
        const UChar *trieUnits = nullptr;
        if (limit > start) {
            trieUnits = reinterpret_cast<const UChar *>(dataBytes + start);
        }
        stringTries[getStringTrieIndex(slot)] = trieUnits;
        ++slot;
    }
}
void
EmojiProps::addPropertyStarts(const USetAdder *sa, UErrorCode & /*errorCode*/) const {
// Add the start code point of each same-value range of the trie.
UChar32 start = 0, end;
uint32_t value;
while ((end = ucptrie_getRange(cpTrie, start, UCPMAP_RANGE_NORMAL, 0,
nullptr, nullptr, &value)) >= 0) {
sa->add(sa->set, start);
start = end + 1;
}
}
// Static convenience wrapper for a single code point: fetches the shared
// instance and asks it. Returns false if the instance could not be obtained.
UBool
EmojiProps::hasBinaryProperty(UChar32 c, UProperty which) {
    UErrorCode errorCode = U_ZERO_ERROR;
    const EmojiProps *props = getSingleton(errorCode);
    if (U_FAILURE(errorCode)) {
        return false;
    }
    return props->hasBinaryPropertyImpl(c, which) != 0;
}
/**
 * Looks up a code point property in the code point trie.
 *
 * @param c     Code point to test.
 * @param which Property selector; only UCHAR_EMOJI..UCHAR_RGI_EMOJI are considered.
 * @return The property bit for c, or false when `which` is out of range or is a
 *         property that has no bit in the trie.
 */
UBool
EmojiProps::hasBinaryPropertyImpl(UChar32 c, UProperty which) const {
    // Bit number in the trie value for each property, indexed by (which - UCHAR_EMOJI).
    // -1 marks properties that are not answered by this function.
    // Note: UCHAR_REGIONAL_INDICATOR is a single, hardcoded range implemented elsewhere.
    static constexpr int8_t bitFlags[] = {
        BIT_EMOJI,                  // UCHAR_EMOJI=57
        BIT_EMOJI_PRESENTATION,     // UCHAR_EMOJI_PRESENTATION=58
        BIT_EMOJI_MODIFIER,         // UCHAR_EMOJI_MODIFIER=59
        BIT_EMOJI_MODIFIER_BASE,    // UCHAR_EMOJI_MODIFIER_BASE=60
        BIT_EMOJI_COMPONENT,        // UCHAR_EMOJI_COMPONENT=61
        -1,                         // UCHAR_REGIONAL_INDICATOR=62
        -1,                         // UCHAR_PREPENDED_CONCATENATION_MARK=63
        BIT_EXTENDED_PICTOGRAPHIC,  // UCHAR_EXTENDED_PICTOGRAPHIC=64
        BIT_BASIC_EMOJI,            // UCHAR_BASIC_EMOJI=65
        -1,                         // UCHAR_EMOJI_KEYCAP_SEQUENCE=66
        -1,                         // UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE=67
        -1,                         // UCHAR_RGI_EMOJI_FLAG_SEQUENCE=68
        -1,                         // UCHAR_RGI_EMOJI_TAG_SEQUENCE=69
        -1,                         // UCHAR_RGI_EMOJI_ZWJ_SEQUENCE=70
        BIT_BASIC_EMOJI,            // UCHAR_RGI_EMOJI=71
    };

    const bool inTableRange = UCHAR_EMOJI <= which && which <= UCHAR_RGI_EMOJI;
    if (!inTableRange) {
        return false;
    }
    const int32_t shift = bitFlags[which - UCHAR_EMOJI];
    if (shift < 0) {
        return false;  // not a property that we support in this function
    }
    const uint8_t trieBits = UCPTRIE_FAST_GET(cpTrie, UCPTRIE_8, c);
    return (trieBits >> shift) & 1;
}
/**
 * Tests whether the string s[0..length) has the binary emoji property `which`,
 * using the shared singleton.
 *
 * @return false if the singleton could not be obtained; otherwise the
 *         result of hasBinaryPropertyImpl(s, length, which).
 */
UBool
EmojiProps::hasBinaryProperty(const UChar *s, int32_t length, UProperty which) {
    UErrorCode status = U_ZERO_ERROR;
    const EmojiProps *props = getSingleton(status);
    if (U_FAILURE(status)) {
        return false;
    }
    return props->hasBinaryPropertyImpl(s, length, which) != 0;
}
/**
 * Tests whether a string is in one of the emoji properties of strings.
 *
 * @param s      String to test; a null pointer always yields false.
 * @param length Number of UChars, or negative when s is NUL-terminated.
 * @param which  Property selector; only UCHAR_BASIC_EMOJI..UCHAR_RGI_EMOJI are considered.
 * @return true if the whole string has a value in the string trie of `which`
 *         (or, for UCHAR_RGI_EMOJI, in any of the string tries).
 */
UBool
EmojiProps::hasBinaryPropertyImpl(const UChar *s, int32_t length, UProperty which) const {
    // Null pointer: either an invalid argument (length != 0) or an empty string.
    if (s == nullptr) {
        return false;
    }
    // Empty string, given either by explicit length or by an immediate NUL terminator.
    if (length == 0) {
        return false;
    }
    if (length < 0 && *s == 0) {
        return false;
    }
    // The caller should have delegated single code points to hasBinaryProperty(c, which).
    const bool isStringProperty = UCHAR_BASIC_EMOJI <= which && which <= UCHAR_RGI_EMOJI;
    if (!isStringProperty) {
        return false;
    }

    // Range of stringTries[] slots to consult.
    int32_t firstSlot = which - UCHAR_BASIC_EMOJI;
    int32_t lastSlot = firstSlot;
    if (which == UCHAR_RGI_EMOJI) {
        // RGI_Emoji is the union of the other emoji properties of strings.
        firstSlot = 0;
        lastSlot = UCHAR_RGI_EMOJI_ZWJ_SEQUENCE - UCHAR_BASIC_EMOJI;
    }

    for (int32_t slot = firstSlot; slot <= lastSlot; ++slot) {
        const UChar *trieData = stringTries[slot];
        if (trieData == nullptr) {
            continue;  // no trie for this property
        }
        UCharsTrie matcher(trieData);
        const UStringTrieResult outcome = matcher.next(s, length);
        if (USTRINGTRIE_HAS_VALUE(outcome)) {
            return true;
        }
    }
    return false;
}
void
EmojiProps::addStrings(const USetAdder *sa, UProperty which, UErrorCode &errorCode) const {
if (U_FAILURE(errorCode)) { return; }
if (which < UCHAR_BASIC_EMOJI || UCHAR_RGI_EMOJI < which) {
return;
}
UProperty firstProp = which, lastProp = which;
if (which == UCHAR_RGI_EMOJI) {
// RGI_Emoji is the union of the other emoji properties of strings.
firstProp = UCHAR_BASIC_EMOJI;
lastProp = UCHAR_RGI_EMOJI_ZWJ_SEQUENCE;
}
for (int32_t prop = firstProp; prop <= lastProp; ++prop) {
const UChar *trieUChars = stringTries[prop - UCHAR_BASIC_EMOJI];
if (trieUChars != nullptr) {
UCharsTrie::Iterator iter(trieUChars, 0, errorCode);
while (iter.next(errorCode)) {
const UnicodeString &s = iter.getString();
sa->addString(sa->set, s.getBuffer(), s.length());
}
}
}
}
U_NAMESPACE_END

90
common/emojiprops.h Normal file
View File

@ -0,0 +1,90 @@
// © 2021 and later: Unicode, Inc. and others.
// License & terms of use: https://www.unicode.org/copyright.html
// emojiprops.h
// created: 2021sep03 Markus W. Scherer
#ifndef __EMOJIPROPS_H__
#define __EMOJIPROPS_H__
#include "unicode/utypes.h"
#include "unicode/ucptrie.h"
#include "unicode/udata.h"
#include "unicode/uobject.h"
#include "uset_imp.h"
U_NAMESPACE_BEGIN
/**
 * Emoji properties of code points and of strings.
 *
 * An instance holds a data item handle and a code point trie, both of which
 * the destructor closes, plus pointers to optional string tries.
 * Because of that ownership, instances must not be copied: a copy would close
 * the same handles a second time. Use getSingleton() or the static
 * hasBinaryProperty() functions.
 */
class EmojiProps : public UMemory {
public:
    // @internal
    /** Constructs an instance by calling load(); check errorCode afterwards. */
    EmojiProps(UErrorCode &errorCode) { load(errorCode); }
    ~EmojiProps();

    // The instance owns `memory` and `cpTrie`; forbid copying to avoid double close.
    EmojiProps(const EmojiProps &other) = delete;
    EmojiProps &operator=(const EmojiProps &other) = delete;

    /** Returns the shared instance, or nullptr if errorCode already indicates a failure. */
    static const EmojiProps *getSingleton(UErrorCode &errorCode);
    /** Tests a code point property via the shared instance; false if it is unavailable. */
    static UBool hasBinaryProperty(UChar32 c, UProperty which);
    /** Tests a property of strings via the shared instance; false if it is unavailable. */
    static UBool hasBinaryProperty(const UChar *s, int32_t length, UProperty which);

    /** Adds the start code point of each same-value range of the code point trie. */
    void addPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const;
    /** Adds all strings stored for the property `which`. */
    void addStrings(const USetAdder *sa, UProperty which, UErrorCode &errorCode) const;

    enum {
        // Byte offsets from the start of the data, after the generic header,
        // in ascending order.
        // UCPTrie=CodePointTrie, follows the indexes
        IX_CPTRIE_OFFSET,
        IX_RESERVED1,
        IX_RESERVED2,
        IX_RESERVED3,

        // UCharsTrie=CharsTrie
        IX_BASIC_EMOJI_TRIE_OFFSET,
        IX_EMOJI_KEYCAP_SEQUENCE_TRIE_OFFSET,
        IX_RGI_EMOJI_MODIFIER_SEQUENCE_TRIE_OFFSET,
        IX_RGI_EMOJI_FLAG_SEQUENCE_TRIE_OFFSET,
        IX_RGI_EMOJI_TAG_SEQUENCE_TRIE_OFFSET,
        IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET,
        IX_RESERVED10,
        IX_RESERVED11,
        IX_RESERVED12,
        IX_TOTAL_SIZE,

        // Not initially byte offsets.
        IX_RESERVED14,
        IX_RESERVED15,
        IX_COUNT  // 16
    };

    // Properties in the code point trie.
    enum {
        // https://www.unicode.org/reports/tr51/#Emoji_Properties
        BIT_EMOJI,
        BIT_EMOJI_PRESENTATION,
        BIT_EMOJI_MODIFIER,
        BIT_EMOJI_MODIFIER_BASE,
        BIT_EMOJI_COMPONENT,
        BIT_EXTENDED_PICTOGRAPHIC,
        // https://www.unicode.org/reports/tr51/#Emoji_Sets
        BIT_BASIC_EMOJI
    };

private:
    /** Acceptance callback used when opening the data item. */
    static UBool U_CALLCONV
    isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);
    /** Input i: One of the IX_..._TRIE_OFFSET indexes into the data file indexes[] array. */
    static int32_t getStringTrieIndex(int32_t i) {
        return i - IX_BASIC_EMOJI_TRIE_OFFSET;
    }

    /** Opens the data item and initializes cpTrie and stringTries. */
    void load(UErrorCode &errorCode);
    UBool hasBinaryPropertyImpl(UChar32 c, UProperty which) const;
    UBool hasBinaryPropertyImpl(const UChar *s, int32_t length, UProperty which) const;

    // Closed by the destructor.
    UDataMemory *memory = nullptr;
    UCPTrie *cpTrie = nullptr;
    // One slot per IX_BASIC_EMOJI_TRIE_OFFSET..IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET;
    // nullptr where the data has no trie for that property.
    const UChar *stringTries[6] = { nullptr, nullptr, nullptr, nullptr, nullptr, nullptr };
};
U_NAMESPACE_END
#endif // __EMOJIPROPS_H__

View File

@ -1,4 +0,0 @@
errorcode.o errorcode.d : errorcode.cpp unicode/utypes.h unicode/umachine.h \
unicode/ptypes.h unicode/platform.h unicode/uconfig.h \
unicode/uvernum.h unicode/urename.h unicode/uversion.h \
unicode/errorcode.h unicode/uobject.h

View File

@ -20,6 +20,7 @@
#include "ubrkimpl.h" // U_ICUDATA_BRKITR
#include "uvector.h"
#include "cmemory.h"
#include "umutex.h"
U_NAMESPACE_BEGIN
@ -48,7 +49,7 @@ static void _fb_trace(const char *m, const UnicodeString *s, UBool b, int32_t d,
/**
* Used with sortedInsert()
*/
static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) {
static int32_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) {
const UnicodeString &a = *(const UnicodeString*)t1.pointer;
const UnicodeString &b = *(const UnicodeString*)t2.pointer;
return a.compare(b);
@ -57,7 +58,7 @@ static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) {
/**
* A UVector which implements a set of strings.
*/
class U_COMMON_API UStringSet : public UVector {
class UStringSet : public UVector {
public:
UStringSet(UErrorCode &status) : UVector(uprv_deleteUObject,
uhash_compareUnicodeString,
@ -89,7 +90,6 @@ class U_COMMON_API UStringSet : public UVector {
} else {
sortedInsert(str, compareUnicodeString, status);
if(U_FAILURE(status)) {
delete str;
return false;
}
return true;
@ -139,13 +139,30 @@ class SimpleFilteredSentenceBreakData : public UMemory {
public:
SimpleFilteredSentenceBreakData(UCharsTrie *forwards, UCharsTrie *backwards )
: fForwardsPartialTrie(forwards), fBackwardsTrie(backwards), refcount(1) { }
SimpleFilteredSentenceBreakData *incr() { refcount++; return this; }
SimpleFilteredSentenceBreakData *decr() { if((--refcount) <= 0) delete this; return 0; }
virtual ~SimpleFilteredSentenceBreakData();
SimpleFilteredSentenceBreakData *incr() {
umtx_atomic_inc(&refcount);
return this;
}
SimpleFilteredSentenceBreakData *decr() {
if(umtx_atomic_dec(&refcount) <= 0) {
delete this;
}
return 0;
}
virtual ~SimpleFilteredSentenceBreakData();
LocalPointer<UCharsTrie> fForwardsPartialTrie; // Has ".a" for "a.M."
LocalPointer<UCharsTrie> fBackwardsTrie; // i.e. ".srM" for Mrs.
int32_t refcount;
bool hasForwardsPartialTrie() const { return fForwardsPartialTrie.isValid(); }
bool hasBackwardsTrie() const { return fBackwardsTrie.isValid(); }
const UCharsTrie &getForwardsPartialTrie() const { return *fForwardsPartialTrie; }
const UCharsTrie &getBackwardsTrie() const { return *fBackwardsTrie; }
private:
// These tries own their data arrays.
// They are shared and must therefore not be modified.
LocalPointer<UCharsTrie> fForwardsPartialTrie; // Has ".a" for "a.M."
LocalPointer<UCharsTrie> fBackwardsTrie; // i.e. ".srM" for Mrs.
u_atomic_int32_t refcount;
};
SimpleFilteredSentenceBreakData::~SimpleFilteredSentenceBreakData() {}
@ -168,37 +185,37 @@ public:
/* -- cloning and other subclass stuff -- */
virtual BreakIterator * createBufferClone(void * /*stackBuffer*/,
int32_t &/*BufferSize*/,
UErrorCode &status) {
UErrorCode &status) override {
// for now - always deep clone
status = U_SAFECLONE_ALLOCATED_WARNING;
return clone();
}
virtual SimpleFilteredSentenceBreakIterator* clone() const { return new SimpleFilteredSentenceBreakIterator(*this); }
virtual UClassID getDynamicClassID(void) const { return NULL; }
virtual UBool operator==(const BreakIterator& o) const { if(this==&o) return true; return false; }
virtual SimpleFilteredSentenceBreakIterator* clone() const override { return new SimpleFilteredSentenceBreakIterator(*this); }
virtual UClassID getDynamicClassID(void) const override { return NULL; }
virtual bool operator==(const BreakIterator& o) const override { if(this==&o) return true; return false; }
/* -- text modifying -- */
virtual void setText(UText *text, UErrorCode &status) { fDelegate->setText(text,status); }
virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) { fDelegate->refreshInputText(input,status); return *this; }
virtual void adoptText(CharacterIterator* it) { fDelegate->adoptText(it); }
virtual void setText(const UnicodeString &text) { fDelegate->setText(text); }
virtual void setText(UText *text, UErrorCode &status) override { fDelegate->setText(text,status); }
virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) override { fDelegate->refreshInputText(input,status); return *this; }
virtual void adoptText(CharacterIterator* it) override { fDelegate->adoptText(it); }
virtual void setText(const UnicodeString &text) override { fDelegate->setText(text); }
/* -- other functions that are just delegated -- */
virtual UText *getUText(UText *fillIn, UErrorCode &status) const { return fDelegate->getUText(fillIn,status); }
virtual CharacterIterator& getText(void) const { return fDelegate->getText(); }
virtual UText *getUText(UText *fillIn, UErrorCode &status) const override { return fDelegate->getUText(fillIn,status); }
virtual CharacterIterator& getText(void) const override { return fDelegate->getText(); }
/* -- ITERATION -- */
virtual int32_t first(void);
virtual int32_t preceding(int32_t offset);
virtual int32_t previous(void);
virtual UBool isBoundary(int32_t offset);
virtual int32_t current(void) const { return fDelegate->current(); } // we keep the delegate current, so this should be correct.
virtual int32_t first(void) override;
virtual int32_t preceding(int32_t offset) override;
virtual int32_t previous(void) override;
virtual UBool isBoundary(int32_t offset) override;
virtual int32_t current(void) const override { return fDelegate->current(); } // we keep the delegate current, so this should be correct.
virtual int32_t next(void);
virtual int32_t next(void) override;
virtual int32_t next(int32_t n);
virtual int32_t following(int32_t offset);
virtual int32_t last(void);
virtual int32_t next(int32_t n) override;
virtual int32_t following(int32_t offset) override;
virtual int32_t last(void) override;
private:
/**
@ -244,7 +261,13 @@ SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(BreakIt
fData(new SimpleFilteredSentenceBreakData(forwards, backwards)),
fDelegate(adopt)
{
// all set..
if (fData == nullptr) {
delete forwards;
delete backwards;
if (U_SUCCESS(status)) {
status = U_MEMORY_ALLOCATION_ERROR;
}
}
}
SimpleFilteredSentenceBreakIterator::~SimpleFilteredSentenceBreakIterator() {
@ -261,59 +284,62 @@ SimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n) {
int32_t bestValue = -1;
// loops while 'n' points to an exception.
utext_setNativeIndex(fText.getAlias(), n); // from n..
fData->fBackwardsTrie->reset();
UChar32 uch;
//if(debug2) u_printf(" n@ %d\n", n);
// Assume a space is following the '.' (so we handle the case: "Mr. /Brown")
if((uch=utext_previous32(fText.getAlias()))==(UChar32)0x0020) { // TODO: skip a class of chars here??
if(utext_previous32(fText.getAlias())==u' ') { // TODO: skip a class of chars here??
// TODO only do this the 1st time?
//if(debug2) u_printf("skipping prev: |%C| \n", (UChar)uch);
} else {
//if(debug2) u_printf("not skipping prev: |%C| \n", (UChar)uch);
uch = utext_next32(fText.getAlias());
utext_next32(fText.getAlias());
//if(debug2) u_printf(" -> : |%C| \n", (UChar)uch);
}
UStringTrieResult r = USTRINGTRIE_INTERMEDIATE_VALUE;
while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL && // more to consume backwards and..
USTRINGTRIE_HAS_NEXT(r=fData->fBackwardsTrie->nextForCodePoint(uch))) {// more in the trie
if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far
bestPosn = utext_getNativeIndex(fText.getAlias());
bestValue = fData->fBackwardsTrie->getValue();
}
//if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getNativeIndex(fText.getAlias()));
{
// Do not modify the shared trie!
UCharsTrie iter(fData->getBackwardsTrie());
UChar32 uch;
while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL) { // more to consume backwards
UStringTrieResult r = iter.nextForCodePoint(uch);
if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far
bestPosn = utext_getNativeIndex(fText.getAlias());
bestValue = iter.getValue();
}
if(!USTRINGTRIE_HAS_NEXT(r)) {
break;
}
//if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getNativeIndex(fText.getAlias()));
}
}
if(USTRINGTRIE_MATCHES(r)) { // exact match?
//if(debug2) u_printf("rev<?/%C/?end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
bestValue = fData->fBackwardsTrie->getValue();
bestPosn = utext_getNativeIndex(fText.getAlias());
//if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
}
//if(bestValue >= 0) {
//if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
//}
if(bestPosn>=0) {
//if(debug2) u_printf("rev< /%C/ end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
//if(USTRINGTRIE_MATCHES(r)) { // matched - so, now what?
//int32_t bestValue = fBackwardsTrie->getValue();
//int32_t bestValue = iter.getValue();
////if(debug2) u_printf("rev< /%C/ matched, skip..%d bestValue=%d\n", (UChar)uch, r, bestValue);
if(bestValue == kMATCH) { // exact match!
//if(debug2) u_printf(" exact backward match\n");
return kExceptionHere; // See if the next is another exception.
} else if(bestValue == kPARTIAL
&& fData->fForwardsPartialTrie.isValid()) { // make sure there's a forward trie
&& fData->hasForwardsPartialTrie()) { // make sure there's a forward trie
//if(debug2) u_printf(" partial backward match\n");
// We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie
// to see if it matches something going forward.
fData->fForwardsPartialTrie->reset();
UStringTrieResult rfwd = USTRINGTRIE_INTERMEDIATE_VALUE;
utext_setNativeIndex(fText.getAlias(), bestPosn); // hope that's close ..
//if(debug2) u_printf("Retrying at %d\n", bestPosn);
// Do not modify the shared trie!
UCharsTrie iter(fData->getForwardsPartialTrie());
UChar32 uch;
while((uch=utext_next32(fText.getAlias()))!=U_SENTINEL &&
USTRINGTRIE_HAS_NEXT(rfwd=fData->fForwardsPartialTrie->nextForCodePoint(uch))) {
USTRINGTRIE_HAS_NEXT(rfwd=iter.nextForCodePoint(uch))) {
//if(debug2) u_printf("fwd> /%C/ cont?%d @%d\n", (UChar)uch, rfwd, utext_getNativeIndex(fText.getAlias()));
}
if(USTRINGTRIE_MATCHES(rfwd)) {
@ -339,7 +365,7 @@ SimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n) {
int32_t
SimpleFilteredSentenceBreakIterator::internalNext(int32_t n) {
if(n == UBRK_DONE || // at end or
fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
!fData->hasBackwardsTrie()) { // .. no backwards table loaded == no exceptions
return n;
}
// OK, do we need to break here?
@ -369,7 +395,7 @@ SimpleFilteredSentenceBreakIterator::internalNext(int32_t n) {
int32_t
SimpleFilteredSentenceBreakIterator::internalPrev(int32_t n) {
if(n == 0 || n == UBRK_DONE || // at end or
fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
!fData->hasBackwardsTrie()) { // .. no backwards table loaded == no exceptions
return n;
}
// OK, do we need to break here?
@ -420,7 +446,7 @@ SimpleFilteredSentenceBreakIterator::previous(void) {
UBool SimpleFilteredSentenceBreakIterator::isBoundary(int32_t offset) {
if (!fDelegate->isBoundary(offset)) return false; // no break to suppress
if (fData->fBackwardsTrie.isNull()) return true; // no data = no suppressions
if (!fData->hasBackwardsTrie()) return true; // no data = no suppressions
UErrorCode status = U_ZERO_ERROR;
resetState(status);
@ -456,14 +482,14 @@ SimpleFilteredSentenceBreakIterator::last(void) {
/**
* Concrete implementation of builder class.
*/
class U_COMMON_API SimpleFilteredBreakIteratorBuilder : public FilteredBreakIteratorBuilder {
class SimpleFilteredBreakIteratorBuilder : public FilteredBreakIteratorBuilder {
public:
virtual ~SimpleFilteredBreakIteratorBuilder();
SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status);
SimpleFilteredBreakIteratorBuilder(UErrorCode &status);
virtual UBool suppressBreakAfter(const UnicodeString& exception, UErrorCode& status);
virtual UBool unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status);
virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status);
virtual UBool suppressBreakAfter(const UnicodeString& exception, UErrorCode& status) override;
virtual UBool unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status) override;
virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) override;
private:
UStringSet fSet;
};
@ -588,11 +614,11 @@ SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UEr
i++) {
const UnicodeString *abbr = fSet.getStringAt(i);
if(abbr) {
FB_TRACE("build",abbr,TRUE,i);
FB_TRACE("build",abbr,true,i);
ustrs[n] = *abbr; // copy by value
FB_TRACE("ustrs[n]",&ustrs[n],TRUE,i);
FB_TRACE("ustrs[n]",&ustrs[n],true,i);
} else {
FB_TRACE("build",abbr,FALSE,i);
FB_TRACE("build",abbr,false,i);
status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
@ -603,37 +629,37 @@ SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UEr
for(int i=0;i<subCount;i++) {
int nn = ustrs[i].indexOf(kFULLSTOP); // TODO: non-'.' abbreviations
if(nn>-1 && (nn+1)!=ustrs[i].length()) {
FB_TRACE("partial",&ustrs[i],FALSE,i);
FB_TRACE("partial",&ustrs[i],false,i);
// is partial.
// is it unique?
int sameAs = -1;
for(int j=0;j<subCount;j++) {
if(j==i) continue;
if(ustrs[i].compare(0,nn+1,ustrs[j],0,nn+1)==0) {
FB_TRACE("prefix",&ustrs[j],FALSE,nn+1);
FB_TRACE("prefix",&ustrs[j],false,nn+1);
//UBool otherIsPartial = ((nn+1)!=ustrs[j].length()); // true if ustrs[j] doesn't end at nn
if(partials[j]==0) { // hasn't been processed yet
partials[j] = kSuppressInReverse | kAddToForward;
FB_TRACE("suppressing",&ustrs[j],FALSE,j);
FB_TRACE("suppressing",&ustrs[j],false,j);
} else if(partials[j] & kSuppressInReverse) {
sameAs = j; // the other entry is already in the reverse table.
}
}
}
FB_TRACE("for partial same-",&ustrs[i],FALSE,sameAs);
FB_TRACE(" == partial #",&ustrs[i],FALSE,partials[i]);
FB_TRACE("for partial same-",&ustrs[i],false,sameAs);
FB_TRACE(" == partial #",&ustrs[i],false,partials[i]);
UnicodeString prefix(ustrs[i], 0, nn+1);
if(sameAs == -1 && partials[i] == 0) {
// first one - add the prefix to the reverse table.
prefix.reverse();
builder->add(prefix, kPARTIAL, status);
revCount++;
FB_TRACE("Added partial",&prefix,FALSE, i);
FB_TRACE(u_errorName(status),&ustrs[i],FALSE,i);
FB_TRACE("Added partial",&prefix,false, i);
FB_TRACE(u_errorName(status),&ustrs[i],false,i);
partials[i] = kSuppressInReverse | kAddToForward;
} else {
FB_TRACE("NOT adding partial",&prefix,FALSE, i);
FB_TRACE(u_errorName(status),&ustrs[i],FALSE,i);
FB_TRACE("NOT adding partial",&prefix,false, i);
FB_TRACE(u_errorName(status),&ustrs[i],false,i);
}
}
}
@ -642,9 +668,9 @@ SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UEr
ustrs[i].reverse();
builder->add(ustrs[i], kMATCH, status);
revCount++;
FB_TRACE(u_errorName(status), &ustrs[i], FALSE, i);
FB_TRACE(u_errorName(status), &ustrs[i], false, i);
} else {
FB_TRACE("Adding fwd",&ustrs[i], FALSE, i);
FB_TRACE("Adding fwd",&ustrs[i], false, i);
// an optimization would be to only add the portion after the '.'
// for example, for "Ph.D." we store ".hP" in the reverse table. We could just store "D." in the forward,
@ -656,12 +682,12 @@ SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UEr
////if(debug2) u_printf("SUPPRESS- not Added(%d): /%S/ status=%s\n",partials[i], ustrs[i].getTerminatedBuffer(), u_errorName(status));
}
}
FB_TRACE("AbbrCount",NULL,FALSE, subCount);
FB_TRACE("AbbrCount",NULL,false, subCount);
if(revCount>0) {
backwardsTrie.adoptInstead(builder->build(USTRINGTRIE_BUILD_FAST, status));
if(U_FAILURE(status)) {
FB_TRACE(u_errorName(status),NULL,FALSE, -1);
FB_TRACE(u_errorName(status),NULL,false, -1);
return NULL;
}
}
@ -669,7 +695,7 @@ SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UEr
if(fwdCount>0) {
forwardsPartialTrie.adoptInstead(builder2->build(USTRINGTRIE_BUILD_FAST, status));
if(U_FAILURE(status)) {
FB_TRACE(u_errorName(status),NULL,FALSE, -1);
FB_TRACE(u_errorName(status),NULL,false, -1);
return NULL;
}
}

View File

@ -1,14 +0,0 @@
filteredbrk.o filteredbrk.d : filteredbrk.cpp unicode/utypes.h unicode/umachine.h \
unicode/ptypes.h unicode/platform.h unicode/uconfig.h \
unicode/uvernum.h unicode/urename.h unicode/uversion.h cmemory.h \
unicode/localpointer.h unicode/uobject.h unicode/filteredbrk.h \
unicode/brkiter.h unicode/unistr.h unicode/char16ptr.h unicode/rep.h \
unicode/std_string.h unicode/stringpiece.h unicode/bytestream.h \
unicode/chariter.h unicode/locid.h unicode/strenum.h unicode/putil.h \
unicode/uloc.h unicode/uenum.h unicode/ubrk.h unicode/utext.h \
unicode/uchar.h unicode/stringoptions.h unicode/ucpmap.h \
unicode/parseerr.h unicode/umisc.h unicode/ucharstriebuilder.h \
unicode/stringtriebuilder.h unicode/ucharstrie.h unicode/ustringtrie.h \
unicode/ures.h uresimp.h uresdata.h unicode/udata.h putilimp.h \
udataswp.h resource.h restrace.h ubrkimpl.h uvector.h uarrsort.h \
uelement.h

View File

@ -137,14 +137,14 @@ UnicodeString &
FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
const UnicodeString &second,
UErrorCode &errorCode) const {
return normalizeSecondAndAppend(first, second, TRUE, errorCode);
return normalizeSecondAndAppend(first, second, true, errorCode);
}
UnicodeString &
FilteredNormalizer2::append(UnicodeString &first,
const UnicodeString &second,
UErrorCode &errorCode) const {
return normalizeSecondAndAppend(first, second, FALSE, errorCode);
return normalizeSecondAndAppend(first, second, false, errorCode);
}
UnicodeString &
@ -224,7 +224,7 @@ UBool
FilteredNormalizer2::isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
uprv_checkCanGetBuffer(s, errorCode);
if(U_FAILURE(errorCode)) {
return FALSE;
return false;
}
USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {
@ -235,19 +235,19 @@ FilteredNormalizer2::isNormalized(const UnicodeString &s, UErrorCode &errorCode)
if( !norm2.isNormalized(s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode) ||
U_FAILURE(errorCode)
) {
return FALSE;
return false;
}
spanCondition=USET_SPAN_NOT_CONTAINED;
}
prevSpanLimit=spanLimit;
}
return TRUE;
return true;
}
UBool
FilteredNormalizer2::isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const {
if(U_FAILURE(errorCode)) {
return FALSE;
return false;
}
const char *s = sp.data();
int32_t length = sp.length();
@ -259,14 +259,14 @@ FilteredNormalizer2::isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) con
} else {
if (!norm2.isNormalizedUTF8(StringPiece(s, spanLength), errorCode) ||
U_FAILURE(errorCode)) {
return FALSE;
return false;
}
spanCondition = USET_SPAN_NOT_CONTAINED;
}
s += spanLength;
length -= spanLength;
}
return TRUE;
return true;
}
UNormalizationCheckResult

View File

@ -1,10 +0,0 @@
filterednormalizer2.o filterednormalizer2.d : filterednormalizer2.cpp unicode/utypes.h \
unicode/umachine.h unicode/ptypes.h unicode/platform.h \
unicode/uconfig.h unicode/uvernum.h unicode/urename.h \
unicode/uversion.h unicode/edits.h unicode/uobject.h \
unicode/normalizer2.h unicode/stringpiece.h unicode/std_string.h \
unicode/uniset.h unicode/ucpmap.h unicode/unifilt.h unicode/unifunct.h \
unicode/unimatch.h unicode/unistr.h unicode/char16ptr.h unicode/rep.h \
unicode/bytestream.h unicode/uset.h unicode/uchar.h \
unicode/stringoptions.h unicode/localpointer.h unicode/unorm2.h \
unicode/unorm.h unicode/uiter.h cpputils.h cmemory.h

View File

@ -85,16 +85,22 @@ public:
inline int32_t puti(const UnicodeString& key, int32_t value, UErrorCode& status);
inline int32_t putiAllowZero(const UnicodeString& key, int32_t value, UErrorCode& status);
inline void* get(const UnicodeString& key) const;
inline int32_t geti(const UnicodeString& key) const;
inline int32_t getiAndFound(const UnicodeString& key, UBool &found) const;
inline void* remove(const UnicodeString& key);
inline int32_t removei(const UnicodeString& key);
inline void removeAll(void);
inline UBool containsKey(const UnicodeString& key) const;
inline const UHashElement* find(const UnicodeString& key) const;
/**
@ -109,8 +115,8 @@ public:
inline UBool equals(const Hashtable& that) const;
private:
Hashtable(const Hashtable &other); // forbid copying of this class
Hashtable &operator=(const Hashtable &other); // forbid copying of this class
Hashtable(const Hashtable &other) = delete; // forbid copying of this class
Hashtable &operator=(const Hashtable &other) = delete; // forbid copying of this class
};
/*********************************************************************
@ -203,6 +209,11 @@ inline int32_t Hashtable::puti(const UnicodeString& key, int32_t value, UErrorCo
return uhash_puti(hash, new UnicodeString(key), value, &status);
}
// Stores an integer value under a heap-allocated copy of key by forwarding to
// uhash_putiAllowZero(); returns that function's result.
// NOTE(review): presumably the table takes ownership of the copied key and,
// unlike puti(), accepts a value of 0 - confirm against uhash.h.
inline int32_t Hashtable::putiAllowZero(const UnicodeString& key, int32_t value,
UErrorCode& status) {
return uhash_putiAllowZero(hash, new UnicodeString(key), value, &status);
}
// Looks up the pointer value for key by forwarding to uhash_get().
inline void* Hashtable::get(const UnicodeString& key) const {
return uhash_get(hash, &key);
}
@ -211,6 +222,10 @@ inline int32_t Hashtable::geti(const UnicodeString& key) const {
return uhash_geti(hash, &key);
}
// Looks up the integer value for key by forwarding to uhash_getiAndFound();
// `found` is passed through as that function's output flag.
inline int32_t Hashtable::getiAndFound(const UnicodeString& key, UBool &found) const {
return uhash_getiAndFound(hash, &key, &found);
}
inline void* Hashtable::remove(const UnicodeString& key) {
return uhash_remove(hash, &key);
}
@ -219,6 +234,10 @@ inline int32_t Hashtable::removei(const UnicodeString& key) {
return uhash_removei(hash, &key);
}
// Returns the result of uhash_containsKey() for key.
inline UBool Hashtable::containsKey(const UnicodeString& key) const {
return uhash_containsKey(hash, &key);
}
// Returns the hash element for key as reported by uhash_find().
inline const UHashElement* Hashtable::find(const UnicodeString& key) const {
return uhash_find(hash, &key);
}

View File

@ -1,9 +0,0 @@
icudataver.o icudataver.d : icudataver.cpp unicode/utypes.h unicode/umachine.h \
unicode/ptypes.h unicode/platform.h unicode/uconfig.h \
unicode/uvernum.h unicode/urename.h unicode/uversion.h \
unicode/icudataver.h unicode/ures.h unicode/uloc.h unicode/uenum.h \
unicode/localpointer.h unicode/unistr.h unicode/char16ptr.h \
unicode/rep.h unicode/uobject.h unicode/std_string.h \
unicode/stringpiece.h unicode/bytestream.h uresimp.h uresdata.h \
unicode/udata.h putilimp.h unicode/putil.h udataswp.h resource.h \
restrace.h

View File

@ -59,8 +59,8 @@ struct UPlugData {
void *context; /**< user context data */
char name[UPLUG_NAME_MAX]; /**< name of plugin */
UPlugLevel level; /**< level of plugin */
UBool awaitingLoad; /**< TRUE if the plugin is awaiting a load call */
UBool dontUnload; /**< TRUE if plugin must stay resident (leak plugin and lib) */
UBool awaitingLoad; /**< true if the plugin is awaiting a load call */
UBool dontUnload; /**< true if plugin must stay resident (leak plugin and lib) */
UErrorCode pluginStatus; /**< status code of plugin */
};
@ -284,7 +284,7 @@ static void uplug_callPlug(UPlugData *plug, UPlugReason reason, UErrorCode *stat
static void uplug_unloadPlug(UPlugData *plug, UErrorCode *status) {
if(plug->awaitingLoad) { /* shouldn't happen. Plugin hasn'tbeen loaded yet.*/
if(plug->awaitingLoad) { /* shouldn't happen. Plugin hasn't been loaded yet.*/
*status = U_INTERNAL_PROGRAM_ERROR;
return;
}
@ -295,7 +295,7 @@ static void uplug_unloadPlug(UPlugData *plug, UErrorCode *status) {
}
static void uplug_queryPlug(UPlugData *plug, UErrorCode *status) {
if(!plug->awaitingLoad || !(plug->level == UPLUG_LEVEL_UNKNOWN) ) { /* shouldn't happen. Plugin hasn'tbeen loaded yet.*/
if(!plug->awaitingLoad || !(plug->level == UPLUG_LEVEL_UNKNOWN) ) { /* shouldn't happen. Plugin hasn't been loaded yet.*/
*status = U_INTERNAL_PROGRAM_ERROR;
return;
}
@ -304,11 +304,11 @@ static void uplug_queryPlug(UPlugData *plug, UErrorCode *status) {
if(U_SUCCESS(*status)) {
if(plug->level == UPLUG_LEVEL_INVALID) {
plug->pluginStatus = U_PLUGIN_DIDNT_SET_LEVEL;
plug->awaitingLoad = FALSE;
plug->awaitingLoad = false;
}
} else {
plug->pluginStatus = U_INTERNAL_PROGRAM_ERROR;
plug->awaitingLoad = FALSE;
plug->awaitingLoad = false;
}
}
@ -317,12 +317,12 @@ static void uplug_loadPlug(UPlugData *plug, UErrorCode *status) {
if(U_FAILURE(*status)) {
return;
}
if(!plug->awaitingLoad || (plug->level < UPLUG_LEVEL_LOW) ) { /* shouldn't happen. Plugin hasn'tbeen loaded yet.*/
if(!plug->awaitingLoad || (plug->level < UPLUG_LEVEL_LOW) ) { /* shouldn't happen. Plugin hasn't been loaded yet.*/
*status = U_INTERNAL_PROGRAM_ERROR;
return;
}
uplug_callPlug(plug, UPLUG_REASON_LOAD, status);
plug->awaitingLoad = FALSE;
plug->awaitingLoad = false;
if(!U_SUCCESS(*status)) {
plug->pluginStatus = U_INTERNAL_PROGRAM_ERROR;
}
@ -347,8 +347,8 @@ static UPlugData *uplug_allocateEmptyPlug(UErrorCode *status)
plug->structSize = sizeof(UPlugData);
plug->name[0]=0;
plug->level = UPLUG_LEVEL_UNKNOWN; /* initialize to null state */
plug->awaitingLoad = TRUE;
plug->dontUnload = FALSE;
plug->awaitingLoad = true;
plug->dontUnload = false;
plug->pluginStatus = U_ZERO_ERROR;
plug->libName[0] = 0;
plug->config[0]=0;
@ -403,9 +403,9 @@ static void uplug_deallocatePlug(UPlugData *plug, UErrorCode *status) {
pluginCount = uplug_removeEntryAt(pluginList, pluginCount, sizeof(plug[0]), uplug_pluginNumber(plug));
} else {
/* not ok- leave as a message. */
plug->awaitingLoad=FALSE;
plug->awaitingLoad=false;
plug->entrypoint=0;
plug->dontUnload=TRUE;
plug->dontUnload=true;
}
}
@ -526,7 +526,7 @@ uplug_getPlugLoadStatus(UPlugData *plug) {
/**
* Initialize a plugin fron an entrypoint and library - but don't load it.
* Initialize a plugin from an entrypoint and library - but don't load it.
*/
static UPlugData* uplug_initPlugFromEntrypointAndLibrary(UPlugEntrypoint *entrypoint, const char *config, void *lib, const char *sym,
UErrorCode *status) {
@ -558,8 +558,8 @@ uplug_initErrorPlug(const char *libName, const char *sym, const char *config, co
if(U_FAILURE(*status)) return NULL;
plug->pluginStatus = loadStatus;
plug->awaitingLoad = FALSE; /* Won't load. */
plug->dontUnload = TRUE; /* cannot unload. */
plug->awaitingLoad = false; /* Won't load. */
plug->dontUnload = true; /* cannot unload. */
if(sym!=NULL) {
uprv_strncpy(plug->sym, sym, UPLUG_NAME_MAX);
@ -646,7 +646,7 @@ static UBool U_CALLCONV uplug_cleanup(void)
}
/* close other held libs? */
gCurrentLevel = UPLUG_LEVEL_LOW;
return TRUE;
return true;
}
#if U_ENABLE_DYLOAD
@ -678,7 +678,7 @@ static void uplug_loadWaitingPlugs(UErrorCode *status) {
currentLevel = newLevel;
}
}
pluginToLoad->awaitingLoad = FALSE;
pluginToLoad->awaitingLoad = false;
}
}
}
@ -694,7 +694,7 @@ static void uplug_loadWaitingPlugs(UErrorCode *status) {
} else {
uplug_loadPlug(pluginToLoad, &subStatus);
}
pluginToLoad->awaitingLoad = FALSE;
pluginToLoad->awaitingLoad = false;
}
}

View File

@ -1,4 +0,0 @@
icuplug.o icuplug.d : icuplug.cpp unicode/icuplug.h unicode/utypes.h \
unicode/umachine.h unicode/ptypes.h unicode/platform.h \
unicode/uconfig.h unicode/uvernum.h unicode/urename.h \
unicode/uversion.h

View File

@ -67,9 +67,9 @@ LoadedNormalizer2Impl::isAcceptable(void * /*context*/,
) {
// Normalizer2Impl *me=(Normalizer2Impl *)context;
// uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
return TRUE;
return true;
} else {
return FALSE;
return false;
}
}
@ -134,14 +134,14 @@ U_CDECL_END
#if !NORM2_HARDCODE_NFC_DATA
static Norm2AllModes *nfcSingleton;
static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER;
static icu::UInitOnce nfcInitOnce {};
#endif
static Norm2AllModes *nfkcSingleton;
static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER;
static icu::UInitOnce nfkcInitOnce {};
static Norm2AllModes *nfkc_cfSingleton;
static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER;
static icu::UInitOnce nfkc_cfInitOnce {};
static UHashtable *cache=NULL;
@ -157,7 +157,7 @@ static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
} else if (uprv_strcmp(what, "nfkc_cf") == 0) {
nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode);
} else {
UPRV_UNREACHABLE; // Unknown singleton
UPRV_UNREACHABLE_EXIT; // Unknown singleton
}
ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
}
@ -185,7 +185,7 @@ static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() {
uhash_close(cache);
cache=NULL;
return TRUE;
return true;
}
U_CDECL_END

View File

@ -1,15 +0,0 @@
loadednormalizer2impl.o loadednormalizer2impl.d : loadednormalizer2impl.cpp unicode/utypes.h \
unicode/umachine.h unicode/ptypes.h unicode/platform.h \
unicode/uconfig.h unicode/uvernum.h unicode/urename.h \
unicode/uversion.h unicode/udata.h unicode/localpointer.h \
unicode/normalizer2.h unicode/stringpiece.h unicode/uobject.h \
unicode/std_string.h unicode/uniset.h unicode/ucpmap.h \
unicode/unifilt.h unicode/unifunct.h unicode/unimatch.h \
unicode/unistr.h unicode/char16ptr.h unicode/rep.h \
unicode/bytestream.h unicode/uset.h unicode/uchar.h \
unicode/stringoptions.h unicode/unorm2.h unicode/ucptrie.h \
unicode/utf8.h unicode/utf.h unicode/unorm.h unicode/uiter.h cstring.h \
cmemory.h mutex.h umutex.h unicode/uclean.h putilimp.h unicode/putil.h \
norm2allmodes.h unicode/edits.h cpputils.h normalizer2impl.h \
unicode/utf16.h udataswp.h uset_imp.h uassert.h ucln_cmn.h ucln.h \
uhash.h uelement.h

View File

@ -15,7 +15,7 @@ U_NAMESPACE_BEGIN
#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )
const char* kAttributeKey = "attribute";
constexpr const char* kAttributeKey = "attribute";
static bool _isExtensionSubtags(char key, const char* s, int32_t len) {
switch (uprv_tolower(key)) {
@ -228,7 +228,7 @@ LocaleBuilder& LocaleBuilder::setExtension(char key, StringPiece value)
return *this;
}
if (extensions_ == nullptr) {
extensions_ = new Locale();
extensions_ = Locale::getRoot().clone();
if (extensions_ == nullptr) {
status_ = U_MEMORY_ALLOCATION_ERROR;
return *this;
@ -259,11 +259,11 @@ LocaleBuilder& LocaleBuilder::setUnicodeLocaleKeyword(
return *this;
}
if (extensions_ == nullptr) {
extensions_ = new Locale();
}
if (extensions_ == nullptr) {
status_ = U_MEMORY_ALLOCATION_ERROR;
return *this;
extensions_ = Locale::getRoot().clone();
if (extensions_ == nullptr) {
status_ = U_MEMORY_ALLOCATION_ERROR;
return *this;
}
}
extensions_->setUnicodeKeywordValue(key, type, status_);
return *this;
@ -280,7 +280,7 @@ LocaleBuilder& LocaleBuilder::addUnicodeLocaleAttribute(
return *this;
}
if (extensions_ == nullptr) {
extensions_ = new Locale();
extensions_ = Locale::getRoot().clone();
if (extensions_ == nullptr) {
status_ = U_MEMORY_ALLOCATION_ERROR;
return *this;
@ -415,7 +415,7 @@ void LocaleBuilder::copyExtensionsFrom(const Locale& src, UErrorCode& errorCode)
return;
}
if (extensions_ == nullptr) {
extensions_ = new Locale();
extensions_ = Locale::getRoot().clone();
if (extensions_ == nullptr) {
status_ = U_MEMORY_ALLOCATION_ERROR;
return;
@ -459,7 +459,7 @@ Locale LocaleBuilder::build(UErrorCode& errorCode)
UBool LocaleBuilder::copyErrorTo(UErrorCode &outErrorCode) const {
if (U_FAILURE(outErrorCode)) {
// Do not overwrite the older error code
return TRUE;
return true;
}
outErrorCode = status_;
return U_FAILURE(outErrorCode);

View File

@ -1,9 +0,0 @@
localebuilder.o localebuilder.d : localebuilder.cpp bytesinkutil.h unicode/utypes.h \
unicode/umachine.h unicode/ptypes.h unicode/platform.h \
unicode/uconfig.h unicode/uvernum.h unicode/urename.h \
unicode/uversion.h unicode/bytestream.h unicode/uobject.h \
unicode/std_string.h unicode/edits.h cmemory.h unicode/localpointer.h \
uassert.h charstr.h unicode/unistr.h unicode/char16ptr.h unicode/rep.h \
unicode/stringpiece.h cstring.h ulocimp.h unicode/uloc.h \
unicode/uenum.h unicode/localebuilder.h unicode/locid.h \
unicode/strenum.h unicode/putil.h unicode/localematcher.h

View File

@ -60,7 +60,7 @@ LocaleMatcher::Result::Result(LocaleMatcher::Result &&src) U_NOEXCEPT :
if (desiredIsOwned) {
src.desiredLocale = nullptr;
src.desiredIndex = -1;
src.desiredIsOwned = FALSE;
src.desiredIsOwned = false;
}
}
@ -82,7 +82,7 @@ LocaleMatcher::Result &LocaleMatcher::Result::operator=(LocaleMatcher::Result &&
if (desiredIsOwned) {
src.desiredLocale = nullptr;
src.desiredIndex = -1;
src.desiredIsOwned = FALSE;
src.desiredIsOwned = false;
}
return *this;
}
@ -168,12 +168,9 @@ void LocaleMatcher::Builder::clearSupportedLocales() {
bool LocaleMatcher::Builder::ensureSupportedLocaleVector() {
if (U_FAILURE(errorCode_)) { return false; }
if (supportedLocales_ != nullptr) { return true; }
supportedLocales_ = new UVector(uprv_deleteUObject, nullptr, errorCode_);
LocalPointer<UVector> lpSupportedLocales(new UVector(uprv_deleteUObject, nullptr, errorCode_), errorCode_);
if (U_FAILURE(errorCode_)) { return false; }
if (supportedLocales_ == nullptr) {
errorCode_ = U_MEMORY_ALLOCATION_ERROR;
return false;
}
supportedLocales_ = lpSupportedLocales.orphan();
return true;
}
@ -187,9 +184,8 @@ LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocalesFromListStrin
for (int32_t i = 0; i < length; ++i) {
Locale *locale = list.orphanLocaleAt(i);
if (locale == nullptr) { continue; }
supportedLocales_->addElement(locale, errorCode_);
supportedLocales_->adoptElement(locale, errorCode_);
if (U_FAILURE(errorCode_)) {
delete locale;
break;
}
}
@ -197,35 +193,21 @@ LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocalesFromListStrin
}
LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocales(Locale::Iterator &locales) {
if (U_FAILURE(errorCode_)) { return *this; }
clearSupportedLocales();
if (!ensureSupportedLocaleVector()) { return *this; }
while (locales.hasNext()) {
const Locale &locale = locales.next();
Locale *clone = locale.clone();
if (clone == nullptr) {
errorCode_ = U_MEMORY_ALLOCATION_ERROR;
break;
}
supportedLocales_->addElement(clone, errorCode_);
if (U_FAILURE(errorCode_)) {
delete clone;
break;
if (ensureSupportedLocaleVector()) {
clearSupportedLocales();
while (locales.hasNext() && U_SUCCESS(errorCode_)) {
const Locale &locale = locales.next();
LocalPointer<Locale> clone (locale.clone(), errorCode_);
supportedLocales_->adoptElement(clone.orphan(), errorCode_);
}
}
return *this;
}
LocaleMatcher::Builder &LocaleMatcher::Builder::addSupportedLocale(const Locale &locale) {
if (!ensureSupportedLocaleVector()) { return *this; }
Locale *clone = locale.clone();
if (clone == nullptr) {
errorCode_ = U_MEMORY_ALLOCATION_ERROR;
return *this;
}
supportedLocales_->addElement(clone, errorCode_);
if (U_FAILURE(errorCode_)) {
delete clone;
if (ensureSupportedLocaleVector()) {
LocalPointer<Locale> clone(locale.clone(), errorCode_);
supportedLocales_->adoptElement(clone.orphan(), errorCode_);
}
return *this;
}
@ -305,10 +287,10 @@ LocaleMatcher::Builder &LocaleMatcher::Builder::internalSetThresholdDistance(int
#endif
UBool LocaleMatcher::Builder::copyErrorTo(UErrorCode &outErrorCode) const {
if (U_FAILURE(outErrorCode)) { return TRUE; }
if (U_SUCCESS(errorCode_)) { return FALSE; }
if (U_FAILURE(outErrorCode)) { return true; }
if (U_SUCCESS(errorCode_)) { return false; }
outErrorCode = errorCode_;
return TRUE;
return true;
}
LocaleMatcher LocaleMatcher::Builder::build(UErrorCode &errorCode) const {
@ -345,9 +327,8 @@ UBool compareLSRs(const UHashTok t1, const UHashTok t2) {
int32_t LocaleMatcher::putIfAbsent(const LSR &lsr, int32_t i, int32_t suppLength,
UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return suppLength; }
int32_t index = uhash_geti(supportedLsrToIndex, &lsr);
if (index == 0) {
uhash_puti(supportedLsrToIndex, const_cast<LSR *>(&lsr), i + 1, &errorCode);
if (!uhash_containsKey(supportedLsrToIndex, &lsr)) {
uhash_putiAllowZero(supportedLsrToIndex, const_cast<LSR *>(&lsr), i, &errorCode);
if (U_SUCCESS(errorCode)) {
supportedLSRs[suppLength] = &lsr;
supportedIndexes[suppLength++] = i;
@ -651,30 +632,30 @@ const Locale *LocaleMatcher::getBestMatchForListString(
LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
const Locale &desiredLocale, UErrorCode &errorCode) const {
if (U_FAILURE(errorCode)) {
return Result(nullptr, defaultLocale, -1, -1, FALSE);
return Result(nullptr, defaultLocale, -1, -1, false);
}
int32_t suppIndex = getBestSuppIndex(
getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode),
nullptr, errorCode);
if (U_FAILURE(errorCode) || suppIndex < 0) {
return Result(nullptr, defaultLocale, -1, -1, FALSE);
return Result(nullptr, defaultLocale, -1, -1, false);
} else {
return Result(&desiredLocale, supportedLocales[suppIndex], 0, suppIndex, FALSE);
return Result(&desiredLocale, supportedLocales[suppIndex], 0, suppIndex, false);
}
}
LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
Locale::Iterator &desiredLocales, UErrorCode &errorCode) const {
if (U_FAILURE(errorCode) || !desiredLocales.hasNext()) {
return Result(nullptr, defaultLocale, -1, -1, FALSE);
return Result(nullptr, defaultLocale, -1, -1, false);
}
LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES);
int32_t suppIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode);
if (U_FAILURE(errorCode) || suppIndex < 0) {
return Result(nullptr, defaultLocale, -1, -1, FALSE);
return Result(nullptr, defaultLocale, -1, -1, false);
} else {
return Result(lsrIter.orphanRemembered(), supportedLocales[suppIndex],
lsrIter.getBestDesiredIndex(), suppIndex, TRUE);
lsrIter.getBestDesiredIndex(), suppIndex, true);
}
}
@ -685,12 +666,11 @@ int32_t LocaleMatcher::getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remai
int32_t bestSupportedLsrIndex = -1;
for (int32_t bestShiftedDistance = LocaleDistance::shiftDistance(thresholdDistance);;) {
// Quick check for exact maximized LSR.
// Returns suppIndex+1 where 0 means not found.
if (supportedLsrToIndex != nullptr) {
desiredLSR.setHashCode();
int32_t index = uhash_geti(supportedLsrToIndex, &desiredLSR);
if (index != 0) {
int32_t suppIndex = index - 1;
UBool found = false;
int32_t suppIndex = uhash_getiAndFound(supportedLsrToIndex, &desiredLSR, &found);
if (found) {
if (remainingIter != nullptr) {
remainingIter->rememberCurrent(desiredIndex, errorCode);
}

View File

@ -1,11 +0,0 @@
localematcher.o localematcher.d : localematcher.cpp unicode/utypes.h unicode/umachine.h \
unicode/ptypes.h unicode/platform.h unicode/uconfig.h \
unicode/uvernum.h unicode/urename.h unicode/uversion.h \
unicode/localebuilder.h unicode/locid.h unicode/bytestream.h \
unicode/uobject.h unicode/std_string.h unicode/localpointer.h \
unicode/strenum.h unicode/unistr.h unicode/char16ptr.h unicode/rep.h \
unicode/stringpiece.h unicode/putil.h unicode/uloc.h unicode/uenum.h \
unicode/localematcher.h cstring.h cmemory.h localeprioritylist.h \
loclikelysubtags.h unicode/bytestrie.h unicode/ustringtrie.h \
unicode/ures.h charstrmap.h uhash.h uelement.h lsr.h locdistance.h \
uassert.h ustr_imp.h unicode/utf8.h unicode/utf.h uvector.h uarrsort.h

View File

@ -187,17 +187,18 @@ bool LocalePriorityList::add(const Locale &locale, int32_t weight, UErrorCode &e
if (U_FAILURE(errorCode)) { return false; }
}
LocalPointer<Locale> clone;
int32_t index = uhash_geti(map, &locale);
if (index != 0) {
UBool found = false;
int32_t index = uhash_getiAndFound(map, &locale, &found);
if (found) {
// Duplicate: Remove the old item and append it anew.
LocaleAndWeight &lw = list->array[index - 1];
LocaleAndWeight &lw = list->array[index];
clone.adoptInstead(lw.locale);
lw.locale = nullptr;
lw.weight = 0;
++numRemoved;
}
if (weight <= 0) { // do not add q=0
if (index != 0) {
if (found) {
// Not strictly necessary but cleaner.
uhash_removei(map, &locale);
}
@ -217,7 +218,7 @@ bool LocalePriorityList::add(const Locale &locale, int32_t weight, UErrorCode &e
return false;
}
}
uhash_puti(map, clone.getAlias(), listLength + 1, &errorCode);
uhash_putiAllowZero(map, clone.getAlias(), listLength, &errorCode);
if (U_FAILURE(errorCode)) { return false; }
LocaleAndWeight &lw = list->array[listLength];
lw.locale = clone.orphan();
@ -233,7 +234,7 @@ void LocalePriorityList::sort(UErrorCode &errorCode) {
// The comparator forces a stable sort via the item index.
if (U_FAILURE(errorCode) || getLength() <= 1 || !hasWeights) { return; }
uprv_sortArray(list->array.getAlias(), listLength, sizeof(LocaleAndWeight),
compareLocaleAndWeight, nullptr, FALSE, &errorCode);
compareLocaleAndWeight, nullptr, false, &errorCode);
}
U_NAMESPACE_END

View File

@ -1,9 +0,0 @@
localeprioritylist.o localeprioritylist.d : localeprioritylist.cpp unicode/utypes.h \
unicode/umachine.h unicode/ptypes.h unicode/platform.h \
unicode/uconfig.h unicode/uvernum.h unicode/urename.h \
unicode/uversion.h unicode/localpointer.h unicode/locid.h \
unicode/bytestream.h unicode/uobject.h unicode/std_string.h \
unicode/strenum.h unicode/unistr.h unicode/char16ptr.h unicode/rep.h \
unicode/stringpiece.h unicode/putil.h unicode/uloc.h unicode/uenum.h \
charstr.h cmemory.h localeprioritylist.h uarrsort.h uassert.h uhash.h \
uelement.h

Some files were not shown because too many files have changed in this diff Show More