[*] i hate this shit lib
This commit is contained in:
parent
02aa1a021c
commit
7910db6d3d
0
build_bin/genbrk
Executable file → Normal file
0
build_bin/genbrk
Executable file → Normal file
0
build_bin/genccode
Executable file → Normal file
0
build_bin/genccode
Executable file → Normal file
0
build_bin/gencfu
Executable file → Normal file
0
build_bin/gencfu
Executable file → Normal file
0
build_bin/gencmn
Executable file → Normal file
0
build_bin/gencmn
Executable file → Normal file
0
build_bin/gencnval
Executable file → Normal file
0
build_bin/gencnval
Executable file → Normal file
0
build_bin/gendict
Executable file → Normal file
0
build_bin/gendict
Executable file → Normal file
0
build_bin/gennorm2
Executable file → Normal file
0
build_bin/gennorm2
Executable file → Normal file
0
build_bin/genrb
Executable file → Normal file
0
build_bin/genrb
Executable file → Normal file
0
build_bin/gensprep
Executable file → Normal file
0
build_bin/gensprep
Executable file → Normal file
0
build_bin/icupkg
Executable file → Normal file
0
build_bin/icupkg
Executable file → Normal file
0
build_bin/makeconv
Executable file → Normal file
0
build_bin/makeconv
Executable file → Normal file
0
build_bin/pkgdata
Executable file → Normal file
0
build_bin/pkgdata
Executable file → Normal file
@ -1 +0,0 @@
|
||||
libicudata.so.68.1
|
1
build_lib/libicudata.so
Normal file
1
build_lib/libicudata.so
Normal file
@ -0,0 +1 @@
|
||||
libicudata.so.68.1
|
@ -1 +0,0 @@
|
||||
libicudata.so.68.1
|
1
build_lib/libicudata.so.68
Normal file
1
build_lib/libicudata.so.68
Normal file
@ -0,0 +1 @@
|
||||
libicudata.so.68.1
|
0
build_lib/libicudata.so.68.1
Executable file → Normal file
0
build_lib/libicudata.so.68.1
Executable file → Normal file
@ -1 +0,0 @@
|
||||
libicui18n.so.68.1
|
1
build_lib/libicui18n.so
Normal file
1
build_lib/libicui18n.so
Normal file
@ -0,0 +1 @@
|
||||
libicui18n.so.68.1
|
@ -1 +0,0 @@
|
||||
libicui18n.so.68.1
|
1
build_lib/libicui18n.so.68
Normal file
1
build_lib/libicui18n.so.68
Normal file
@ -0,0 +1 @@
|
||||
libicui18n.so.68.1
|
0
build_lib/libicui18n.so.68.1
Executable file → Normal file
0
build_lib/libicui18n.so.68.1
Executable file → Normal file
@ -1 +0,0 @@
|
||||
libicutu.so.68.1
|
1
build_lib/libicutu.so
Normal file
1
build_lib/libicutu.so
Normal file
@ -0,0 +1 @@
|
||||
libicutu.so.68.1
|
@ -1 +0,0 @@
|
||||
libicutu.so.68.1
|
1
build_lib/libicutu.so.68
Normal file
1
build_lib/libicutu.so.68
Normal file
@ -0,0 +1 @@
|
||||
libicutu.so.68.1
|
0
build_lib/libicutu.so.68.1
Executable file → Normal file
0
build_lib/libicutu.so.68.1
Executable file → Normal file
@ -1 +0,0 @@
|
||||
libicuuc.so.68.1
|
1
build_lib/libicuuc.so
Normal file
1
build_lib/libicuuc.so
Normal file
@ -0,0 +1 @@
|
||||
libicuuc.so.68.1
|
@ -1 +0,0 @@
|
||||
libicuuc.so.68.1
|
1
build_lib/libicuuc.so.68
Normal file
1
build_lib/libicuuc.so.68
Normal file
@ -0,0 +1 @@
|
||||
libicuuc.so.68.1
|
0
build_lib/libicuuc.so.68.1
Executable file → Normal file
0
build_lib/libicuuc.so.68.1
Executable file → Normal file
1213
common/BUILD.bazel
Normal file
1213
common/BUILD.bazel
Normal file
File diff suppressed because it is too large
Load Diff
207
common/Makefile
207
common/Makefile
@ -1,207 +0,0 @@
|
||||
# Copyright (C) 2016 and later: Unicode, Inc. and others.
|
||||
# License & terms of use: http://www.unicode.org/copyright.html
|
||||
#******************************************************************************
|
||||
#
|
||||
# Copyright (C) 1999-2016, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#
|
||||
#******************************************************************************
|
||||
## Makefile.in for ICU - icuuc.so
|
||||
## Stephen F. Booth
|
||||
|
||||
## Source directory information
|
||||
srcdir = .
|
||||
top_srcdir = ..
|
||||
|
||||
top_builddir = ..
|
||||
|
||||
## All the flags and other definitions are included here.
|
||||
include $(top_builddir)/icudefs.mk
|
||||
|
||||
## Build directory information
|
||||
subdir = common
|
||||
|
||||
# for service hook
|
||||
LOCALSVC_CPP=localsvc.cpp
|
||||
SVC_HOOK_INC=$(top_builddir)/common/svchook.mk
|
||||
|
||||
## Extra files to remove for 'make clean'
|
||||
CLEANFILES = *~ $(DEPS) $(IMPORT_LIB) $(MIDDLE_IMPORT_LIB) $(FINAL_IMPORT_LIB) $(SVC_HOOK_INC)
|
||||
|
||||
## Target information
|
||||
|
||||
TARGET_STUBNAME=$(COMMON_STUBNAME)
|
||||
|
||||
ifneq ($(ENABLE_STATIC),)
|
||||
TARGET = $(LIBDIR)/$(LIBSICU)$(TARGET_STUBNAME)$(ICULIBSUFFIX).$(A)
|
||||
endif
|
||||
|
||||
ifneq ($(ENABLE_SHARED),)
|
||||
SO_TARGET = $(LIBDIR)/$(LIBICU)$(TARGET_STUBNAME)$(ICULIBSUFFIX).$(SO)
|
||||
ALL_SO_TARGETS = $(SO_TARGET) $(MIDDLE_SO_TARGET) $(FINAL_SO_TARGET) $(SHARED_OBJECT)
|
||||
|
||||
ifeq ($(ENABLE_SO_VERSION_DATA),1)
|
||||
SO_VERSION_DATA = common.res
|
||||
endif
|
||||
|
||||
ifeq ($(OS390BATCH),1)
|
||||
BATCH_TARGET = $(BATCH_COMMON_TARGET)
|
||||
BATCH_LIBS = $(BATCH_LIBICUDT) -lm
|
||||
endif # OS390BATCH
|
||||
|
||||
endif # ENABLE_SHARED
|
||||
|
||||
ALL_TARGETS = $(TARGET) $(ALL_SO_TARGETS) $(BATCH_TARGET)
|
||||
|
||||
DYNAMICCPPFLAGS = $(SHAREDLIBCPPFLAGS)
|
||||
DYNAMICCFLAGS = $(SHAREDLIBCFLAGS)
|
||||
DYNAMICCXXFLAGS = $(SHAREDLIBCXXFLAGS)
|
||||
CFLAGS += $(LIBCFLAGS)
|
||||
CXXFLAGS += $(LIBCXXFLAGS)
|
||||
ifeq ($(OS390BATCH),1)
|
||||
CFLAGS += -WI
|
||||
CXXFLAGS += -WI
|
||||
endif
|
||||
|
||||
CPPFLAGS += -I$(srcdir) $(LIBCPPFLAGS) $(CPPFLAGSICUUC)
|
||||
# we want DEFS here
|
||||
DEFS += -DU_COMMON_IMPLEMENTATION
|
||||
LDFLAGS += $(LDFLAGSICUUC)
|
||||
|
||||
# for plugin configuration
|
||||
CPPFLAGS += "-DDEFAULT_ICU_PLUGINS=\"$(libdir)/icu\" "
|
||||
|
||||
# for icu data location
|
||||
ifeq ($(PKGDATA_MODE),common)
|
||||
CPPFLAGS += "-DU_ICU_DATA_DEFAULT_DIR=\"$(ICUDATA_DIR)\""
|
||||
endif
|
||||
|
||||
# $(LIBICUDT) is either stub data or the real DLL common data.
|
||||
LIBS = $(LIBICUDT) $(DEFAULT_LIBS)
|
||||
|
||||
SOURCES = $(shell cat $(srcdir)/sources.txt)
|
||||
OBJECTS = $(SOURCES:.cpp=.o)
|
||||
|
||||
## Header files to install
|
||||
HEADERS = $(srcdir)/unicode/*.h
|
||||
|
||||
STATIC_OBJECTS = $(OBJECTS:.o=.$(STATIC_O))
|
||||
|
||||
DEPS = $(OBJECTS:.o=.d)
|
||||
|
||||
-include Makefile.local
|
||||
|
||||
-include $(SVC_HOOK_INC)
|
||||
|
||||
|
||||
## List of phony targets
|
||||
.PHONY : all all-local install install-local clean clean-local \
|
||||
distclean distclean-local install-library install-headers dist \
|
||||
dist-local check check-local check-exhaustive
|
||||
|
||||
## Clear suffix list
|
||||
.SUFFIXES :
|
||||
|
||||
## List of standard targets
|
||||
all: all-local
|
||||
install: install-local
|
||||
clean: clean-local
|
||||
distclean : distclean-local
|
||||
dist: dist-local
|
||||
check: all check-local
|
||||
|
||||
check-exhaustive: check
|
||||
|
||||
all-local: $(ALL_TARGETS)
|
||||
|
||||
install-local: install-headers install-library
|
||||
|
||||
install-library: all-local
|
||||
$(MKINSTALLDIRS) $(DESTDIR)$(libdir)
|
||||
ifneq ($(ENABLE_STATIC),)
|
||||
$(INSTALL-L) $(TARGET) $(DESTDIR)$(libdir)
|
||||
endif
|
||||
ifneq ($(ENABLE_SHARED),)
|
||||
# For MinGW, do we want the DLL to go in the bin location?
|
||||
ifeq ($(MINGW_MOVEDLLSTOBINDIR),YES)
|
||||
$(MKINSTALLDIRS) $(DESTDIR)$(bindir)
|
||||
$(INSTALL-L) $(FINAL_SO_TARGET) $(DESTDIR)$(bindir)
|
||||
else
|
||||
$(INSTALL-L) $(FINAL_SO_TARGET) $(DESTDIR)$(libdir)
|
||||
ifneq ($(FINAL_SO_TARGET),$(SO_TARGET))
|
||||
cd $(DESTDIR)$(libdir) && $(RM) $(notdir $(SO_TARGET)) && ln -s $(notdir $(FINAL_SO_TARGET)) $(notdir $(SO_TARGET))
|
||||
ifneq ($(FINAL_SO_TARGET),$(MIDDLE_SO_TARGET))
|
||||
cd $(DESTDIR)$(libdir) && $(RM) $(notdir $(MIDDLE_SO_TARGET)) && ln -s $(notdir $(FINAL_SO_TARGET)) $(notdir $(MIDDLE_SO_TARGET))
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
ifneq ($(IMPORT_LIB_EXT),)
|
||||
$(INSTALL-L) $(FINAL_IMPORT_LIB) $(DESTDIR)$(libdir)
|
||||
ifneq ($(IMPORT_LIB),$(FINAL_IMPORT_LIB))
|
||||
cd $(DESTDIR)$(libdir) && $(RM) $(notdir $(IMPORT_LIB)) && ln -s $(notdir $(FINAL_IMPORT_LIB)) $(notdir $(IMPORT_LIB))
|
||||
endif
|
||||
ifneq ($(MIDDLE_IMPORT_LIB),$(FINAL_IMPORT_LIB))
|
||||
cd $(DESTDIR)$(libdir) && $(RM) $(notdir $(MIDDLE_IMPORT_LIB)) && ln -s $(notdir $(FINAL_IMPORT_LIB)) $(notdir $(MIDDLE_IMPORT_LIB))
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
$(SVC_HOOK_INC):
|
||||
@echo generating $@
|
||||
@-test -f $(top_srcdir)/common/$(LOCALSVC_CPP) && ( echo "have $(LOCALSVC_CPP) - U_LOCAL_SERVICE_HOOK=1" ; \
|
||||
echo 'CPPFLAGS +=-DU_LOCAL_SERVICE_HOOK=1' > $@ ; \
|
||||
echo 'OBJECTS += $(LOCALSVC_CPP:%.cpp=%.o)' >> $@ \
|
||||
) ; true
|
||||
@echo "# Autogenerated by Makefile" >> $@
|
||||
|
||||
install-headers:
|
||||
$(MKINSTALLDIRS) $(DESTDIR)$(includedir)/unicode
|
||||
@for file in $(HEADERS); do \
|
||||
echo "$(INSTALL_DATA) $$file $(DESTDIR)$(includedir)/unicode"; \
|
||||
$(INSTALL_DATA) $$file $(DESTDIR)$(includedir)/unicode || exit; \
|
||||
done
|
||||
|
||||
dist-local:
|
||||
|
||||
clean-local:
|
||||
test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
|
||||
$(RMV) $(OBJECTS) $(STATIC_OBJECTS) $(ALL_TARGETS) $(SO_VERSION_DATA)
|
||||
|
||||
distclean-local: clean-local
|
||||
$(RMV) Makefile icucfg.h $(SVC_HOOK_INC)
|
||||
|
||||
check-local:
|
||||
|
||||
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status $(SVC_HOOK_INC)
|
||||
cd $(top_builddir) \
|
||||
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
|
||||
|
||||
ifneq ($(ENABLE_STATIC),)
|
||||
$(TARGET): $(STATIC_OBJECTS)
|
||||
$(AR) $(ARFLAGS) $(AR_OUTOPT)$@ $^
|
||||
$(RANLIB) $@
|
||||
endif
|
||||
|
||||
ifneq ($(ENABLE_SHARED),)
|
||||
$(SHARED_OBJECT): $(OBJECTS) $(SO_VERSION_DATA)
|
||||
$(SHLIB.cc) $(LD_SONAME) $(OUTOPT)$@ $^ $(LIBS)
|
||||
ifeq ($(ENABLE_RPATH),YES)
|
||||
ifneq ($(wildcard $(libdir)/$(MIDDLE_SO_TARGET)),)
|
||||
$(warning RPATH warning: --enable-rpath means test programs may use existing $(libdir)/$(MIDDLE_SO_TARGET))
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(OS390BATCH),1)
|
||||
$(BATCH_TARGET):$(OBJECTS)
|
||||
$(SHLIB.cc) $(LD_SONAME) $(OUTOPT)$@ $^ $(BATCH_LIBS)
|
||||
endif # OS390BATCH
|
||||
endif # ENABLE_SHARED
|
||||
|
||||
ifeq (,$(MAKECMDGOALS))
|
||||
-include $(DEPS)
|
||||
else
|
||||
ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
|
||||
-include $(DEPS)
|
||||
endif
|
||||
endif
|
||||
|
@ -37,23 +37,23 @@ Appendable::appendString(const UChar *s, int32_t length) {
|
||||
UChar c;
|
||||
while((c=*s++)!=0) {
|
||||
if(!appendCodeUnit(c)) {
|
||||
return FALSE;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} else if(length>0) {
|
||||
const UChar *limit=s+length;
|
||||
do {
|
||||
if(!appendCodeUnit(*s++)) {
|
||||
return FALSE;
|
||||
return false;
|
||||
}
|
||||
} while(s<limit);
|
||||
}
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
|
||||
UBool
|
||||
Appendable::reserveAppendCapacity(int32_t /*appendCapacity*/) {
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
|
||||
UChar *
|
||||
|
@ -1,4 +0,0 @@
|
||||
appendable.o appendable.d : appendable.cpp unicode/utypes.h unicode/umachine.h \
|
||||
unicode/ptypes.h unicode/platform.h unicode/uconfig.h \
|
||||
unicode/uvernum.h unicode/urename.h unicode/uversion.h \
|
||||
unicode/appendable.h unicode/uobject.h unicode/utf16.h unicode/utf.h
|
@ -309,9 +309,9 @@ BMPSet::contains(UChar32 c) const {
|
||||
// surrogate or supplementary code point
|
||||
return containsSlow(c, list4kStarts[0xd], list4kStarts[0x11]);
|
||||
} else {
|
||||
// Out-of-range code points get FALSE, consistent with long-standing
|
||||
// Out-of-range code points get false, consistent with long-standing
|
||||
// behavior of UnicodeSet::contains(c).
|
||||
return FALSE;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,9 +0,0 @@
|
||||
bmpset.o bmpset.d : bmpset.cpp unicode/utypes.h unicode/umachine.h unicode/ptypes.h \
|
||||
unicode/platform.h unicode/uconfig.h unicode/uvernum.h \
|
||||
unicode/urename.h unicode/uversion.h unicode/uniset.h unicode/ucpmap.h \
|
||||
unicode/unifilt.h unicode/unifunct.h unicode/uobject.h \
|
||||
unicode/unimatch.h unicode/unistr.h unicode/char16ptr.h unicode/rep.h \
|
||||
unicode/std_string.h unicode/stringpiece.h unicode/bytestream.h \
|
||||
unicode/uset.h unicode/uchar.h unicode/stringoptions.h \
|
||||
unicode/localpointer.h unicode/utf8.h unicode/utf.h unicode/utf16.h \
|
||||
cmemory.h bmpset.h uassert.h
|
@ -25,6 +25,7 @@
|
||||
#include "brkeng.h"
|
||||
#include "cmemory.h"
|
||||
#include "dictbe.h"
|
||||
#include "lstmbe.h"
|
||||
#include "charstr.h"
|
||||
#include "dictionarydata.h"
|
||||
#include "mutex.h"
|
||||
@ -77,7 +78,10 @@ int32_t
|
||||
UnhandledEngine::findBreaks( UText *text,
|
||||
int32_t /* startPos */,
|
||||
int32_t endPos,
|
||||
UVector32 &/*foundBreaks*/ ) const {
|
||||
UVector32 &/*foundBreaks*/,
|
||||
UBool /* isPhraseBreaking */,
|
||||
UErrorCode &status) const {
|
||||
if (U_FAILURE(status)) return 0;
|
||||
UChar32 c = utext_current32(text);
|
||||
while((int32_t)utext_getNativeIndex(text) < endPos && fHandled->contains(c)) {
|
||||
utext_next32(text); // TODO: recast loop to work with post-increment operations.
|
||||
@ -132,14 +136,13 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c) {
|
||||
static UMutex gBreakEngineMutex;
|
||||
Mutex m(&gBreakEngineMutex);
|
||||
|
||||
if (fEngines == NULL) {
|
||||
UStack *engines = new UStack(_deleteEngine, NULL, status);
|
||||
if (U_FAILURE(status) || engines == NULL) {
|
||||
if (fEngines == nullptr) {
|
||||
LocalPointer<UStack> engines(new UStack(_deleteEngine, nullptr, status), status);
|
||||
if (U_FAILURE(status) ) {
|
||||
// Note: no way to return error code to caller.
|
||||
delete engines;
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
fEngines = engines;
|
||||
fEngines = engines.orphan();
|
||||
} else {
|
||||
int32_t i = fEngines->size();
|
||||
while (--i >= 0) {
|
||||
@ -152,10 +155,10 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c) {
|
||||
|
||||
// We didn't find an engine. Create one.
|
||||
lbe = loadEngineFor(c);
|
||||
if (lbe != NULL) {
|
||||
if (lbe != nullptr) {
|
||||
fEngines->push((void *)lbe, status);
|
||||
}
|
||||
return lbe;
|
||||
return U_SUCCESS(status) ? lbe : nullptr;
|
||||
}
|
||||
|
||||
const LanguageBreakEngine *
|
||||
@ -163,9 +166,26 @@ ICULanguageBreakFactory::loadEngineFor(UChar32 c) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UScriptCode code = uscript_getScript(c, &status);
|
||||
if (U_SUCCESS(status)) {
|
||||
const LanguageBreakEngine *engine = nullptr;
|
||||
// Try to use LSTM first
|
||||
const LSTMData *data = CreateLSTMDataForScript(code, status);
|
||||
if (U_SUCCESS(status)) {
|
||||
if (data != nullptr) {
|
||||
engine = CreateLSTMBreakEngine(code, data, status);
|
||||
if (U_SUCCESS(status) && engine != nullptr) {
|
||||
return engine;
|
||||
}
|
||||
if (engine != nullptr) {
|
||||
delete engine;
|
||||
engine = nullptr;
|
||||
} else {
|
||||
DeleteLSTMData(data);
|
||||
}
|
||||
}
|
||||
}
|
||||
status = U_ZERO_ERROR; // fallback to dictionary based
|
||||
DictionaryMatcher *m = loadDictionaryMatcherFor(code);
|
||||
if (m != NULL) {
|
||||
const LanguageBreakEngine *engine = NULL;
|
||||
switch(code) {
|
||||
case USCRIPT_THAI:
|
||||
engine = new ThaiBreakEngine(m, status);
|
||||
@ -241,10 +261,10 @@ ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) {
|
||||
const UChar *extStart = u_memrchr(dictfname, 0x002e, dictnlength); // last dot
|
||||
if (extStart != NULL) {
|
||||
int32_t len = (int32_t)(extStart - dictfname);
|
||||
ext.appendInvariantChars(UnicodeString(FALSE, extStart + 1, dictnlength - len - 1), status);
|
||||
ext.appendInvariantChars(UnicodeString(false, extStart + 1, dictnlength - len - 1), status);
|
||||
dictnlength = len;
|
||||
}
|
||||
dictnbuf.appendInvariantChars(UnicodeString(FALSE, dictfname, dictnlength), status);
|
||||
dictnbuf.appendInvariantChars(UnicodeString(false, dictfname, dictnlength), status);
|
||||
ures_close(b);
|
||||
|
||||
UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext.data(), dictnbuf.data(), &status);
|
||||
|
@ -1,15 +0,0 @@
|
||||
brkeng.o brkeng.d : brkeng.cpp unicode/utypes.h unicode/umachine.h unicode/ptypes.h \
|
||||
unicode/platform.h unicode/uconfig.h unicode/uvernum.h \
|
||||
unicode/urename.h unicode/uversion.h unicode/uchar.h \
|
||||
unicode/stringoptions.h unicode/ucpmap.h unicode/uniset.h \
|
||||
unicode/unifilt.h unicode/unifunct.h unicode/uobject.h \
|
||||
unicode/unimatch.h unicode/unistr.h unicode/char16ptr.h unicode/rep.h \
|
||||
unicode/std_string.h unicode/stringpiece.h unicode/bytestream.h \
|
||||
unicode/uset.h unicode/localpointer.h unicode/chariter.h \
|
||||
unicode/ures.h unicode/uloc.h unicode/uenum.h unicode/udata.h \
|
||||
unicode/putil.h unicode/ustring.h unicode/uiter.h unicode/uscript.h \
|
||||
unicode/ucharstrie.h unicode/ustringtrie.h unicode/bytestrie.h \
|
||||
brkeng.h unicode/utext.h cmemory.h dictbe.h uvectr32.h uhash.h \
|
||||
uelement.h uassert.h charstr.h dictionarydata.h udataswp.h mutex.h \
|
||||
umutex.h unicode/uclean.h putilimp.h uvector.h uarrsort.h uresimp.h \
|
||||
uresdata.h resource.h restrace.h ubrkimpl.h
|
@ -68,12 +68,15 @@ class LanguageBreakEngine : public UMemory {
|
||||
* @param startPos The start of the run within the supplied text.
|
||||
* @param endPos The end of the run within the supplied text.
|
||||
* @param foundBreaks A Vector of int32_t to receive the breaks.
|
||||
* @param status Information on any errors encountered.
|
||||
* @return The number of breaks found.
|
||||
*/
|
||||
virtual int32_t findBreaks( UText *text,
|
||||
int32_t startPos,
|
||||
int32_t endPos,
|
||||
UVector32 &foundBreaks ) const = 0;
|
||||
UVector32 &foundBreaks,
|
||||
UBool isPhraseBreaking,
|
||||
UErrorCode &status) const = 0;
|
||||
|
||||
};
|
||||
|
||||
@ -174,7 +177,7 @@ class UnhandledEngine : public LanguageBreakEngine {
|
||||
* @return true if this engine handles the particular character and break
|
||||
* type.
|
||||
*/
|
||||
virtual UBool handles(UChar32 c) const;
|
||||
virtual UBool handles(UChar32 c) const override;
|
||||
|
||||
/**
|
||||
* <p>Find any breaks within a run in the supplied text.</p>
|
||||
@ -185,12 +188,15 @@ class UnhandledEngine : public LanguageBreakEngine {
|
||||
* @param startPos The start of the run within the supplied text.
|
||||
* @param endPos The end of the run within the supplied text.
|
||||
* @param foundBreaks An allocated C array of the breaks found, if any
|
||||
* @param status Information on any errors encountered.
|
||||
* @return The number of breaks found.
|
||||
*/
|
||||
virtual int32_t findBreaks( UText *text,
|
||||
int32_t startPos,
|
||||
int32_t endPos,
|
||||
UVector32 &foundBreaks ) const;
|
||||
UVector32 &foundBreaks,
|
||||
UBool isPhraseBreaking,
|
||||
UErrorCode &status) const override;
|
||||
|
||||
/**
|
||||
* <p>Tell the engine to handle a particular character and break type.</p>
|
||||
@ -243,7 +249,7 @@ class ICULanguageBreakFactory : public LanguageBreakFactory {
|
||||
* sought.
|
||||
* @return A LanguageBreakEngine with the desired characteristics, or 0.
|
||||
*/
|
||||
virtual const LanguageBreakEngine *getEngineFor(UChar32 c);
|
||||
virtual const LanguageBreakEngine *getEngineFor(UChar32 c) override;
|
||||
|
||||
protected:
|
||||
/**
|
||||
|
@ -30,6 +30,7 @@
|
||||
#include "unicode/ures.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/filteredbrk.h"
|
||||
#include "bytesinkutil.h"
|
||||
#include "ucln_cmn.h"
|
||||
#include "cstring.h"
|
||||
#include "umutex.h"
|
||||
@ -115,7 +116,7 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st
|
||||
}
|
||||
|
||||
// Create a RuleBasedBreakIterator
|
||||
result = new RuleBasedBreakIterator(file, status);
|
||||
result = new RuleBasedBreakIterator(file, uprv_strstr(type, "phrase") != NULL, status);
|
||||
|
||||
// If there is a result, set the valid locale and actual locale, and the kind
|
||||
if (U_SUCCESS(status) && result != NULL) {
|
||||
@ -234,7 +235,7 @@ class ICUBreakIteratorFactory : public ICUResourceBundleFactory {
|
||||
public:
|
||||
virtual ~ICUBreakIteratorFactory();
|
||||
protected:
|
||||
virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const {
|
||||
virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const override {
|
||||
return BreakIterator::makeInstance(loc, kind, status);
|
||||
}
|
||||
};
|
||||
@ -254,11 +255,11 @@ public:
|
||||
|
||||
virtual ~ICUBreakIteratorService();
|
||||
|
||||
virtual UObject* cloneInstance(UObject* instance) const {
|
||||
virtual UObject* cloneInstance(UObject* instance) const override {
|
||||
return ((BreakIterator*)instance)->clone();
|
||||
}
|
||||
|
||||
virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const {
|
||||
virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const override {
|
||||
LocaleKey& lkey = (LocaleKey&)key;
|
||||
int32_t kind = lkey.kind();
|
||||
Locale loc;
|
||||
@ -266,7 +267,7 @@ public:
|
||||
return BreakIterator::makeInstance(loc, kind, status);
|
||||
}
|
||||
|
||||
virtual UBool isDefault() const {
|
||||
virtual UBool isDefault() const override {
|
||||
return countFactories() == 1;
|
||||
}
|
||||
};
|
||||
@ -278,7 +279,7 @@ ICUBreakIteratorService::~ICUBreakIteratorService() {}
|
||||
// defined in ucln_cmn.h
|
||||
U_NAMESPACE_END
|
||||
|
||||
static icu::UInitOnce gInitOnceBrkiter = U_INITONCE_INITIALIZER;
|
||||
static icu::UInitOnce gInitOnceBrkiter {};
|
||||
static icu::ICULocaleService* gService = NULL;
|
||||
|
||||
|
||||
@ -295,7 +296,7 @@ static UBool U_CALLCONV breakiterator_cleanup(void) {
|
||||
}
|
||||
gInitOnceBrkiter.reset();
|
||||
#endif
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
U_CDECL_END
|
||||
U_NAMESPACE_BEGIN
|
||||
@ -346,7 +347,7 @@ BreakIterator::unregister(URegistryKey key, UErrorCode& status)
|
||||
}
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
return FALSE;
|
||||
return false;
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
@ -408,7 +409,6 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
|
||||
if (U_FAILURE(status)) {
|
||||
return NULL;
|
||||
}
|
||||
char lbType[kKeyValueLenMax];
|
||||
|
||||
BreakIterator *result = NULL;
|
||||
switch (kind) {
|
||||
@ -428,18 +428,29 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
|
||||
break;
|
||||
case UBRK_LINE:
|
||||
{
|
||||
char lb_lw[kKeyValueLenMax];
|
||||
UTRACE_ENTRY(UTRACE_UBRK_CREATE_LINE);
|
||||
uprv_strcpy(lbType, "line");
|
||||
char lbKeyValue[kKeyValueLenMax] = {0};
|
||||
uprv_strcpy(lb_lw, "line");
|
||||
UErrorCode kvStatus = U_ZERO_ERROR;
|
||||
int32_t kLen = loc.getKeywordValue("lb", lbKeyValue, kKeyValueLenMax, kvStatus);
|
||||
if (U_SUCCESS(kvStatus) && kLen > 0 && (uprv_strcmp(lbKeyValue,"strict")==0 || uprv_strcmp(lbKeyValue,"normal")==0 || uprv_strcmp(lbKeyValue,"loose")==0)) {
|
||||
uprv_strcat(lbType, "_");
|
||||
uprv_strcat(lbType, lbKeyValue);
|
||||
CharString value;
|
||||
CharStringByteSink valueSink(&value);
|
||||
loc.getKeywordValue("lb", valueSink, kvStatus);
|
||||
if (U_SUCCESS(kvStatus) && (value == "strict" || value == "normal" || value == "loose")) {
|
||||
uprv_strcat(lb_lw, "_");
|
||||
uprv_strcat(lb_lw, value.data());
|
||||
}
|
||||
result = BreakIterator::buildInstance(loc, lbType, status);
|
||||
// lw=phrase is only supported in Japanese.
|
||||
if (uprv_strcmp(loc.getLanguage(), "ja") == 0) {
|
||||
value.clear();
|
||||
loc.getKeywordValue("lw", valueSink, kvStatus);
|
||||
if (U_SUCCESS(kvStatus) && value == "phrase") {
|
||||
uprv_strcat(lb_lw, "_");
|
||||
uprv_strcat(lb_lw, value.data());
|
||||
}
|
||||
}
|
||||
result = BreakIterator::buildInstance(loc, lb_lw, status);
|
||||
|
||||
UTRACE_DATA1(UTRACE_INFO, "lb=%s", lbKeyValue);
|
||||
UTRACE_DATA1(UTRACE_INFO, "lb_lw=%s", lb_lw);
|
||||
UTRACE_EXIT_STATUS(status);
|
||||
}
|
||||
break;
|
||||
|
@ -1,16 +0,0 @@
|
||||
brkiter.o brkiter.d : brkiter.cpp unicode/utypes.h unicode/umachine.h \
|
||||
unicode/ptypes.h unicode/platform.h unicode/uconfig.h \
|
||||
unicode/uvernum.h unicode/urename.h unicode/uversion.h unicode/rbbi.h \
|
||||
unicode/brkiter.h unicode/uobject.h unicode/unistr.h \
|
||||
unicode/char16ptr.h unicode/rep.h unicode/std_string.h \
|
||||
unicode/stringpiece.h unicode/bytestream.h unicode/chariter.h \
|
||||
unicode/locid.h unicode/localpointer.h unicode/strenum.h \
|
||||
unicode/putil.h unicode/uloc.h unicode/uenum.h unicode/ubrk.h \
|
||||
unicode/utext.h unicode/uchar.h unicode/stringoptions.h \
|
||||
unicode/ucpmap.h unicode/parseerr.h unicode/umisc.h unicode/udata.h \
|
||||
unicode/schriter.h unicode/uchriter.h unicode/ures.h unicode/ustring.h \
|
||||
unicode/uiter.h unicode/filteredbrk.h ucln_cmn.h ucln.h cstring.h \
|
||||
cmemory.h umutex.h unicode/uclean.h putilimp.h servloc.h hash.h \
|
||||
uhash.h uelement.h uvector.h uarrsort.h serv.h servnotf.h mutex.h \
|
||||
locutil.h locbased.h uresimp.h uresdata.h udataswp.h resource.h \
|
||||
restrace.h uassert.h ubrkimpl.h utracimp.h unicode/utrace.h charstr.h
|
@ -20,7 +20,7 @@ U_NAMESPACE_BEGIN
|
||||
UBool
|
||||
ByteSinkUtil::appendChange(int32_t length, const char16_t *s16, int32_t s16Length,
|
||||
ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
|
||||
if (U_FAILURE(errorCode)) { return FALSE; }
|
||||
if (U_FAILURE(errorCode)) { return false; }
|
||||
char scratch[200];
|
||||
int32_t s8Length = 0;
|
||||
for (int32_t i = 0; i < s16Length;) {
|
||||
@ -44,7 +44,7 @@ ByteSinkUtil::appendChange(int32_t length, const char16_t *s16, int32_t s16Lengt
|
||||
}
|
||||
if (j > (INT32_MAX - s8Length)) {
|
||||
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return FALSE;
|
||||
return false;
|
||||
}
|
||||
sink.Append(buffer, j);
|
||||
s8Length += j;
|
||||
@ -52,17 +52,17 @@ ByteSinkUtil::appendChange(int32_t length, const char16_t *s16, int32_t s16Lengt
|
||||
if (edits != nullptr) {
|
||||
edits->addReplace(length, s8Length);
|
||||
}
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
|
||||
UBool
|
||||
ByteSinkUtil::appendChange(const uint8_t *s, const uint8_t *limit,
|
||||
const char16_t *s16, int32_t s16Length,
|
||||
ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
|
||||
if (U_FAILURE(errorCode)) { return FALSE; }
|
||||
if (U_FAILURE(errorCode)) { return false; }
|
||||
if ((limit - s) > INT32_MAX) {
|
||||
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return FALSE;
|
||||
return false;
|
||||
}
|
||||
return appendChange((int32_t)(limit - s), s16, s16Length, sink, edits, errorCode);
|
||||
}
|
||||
@ -109,16 +109,16 @@ UBool
|
||||
ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit,
|
||||
ByteSink &sink, uint32_t options, Edits *edits,
|
||||
UErrorCode &errorCode) {
|
||||
if (U_FAILURE(errorCode)) { return FALSE; }
|
||||
if (U_FAILURE(errorCode)) { return false; }
|
||||
if ((limit - s) > INT32_MAX) {
|
||||
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return FALSE;
|
||||
return false;
|
||||
}
|
||||
int32_t length = (int32_t)(limit - s);
|
||||
if (length > 0) {
|
||||
appendNonEmptyUnchanged(s, length, sink, options, edits);
|
||||
}
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
|
||||
CharStringByteSink::CharStringByteSink(CharString* dest) : dest_(*dest) {
|
||||
|
@ -1,8 +0,0 @@
|
||||
bytesinkutil.o bytesinkutil.d : bytesinkutil.cpp unicode/utypes.h unicode/umachine.h \
|
||||
unicode/ptypes.h unicode/platform.h unicode/uconfig.h \
|
||||
unicode/uvernum.h unicode/urename.h unicode/uversion.h \
|
||||
unicode/bytestream.h unicode/uobject.h unicode/std_string.h \
|
||||
unicode/edits.h unicode/stringoptions.h unicode/utf8.h unicode/utf.h \
|
||||
unicode/utf16.h bytesinkutil.h cmemory.h unicode/localpointer.h \
|
||||
uassert.h charstr.h unicode/unistr.h unicode/char16ptr.h unicode/rep.h \
|
||||
unicode/stringpiece.h
|
@ -4,6 +4,9 @@
|
||||
// bytesinkutil.h
|
||||
// created: 2017sep14 Markus W. Scherer
|
||||
|
||||
#ifndef BYTESINKUTIL_H
|
||||
#define BYTESINKUTIL_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/bytestream.h"
|
||||
#include "unicode/edits.h"
|
||||
@ -81,3 +84,5 @@ private:
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif //BYTESINKUTIL_H
|
||||
|
@ -30,14 +30,14 @@ void ByteSink::Flush() {}
|
||||
|
||||
CheckedArrayByteSink::CheckedArrayByteSink(char* outbuf, int32_t capacity)
|
||||
: outbuf_(outbuf), capacity_(capacity < 0 ? 0 : capacity),
|
||||
size_(0), appended_(0), overflowed_(FALSE) {
|
||||
size_(0), appended_(0), overflowed_(false) {
|
||||
}
|
||||
|
||||
CheckedArrayByteSink::~CheckedArrayByteSink() {}
|
||||
|
||||
CheckedArrayByteSink& CheckedArrayByteSink::Reset() {
|
||||
size_ = appended_ = 0;
|
||||
overflowed_ = FALSE;
|
||||
overflowed_ = false;
|
||||
return *this;
|
||||
}
|
||||
|
||||
@ -48,14 +48,14 @@ void CheckedArrayByteSink::Append(const char* bytes, int32_t n) {
|
||||
if (n > (INT32_MAX - appended_)) {
|
||||
// TODO: Report as integer overflow, not merely buffer overflow.
|
||||
appended_ = INT32_MAX;
|
||||
overflowed_ = TRUE;
|
||||
overflowed_ = true;
|
||||
return;
|
||||
}
|
||||
appended_ += n;
|
||||
int32_t available = capacity_ - size_;
|
||||
if (n > available) {
|
||||
n = available;
|
||||
overflowed_ = TRUE;
|
||||
overflowed_ = true;
|
||||
}
|
||||
if (n > 0 && bytes != (outbuf_ + size_)) {
|
||||
uprv_memcpy(outbuf_ + size_, bytes, n);
|
||||
|
@ -1,5 +0,0 @@
|
||||
bytestream.o bytestream.d : bytestream.cpp unicode/utypes.h unicode/umachine.h \
|
||||
unicode/ptypes.h unicode/platform.h unicode/uconfig.h \
|
||||
unicode/uvernum.h unicode/urename.h unicode/uversion.h \
|
||||
unicode/bytestream.h unicode/uobject.h unicode/std_string.h cmemory.h \
|
||||
unicode/localpointer.h
|
@ -337,13 +337,13 @@ BytesTrie::findUniqueValueFromBranch(const uint8_t *pos, int32_t length,
|
||||
}
|
||||
} else {
|
||||
uniqueValue=value;
|
||||
haveUniqueValue=TRUE;
|
||||
haveUniqueValue=true;
|
||||
}
|
||||
} else {
|
||||
if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) {
|
||||
return NULL;
|
||||
}
|
||||
haveUniqueValue=TRUE;
|
||||
haveUniqueValue=true;
|
||||
}
|
||||
} while(--length>1);
|
||||
return pos+1; // ignore the last comparison byte
|
||||
@ -359,9 +359,9 @@ BytesTrie::findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &u
|
||||
}
|
||||
pos=findUniqueValueFromBranch(pos, node+1, haveUniqueValue, uniqueValue);
|
||||
if(pos==NULL) {
|
||||
return FALSE;
|
||||
return false;
|
||||
}
|
||||
haveUniqueValue=TRUE;
|
||||
haveUniqueValue=true;
|
||||
} else if(node<kMinValueLead) {
|
||||
// linear-match node
|
||||
pos+=node-kMinLinearMatch+1; // Ignore the match bytes.
|
||||
@ -370,14 +370,14 @@ BytesTrie::findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &u
|
||||
int32_t value=readValue(pos, node>>1);
|
||||
if(haveUniqueValue) {
|
||||
if(value!=uniqueValue) {
|
||||
return FALSE;
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
uniqueValue=value;
|
||||
haveUniqueValue=TRUE;
|
||||
haveUniqueValue=true;
|
||||
}
|
||||
if(isFinal) {
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
pos=skipValue(pos, node);
|
||||
}
|
||||
|
@ -1,6 +0,0 @@
|
||||
bytestrie.o bytestrie.d : bytestrie.cpp unicode/utypes.h unicode/umachine.h \
|
||||
unicode/ptypes.h unicode/platform.h unicode/uconfig.h \
|
||||
unicode/uvernum.h unicode/urename.h unicode/uversion.h \
|
||||
unicode/bytestream.h unicode/uobject.h unicode/std_string.h \
|
||||
unicode/bytestrie.h unicode/stringpiece.h unicode/ustringtrie.h \
|
||||
cmemory.h unicode/localpointer.h uassert.h
|
@ -231,7 +231,7 @@ BytesTrieBuilder::buildBytes(UStringTrieBuildOption buildOption, UErrorCode &err
|
||||
}
|
||||
uprv_sortArray(elements, elementsLength, (int32_t)sizeof(BytesTrieElement),
|
||||
compareElementStrings, strings,
|
||||
FALSE, // need not be a stable sort
|
||||
false, // need not be a stable sort
|
||||
&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return;
|
||||
@ -343,13 +343,13 @@ BytesTrieBuilder::BTLinearMatchNode::BTLinearMatchNode(const char *bytes, int32_
|
||||
static_cast<uint32_t>(hash)*37u + static_cast<uint32_t>(ustr_hashCharsN(bytes, len)));
|
||||
}
|
||||
|
||||
UBool
|
||||
bool
|
||||
BytesTrieBuilder::BTLinearMatchNode::operator==(const Node &other) const {
|
||||
if(this==&other) {
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
if(!LinearMatchNode::operator==(other)) {
|
||||
return FALSE;
|
||||
return false;
|
||||
}
|
||||
const BTLinearMatchNode &o=(const BTLinearMatchNode &)other;
|
||||
return 0==uprv_memcmp(s, o.s, length);
|
||||
@ -375,7 +375,7 @@ BytesTrieBuilder::createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t le
|
||||
UBool
|
||||
BytesTrieBuilder::ensureCapacity(int32_t length) {
|
||||
if(bytes==NULL) {
|
||||
return FALSE; // previous memory allocation had failed
|
||||
return false; // previous memory allocation had failed
|
||||
}
|
||||
if(length>bytesCapacity) {
|
||||
int32_t newCapacity=bytesCapacity;
|
||||
@ -388,7 +388,7 @@ BytesTrieBuilder::ensureCapacity(int32_t length) {
|
||||
uprv_free(bytes);
|
||||
bytes=NULL;
|
||||
bytesCapacity=0;
|
||||
return FALSE;
|
||||
return false;
|
||||
}
|
||||
uprv_memcpy(newBytes+(newCapacity-bytesLength),
|
||||
bytes+(bytesCapacity-bytesLength), bytesLength);
|
||||
@ -396,7 +396,7 @@ BytesTrieBuilder::ensureCapacity(int32_t length) {
|
||||
bytes=newBytes;
|
||||
bytesCapacity=newCapacity;
|
||||
}
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
|
||||
int32_t
|
||||
@ -463,7 +463,7 @@ int32_t
|
||||
BytesTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) {
|
||||
int32_t offset=write(node);
|
||||
if(hasValue) {
|
||||
offset=writeValueAndFinal(value, FALSE);
|
||||
offset=writeValueAndFinal(value, false);
|
||||
}
|
||||
return offset;
|
||||
}
|
||||
@ -474,31 +474,39 @@ BytesTrieBuilder::writeDeltaTo(int32_t jumpTarget) {
|
||||
U_ASSERT(i>=0);
|
||||
if(i<=BytesTrie::kMaxOneByteDelta) {
|
||||
return write(i);
|
||||
}
|
||||
} else {
|
||||
char intBytes[5];
|
||||
int32_t length;
|
||||
return write(intBytes, internalEncodeDelta(i, intBytes));
|
||||
}
|
||||
}
|
||||
|
||||
int32_t
|
||||
BytesTrieBuilder::internalEncodeDelta(int32_t i, char intBytes[]) {
|
||||
U_ASSERT(i>=0);
|
||||
if(i<=BytesTrie::kMaxOneByteDelta) {
|
||||
intBytes[0]=(char)i;
|
||||
return 1;
|
||||
}
|
||||
int32_t length=1;
|
||||
if(i<=BytesTrie::kMaxTwoByteDelta) {
|
||||
intBytes[0]=(char)(BytesTrie::kMinTwoByteDeltaLead+(i>>8));
|
||||
length=1;
|
||||
} else {
|
||||
if(i<=BytesTrie::kMaxThreeByteDelta) {
|
||||
intBytes[0]=(char)(BytesTrie::kMinThreeByteDeltaLead+(i>>16));
|
||||
length=2;
|
||||
} else {
|
||||
if(i<=0xffffff) {
|
||||
intBytes[0]=(char)BytesTrie::kFourByteDeltaLead;
|
||||
length=3;
|
||||
} else {
|
||||
intBytes[0]=(char)BytesTrie::kFiveByteDeltaLead;
|
||||
intBytes[1]=(char)(i>>24);
|
||||
length=4;
|
||||
length=2;
|
||||
}
|
||||
intBytes[1]=(char)(i>>16);
|
||||
intBytes[length++]=(char)(i>>16);
|
||||
}
|
||||
intBytes[1]=(char)(i>>8);
|
||||
intBytes[length++]=(char)(i>>8);
|
||||
}
|
||||
intBytes[length++]=(char)i;
|
||||
return write(intBytes, length);
|
||||
return length;
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
@ -1,10 +0,0 @@
|
||||
bytestriebuilder.o bytestriebuilder.d : bytestriebuilder.cpp unicode/utypes.h \
|
||||
unicode/umachine.h unicode/ptypes.h unicode/platform.h \
|
||||
unicode/uconfig.h unicode/uvernum.h unicode/urename.h \
|
||||
unicode/uversion.h unicode/bytestrie.h unicode/stringpiece.h \
|
||||
unicode/uobject.h unicode/std_string.h unicode/ustringtrie.h \
|
||||
unicode/bytestriebuilder.h unicode/stringtriebuilder.h charstr.h \
|
||||
unicode/unistr.h unicode/char16ptr.h unicode/rep.h \
|
||||
unicode/bytestream.h cmemory.h unicode/localpointer.h uhash.h \
|
||||
uelement.h uarrsort.h uassert.h ustr_imp.h unicode/utf8.h \
|
||||
unicode/utf.h
|
@ -101,12 +101,12 @@ BytesTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty();
|
||||
UBool
|
||||
BytesTrie::Iterator::next(UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return FALSE;
|
||||
return false;
|
||||
}
|
||||
const uint8_t *pos=pos_;
|
||||
if(pos==NULL) {
|
||||
if(stack_->isEmpty()) {
|
||||
return FALSE;
|
||||
return false;
|
||||
}
|
||||
// Pop the state off the stack and continue with the next outbound edge of
|
||||
// the branch node.
|
||||
@ -119,7 +119,7 @@ BytesTrie::Iterator::next(UErrorCode &errorCode) {
|
||||
if(length>1) {
|
||||
pos=branchNext(pos, length, errorCode);
|
||||
if(pos==NULL) {
|
||||
return TRUE; // Reached a final value.
|
||||
return true; // Reached a final value.
|
||||
}
|
||||
} else {
|
||||
str_->append((char)*pos++, errorCode);
|
||||
@ -141,7 +141,7 @@ BytesTrie::Iterator::next(UErrorCode &errorCode) {
|
||||
} else {
|
||||
pos_=skipValue(pos, node);
|
||||
}
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
if(maxLength_>0 && str_->length()==maxLength_) {
|
||||
return truncateAndStop();
|
||||
@ -152,7 +152,7 @@ BytesTrie::Iterator::next(UErrorCode &errorCode) {
|
||||
}
|
||||
pos=branchNext(pos, node+1, errorCode);
|
||||
if(pos==NULL) {
|
||||
return TRUE; // Reached a final value.
|
||||
return true; // Reached a final value.
|
||||
}
|
||||
} else {
|
||||
// Linear-match node, append length bytes to str_.
|
||||
@ -177,7 +177,7 @@ UBool
|
||||
BytesTrie::Iterator::truncateAndStop() {
|
||||
pos_=NULL;
|
||||
value_=-1; // no real value for str
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Branch node, needs to take the first outbound edge and push state for the rest.
|
||||
|
@ -1,8 +0,0 @@
|
||||
bytestrieiterator.o bytestrieiterator.d : bytestrieiterator.cpp unicode/utypes.h \
|
||||
unicode/umachine.h unicode/ptypes.h unicode/platform.h \
|
||||
unicode/uconfig.h unicode/uvernum.h unicode/urename.h \
|
||||
unicode/uversion.h unicode/bytestrie.h unicode/stringpiece.h \
|
||||
unicode/uobject.h unicode/std_string.h unicode/ustringtrie.h charstr.h \
|
||||
unicode/unistr.h unicode/char16ptr.h unicode/rep.h \
|
||||
unicode/bytestream.h cmemory.h unicode/localpointer.h uvectr32.h \
|
||||
uhash.h uelement.h uassert.h
|
@ -119,7 +119,7 @@ UnicodeString CanonicalIterator::getSource() {
|
||||
* Resets the iterator so that one can start again from the beginning.
|
||||
*/
|
||||
void CanonicalIterator::reset() {
|
||||
done = FALSE;
|
||||
done = false;
|
||||
for (int i = 0; i < current_length; ++i) {
|
||||
current[i] = 0;
|
||||
}
|
||||
@ -151,7 +151,7 @@ UnicodeString CanonicalIterator::next() {
|
||||
|
||||
for (i = current_length - 1; ; --i) {
|
||||
if (i < 0) {
|
||||
done = TRUE;
|
||||
done = true;
|
||||
break;
|
||||
}
|
||||
current[i]++;
|
||||
@ -176,7 +176,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
|
||||
if(U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
done = FALSE;
|
||||
done = false;
|
||||
|
||||
cleanPieces();
|
||||
|
||||
@ -208,10 +208,10 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
|
||||
goto CleanPartialInitialization;
|
||||
}
|
||||
|
||||
// i should initialy be the number of code units at the
|
||||
// i should initially be the number of code units at the
|
||||
// start of the string
|
||||
i = U16_LENGTH(source.char32At(0));
|
||||
//int32_t i = 1;
|
||||
// int32_t i = 1;
|
||||
// find the segments
|
||||
// This code iterates through the source string and
|
||||
// extracts segments that end up on a codepoint that
|
||||
@ -494,7 +494,7 @@ Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const UCh
|
||||
|
||||
/**
|
||||
* See if the decomposition of cp2 is at segment starting at segmentPos
|
||||
* (with canonical rearrangment!)
|
||||
* (with canonical rearrangement!)
|
||||
* If so, take the remainder, and return the equivalents
|
||||
*/
|
||||
Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, const UChar *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) {
|
||||
@ -521,7 +521,7 @@ Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, con
|
||||
int32_t decompLen=decompString.length();
|
||||
|
||||
// See if it matches the start of segment (at segmentPos)
|
||||
UBool ok = FALSE;
|
||||
UBool ok = false;
|
||||
UChar32 cp;
|
||||
int32_t decompPos = 0;
|
||||
UChar32 decompCp;
|
||||
@ -537,7 +537,7 @@ Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, con
|
||||
|
||||
if (decompPos == decompLen) { // done, have all decomp characters!
|
||||
temp.append(segment+i, segLen-i);
|
||||
ok = TRUE;
|
||||
ok = true;
|
||||
break;
|
||||
}
|
||||
U16_NEXT(decomp, decompPos, decompLen, decompCp);
|
||||
|
@ -1,13 +0,0 @@
|
||||
caniter.o caniter.d : caniter.cpp unicode/utypes.h unicode/umachine.h \
|
||||
unicode/ptypes.h unicode/platform.h unicode/uconfig.h \
|
||||
unicode/uvernum.h unicode/urename.h unicode/uversion.h \
|
||||
unicode/caniter.h unicode/uobject.h unicode/unistr.h \
|
||||
unicode/char16ptr.h unicode/rep.h unicode/std_string.h \
|
||||
unicode/stringpiece.h unicode/bytestream.h unicode/normalizer2.h \
|
||||
unicode/uniset.h unicode/ucpmap.h unicode/unifilt.h unicode/unifunct.h \
|
||||
unicode/unimatch.h unicode/uset.h unicode/uchar.h \
|
||||
unicode/stringoptions.h unicode/localpointer.h unicode/unorm2.h \
|
||||
unicode/usetiter.h unicode/ustring.h unicode/putil.h unicode/uiter.h \
|
||||
unicode/utf16.h unicode/utf.h cmemory.h hash.h uhash.h uelement.h \
|
||||
normalizer2impl.h unicode/ucptrie.h unicode/utf8.h unicode/unorm.h \
|
||||
mutex.h umutex.h unicode/uclean.h putilimp.h udataswp.h uset_imp.h
|
@ -14,6 +14,7 @@
|
||||
#include "unicode/uscript.h"
|
||||
#include "unicode/uset.h"
|
||||
#include "cmemory.h"
|
||||
#include "emojiprops.h"
|
||||
#include "mutex.h"
|
||||
#include "normalizer2impl.h"
|
||||
#include "uassert.h"
|
||||
@ -35,11 +36,11 @@ namespace {
|
||||
|
||||
UBool U_CALLCONV characterproperties_cleanup();
|
||||
|
||||
constexpr int32_t NUM_INCLUSIONS = UPROPS_SRC_COUNT + UCHAR_INT_LIMIT - UCHAR_INT_START;
|
||||
constexpr int32_t NUM_INCLUSIONS = UPROPS_SRC_COUNT + (UCHAR_INT_LIMIT - UCHAR_INT_START);
|
||||
|
||||
struct Inclusion {
|
||||
UnicodeSet *fSet = nullptr;
|
||||
UInitOnce fInitOnce = U_INITONCE_INITIALIZER;
|
||||
UInitOnce fInitOnce {};
|
||||
};
|
||||
Inclusion gInclusions[NUM_INCLUSIONS]; // cached getInclusions()
|
||||
|
||||
@ -84,7 +85,7 @@ UBool U_CALLCONV characterproperties_cleanup() {
|
||||
ucptrie_close(reinterpret_cast<UCPTrie *>(maps[i]));
|
||||
maps[i] = nullptr;
|
||||
}
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
|
||||
void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode) {
|
||||
@ -170,6 +171,13 @@ void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode) {
|
||||
case UPROPS_SRC_VO:
|
||||
uprops_addPropertyStarts((UPropertySource)src, &sa, &errorCode);
|
||||
break;
|
||||
case UPROPS_SRC_EMOJI: {
|
||||
const icu::EmojiProps *ep = icu::EmojiProps::getSingleton(errorCode);
|
||||
if (U_SUCCESS(errorCode)) {
|
||||
ep->addPropertyStarts(&sa, errorCode);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
errorCode = U_INTERNAL_PROGRAM_ERROR;
|
||||
break;
|
||||
@ -202,7 +210,7 @@ const UnicodeSet *getInclusionsForSource(UPropertySource src, UErrorCode &errorC
|
||||
void U_CALLCONV initIntPropInclusion(UProperty prop, UErrorCode &errorCode) {
|
||||
// This function is invoked only via umtx_initOnce().
|
||||
U_ASSERT(UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT);
|
||||
int32_t inclIndex = UPROPS_SRC_COUNT + prop - UCHAR_INT_START;
|
||||
int32_t inclIndex = UPROPS_SRC_COUNT + (prop - UCHAR_INT_START);
|
||||
U_ASSERT(gInclusions[inclIndex].fSet == nullptr);
|
||||
UPropertySource src = uprops_getSource(prop);
|
||||
const UnicodeSet *incl = getInclusionsForSource(src, errorCode);
|
||||
@ -247,7 +255,7 @@ const UnicodeSet *CharacterProperties::getInclusionsForProperty(
|
||||
UProperty prop, UErrorCode &errorCode) {
|
||||
if (U_FAILURE(errorCode)) { return nullptr; }
|
||||
if (UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT) {
|
||||
int32_t inclIndex = UPROPS_SRC_COUNT + prop - UCHAR_INT_START;
|
||||
int32_t inclIndex = UPROPS_SRC_COUNT + (prop - UCHAR_INT_START);
|
||||
Inclusion &i = gInclusions[inclIndex];
|
||||
umtx_initOnce(i.fInitOnce, &initIntPropInclusion, prop, errorCode);
|
||||
return i.fSet;
|
||||
@ -268,6 +276,26 @@ UnicodeSet *makeSet(UProperty property, UErrorCode &errorCode) {
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
return nullptr;
|
||||
}
|
||||
if (UCHAR_BASIC_EMOJI <= property && property <= UCHAR_RGI_EMOJI) {
|
||||
// property of strings
|
||||
const icu::EmojiProps *ep = icu::EmojiProps::getSingleton(errorCode);
|
||||
if (U_FAILURE(errorCode)) { return nullptr; }
|
||||
USetAdder sa = {
|
||||
(USet *)set.getAlias(),
|
||||
_set_add,
|
||||
_set_addRange,
|
||||
_set_addString,
|
||||
nullptr, // don't need remove()
|
||||
nullptr // don't need removeRange()
|
||||
};
|
||||
ep->addStrings(&sa, property, errorCode);
|
||||
if (property != UCHAR_BASIC_EMOJI && property != UCHAR_RGI_EMOJI) {
|
||||
// property of _only_ strings
|
||||
set->freeze();
|
||||
return set.orphan();
|
||||
}
|
||||
}
|
||||
|
||||
const UnicodeSet *inclusions =
|
||||
icu::CharacterProperties::getInclusionsForProperty(property, errorCode);
|
||||
if (U_FAILURE(errorCode)) { return nullptr; }
|
||||
|
@ -1,14 +0,0 @@
|
||||
characterproperties.o characterproperties.d : characterproperties.cpp unicode/utypes.h \
|
||||
unicode/umachine.h unicode/ptypes.h unicode/platform.h \
|
||||
unicode/uconfig.h unicode/uvernum.h unicode/urename.h \
|
||||
unicode/uversion.h unicode/localpointer.h unicode/uchar.h \
|
||||
unicode/stringoptions.h unicode/ucpmap.h unicode/ucptrie.h \
|
||||
unicode/utf8.h unicode/utf.h unicode/umutablecptrie.h unicode/uniset.h \
|
||||
unicode/unifilt.h unicode/unifunct.h unicode/uobject.h \
|
||||
unicode/unimatch.h unicode/unistr.h unicode/char16ptr.h unicode/rep.h \
|
||||
unicode/std_string.h unicode/stringpiece.h unicode/bytestream.h \
|
||||
unicode/uset.h unicode/uscript.h cmemory.h mutex.h umutex.h \
|
||||
unicode/uclean.h putilimp.h unicode/putil.h normalizer2impl.h \
|
||||
unicode/normalizer2.h unicode/unorm2.h unicode/unorm.h unicode/uiter.h \
|
||||
unicode/utf16.h udataswp.h uset_imp.h uassert.h ubidi_props.h ucase.h \
|
||||
utrie2.h ucln_cmn.h ucln.h uprops.h
|
@ -1,6 +0,0 @@
|
||||
chariter.o chariter.d : chariter.cpp unicode/chariter.h unicode/utypes.h \
|
||||
unicode/umachine.h unicode/ptypes.h unicode/platform.h \
|
||||
unicode/uconfig.h unicode/uvernum.h unicode/urename.h \
|
||||
unicode/uversion.h unicode/uobject.h unicode/unistr.h \
|
||||
unicode/char16ptr.h unicode/rep.h unicode/std_string.h \
|
||||
unicode/stringpiece.h unicode/bytestream.h
|
@ -14,6 +14,8 @@
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/putil.h"
|
||||
#include "charstr.h"
|
||||
@ -141,6 +143,38 @@ CharString &CharString::append(const char *s, int32_t sLength, UErrorCode &error
|
||||
return *this;
|
||||
}
|
||||
|
||||
CharString &CharString::appendNumber(int32_t number, UErrorCode &status) {
|
||||
if (number < 0) {
|
||||
this->append('-', status);
|
||||
if (U_FAILURE(status)) {
|
||||
return *this;
|
||||
}
|
||||
}
|
||||
|
||||
if (number == 0) {
|
||||
this->append('0', status);
|
||||
return *this;
|
||||
}
|
||||
|
||||
int32_t numLen = 0;
|
||||
while (number != 0) {
|
||||
int32_t residue = number % 10;
|
||||
number /= 10;
|
||||
this->append(std::abs(residue) + '0', status);
|
||||
numLen++;
|
||||
if (U_FAILURE(status)) {
|
||||
return *this;
|
||||
}
|
||||
}
|
||||
|
||||
int32_t start = this->length() - numLen, end = this->length() - 1;
|
||||
while(start < end) {
|
||||
std::swap(this->data()[start++], this->data()[end--]);
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
char *CharString::getAppendBuffer(int32_t minCapacity,
|
||||
int32_t desiredCapacityHint,
|
||||
int32_t &resultCapacity,
|
||||
@ -186,7 +220,7 @@ UBool CharString::ensureCapacity(int32_t capacity,
|
||||
int32_t desiredCapacityHint,
|
||||
UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return FALSE;
|
||||
return false;
|
||||
}
|
||||
if(capacity>buffer.getCapacity()) {
|
||||
if(desiredCapacityHint==0) {
|
||||
@ -196,10 +230,10 @@ UBool CharString::ensureCapacity(int32_t capacity,
|
||||
buffer.resize(capacity, len+1)==NULL
|
||||
) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
return FALSE;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
|
||||
CharString &CharString::appendPathPart(StringPiece s, UErrorCode &errorCode) {
|
||||
|
@ -1,7 +0,0 @@
|
||||
charstr.o charstr.d : charstr.cpp unicode/utypes.h unicode/umachine.h \
|
||||
unicode/ptypes.h unicode/platform.h unicode/uconfig.h \
|
||||
unicode/uvernum.h unicode/urename.h unicode/uversion.h unicode/putil.h \
|
||||
charstr.h unicode/unistr.h unicode/char16ptr.h unicode/rep.h \
|
||||
unicode/uobject.h unicode/std_string.h unicode/stringpiece.h \
|
||||
unicode/bytestream.h cmemory.h unicode/localpointer.h cstring.h \
|
||||
uinvchar.h ustr_imp.h unicode/utf8.h unicode/utf.h
|
@ -127,6 +127,9 @@ public:
|
||||
return append(s.data(), s.length(), errorCode);
|
||||
}
|
||||
CharString &append(const char *s, int32_t sLength, UErrorCode &status);
|
||||
|
||||
CharString &appendNumber(int32_t number, UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Returns a writable buffer for appending and writes the buffer's capacity to
|
||||
* resultCapacity. Guarantees resultCapacity>=minCapacity if U_SUCCESS().
|
||||
@ -174,8 +177,8 @@ private:
|
||||
|
||||
UBool ensureCapacity(int32_t capacity, int32_t desiredCapacityHint, UErrorCode &errorCode);
|
||||
|
||||
CharString(const CharString &other); // forbid copying of this class
|
||||
CharString &operator=(const CharString &other); // forbid copying of this class
|
||||
CharString(const CharString &other) = delete; // forbid copying of this class
|
||||
CharString &operator=(const CharString &other) = delete; // forbid copying of this class
|
||||
|
||||
/**
|
||||
* Returns U_FILE_ALT_SEP_CHAR if found in string, and U_FILE_SEP_CHAR is not found.
|
||||
|
@ -134,5 +134,5 @@ U_CFUNC UBool cmemory_cleanup(void) {
|
||||
pAlloc = NULL;
|
||||
pRealloc = NULL;
|
||||
pFree = NULL;
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
|
@ -1,5 +0,0 @@
|
||||
cmemory.o cmemory.d : cmemory.cpp unicode/uclean.h unicode/utypes.h \
|
||||
unicode/umachine.h unicode/ptypes.h unicode/platform.h \
|
||||
unicode/uconfig.h unicode/uvernum.h unicode/urename.h \
|
||||
unicode/uversion.h cmemory.h unicode/localpointer.h unicode/uobject.h \
|
||||
putilimp.h unicode/putil.h uassert.h
|
@ -31,14 +31,63 @@
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
#include "unicode/localpointer.h"
|
||||
#include "uassert.h"
|
||||
|
||||
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
|
||||
#define uprv_memcpy(dst, src, size) U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size)
|
||||
#define uprv_memmove(dst, src, size) U_STANDARD_CPP_NAMESPACE memmove(dst, src, size)
|
||||
// uprv_memcpy and uprv_memmove
|
||||
#if defined(__clang__)
|
||||
#define uprv_memcpy(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
/* Suppress warnings about addresses that will never be NULL */ \
|
||||
_Pragma("clang diagnostic push") \
|
||||
_Pragma("clang diagnostic ignored \"-Waddress\"") \
|
||||
U_ASSERT(dst != NULL); \
|
||||
U_ASSERT(src != NULL); \
|
||||
_Pragma("clang diagnostic pop") \
|
||||
U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size); \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
#define uprv_memmove(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
/* Suppress warnings about addresses that will never be NULL */ \
|
||||
_Pragma("clang diagnostic push") \
|
||||
_Pragma("clang diagnostic ignored \"-Waddress\"") \
|
||||
U_ASSERT(dst != NULL); \
|
||||
U_ASSERT(src != NULL); \
|
||||
_Pragma("clang diagnostic pop") \
|
||||
U_STANDARD_CPP_NAMESPACE memmove(dst, src, size); \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
#elif defined(__GNUC__)
|
||||
#define uprv_memcpy(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
/* Suppress warnings about addresses that will never be NULL */ \
|
||||
_Pragma("GCC diagnostic push") \
|
||||
_Pragma("GCC diagnostic ignored \"-Waddress\"") \
|
||||
U_ASSERT(dst != NULL); \
|
||||
U_ASSERT(src != NULL); \
|
||||
_Pragma("GCC diagnostic pop") \
|
||||
U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size); \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
#define uprv_memmove(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
/* Suppress warnings about addresses that will never be NULL */ \
|
||||
_Pragma("GCC diagnostic push") \
|
||||
_Pragma("GCC diagnostic ignored \"-Waddress\"") \
|
||||
U_ASSERT(dst != NULL); \
|
||||
U_ASSERT(src != NULL); \
|
||||
_Pragma("GCC diagnostic pop") \
|
||||
U_STANDARD_CPP_NAMESPACE memmove(dst, src, size); \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
#else
|
||||
#define uprv_memcpy(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
U_ASSERT(dst != NULL); \
|
||||
U_ASSERT(src != NULL); \
|
||||
U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size); \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
#define uprv_memmove(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
U_ASSERT(dst != NULL); \
|
||||
U_ASSERT(src != NULL); \
|
||||
U_STANDARD_CPP_NAMESPACE memmove(dst, src, size); \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UPRV_LENGTHOF
|
||||
|
@ -58,7 +58,7 @@
|
||||
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc68d.dll</OutputFile>
|
||||
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc$(IcuMajorVersion)d.dll</OutputFile>
|
||||
<ProgramDatabaseFile>.\..\..\$(IcuLibOutputDir)\icuucd.pdb</ProgramDatabaseFile>
|
||||
<ImportLibrary>..\..\$(IcuLibOutputDir)\icuucd.lib</ImportLibrary>
|
||||
</Link>
|
||||
@ -70,7 +70,7 @@
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc68.dll</OutputFile>
|
||||
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc$(IcuMajorVersion).dll</OutputFile>
|
||||
<ProgramDatabaseFile>.\..\..\$(IcuLibOutputDir)\icuuc.pdb</ProgramDatabaseFile>
|
||||
<ImportLibrary>..\..\$(IcuLibOutputDir)\icuuc.lib</ImportLibrary>
|
||||
</Link>
|
||||
@ -87,6 +87,7 @@
|
||||
<ClCompile Include="brkeng.cpp" />
|
||||
<ClCompile Include="brkiter.cpp" />
|
||||
<ClCompile Include="dictbe.cpp" />
|
||||
<ClCompile Include="lstmbe.cpp" />
|
||||
<ClCompile Include="pluralmap.cpp" />
|
||||
<ClCompile Include="rbbi.cpp" />
|
||||
<ClCompile Include="rbbidata.cpp" />
|
||||
@ -203,6 +204,7 @@
|
||||
<ClCompile Include="ucase.cpp" />
|
||||
<ClCompile Include="uchar.cpp" />
|
||||
<ClCompile Include="characterproperties.cpp" />
|
||||
<ClCompile Include="emojiprops.cpp" />
|
||||
<ClCompile Include="unames.cpp" />
|
||||
<ClCompile Include="unifiedcache.cpp" />
|
||||
<ClCompile Include="unifilt.cpp" />
|
||||
@ -279,6 +281,7 @@
|
||||
<ClInclude Include="ubidiimp.h" />
|
||||
<ClInclude Include="brkeng.h" />
|
||||
<ClInclude Include="dictbe.h" />
|
||||
<ClInclude Include="lstmbe.h" />
|
||||
<ClInclude Include="rbbidata.h" />
|
||||
<ClInclude Include="rbbinode.h" />
|
||||
<ClInclude Include="rbbirb.h" />
|
||||
@ -363,6 +366,7 @@
|
||||
<ClInclude Include="patternprops.h" />
|
||||
<ClInclude Include="propname.h" />
|
||||
<ClInclude Include="ruleiter.h" />
|
||||
<ClInclude Include="emojiprops.h" />
|
||||
<ClInclude Include="ucase.h" />
|
||||
<ClInclude Include="ulayout_props.h" />
|
||||
<ClInclude Include="unisetspan.h" />
|
||||
|
@ -73,6 +73,9 @@
|
||||
<ClCompile Include="dictbe.cpp">
|
||||
<Filter>break iteration</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="lstmbe.cpp">
|
||||
<Filter>break iteration</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="rbbi.cpp">
|
||||
<Filter>break iteration</Filter>
|
||||
</ClCompile>
|
||||
@ -409,6 +412,9 @@
|
||||
<ClCompile Include="characterproperties.cpp">
|
||||
<Filter>properties & sets</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="emojiprops.cpp">
|
||||
<Filter>properties & sets</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="propname.cpp">
|
||||
<Filter>properties & sets</Filter>
|
||||
</ClCompile>
|
||||
@ -651,6 +657,9 @@
|
||||
<ClInclude Include="dictbe.h">
|
||||
<Filter>break iteration</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="lstmbe.h">
|
||||
<Filter>break iteration</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="rbbidata.h">
|
||||
<Filter>break iteration</Filter>
|
||||
</ClInclude>
|
||||
@ -888,6 +897,9 @@
|
||||
<ClInclude Include="ruleiter.h">
|
||||
<Filter>properties & sets</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="emojiprops.h">
|
||||
<Filter>properties & sets</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="ucase.h">
|
||||
<Filter>properties & sets</Filter>
|
||||
</ClInclude>
|
||||
|
@ -125,7 +125,7 @@
|
||||
<Link>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<AdditionalDependencies>vccorlib.lib;msvcrt.lib;vcruntime.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc68.dll</OutputFile>
|
||||
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc$(IcuMajorVersion).dll</OutputFile>
|
||||
<ProgramDatabaseFile>.\..\..\$(IcuLibOutputDir)\icuuc.pdb</ProgramDatabaseFile>
|
||||
<ImportLibrary>..\..\$(IcuLibOutputDir)\icuuc.lib</ImportLibrary>
|
||||
</Link>
|
||||
@ -148,7 +148,7 @@
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>vccorlibd.lib;msvcrtd.lib;vcruntimed.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc68d.dll</OutputFile>
|
||||
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc$(IcuMajorVersion)d.dll</OutputFile>
|
||||
<ProgramDatabaseFile>.\..\..\$(IcuLibOutputDir)\icuucd.pdb</ProgramDatabaseFile>
|
||||
<ImportLibrary>..\..\$(IcuLibOutputDir)\icuucd.lib</ImportLibrary>
|
||||
</Link>
|
||||
@ -221,6 +221,7 @@
|
||||
<ClCompile Include="brkeng.cpp" />
|
||||
<ClCompile Include="brkiter.cpp" />
|
||||
<ClCompile Include="dictbe.cpp" />
|
||||
<ClCompile Include="lstmbe.cpp" />
|
||||
<ClCompile Include="pluralmap.cpp" />
|
||||
<ClCompile Include="rbbi.cpp" />
|
||||
<ClCompile Include="rbbidata.cpp" />
|
||||
@ -337,6 +338,7 @@
|
||||
<ClCompile Include="ucase.cpp" />
|
||||
<ClCompile Include="uchar.cpp" />
|
||||
<ClCompile Include="characterproperties.cpp" />
|
||||
<ClCompile Include="emojiprops.cpp" />
|
||||
<ClCompile Include="unames.cpp" />
|
||||
<ClCompile Include="unifiedcache.cpp" />
|
||||
<ClCompile Include="unifilt.cpp" />
|
||||
@ -414,6 +416,7 @@
|
||||
<ClInclude Include="ubidiimp.h" />
|
||||
<ClInclude Include="brkeng.h" />
|
||||
<ClInclude Include="dictbe.h" />
|
||||
<ClInclude Include="lstmbe.h" />
|
||||
<ClInclude Include="rbbidata.h" />
|
||||
<ClInclude Include="rbbinode.h" />
|
||||
<ClInclude Include="rbbirb.h" />
|
||||
@ -498,6 +501,7 @@
|
||||
<ClInclude Include="patternprops.h" />
|
||||
<ClInclude Include="propname.h" />
|
||||
<ClInclude Include="ruleiter.h" />
|
||||
<ClInclude Include="emojiprops.h" />
|
||||
<ClInclude Include="ucase.h" />
|
||||
<ClInclude Include="ulayout_props.h" />
|
||||
<ClInclude Include="unisetspan.h" />
|
||||
|
@ -1,6 +0,0 @@
|
||||
cstr.o cstr.d : cstr.cpp unicode/utypes.h unicode/umachine.h unicode/ptypes.h \
|
||||
unicode/platform.h unicode/uconfig.h unicode/uvernum.h \
|
||||
unicode/urename.h unicode/uversion.h unicode/putil.h unicode/unistr.h \
|
||||
unicode/char16ptr.h unicode/rep.h unicode/uobject.h \
|
||||
unicode/std_string.h unicode/stringpiece.h unicode/bytestream.h cstr.h \
|
||||
charstr.h cmemory.h unicode/localpointer.h uinvchar.h
|
@ -28,7 +28,7 @@
|
||||
* default code page conversion, which will do the best job possible,
|
||||
* but may be lossy, depending on the platform.
|
||||
*
|
||||
* If no other conversion is available, use invariant conversion and substitue
|
||||
* If no other conversion is available, use invariant conversion and substitute
|
||||
* '?' for non-invariant characters.
|
||||
*
|
||||
* Example Usage:
|
||||
@ -51,8 +51,8 @@ class U_COMMON_API CStr : public UMemory {
|
||||
|
||||
private:
|
||||
CharString s;
|
||||
CStr(const CStr &other); // Forbid copying of this class.
|
||||
CStr &operator =(const CStr &other); // Forbid assignment.
|
||||
CStr(const CStr &other) = delete; // Forbid copying of this class.
|
||||
CStr &operator =(const CStr &other) = delete; // Forbid assignment.
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
@ -1,4 +0,0 @@
|
||||
cstring.o cstring.d : cstring.cpp unicode/utypes.h unicode/umachine.h \
|
||||
unicode/ptypes.h unicode/platform.h unicode/uconfig.h \
|
||||
unicode/uvernum.h unicode/urename.h unicode/uversion.h cmemory.h \
|
||||
unicode/localpointer.h unicode/uobject.h cstring.h uassert.h
|
@ -1,3 +0,0 @@
|
||||
cwchar.o cwchar.d : cwchar.cpp unicode/utypes.h unicode/umachine.h unicode/ptypes.h \
|
||||
unicode/platform.h unicode/uconfig.h unicode/uvernum.h \
|
||||
unicode/urename.h unicode/uversion.h
|
@ -17,7 +17,10 @@
|
||||
#include "dictbe.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/chariter.h"
|
||||
#include "unicode/resbund.h"
|
||||
#include "unicode/ubrk.h"
|
||||
#include "unicode/usetiter.h"
|
||||
#include "ubrkimpl.h"
|
||||
#include "utracimp.h"
|
||||
#include "uvectr32.h"
|
||||
#include "uvector.h"
|
||||
@ -47,7 +50,10 @@ int32_t
|
||||
DictionaryBreakEngine::findBreaks( UText *text,
|
||||
int32_t startPos,
|
||||
int32_t endPos,
|
||||
UVector32 &foundBreaks ) const {
|
||||
UVector32 &foundBreaks,
|
||||
UBool isPhraseBreaking,
|
||||
UErrorCode& status) const {
|
||||
if (U_FAILURE(status)) return 0;
|
||||
(void)startPos; // TODO: remove this param?
|
||||
int32_t result = 0;
|
||||
|
||||
@ -66,7 +72,7 @@ DictionaryBreakEngine::findBreaks( UText *text,
|
||||
}
|
||||
rangeStart = start;
|
||||
rangeEnd = current;
|
||||
result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks);
|
||||
result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks, isPhraseBreaking, status);
|
||||
utext_setNativeIndex(text, current);
|
||||
|
||||
return result;
|
||||
@ -113,7 +119,7 @@ public:
|
||||
// Select the currently marked candidate, point after it in the text, and invalidate self
|
||||
int32_t acceptMarked( UText *text );
|
||||
|
||||
// Back up from the current candidate to the next shorter one; return TRUE if that exists
|
||||
// Back up from the current candidate to the next shorter one; return true if that exists
|
||||
// and point the text after it
|
||||
UBool backUp( UText *text );
|
||||
|
||||
@ -159,9 +165,9 @@ UBool
|
||||
PossibleWord::backUp( UText *text ) {
|
||||
if (current > 0) {
|
||||
utext_setNativeIndex(text, offset + cuLengths[--current]);
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
return FALSE;
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -179,7 +185,7 @@ static const int32_t THAI_ROOT_COMBINE_THRESHOLD = 3;
|
||||
// dictionary word, with a preceding word
|
||||
static const int32_t THAI_PREFIX_COMBINE_THRESHOLD = 3;
|
||||
|
||||
// Ellision character
|
||||
// Elision character
|
||||
static const int32_t THAI_PAIYANNOI = 0x0E2F;
|
||||
|
||||
// Repeat character
|
||||
@ -197,13 +203,13 @@ ThaiBreakEngine::ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode
|
||||
{
|
||||
UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
|
||||
UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Thai");
|
||||
fThaiWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]]"), status);
|
||||
UnicodeSet thaiWordSet(UnicodeString(u"[[:Thai:]&[:LineBreak=SA:]]"), status);
|
||||
if (U_SUCCESS(status)) {
|
||||
setCharacters(fThaiWordSet);
|
||||
setCharacters(thaiWordSet);
|
||||
}
|
||||
fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]&[:M:]]"), status);
|
||||
fMarkSet.applyPattern(UnicodeString(u"[[:Thai:]&[:LineBreak=SA:]&[:M:]]"), status);
|
||||
fMarkSet.add(0x0020);
|
||||
fEndWordSet = fThaiWordSet;
|
||||
fEndWordSet = thaiWordSet;
|
||||
fEndWordSet.remove(0x0E31); // MAI HAN-AKAT
|
||||
fEndWordSet.remove(0x0E40, 0x0E44); // SARA E through SARA AI MAIMALAI
|
||||
fBeginWordSet.add(0x0E01, 0x0E2E); // KO KAI through HO NOKHUK
|
||||
@ -227,7 +233,10 @@ int32_t
|
||||
ThaiBreakEngine::divideUpDictionaryRange( UText *text,
|
||||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UVector32 &foundBreaks ) const {
|
||||
UVector32 &foundBreaks,
|
||||
UBool /* isPhraseBreaking */,
|
||||
UErrorCode& status) const {
|
||||
if (U_FAILURE(status)) return 0;
|
||||
utext_setNativeIndex(text, rangeStart);
|
||||
utext_moveIndex32(text, THAI_MIN_WORD_SPAN);
|
||||
if (utext_getNativeIndex(text) >= rangeEnd) {
|
||||
@ -240,7 +249,6 @@ ThaiBreakEngine::divideUpDictionaryRange( UText *text,
|
||||
int32_t cpWordLength = 0; // Word Length in Code Points.
|
||||
int32_t cuWordLength = 0; // Word length in code units (UText native indexing)
|
||||
int32_t current;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
PossibleWord words[THAI_LOOKAHEAD];
|
||||
|
||||
utext_setNativeIndex(text, rangeStart);
|
||||
@ -265,13 +273,9 @@ ThaiBreakEngine::divideUpDictionaryRange( UText *text,
|
||||
goto foundBest;
|
||||
}
|
||||
do {
|
||||
int32_t wordsMatched = 1;
|
||||
if (words[(wordsFound + 1) % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
|
||||
if (wordsMatched < 2) {
|
||||
// Followed by another dictionary word; mark first word as a good candidate
|
||||
words[wordsFound%THAI_LOOKAHEAD].markCurrent();
|
||||
wordsMatched = 2;
|
||||
}
|
||||
|
||||
// If we're already at the end of the range, we're done
|
||||
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
|
||||
@ -442,13 +446,13 @@ LaoBreakEngine::LaoBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &s
|
||||
{
|
||||
UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
|
||||
UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Laoo");
|
||||
fLaoWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Laoo:]&[:LineBreak=SA:]]"), status);
|
||||
UnicodeSet laoWordSet(UnicodeString(u"[[:Laoo:]&[:LineBreak=SA:]]"), status);
|
||||
if (U_SUCCESS(status)) {
|
||||
setCharacters(fLaoWordSet);
|
||||
setCharacters(laoWordSet);
|
||||
}
|
||||
fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Laoo:]&[:LineBreak=SA:]&[:M:]]"), status);
|
||||
fMarkSet.applyPattern(UnicodeString(u"[[:Laoo:]&[:LineBreak=SA:]&[:M:]]"), status);
|
||||
fMarkSet.add(0x0020);
|
||||
fEndWordSet = fLaoWordSet;
|
||||
fEndWordSet = laoWordSet;
|
||||
fEndWordSet.remove(0x0EC0, 0x0EC4); // prefix vowels
|
||||
fBeginWordSet.add(0x0E81, 0x0EAE); // basic consonants (including holes for corresponding Thai characters)
|
||||
fBeginWordSet.add(0x0EDC, 0x0EDD); // digraph consonants (no Thai equivalent)
|
||||
@ -469,7 +473,10 @@ int32_t
|
||||
LaoBreakEngine::divideUpDictionaryRange( UText *text,
|
||||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UVector32 &foundBreaks ) const {
|
||||
UVector32 &foundBreaks,
|
||||
UBool /* isPhraseBreaking */,
|
||||
UErrorCode& status) const {
|
||||
if (U_FAILURE(status)) return 0;
|
||||
if ((rangeEnd - rangeStart) < LAO_MIN_WORD_SPAN) {
|
||||
return 0; // Not enough characters for two words
|
||||
}
|
||||
@ -478,7 +485,6 @@ LaoBreakEngine::divideUpDictionaryRange( UText *text,
|
||||
int32_t cpWordLength = 0;
|
||||
int32_t cuWordLength = 0;
|
||||
int32_t current;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
PossibleWord words[LAO_LOOKAHEAD];
|
||||
|
||||
utext_setNativeIndex(text, rangeStart);
|
||||
@ -503,13 +509,9 @@ LaoBreakEngine::divideUpDictionaryRange( UText *text,
|
||||
goto foundBest;
|
||||
}
|
||||
do {
|
||||
int32_t wordsMatched = 1;
|
||||
if (words[(wordsFound + 1) % LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
|
||||
if (wordsMatched < 2) {
|
||||
// Followed by another dictionary word; mark first word as a good candidate
|
||||
words[wordsFound%LAO_LOOKAHEAD].markCurrent();
|
||||
wordsMatched = 2;
|
||||
}
|
||||
|
||||
// If we're already at the end of the range, we're done
|
||||
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
|
||||
@ -535,7 +537,7 @@ foundBest:
|
||||
}
|
||||
|
||||
// We come here after having either found a word or not. We look ahead to the
|
||||
// next word. If it's not a dictionary word, we will combine it withe the word we
|
||||
// next word. If it's not a dictionary word, we will combine it with the word we
|
||||
// just found (if there is one), but only if the preceding word does not exceed
|
||||
// the threshold.
|
||||
// The text iterator should now be positioned at the end of the word we found.
|
||||
@ -641,14 +643,13 @@ BurmeseBreakEngine::BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErro
|
||||
{
|
||||
UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
|
||||
UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Mymr");
|
||||
fBurmeseWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Mymr:]&[:LineBreak=SA:]]"), status);
|
||||
if (U_SUCCESS(status)) {
|
||||
setCharacters(fBurmeseWordSet);
|
||||
}
|
||||
fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Mymr:]&[:LineBreak=SA:]&[:M:]]"), status);
|
||||
fMarkSet.add(0x0020);
|
||||
fEndWordSet = fBurmeseWordSet;
|
||||
fBeginWordSet.add(0x1000, 0x102A); // basic consonants and independent vowels
|
||||
fEndWordSet.applyPattern(UnicodeString(u"[[:Mymr:]&[:LineBreak=SA:]]"), status);
|
||||
fMarkSet.applyPattern(UnicodeString(u"[[:Mymr:]&[:LineBreak=SA:]&[:M:]]"), status);
|
||||
fMarkSet.add(0x0020);
|
||||
if (U_SUCCESS(status)) {
|
||||
setCharacters(fEndWordSet);
|
||||
}
|
||||
|
||||
// Compact for caching.
|
||||
fMarkSet.compact();
|
||||
@ -665,7 +666,10 @@ int32_t
|
||||
BurmeseBreakEngine::divideUpDictionaryRange( UText *text,
|
||||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UVector32 &foundBreaks ) const {
|
||||
UVector32 &foundBreaks,
|
||||
UBool /* isPhraseBreaking */,
|
||||
UErrorCode& status ) const {
|
||||
if (U_FAILURE(status)) return 0;
|
||||
if ((rangeEnd - rangeStart) < BURMESE_MIN_WORD_SPAN) {
|
||||
return 0; // Not enough characters for two words
|
||||
}
|
||||
@ -674,7 +678,6 @@ BurmeseBreakEngine::divideUpDictionaryRange( UText *text,
|
||||
int32_t cpWordLength = 0;
|
||||
int32_t cuWordLength = 0;
|
||||
int32_t current;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
PossibleWord words[BURMESE_LOOKAHEAD];
|
||||
|
||||
utext_setNativeIndex(text, rangeStart);
|
||||
@ -699,13 +702,9 @@ BurmeseBreakEngine::divideUpDictionaryRange( UText *text,
|
||||
goto foundBest;
|
||||
}
|
||||
do {
|
||||
int32_t wordsMatched = 1;
|
||||
if (words[(wordsFound + 1) % BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
|
||||
if (wordsMatched < 2) {
|
||||
// Followed by another dictionary word; mark first word as a good candidate
|
||||
words[wordsFound%BURMESE_LOOKAHEAD].markCurrent();
|
||||
wordsMatched = 2;
|
||||
}
|
||||
|
||||
// If we're already at the end of the range, we're done
|
||||
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
|
||||
@ -731,7 +730,7 @@ foundBest:
|
||||
}
|
||||
|
||||
// We come here after having either found a word or not. We look ahead to the
|
||||
// next word. If it's not a dictionary word, we will combine it withe the word we
|
||||
// next word. If it's not a dictionary word, we will combine it with the word we
|
||||
// just found (if there is one), but only if the preceding word does not exceed
|
||||
// the threshold.
|
||||
// The text iterator should now be positioned at the end of the word we found.
|
||||
@ -837,13 +836,13 @@ KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCod
|
||||
{
|
||||
UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
|
||||
UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Khmr");
|
||||
fKhmerWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]]"), status);
|
||||
UnicodeSet khmerWordSet(UnicodeString(u"[[:Khmr:]&[:LineBreak=SA:]]"), status);
|
||||
if (U_SUCCESS(status)) {
|
||||
setCharacters(fKhmerWordSet);
|
||||
setCharacters(khmerWordSet);
|
||||
}
|
||||
fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]&[:M:]]"), status);
|
||||
fMarkSet.applyPattern(UnicodeString(u"[[:Khmr:]&[:LineBreak=SA:]&[:M:]]"), status);
|
||||
fMarkSet.add(0x0020);
|
||||
fEndWordSet = fKhmerWordSet;
|
||||
fEndWordSet = khmerWordSet;
|
||||
fBeginWordSet.add(0x1780, 0x17B3);
|
||||
//fBeginWordSet.add(0x17A3, 0x17A4); // deprecated vowels
|
||||
//fEndWordSet.remove(0x17A5, 0x17A9); // Khmer independent vowels that can't end a word
|
||||
@ -873,7 +872,10 @@ int32_t
|
||||
KhmerBreakEngine::divideUpDictionaryRange( UText *text,
|
||||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UVector32 &foundBreaks ) const {
|
||||
UVector32 &foundBreaks,
|
||||
UBool /* isPhraseBreaking */,
|
||||
UErrorCode& status ) const {
|
||||
if (U_FAILURE(status)) return 0;
|
||||
if ((rangeEnd - rangeStart) < KHMER_MIN_WORD_SPAN) {
|
||||
return 0; // Not enough characters for two words
|
||||
}
|
||||
@ -882,7 +884,6 @@ KhmerBreakEngine::divideUpDictionaryRange( UText *text,
|
||||
int32_t cpWordLength = 0;
|
||||
int32_t cuWordLength = 0;
|
||||
int32_t current;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
PossibleWord words[KHMER_LOOKAHEAD];
|
||||
|
||||
utext_setNativeIndex(text, rangeStart);
|
||||
@ -908,13 +909,9 @@ KhmerBreakEngine::divideUpDictionaryRange( UText *text,
|
||||
goto foundBest;
|
||||
}
|
||||
do {
|
||||
int32_t wordsMatched = 1;
|
||||
if (words[(wordsFound + 1) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
|
||||
if (wordsMatched < 2) {
|
||||
// Followed by another dictionary word; mark first word as a good candidate
|
||||
words[wordsFound % KHMER_LOOKAHEAD].markCurrent();
|
||||
wordsMatched = 2;
|
||||
}
|
||||
|
||||
// If we're already at the end of the range, we're done
|
||||
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
|
||||
@ -1060,25 +1057,27 @@ CjkBreakEngine::CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType
|
||||
: DictionaryBreakEngine(), fDictionary(adoptDictionary) {
|
||||
UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
|
||||
UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Hani");
|
||||
// Korean dictionary only includes Hangul syllables
|
||||
fHangulWordSet.applyPattern(UNICODE_STRING_SIMPLE("[\\uac00-\\ud7a3]"), status);
|
||||
fHanWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Han:]"), status);
|
||||
fKatakanaWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Katakana:]\\uff9e\\uff9f]"), status);
|
||||
fHiraganaWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Hiragana:]"), status);
|
||||
nfkcNorm2 = Normalizer2::getNFKCInstance(status);
|
||||
// Korean dictionary only includes Hangul syllables
|
||||
fHangulWordSet.applyPattern(UnicodeString(u"[\\uac00-\\ud7a3]"), status);
|
||||
fHangulWordSet.compact();
|
||||
// Digits, open punctuation and Alphabetic characters.
|
||||
fDigitOrOpenPunctuationOrAlphabetSet.applyPattern(
|
||||
UnicodeString(u"[[:Nd:][:Pi:][:Ps:][:Alphabetic:]]"), status);
|
||||
fDigitOrOpenPunctuationOrAlphabetSet.compact();
|
||||
fClosePunctuationSet.applyPattern(UnicodeString(u"[[:Pc:][:Pd:][:Pe:][:Pf:][:Po:]]"), status);
|
||||
fClosePunctuationSet.compact();
|
||||
|
||||
if (U_SUCCESS(status)) {
|
||||
// handle Korean and Japanese/Chinese using different dictionaries
|
||||
if (type == kKorean) {
|
||||
if (U_SUCCESS(status)) {
|
||||
setCharacters(fHangulWordSet);
|
||||
}
|
||||
} else { //Chinese and Japanese
|
||||
UnicodeSet cjSet;
|
||||
cjSet.addAll(fHanWordSet);
|
||||
cjSet.addAll(fKatakanaWordSet);
|
||||
cjSet.addAll(fHiraganaWordSet);
|
||||
cjSet.add(0xFF70); // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
|
||||
cjSet.add(0x30FC); // KATAKANA-HIRAGANA PROLONGED SOUND MARK
|
||||
UnicodeSet cjSet(UnicodeString(u"[[:Han:][:Hiragana:][:Katakana:]\\u30fc\\uff70\\uff9e\\uff9f]"), status);
|
||||
if (U_SUCCESS(status)) {
|
||||
setCharacters(cjSet);
|
||||
initJapanesePhraseParameter(status);
|
||||
}
|
||||
}
|
||||
UTRACE_EXIT_STATUS(status);
|
||||
@ -1106,7 +1105,6 @@ static inline bool isKatakana(UChar32 value) {
|
||||
(value >= 0xFF66 && value <= 0xFF9f);
|
||||
}
|
||||
|
||||
|
||||
// Function for accessing internal utext flags.
|
||||
// Replicates an internal UText function.
|
||||
|
||||
@ -1114,7 +1112,6 @@ static inline int32_t utext_i32_flag(int32_t bitIndex) {
|
||||
return (int32_t)1 << bitIndex;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* @param text A UText representing the text
|
||||
* @param rangeStart The start of the range of dictionary characters
|
||||
@ -1126,7 +1123,10 @@ int32_t
|
||||
CjkBreakEngine::divideUpDictionaryRange( UText *inText,
|
||||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UVector32 &foundBreaks ) const {
|
||||
UVector32 &foundBreaks,
|
||||
UBool isPhraseBreaking,
|
||||
UErrorCode& status) const {
|
||||
if (U_FAILURE(status)) return 0;
|
||||
if (rangeStart >= rangeEnd) {
|
||||
return 0;
|
||||
}
|
||||
@ -1138,9 +1138,6 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
|
||||
// If NULL then mapping is 1:1
|
||||
LocalPointer<UVector32> inputMap;
|
||||
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
||||
|
||||
// if UText has the input string as one contiguous UTF-16 chunk
|
||||
if ((inText->providerProperties & utext_i32_flag(UTEXT_PROVIDER_STABLE_CHUNKS)) &&
|
||||
inText->chunkNativeStart <= rangeStart &&
|
||||
@ -1149,7 +1146,7 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
|
||||
|
||||
// Input UText is in one contiguous UTF-16 chunk.
|
||||
// Use Read-only aliasing UnicodeString.
|
||||
inString.setTo(FALSE,
|
||||
inString.setTo(false,
|
||||
inText->chunkContents + rangeStart - inText->chunkNativeStart,
|
||||
rangeEnd - rangeStart);
|
||||
} else {
|
||||
@ -1358,6 +1355,31 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
|
||||
if ((uint32_t)bestSnlp.elementAti(numCodePts) == kuint32max) {
|
||||
t_boundary.addElement(numCodePts, status);
|
||||
numBreaks++;
|
||||
} else if (isPhraseBreaking) {
|
||||
t_boundary.addElement(numCodePts, status);
|
||||
if(U_SUCCESS(status)) {
|
||||
numBreaks++;
|
||||
int32_t prevIdx = numCodePts;
|
||||
|
||||
int32_t codeUnitIdx = -1;
|
||||
int32_t prevCodeUnitIdx = -1;
|
||||
int32_t length = -1;
|
||||
for (int32_t i = prev.elementAti(numCodePts); i > 0; i = prev.elementAti(i)) {
|
||||
codeUnitIdx = inString.moveIndex32(0, i);
|
||||
prevCodeUnitIdx = inString.moveIndex32(0, prevIdx);
|
||||
// Calculate the length by using the code unit.
|
||||
length = prevCodeUnitIdx - codeUnitIdx;
|
||||
prevIdx = i;
|
||||
// Keep the breakpoint if the pattern is not in the fSkipSet and continuous Katakana
|
||||
// characters don't occur.
|
||||
if (!fSkipSet.containsKey(inString.tempSubString(codeUnitIdx, length))
|
||||
&& (!isKatakana(inString.char32At(inString.moveIndex32(codeUnitIdx, -1)))
|
||||
|| !isKatakana(inString.char32At(codeUnitIdx)))) {
|
||||
t_boundary.addElement(i, status);
|
||||
numBreaks++;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (int32_t i = numCodePts; i > 0; i = prev.elementAti(i)) {
|
||||
t_boundary.addElement(i, status);
|
||||
@ -1378,7 +1400,8 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
|
||||
// while reversing t_boundary and pushing values to foundBreaks.
|
||||
int32_t prevCPPos = -1;
|
||||
int32_t prevUTextPos = -1;
|
||||
for (int32_t i = numBreaks-1; i >= 0; i--) {
|
||||
int32_t correctedNumBreaks = 0;
|
||||
for (int32_t i = numBreaks - 1; i >= 0; i--) {
|
||||
int32_t cpPos = t_boundary.elementAti(i);
|
||||
U_ASSERT(cpPos > prevCPPos);
|
||||
int32_t utextPos = inputMap.isValid() ? inputMap->elementAti(cpPos) : cpPos + rangeStart;
|
||||
@ -1386,7 +1409,15 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
|
||||
if (utextPos > prevUTextPos) {
|
||||
// Boundaries are added to foundBreaks output in ascending order.
|
||||
U_ASSERT(foundBreaks.size() == 0 || foundBreaks.peeki() < utextPos);
|
||||
// In phrase breaking, there has to be a breakpoint between Cj character and close
|
||||
// punctuation.
|
||||
// E.g.[携帯電話]正しい選択 -> [携帯▁電話]▁正しい▁選択 -> breakpoint between ] and 正
|
||||
if (utextPos != rangeStart
|
||||
|| (isPhraseBreaking && utextPos > 0
|
||||
&& fClosePunctuationSet.contains(utext_char32At(inText, utextPos - 1)))) {
|
||||
foundBreaks.push(utextPos, status);
|
||||
correctedNumBreaks++;
|
||||
}
|
||||
} else {
|
||||
// Normalization expanded the input text, the dictionary found a boundary
|
||||
// within the expansion, giving two boundaries with the same index in the
|
||||
@ -1398,9 +1429,52 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
|
||||
}
|
||||
(void)prevCPPos; // suppress compiler warnings about unused variable
|
||||
|
||||
UChar32 nextChar = utext_char32At(inText, rangeEnd);
|
||||
if (!foundBreaks.isEmpty() && foundBreaks.peeki() == rangeEnd) {
|
||||
// In phrase breaking, there has to be a breakpoint between Cj character and
|
||||
// the number/open punctuation.
|
||||
// E.g. る文字「そうだ、京都」->る▁文字▁「そうだ、▁京都」-> breakpoint between 字 and「
|
||||
// E.g. 乗車率90%程度だろうか -> 乗車▁率▁90%▁程度だろうか -> breakpoint between 率 and 9
|
||||
// E.g. しかもロゴがUnicode! -> しかも▁ロゴが▁Unicode!-> breakpoint between が and U
|
||||
if (isPhraseBreaking) {
|
||||
if (!fDigitOrOpenPunctuationOrAlphabetSet.contains(nextChar)) {
|
||||
foundBreaks.popi();
|
||||
correctedNumBreaks--;
|
||||
}
|
||||
} else {
|
||||
foundBreaks.popi();
|
||||
correctedNumBreaks--;
|
||||
}
|
||||
}
|
||||
|
||||
// inString goes out of scope
|
||||
// inputMap goes out of scope
|
||||
return numBreaks;
|
||||
return correctedNumBreaks;
|
||||
}
|
||||
|
||||
// Fills fSkipSet for phrase breaking by running the two loaders in turn:
// loadJapaneseExtensions() and then loadHiragana(), both of which insert
// their entries into fSkipSet.
// @param error In/out ICU error code, passed unchanged to both loaders.
void CjkBreakEngine::initJapanesePhraseParameter(UErrorCode& error) {
|
||||
loadJapaneseExtensions(error);
|
||||
loadHiragana(error);
|
||||
}
|
||||
|
||||
// Opens the "ja" bundle under U_ICUDATA_BRKITR, fetches its "extensions"
// resource, and stores every string found there as a key in fSkipSet
// (with the value 1).
// @param error In/out ICU error code. Nothing is read from the bundle if
//              opening it fails, and the loop ends as soon as error
//              reports a failure.
void CjkBreakEngine::loadJapaneseExtensions(UErrorCode& error) {
    ResourceBundle jaBundle(U_ICUDATA_BRKITR, "ja", error);
    if (U_FAILURE(error)) {
        return;
    }

    ResourceBundle extensions = jaBundle.get("extensions", error);
    for (;;) {
        // Same short-circuit order as before: check the status first, then
        // ask the bundle whether another entry is available.
        if (U_FAILURE(error) || !extensions.hasNext()) {
            break;
        }
        const UnicodeString entry = extensions.getNextString(error);
        fSkipSet.puti(entry, 1, error);
    }
}
|
||||
|
||||
// Builds a UnicodeSet from the pattern "[:Hiragana:]" and, for each element
// the set iterator yields, inserts a one-code-point UnicodeString into
// fSkipSet (with the value 1).
// @param error In/out ICU error code, handed to the set constructor and to
//              every fSkipSet insertion.
void CjkBreakEngine::loadHiragana(UErrorCode& error) {
    UnicodeSet hiragana(UnicodeString(u"[:Hiragana:]"), error);
    hiragana.compact();

    for (UnicodeSetIterator it(hiragana); it.next(); ) {
        const UChar32 codePoint = it.getCodepoint();
        fSkipSet.puti(UnicodeString(codePoint), 1, error);
    }
}
|
||||
#endif
|
||||
|
||||
|
@ -1,14 +0,0 @@
|
||||
dictbe.o dictbe.d : dictbe.cpp unicode/utypes.h unicode/umachine.h unicode/ptypes.h \
|
||||
unicode/platform.h unicode/uconfig.h unicode/uvernum.h \
|
||||
unicode/urename.h unicode/uversion.h brkeng.h unicode/uobject.h \
|
||||
unicode/utext.h unicode/uchar.h unicode/stringoptions.h \
|
||||
unicode/ucpmap.h unicode/localpointer.h unicode/rep.h unicode/unistr.h \
|
||||
unicode/char16ptr.h unicode/std_string.h unicode/stringpiece.h \
|
||||
unicode/bytestream.h unicode/chariter.h unicode/uscript.h dictbe.h \
|
||||
unicode/uniset.h unicode/unifilt.h unicode/unifunct.h \
|
||||
unicode/unimatch.h unicode/uset.h uvectr32.h uhash.h cmemory.h \
|
||||
uelement.h uassert.h unicode/ubrk.h unicode/uloc.h unicode/uenum.h \
|
||||
unicode/parseerr.h utracimp.h unicode/utrace.h uvector.h uarrsort.h \
|
||||
unicode/normlzr.h unicode/normalizer2.h unicode/unorm2.h \
|
||||
unicode/unorm.h unicode/uiter.h dictionarydata.h unicode/udata.h \
|
||||
udataswp.h unicode/ustringtrie.h
|
@ -15,6 +15,7 @@
|
||||
#include "unicode/utext.h"
|
||||
|
||||
#include "brkeng.h"
|
||||
#include "hash.h"
|
||||
#include "uvectr32.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
@ -62,7 +63,7 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
|
||||
* @return true if this engine handles the particular character and break
|
||||
* type.
|
||||
*/
|
||||
virtual UBool handles(UChar32 c) const;
|
||||
virtual UBool handles(UChar32 c) const override;
|
||||
|
||||
/**
|
||||
* <p>Find any breaks within a run in the supplied text.</p>
|
||||
@ -73,12 +74,15 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
|
||||
* @param startPos The start of the run within the supplied text.
|
||||
* @param endPos The end of the run within the supplied text.
|
||||
* @param foundBreaks vector of int32_t to receive the break positions
|
||||
* @param isPhraseBreaking Phrase-breaking flag; forwarded unchanged to divideUpDictionaryRange().
* @param status Information on any errors encountered.
|
||||
* @return The number of breaks found.
|
||||
*/
|
||||
virtual int32_t findBreaks( UText *text,
|
||||
int32_t startPos,
|
||||
int32_t endPos,
|
||||
UVector32 &foundBreaks ) const;
|
||||
UVector32 &foundBreaks,
|
||||
UBool isPhraseBreaking,
|
||||
UErrorCode& status ) const override;
|
||||
|
||||
protected:
|
||||
|
||||
@ -96,12 +100,15 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
|
||||
* @param rangeStart The start of the range of dictionary characters
|
||||
* @param rangeEnd The end of the range of dictionary characters
|
||||
* @param foundBreaks Output of C array of int32_t break positions, or 0
|
||||
* @param isPhraseBreaking Phrase-breaking flag; engines without phrase-breaking rules ignore it.
* @param status Information on any errors encountered.
|
||||
* @return The number of breaks found
|
||||
*/
|
||||
virtual int32_t divideUpDictionaryRange( UText *text,
|
||||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UVector32 &foundBreaks ) const = 0;
|
||||
UVector32 &foundBreaks,
|
||||
UBool isPhraseBreaking,
|
||||
UErrorCode& status) const = 0;
|
||||
|
||||
};
|
||||
|
||||
@ -123,7 +130,6 @@ class ThaiBreakEngine : public DictionaryBreakEngine {
|
||||
* @internal
|
||||
*/
|
||||
|
||||
UnicodeSet fThaiWordSet;
|
||||
UnicodeSet fEndWordSet;
|
||||
UnicodeSet fBeginWordSet;
|
||||
UnicodeSet fSuffixSet;
|
||||
@ -153,12 +159,15 @@ class ThaiBreakEngine : public DictionaryBreakEngine {
|
||||
* @param rangeStart The start of the range of dictionary characters
|
||||
* @param rangeEnd The end of the range of dictionary characters
|
||||
* @param foundBreaks Output of C array of int32_t break positions, or 0
|
||||
* @param status Information on any errors encountered.
|
||||
* @return The number of breaks found
|
||||
*/
|
||||
virtual int32_t divideUpDictionaryRange( UText *text,
|
||||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UVector32 &foundBreaks ) const;
|
||||
UVector32 &foundBreaks,
|
||||
UBool isPhraseBreaking,
|
||||
UErrorCode& status) const override;
|
||||
|
||||
};
|
||||
|
||||
@ -180,7 +189,6 @@ class LaoBreakEngine : public DictionaryBreakEngine {
|
||||
* @internal
|
||||
*/
|
||||
|
||||
UnicodeSet fLaoWordSet;
|
||||
UnicodeSet fEndWordSet;
|
||||
UnicodeSet fBeginWordSet;
|
||||
UnicodeSet fMarkSet;
|
||||
@ -209,12 +217,15 @@ class LaoBreakEngine : public DictionaryBreakEngine {
|
||||
* @param rangeStart The start of the range of dictionary characters
|
||||
* @param rangeEnd The end of the range of dictionary characters
|
||||
* @param foundBreaks Output of C array of int32_t break positions, or 0
|
||||
* @param status Information on any errors encountered.
|
||||
* @return The number of breaks found
|
||||
*/
|
||||
virtual int32_t divideUpDictionaryRange( UText *text,
|
||||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UVector32 &foundBreaks ) const;
|
||||
UVector32 &foundBreaks,
|
||||
UBool isPhraseBreaking,
|
||||
UErrorCode& status) const override;
|
||||
|
||||
};
|
||||
|
||||
@ -236,7 +247,6 @@ class BurmeseBreakEngine : public DictionaryBreakEngine {
|
||||
* @internal
|
||||
*/
|
||||
|
||||
UnicodeSet fBurmeseWordSet;
|
||||
UnicodeSet fEndWordSet;
|
||||
UnicodeSet fBeginWordSet;
|
||||
UnicodeSet fMarkSet;
|
||||
@ -265,12 +275,15 @@ class BurmeseBreakEngine : public DictionaryBreakEngine {
|
||||
* @param rangeStart The start of the range of dictionary characters
|
||||
* @param rangeEnd The end of the range of dictionary characters
|
||||
* @param foundBreaks Output of C array of int32_t break positions, or 0
|
||||
* @param status Information on any errors encountered.
|
||||
* @return The number of breaks found
|
||||
*/
|
||||
virtual int32_t divideUpDictionaryRange( UText *text,
|
||||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UVector32 &foundBreaks ) const;
|
||||
UVector32 &foundBreaks,
|
||||
UBool isPhraseBreaking,
|
||||
UErrorCode& status) const override;
|
||||
|
||||
};
|
||||
|
||||
@ -292,7 +305,6 @@ class KhmerBreakEngine : public DictionaryBreakEngine {
|
||||
* @internal
|
||||
*/
|
||||
|
||||
UnicodeSet fKhmerWordSet;
|
||||
UnicodeSet fEndWordSet;
|
||||
UnicodeSet fBeginWordSet;
|
||||
UnicodeSet fMarkSet;
|
||||
@ -321,12 +333,15 @@ class KhmerBreakEngine : public DictionaryBreakEngine {
|
||||
* @param rangeStart The start of the range of dictionary characters
|
||||
* @param rangeEnd The end of the range of dictionary characters
|
||||
* @param foundBreaks Output of C array of int32_t break positions, or 0
|
||||
* @param status Information on any errors encountered.
|
||||
* @return The number of breaks found
|
||||
*/
|
||||
virtual int32_t divideUpDictionaryRange( UText *text,
|
||||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UVector32 &foundBreaks ) const;
|
||||
UVector32 &foundBreaks,
|
||||
UBool isPhraseBreaking,
|
||||
UErrorCode& status) const override;
|
||||
|
||||
};
|
||||
|
||||
@ -354,13 +369,22 @@ class CjkBreakEngine : public DictionaryBreakEngine {
|
||||
* @internal
|
||||
*/
|
||||
UnicodeSet fHangulWordSet;
|
||||
UnicodeSet fHanWordSet;
|
||||
UnicodeSet fKatakanaWordSet;
|
||||
UnicodeSet fHiraganaWordSet;
|
||||
UnicodeSet fDigitOrOpenPunctuationOrAlphabetSet;
|
||||
UnicodeSet fClosePunctuationSet;
|
||||
|
||||
DictionaryMatcher *fDictionary;
|
||||
const Normalizer2 *nfkcNorm2;
|
||||
|
||||
private:
|
||||
// Load Japanese extensions.
|
||||
void loadJapaneseExtensions(UErrorCode& error);
|
||||
// Load Japanese Hiragana.
|
||||
void loadHiragana(UErrorCode& error);
|
||||
// Initialize fSkipSet by loading Japanese Hiragana and extensions.
|
||||
void initJapanesePhraseParameter(UErrorCode& error);
|
||||
|
||||
Hashtable fSkipSet;
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
@ -385,12 +409,15 @@ class CjkBreakEngine : public DictionaryBreakEngine {
|
||||
* @param rangeStart The start of the range of dictionary characters
|
||||
* @param rangeEnd The end of the range of dictionary characters
|
||||
* @param foundBreaks Output of C array of int32_t break positions, or 0
|
||||
* @param isPhraseBreaking true to apply the phrase-breaking rules when choosing boundaries.
* @param status Information on any errors encountered.
|
||||
* @return The number of breaks found
|
||||
*/
|
||||
virtual int32_t divideUpDictionaryRange( UText *text,
|
||||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UVector32 &foundBreaks ) const;
|
||||
UVector32 &foundBreaks,
|
||||
UBool isPhraseBreaking,
|
||||
UErrorCode& status) const override;
|
||||
|
||||
};
|
||||
|
||||
|
@ -1,9 +0,0 @@
|
||||
dictionarydata.o dictionarydata.d : dictionarydata.cpp dictionarydata.h unicode/utypes.h \
|
||||
unicode/umachine.h unicode/ptypes.h unicode/platform.h \
|
||||
unicode/uconfig.h unicode/uvernum.h unicode/urename.h \
|
||||
unicode/uversion.h unicode/utext.h unicode/uchar.h \
|
||||
unicode/stringoptions.h unicode/ucpmap.h unicode/localpointer.h \
|
||||
unicode/rep.h unicode/uobject.h unicode/unistr.h unicode/char16ptr.h \
|
||||
unicode/std_string.h unicode/stringpiece.h unicode/bytestream.h \
|
||||
unicode/chariter.h unicode/udata.h udataswp.h unicode/ustringtrie.h \
|
||||
unicode/ucharstrie.h unicode/bytestrie.h cmemory.h
|
@ -107,8 +107,8 @@ public:
|
||||
virtual ~UCharsDictionaryMatcher();
|
||||
virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit,
|
||||
int32_t *lengths, int32_t *cpLengths, int32_t *values,
|
||||
int32_t *prefix) const;
|
||||
virtual int32_t getType() const;
|
||||
int32_t *prefix) const override;
|
||||
virtual int32_t getType() const override;
|
||||
private:
|
||||
const UChar *characters;
|
||||
UDataMemory *file;
|
||||
@ -125,8 +125,8 @@ public:
|
||||
virtual ~BytesDictionaryMatcher();
|
||||
virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit,
|
||||
int32_t *lengths, int32_t *cpLengths, int32_t *values,
|
||||
int32_t *prefix) const;
|
||||
virtual int32_t getType() const;
|
||||
int32_t *prefix) const override;
|
||||
virtual int32_t getType() const override;
|
||||
private:
|
||||
UChar32 transform(UChar32 c) const;
|
||||
|
||||
@ -159,7 +159,7 @@ udict_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *out
|
||||
* Constants are defined in the DictionaryData class.
|
||||
*
|
||||
* For the data structure of BytesTrie & UCharsTrie see
|
||||
* http://site.icu-project.org/design/struct/tries
|
||||
* https://icu.unicode.org/design/struct/tries
|
||||
* and the bytestrie.h and ucharstrie.h header files.
|
||||
*
|
||||
* int32_t indexes[indexesLength]; -- indexesLength=indexes[IX_STRING_TRIE_OFFSET]/4;
|
||||
|
@ -53,7 +53,7 @@ DateInterval::clone() const {
|
||||
}
|
||||
|
||||
|
||||
UBool
|
||||
bool
|
||||
DateInterval::operator==(const DateInterval& other) const {
|
||||
return ( fromDate == other.fromDate && toDate == other.toDate );
|
||||
}
|
||||
|
@ -1,4 +0,0 @@
|
||||
dtintrv.o dtintrv.d : dtintrv.cpp unicode/dtintrv.h unicode/utypes.h \
|
||||
unicode/umachine.h unicode/ptypes.h unicode/platform.h \
|
||||
unicode/uconfig.h unicode/uvernum.h unicode/urename.h \
|
||||
unicode/uversion.h unicode/uobject.h
|
@ -86,6 +86,7 @@ Edits &Edits::moveArray(Edits &src) U_NOEXCEPT {
|
||||
}
|
||||
|
||||
Edits &Edits::operator=(const Edits &other) {
|
||||
if (this == &other) { return *this; } // self-assignment: no-op
|
||||
length = other.length;
|
||||
delta = other.delta;
|
||||
numChanges = other.numChanges;
|
||||
@ -220,7 +221,7 @@ UBool Edits::growArray() {
|
||||
// Not U_BUFFER_OVERFLOW_ERROR because that could be confused on a string transform API
|
||||
// with a result-string-buffer overflow.
|
||||
errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return FALSE;
|
||||
return false;
|
||||
} else if (capacity >= (INT32_MAX / 2)) {
|
||||
newCapacity = INT32_MAX;
|
||||
} else {
|
||||
@ -229,25 +230,25 @@ UBool Edits::growArray() {
|
||||
// Grow by at least 5 units so that a maximal change record will fit.
|
||||
if ((newCapacity - capacity) < 5) {
|
||||
errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return FALSE;
|
||||
return false;
|
||||
}
|
||||
uint16_t *newArray = (uint16_t *)uprv_malloc((size_t)newCapacity * 2);
|
||||
if (newArray == NULL) {
|
||||
errorCode_ = U_MEMORY_ALLOCATION_ERROR;
|
||||
return FALSE;
|
||||
return false;
|
||||
}
|
||||
uprv_memcpy(newArray, array, (size_t)length * 2);
|
||||
releaseArray();
|
||||
array = newArray;
|
||||
capacity = newCapacity;
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
|
||||
UBool Edits::copyErrorTo(UErrorCode &outErrorCode) const {
|
||||
if (U_FAILURE(outErrorCode)) { return TRUE; }
|
||||
if (U_SUCCESS(errorCode_)) { return FALSE; }
|
||||
if (U_FAILURE(outErrorCode)) { return true; }
|
||||
if (U_SUCCESS(errorCode_)) { return false; }
|
||||
outErrorCode = errorCode_;
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
|
||||
Edits &Edits::mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode) {
|
||||
@ -256,7 +257,7 @@ Edits &Edits::mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &error
|
||||
// Parallel iteration over both Edits.
|
||||
Iterator abIter = ab.getFineIterator();
|
||||
Iterator bcIter = bc.getFineIterator();
|
||||
UBool abHasNext = TRUE, bcHasNext = TRUE;
|
||||
UBool abHasNext = true, bcHasNext = true;
|
||||
// Copy iterator state into local variables, so that we can modify and subdivide spans.
|
||||
// ab old & new length, bc old & new length
|
||||
int32_t aLength = 0, ab_bLength = 0, bc_bLength = 0, cLength = 0;
|
||||
@ -399,7 +400,7 @@ Edits &Edits::mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &error
|
||||
Edits::Iterator::Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs) :
|
||||
array(a), index(0), length(len), remaining(0),
|
||||
onlyChanges_(oc), coarse(crs),
|
||||
dir(0), changed(FALSE), oldLength_(0), newLength_(0),
|
||||
dir(0), changed(false), oldLength_(0), newLength_(0),
|
||||
srcIndex(0), replIndex(0), destIndex(0) {}
|
||||
|
||||
int32_t Edits::Iterator::readLength(int32_t head) {
|
||||
@ -440,16 +441,16 @@ void Edits::Iterator::updatePreviousIndexes() {
|
||||
UBool Edits::Iterator::noNext() {
|
||||
// No change before or beyond the string.
|
||||
dir = 0;
|
||||
changed = FALSE;
|
||||
changed = false;
|
||||
oldLength_ = newLength_ = 0;
|
||||
return FALSE;
|
||||
return false;
|
||||
}
|
||||
|
||||
UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
|
||||
// Forward iteration: Update the string indexes to the limit of the current span,
|
||||
// and post-increment-read array units to assemble a new span.
|
||||
// Leaves the array index one after the last unit of that span.
|
||||
if (U_FAILURE(errorCode)) { return FALSE; }
|
||||
if (U_FAILURE(errorCode)) { return false; }
|
||||
// We have an errorCode in case we need to start guarding against integer overflows.
|
||||
// It is also convenient for caller loops if we bail out when an error was set elsewhere.
|
||||
if (dir > 0) {
|
||||
@ -463,7 +464,7 @@ UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
|
||||
// Stay on the current one of a sequence of compressed changes.
|
||||
++index; // next() rests on the index after the sequence unit.
|
||||
dir = 1;
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
dir = 1;
|
||||
@ -472,7 +473,7 @@ UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
|
||||
// Fine-grained iterator: Continue a sequence of compressed changes.
|
||||
if (remaining > 1) {
|
||||
--remaining;
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
remaining = 0;
|
||||
}
|
||||
@ -482,7 +483,7 @@ UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
|
||||
int32_t u = array[index++];
|
||||
if (u <= MAX_UNCHANGED) {
|
||||
// Combine adjacent unchanged ranges.
|
||||
changed = FALSE;
|
||||
changed = false;
|
||||
oldLength_ = u + 1;
|
||||
while (index < length && (u = array[index]) <= MAX_UNCHANGED) {
|
||||
++index;
|
||||
@ -497,10 +498,10 @@ UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
|
||||
// already fetched u > MAX_UNCHANGED at index
|
||||
++index;
|
||||
} else {
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
changed = TRUE;
|
||||
changed = true;
|
||||
if (u <= MAX_SHORT_CHANGE) {
|
||||
int32_t oldLen = u >> 12;
|
||||
int32_t newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH;
|
||||
@ -515,14 +516,14 @@ UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
|
||||
if (num > 1) {
|
||||
remaining = num; // This is the first of two or more changes.
|
||||
}
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
U_ASSERT(u <= 0x7fff);
|
||||
oldLength_ = readLength((u >> 6) & 0x3f);
|
||||
newLength_ = readLength(u & 0x3f);
|
||||
if (!coarse) {
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
// Combine adjacent changes.
|
||||
@ -538,14 +539,14 @@ UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
|
||||
newLength_ += readLength(u & 0x3f);
|
||||
}
|
||||
}
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
|
||||
UBool Edits::Iterator::previous(UErrorCode &errorCode) {
|
||||
// Backward iteration: Pre-decrement-read array units to assemble a new span,
|
||||
// then update the string indexes to the start of that span.
|
||||
// Leaves the array index on the head unit of that span.
|
||||
if (U_FAILURE(errorCode)) { return FALSE; }
|
||||
if (U_FAILURE(errorCode)) { return false; }
|
||||
// We have an errorCode in case we need to start guarding against integer overflows.
|
||||
// It is also convenient for caller loops if we bail out when an error was set elsewhere.
|
||||
if (dir >= 0) {
|
||||
@ -558,7 +559,7 @@ UBool Edits::Iterator::previous(UErrorCode &errorCode) {
|
||||
// Stay on the current one of a sequence of compressed changes.
|
||||
--index; // previous() rests on the sequence unit.
|
||||
dir = -1;
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
updateNextIndexes();
|
||||
}
|
||||
@ -571,7 +572,7 @@ UBool Edits::Iterator::previous(UErrorCode &errorCode) {
|
||||
if (remaining <= (u & SHORT_CHANGE_NUM_MASK)) {
|
||||
++remaining;
|
||||
updatePreviousIndexes();
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
remaining = 0;
|
||||
}
|
||||
@ -581,7 +582,7 @@ UBool Edits::Iterator::previous(UErrorCode &errorCode) {
|
||||
int32_t u = array[--index];
|
||||
if (u <= MAX_UNCHANGED) {
|
||||
// Combine adjacent unchanged ranges.
|
||||
changed = FALSE;
|
||||
changed = false;
|
||||
oldLength_ = u + 1;
|
||||
while (index > 0 && (u = array[index - 1]) <= MAX_UNCHANGED) {
|
||||
--index;
|
||||
@ -590,9 +591,9 @@ UBool Edits::Iterator::previous(UErrorCode &errorCode) {
|
||||
newLength_ = oldLength_;
|
||||
// No need to handle onlyChanges as long as previous() is called only from findIndex().
|
||||
updatePreviousIndexes();
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
changed = TRUE;
|
||||
changed = true;
|
||||
if (u <= MAX_SHORT_CHANGE) {
|
||||
int32_t oldLen = u >> 12;
|
||||
int32_t newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH;
|
||||
@ -608,7 +609,7 @@ UBool Edits::Iterator::previous(UErrorCode &errorCode) {
|
||||
remaining = 1; // This is the last of two or more changes.
|
||||
}
|
||||
updatePreviousIndexes();
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
if (u <= 0x7fff) {
|
||||
@ -628,7 +629,7 @@ UBool Edits::Iterator::previous(UErrorCode &errorCode) {
|
||||
}
|
||||
if (!coarse) {
|
||||
updatePreviousIndexes();
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
// Combine adjacent changes.
|
||||
@ -647,7 +648,7 @@ UBool Edits::Iterator::previous(UErrorCode &errorCode) {
|
||||
}
|
||||
}
|
||||
updatePreviousIndexes();
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
|
||||
int32_t Edits::Iterator::findIndex(int32_t i, UBool findSource, UErrorCode &errorCode) {
|
||||
@ -704,7 +705,7 @@ int32_t Edits::Iterator::findIndex(int32_t i, UBool findSource, UErrorCode &erro
|
||||
// The index is in the current span.
|
||||
return 0;
|
||||
}
|
||||
while (next(FALSE, errorCode)) {
|
||||
while (next(false, errorCode)) {
|
||||
if (findSource) {
|
||||
spanStart = srcIndex;
|
||||
spanLength = oldLength_;
|
||||
@ -738,7 +739,7 @@ int32_t Edits::Iterator::findIndex(int32_t i, UBool findSource, UErrorCode &erro
|
||||
}
|
||||
|
||||
int32_t Edits::Iterator::destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode) {
|
||||
int32_t where = findIndex(i, TRUE, errorCode);
|
||||
int32_t where = findIndex(i, true, errorCode);
|
||||
if (where < 0) {
|
||||
// Error or before the string.
|
||||
return 0;
|
||||
@ -757,7 +758,7 @@ int32_t Edits::Iterator::destinationIndexFromSourceIndex(int32_t i, UErrorCode &
|
||||
}
|
||||
|
||||
int32_t Edits::Iterator::sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode) {
|
||||
int32_t where = findIndex(i, FALSE, errorCode);
|
||||
int32_t where = findIndex(i, false, errorCode);
|
||||
if (where < 0) {
|
||||
// Error or before the string.
|
||||
return 0;
|
||||
|
@ -1,6 +0,0 @@
|
||||
edits.o edits.d : edits.cpp unicode/edits.h unicode/utypes.h unicode/umachine.h \
|
||||
unicode/ptypes.h unicode/platform.h unicode/uconfig.h \
|
||||
unicode/uvernum.h unicode/urename.h unicode/uversion.h \
|
||||
unicode/uobject.h unicode/unistr.h unicode/char16ptr.h unicode/rep.h \
|
||||
unicode/std_string.h unicode/stringpiece.h unicode/bytestream.h \
|
||||
cmemory.h unicode/localpointer.h uassert.h util.h
|
220
common/emojiprops.cpp
Normal file
220
common/emojiprops.cpp
Normal file
@ -0,0 +1,220 @@
|
||||
// © 2021 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: https://www.unicode.org/copyright.html
|
||||
|
||||
// emojiprops.cpp
|
||||
// created: 2021sep04 Markus W. Scherer
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/ucharstrie.h"
|
||||
#include "unicode/ucptrie.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "unicode/ustringtrie.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "emojiprops.h"
|
||||
#include "ucln.h"
|
||||
#include "ucln_cmn.h"
|
||||
#include "umutex.h"
|
||||
#include "uset_imp.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
namespace {
|
||||
|
||||
// The shared EmojiProps instance. Assigned in initSingleton() and
// deleted/reset to nullptr in emojiprops_cleanup().
EmojiProps *singleton = nullptr;
|
||||
// Init-once guard handed to umtx_initOnce() by EmojiProps::getSingleton();
// reset in emojiprops_cleanup().
icu::UInitOnce emojiInitOnce {};
|
||||
|
||||
/**
 * Cleanup callback registered via ucln_common_registerCleanup().
 * Destroys the shared EmojiProps instance (if any) and re-arms the
 * init-once guard.
 *
 * @return always true
 */
UBool U_CALLCONV emojiprops_cleanup() {
    EmojiProps *doomed = singleton;
    singleton = nullptr;
    delete doomed;  // deleting nullptr is a no-op
    emojiInitOnce.reset();
    return true;
}
|
||||
|
||||
/**
 * Init-once callback: creates the shared EmojiProps instance.
 *
 * Does nothing if errorCode already indicates a failure on entry.
 * Otherwise `singleton` ends up either pointing at a successfully loaded
 * object or being nullptr with errorCode set to a failure, and the
 * cleanup function is registered in both cases.
 *
 * @param errorCode in/out ICU error code; set to U_MEMORY_ALLOCATION_ERROR
 *        if the allocation returns nullptr.
 */
void U_CALLCONV initSingleton(UErrorCode &errorCode) {
    if (U_SUCCESS(errorCode)) {
        EmojiProps *created = new EmojiProps(errorCode);
        if (created == nullptr) {
            errorCode = U_MEMORY_ALLOCATION_ERROR;
        } else if (U_FAILURE(errorCode)) {
            // The constructor reported a load failure: discard the object.
            delete created;
            created = nullptr;
        }
        singleton = created;
        ucln_common_registerCleanup(UCLN_COMMON_EMOJIPROPS, emojiprops_cleanup);
    }
}
|
||||
|
||||
// TODO: turn this into a shared helper function
|
||||
// Requires the major version to match, and then requires at least the minor version.
|
||||
/**
 * Checks a data header against the expected data format and version.
 *
 * @param info       the UDataInfo header to inspect
 * @param dataFormat the 4 expected dataFormat units
 * @param major      formatVersion[0] must equal this value
 * @param minor      formatVersion[1] must be at least this value
 * @return true if the header size, endianness, charset family, data format
 *         and format version are all acceptable
 */
UBool udata_isAcceptableMajorMinor(
        const UDataInfo &info, const UChar *dataFormat, uint8_t major, uint8_t minor) {
    if (info.size < 20) { return false; }
    if (info.isBigEndian != U_IS_BIG_ENDIAN) { return false; }
    if (info.charsetFamily != U_CHARSET_FAMILY) { return false; }
    for (int32_t k = 0; k < 4; ++k) {
        if (info.dataFormat[k] != dataFormat[k]) { return false; }
    }
    if (info.formatVersion[0] != major) { return false; }
    return info.formatVersion[1] >= minor;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Releases what load() acquired: the data memory handle and the code point trie.
EmojiProps::~EmojiProps() {
|
||||
udata_close(memory);
|
||||
ucptrie_close(cpTrie);
|
||||
}
|
||||
|
||||
/**
 * Returns the shared EmojiProps instance, creating it on first use
 * through the init-once guard.
 *
 * @param errorCode in/out ICU error code; nothing is done if it already
 *        indicates a failure.
 * @return the shared instance, or nullptr if errorCode was a failure on
 *         entry or the instance could not be created
 */
const EmojiProps *
EmojiProps::getSingleton(UErrorCode &errorCode) {
    if (U_SUCCESS(errorCode)) {
        umtx_initOnce(emojiInitOnce, &initSingleton, errorCode);
        return singleton;
    }
    return nullptr;
}
|
||||
|
||||
/**
 * Acceptance callback passed to udata_openChoice() by load().
 * Accepts data whose format is "Emoj" with major version 1 and any minor
 * version; the context, type and name arguments are ignored.
 */
UBool U_CALLCONV
EmojiProps::isAcceptable(void * /*context*/, const char * /*type*/, const char * /*name*/,
                         const UDataInfo *pInfo) {
    const UDataInfo &header = *pInfo;
    return udata_isAcceptableMajorMinor(header, u"Emoj", 1, 0);
}
|
||||
|
||||
void
|
||||
EmojiProps::load(UErrorCode &errorCode) {
|
||||
memory = udata_openChoice(nullptr, "icu", "uemoji", isAcceptable, this, &errorCode);
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
const uint8_t *inBytes = (const uint8_t *)udata_getMemory(memory);
|
||||
const int32_t *inIndexes = (const int32_t *)inBytes;
|
||||
int32_t indexesLength = inIndexes[IX_CPTRIE_OFFSET] / 4;
|
||||
if (indexesLength <= IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET) {
|
||||
errorCode = U_INVALID_FORMAT_ERROR; // Not enough indexes.
|
||||
return;
|
||||
}
|
||||
|
||||
int32_t i = IX_CPTRIE_OFFSET;
|
||||
int32_t offset = inIndexes[i++];
|
||||
int32_t nextOffset = inIndexes[i];
|
||||
cpTrie = ucptrie_openFromBinary(UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_8,
|
||||
inBytes + offset, nextOffset - offset, nullptr, &errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = IX_BASIC_EMOJI_TRIE_OFFSET; i <= IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET; ++i) {
|
||||
offset = inIndexes[i];
|
||||
nextOffset = inIndexes[i + 1];
|
||||
// Set/leave nullptr if there is no UCharsTrie.
|
||||
const UChar *p = nextOffset > offset ? (const UChar *)(inBytes + offset) : nullptr;
|
||||
stringTries[getStringTrieIndex(i)] = p;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
EmojiProps::addPropertyStarts(const USetAdder *sa, UErrorCode & /*errorCode*/) const {
|
||||
// Add the start code point of each same-value range of the trie.
|
||||
UChar32 start = 0, end;
|
||||
uint32_t value;
|
||||
while ((end = ucptrie_getRange(cpTrie, start, UCPMAP_RANGE_NORMAL, 0,
|
||||
nullptr, nullptr, &value)) >= 0) {
|
||||
sa->add(sa->set, start);
|
||||
start = end + 1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Static convenience entry point for code point properties: fetches the
 * shared instance and delegates to hasBinaryPropertyImpl(c, which).
 *
 * @return false if the shared instance cannot be obtained; otherwise the
 *         result of the instance lookup
 */
UBool
EmojiProps::hasBinaryProperty(UChar32 c, UProperty which) {
    UErrorCode status = U_ZERO_ERROR;
    const EmojiProps *props = getSingleton(status);
    if (U_FAILURE(status)) {
        return false;
    }
    return props->hasBinaryPropertyImpl(c, which);
}
|
||||
|
||||
/**
 * Looks up a binary emoji property of a single code point in the code
 * point trie.
 *
 * @param c     the code point to test
 * @param which the property; only values in UCHAR_EMOJI..UCHAR_RGI_EMOJI
 *              that have a bit in the trie are supported
 * @return true if the trie value for c has the property's bit set;
 *         false for unsupported properties
 */
UBool
EmojiProps::hasBinaryPropertyImpl(UChar32 c, UProperty which) const {
    const bool inRange = UCHAR_EMOJI <= which && which <= UCHAR_RGI_EMOJI;
    if (!inRange) {
        return false;
    }
    // Maps (which - UCHAR_EMOJI) to the bit number in the trie value,
    // or -1 where this function has no data for the property.
    // Note: UCHAR_REGIONAL_INDICATOR is a single, hardcoded range implemented elsewhere.
    static constexpr int8_t bitFlags[] = {
        BIT_EMOJI,                  // UCHAR_EMOJI=57
        BIT_EMOJI_PRESENTATION,     // UCHAR_EMOJI_PRESENTATION=58
        BIT_EMOJI_MODIFIER,         // UCHAR_EMOJI_MODIFIER=59
        BIT_EMOJI_MODIFIER_BASE,    // UCHAR_EMOJI_MODIFIER_BASE=60
        BIT_EMOJI_COMPONENT,        // UCHAR_EMOJI_COMPONENT=61
        -1,                         // UCHAR_REGIONAL_INDICATOR=62
        -1,                         // UCHAR_PREPENDED_CONCATENATION_MARK=63
        BIT_EXTENDED_PICTOGRAPHIC,  // UCHAR_EXTENDED_PICTOGRAPHIC=64
        BIT_BASIC_EMOJI,            // UCHAR_BASIC_EMOJI=65
        -1,                         // UCHAR_EMOJI_KEYCAP_SEQUENCE=66
        -1,                         // UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE=67
        -1,                         // UCHAR_RGI_EMOJI_FLAG_SEQUENCE=68
        -1,                         // UCHAR_RGI_EMOJI_TAG_SEQUENCE=69
        -1,                         // UCHAR_RGI_EMOJI_ZWJ_SEQUENCE=70
        BIT_BASIC_EMOJI,            // UCHAR_RGI_EMOJI=71
    };
    const int32_t shift = bitFlags[which - UCHAR_EMOJI];
    if (shift < 0) {
        return false;  // not a property that we support in this function
    }
    const uint8_t flags = UCPTRIE_FAST_GET(cpTrie, UCPTRIE_8, c);
    return (flags & (1 << shift)) != 0;
}
|
||||
|
||||
/**
 * Static convenience entry point for string properties: fetches the shared
 * instance and delegates to hasBinaryPropertyImpl(s, length, which).
 *
 * @return false if the shared instance cannot be obtained; otherwise the
 *         result of the instance lookup
 */
UBool
EmojiProps::hasBinaryProperty(const UChar *s, int32_t length, UProperty which) {
    UErrorCode status = U_ZERO_ERROR;
    const EmojiProps *props = getSingleton(status);
    if (U_FAILURE(status)) {
        return false;
    }
    return props->hasBinaryPropertyImpl(s, length, which);
}
|
||||
|
||||
/**
 * Tests whether a string has one of the emoji properties of strings by
 * looking it up in the corresponding string trie(s).
 *
 * @param s      the string; nullptr always yields false
 * @param length number of UChars in s; a negative value is passed through
 *               to UCharsTrie::next() after the first unit is checked for 0
 * @param which  a property in UCHAR_BASIC_EMOJI..UCHAR_RGI_EMOJI;
 *               UCHAR_RGI_EMOJI checks all of the string tries
 * @return true if some consulted trie has a value for the whole string
 */
UBool
EmojiProps::hasBinaryPropertyImpl(const UChar *s, int32_t length, UProperty which) const {
    if (s == nullptr) { return false; }
    // empty string
    if (length == 0) { return false; }
    if (length < 0 && *s == 0) { return false; }
    // The caller should have delegated single code points to hasBinaryProperty(c, which).
    if (which < UCHAR_BASIC_EMOJI || UCHAR_RGI_EMOJI < which) {
        return false;
    }
    // Range of stringTries[] slots to consult.
    int32_t firstSlot;
    int32_t lastSlot;
    if (which == UCHAR_RGI_EMOJI) {
        // RGI_Emoji is the union of the other emoji properties of strings.
        firstSlot = 0;
        lastSlot = UCHAR_RGI_EMOJI_ZWJ_SEQUENCE - UCHAR_BASIC_EMOJI;
    } else {
        firstSlot = lastSlot = which - UCHAR_BASIC_EMOJI;
    }
    for (int32_t slot = firstSlot; slot <= lastSlot; ++slot) {
        const UChar *trieUChars = stringTries[slot];
        if (trieUChars == nullptr) {
            continue;  // no trie for this property
        }
        UCharsTrie trie(trieUChars);
        const UStringTrieResult outcome = trie.next(s, length);
        if (USTRINGTRIE_HAS_VALUE(outcome)) {
            return true;
        }
    }
    return false;
}
|
||||
|
||||
void
|
||||
EmojiProps::addStrings(const USetAdder *sa, UProperty which, UErrorCode &errorCode) const {
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
if (which < UCHAR_BASIC_EMOJI || UCHAR_RGI_EMOJI < which) {
|
||||
return;
|
||||
}
|
||||
UProperty firstProp = which, lastProp = which;
|
||||
if (which == UCHAR_RGI_EMOJI) {
|
||||
// RGI_Emoji is the union of the other emoji properties of strings.
|
||||
firstProp = UCHAR_BASIC_EMOJI;
|
||||
lastProp = UCHAR_RGI_EMOJI_ZWJ_SEQUENCE;
|
||||
}
|
||||
for (int32_t prop = firstProp; prop <= lastProp; ++prop) {
|
||||
const UChar *trieUChars = stringTries[prop - UCHAR_BASIC_EMOJI];
|
||||
if (trieUChars != nullptr) {
|
||||
UCharsTrie::Iterator iter(trieUChars, 0, errorCode);
|
||||
while (iter.next(errorCode)) {
|
||||
const UnicodeString &s = iter.getString();
|
||||
sa->addString(sa->set, s.getBuffer(), s.length());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
90
common/emojiprops.h
Normal file
90
common/emojiprops.h
Normal file
@ -0,0 +1,90 @@
|
||||
// © 2021 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: https://www.unicode.org/copyright.html
|
||||
|
||||
// emojiprops.h
|
||||
// created: 2021sep03 Markus W. Scherer
|
||||
|
||||
#ifndef __EMOJIPROPS_H__
|
||||
#define __EMOJIPROPS_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/ucptrie.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "uset_imp.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class EmojiProps : public UMemory {
|
||||
public:
|
||||
// @internal
|
||||
EmojiProps(UErrorCode &errorCode) { load(errorCode); }
|
||||
~EmojiProps();
|
||||
|
||||
static const EmojiProps *getSingleton(UErrorCode &errorCode);
|
||||
static UBool hasBinaryProperty(UChar32 c, UProperty which);
|
||||
static UBool hasBinaryProperty(const UChar *s, int32_t length, UProperty which);
|
||||
|
||||
void addPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const;
|
||||
void addStrings(const USetAdder *sa, UProperty which, UErrorCode &errorCode) const;
|
||||
|
||||
enum {
|
||||
// Byte offsets from the start of the data, after the generic header,
|
||||
// in ascending order.
|
||||
// UCPTrie=CodePointTrie, follows the indexes
|
||||
IX_CPTRIE_OFFSET,
|
||||
IX_RESERVED1,
|
||||
IX_RESERVED2,
|
||||
IX_RESERVED3,
|
||||
|
||||
// UCharsTrie=CharsTrie
|
||||
IX_BASIC_EMOJI_TRIE_OFFSET,
|
||||
IX_EMOJI_KEYCAP_SEQUENCE_TRIE_OFFSET,
|
||||
IX_RGI_EMOJI_MODIFIER_SEQUENCE_TRIE_OFFSET,
|
||||
IX_RGI_EMOJI_FLAG_SEQUENCE_TRIE_OFFSET,
|
||||
IX_RGI_EMOJI_TAG_SEQUENCE_TRIE_OFFSET,
|
||||
IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET,
|
||||
IX_RESERVED10,
|
||||
IX_RESERVED11,
|
||||
IX_RESERVED12,
|
||||
IX_TOTAL_SIZE,
|
||||
|
||||
// Not initially byte offsets.
|
||||
IX_RESERVED14,
|
||||
IX_RESERVED15,
|
||||
IX_COUNT // 16
|
||||
};
|
||||
|
||||
// Properties in the code point trie.
|
||||
enum {
|
||||
// https://www.unicode.org/reports/tr51/#Emoji_Properties
|
||||
BIT_EMOJI,
|
||||
BIT_EMOJI_PRESENTATION,
|
||||
BIT_EMOJI_MODIFIER,
|
||||
BIT_EMOJI_MODIFIER_BASE,
|
||||
BIT_EMOJI_COMPONENT,
|
||||
BIT_EXTENDED_PICTOGRAPHIC,
|
||||
// https://www.unicode.org/reports/tr51/#Emoji_Sets
|
||||
BIT_BASIC_EMOJI
|
||||
};
|
||||
|
||||
private:
|
||||
static UBool U_CALLCONV
|
||||
isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);
|
||||
/** Input i: One of the IX_..._TRIE_OFFSET indexes into the data file indexes[] array. */
|
||||
static int32_t getStringTrieIndex(int32_t i) {
|
||||
return i - IX_BASIC_EMOJI_TRIE_OFFSET;
|
||||
}
|
||||
|
||||
void load(UErrorCode &errorCode);
|
||||
UBool hasBinaryPropertyImpl(UChar32 c, UProperty which) const;
|
||||
UBool hasBinaryPropertyImpl(const UChar *s, int32_t length, UProperty which) const;
|
||||
|
||||
UDataMemory *memory = nullptr;
|
||||
UCPTrie *cpTrie = nullptr;
|
||||
const UChar *stringTries[6] = { nullptr, nullptr, nullptr, nullptr, nullptr, nullptr };
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // __EMOJIPROPS_H__
|
@ -1,4 +0,0 @@
|
||||
errorcode.o errorcode.d : errorcode.cpp unicode/utypes.h unicode/umachine.h \
|
||||
unicode/ptypes.h unicode/platform.h unicode/uconfig.h \
|
||||
unicode/uvernum.h unicode/urename.h unicode/uversion.h \
|
||||
unicode/errorcode.h unicode/uobject.h
|
@ -20,6 +20,7 @@
|
||||
#include "ubrkimpl.h" // U_ICUDATA_BRKITR
|
||||
#include "uvector.h"
|
||||
#include "cmemory.h"
|
||||
#include "umutex.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
@ -48,7 +49,7 @@ static void _fb_trace(const char *m, const UnicodeString *s, UBool b, int32_t d,
|
||||
/**
|
||||
* Used with sortedInsert()
|
||||
*/
|
||||
static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) {
|
||||
static int32_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) {
|
||||
const UnicodeString &a = *(const UnicodeString*)t1.pointer;
|
||||
const UnicodeString &b = *(const UnicodeString*)t2.pointer;
|
||||
return a.compare(b);
|
||||
@ -57,7 +58,7 @@ static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) {
|
||||
/**
|
||||
* A UVector which implements a set of strings.
|
||||
*/
|
||||
class U_COMMON_API UStringSet : public UVector {
|
||||
class UStringSet : public UVector {
|
||||
public:
|
||||
UStringSet(UErrorCode &status) : UVector(uprv_deleteUObject,
|
||||
uhash_compareUnicodeString,
|
||||
@ -89,7 +90,6 @@ class U_COMMON_API UStringSet : public UVector {
|
||||
} else {
|
||||
sortedInsert(str, compareUnicodeString, status);
|
||||
if(U_FAILURE(status)) {
|
||||
delete str;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
@ -139,13 +139,30 @@ class SimpleFilteredSentenceBreakData : public UMemory {
|
||||
public:
|
||||
SimpleFilteredSentenceBreakData(UCharsTrie *forwards, UCharsTrie *backwards )
|
||||
: fForwardsPartialTrie(forwards), fBackwardsTrie(backwards), refcount(1) { }
|
||||
SimpleFilteredSentenceBreakData *incr() { refcount++; return this; }
|
||||
SimpleFilteredSentenceBreakData *decr() { if((--refcount) <= 0) delete this; return 0; }
|
||||
SimpleFilteredSentenceBreakData *incr() {
|
||||
umtx_atomic_inc(&refcount);
|
||||
return this;
|
||||
}
|
||||
SimpleFilteredSentenceBreakData *decr() {
|
||||
if(umtx_atomic_dec(&refcount) <= 0) {
|
||||
delete this;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
virtual ~SimpleFilteredSentenceBreakData();
|
||||
|
||||
bool hasForwardsPartialTrie() const { return fForwardsPartialTrie.isValid(); }
|
||||
bool hasBackwardsTrie() const { return fBackwardsTrie.isValid(); }
|
||||
|
||||
const UCharsTrie &getForwardsPartialTrie() const { return *fForwardsPartialTrie; }
|
||||
const UCharsTrie &getBackwardsTrie() const { return *fBackwardsTrie; }
|
||||
|
||||
private:
|
||||
// These tries own their data arrays.
|
||||
// They are shared and must therefore not be modified.
|
||||
LocalPointer<UCharsTrie> fForwardsPartialTrie; // Has ".a" for "a.M."
|
||||
LocalPointer<UCharsTrie> fBackwardsTrie; // i.e. ".srM" for Mrs.
|
||||
int32_t refcount;
|
||||
u_atomic_int32_t refcount;
|
||||
};
|
||||
|
||||
SimpleFilteredSentenceBreakData::~SimpleFilteredSentenceBreakData() {}
|
||||
@ -168,37 +185,37 @@ public:
|
||||
/* -- cloning and other subclass stuff -- */
|
||||
virtual BreakIterator * createBufferClone(void * /*stackBuffer*/,
|
||||
int32_t &/*BufferSize*/,
|
||||
UErrorCode &status) {
|
||||
UErrorCode &status) override {
|
||||
// for now - always deep clone
|
||||
status = U_SAFECLONE_ALLOCATED_WARNING;
|
||||
return clone();
|
||||
}
|
||||
virtual SimpleFilteredSentenceBreakIterator* clone() const { return new SimpleFilteredSentenceBreakIterator(*this); }
|
||||
virtual UClassID getDynamicClassID(void) const { return NULL; }
|
||||
virtual UBool operator==(const BreakIterator& o) const { if(this==&o) return true; return false; }
|
||||
virtual SimpleFilteredSentenceBreakIterator* clone() const override { return new SimpleFilteredSentenceBreakIterator(*this); }
|
||||
virtual UClassID getDynamicClassID(void) const override { return NULL; }
|
||||
virtual bool operator==(const BreakIterator& o) const override { if(this==&o) return true; return false; }
|
||||
|
||||
/* -- text modifying -- */
|
||||
virtual void setText(UText *text, UErrorCode &status) { fDelegate->setText(text,status); }
|
||||
virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) { fDelegate->refreshInputText(input,status); return *this; }
|
||||
virtual void adoptText(CharacterIterator* it) { fDelegate->adoptText(it); }
|
||||
virtual void setText(const UnicodeString &text) { fDelegate->setText(text); }
|
||||
virtual void setText(UText *text, UErrorCode &status) override { fDelegate->setText(text,status); }
|
||||
virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) override { fDelegate->refreshInputText(input,status); return *this; }
|
||||
virtual void adoptText(CharacterIterator* it) override { fDelegate->adoptText(it); }
|
||||
virtual void setText(const UnicodeString &text) override { fDelegate->setText(text); }
|
||||
|
||||
/* -- other functions that are just delegated -- */
|
||||
virtual UText *getUText(UText *fillIn, UErrorCode &status) const { return fDelegate->getUText(fillIn,status); }
|
||||
virtual CharacterIterator& getText(void) const { return fDelegate->getText(); }
|
||||
virtual UText *getUText(UText *fillIn, UErrorCode &status) const override { return fDelegate->getUText(fillIn,status); }
|
||||
virtual CharacterIterator& getText(void) const override { return fDelegate->getText(); }
|
||||
|
||||
/* -- ITERATION -- */
|
||||
virtual int32_t first(void);
|
||||
virtual int32_t preceding(int32_t offset);
|
||||
virtual int32_t previous(void);
|
||||
virtual UBool isBoundary(int32_t offset);
|
||||
virtual int32_t current(void) const { return fDelegate->current(); } // we keep the delegate current, so this should be correct.
|
||||
virtual int32_t first(void) override;
|
||||
virtual int32_t preceding(int32_t offset) override;
|
||||
virtual int32_t previous(void) override;
|
||||
virtual UBool isBoundary(int32_t offset) override;
|
||||
virtual int32_t current(void) const override { return fDelegate->current(); } // we keep the delegate current, so this should be correct.
|
||||
|
||||
virtual int32_t next(void);
|
||||
virtual int32_t next(void) override;
|
||||
|
||||
virtual int32_t next(int32_t n);
|
||||
virtual int32_t following(int32_t offset);
|
||||
virtual int32_t last(void);
|
||||
virtual int32_t next(int32_t n) override;
|
||||
virtual int32_t following(int32_t offset) override;
|
||||
virtual int32_t last(void) override;
|
||||
|
||||
private:
|
||||
/**
|
||||
@ -244,7 +261,13 @@ SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(BreakIt
|
||||
fData(new SimpleFilteredSentenceBreakData(forwards, backwards)),
|
||||
fDelegate(adopt)
|
||||
{
|
||||
// all set..
|
||||
if (fData == nullptr) {
|
||||
delete forwards;
|
||||
delete backwards;
|
||||
if (U_SUCCESS(status)) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SimpleFilteredSentenceBreakIterator::~SimpleFilteredSentenceBreakIterator() {
|
||||
@ -261,59 +284,62 @@ SimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n) {
|
||||
int32_t bestValue = -1;
|
||||
// loops while 'n' points to an exception.
|
||||
utext_setNativeIndex(fText.getAlias(), n); // from n..
|
||||
fData->fBackwardsTrie->reset();
|
||||
UChar32 uch;
|
||||
|
||||
//if(debug2) u_printf(" n@ %d\n", n);
|
||||
// Assume a space is following the '.' (so we handle the case: "Mr. /Brown")
|
||||
if((uch=utext_previous32(fText.getAlias()))==(UChar32)0x0020) { // TODO: skip a class of chars here??
|
||||
if(utext_previous32(fText.getAlias())==u' ') { // TODO: skip a class of chars here??
|
||||
// TODO only do this the 1st time?
|
||||
//if(debug2) u_printf("skipping prev: |%C| \n", (UChar)uch);
|
||||
} else {
|
||||
//if(debug2) u_printf("not skipping prev: |%C| \n", (UChar)uch);
|
||||
uch = utext_next32(fText.getAlias());
|
||||
utext_next32(fText.getAlias());
|
||||
//if(debug2) u_printf(" -> : |%C| \n", (UChar)uch);
|
||||
}
|
||||
|
||||
UStringTrieResult r = USTRINGTRIE_INTERMEDIATE_VALUE;
|
||||
|
||||
while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL && // more to consume backwards and..
|
||||
USTRINGTRIE_HAS_NEXT(r=fData->fBackwardsTrie->nextForCodePoint(uch))) {// more in the trie
|
||||
{
|
||||
// Do not modify the shared trie!
|
||||
UCharsTrie iter(fData->getBackwardsTrie());
|
||||
UChar32 uch;
|
||||
while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL) { // more to consume backwards
|
||||
UStringTrieResult r = iter.nextForCodePoint(uch);
|
||||
if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far
|
||||
bestPosn = utext_getNativeIndex(fText.getAlias());
|
||||
bestValue = fData->fBackwardsTrie->getValue();
|
||||
bestValue = iter.getValue();
|
||||
}
|
||||
if(!USTRINGTRIE_HAS_NEXT(r)) {
|
||||
break;
|
||||
}
|
||||
//if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getNativeIndex(fText.getAlias()));
|
||||
}
|
||||
|
||||
if(USTRINGTRIE_MATCHES(r)) { // exact match?
|
||||
//if(debug2) u_printf("rev<?/%C/?end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
|
||||
bestValue = fData->fBackwardsTrie->getValue();
|
||||
bestPosn = utext_getNativeIndex(fText.getAlias());
|
||||
//if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
|
||||
}
|
||||
|
||||
//if(bestValue >= 0) {
|
||||
//if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
|
||||
//}
|
||||
|
||||
if(bestPosn>=0) {
|
||||
//if(debug2) u_printf("rev< /%C/ end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
|
||||
|
||||
//if(USTRINGTRIE_MATCHES(r)) { // matched - so, now what?
|
||||
//int32_t bestValue = fBackwardsTrie->getValue();
|
||||
//int32_t bestValue = iter.getValue();
|
||||
////if(debug2) u_printf("rev< /%C/ matched, skip..%d bestValue=%d\n", (UChar)uch, r, bestValue);
|
||||
|
||||
if(bestValue == kMATCH) { // exact match!
|
||||
//if(debug2) u_printf(" exact backward match\n");
|
||||
return kExceptionHere; // See if the next is another exception.
|
||||
} else if(bestValue == kPARTIAL
|
||||
&& fData->fForwardsPartialTrie.isValid()) { // make sure there's a forward trie
|
||||
&& fData->hasForwardsPartialTrie()) { // make sure there's a forward trie
|
||||
//if(debug2) u_printf(" partial backward match\n");
|
||||
// We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie
|
||||
// to see if it matches something going forward.
|
||||
fData->fForwardsPartialTrie->reset();
|
||||
UStringTrieResult rfwd = USTRINGTRIE_INTERMEDIATE_VALUE;
|
||||
utext_setNativeIndex(fText.getAlias(), bestPosn); // hope that's close ..
|
||||
//if(debug2) u_printf("Retrying at %d\n", bestPosn);
|
||||
// Do not modify the shared trie!
|
||||
UCharsTrie iter(fData->getForwardsPartialTrie());
|
||||
UChar32 uch;
|
||||
while((uch=utext_next32(fText.getAlias()))!=U_SENTINEL &&
|
||||
USTRINGTRIE_HAS_NEXT(rfwd=fData->fForwardsPartialTrie->nextForCodePoint(uch))) {
|
||||
USTRINGTRIE_HAS_NEXT(rfwd=iter.nextForCodePoint(uch))) {
|
||||
//if(debug2) u_printf("fwd> /%C/ cont?%d @%d\n", (UChar)uch, rfwd, utext_getNativeIndex(fText.getAlias()));
|
||||
}
|
||||
if(USTRINGTRIE_MATCHES(rfwd)) {
|
||||
@ -339,7 +365,7 @@ SimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n) {
|
||||
int32_t
|
||||
SimpleFilteredSentenceBreakIterator::internalNext(int32_t n) {
|
||||
if(n == UBRK_DONE || // at end or
|
||||
fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
|
||||
!fData->hasBackwardsTrie()) { // .. no backwards table loaded == no exceptions
|
||||
return n;
|
||||
}
|
||||
// OK, do we need to break here?
|
||||
@ -369,7 +395,7 @@ SimpleFilteredSentenceBreakIterator::internalNext(int32_t n) {
|
||||
int32_t
|
||||
SimpleFilteredSentenceBreakIterator::internalPrev(int32_t n) {
|
||||
if(n == 0 || n == UBRK_DONE || // at end or
|
||||
fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
|
||||
!fData->hasBackwardsTrie()) { // .. no backwards table loaded == no exceptions
|
||||
return n;
|
||||
}
|
||||
// OK, do we need to break here?
|
||||
@ -420,7 +446,7 @@ SimpleFilteredSentenceBreakIterator::previous(void) {
|
||||
UBool SimpleFilteredSentenceBreakIterator::isBoundary(int32_t offset) {
|
||||
if (!fDelegate->isBoundary(offset)) return false; // no break to suppress
|
||||
|
||||
if (fData->fBackwardsTrie.isNull()) return true; // no data = no suppressions
|
||||
if (!fData->hasBackwardsTrie()) return true; // no data = no suppressions
|
||||
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
resetState(status);
|
||||
@ -456,14 +482,14 @@ SimpleFilteredSentenceBreakIterator::last(void) {
|
||||
/**
|
||||
* Concrete implementation of builder class.
|
||||
*/
|
||||
class U_COMMON_API SimpleFilteredBreakIteratorBuilder : public FilteredBreakIteratorBuilder {
|
||||
class SimpleFilteredBreakIteratorBuilder : public FilteredBreakIteratorBuilder {
|
||||
public:
|
||||
virtual ~SimpleFilteredBreakIteratorBuilder();
|
||||
SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status);
|
||||
SimpleFilteredBreakIteratorBuilder(UErrorCode &status);
|
||||
virtual UBool suppressBreakAfter(const UnicodeString& exception, UErrorCode& status);
|
||||
virtual UBool unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status);
|
||||
virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status);
|
||||
virtual UBool suppressBreakAfter(const UnicodeString& exception, UErrorCode& status) override;
|
||||
virtual UBool unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status) override;
|
||||
virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) override;
|
||||
private:
|
||||
UStringSet fSet;
|
||||
};
|
||||
@ -588,11 +614,11 @@ SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UEr
|
||||
i++) {
|
||||
const UnicodeString *abbr = fSet.getStringAt(i);
|
||||
if(abbr) {
|
||||
FB_TRACE("build",abbr,TRUE,i);
|
||||
FB_TRACE("build",abbr,true,i);
|
||||
ustrs[n] = *abbr; // copy by value
|
||||
FB_TRACE("ustrs[n]",&ustrs[n],TRUE,i);
|
||||
FB_TRACE("ustrs[n]",&ustrs[n],true,i);
|
||||
} else {
|
||||
FB_TRACE("build",abbr,FALSE,i);
|
||||
FB_TRACE("build",abbr,false,i);
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
@ -603,37 +629,37 @@ SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UEr
|
||||
for(int i=0;i<subCount;i++) {
|
||||
int nn = ustrs[i].indexOf(kFULLSTOP); // TODO: non-'.' abbreviations
|
||||
if(nn>-1 && (nn+1)!=ustrs[i].length()) {
|
||||
FB_TRACE("partial",&ustrs[i],FALSE,i);
|
||||
FB_TRACE("partial",&ustrs[i],false,i);
|
||||
// is partial.
|
||||
// is it unique?
|
||||
int sameAs = -1;
|
||||
for(int j=0;j<subCount;j++) {
|
||||
if(j==i) continue;
|
||||
if(ustrs[i].compare(0,nn+1,ustrs[j],0,nn+1)==0) {
|
||||
FB_TRACE("prefix",&ustrs[j],FALSE,nn+1);
|
||||
FB_TRACE("prefix",&ustrs[j],false,nn+1);
|
||||
//UBool otherIsPartial = ((nn+1)!=ustrs[j].length()); // true if ustrs[j] doesn't end at nn
|
||||
if(partials[j]==0) { // hasn't been processed yet
|
||||
partials[j] = kSuppressInReverse | kAddToForward;
|
||||
FB_TRACE("suppressing",&ustrs[j],FALSE,j);
|
||||
FB_TRACE("suppressing",&ustrs[j],false,j);
|
||||
} else if(partials[j] & kSuppressInReverse) {
|
||||
sameAs = j; // the other entry is already in the reverse table.
|
||||
}
|
||||
}
|
||||
}
|
||||
FB_TRACE("for partial same-",&ustrs[i],FALSE,sameAs);
|
||||
FB_TRACE(" == partial #",&ustrs[i],FALSE,partials[i]);
|
||||
FB_TRACE("for partial same-",&ustrs[i],false,sameAs);
|
||||
FB_TRACE(" == partial #",&ustrs[i],false,partials[i]);
|
||||
UnicodeString prefix(ustrs[i], 0, nn+1);
|
||||
if(sameAs == -1 && partials[i] == 0) {
|
||||
// first one - add the prefix to the reverse table.
|
||||
prefix.reverse();
|
||||
builder->add(prefix, kPARTIAL, status);
|
||||
revCount++;
|
||||
FB_TRACE("Added partial",&prefix,FALSE, i);
|
||||
FB_TRACE(u_errorName(status),&ustrs[i],FALSE,i);
|
||||
FB_TRACE("Added partial",&prefix,false, i);
|
||||
FB_TRACE(u_errorName(status),&ustrs[i],false,i);
|
||||
partials[i] = kSuppressInReverse | kAddToForward;
|
||||
} else {
|
||||
FB_TRACE("NOT adding partial",&prefix,FALSE, i);
|
||||
FB_TRACE(u_errorName(status),&ustrs[i],FALSE,i);
|
||||
FB_TRACE("NOT adding partial",&prefix,false, i);
|
||||
FB_TRACE(u_errorName(status),&ustrs[i],false,i);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -642,9 +668,9 @@ SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UEr
|
||||
ustrs[i].reverse();
|
||||
builder->add(ustrs[i], kMATCH, status);
|
||||
revCount++;
|
||||
FB_TRACE(u_errorName(status), &ustrs[i], FALSE, i);
|
||||
FB_TRACE(u_errorName(status), &ustrs[i], false, i);
|
||||
} else {
|
||||
FB_TRACE("Adding fwd",&ustrs[i], FALSE, i);
|
||||
FB_TRACE("Adding fwd",&ustrs[i], false, i);
|
||||
|
||||
// an optimization would be to only add the portion after the '.'
|
||||
// for example, for "Ph.D." we store ".hP" in the reverse table. We could just store "D." in the forward,
|
||||
@ -656,12 +682,12 @@ SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UEr
|
||||
////if(debug2) u_printf("SUPPRESS- not Added(%d): /%S/ status=%s\n",partials[i], ustrs[i].getTerminatedBuffer(), u_errorName(status));
|
||||
}
|
||||
}
|
||||
FB_TRACE("AbbrCount",NULL,FALSE, subCount);
|
||||
FB_TRACE("AbbrCount",NULL,false, subCount);
|
||||
|
||||
if(revCount>0) {
|
||||
backwardsTrie.adoptInstead(builder->build(USTRINGTRIE_BUILD_FAST, status));
|
||||
if(U_FAILURE(status)) {
|
||||
FB_TRACE(u_errorName(status),NULL,FALSE, -1);
|
||||
FB_TRACE(u_errorName(status),NULL,false, -1);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
@ -669,7 +695,7 @@ SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UEr
|
||||
if(fwdCount>0) {
|
||||
forwardsPartialTrie.adoptInstead(builder2->build(USTRINGTRIE_BUILD_FAST, status));
|
||||
if(U_FAILURE(status)) {
|
||||
FB_TRACE(u_errorName(status),NULL,FALSE, -1);
|
||||
FB_TRACE(u_errorName(status),NULL,false, -1);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
@ -1,14 +0,0 @@
|
||||
filteredbrk.o filteredbrk.d : filteredbrk.cpp unicode/utypes.h unicode/umachine.h \
|
||||
unicode/ptypes.h unicode/platform.h unicode/uconfig.h \
|
||||
unicode/uvernum.h unicode/urename.h unicode/uversion.h cmemory.h \
|
||||
unicode/localpointer.h unicode/uobject.h unicode/filteredbrk.h \
|
||||
unicode/brkiter.h unicode/unistr.h unicode/char16ptr.h unicode/rep.h \
|
||||
unicode/std_string.h unicode/stringpiece.h unicode/bytestream.h \
|
||||
unicode/chariter.h unicode/locid.h unicode/strenum.h unicode/putil.h \
|
||||
unicode/uloc.h unicode/uenum.h unicode/ubrk.h unicode/utext.h \
|
||||
unicode/uchar.h unicode/stringoptions.h unicode/ucpmap.h \
|
||||
unicode/parseerr.h unicode/umisc.h unicode/ucharstriebuilder.h \
|
||||
unicode/stringtriebuilder.h unicode/ucharstrie.h unicode/ustringtrie.h \
|
||||
unicode/ures.h uresimp.h uresdata.h unicode/udata.h putilimp.h \
|
||||
udataswp.h resource.h restrace.h ubrkimpl.h uvector.h uarrsort.h \
|
||||
uelement.h
|
@ -137,14 +137,14 @@ UnicodeString &
|
||||
FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
|
||||
const UnicodeString &second,
|
||||
UErrorCode &errorCode) const {
|
||||
return normalizeSecondAndAppend(first, second, TRUE, errorCode);
|
||||
return normalizeSecondAndAppend(first, second, true, errorCode);
|
||||
}
|
||||
|
||||
UnicodeString &
|
||||
FilteredNormalizer2::append(UnicodeString &first,
|
||||
const UnicodeString &second,
|
||||
UErrorCode &errorCode) const {
|
||||
return normalizeSecondAndAppend(first, second, FALSE, errorCode);
|
||||
return normalizeSecondAndAppend(first, second, false, errorCode);
|
||||
}
|
||||
|
||||
UnicodeString &
|
||||
@ -224,7 +224,7 @@ UBool
|
||||
FilteredNormalizer2::isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
|
||||
uprv_checkCanGetBuffer(s, errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return FALSE;
|
||||
return false;
|
||||
}
|
||||
USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
|
||||
for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {
|
||||
@ -235,19 +235,19 @@ FilteredNormalizer2::isNormalized(const UnicodeString &s, UErrorCode &errorCode)
|
||||
if( !norm2.isNormalized(s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode) ||
|
||||
U_FAILURE(errorCode)
|
||||
) {
|
||||
return FALSE;
|
||||
return false;
|
||||
}
|
||||
spanCondition=USET_SPAN_NOT_CONTAINED;
|
||||
}
|
||||
prevSpanLimit=spanLimit;
|
||||
}
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
|
||||
UBool
|
||||
FilteredNormalizer2::isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return FALSE;
|
||||
return false;
|
||||
}
|
||||
const char *s = sp.data();
|
||||
int32_t length = sp.length();
|
||||
@ -259,14 +259,14 @@ FilteredNormalizer2::isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) con
|
||||
} else {
|
||||
if (!norm2.isNormalizedUTF8(StringPiece(s, spanLength), errorCode) ||
|
||||
U_FAILURE(errorCode)) {
|
||||
return FALSE;
|
||||
return false;
|
||||
}
|
||||
spanCondition = USET_SPAN_NOT_CONTAINED;
|
||||
}
|
||||
s += spanLength;
|
||||
length -= spanLength;
|
||||
}
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
|
||||
UNormalizationCheckResult
|
||||
|
@ -1,10 +0,0 @@
|
||||
filterednormalizer2.o filterednormalizer2.d : filterednormalizer2.cpp unicode/utypes.h \
|
||||
unicode/umachine.h unicode/ptypes.h unicode/platform.h \
|
||||
unicode/uconfig.h unicode/uvernum.h unicode/urename.h \
|
||||
unicode/uversion.h unicode/edits.h unicode/uobject.h \
|
||||
unicode/normalizer2.h unicode/stringpiece.h unicode/std_string.h \
|
||||
unicode/uniset.h unicode/ucpmap.h unicode/unifilt.h unicode/unifunct.h \
|
||||
unicode/unimatch.h unicode/unistr.h unicode/char16ptr.h unicode/rep.h \
|
||||
unicode/bytestream.h unicode/uset.h unicode/uchar.h \
|
||||
unicode/stringoptions.h unicode/localpointer.h unicode/unorm2.h \
|
||||
unicode/unorm.h unicode/uiter.h cpputils.h cmemory.h
|
@ -85,16 +85,22 @@ public:
|
||||
|
||||
inline int32_t puti(const UnicodeString& key, int32_t value, UErrorCode& status);
|
||||
|
||||
inline int32_t putiAllowZero(const UnicodeString& key, int32_t value, UErrorCode& status);
|
||||
|
||||
inline void* get(const UnicodeString& key) const;
|
||||
|
||||
inline int32_t geti(const UnicodeString& key) const;
|
||||
|
||||
inline int32_t getiAndFound(const UnicodeString& key, UBool &found) const;
|
||||
|
||||
inline void* remove(const UnicodeString& key);
|
||||
|
||||
inline int32_t removei(const UnicodeString& key);
|
||||
|
||||
inline void removeAll(void);
|
||||
|
||||
inline UBool containsKey(const UnicodeString& key) const;
|
||||
|
||||
inline const UHashElement* find(const UnicodeString& key) const;
|
||||
|
||||
/**
|
||||
@ -109,8 +115,8 @@ public:
|
||||
|
||||
inline UBool equals(const Hashtable& that) const;
|
||||
private:
|
||||
Hashtable(const Hashtable &other); // forbid copying of this class
|
||||
Hashtable &operator=(const Hashtable &other); // forbid copying of this class
|
||||
Hashtable(const Hashtable &other) = delete; // forbid copying of this class
|
||||
Hashtable &operator=(const Hashtable &other) = delete; // forbid copying of this class
|
||||
};
|
||||
|
||||
/*********************************************************************
|
||||
@ -203,6 +209,11 @@ inline int32_t Hashtable::puti(const UnicodeString& key, int32_t value, UErrorCo
|
||||
return uhash_puti(hash, new UnicodeString(key), value, &status);
|
||||
}
|
||||
|
||||
inline int32_t Hashtable::putiAllowZero(const UnicodeString& key, int32_t value,
|
||||
UErrorCode& status) {
|
||||
return uhash_putiAllowZero(hash, new UnicodeString(key), value, &status);
|
||||
}
|
||||
|
||||
inline void* Hashtable::get(const UnicodeString& key) const {
|
||||
return uhash_get(hash, &key);
|
||||
}
|
||||
@ -211,6 +222,10 @@ inline int32_t Hashtable::geti(const UnicodeString& key) const {
|
||||
return uhash_geti(hash, &key);
|
||||
}
|
||||
|
||||
inline int32_t Hashtable::getiAndFound(const UnicodeString& key, UBool &found) const {
|
||||
return uhash_getiAndFound(hash, &key, &found);
|
||||
}
|
||||
|
||||
inline void* Hashtable::remove(const UnicodeString& key) {
|
||||
return uhash_remove(hash, &key);
|
||||
}
|
||||
@ -219,6 +234,10 @@ inline int32_t Hashtable::removei(const UnicodeString& key) {
|
||||
return uhash_removei(hash, &key);
|
||||
}
|
||||
|
||||
inline UBool Hashtable::containsKey(const UnicodeString& key) const {
|
||||
return uhash_containsKey(hash, &key);
|
||||
}
|
||||
|
||||
inline const UHashElement* Hashtable::find(const UnicodeString& key) const {
|
||||
return uhash_find(hash, &key);
|
||||
}
|
||||
|
@ -1,9 +0,0 @@
|
||||
icudataver.o icudataver.d : icudataver.cpp unicode/utypes.h unicode/umachine.h \
|
||||
unicode/ptypes.h unicode/platform.h unicode/uconfig.h \
|
||||
unicode/uvernum.h unicode/urename.h unicode/uversion.h \
|
||||
unicode/icudataver.h unicode/ures.h unicode/uloc.h unicode/uenum.h \
|
||||
unicode/localpointer.h unicode/unistr.h unicode/char16ptr.h \
|
||||
unicode/rep.h unicode/uobject.h unicode/std_string.h \
|
||||
unicode/stringpiece.h unicode/bytestream.h uresimp.h uresdata.h \
|
||||
unicode/udata.h putilimp.h unicode/putil.h udataswp.h resource.h \
|
||||
restrace.h
|
@ -59,8 +59,8 @@ struct UPlugData {
|
||||
void *context; /**< user context data */
|
||||
char name[UPLUG_NAME_MAX]; /**< name of plugin */
|
||||
UPlugLevel level; /**< level of plugin */
|
||||
UBool awaitingLoad; /**< TRUE if the plugin is awaiting a load call */
|
||||
UBool dontUnload; /**< TRUE if plugin must stay resident (leak plugin and lib) */
|
||||
UBool awaitingLoad; /**< true if the plugin is awaiting a load call */
|
||||
UBool dontUnload; /**< true if plugin must stay resident (leak plugin and lib) */
|
||||
UErrorCode pluginStatus; /**< status code of plugin */
|
||||
};
|
||||
|
||||
@ -284,7 +284,7 @@ static void uplug_callPlug(UPlugData *plug, UPlugReason reason, UErrorCode *stat
|
||||
|
||||
|
||||
static void uplug_unloadPlug(UPlugData *plug, UErrorCode *status) {
|
||||
if(plug->awaitingLoad) { /* shouldn't happen. Plugin hasn'tbeen loaded yet.*/
|
||||
if(plug->awaitingLoad) { /* shouldn't happen. Plugin hasn't been loaded yet.*/
|
||||
*status = U_INTERNAL_PROGRAM_ERROR;
|
||||
return;
|
||||
}
|
||||
@ -295,7 +295,7 @@ static void uplug_unloadPlug(UPlugData *plug, UErrorCode *status) {
|
||||
}
|
||||
|
||||
static void uplug_queryPlug(UPlugData *plug, UErrorCode *status) {
|
||||
if(!plug->awaitingLoad || !(plug->level == UPLUG_LEVEL_UNKNOWN) ) { /* shouldn't happen. Plugin hasn'tbeen loaded yet.*/
|
||||
if(!plug->awaitingLoad || !(plug->level == UPLUG_LEVEL_UNKNOWN) ) { /* shouldn't happen. Plugin hasn't been loaded yet.*/
|
||||
*status = U_INTERNAL_PROGRAM_ERROR;
|
||||
return;
|
||||
}
|
||||
@ -304,11 +304,11 @@ static void uplug_queryPlug(UPlugData *plug, UErrorCode *status) {
|
||||
if(U_SUCCESS(*status)) {
|
||||
if(plug->level == UPLUG_LEVEL_INVALID) {
|
||||
plug->pluginStatus = U_PLUGIN_DIDNT_SET_LEVEL;
|
||||
plug->awaitingLoad = FALSE;
|
||||
plug->awaitingLoad = false;
|
||||
}
|
||||
} else {
|
||||
plug->pluginStatus = U_INTERNAL_PROGRAM_ERROR;
|
||||
plug->awaitingLoad = FALSE;
|
||||
plug->awaitingLoad = false;
|
||||
}
|
||||
}
|
||||
|
||||
@ -317,12 +317,12 @@ static void uplug_loadPlug(UPlugData *plug, UErrorCode *status) {
|
||||
if(U_FAILURE(*status)) {
|
||||
return;
|
||||
}
|
||||
if(!plug->awaitingLoad || (plug->level < UPLUG_LEVEL_LOW) ) { /* shouldn't happen. Plugin hasn'tbeen loaded yet.*/
|
||||
if(!plug->awaitingLoad || (plug->level < UPLUG_LEVEL_LOW) ) { /* shouldn't happen. Plugin hasn't been loaded yet.*/
|
||||
*status = U_INTERNAL_PROGRAM_ERROR;
|
||||
return;
|
||||
}
|
||||
uplug_callPlug(plug, UPLUG_REASON_LOAD, status);
|
||||
plug->awaitingLoad = FALSE;
|
||||
plug->awaitingLoad = false;
|
||||
if(!U_SUCCESS(*status)) {
|
||||
plug->pluginStatus = U_INTERNAL_PROGRAM_ERROR;
|
||||
}
|
||||
@ -347,8 +347,8 @@ static UPlugData *uplug_allocateEmptyPlug(UErrorCode *status)
|
||||
plug->structSize = sizeof(UPlugData);
|
||||
plug->name[0]=0;
|
||||
plug->level = UPLUG_LEVEL_UNKNOWN; /* initialize to null state */
|
||||
plug->awaitingLoad = TRUE;
|
||||
plug->dontUnload = FALSE;
|
||||
plug->awaitingLoad = true;
|
||||
plug->dontUnload = false;
|
||||
plug->pluginStatus = U_ZERO_ERROR;
|
||||
plug->libName[0] = 0;
|
||||
plug->config[0]=0;
|
||||
@ -403,9 +403,9 @@ static void uplug_deallocatePlug(UPlugData *plug, UErrorCode *status) {
|
||||
pluginCount = uplug_removeEntryAt(pluginList, pluginCount, sizeof(plug[0]), uplug_pluginNumber(plug));
|
||||
} else {
|
||||
/* not ok- leave as a message. */
|
||||
plug->awaitingLoad=FALSE;
|
||||
plug->awaitingLoad=false;
|
||||
plug->entrypoint=0;
|
||||
plug->dontUnload=TRUE;
|
||||
plug->dontUnload=true;
|
||||
}
|
||||
}
|
||||
|
||||
@ -526,7 +526,7 @@ uplug_getPlugLoadStatus(UPlugData *plug) {
|
||||
|
||||
|
||||
/**
|
||||
* Initialize a plugin fron an entrypoint and library - but don't load it.
|
||||
* Initialize a plugin from an entrypoint and library - but don't load it.
|
||||
*/
|
||||
static UPlugData* uplug_initPlugFromEntrypointAndLibrary(UPlugEntrypoint *entrypoint, const char *config, void *lib, const char *sym,
|
||||
UErrorCode *status) {
|
||||
@ -558,8 +558,8 @@ uplug_initErrorPlug(const char *libName, const char *sym, const char *config, co
|
||||
if(U_FAILURE(*status)) return NULL;
|
||||
|
||||
plug->pluginStatus = loadStatus;
|
||||
plug->awaitingLoad = FALSE; /* Won't load. */
|
||||
plug->dontUnload = TRUE; /* cannot unload. */
|
||||
plug->awaitingLoad = false; /* Won't load. */
|
||||
plug->dontUnload = true; /* cannot unload. */
|
||||
|
||||
if(sym!=NULL) {
|
||||
uprv_strncpy(plug->sym, sym, UPLUG_NAME_MAX);
|
||||
@ -646,7 +646,7 @@ static UBool U_CALLCONV uplug_cleanup(void)
|
||||
}
|
||||
/* close other held libs? */
|
||||
gCurrentLevel = UPLUG_LEVEL_LOW;
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
|
||||
#if U_ENABLE_DYLOAD
|
||||
@ -678,7 +678,7 @@ static void uplug_loadWaitingPlugs(UErrorCode *status) {
|
||||
currentLevel = newLevel;
|
||||
}
|
||||
}
|
||||
pluginToLoad->awaitingLoad = FALSE;
|
||||
pluginToLoad->awaitingLoad = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -694,7 +694,7 @@ static void uplug_loadWaitingPlugs(UErrorCode *status) {
|
||||
} else {
|
||||
uplug_loadPlug(pluginToLoad, &subStatus);
|
||||
}
|
||||
pluginToLoad->awaitingLoad = FALSE;
|
||||
pluginToLoad->awaitingLoad = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,4 +0,0 @@
|
||||
icuplug.o icuplug.d : icuplug.cpp unicode/icuplug.h unicode/utypes.h \
|
||||
unicode/umachine.h unicode/ptypes.h unicode/platform.h \
|
||||
unicode/uconfig.h unicode/uvernum.h unicode/urename.h \
|
||||
unicode/uversion.h
|
@ -67,9 +67,9 @@ LoadedNormalizer2Impl::isAcceptable(void * /*context*/,
|
||||
) {
|
||||
// Normalizer2Impl *me=(Normalizer2Impl *)context;
|
||||
// uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
|
||||
return TRUE;
|
||||
return true;
|
||||
} else {
|
||||
return FALSE;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@ -134,14 +134,14 @@ U_CDECL_END
|
||||
|
||||
#if !NORM2_HARDCODE_NFC_DATA
|
||||
static Norm2AllModes *nfcSingleton;
|
||||
static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER;
|
||||
static icu::UInitOnce nfcInitOnce {};
|
||||
#endif
|
||||
|
||||
static Norm2AllModes *nfkcSingleton;
|
||||
static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER;
|
||||
static icu::UInitOnce nfkcInitOnce {};
|
||||
|
||||
static Norm2AllModes *nfkc_cfSingleton;
|
||||
static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER;
|
||||
static icu::UInitOnce nfkc_cfInitOnce {};
|
||||
|
||||
static UHashtable *cache=NULL;
|
||||
|
||||
@ -157,7 +157,7 @@ static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
|
||||
} else if (uprv_strcmp(what, "nfkc_cf") == 0) {
|
||||
nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode);
|
||||
} else {
|
||||
UPRV_UNREACHABLE; // Unknown singleton
|
||||
UPRV_UNREACHABLE_EXIT; // Unknown singleton
|
||||
}
|
||||
ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
|
||||
}
|
||||
@ -185,7 +185,7 @@ static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() {
|
||||
|
||||
uhash_close(cache);
|
||||
cache=NULL;
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
|
||||
U_CDECL_END
|
||||
|
@ -1,15 +0,0 @@
|
||||
loadednormalizer2impl.o loadednormalizer2impl.d : loadednormalizer2impl.cpp unicode/utypes.h \
|
||||
unicode/umachine.h unicode/ptypes.h unicode/platform.h \
|
||||
unicode/uconfig.h unicode/uvernum.h unicode/urename.h \
|
||||
unicode/uversion.h unicode/udata.h unicode/localpointer.h \
|
||||
unicode/normalizer2.h unicode/stringpiece.h unicode/uobject.h \
|
||||
unicode/std_string.h unicode/uniset.h unicode/ucpmap.h \
|
||||
unicode/unifilt.h unicode/unifunct.h unicode/unimatch.h \
|
||||
unicode/unistr.h unicode/char16ptr.h unicode/rep.h \
|
||||
unicode/bytestream.h unicode/uset.h unicode/uchar.h \
|
||||
unicode/stringoptions.h unicode/unorm2.h unicode/ucptrie.h \
|
||||
unicode/utf8.h unicode/utf.h unicode/unorm.h unicode/uiter.h cstring.h \
|
||||
cmemory.h mutex.h umutex.h unicode/uclean.h putilimp.h unicode/putil.h \
|
||||
norm2allmodes.h unicode/edits.h cpputils.h normalizer2impl.h \
|
||||
unicode/utf16.h udataswp.h uset_imp.h uassert.h ucln_cmn.h ucln.h \
|
||||
uhash.h uelement.h
|
@ -15,7 +15,7 @@ U_NAMESPACE_BEGIN
|
||||
#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
|
||||
#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )
|
||||
|
||||
const char* kAttributeKey = "attribute";
|
||||
constexpr const char* kAttributeKey = "attribute";
|
||||
|
||||
static bool _isExtensionSubtags(char key, const char* s, int32_t len) {
|
||||
switch (uprv_tolower(key)) {
|
||||
@ -228,7 +228,7 @@ LocaleBuilder& LocaleBuilder::setExtension(char key, StringPiece value)
|
||||
return *this;
|
||||
}
|
||||
if (extensions_ == nullptr) {
|
||||
extensions_ = new Locale();
|
||||
extensions_ = Locale::getRoot().clone();
|
||||
if (extensions_ == nullptr) {
|
||||
status_ = U_MEMORY_ALLOCATION_ERROR;
|
||||
return *this;
|
||||
@ -259,12 +259,12 @@ LocaleBuilder& LocaleBuilder::setUnicodeLocaleKeyword(
|
||||
return *this;
|
||||
}
|
||||
if (extensions_ == nullptr) {
|
||||
extensions_ = new Locale();
|
||||
}
|
||||
extensions_ = Locale::getRoot().clone();
|
||||
if (extensions_ == nullptr) {
|
||||
status_ = U_MEMORY_ALLOCATION_ERROR;
|
||||
return *this;
|
||||
}
|
||||
}
|
||||
extensions_->setUnicodeKeywordValue(key, type, status_);
|
||||
return *this;
|
||||
}
|
||||
@ -280,7 +280,7 @@ LocaleBuilder& LocaleBuilder::addUnicodeLocaleAttribute(
|
||||
return *this;
|
||||
}
|
||||
if (extensions_ == nullptr) {
|
||||
extensions_ = new Locale();
|
||||
extensions_ = Locale::getRoot().clone();
|
||||
if (extensions_ == nullptr) {
|
||||
status_ = U_MEMORY_ALLOCATION_ERROR;
|
||||
return *this;
|
||||
@ -415,7 +415,7 @@ void LocaleBuilder::copyExtensionsFrom(const Locale& src, UErrorCode& errorCode)
|
||||
return;
|
||||
}
|
||||
if (extensions_ == nullptr) {
|
||||
extensions_ = new Locale();
|
||||
extensions_ = Locale::getRoot().clone();
|
||||
if (extensions_ == nullptr) {
|
||||
status_ = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
@ -459,7 +459,7 @@ Locale LocaleBuilder::build(UErrorCode& errorCode)
|
||||
UBool LocaleBuilder::copyErrorTo(UErrorCode &outErrorCode) const {
|
||||
if (U_FAILURE(outErrorCode)) {
|
||||
// Do not overwrite the older error code
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
outErrorCode = status_;
|
||||
return U_FAILURE(outErrorCode);
|
||||
|
@ -1,9 +0,0 @@
|
||||
localebuilder.o localebuilder.d : localebuilder.cpp bytesinkutil.h unicode/utypes.h \
|
||||
unicode/umachine.h unicode/ptypes.h unicode/platform.h \
|
||||
unicode/uconfig.h unicode/uvernum.h unicode/urename.h \
|
||||
unicode/uversion.h unicode/bytestream.h unicode/uobject.h \
|
||||
unicode/std_string.h unicode/edits.h cmemory.h unicode/localpointer.h \
|
||||
uassert.h charstr.h unicode/unistr.h unicode/char16ptr.h unicode/rep.h \
|
||||
unicode/stringpiece.h cstring.h ulocimp.h unicode/uloc.h \
|
||||
unicode/uenum.h unicode/localebuilder.h unicode/locid.h \
|
||||
unicode/strenum.h unicode/putil.h unicode/localematcher.h
|
@ -60,7 +60,7 @@ LocaleMatcher::Result::Result(LocaleMatcher::Result &&src) U_NOEXCEPT :
|
||||
if (desiredIsOwned) {
|
||||
src.desiredLocale = nullptr;
|
||||
src.desiredIndex = -1;
|
||||
src.desiredIsOwned = FALSE;
|
||||
src.desiredIsOwned = false;
|
||||
}
|
||||
}
|
||||
|
||||
@ -82,7 +82,7 @@ LocaleMatcher::Result &LocaleMatcher::Result::operator=(LocaleMatcher::Result &&
|
||||
if (desiredIsOwned) {
|
||||
src.desiredLocale = nullptr;
|
||||
src.desiredIndex = -1;
|
||||
src.desiredIsOwned = FALSE;
|
||||
src.desiredIsOwned = false;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
@ -168,12 +168,9 @@ void LocaleMatcher::Builder::clearSupportedLocales() {
|
||||
bool LocaleMatcher::Builder::ensureSupportedLocaleVector() {
|
||||
if (U_FAILURE(errorCode_)) { return false; }
|
||||
if (supportedLocales_ != nullptr) { return true; }
|
||||
supportedLocales_ = new UVector(uprv_deleteUObject, nullptr, errorCode_);
|
||||
LocalPointer<UVector> lpSupportedLocales(new UVector(uprv_deleteUObject, nullptr, errorCode_), errorCode_);
|
||||
if (U_FAILURE(errorCode_)) { return false; }
|
||||
if (supportedLocales_ == nullptr) {
|
||||
errorCode_ = U_MEMORY_ALLOCATION_ERROR;
|
||||
return false;
|
||||
}
|
||||
supportedLocales_ = lpSupportedLocales.orphan();
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -187,9 +184,8 @@ LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocalesFromListStrin
|
||||
for (int32_t i = 0; i < length; ++i) {
|
||||
Locale *locale = list.orphanLocaleAt(i);
|
||||
if (locale == nullptr) { continue; }
|
||||
supportedLocales_->addElement(locale, errorCode_);
|
||||
supportedLocales_->adoptElement(locale, errorCode_);
|
||||
if (U_FAILURE(errorCode_)) {
|
||||
delete locale;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -197,35 +193,21 @@ LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocalesFromListStrin
|
||||
}
|
||||
|
||||
LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocales(Locale::Iterator &locales) {
|
||||
if (U_FAILURE(errorCode_)) { return *this; }
|
||||
if (ensureSupportedLocaleVector()) {
|
||||
clearSupportedLocales();
|
||||
if (!ensureSupportedLocaleVector()) { return *this; }
|
||||
while (locales.hasNext()) {
|
||||
while (locales.hasNext() && U_SUCCESS(errorCode_)) {
|
||||
const Locale &locale = locales.next();
|
||||
Locale *clone = locale.clone();
|
||||
if (clone == nullptr) {
|
||||
errorCode_ = U_MEMORY_ALLOCATION_ERROR;
|
||||
break;
|
||||
}
|
||||
supportedLocales_->addElement(clone, errorCode_);
|
||||
if (U_FAILURE(errorCode_)) {
|
||||
delete clone;
|
||||
break;
|
||||
LocalPointer<Locale> clone (locale.clone(), errorCode_);
|
||||
supportedLocales_->adoptElement(clone.orphan(), errorCode_);
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
LocaleMatcher::Builder &LocaleMatcher::Builder::addSupportedLocale(const Locale &locale) {
|
||||
if (!ensureSupportedLocaleVector()) { return *this; }
|
||||
Locale *clone = locale.clone();
|
||||
if (clone == nullptr) {
|
||||
errorCode_ = U_MEMORY_ALLOCATION_ERROR;
|
||||
return *this;
|
||||
}
|
||||
supportedLocales_->addElement(clone, errorCode_);
|
||||
if (U_FAILURE(errorCode_)) {
|
||||
delete clone;
|
||||
if (ensureSupportedLocaleVector()) {
|
||||
LocalPointer<Locale> clone(locale.clone(), errorCode_);
|
||||
supportedLocales_->adoptElement(clone.orphan(), errorCode_);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
@ -305,10 +287,10 @@ LocaleMatcher::Builder &LocaleMatcher::Builder::internalSetThresholdDistance(int
|
||||
#endif
|
||||
|
||||
UBool LocaleMatcher::Builder::copyErrorTo(UErrorCode &outErrorCode) const {
|
||||
if (U_FAILURE(outErrorCode)) { return TRUE; }
|
||||
if (U_SUCCESS(errorCode_)) { return FALSE; }
|
||||
if (U_FAILURE(outErrorCode)) { return true; }
|
||||
if (U_SUCCESS(errorCode_)) { return false; }
|
||||
outErrorCode = errorCode_;
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
|
||||
LocaleMatcher LocaleMatcher::Builder::build(UErrorCode &errorCode) const {
|
||||
@ -345,9 +327,8 @@ UBool compareLSRs(const UHashTok t1, const UHashTok t2) {
|
||||
int32_t LocaleMatcher::putIfAbsent(const LSR &lsr, int32_t i, int32_t suppLength,
|
||||
UErrorCode &errorCode) {
|
||||
if (U_FAILURE(errorCode)) { return suppLength; }
|
||||
int32_t index = uhash_geti(supportedLsrToIndex, &lsr);
|
||||
if (index == 0) {
|
||||
uhash_puti(supportedLsrToIndex, const_cast<LSR *>(&lsr), i + 1, &errorCode);
|
||||
if (!uhash_containsKey(supportedLsrToIndex, &lsr)) {
|
||||
uhash_putiAllowZero(supportedLsrToIndex, const_cast<LSR *>(&lsr), i, &errorCode);
|
||||
if (U_SUCCESS(errorCode)) {
|
||||
supportedLSRs[suppLength] = &lsr;
|
||||
supportedIndexes[suppLength++] = i;
|
||||
@ -651,30 +632,30 @@ const Locale *LocaleMatcher::getBestMatchForListString(
|
||||
LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
|
||||
const Locale &desiredLocale, UErrorCode &errorCode) const {
|
||||
if (U_FAILURE(errorCode)) {
|
||||
return Result(nullptr, defaultLocale, -1, -1, FALSE);
|
||||
return Result(nullptr, defaultLocale, -1, -1, false);
|
||||
}
|
||||
int32_t suppIndex = getBestSuppIndex(
|
||||
getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode),
|
||||
nullptr, errorCode);
|
||||
if (U_FAILURE(errorCode) || suppIndex < 0) {
|
||||
return Result(nullptr, defaultLocale, -1, -1, FALSE);
|
||||
return Result(nullptr, defaultLocale, -1, -1, false);
|
||||
} else {
|
||||
return Result(&desiredLocale, supportedLocales[suppIndex], 0, suppIndex, FALSE);
|
||||
return Result(&desiredLocale, supportedLocales[suppIndex], 0, suppIndex, false);
|
||||
}
|
||||
}
|
||||
|
||||
LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
|
||||
Locale::Iterator &desiredLocales, UErrorCode &errorCode) const {
|
||||
if (U_FAILURE(errorCode) || !desiredLocales.hasNext()) {
|
||||
return Result(nullptr, defaultLocale, -1, -1, FALSE);
|
||||
return Result(nullptr, defaultLocale, -1, -1, false);
|
||||
}
|
||||
LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES);
|
||||
int32_t suppIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode);
|
||||
if (U_FAILURE(errorCode) || suppIndex < 0) {
|
||||
return Result(nullptr, defaultLocale, -1, -1, FALSE);
|
||||
return Result(nullptr, defaultLocale, -1, -1, false);
|
||||
} else {
|
||||
return Result(lsrIter.orphanRemembered(), supportedLocales[suppIndex],
|
||||
lsrIter.getBestDesiredIndex(), suppIndex, TRUE);
|
||||
lsrIter.getBestDesiredIndex(), suppIndex, true);
|
||||
}
|
||||
}
|
||||
|
||||
@ -685,12 +666,11 @@ int32_t LocaleMatcher::getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remai
|
||||
int32_t bestSupportedLsrIndex = -1;
|
||||
for (int32_t bestShiftedDistance = LocaleDistance::shiftDistance(thresholdDistance);;) {
|
||||
// Quick check for exact maximized LSR.
|
||||
// Returns suppIndex+1 where 0 means not found.
|
||||
if (supportedLsrToIndex != nullptr) {
|
||||
desiredLSR.setHashCode();
|
||||
int32_t index = uhash_geti(supportedLsrToIndex, &desiredLSR);
|
||||
if (index != 0) {
|
||||
int32_t suppIndex = index - 1;
|
||||
UBool found = false;
|
||||
int32_t suppIndex = uhash_getiAndFound(supportedLsrToIndex, &desiredLSR, &found);
|
||||
if (found) {
|
||||
if (remainingIter != nullptr) {
|
||||
remainingIter->rememberCurrent(desiredIndex, errorCode);
|
||||
}
|
||||
|
@ -1,11 +0,0 @@
|
||||
localematcher.o localematcher.d : localematcher.cpp unicode/utypes.h unicode/umachine.h \
|
||||
unicode/ptypes.h unicode/platform.h unicode/uconfig.h \
|
||||
unicode/uvernum.h unicode/urename.h unicode/uversion.h \
|
||||
unicode/localebuilder.h unicode/locid.h unicode/bytestream.h \
|
||||
unicode/uobject.h unicode/std_string.h unicode/localpointer.h \
|
||||
unicode/strenum.h unicode/unistr.h unicode/char16ptr.h unicode/rep.h \
|
||||
unicode/stringpiece.h unicode/putil.h unicode/uloc.h unicode/uenum.h \
|
||||
unicode/localematcher.h cstring.h cmemory.h localeprioritylist.h \
|
||||
loclikelysubtags.h unicode/bytestrie.h unicode/ustringtrie.h \
|
||||
unicode/ures.h charstrmap.h uhash.h uelement.h lsr.h locdistance.h \
|
||||
uassert.h ustr_imp.h unicode/utf8.h unicode/utf.h uvector.h uarrsort.h
|
@ -187,17 +187,18 @@ bool LocalePriorityList::add(const Locale &locale, int32_t weight, UErrorCode &e
|
||||
if (U_FAILURE(errorCode)) { return false; }
|
||||
}
|
||||
LocalPointer<Locale> clone;
|
||||
int32_t index = uhash_geti(map, &locale);
|
||||
if (index != 0) {
|
||||
UBool found = false;
|
||||
int32_t index = uhash_getiAndFound(map, &locale, &found);
|
||||
if (found) {
|
||||
// Duplicate: Remove the old item and append it anew.
|
||||
LocaleAndWeight &lw = list->array[index - 1];
|
||||
LocaleAndWeight &lw = list->array[index];
|
||||
clone.adoptInstead(lw.locale);
|
||||
lw.locale = nullptr;
|
||||
lw.weight = 0;
|
||||
++numRemoved;
|
||||
}
|
||||
if (weight <= 0) { // do not add q=0
|
||||
if (index != 0) {
|
||||
if (found) {
|
||||
// Not strictly necessary but cleaner.
|
||||
uhash_removei(map, &locale);
|
||||
}
|
||||
@ -217,7 +218,7 @@ bool LocalePriorityList::add(const Locale &locale, int32_t weight, UErrorCode &e
|
||||
return false;
|
||||
}
|
||||
}
|
||||
uhash_puti(map, clone.getAlias(), listLength + 1, &errorCode);
|
||||
uhash_putiAllowZero(map, clone.getAlias(), listLength, &errorCode);
|
||||
if (U_FAILURE(errorCode)) { return false; }
|
||||
LocaleAndWeight &lw = list->array[listLength];
|
||||
lw.locale = clone.orphan();
|
||||
@ -233,7 +234,7 @@ void LocalePriorityList::sort(UErrorCode &errorCode) {
|
||||
// The comparator forces a stable sort via the item index.
|
||||
if (U_FAILURE(errorCode) || getLength() <= 1 || !hasWeights) { return; }
|
||||
uprv_sortArray(list->array.getAlias(), listLength, sizeof(LocaleAndWeight),
|
||||
compareLocaleAndWeight, nullptr, FALSE, &errorCode);
|
||||
compareLocaleAndWeight, nullptr, false, &errorCode);
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
@ -1,9 +0,0 @@
|
||||
localeprioritylist.o localeprioritylist.d : localeprioritylist.cpp unicode/utypes.h \
|
||||
unicode/umachine.h unicode/ptypes.h unicode/platform.h \
|
||||
unicode/uconfig.h unicode/uvernum.h unicode/urename.h \
|
||||
unicode/uversion.h unicode/localpointer.h unicode/locid.h \
|
||||
unicode/bytestream.h unicode/uobject.h unicode/std_string.h \
|
||||
unicode/strenum.h unicode/unistr.h unicode/char16ptr.h unicode/rep.h \
|
||||
unicode/stringpiece.h unicode/putil.h unicode/uloc.h unicode/uenum.h \
|
||||
charstr.h cmemory.h localeprioritylist.h uarrsort.h uassert.h uhash.h \
|
||||
uelement.h
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user