ICU-20086 C++ sets & maps for Unicode properties (#93)
also create ucpmap.h from renamed parts of ucptrie.h
This commit is contained in:
parent
2290dba1a4
commit
82f0f480d4
@ -100,7 +100,8 @@ utf_impl.o ustring.o ustrcase.o ucasemap.o ucasemap_titlecase_brkiter.o cstring.
|
|||||||
unistr_case_locale.o ustrcase_locale.o unistr_titlecase_brkiter.o ustr_titlecase_brkiter.o \
|
unistr_case_locale.o ustrcase_locale.o unistr_titlecase_brkiter.o ustr_titlecase_brkiter.o \
|
||||||
normalizer2impl.o normalizer2.o filterednormalizer2.o normlzr.o unorm.o unormcmp.o loadednormalizer2impl.o \
|
normalizer2impl.o normalizer2.o filterednormalizer2.o normlzr.o unorm.o unormcmp.o loadednormalizer2impl.o \
|
||||||
chariter.o schriter.o uchriter.o uiter.o \
|
chariter.o schriter.o uchriter.o uiter.o \
|
||||||
patternprops.o uchar.o uprops.o ucase.o propname.o ubidi_props.o ubidi.o ubidiwrt.o ubidiln.o ushape.o \
|
patternprops.o uchar.o uprops.o ucase.o propname.o ubidi_props.o characterproperties.o \
|
||||||
|
ubidi.o ubidiwrt.o ubidiln.o ushape.o \
|
||||||
uscript.o uscript_props.o usc_impl.o unames.o \
|
uscript.o uscript_props.o usc_impl.o unames.o \
|
||||||
utrie.o utrie2.o utrie2_builder.o ucptrie.o umutablecptrie.o \
|
utrie.o utrie2.o utrie2_builder.o ucptrie.o umutablecptrie.o \
|
||||||
bmpset.o unisetspan.o uset_props.o uniset_props.o uniset_closure.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \
|
bmpset.o unisetspan.o uset_props.o uniset_props.o uniset_closure.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \
|
||||||
|
340
icu4c/source/common/characterproperties.cpp
Normal file
340
icu4c/source/common/characterproperties.cpp
Normal file
@ -0,0 +1,340 @@
|
|||||||
|
// © 2018 and later: Unicode, Inc. and others.
|
||||||
|
// License & terms of use: http://www.unicode.org/copyright.html
|
||||||
|
|
||||||
|
// characterproperties.cpp
|
||||||
|
// created: 2018sep03 Markus W. Scherer
|
||||||
|
|
||||||
|
#include "unicode/utypes.h"
|
||||||
|
#include "unicode/localpointer.h"
|
||||||
|
#include "unicode/uchar.h"
|
||||||
|
#include "unicode/ucpmap.h"
|
||||||
|
#include "unicode/ucptrie.h"
|
||||||
|
#include "unicode/umutablecptrie.h"
|
||||||
|
#include "unicode/uniset.h"
|
||||||
|
#include "unicode/uscript.h"
|
||||||
|
#include "unicode/uset.h"
|
||||||
|
#include "cmemory.h"
|
||||||
|
#include "mutex.h"
|
||||||
|
#include "normalizer2impl.h"
|
||||||
|
#include "uassert.h"
|
||||||
|
#include "ubidi_props.h"
|
||||||
|
#include "ucase.h"
|
||||||
|
#include "ucln_cmn.h"
|
||||||
|
#include "umutex.h"
|
||||||
|
#include "uprops.h"
|
||||||
|
|
||||||
|
using icu::UInitOnce;
|
||||||
|
using icu::UnicodeSet;
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
U_CDECL_BEGIN
|
||||||
|
|
||||||
|
UBool U_CALLCONV characterproperties_cleanup();
|
||||||
|
|
||||||
|
struct Inclusion {
|
||||||
|
UnicodeSet *fSet;
|
||||||
|
UInitOnce fInitOnce;
|
||||||
|
};
|
||||||
|
Inclusion gInclusions[UPROPS_SRC_COUNT]; // cached getInclusions()
|
||||||
|
|
||||||
|
UnicodeSet *sets[UCHAR_BINARY_LIMIT] = {};
|
||||||
|
|
||||||
|
UCPMap *maps[UCHAR_INT_LIMIT - UCHAR_INT_START] = {};
|
||||||
|
|
||||||
|
UMutex cpMutex = U_MUTEX_INITIALIZER;
|
||||||
|
|
||||||
|
//----------------------------------------------------------------
|
||||||
|
// Inclusions list
|
||||||
|
//----------------------------------------------------------------
|
||||||
|
|
||||||
|
// USetAdder implementation
|
||||||
|
// Does not use uset.h to reduce code dependencies
|
||||||
|
void U_CALLCONV
|
||||||
|
_set_add(USet *set, UChar32 c) {
|
||||||
|
((UnicodeSet *)set)->add(c);
|
||||||
|
}
|
||||||
|
|
||||||
|
void U_CALLCONV
|
||||||
|
_set_addRange(USet *set, UChar32 start, UChar32 end) {
|
||||||
|
((UnicodeSet *)set)->add(start, end);
|
||||||
|
}
|
||||||
|
|
||||||
|
void U_CALLCONV
|
||||||
|
_set_addString(USet *set, const UChar *str, int32_t length) {
|
||||||
|
((UnicodeSet *)set)->add(icu::UnicodeString((UBool)(length<0), str, length));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Library cleanup callback (registered via ucln_common_registerCleanup()).
// Releases every cached inclusion set, binary-property set and integer-property map,
// and resets the slots so that they can be rebuilt later.
// Always returns TRUE.
UBool U_CALLCONV characterproperties_cleanup() {
    for (Inclusion &slot : gInclusions) {
        delete slot.fSet;
        slot.fSet = nullptr;
        slot.fInitOnce.reset();
    }
    for (UnicodeSet *&cachedSet : sets) {
        delete cachedSet;
        cachedSet = nullptr;
    }
    for (UCPMap *&cachedMap : maps) {
        // The cached maps are closed as UCPTrie objects.
        ucptrie_close(reinterpret_cast<UCPTrie *>(cachedMap));
        cachedMap = nullptr;
    }
    return TRUE;
}
|
||||||
|
|
||||||
|
U_CDECL_END
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
U_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
/*
|
||||||
|
Reduce excessive reallocation, and make it easier to detect initialization problems.
|
||||||
|
Usually you don't see smaller sets than this for Unicode 5.0.
|
||||||
|
*/
|
||||||
|
constexpr int32_t DEFAULT_INCLUSION_CAPACITY = 3072;
|
||||||
|
|
||||||
|
// Builds the inclusion set for one property source and stores it in gInclusions[src].fSet.
//
// src:       the property source; UPROPS_SRC_NONE and unhandled values yield
//            U_INTERNAL_PROGRAM_ERROR.
// errorCode: in/out ICU error code; set to U_MEMORY_ALLOCATION_ERROR if the set
//            cannot be allocated.
//
// On failure the slot is left as nullptr. On success the set is compacted and the
// cleanup function is registered.
void U_CALLCONV CharacterProperties::initInclusion(UPropertySource src, UErrorCode &errorCode) {
    // This function is invoked only via umtx_initOnce().
    // This function is a friend of class UnicodeSet.

    U_ASSERT(0 <= src && src < UPROPS_SRC_COUNT);
    if (src == UPROPS_SRC_NONE) {
        errorCode = U_INTERNAL_PROGRAM_ERROR;
        return;
    }
    UnicodeSet * &slot = gInclusions[src].fSet;
    U_ASSERT(slot == nullptr);

    slot = new UnicodeSet();
    if (slot == nullptr) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    // Adapter that lets the C-level addPropertyStarts() functions write into the new set.
    USetAdder adder = {
        reinterpret_cast<USet *>(slot),
        _set_add,
        _set_addRange,
        _set_addString,
        nullptr, // don't need remove()
        nullptr // don't need removeRange()
    };

    slot->ensureCapacity(DEFAULT_INCLUSION_CAPACITY, errorCode);
    switch(src) {
    case UPROPS_SRC_CHAR:
        uchar_addPropertyStarts(&adder, &errorCode);
        break;
    case UPROPS_SRC_PROPSVEC:
        upropsvec_addPropertyStarts(&adder, &errorCode);
        break;
    case UPROPS_SRC_CHAR_AND_PROPSVEC:
        uchar_addPropertyStarts(&adder, &errorCode);
        upropsvec_addPropertyStarts(&adder, &errorCode);
        break;
#if !UCONFIG_NO_NORMALIZATION
    case UPROPS_SRC_CASE_AND_NORM: {
        const Normalizer2Impl *nfc = Normalizer2Factory::getNFCImpl(errorCode);
        if (U_SUCCESS(errorCode)) {
            nfc->addPropertyStarts(&adder, errorCode);
        }
        ucase_addPropertyStarts(&adder, &errorCode);
        break;
    }
    case UPROPS_SRC_NFC: {
        const Normalizer2Impl *nfc = Normalizer2Factory::getNFCImpl(errorCode);
        if (U_SUCCESS(errorCode)) {
            nfc->addPropertyStarts(&adder, errorCode);
        }
        break;
    }
    case UPROPS_SRC_NFKC: {
        const Normalizer2Impl *nfkc = Normalizer2Factory::getNFKCImpl(errorCode);
        if (U_SUCCESS(errorCode)) {
            nfkc->addPropertyStarts(&adder, errorCode);
        }
        break;
    }
    case UPROPS_SRC_NFKC_CF: {
        const Normalizer2Impl *nfkcCf = Normalizer2Factory::getNFKC_CFImpl(errorCode);
        if (U_SUCCESS(errorCode)) {
            nfkcCf->addPropertyStarts(&adder, errorCode);
        }
        break;
    }
    case UPROPS_SRC_NFC_CANON_ITER: {
        const Normalizer2Impl *nfc = Normalizer2Factory::getNFCImpl(errorCode);
        if (U_SUCCESS(errorCode)) {
            nfc->addCanonIterPropertyStarts(&adder, errorCode);
        }
        break;
    }
#endif
    case UPROPS_SRC_CASE:
        ucase_addPropertyStarts(&adder, &errorCode);
        break;
    case UPROPS_SRC_BIDI:
        ubidi_addPropertyStarts(&adder, &errorCode);
        break;
    case UPROPS_SRC_INPC:
    case UPROPS_SRC_INSC:
    case UPROPS_SRC_VO:
        uprops_addPropertyStarts(src, &adder, &errorCode);
        break;
    default:
        errorCode = U_INTERNAL_PROGRAM_ERROR;
        break;
    }

    if (U_SUCCESS(errorCode)) {
        // Compact for caching
        slot->compact();
        ucln_common_registerCleanup(UCLN_COMMON_CHARACTERPROPERTIES, characterproperties_cleanup);
    } else {
        // Do not keep a partially filled set in the cache.
        delete slot;
        slot = nullptr;
    }
}
|
||||||
|
|
||||||
|
// Returns the cached inclusion set for a property source, building it on first use
// via umtx_initOnce() and CharacterProperties::initInclusion().
//
// Returns nullptr if errorCode already indicates a failure on entry, or if src is
// out of range (errorCode is then set to U_ILLEGAL_ARGUMENT_ERROR).
// Otherwise returns the cached fSet pointer of the slot.
const UnicodeSet *getInclusionsForSource(UPropertySource src, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) { return nullptr; }
    const bool srcInRange = 0 <= src && src < UPROPS_SRC_COUNT;
    if (!srcInRange) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }
    Inclusion &slot = gInclusions[src];
    umtx_initOnce(slot.fInitOnce, &CharacterProperties::initInclusion, src, errorCode);
    return slot.fSet;
}
|
||||||
|
|
||||||
|
// Returns the inclusion set for the data source that uprops_getSource() reports
// for prop. Returns nullptr without doing anything if errorCode already indicates
// a failure; otherwise defers to getInclusionsForSource().
const UnicodeSet *CharacterProperties::getInclusionsForProperty(
        UProperty prop, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) { return nullptr; }
    return getInclusionsForSource(uprops_getSource(prop), errorCode);
}
|
||||||
|
|
||||||
|
U_NAMESPACE_END
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
// Builds a frozen UnicodeSet of all code points for which u_hasBinaryProperty()
// returns true for the given property.
//
// Only the code points inside the property's inclusion-set ranges are tested;
// a run of "true" results that is still open after the last tested code point
// is extended to U+10FFFF.
//
// Returns a newly allocated set owned by the caller, or nullptr on failure
// (errorCode is set to U_MEMORY_ALLOCATION_ERROR if the set cannot be allocated).
UnicodeSet *makeSet(UProperty property, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) { return nullptr; }
    icu::LocalPointer<UnicodeSet> result(new UnicodeSet());
    if (result.isNull()) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    const UnicodeSet *inclusions =
        icu::CharacterProperties::getInclusionsForProperty(property, errorCode);
    if (U_FAILURE(errorCode)) { return nullptr; }

    const int32_t rangeCount = inclusions->getRangeCount();
    UChar32 runStart = -1;  // start of the current run of "true" code points, or -1 if none

    for (int32_t r = 0; r < rangeCount; ++r) {
        const UChar32 first = inclusions->getRangeStart(r);
        const UChar32 last = inclusions->getRangeEnd(r);
        for (UChar32 cp = first; cp <= last; ++cp) {
            // TODO: Get a UCharacterProperty.BinaryProperty to avoid the property dispatch.
            if (!u_hasBinaryProperty(cp, property)) {
                if (runStart >= 0) {
                    // Transition from true to false.
                    result->add(runStart, cp - 1);
                    runStart = -1;
                }
            } else if (runStart < 0) {
                // Transition from false to true.
                runStart = cp;
            }
        }
    }
    if (runStart >= 0) {
        result->add(runStart, 0x10FFFF);
    }
    result->freeze();
    return result.orphan();
}
|
||||||
|
|
||||||
|
// Builds an immutable code point map with the values of an integer-valued property.
//
// The values come from u_getIntPropertyValue(), evaluated only for the code points
// inside the property's inclusion-set ranges. Each run of equal values is written
// into a mutable trie, which is then converted with umutablecptrie_buildImmutable().
//
// property:  the property whose values are stored.
// errorCode: in/out ICU error code.
// Returns nullptr if errorCode indicates a failure before the build step;
// otherwise the result of umutablecptrie_buildImmutable(), cast to UCPMap *.
UCPMap *makeMap(UProperty property, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) { return nullptr; }
    // nullValue is passed as both value arguments of umutablecptrie_open(),
    // so runs that hold nullValue do not need to be written explicitly.
    uint32_t nullValue = property == UCHAR_SCRIPT ? USCRIPT_UNKNOWN : 0;
    icu::LocalUMutableCPTriePointer mutableTrie(
        umutablecptrie_open(nullValue, nullValue, &errorCode));
    const UnicodeSet *inclusions =
        icu::CharacterProperties::getInclusionsForProperty(property, errorCode);
    if (U_FAILURE(errorCode)) { return nullptr; }
    int32_t numRanges = inclusions->getRangeCount();
    UChar32 start = 0;           // first code point of the current run
    uint32_t value = nullValue;  // value of the current run

    for (int32_t i = 0; i < numRanges; ++i) {
        UChar32 rangeEnd = inclusions->getRangeEnd(i);
        for (UChar32 c = inclusions->getRangeStart(i); c <= rangeEnd; ++c) {
            // TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch.
            uint32_t nextValue = u_getIntPropertyValue(c, property);
            if (value != nextValue) {
                // The run [start, c - 1] ends here; store it unless it holds nullValue.
                if (value != nullValue) {
                    umutablecptrie_setRange(mutableTrie.getAlias(), start, c - 1, value, &errorCode);
                }
                start = c;
                value = nextValue;
            }
        }
    }
    // Store the final run up to U+10FFFF.
    // Compare against nullValue (not 0), consistent with the check inside the loop:
    // when nullValue is nonzero (UCHAR_SCRIPT), a final run whose value is 0
    // must still be written, otherwise those code points would keep nullValue.
    if (value != nullValue) {
        umutablecptrie_setRange(mutableTrie.getAlias(), start, 0x10FFFF, value, &errorCode);
    }

    // Only these two properties get the FAST trie type; all others get SMALL.
    UCPTrieType type;
    if (property == UCHAR_BIDI_CLASS || property == UCHAR_GENERAL_CATEGORY) {
        type = UCPTRIE_TYPE_FAST;
    } else {
        type = UCPTRIE_TYPE_SMALL;
    }
    // Pick the narrowest value width that can hold the property's maximum value.
    UCPTrieValueWidth valueWidth;
    // TODO: UCharacterProperty.IntProperty
    int32_t max = u_getIntPropertyMaxValue(property);
    if (max <= 0xff) {
        valueWidth = UCPTRIE_VALUE_BITS_8;
    } else if (max <= 0xffff) {
        valueWidth = UCPTRIE_VALUE_BITS_16;
    } else {
        valueWidth = UCPTRIE_VALUE_BITS_32;
    }
    return reinterpret_cast<UCPMap *>(
        umutablecptrie_buildImmutable(mutableTrie.getAlias(), type, valueWidth, &errorCode));
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
U_NAMESPACE_USE
|
||||||
|
|
||||||
|
// Returns the cached set for a binary property, creating it with makeSet()
// on first use while holding cpMutex.
//
// Returns nullptr if *pErrorCode indicates a failure on entry or after creation,
// or if property is outside [0, UCHAR_BINARY_LIMIT) (U_ILLEGAL_ARGUMENT_ERROR).
U_CAPI const USet * U_EXPORT2
u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode) {
    if (U_FAILURE(*pErrorCode)) { return nullptr; }
    const bool isBinaryProperty = 0 <= property && property < UCHAR_BINARY_LIMIT;
    if (!isBinaryProperty) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }
    Mutex lock(&cpMutex);
    UnicodeSet *&cached = sets[property];
    if (cached == nullptr) {
        cached = makeSet(property, *pErrorCode);
    }
    if (U_FAILURE(*pErrorCode)) { return nullptr; }
    return cached->toUSet();
}
|
||||||
|
|
||||||
|
// Returns the cached map for an integer property, creating it with makeMap()
// on first use while holding cpMutex.
//
// Returns nullptr if *pErrorCode indicates a failure on entry, or if property is
// outside [UCHAR_INT_START, UCHAR_INT_LIMIT) (U_ILLEGAL_ARGUMENT_ERROR).
// Otherwise returns the cache slot's value, i.e. whatever makeMap() produced.
U_CAPI const UCPMap * U_EXPORT2
u_getIntPropertyMap(UProperty property, UErrorCode *pErrorCode) {
    if (U_FAILURE(*pErrorCode)) { return nullptr; }
    const bool isIntProperty = UCHAR_INT_START <= property && property < UCHAR_INT_LIMIT;
    if (!isIntProperty) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }
    Mutex lock(&cpMutex);
    UCPMap *&cached = maps[property - UCHAR_INT_START];
    if (cached == nullptr) {
        cached = makeMap(property, *pErrorCode);
    }
    return cached;
}
|
@ -268,6 +268,7 @@
|
|||||||
<ClCompile Include="ruleiter.cpp" />
|
<ClCompile Include="ruleiter.cpp" />
|
||||||
<ClCompile Include="ucase.cpp" />
|
<ClCompile Include="ucase.cpp" />
|
||||||
<ClCompile Include="uchar.cpp" />
|
<ClCompile Include="uchar.cpp" />
|
||||||
|
<ClCompile Include="characterproperties.cpp" />
|
||||||
<ClCompile Include="unames.cpp" />
|
<ClCompile Include="unames.cpp" />
|
||||||
<ClCompile Include="unifiedcache.cpp" />
|
<ClCompile Include="unifiedcache.cpp" />
|
||||||
<ClCompile Include="unifilt.cpp" />
|
<ClCompile Include="unifilt.cpp" />
|
||||||
|
@ -388,6 +388,9 @@
|
|||||||
<ClCompile Include="bmpset.cpp">
|
<ClCompile Include="bmpset.cpp">
|
||||||
<Filter>properties & sets</Filter>
|
<Filter>properties & sets</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
|
<ClCompile Include="characterproperties.cpp">
|
||||||
|
<Filter>properties & sets</Filter>
|
||||||
|
</ClCompile>
|
||||||
<ClCompile Include="propname.cpp">
|
<ClCompile Include="propname.cpp">
|
||||||
<Filter>properties & sets</Filter>
|
<Filter>properties & sets</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
|
@ -393,6 +393,7 @@
|
|||||||
<ClCompile Include="ruleiter.cpp" />
|
<ClCompile Include="ruleiter.cpp" />
|
||||||
<ClCompile Include="ucase.cpp" />
|
<ClCompile Include="ucase.cpp" />
|
||||||
<ClCompile Include="uchar.cpp" />
|
<ClCompile Include="uchar.cpp" />
|
||||||
|
<ClCompile Include="characterproperties.cpp" />
|
||||||
<ClCompile Include="unames.cpp" />
|
<ClCompile Include="unames.cpp" />
|
||||||
<ClCompile Include="unifiedcache.cpp" />
|
<ClCompile Include="unifiedcache.cpp" />
|
||||||
<ClCompile Include="unifilt.cpp" />
|
<ClCompile Include="unifilt.cpp" />
|
||||||
|
@ -35,7 +35,7 @@ U_NAMESPACE_BEGIN
|
|||||||
|
|
||||||
// For example:
|
// For example:
|
||||||
//
|
//
|
||||||
// UMutex myMutex;
|
// UMutex myMutex = U_MUTEX_INITIALIZER;
|
||||||
//
|
//
|
||||||
// void Function(int arg1, int arg2)
|
// void Function(int arg1, int arg2)
|
||||||
// {
|
// {
|
||||||
|
@ -466,7 +466,7 @@ void
|
|||||||
Normalizer2Impl::addLcccChars(UnicodeSet &set) const {
|
Normalizer2Impl::addLcccChars(UnicodeSet &set) const {
|
||||||
UChar32 start = 0, end;
|
UChar32 start = 0, end;
|
||||||
uint32_t norm16;
|
uint32_t norm16;
|
||||||
while ((end = ucptrie_getRange(normTrie, start, UCPTRIE_RANGE_FIXED_LEAD_SURROGATES, INERT,
|
while ((end = ucptrie_getRange(normTrie, start, UCPMAP_RANGE_FIXED_LEAD_SURROGATES, INERT,
|
||||||
nullptr, nullptr, &norm16)) >= 0) {
|
nullptr, nullptr, &norm16)) >= 0) {
|
||||||
if (norm16 > Normalizer2Impl::MIN_NORMAL_MAYBE_YES &&
|
if (norm16 > Normalizer2Impl::MIN_NORMAL_MAYBE_YES &&
|
||||||
norm16 != Normalizer2Impl::JAMO_VT) {
|
norm16 != Normalizer2Impl::JAMO_VT) {
|
||||||
@ -484,7 +484,7 @@ Normalizer2Impl::addPropertyStarts(const USetAdder *sa, UErrorCode & /*errorCode
|
|||||||
// Add the start code point of each same-value range of the trie.
|
// Add the start code point of each same-value range of the trie.
|
||||||
UChar32 start = 0, end;
|
UChar32 start = 0, end;
|
||||||
uint32_t value;
|
uint32_t value;
|
||||||
while ((end = ucptrie_getRange(normTrie, start, UCPTRIE_RANGE_FIXED_LEAD_SURROGATES, INERT,
|
while ((end = ucptrie_getRange(normTrie, start, UCPMAP_RANGE_FIXED_LEAD_SURROGATES, INERT,
|
||||||
nullptr, nullptr, &value)) >= 0) {
|
nullptr, nullptr, &value)) >= 0) {
|
||||||
sa->add(sa->set, start);
|
sa->add(sa->set, start);
|
||||||
if (start != end && isAlgorithmicNoNo((uint16_t)value) &&
|
if (start != end && isAlgorithmicNoNo((uint16_t)value) &&
|
||||||
@ -518,7 +518,7 @@ Normalizer2Impl::addCanonIterPropertyStarts(const USetAdder *sa, UErrorCode &err
|
|||||||
// Currently only used for the SEGMENT_STARTER property.
|
// Currently only used for the SEGMENT_STARTER property.
|
||||||
UChar32 start = 0, end;
|
UChar32 start = 0, end;
|
||||||
uint32_t value;
|
uint32_t value;
|
||||||
while ((end = ucptrie_getRange(fCanonIterData->trie, start, UCPTRIE_RANGE_NORMAL, 0,
|
while ((end = ucptrie_getRange(fCanonIterData->trie, start, UCPMAP_RANGE_NORMAL, 0,
|
||||||
segmentStarterMapper, nullptr, &value)) >= 0) {
|
segmentStarterMapper, nullptr, &value)) >= 0) {
|
||||||
sa->add(sa->set, start);
|
sa->add(sa->set, start);
|
||||||
start = end + 1;
|
start = end + 1;
|
||||||
@ -2398,7 +2398,7 @@ void InitCanonIterData::doInit(Normalizer2Impl *impl, UErrorCode &errorCode) {
|
|||||||
UChar32 start = 0, end;
|
UChar32 start = 0, end;
|
||||||
uint32_t value;
|
uint32_t value;
|
||||||
while ((end = ucptrie_getRange(impl->normTrie, start,
|
while ((end = ucptrie_getRange(impl->normTrie, start,
|
||||||
UCPTRIE_RANGE_FIXED_LEAD_SURROGATES, Normalizer2Impl::INERT,
|
UCPMAP_RANGE_FIXED_LEAD_SURROGATES, Normalizer2Impl::INERT,
|
||||||
nullptr, nullptr, &value)) >= 0) {
|
nullptr, nullptr, &value)) >= 0) {
|
||||||
// Call Normalizer2Impl::makeCanonIterDataFromNorm16() for a range of same-norm16 characters.
|
// Call Normalizer2Impl::makeCanonIterDataFromNorm16() for a range of same-norm16 characters.
|
||||||
if (value != Normalizer2Impl::INERT) {
|
if (value != Normalizer2Impl::INERT) {
|
||||||
|
@ -30,6 +30,7 @@
|
|||||||
#include "unicode/utf.h"
|
#include "unicode/utf.h"
|
||||||
#include "unicode/utf16.h"
|
#include "unicode/utf16.h"
|
||||||
#include "mutex.h"
|
#include "mutex.h"
|
||||||
|
#include "udataswp.h"
|
||||||
#include "uset_imp.h"
|
#include "uset_imp.h"
|
||||||
|
|
||||||
// When the nfc.nrm data is *not* hardcoded into the common library
|
// When the nfc.nrm data is *not* hardcoded into the common library
|
||||||
|
@ -45,6 +45,7 @@ typedef enum ECleanupCommonType {
|
|||||||
UCLN_COMMON_CURRENCY,
|
UCLN_COMMON_CURRENCY,
|
||||||
UCLN_COMMON_LOADED_NORMALIZER2,
|
UCLN_COMMON_LOADED_NORMALIZER2,
|
||||||
UCLN_COMMON_NORMALIZER2,
|
UCLN_COMMON_NORMALIZER2,
|
||||||
|
UCLN_COMMON_CHARACTERPROPERTIES,
|
||||||
UCLN_COMMON_USET,
|
UCLN_COMMON_USET,
|
||||||
UCLN_COMMON_UNAMES,
|
UCLN_COMMON_UNAMES,
|
||||||
UCLN_COMMON_UPROPS,
|
UCLN_COMMON_UPROPS,
|
||||||
|
@ -247,7 +247,7 @@ namespace {
|
|||||||
constexpr int32_t MAX_UNICODE = 0x10ffff;
|
constexpr int32_t MAX_UNICODE = 0x10ffff;
|
||||||
|
|
||||||
inline uint32_t maybeFilterValue(uint32_t value, uint32_t trieNullValue, uint32_t nullValue,
|
inline uint32_t maybeFilterValue(uint32_t value, uint32_t trieNullValue, uint32_t nullValue,
|
||||||
UCPTrieValueFilter *filter, const void *context) {
|
UCPMapValueFilter *filter, const void *context) {
|
||||||
if (value == trieNullValue) {
|
if (value == trieNullValue) {
|
||||||
value = nullValue;
|
value = nullValue;
|
||||||
} else if (filter != nullptr) {
|
} else if (filter != nullptr) {
|
||||||
@ -257,7 +257,7 @@ inline uint32_t maybeFilterValue(uint32_t value, uint32_t trieNullValue, uint32_
|
|||||||
}
|
}
|
||||||
|
|
||||||
UChar32 getRange(const void *t, UChar32 start,
|
UChar32 getRange(const void *t, UChar32 start,
|
||||||
UCPTrieValueFilter *filter, const void *context, uint32_t *pValue) {
|
UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
|
||||||
if ((uint32_t)start > MAX_UNICODE) {
|
if ((uint32_t)start > MAX_UNICODE) {
|
||||||
return U_SENTINEL;
|
return U_SENTINEL;
|
||||||
}
|
}
|
||||||
@ -403,9 +403,9 @@ UChar32 getRange(const void *t, UChar32 start,
|
|||||||
U_CFUNC UChar32
|
U_CFUNC UChar32
|
||||||
ucptrie_internalGetRange(UCPTrieGetRange *getRange,
|
ucptrie_internalGetRange(UCPTrieGetRange *getRange,
|
||||||
const void *trie, UChar32 start,
|
const void *trie, UChar32 start,
|
||||||
UCPTrieRangeOption option, uint32_t surrogateValue,
|
UCPMapRangeOption option, uint32_t surrogateValue,
|
||||||
UCPTrieValueFilter *filter, const void *context, uint32_t *pValue) {
|
UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
|
||||||
if (option == UCPTRIE_RANGE_NORMAL) {
|
if (option == UCPMAP_RANGE_NORMAL) {
|
||||||
return getRange(trie, start, filter, context, pValue);
|
return getRange(trie, start, filter, context, pValue);
|
||||||
}
|
}
|
||||||
uint32_t value;
|
uint32_t value;
|
||||||
@ -413,7 +413,7 @@ ucptrie_internalGetRange(UCPTrieGetRange *getRange,
|
|||||||
// We need to examine the range value even if the caller does not want it.
|
// We need to examine the range value even if the caller does not want it.
|
||||||
pValue = &value;
|
pValue = &value;
|
||||||
}
|
}
|
||||||
UChar32 surrEnd = option == UCPTRIE_RANGE_FIXED_ALL_SURROGATES ? 0xdfff : 0xdbff;
|
UChar32 surrEnd = option == UCPMAP_RANGE_FIXED_ALL_SURROGATES ? 0xdfff : 0xdbff;
|
||||||
UChar32 end = getRange(trie, start, filter, context, pValue);
|
UChar32 end = getRange(trie, start, filter, context, pValue);
|
||||||
if (end < 0xd7ff || start > surrEnd) {
|
if (end < 0xd7ff || start > surrEnd) {
|
||||||
return end;
|
return end;
|
||||||
@ -448,8 +448,8 @@ ucptrie_internalGetRange(UCPTrieGetRange *getRange,
|
|||||||
|
|
||||||
U_CAPI UChar32 U_EXPORT2
|
U_CAPI UChar32 U_EXPORT2
|
||||||
ucptrie_getRange(const UCPTrie *trie, UChar32 start,
|
ucptrie_getRange(const UCPTrie *trie, UChar32 start,
|
||||||
UCPTrieRangeOption option, uint32_t surrogateValue,
|
UCPMapRangeOption option, uint32_t surrogateValue,
|
||||||
UCPTrieValueFilter *filter, const void *context, uint32_t *pValue) {
|
UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
|
||||||
return ucptrie_internalGetRange(getRange, trie, start,
|
return ucptrie_internalGetRange(getRange, trie, start,
|
||||||
option, surrogateValue,
|
option, surrogateValue,
|
||||||
filter, context, pValue);
|
filter, context, pValue);
|
||||||
@ -571,3 +571,20 @@ ucptrie_printLengths(const UCPTrie *trie, const char *which) {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
|
// UCPMap ----
|
||||||
|
// Initially, this is the same as UCPTrie. This may well change.
|
||||||
|
|
||||||
|
// Returns the value for code point c by treating the map as a UCPTrie
// and delegating to ucptrie_get().
U_CAPI uint32_t U_EXPORT2
ucpmap_get(const UCPMap *map, UChar32 c) {
    const UCPTrie *trie = reinterpret_cast<const UCPTrie *>(map);
    return ucptrie_get(trie, c);
}
|
||||||
|
|
||||||
|
// Range lookup on a UCPMap: treats the map as a UCPTrie and forwards all
// arguments unchanged to ucptrie_getRange(), returning its result.
U_CAPI UChar32 U_EXPORT2
ucpmap_getRange(const UCPMap *map, UChar32 start,
                UCPMapRangeOption option, uint32_t surrogateValue,
                UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
    const UCPTrie *trie = reinterpret_cast<const UCPTrie *>(map);
    return ucptrie_getRange(trie, start, option, surrogateValue, filter, context, pValue);
}
|
||||||
|
@ -131,13 +131,13 @@ enum {
|
|||||||
|
|
||||||
typedef UChar32
|
typedef UChar32
|
||||||
UCPTrieGetRange(const void *trie, UChar32 start,
|
UCPTrieGetRange(const void *trie, UChar32 start,
|
||||||
UCPTrieValueFilter *filter, const void *context, uint32_t *pValue);
|
UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
|
||||||
|
|
||||||
U_CFUNC UChar32
|
U_CFUNC UChar32
|
||||||
ucptrie_internalGetRange(UCPTrieGetRange *getRange,
|
ucptrie_internalGetRange(UCPTrieGetRange *getRange,
|
||||||
const void *trie, UChar32 start,
|
const void *trie, UChar32 start,
|
||||||
UCPTrieRangeOption option, uint32_t surrogateValue,
|
UCPMapRangeOption option, uint32_t surrogateValue,
|
||||||
UCPTrieValueFilter *filter, const void *context, uint32_t *pValue);
|
UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
|
||||||
|
|
||||||
#ifdef UCPTRIE_DEBUG
|
#ifdef UCPTRIE_DEBUG
|
||||||
U_CFUNC void
|
U_CFUNC void
|
||||||
|
@ -70,10 +70,11 @@ public:
|
|||||||
|
|
||||||
MutableCodePointTrie &operator=(const MutableCodePointTrie &other) = delete;
|
MutableCodePointTrie &operator=(const MutableCodePointTrie &other) = delete;
|
||||||
|
|
||||||
|
static MutableCodePointTrie *fromUCPMap(const UCPMap *map, UErrorCode &errorCode);
|
||||||
static MutableCodePointTrie *fromUCPTrie(const UCPTrie *trie, UErrorCode &errorCode);
|
static MutableCodePointTrie *fromUCPTrie(const UCPTrie *trie, UErrorCode &errorCode);
|
||||||
|
|
||||||
uint32_t get(UChar32 c) const;
|
uint32_t get(UChar32 c) const;
|
||||||
int32_t getRange(UChar32 start, UCPTrieValueFilter *filter, const void *context,
|
int32_t getRange(UChar32 start, UCPMapValueFilter *filter, const void *context,
|
||||||
uint32_t *pValue) const;
|
uint32_t *pValue) const;
|
||||||
|
|
||||||
void set(UChar32 c, uint32_t value, UErrorCode &errorCode);
|
void set(UChar32 c, uint32_t value, UErrorCode &errorCode);
|
||||||
@ -171,6 +172,36 @@ MutableCodePointTrie::~MutableCodePointTrie() {
|
|||||||
uprv_free(index16);
|
uprv_free(index16);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Creates a mutable trie with the same contents as the given map.
//
// The new trie's error value is ucpmap_get(map, -1) and its initial value is
// ucpmap_get(map, 0x10ffff). Every range reported by ucpmap_getRange() whose value
// differs from the initial value is copied with set() or setRange().
//
// Returns the new trie (owned by the caller), or nullptr if errorCode indicates
// a failure after construction or after copying.
MutableCodePointTrie *MutableCodePointTrie::fromUCPMap(const UCPMap *map, UErrorCode &errorCode) {
    // Use the highValue as the initialValue to reduce the highStart.
    const uint32_t errorValue = ucpmap_get(map, -1);
    const uint32_t initialValue = ucpmap_get(map, 0x10ffff);
    LocalPointer<MutableCodePointTrie> builder(
        new MutableCodePointTrie(initialValue, errorValue, errorCode),
        errorCode);
    if (U_FAILURE(errorCode)) {
        return nullptr;
    }
    uint32_t rangeValue;
    UChar32 rangeStart = 0;
    for (;;) {
        const UChar32 rangeEnd = ucpmap_getRange(map, rangeStart, UCPMAP_RANGE_NORMAL, 0,
                                                 nullptr, nullptr, &rangeValue);
        if (rangeEnd < 0) {
            break;  // no more ranges
        }
        if (rangeValue != initialValue) {
            if (rangeStart != rangeEnd) {
                builder->setRange(rangeStart, rangeEnd, rangeValue, errorCode);
            } else {
                builder->set(rangeStart, rangeValue, errorCode);
            }
        }
        rangeStart = rangeEnd + 1;
    }
    return U_SUCCESS(errorCode) ? builder.orphan() : nullptr;
}
|
||||||
|
|
||||||
MutableCodePointTrie *MutableCodePointTrie::fromUCPTrie(const UCPTrie *trie, UErrorCode &errorCode) {
|
MutableCodePointTrie *MutableCodePointTrie::fromUCPTrie(const UCPTrie *trie, UErrorCode &errorCode) {
|
||||||
// Use the highValue as the initialValue to reduce the highStart.
|
// Use the highValue as the initialValue to reduce the highStart.
|
||||||
uint32_t errorValue;
|
uint32_t errorValue;
|
||||||
@ -201,7 +232,7 @@ MutableCodePointTrie *MutableCodePointTrie::fromUCPTrie(const UCPTrie *trie, UEr
|
|||||||
}
|
}
|
||||||
UChar32 start = 0, end;
|
UChar32 start = 0, end;
|
||||||
uint32_t value;
|
uint32_t value;
|
||||||
while ((end = ucptrie_getRange(trie, start, UCPTRIE_RANGE_NORMAL, 0,
|
while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0,
|
||||||
nullptr, nullptr, &value)) >= 0) {
|
nullptr, nullptr, &value)) >= 0) {
|
||||||
if (value != initialValue) {
|
if (value != initialValue) {
|
||||||
if (start == end) {
|
if (start == end) {
|
||||||
@ -244,7 +275,7 @@ uint32_t MutableCodePointTrie::get(UChar32 c) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
inline uint32_t maybeFilterValue(uint32_t value, uint32_t initialValue, uint32_t nullValue,
|
inline uint32_t maybeFilterValue(uint32_t value, uint32_t initialValue, uint32_t nullValue,
|
||||||
UCPTrieValueFilter *filter, const void *context) {
|
UCPMapValueFilter *filter, const void *context) {
|
||||||
if (value == initialValue) {
|
if (value == initialValue) {
|
||||||
value = nullValue;
|
value = nullValue;
|
||||||
} else if (filter != nullptr) {
|
} else if (filter != nullptr) {
|
||||||
@ -254,7 +285,7 @@ inline uint32_t maybeFilterValue(uint32_t value, uint32_t initialValue, uint32_t
|
|||||||
}
|
}
|
||||||
|
|
||||||
UChar32 MutableCodePointTrie::getRange(
|
UChar32 MutableCodePointTrie::getRange(
|
||||||
UChar32 start, UCPTrieValueFilter *filter, const void *context,
|
UChar32 start, UCPMapValueFilter *filter, const void *context,
|
||||||
uint32_t *pValue) const {
|
uint32_t *pValue) const {
|
||||||
if ((uint32_t)start > MAX_UNICODE) {
|
if ((uint32_t)start > MAX_UNICODE) {
|
||||||
return U_SENTINEL;
|
return U_SENTINEL;
|
||||||
@ -1565,6 +1596,18 @@ umutablecptrie_close(UMutableCPTrie *trie) {
|
|||||||
delete reinterpret_cast<MutableCodePointTrie *>(trie);
|
delete reinterpret_cast<MutableCodePointTrie *>(trie);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// C API wrapper around MutableCodePointTrie::fromUCPMap().
//
// Returns nullptr if *pErrorCode indicates a failure on entry, or if map is nullptr
// (*pErrorCode is then set to U_ILLEGAL_ARGUMENT_ERROR).
// Otherwise returns the C++ result cast to UMutableCPTrie *.
U_CAPI UMutableCPTrie * U_EXPORT2
umutablecptrie_fromUCPMap(const UCPMap *map, UErrorCode *pErrorCode) {
    if (U_FAILURE(*pErrorCode)) {
        return nullptr;
    }
    if (map != nullptr) {
        MutableCodePointTrie *builder = MutableCodePointTrie::fromUCPMap(map, *pErrorCode);
        return reinterpret_cast<UMutableCPTrie *>(builder);
    }
    *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
    return nullptr;
}
|
||||||
|
|
||||||
U_CAPI UMutableCPTrie * U_EXPORT2
|
U_CAPI UMutableCPTrie * U_EXPORT2
|
||||||
umutablecptrie_fromUCPTrie(const UCPTrie *trie, UErrorCode *pErrorCode) {
|
umutablecptrie_fromUCPTrie(const UCPTrie *trie, UErrorCode *pErrorCode) {
|
||||||
if (U_FAILURE(*pErrorCode)) {
|
if (U_FAILURE(*pErrorCode)) {
|
||||||
@ -1585,7 +1628,7 @@ umutablecptrie_get(const UMutableCPTrie *trie, UChar32 c) {
|
|||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
UChar32 getRange(const void *trie, UChar32 start,
|
UChar32 getRange(const void *trie, UChar32 start,
|
||||||
UCPTrieValueFilter *filter, const void *context, uint32_t *pValue) {
|
UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
|
||||||
return reinterpret_cast<const MutableCodePointTrie *>(trie)->
|
return reinterpret_cast<const MutableCodePointTrie *>(trie)->
|
||||||
getRange(start, filter, context, pValue);
|
getRange(start, filter, context, pValue);
|
||||||
}
|
}
|
||||||
@ -1594,8 +1637,8 @@ UChar32 getRange(const void *trie, UChar32 start,
|
|||||||
|
|
||||||
U_CAPI UChar32 U_EXPORT2
|
U_CAPI UChar32 U_EXPORT2
|
||||||
umutablecptrie_getRange(const UMutableCPTrie *trie, UChar32 start,
|
umutablecptrie_getRange(const UMutableCPTrie *trie, UChar32 start,
|
||||||
UCPTrieRangeOption option, uint32_t surrogateValue,
|
UCPMapRangeOption option, uint32_t surrogateValue,
|
||||||
UCPTrieValueFilter *filter, const void *context, uint32_t *pValue) {
|
UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
|
||||||
return ucptrie_internalGetRange(getRange, trie, start,
|
return ucptrie_internalGetRange(getRange, trie, start,
|
||||||
option, surrogateValue,
|
option, surrogateValue,
|
||||||
filter, context, pValue);
|
filter, context, pValue);
|
||||||
|
@ -27,6 +27,24 @@
|
|||||||
|
|
||||||
#include "unicode/utypes.h"
|
#include "unicode/utypes.h"
|
||||||
#include "unicode/stringoptions.h"
|
#include "unicode/stringoptions.h"
|
||||||
|
#include "unicode/ucpmap.h"
|
||||||
|
|
||||||
|
#if !defined(USET_DEFINED) && !defined(U_IN_DOXYGEN)
|
||||||
|
|
||||||
|
#define USET_DEFINED
|
||||||
|
|
||||||
|
/**
|
||||||
|
* USet is the C API type corresponding to C++ class UnicodeSet.
|
||||||
|
* It is forward-declared here to avoid including the unicode/uset.h file if related
|
||||||
|
* APIs are not used.
|
||||||
|
*
|
||||||
|
* @see ucnv_getUnicodeSet
|
||||||
|
* @stable ICU 2.4
|
||||||
|
*/
|
||||||
|
typedef struct USet USet;
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
U_CDECL_BEGIN
|
U_CDECL_BEGIN
|
||||||
|
|
||||||
@ -61,6 +79,18 @@ U_CDECL_BEGIN
|
|||||||
* "About the Unicode Character Database" (http://www.unicode.org/ucd/)
|
* "About the Unicode Character Database" (http://www.unicode.org/ucd/)
|
||||||
* and the ICU User Guide chapter on Properties (http://icu-project.org/userguide/properties.html).
|
* and the ICU User Guide chapter on Properties (http://icu-project.org/userguide/properties.html).
|
||||||
*
|
*
|
||||||
|
* Many properties are accessible via generic functions that take a UProperty selector.
|
||||||
|
* - u_hasBinaryProperty() returns a binary value (TRUE/FALSE) per property and code point.
|
||||||
|
* - u_getIntPropertyValue() returns an integer value per property and code point.
|
||||||
|
* For each supported enumerated or catalog property, there is
|
||||||
|
* an enum type for all of the property's values, and
|
||||||
|
* u_getIntPropertyValue() returns the numeric values of those constants.
|
||||||
|
* - u_getBinaryPropertySet() returns a set for each ICU-supported binary property with
|
||||||
|
* all code points for which the property is true.
|
||||||
|
* - u_getIntPropertyMap() returns a map for each
|
||||||
|
* ICU-supported enumerated/catalog/int-valued property which
|
||||||
|
* maps all Unicode code points to their values for that property.
|
||||||
|
*
|
||||||
* Many functions are designed to match java.lang.Character functions.
|
* Many functions are designed to match java.lang.Character functions.
|
||||||
* See the individual function documentation,
|
* See the individual function documentation,
|
||||||
* and see the JDK 1.4 java.lang.Character documentation
|
* and see the JDK 1.4 java.lang.Character documentation
|
||||||
@ -2519,6 +2549,7 @@ typedef enum UVerticalOrientation {
|
|||||||
* does not have data for the property at all, or not for this code point.
|
* does not have data for the property at all, or not for this code point.
|
||||||
*
|
*
|
||||||
* @see UProperty
|
* @see UProperty
|
||||||
|
* @see u_getBinaryPropertySet
|
||||||
* @see u_getIntPropertyValue
|
* @see u_getIntPropertyValue
|
||||||
* @see u_getUnicodeVersion
|
* @see u_getUnicodeVersion
|
||||||
* @stable ICU 2.1
|
* @stable ICU 2.1
|
||||||
@ -2526,6 +2557,27 @@ typedef enum UVerticalOrientation {
|
|||||||
U_STABLE UBool U_EXPORT2
|
U_STABLE UBool U_EXPORT2
|
||||||
u_hasBinaryProperty(UChar32 c, UProperty which);
|
u_hasBinaryProperty(UChar32 c, UProperty which);
|
||||||
|
|
||||||
|
#ifndef U_HIDE_DRAFT_API
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a frozen USet for a binary property.
|
||||||
|
* The library retains ownership over the returned object.
|
||||||
|
* Sets an error code if the property number is not one for a binary property.
|
||||||
|
*
|
||||||
|
* The returned set contains all code points for which the property is true.
|
||||||
|
*
|
||||||
|
* @param property UCHAR_BINARY_START..UCHAR_BINARY_LIMIT-1
|
||||||
|
* @param pErrorCode an in/out ICU UErrorCode
|
||||||
|
* @return the property as a set
|
||||||
|
* @see UProperty
|
||||||
|
* @see u_hasBinaryProperty
|
||||||
|
* @see UnicodeSet::fromUSet
|
||||||
|
* @draft ICU 63
*/
|
||||||
|
U_CAPI const USet * U_EXPORT2
|
||||||
|
u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode);
|
||||||
|
|
||||||
|
#endif // U_HIDE_DRAFT_API
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if a code point has the Alphabetic Unicode property.
|
* Check if a code point has the Alphabetic Unicode property.
|
||||||
* Same as u_hasBinaryProperty(c, UCHAR_ALPHABETIC).
|
* Same as u_hasBinaryProperty(c, UCHAR_ALPHABETIC).
|
||||||
@ -2626,6 +2678,7 @@ u_isUWhiteSpace(UChar32 c);
|
|||||||
* @see u_hasBinaryProperty
|
* @see u_hasBinaryProperty
|
||||||
* @see u_getIntPropertyMinValue
|
* @see u_getIntPropertyMinValue
|
||||||
* @see u_getIntPropertyMaxValue
|
* @see u_getIntPropertyMaxValue
|
||||||
|
* @see u_getIntPropertyMap
|
||||||
* @see u_getUnicodeVersion
|
* @see u_getUnicodeVersion
|
||||||
* @stable ICU 2.2
|
* @stable ICU 2.2
|
||||||
*/
|
*/
|
||||||
@ -2682,6 +2735,27 @@ u_getIntPropertyMinValue(UProperty which);
|
|||||||
U_STABLE int32_t U_EXPORT2
|
U_STABLE int32_t U_EXPORT2
|
||||||
u_getIntPropertyMaxValue(UProperty which);
|
u_getIntPropertyMaxValue(UProperty which);
|
||||||
|
|
||||||
|
#ifndef U_HIDE_DRAFT_API
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns an immutable UCPMap for an enumerated/catalog/int-valued property.
|
||||||
|
* The library retains ownership over the returned object.
|
||||||
|
* Sets an error code if the property number is not one for an "int property".
|
||||||
|
*
|
||||||
|
* The returned object maps all Unicode code points to their values for that property.
|
||||||
|
* For documentation of the integer values see u_getIntPropertyValue().
|
||||||
|
*
|
||||||
|
* @param property UCHAR_INT_START..UCHAR_INT_LIMIT-1
|
||||||
|
* @param pErrorCode an in/out ICU UErrorCode
|
||||||
|
* @return the property as a map
|
||||||
|
* @see UProperty
|
||||||
|
* @see u_getIntPropertyValue
|
||||||
|
* @draft ICU 63
*/
|
||||||
|
U_CAPI const UCPMap * U_EXPORT2
|
||||||
|
u_getIntPropertyMap(UProperty property, UErrorCode *pErrorCode);
|
||||||
|
|
||||||
|
#endif // U_HIDE_DRAFT_API
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the numeric value for a Unicode code point as defined in the
|
* Get the numeric value for a Unicode code point as defined in the
|
||||||
* Unicode Character Database.
|
* Unicode Character Database.
|
||||||
|
@ -53,19 +53,18 @@
|
|||||||
#include "unicode/uenum.h"
|
#include "unicode/uenum.h"
|
||||||
#include "unicode/localpointer.h"
|
#include "unicode/localpointer.h"
|
||||||
|
|
||||||
#ifndef __USET_H__
|
#if !defined(USET_DEFINED) && !defined(U_IN_DOXYGEN)
|
||||||
|
|
||||||
|
#define USET_DEFINED
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* USet is the C API type for Unicode sets.
|
* USet is the C API type corresponding to C++ class UnicodeSet.
|
||||||
* It is forward-declared here to avoid including the header file if related
|
* It is forward-declared here to avoid including the unicode/uset.h file if related
|
||||||
* conversion APIs are not used.
|
* conversion APIs are not used.
|
||||||
* See unicode/uset.h
|
|
||||||
*
|
*
|
||||||
* @see ucnv_getUnicodeSet
|
* @see ucnv_getUnicodeSet
|
||||||
* @stable ICU 2.6
|
* @stable ICU 2.4
|
||||||
*/
|
*/
|
||||||
struct USet;
|
|
||||||
/** @stable ICU 2.6 */
|
|
||||||
typedef struct USet USet;
|
typedef struct USet USet;
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
159
icu4c/source/common/unicode/ucpmap.h
Normal file
159
icu4c/source/common/unicode/ucpmap.h
Normal file
@ -0,0 +1,159 @@
|
|||||||
|
// © 2018 and later: Unicode, Inc. and others.
|
||||||
|
// License & terms of use: http://www.unicode.org/copyright.html
|
||||||
|
|
||||||
|
// ucpmap.h
|
||||||
|
// created: 2018sep03 Markus W. Scherer
|
||||||
|
|
||||||
|
#ifndef __UCPMAP_H__
|
||||||
|
#define __UCPMAP_H__
|
||||||
|
|
||||||
|
#include "unicode/utypes.h"
|
||||||
|
|
||||||
|
#ifndef U_HIDE_DRAFT_API
|
||||||
|
|
||||||
|
U_CDECL_BEGIN
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \file
|
||||||
|
*
|
||||||
|
* This file defines an abstract map from Unicode code points to integer values.
|
||||||
|
*
|
||||||
|
* @see UCPMap
|
||||||
|
* @see UCPTrie
|
||||||
|
* @see UMutableCPTrie
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Abstract map from Unicode code points (U+0000..U+10FFFF) to integer values.
|
||||||
|
*
|
||||||
|
* @see UCPTrie
|
||||||
|
* @see UMutableCPTrie
|
||||||
|
* @draft ICU 63
|
||||||
|
*/
|
||||||
|
typedef struct UCPMap UCPMap;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Selectors for how ucpmap_getRange() etc. should report value ranges overlapping with surrogates.
|
||||||
|
* Most users should use UCPMAP_RANGE_NORMAL.
|
||||||
|
*
|
||||||
|
* @see ucpmap_getRange
|
||||||
|
* @see ucptrie_getRange
|
||||||
|
* @see umutablecptrie_getRange
|
||||||
|
* @draft ICU 63
|
||||||
|
*/
|
||||||
|
enum UCPMapRangeOption {
|
||||||
|
/**
|
||||||
|
* ucpmap_getRange() enumerates all same-value ranges as stored in the map.
|
||||||
|
* Most users should use this option.
|
||||||
|
*/
|
||||||
|
UCPMAP_RANGE_NORMAL,
|
||||||
|
/**
|
||||||
|
* ucpmap_getRange() enumerates all same-value ranges as stored in the map,
|
||||||
|
* except that lead surrogates (U+D800..U+DBFF) are treated as having the
|
||||||
|
* surrogateValue, which is passed to getRange() as a separate parameter.
|
||||||
|
* The surrogateValue is not transformed via filter().
|
||||||
|
* See U_IS_LEAD(c).
|
||||||
|
*
|
||||||
|
* Most users should use UCPMAP_RANGE_NORMAL instead.
|
||||||
|
*
|
||||||
|
* This option is useful for maps that map surrogate code *units* to
|
||||||
|
* special values optimized for UTF-16 string processing
|
||||||
|
* or for special error behavior for unpaired surrogates,
|
||||||
|
* but those values are not to be associated with the lead surrogate code *points*.
|
||||||
|
*/
|
||||||
|
UCPMAP_RANGE_FIXED_LEAD_SURROGATES,
|
||||||
|
/**
|
||||||
|
* ucpmap_getRange() enumerates all same-value ranges as stored in the map,
|
||||||
|
* except that all surrogates (U+D800..U+DFFF) are treated as having the
|
||||||
|
* surrogateValue, which is passed to getRange() as a separate parameter.
|
||||||
|
* The surrogateValue is not transformed via filter().
|
||||||
|
* See U_IS_SURROGATE(c).
|
||||||
|
*
|
||||||
|
* Most users should use UCPMAP_RANGE_NORMAL instead.
|
||||||
|
*
|
||||||
|
* This option is useful for maps that map surrogate code *units* to
|
||||||
|
* special values optimized for UTF-16 string processing
|
||||||
|
* or for special error behavior for unpaired surrogates,
|
||||||
|
* but those values are not to be associated with the surrogate code *points*.
|
||||||
|
*/
|
||||||
|
UCPMAP_RANGE_FIXED_ALL_SURROGATES
|
||||||
|
};
|
||||||
|
#ifndef U_IN_DOXYGEN
|
||||||
|
typedef enum UCPMapRangeOption UCPMapRangeOption;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the value for a code point as stored in the map, with range checking.
|
||||||
|
* Returns an implementation-defined error value if c is not in the range 0..U+10FFFF.
|
||||||
|
*
|
||||||
|
* @param map the map
|
||||||
|
* @param c the code point
|
||||||
|
* @return the map value,
|
||||||
|
* or an implementation-defined error value if the code point is not in the range 0..U+10FFFF
|
||||||
|
* @draft ICU 63
|
||||||
|
*/
|
||||||
|
U_CAPI uint32_t U_EXPORT2
|
||||||
|
ucpmap_get(const UCPMap *map, UChar32 c);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Callback function type: Modifies a map value.
|
||||||
|
* Optionally called by ucpmap_getRange()/ucptrie_getRange()/umutablecptrie_getRange().
|
||||||
|
* The modified value will be returned by the getRange function.
|
||||||
|
*
|
||||||
|
* Can be used to ignore some of the value bits,
|
||||||
|
* make a filter for one of several values,
|
||||||
|
* return a value index computed from the map value, etc.
|
||||||
|
*
|
||||||
|
* @param context an opaque pointer, as passed into the getRange function
|
||||||
|
* @param value a value from the map
|
||||||
|
* @return the modified value
|
||||||
|
* @draft ICU 63
|
||||||
|
*/
|
||||||
|
typedef uint32_t U_CALLCONV
|
||||||
|
UCPMapValueFilter(const void *context, uint32_t value);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the last code point such that all those from start to there have the same value.
|
||||||
|
* Can be used to efficiently iterate over all same-value ranges in a map.
|
||||||
|
* (This is normally faster than iterating over code points and get()ting each value,
|
||||||
|
* but much slower than a data structure that stores ranges directly.)
|
||||||
|
*
|
||||||
|
* If the UCPMapValueFilter function pointer is not NULL, then
|
||||||
|
* the value to be delivered is passed through that function, and the return value is the end
|
||||||
|
* of the range where all values are modified to the same actual value.
|
||||||
|
* The value is unchanged if that function pointer is NULL.
|
||||||
|
*
|
||||||
|
* Example:
|
||||||
|
* \code
|
||||||
|
* UChar32 start = 0, end;
|
||||||
|
* uint32_t value;
|
||||||
|
* while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0,
|
||||||
|
* NULL, NULL, &value)) >= 0) {
|
||||||
|
* // Work with the range start..end and its value.
|
||||||
|
* start = end + 1;
|
||||||
|
* }
|
||||||
|
* \endcode
|
||||||
|
*
|
||||||
|
* @param map the map
|
||||||
|
* @param start range start
|
||||||
|
* @param option defines whether surrogates are treated normally,
|
||||||
|
* or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL
|
||||||
|
* @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL
|
||||||
|
* @param filter a pointer to a function that may modify the map data value,
|
||||||
|
* or NULL if the values from the map are to be used unmodified
|
||||||
|
* @param context an opaque pointer that is passed on to the filter function
|
||||||
|
* @param pValue if not NULL, receives the value that every code point start..end has;
|
||||||
|
* may have been modified by filter(context, map value)
|
||||||
|
* if that function pointer is not NULL
|
||||||
|
* @return the range end code point, or -1 if start is not a valid code point
|
||||||
|
* @draft ICU 63
|
||||||
|
*/
|
||||||
|
U_CAPI UChar32 U_EXPORT2
|
||||||
|
ucpmap_getRange(const UCPMap *map, UChar32 start,
|
||||||
|
UCPMapRangeOption option, uint32_t surrogateValue,
|
||||||
|
UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
|
||||||
|
|
||||||
|
U_CDECL_END
|
||||||
|
|
||||||
|
#endif // U_HIDE_DRAFT_API
|
||||||
|
#endif
|
@ -8,10 +8,12 @@
|
|||||||
#define __UCPTRIE_H__
|
#define __UCPTRIE_H__
|
||||||
|
|
||||||
#include "unicode/utypes.h"
|
#include "unicode/utypes.h"
|
||||||
|
|
||||||
|
#ifndef U_HIDE_DRAFT_API
|
||||||
|
|
||||||
#include "unicode/localpointer.h"
|
#include "unicode/localpointer.h"
|
||||||
|
#include "unicode/ucpmap.h"
|
||||||
#include "unicode/utf8.h"
|
#include "unicode/utf8.h"
|
||||||
#include "putilimp.h"
|
|
||||||
#include "udataswp.h"
|
|
||||||
|
|
||||||
U_CDECL_BEGIN
|
U_CDECL_BEGIN
|
||||||
|
|
||||||
@ -174,54 +176,6 @@ enum UCPTrieValueWidth {
|
|||||||
typedef enum UCPTrieValueWidth UCPTrieValueWidth;
|
typedef enum UCPTrieValueWidth UCPTrieValueWidth;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/**
|
|
||||||
* Selectors for how ucptrie_getRange() should report value ranges overlapping with surrogates.
|
|
||||||
* Most users should use UCPTRIE_RANGE_NORMAL.
|
|
||||||
*
|
|
||||||
* @see ucptrie_getRange
|
|
||||||
* @draft ICU 63
|
|
||||||
*/
|
|
||||||
enum UCPTrieRangeOption {
|
|
||||||
/**
|
|
||||||
* ucptrie_getRange() enumerates all same-value ranges as stored in the trie.
|
|
||||||
* Most users should use this option.
|
|
||||||
*/
|
|
||||||
UCPTRIE_RANGE_NORMAL,
|
|
||||||
/**
|
|
||||||
* ucptrie_getRange() enumerates all same-value ranges as stored in the trie,
|
|
||||||
* except that lead surrogates (U+D800..U+DBFF) are treated as having the
|
|
||||||
* surrogateValue, which is passed to getRange() as a separate parameter.
|
|
||||||
* The surrogateValue is not transformed via filter().
|
|
||||||
* See U_IS_LEAD(c).
|
|
||||||
*
|
|
||||||
* Most users should use UCPTRIE_RANGE_NORMAL instead.
|
|
||||||
*
|
|
||||||
* This option is useful for tries that map surrogate code *units* to
|
|
||||||
* special values optimized for UTF-16 string processing
|
|
||||||
* or for special error behavior for unpaired surrogates,
|
|
||||||
* but those values are not to be associated with the lead surrogate code *points*.
|
|
||||||
*/
|
|
||||||
UCPTRIE_RANGE_FIXED_LEAD_SURROGATES,
|
|
||||||
/**
|
|
||||||
* ucptrie_getRange() enumerates all same-value ranges as stored in the trie,
|
|
||||||
* except that all surrogates (U+D800..U+DFFF) are treated as having the
|
|
||||||
* surrogateValue, which is passed to getRange() as a separate parameter.
|
|
||||||
* The surrogateValue is not transformed via filter().
|
|
||||||
* See U_IS_SURROGATE(c).
|
|
||||||
*
|
|
||||||
* Most users should use UCPTRIE_RANGE_NORMAL instead.
|
|
||||||
*
|
|
||||||
* This option is useful for tries that map surrogate code *units* to
|
|
||||||
* special values optimized for UTF-16 string processing
|
|
||||||
* or for special error behavior for unpaired surrogates,
|
|
||||||
* but those values are not to be associated with the lead surrogate code *points*.
|
|
||||||
*/
|
|
||||||
UCPTRIE_RANGE_FIXED_ALL_SURROGATES
|
|
||||||
};
|
|
||||||
#ifndef U_IN_DOXYGEN
|
|
||||||
typedef enum UCPTrieRangeOption UCPTrieRangeOption;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Opens a trie from its binary form, stored in 32-bit-aligned memory.
|
* Opens a trie from its binary form, stored in 32-bit-aligned memory.
|
||||||
* Inverse of ucptrie_toBinary().
|
* Inverse of ucptrie_toBinary().
|
||||||
@ -322,30 +276,13 @@ ucptrie_getValueWidth(const UCPTrie *trie);
|
|||||||
U_CAPI uint32_t U_EXPORT2
|
U_CAPI uint32_t U_EXPORT2
|
||||||
ucptrie_get(const UCPTrie *trie, UChar32 c);
|
ucptrie_get(const UCPTrie *trie, UChar32 c);
|
||||||
|
|
||||||
/**
|
|
||||||
* Callback function type: Modifies a trie value.
|
|
||||||
* Optionally called by ucptrie_getRange() or umutablecptrie_getRange().
|
|
||||||
* The modified value will be returned by the getRange function.
|
|
||||||
*
|
|
||||||
* Can be used to ignore some of the value bits,
|
|
||||||
* make a filter for one of several values,
|
|
||||||
* return a value index computed from the trie value, etc.
|
|
||||||
*
|
|
||||||
* @param context an opaque pointer, as passed into the getRange function
|
|
||||||
* @param value a value from the trie
|
|
||||||
* @return the modified value
|
|
||||||
* @draft ICU 63
|
|
||||||
*/
|
|
||||||
typedef uint32_t U_CALLCONV
|
|
||||||
UCPTrieValueFilter(const void *context, uint32_t value);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the last code point such that all those from start to there have the same value.
|
* Returns the last code point such that all those from start to there have the same value.
|
||||||
* Can be used to efficiently iterate over all same-value ranges in a trie.
|
* Can be used to efficiently iterate over all same-value ranges in a trie.
|
||||||
* (This is normally faster than iterating over code points and get()ting each value,
|
* (This is normally faster than iterating over code points and get()ting each value,
|
||||||
* but much slower than a data structure that stores ranges directly.)
|
* but much slower than a data structure that stores ranges directly.)
|
||||||
*
|
*
|
||||||
* If the UCPTrieValueFilter function pointer is not NULL, then
|
* If the UCPMapValueFilter function pointer is not NULL, then
|
||||||
* the value to be delivered is passed through that function, and the return value is the end
|
* the value to be delivered is passed through that function, and the return value is the end
|
||||||
* of the range where all values are modified to the same actual value.
|
* of the range where all values are modified to the same actual value.
|
||||||
* The value is unchanged if that function pointer is NULL.
|
* The value is unchanged if that function pointer is NULL.
|
||||||
@ -354,7 +291,7 @@ UCPTrieValueFilter(const void *context, uint32_t value);
|
|||||||
* \code
|
* \code
|
||||||
* UChar32 start = 0, end;
|
* UChar32 start = 0, end;
|
||||||
* uint32_t value;
|
* uint32_t value;
|
||||||
* while ((end = ucptrie_getRange(trie, start, UCPTRIE_RANGE_NORMAL, 0,
|
* while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0,
|
||||||
* NULL, NULL, &value)) >= 0) {
|
* NULL, NULL, &value)) >= 0) {
|
||||||
* // Work with the range start..end and its value.
|
* // Work with the range start..end and its value.
|
||||||
* start = end + 1;
|
* start = end + 1;
|
||||||
@ -364,8 +301,8 @@ UCPTrieValueFilter(const void *context, uint32_t value);
|
|||||||
* @param trie the trie
|
* @param trie the trie
|
||||||
* @param start range start
|
* @param start range start
|
||||||
* @param option defines whether surrogates are treated normally,
|
* @param option defines whether surrogates are treated normally,
|
||||||
* or as having the surrogateValue; usually UCPTRIE_RANGE_NORMAL
|
* or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL
|
||||||
* @param surrogateValue value for surrogates; ignored if option==UCPTRIE_RANGE_NORMAL
|
* @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL
|
||||||
* @param filter a pointer to a function that may modify the trie data value,
|
* @param filter a pointer to a function that may modify the trie data value,
|
||||||
* or NULL if the values from the trie are to be used unmodified
|
* or NULL if the values from the trie are to be used unmodified
|
||||||
* @param context an opaque pointer that is passed on to the filter function
|
* @param context an opaque pointer that is passed on to the filter function
|
||||||
@ -377,8 +314,8 @@ UCPTrieValueFilter(const void *context, uint32_t value);
|
|||||||
*/
|
*/
|
||||||
U_CAPI UChar32 U_EXPORT2
|
U_CAPI UChar32 U_EXPORT2
|
||||||
ucptrie_getRange(const UCPTrie *trie, UChar32 start,
|
ucptrie_getRange(const UCPTrie *trie, UChar32 start,
|
||||||
UCPTrieRangeOption option, uint32_t surrogateValue,
|
UCPMapRangeOption option, uint32_t surrogateValue,
|
||||||
UCPTrieValueFilter *filter, const void *context, uint32_t *pValue);
|
UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Writes a memory-mappable form of the trie into 32-bit aligned memory.
|
* Writes a memory-mappable form of the trie into 32-bit aligned memory.
|
||||||
@ -704,4 +641,5 @@ ucptrie_internalU8PrevIndex(const UCPTrie *trie, UChar32 c,
|
|||||||
U_CDECL_END
|
U_CDECL_END
|
||||||
|
|
||||||
#endif // U_IN_DOXYGEN
|
#endif // U_IN_DOXYGEN
|
||||||
|
#endif // U_HIDE_DRAFT_API
|
||||||
#endif
|
#endif
|
||||||
|
@ -8,11 +8,13 @@
|
|||||||
#define __UMUTABLECPTRIE_H__
|
#define __UMUTABLECPTRIE_H__
|
||||||
|
|
||||||
#include "unicode/utypes.h"
|
#include "unicode/utypes.h"
|
||||||
|
|
||||||
|
#ifndef U_HIDE_DRAFT_API
|
||||||
|
|
||||||
#include "unicode/localpointer.h"
|
#include "unicode/localpointer.h"
|
||||||
|
#include "unicode/ucpmap.h"
|
||||||
#include "unicode/ucptrie.h"
|
#include "unicode/ucptrie.h"
|
||||||
#include "unicode/utf8.h"
|
#include "unicode/utf8.h"
|
||||||
#include "putilimp.h"
|
|
||||||
#include "udataswp.h"
|
|
||||||
|
|
||||||
U_CDECL_BEGIN
|
U_CDECL_BEGIN
|
||||||
|
|
||||||
@ -102,6 +104,18 @@ U_NAMESPACE_END
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a mutable trie with the same contents as the UCPMap.
|
||||||
|
* You must umutablecptrie_close() the mutable trie once you are done using it.
|
||||||
|
*
|
||||||
|
* @param map the source map
|
||||||
|
* @param pErrorCode an in/out ICU UErrorCode
|
||||||
|
* @return the mutable trie
|
||||||
|
* @draft ICU 63
|
||||||
|
*/
|
||||||
|
U_CAPI UMutableCPTrie * U_EXPORT2
|
||||||
|
umutablecptrie_fromUCPMap(const UCPMap *map, UErrorCode *pErrorCode);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a mutable trie with the same contents as the immutable one.
|
* Creates a mutable trie with the same contents as the immutable one.
|
||||||
* You must umutablecptrie_close() the mutable trie once you are done using it.
|
* You must umutablecptrie_close() the mutable trie once you are done using it.
|
||||||
@ -133,7 +147,7 @@ umutablecptrie_get(const UMutableCPTrie *trie, UChar32 c);
|
|||||||
*
|
*
|
||||||
* The trie can be modified between calls to this function.
|
* The trie can be modified between calls to this function.
|
||||||
*
|
*
|
||||||
* If the UCPTrieValueFilter function pointer is not NULL, then
|
* If the UCPMapValueFilter function pointer is not NULL, then
|
||||||
* the value to be delivered is passed through that function, and the return value is the end
|
* the value to be delivered is passed through that function, and the return value is the end
|
||||||
* of the range where all values are modified to the same actual value.
|
* of the range where all values are modified to the same actual value.
|
||||||
* The value is unchanged if that function pointer is NULL.
|
* The value is unchanged if that function pointer is NULL.
|
||||||
@ -143,8 +157,8 @@ umutablecptrie_get(const UMutableCPTrie *trie, UChar32 c);
|
|||||||
* @param trie the trie
|
* @param trie the trie
|
||||||
* @param start range start
|
* @param start range start
|
||||||
* @param option defines whether surrogates are treated normally,
|
* @param option defines whether surrogates are treated normally,
|
||||||
* or as having the surrogateValue; usually UCPTRIE_RANGE_NORMAL
|
* or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL
|
||||||
* @param surrogateValue value for surrogates; ignored if option==UCPTRIE_RANGE_NORMAL
|
* @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL
|
||||||
* @param filter a pointer to a function that may modify the trie data value,
|
* @param filter a pointer to a function that may modify the trie data value,
|
||||||
* or NULL if the values from the trie are to be used unmodified
|
* or NULL if the values from the trie are to be used unmodified
|
||||||
* @param context an opaque pointer that is passed on to the filter function
|
* @param context an opaque pointer that is passed on to the filter function
|
||||||
@ -156,8 +170,8 @@ umutablecptrie_get(const UMutableCPTrie *trie, UChar32 c);
|
|||||||
*/
|
*/
|
||||||
U_CAPI UChar32 U_EXPORT2
|
U_CAPI UChar32 U_EXPORT2
|
||||||
umutablecptrie_getRange(const UMutableCPTrie *trie, UChar32 start,
|
umutablecptrie_getRange(const UMutableCPTrie *trie, UChar32 start,
|
||||||
UCPTrieRangeOption option, uint32_t surrogateValue,
|
UCPMapRangeOption option, uint32_t surrogateValue,
|
||||||
UCPTrieValueFilter *filter, const void *context, uint32_t *pValue);
|
UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets a value for a code point.
|
* Sets a value for a code point.
|
||||||
@ -223,4 +237,5 @@ umutablecptrie_buildImmutable(UMutableCPTrie *trie, UCPTrieType type, UCPTrieVal
|
|||||||
|
|
||||||
U_CDECL_END
|
U_CDECL_END
|
||||||
|
|
||||||
|
#endif // U_HIDE_DRAFT_API
|
||||||
#endif
|
#endif
|
||||||
|
@ -13,6 +13,7 @@
|
|||||||
#ifndef UNICODESET_H
|
#ifndef UNICODESET_H
|
||||||
#define UNICODESET_H
|
#define UNICODESET_H
|
||||||
|
|
||||||
|
#include "unicode/ucpmap.h"
|
||||||
#include "unicode/unifilt.h"
|
#include "unicode/unifilt.h"
|
||||||
#include "unicode/unistr.h"
|
#include "unicode/unistr.h"
|
||||||
#include "unicode/uset.h"
|
#include "unicode/uset.h"
|
||||||
@ -25,9 +26,8 @@
|
|||||||
U_NAMESPACE_BEGIN
|
U_NAMESPACE_BEGIN
|
||||||
|
|
||||||
// Forward Declarations.
|
// Forward Declarations.
|
||||||
void U_CALLCONV UnicodeSet_initInclusion(int32_t src, UErrorCode &status); /**< @internal */
|
|
||||||
|
|
||||||
class BMPSet;
|
class BMPSet;
|
||||||
|
class CharacterProperties;
|
||||||
class ParsePosition;
|
class ParsePosition;
|
||||||
class RBBIRuleScanner;
|
class RBBIRuleScanner;
|
||||||
class SymbolTable;
|
class SymbolTable;
|
||||||
@ -584,9 +584,8 @@ public:
|
|||||||
//----------------------------------------------------------------
|
//----------------------------------------------------------------
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Make this object represent the range <code>start - end</code>.
|
* Make this object represent the range `start - end`.
|
||||||
* If <code>end > start</code> then this object is set to an
|
* If `start > end` then this object is set to an empty range.
|
||||||
* an empty range.
|
|
||||||
* A frozen set will not be modified.
|
* A frozen set will not be modified.
|
||||||
*
|
*
|
||||||
* @param start first character in the set, inclusive
|
* @param start first character in the set, inclusive
|
||||||
@ -1506,6 +1505,7 @@ private:
|
|||||||
//----------------------------------------------------------------
|
//----------------------------------------------------------------
|
||||||
|
|
||||||
UnicodeSet(const UnicodeSet& o, UBool /* asThawed */);
|
UnicodeSet(const UnicodeSet& o, UBool /* asThawed */);
|
||||||
|
UnicodeSet& copyFrom(const UnicodeSet& o, UBool asThawed);
|
||||||
|
|
||||||
//----------------------------------------------------------------
|
//----------------------------------------------------------------
|
||||||
// Implementation: Pattern parsing
|
// Implementation: Pattern parsing
|
||||||
@ -1614,7 +1614,7 @@ private:
|
|||||||
UnicodeString& rebuiltPat,
|
UnicodeString& rebuiltPat,
|
||||||
UErrorCode& ec);
|
UErrorCode& ec);
|
||||||
|
|
||||||
friend void U_CALLCONV UnicodeSet_initInclusion(int32_t src, UErrorCode &status);
|
friend class CharacterProperties;
|
||||||
static const UnicodeSet* getInclusions(int32_t src, UErrorCode &status);
|
static const UnicodeSet* getInclusions(int32_t src, UErrorCode &status);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -1634,9 +1634,13 @@ private:
|
|||||||
*/
|
*/
|
||||||
void applyFilter(Filter filter,
|
void applyFilter(Filter filter,
|
||||||
void* context,
|
void* context,
|
||||||
int32_t src,
|
const UnicodeSet* inclusions,
|
||||||
UErrorCode &status);
|
UErrorCode &status);
|
||||||
|
|
||||||
|
void applyIntPropertyValue(const UCPMap *map,
|
||||||
|
UCPMapValueFilter *filter, const void *context,
|
||||||
|
UErrorCode &errorCode);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the new pattern to cache.
|
* Set the new pattern to cache.
|
||||||
*/
|
*/
|
||||||
|
@ -33,10 +33,14 @@
|
|||||||
#include "unicode/uchar.h"
|
#include "unicode/uchar.h"
|
||||||
#include "unicode/localpointer.h"
|
#include "unicode/localpointer.h"
|
||||||
|
|
||||||
#ifndef UCNV_H
|
#ifndef USET_DEFINED
|
||||||
struct USet;
|
|
||||||
|
#ifndef U_IN_DOXYGEN
|
||||||
|
#define USET_DEFINED
|
||||||
|
#endif
|
||||||
/**
|
/**
|
||||||
* A UnicodeSet. Use the uset_* API to manipulate. Create with
|
* USet is the C API type corresponding to C++ class UnicodeSet.
|
||||||
|
* Use the uset_* API to manipulate. Create with
|
||||||
* uset_open*, and destroy with uset_close.
|
* uset_open*, and destroy with uset_close.
|
||||||
* @stable ICU 2.4
|
* @stable ICU 2.4
|
||||||
*/
|
*/
|
||||||
|
@ -276,6 +276,10 @@ UnicodeSet::~UnicodeSet() {
|
|||||||
* Assigns this object to be a copy of another.
|
* Assigns this object to be a copy of another.
|
||||||
*/
|
*/
|
||||||
UnicodeSet& UnicodeSet::operator=(const UnicodeSet& o) {
|
UnicodeSet& UnicodeSet::operator=(const UnicodeSet& o) {
|
||||||
|
return copyFrom(o, FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
|
UnicodeSet& UnicodeSet::copyFrom(const UnicodeSet& o, UBool asThawed) {
|
||||||
if (this == &o) {
|
if (this == &o) {
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
@ -294,7 +298,7 @@ UnicodeSet& UnicodeSet::operator=(const UnicodeSet& o) {
|
|||||||
}
|
}
|
||||||
len = o.len;
|
len = o.len;
|
||||||
uprv_memcpy(list, o.list, (size_t)len*sizeof(UChar32));
|
uprv_memcpy(list, o.list, (size_t)len*sizeof(UChar32));
|
||||||
if (o.bmpSet == NULL) {
|
if (o.bmpSet == NULL || asThawed) {
|
||||||
bmpSet = NULL;
|
bmpSet = NULL;
|
||||||
} else {
|
} else {
|
||||||
bmpSet = new BMPSet(*o.bmpSet, list, len);
|
bmpSet = new BMPSet(*o.bmpSet, list, len);
|
||||||
@ -309,7 +313,7 @@ UnicodeSet& UnicodeSet::operator=(const UnicodeSet& o) {
|
|||||||
setToBogus();
|
setToBogus();
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
if (o.stringSpan == NULL) {
|
if (o.stringSpan == NULL || asThawed) {
|
||||||
stringSpan = NULL;
|
stringSpan = NULL;
|
||||||
} else {
|
} else {
|
||||||
stringSpan = new UnicodeSetStringSpan(*o.stringSpan, *strings);
|
stringSpan = new UnicodeSetStringSpan(*o.stringSpan, *strings);
|
||||||
|
@ -36,8 +36,6 @@
|
|||||||
#include "uprops.h"
|
#include "uprops.h"
|
||||||
#include "propname.h"
|
#include "propname.h"
|
||||||
#include "normalizer2impl.h"
|
#include "normalizer2impl.h"
|
||||||
#include "ucase.h"
|
|
||||||
#include "ubidi_props.h"
|
|
||||||
#include "uinvchar.h"
|
#include "uinvchar.h"
|
||||||
#include "uprops.h"
|
#include "uprops.h"
|
||||||
#include "charstr.h"
|
#include "charstr.h"
|
||||||
@ -98,47 +96,13 @@ static const char ASSIGNED[] = "Assigned"; // [:^Cn:]
|
|||||||
U_CDECL_BEGIN
|
U_CDECL_BEGIN
|
||||||
static UBool U_CALLCONV uset_cleanup();
|
static UBool U_CALLCONV uset_cleanup();
|
||||||
|
|
||||||
struct Inclusion {
|
|
||||||
UnicodeSet *fSet;
|
|
||||||
UInitOnce fInitOnce;
|
|
||||||
};
|
|
||||||
static Inclusion gInclusions[UPROPS_SRC_COUNT]; // cached getInclusions()
|
|
||||||
|
|
||||||
static UnicodeSet *uni32Singleton;
|
static UnicodeSet *uni32Singleton;
|
||||||
static icu::UInitOnce uni32InitOnce = U_INITONCE_INITIALIZER;
|
static icu::UInitOnce uni32InitOnce = U_INITONCE_INITIALIZER;
|
||||||
|
|
||||||
//----------------------------------------------------------------
|
|
||||||
// Inclusions list
|
|
||||||
//----------------------------------------------------------------
|
|
||||||
|
|
||||||
// USetAdder implementation
|
|
||||||
// Does not use uset.h to reduce code dependencies
|
|
||||||
static void U_CALLCONV
|
|
||||||
_set_add(USet *set, UChar32 c) {
|
|
||||||
((UnicodeSet *)set)->add(c);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void U_CALLCONV
|
|
||||||
_set_addRange(USet *set, UChar32 start, UChar32 end) {
|
|
||||||
((UnicodeSet *)set)->add(start, end);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void U_CALLCONV
|
|
||||||
_set_addString(USet *set, const UChar *str, int32_t length) {
|
|
||||||
((UnicodeSet *)set)->add(UnicodeString((UBool)(length<0), str, length));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Cleanup function for UnicodeSet
|
* Cleanup function for UnicodeSet
|
||||||
*/
|
*/
|
||||||
static UBool U_CALLCONV uset_cleanup(void) {
|
static UBool U_CALLCONV uset_cleanup(void) {
|
||||||
for(int32_t i = UPROPS_SRC_NONE; i < UPROPS_SRC_COUNT; ++i) {
|
|
||||||
Inclusion &in = gInclusions[i];
|
|
||||||
delete in.fSet;
|
|
||||||
in.fSet = NULL;
|
|
||||||
in.fInitOnce.reset();
|
|
||||||
}
|
|
||||||
|
|
||||||
delete uni32Singleton;
|
delete uni32Singleton;
|
||||||
uni32Singleton = NULL;
|
uni32Singleton = NULL;
|
||||||
uni32InitOnce.reset();
|
uni32InitOnce.reset();
|
||||||
@ -149,119 +113,6 @@ U_CDECL_END
|
|||||||
|
|
||||||
U_NAMESPACE_BEGIN
|
U_NAMESPACE_BEGIN
|
||||||
|
|
||||||
/*
|
|
||||||
Reduce excessive reallocation, and make it easier to detect initialization problems.
|
|
||||||
Usually you don't see smaller sets than this for Unicode 5.0.
|
|
||||||
*/
|
|
||||||
#define DEFAULT_INCLUSION_CAPACITY 3072
|
|
||||||
|
|
||||||
void U_CALLCONV UnicodeSet_initInclusion(int32_t src, UErrorCode &status) {
|
|
||||||
// This function is invoked only via umtx_initOnce().
|
|
||||||
// This function is a friend of class UnicodeSet.
|
|
||||||
|
|
||||||
U_ASSERT(src >=0 && src<UPROPS_SRC_COUNT);
|
|
||||||
UnicodeSet * &incl = gInclusions[src].fSet;
|
|
||||||
U_ASSERT(incl == NULL);
|
|
||||||
|
|
||||||
incl = new UnicodeSet();
|
|
||||||
if (incl == NULL) {
|
|
||||||
status = U_MEMORY_ALLOCATION_ERROR;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
USetAdder sa = {
|
|
||||||
(USet *)incl,
|
|
||||||
_set_add,
|
|
||||||
_set_addRange,
|
|
||||||
_set_addString,
|
|
||||||
NULL, // don't need remove()
|
|
||||||
NULL // don't need removeRange()
|
|
||||||
};
|
|
||||||
|
|
||||||
incl->ensureCapacity(DEFAULT_INCLUSION_CAPACITY, status);
|
|
||||||
switch(src) {
|
|
||||||
case UPROPS_SRC_CHAR:
|
|
||||||
uchar_addPropertyStarts(&sa, &status);
|
|
||||||
break;
|
|
||||||
case UPROPS_SRC_PROPSVEC:
|
|
||||||
upropsvec_addPropertyStarts(&sa, &status);
|
|
||||||
break;
|
|
||||||
case UPROPS_SRC_CHAR_AND_PROPSVEC:
|
|
||||||
uchar_addPropertyStarts(&sa, &status);
|
|
||||||
upropsvec_addPropertyStarts(&sa, &status);
|
|
||||||
break;
|
|
||||||
#if !UCONFIG_NO_NORMALIZATION
|
|
||||||
case UPROPS_SRC_CASE_AND_NORM: {
|
|
||||||
const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(status);
|
|
||||||
if(U_SUCCESS(status)) {
|
|
||||||
impl->addPropertyStarts(&sa, status);
|
|
||||||
}
|
|
||||||
ucase_addPropertyStarts(&sa, &status);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case UPROPS_SRC_NFC: {
|
|
||||||
const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(status);
|
|
||||||
if(U_SUCCESS(status)) {
|
|
||||||
impl->addPropertyStarts(&sa, status);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case UPROPS_SRC_NFKC: {
|
|
||||||
const Normalizer2Impl *impl=Normalizer2Factory::getNFKCImpl(status);
|
|
||||||
if(U_SUCCESS(status)) {
|
|
||||||
impl->addPropertyStarts(&sa, status);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case UPROPS_SRC_NFKC_CF: {
|
|
||||||
const Normalizer2Impl *impl=Normalizer2Factory::getNFKC_CFImpl(status);
|
|
||||||
if(U_SUCCESS(status)) {
|
|
||||||
impl->addPropertyStarts(&sa, status);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case UPROPS_SRC_NFC_CANON_ITER: {
|
|
||||||
const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(status);
|
|
||||||
if(U_SUCCESS(status)) {
|
|
||||||
impl->addCanonIterPropertyStarts(&sa, status);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
case UPROPS_SRC_CASE:
|
|
||||||
ucase_addPropertyStarts(&sa, &status);
|
|
||||||
break;
|
|
||||||
case UPROPS_SRC_BIDI:
|
|
||||||
ubidi_addPropertyStarts(&sa, &status);
|
|
||||||
break;
|
|
||||||
case UPROPS_SRC_INPC:
|
|
||||||
case UPROPS_SRC_INSC:
|
|
||||||
case UPROPS_SRC_VO:
|
|
||||||
uprops_addPropertyStarts((UPropertySource)src, &sa, &status);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
status = U_INTERNAL_PROGRAM_ERROR;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (U_FAILURE(status)) {
|
|
||||||
delete incl;
|
|
||||||
incl = NULL;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
// Compact for caching
|
|
||||||
incl->compact();
|
|
||||||
ucln_common_registerCleanup(UCLN_COMMON_USET, uset_cleanup);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
const UnicodeSet* UnicodeSet::getInclusions(int32_t src, UErrorCode &status) {
|
|
||||||
U_ASSERT(src >=0 && src<UPROPS_SRC_COUNT);
|
|
||||||
Inclusion &i = gInclusions[src];
|
|
||||||
umtx_initOnce(i.fInitOnce, &UnicodeSet_initInclusion, src, status);
|
|
||||||
return i.fSet;
|
|
||||||
}
|
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
// Cache some sets for other services -------------------------------------- ***
|
// Cache some sets for other services -------------------------------------- ***
|
||||||
@ -862,11 +713,6 @@ static UBool numericValueFilter(UChar32 ch, void* context) {
|
|||||||
return u_getNumericValue(ch) == *(double*)context;
|
return u_getNumericValue(ch) == *(double*)context;
|
||||||
}
|
}
|
||||||
|
|
||||||
static UBool generalCategoryMaskFilter(UChar32 ch, void* context) {
|
|
||||||
int32_t value = *(int32_t*)context;
|
|
||||||
return (U_GET_GC_MASK((UChar32) ch) & value) != 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static UBool versionFilter(UChar32 ch, void* context) {
|
static UBool versionFilter(UChar32 ch, void* context) {
|
||||||
static const UVersionInfo none = { 0, 0, 0, 0 };
|
static const UVersionInfo none = { 0, 0, 0, 0 };
|
||||||
UVersionInfo v;
|
UVersionInfo v;
|
||||||
@ -875,16 +721,6 @@ static UBool versionFilter(UChar32 ch, void* context) {
|
|||||||
return uprv_memcmp(&v, &none, sizeof(v)) > 0 && uprv_memcmp(&v, version, sizeof(v)) <= 0;
|
return uprv_memcmp(&v, &none, sizeof(v)) > 0 && uprv_memcmp(&v, version, sizeof(v)) <= 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
UProperty prop;
|
|
||||||
int32_t value;
|
|
||||||
} IntPropertyContext;
|
|
||||||
|
|
||||||
static UBool intPropertyFilter(UChar32 ch, void* context) {
|
|
||||||
IntPropertyContext* c = (IntPropertyContext*)context;
|
|
||||||
return u_getIntPropertyValue((UChar32) ch, c->prop) == c->value;
|
|
||||||
}
|
|
||||||
|
|
||||||
static UBool scriptExtensionsFilter(UChar32 ch, void* context) {
|
static UBool scriptExtensionsFilter(UChar32 ch, void* context) {
|
||||||
return uscript_hasScript(ch, *(UScriptCode*)context);
|
return uscript_hasScript(ch, *(UScriptCode*)context);
|
||||||
}
|
}
|
||||||
@ -896,7 +732,7 @@ static UBool scriptExtensionsFilter(UChar32 ch, void* context) {
|
|||||||
*/
|
*/
|
||||||
void UnicodeSet::applyFilter(UnicodeSet::Filter filter,
|
void UnicodeSet::applyFilter(UnicodeSet::Filter filter,
|
||||||
void* context,
|
void* context,
|
||||||
int32_t src,
|
const UnicodeSet* inclusions,
|
||||||
UErrorCode &status) {
|
UErrorCode &status) {
|
||||||
if (U_FAILURE(status)) return;
|
if (U_FAILURE(status)) return;
|
||||||
|
|
||||||
@ -907,12 +743,8 @@ void UnicodeSet::applyFilter(UnicodeSet::Filter filter,
|
|||||||
// To improve performance, use an inclusions set which
|
// To improve performance, use an inclusions set which
|
||||||
// encodes information about character ranges that are known
|
// encodes information about character ranges that are known
|
||||||
// to have identical properties.
|
// to have identical properties.
|
||||||
// getInclusions(src) contains exactly the first characters of
|
// inclusions contains the first characters of
|
||||||
// same-value ranges for the given properties "source".
|
// same-value ranges for the given property.
|
||||||
const UnicodeSet* inclusions = getInclusions(src, status);
|
|
||||||
if (U_FAILURE(status)) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
clear();
|
clear();
|
||||||
|
|
||||||
@ -949,6 +781,43 @@ void UnicodeSet::applyFilter(UnicodeSet::Filter filter,
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
|
/** Maps map values to 1 if the mask contains their value'th bit, all others to 0. */
|
||||||
|
uint32_t U_CALLCONV generalCategoryMaskFilter(const void *context, uint32_t value) {
|
||||||
|
uint32_t mask = *(const uint32_t *)context;
|
||||||
|
value = U_MASK(value) & mask;
|
||||||
|
if (value != 0) { value = 1; }
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Maps one map value to 1, all others to 0. */
|
||||||
|
uint32_t U_CALLCONV intValueFilter(const void *context, uint32_t value) {
|
||||||
|
uint32_t v = *(const uint32_t *)context;
|
||||||
|
return value == v ? 1 : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
void UnicodeSet::applyIntPropertyValue(const UCPMap *map,
|
||||||
|
UCPMapValueFilter *filter, const void *context,
|
||||||
|
UErrorCode &errorCode) {
|
||||||
|
if (U_FAILURE(errorCode)) { return; }
|
||||||
|
clear();
|
||||||
|
UChar32 start = 0, end;
|
||||||
|
uint32_t value;
|
||||||
|
while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0,
|
||||||
|
filter, context, &value)) >= 0) {
|
||||||
|
if (value != 0) {
|
||||||
|
add(start, end);
|
||||||
|
}
|
||||||
|
start = end + 1;
|
||||||
|
}
|
||||||
|
if (isBogus()) {
|
||||||
|
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) {
|
static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) {
|
||||||
/* Note: we use ' ' in compiler code page */
|
/* Note: we use ' ' in compiler code page */
|
||||||
int32_t j = 0;
|
int32_t j = 0;
|
||||||
@ -976,16 +845,35 @@ static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) {
|
|||||||
|
|
||||||
UnicodeSet&
|
UnicodeSet&
|
||||||
UnicodeSet::applyIntPropertyValue(UProperty prop, int32_t value, UErrorCode& ec) {
|
UnicodeSet::applyIntPropertyValue(UProperty prop, int32_t value, UErrorCode& ec) {
|
||||||
if (U_FAILURE(ec) || isFrozen()) return *this;
|
if (U_FAILURE(ec)) { return *this; }
|
||||||
|
// All of the following check isFrozen() before modifying this set.
|
||||||
if (prop == UCHAR_GENERAL_CATEGORY_MASK) {
|
if (prop == UCHAR_GENERAL_CATEGORY_MASK) {
|
||||||
applyFilter(generalCategoryMaskFilter, &value, UPROPS_SRC_CHAR, ec);
|
const UCPMap *map = u_getIntPropertyMap(UCHAR_GENERAL_CATEGORY, &ec);
|
||||||
|
applyIntPropertyValue(map, generalCategoryMaskFilter, &value, ec);
|
||||||
} else if (prop == UCHAR_SCRIPT_EXTENSIONS) {
|
} else if (prop == UCHAR_SCRIPT_EXTENSIONS) {
|
||||||
|
const UnicodeSet* inclusions = CharacterProperties::getInclusionsForProperty(prop, ec);
|
||||||
UScriptCode script = (UScriptCode)value;
|
UScriptCode script = (UScriptCode)value;
|
||||||
applyFilter(scriptExtensionsFilter, &script, UPROPS_SRC_PROPSVEC, ec);
|
applyFilter(scriptExtensionsFilter, &script, inclusions, ec);
|
||||||
|
} else if (0 <= prop && prop < UCHAR_BINARY_LIMIT) {
|
||||||
|
if (value == 0 || value == 1) {
|
||||||
|
const USet *set = u_getBinaryPropertySet(prop, &ec);
|
||||||
|
if (U_FAILURE(ec)) { return *this; }
|
||||||
|
copyFrom(*UnicodeSet::fromUSet(set), TRUE);
|
||||||
|
if (value == 0) {
|
||||||
|
complement();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
clear();
|
||||||
|
}
|
||||||
|
} else if (UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT) {
|
||||||
|
const UCPMap *map = u_getIntPropertyMap(prop, &ec);
|
||||||
|
applyIntPropertyValue(map, intValueFilter, &value, ec);
|
||||||
} else {
|
} else {
|
||||||
IntPropertyContext c = {prop, value};
|
// This code used to always call getInclusions(property source)
|
||||||
applyFilter(intPropertyFilter, &c, uprops_getSource(prop), ec);
|
// which sets an error for an unsupported property.
|
||||||
|
ec = U_ILLEGAL_ARGUMENT_ERROR;
|
||||||
|
// Otherwise we would just clear() this set because
|
||||||
|
// getIntPropertyValue(c, prop) returns 0 for all code points.
|
||||||
}
|
}
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
@ -1061,7 +949,8 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
|
|||||||
if (*end != 0) {
|
if (*end != 0) {
|
||||||
FAIL(ec);
|
FAIL(ec);
|
||||||
}
|
}
|
||||||
applyFilter(numericValueFilter, &val, UPROPS_SRC_CHAR, ec);
|
applyFilter(numericValueFilter, &val,
|
||||||
|
CharacterProperties::getInclusionsForProperty(p, ec), ec);
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
case UCHAR_NAME:
|
case UCHAR_NAME:
|
||||||
@ -1090,7 +979,8 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
|
|||||||
if (!mungeCharName(buf, vname.data(), sizeof(buf))) FAIL(ec);
|
if (!mungeCharName(buf, vname.data(), sizeof(buf))) FAIL(ec);
|
||||||
UVersionInfo version;
|
UVersionInfo version;
|
||||||
u_versionFromString(version, buf);
|
u_versionFromString(version, buf);
|
||||||
applyFilter(versionFilter, &version, UPROPS_SRC_PROPSVEC, ec);
|
applyFilter(versionFilter, &version,
|
||||||
|
CharacterProperties::getInclusionsForProperty(p, ec), ec);
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
case UCHAR_SCRIPT_EXTENSIONS:
|
case UCHAR_SCRIPT_EXTENSIONS:
|
||||||
|
@ -605,7 +605,7 @@ uprops_addPropertyStarts(UPropertySource src, const USetAdder *sa, UErrorCode *p
|
|||||||
|
|
||||||
// Add the start code point of each same-value range of the trie.
|
// Add the start code point of each same-value range of the trie.
|
||||||
UChar32 start = 0, end;
|
UChar32 start = 0, end;
|
||||||
while ((end = ucptrie_getRange(trie, start, UCPTRIE_RANGE_NORMAL, 0,
|
while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0,
|
||||||
nullptr, nullptr, nullptr)) >= 0) {
|
nullptr, nullptr, nullptr)) >= 0) {
|
||||||
sa->add(sa->set, start);
|
sa->add(sa->set, start);
|
||||||
start = end + 1;
|
start = end + 1;
|
||||||
|
@ -459,6 +459,13 @@ U_NAMESPACE_BEGIN
|
|||||||
|
|
||||||
class UnicodeSet;
|
class UnicodeSet;
|
||||||
|
|
||||||
|
class CharacterProperties {
|
||||||
|
public:
|
||||||
|
CharacterProperties() = delete;
|
||||||
|
static void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode);
|
||||||
|
static const UnicodeSet *getInclusionsForProperty(UProperty prop, UErrorCode &errorCode);
|
||||||
|
};
|
||||||
|
|
||||||
// implemented in uniset_props.cpp
|
// implemented in uniset_props.cpp
|
||||||
U_CFUNC UnicodeSet *
|
U_CFUNC UnicodeSet *
|
||||||
uniset_getUnicode32Instance(UErrorCode &errorCode);
|
uniset_getUnicode32Instance(UErrorCode &errorCode);
|
||||||
|
@ -61,6 +61,8 @@ static void TestPropertyNames(void);
|
|||||||
static void TestPropertyValues(void);
|
static void TestPropertyValues(void);
|
||||||
static void TestConsistency(void);
|
static void TestConsistency(void);
|
||||||
static void TestCaseFolding(void);
|
static void TestCaseFolding(void);
|
||||||
|
static void TestBinaryCharacterPropertiesAPI(void);
|
||||||
|
static void TestIntCharacterPropertiesAPI(void);
|
||||||
|
|
||||||
/* internal methods used */
|
/* internal methods used */
|
||||||
static int32_t MakeProp(char* str);
|
static int32_t MakeProp(char* str);
|
||||||
@ -196,6 +198,10 @@ void addUnicodeTest(TestNode** root)
|
|||||||
addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues");
|
addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues");
|
||||||
addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency");
|
addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency");
|
||||||
addTest(root, &TestCaseFolding, "tsutil/cucdtst/TestCaseFolding");
|
addTest(root, &TestCaseFolding, "tsutil/cucdtst/TestCaseFolding");
|
||||||
|
addTest(root, &TestBinaryCharacterPropertiesAPI,
|
||||||
|
"tsutil/cucdtst/TestBinaryCharacterPropertiesAPI");
|
||||||
|
addTest(root, &TestIntCharacterPropertiesAPI,
|
||||||
|
"tsutil/cucdtst/TestIntCharacterPropertiesAPI");
|
||||||
}
|
}
|
||||||
|
|
||||||
/*==================================================== */
|
/*==================================================== */
|
||||||
@ -3522,3 +3528,41 @@ TestCaseFolding() {
|
|||||||
|
|
||||||
uset_close(data.notSeen);
|
uset_close(data.notSeen);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void TestBinaryCharacterPropertiesAPI() {
|
||||||
|
// API test only. See intltest/ucdtest.cpp for functional test.
|
||||||
|
UErrorCode errorCode = U_ZERO_ERROR;
|
||||||
|
const USet *set = u_getBinaryPropertySet(-1, &errorCode);
|
||||||
|
if (U_SUCCESS(errorCode)) {
|
||||||
|
log_err("u_getBinaryPropertySet(-1) did not fail\n");
|
||||||
|
}
|
||||||
|
errorCode = U_ZERO_ERROR;
|
||||||
|
set = u_getBinaryPropertySet(UCHAR_BINARY_LIMIT, &errorCode);
|
||||||
|
if (U_SUCCESS(errorCode)) {
|
||||||
|
log_err("u_getBinaryPropertySet(UCHAR_BINARY_LIMIT) did not fail\n");
|
||||||
|
}
|
||||||
|
errorCode = U_ZERO_ERROR;
|
||||||
|
set = u_getBinaryPropertySet(UCHAR_WHITE_SPACE, &errorCode);
|
||||||
|
if (!uset_contains(set, 0x20) || uset_contains(set, 0x61)) {
|
||||||
|
log_err("u_getBinaryPropertySet(UCHAR_WHITE_SPACE) wrong contents\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void TestIntCharacterPropertiesAPI() {
|
||||||
|
// API test only. See intltest/ucdtest.cpp for functional test.
|
||||||
|
UErrorCode errorCode = U_ZERO_ERROR;
|
||||||
|
const UCPMap *map = u_getIntPropertyMap(UCHAR_INT_START - 1, &errorCode);
|
||||||
|
if (U_SUCCESS(errorCode)) {
|
||||||
|
log_err("u_getIntPropertyMap(UCHAR_INT_START - 1) did not fail\n");
|
||||||
|
}
|
||||||
|
errorCode = U_ZERO_ERROR;
|
||||||
|
map = u_getIntPropertyMap(UCHAR_INT_LIMIT, &errorCode);
|
||||||
|
if (U_SUCCESS(errorCode)) {
|
||||||
|
log_err("u_getIntPropertyMap(UCHAR_INT_LIMIT) did not fail\n");
|
||||||
|
}
|
||||||
|
errorCode = U_ZERO_ERROR;
|
||||||
|
map = u_getIntPropertyMap(UCHAR_GENERAL_CATEGORY, &errorCode);
|
||||||
|
if (ucpmap_get(map, 0x20) != U_SPACE_SEPARATOR || ucpmap_get(map, 0x23456) != U_OTHER_LETTER) {
|
||||||
|
log_err("u_getIntPropertyMap(UCHAR_GENERAL_CATEGORY) wrong contents\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -107,11 +107,11 @@ static UChar32 iterStarts[] = {
|
|||||||
|
|
||||||
static void
|
static void
|
||||||
testTrieGetRanges(const char *testName, const UCPTrie *trie, const UMutableCPTrie *mutableTrie,
|
testTrieGetRanges(const char *testName, const UCPTrie *trie, const UMutableCPTrie *mutableTrie,
|
||||||
UCPTrieRangeOption option, uint32_t surrValue,
|
UCPMapRangeOption option, uint32_t surrValue,
|
||||||
const CheckRange checkRanges[], int32_t countCheckRanges) {
|
const CheckRange checkRanges[], int32_t countCheckRanges) {
|
||||||
const char *const typeName = trie == NULL ? "mutableTrie" : "trie";
|
const char *const typeName = trie == NULL ? "mutableTrie" : "trie";
|
||||||
const char *const optionName = option == UCPTRIE_RANGE_NORMAL ? "normal" :
|
const char *const optionName = option == UCPMAP_RANGE_NORMAL ? "normal" :
|
||||||
option == UCPTRIE_RANGE_FIXED_LEAD_SURROGATES ? "fixedLeadSurr" : "fixedAllSurr";
|
option == UCPMAP_RANGE_FIXED_LEAD_SURROGATES ? "fixedLeadSurr" : "fixedAllSurr";
|
||||||
char name[80];
|
char name[80];
|
||||||
int32_t s;
|
int32_t s;
|
||||||
for (s = 0; s < UPRV_LENGTHOF(iterStarts); ++s) {
|
for (s = 0; s < UPRV_LENGTHOF(iterStarts); ++s) {
|
||||||
@ -690,7 +690,7 @@ testTrie(const char *testName, const UCPTrie *trie,
|
|||||||
UCPTrieType type, UCPTrieValueWidth valueWidth,
|
UCPTrieType type, UCPTrieValueWidth valueWidth,
|
||||||
const CheckRange checkRanges[], int32_t countCheckRanges) {
|
const CheckRange checkRanges[], int32_t countCheckRanges) {
|
||||||
testTrieGetters(testName, trie, type, valueWidth, checkRanges, countCheckRanges);
|
testTrieGetters(testName, trie, type, valueWidth, checkRanges, countCheckRanges);
|
||||||
testTrieGetRanges(testName, trie, NULL, UCPTRIE_RANGE_NORMAL, 0, checkRanges, countCheckRanges);
|
testTrieGetRanges(testName, trie, NULL, UCPMAP_RANGE_NORMAL, 0, checkRanges, countCheckRanges);
|
||||||
if (type == UCPTRIE_TYPE_FAST) {
|
if (type == UCPTRIE_TYPE_FAST) {
|
||||||
testTrieUTF16(testName, trie, valueWidth, checkRanges, countCheckRanges);
|
testTrieUTF16(testName, trie, valueWidth, checkRanges, countCheckRanges);
|
||||||
testTrieUTF8(testName, trie, valueWidth, checkRanges, countCheckRanges);
|
testTrieUTF8(testName, trie, valueWidth, checkRanges, countCheckRanges);
|
||||||
@ -701,7 +701,7 @@ static void
|
|||||||
testBuilder(const char *testName, const UMutableCPTrie *mutableTrie,
|
testBuilder(const char *testName, const UMutableCPTrie *mutableTrie,
|
||||||
const CheckRange checkRanges[], int32_t countCheckRanges) {
|
const CheckRange checkRanges[], int32_t countCheckRanges) {
|
||||||
testBuilderGetters(testName, mutableTrie, checkRanges, countCheckRanges);
|
testBuilderGetters(testName, mutableTrie, checkRanges, countCheckRanges);
|
||||||
testTrieGetRanges(testName, NULL, mutableTrie, UCPTRIE_RANGE_NORMAL, 0, checkRanges, countCheckRanges);
|
testTrieGetRanges(testName, NULL, mutableTrie, UCPMAP_RANGE_NORMAL, 0, checkRanges, countCheckRanges);
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t storage[120000];
|
static uint32_t storage[120000];
|
||||||
@ -1366,7 +1366,7 @@ MuchDataTest(void) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void testGetRangesFixedSurr(const char *testName, const UMutableCPTrie *mutableTrie,
|
static void testGetRangesFixedSurr(const char *testName, const UMutableCPTrie *mutableTrie,
|
||||||
UCPTrieRangeOption option,
|
UCPMapRangeOption option,
|
||||||
const CheckRange checkRanges[], int32_t countCheckRanges) {
|
const CheckRange checkRanges[], int32_t countCheckRanges) {
|
||||||
testTrieGetRanges(testName, NULL, mutableTrie, option, 5, checkRanges, countCheckRanges);
|
testTrieGetRanges(testName, NULL, mutableTrie, option, 5, checkRanges, countCheckRanges);
|
||||||
UErrorCode errorCode = U_ZERO_ERROR;
|
UErrorCode errorCode = U_ZERO_ERROR;
|
||||||
@ -1454,9 +1454,9 @@ TrieTestGetRangesFixedSurr(void) {
|
|||||||
if (mutableTrie == NULL) {
|
if (mutableTrie == NULL) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
testGetRangesFixedSurr("fixedLeadSurr1", mutableTrie, UCPTRIE_RANGE_FIXED_LEAD_SURROGATES,
|
testGetRangesFixedSurr("fixedLeadSurr1", mutableTrie, UCPMAP_RANGE_FIXED_LEAD_SURROGATES,
|
||||||
checkRangesFixedLeadSurr1, UPRV_LENGTHOF(checkRangesFixedLeadSurr1));
|
checkRangesFixedLeadSurr1, UPRV_LENGTHOF(checkRangesFixedLeadSurr1));
|
||||||
testGetRangesFixedSurr("fixedAllSurr1", mutableTrie, UCPTRIE_RANGE_FIXED_ALL_SURROGATES,
|
testGetRangesFixedSurr("fixedAllSurr1", mutableTrie, UCPMAP_RANGE_FIXED_ALL_SURROGATES,
|
||||||
checkRangesFixedAllSurr1, UPRV_LENGTHOF(checkRangesFixedAllSurr1));
|
checkRangesFixedAllSurr1, UPRV_LENGTHOF(checkRangesFixedAllSurr1));
|
||||||
// Setting a range in the middle of lead surrogates makes no difference.
|
// Setting a range in the middle of lead surrogates makes no difference.
|
||||||
umutablecptrie_setRange(mutableTrie, 0xd844, 0xd899, 5, &errorCode);
|
umutablecptrie_setRange(mutableTrie, 0xd844, 0xd899, 5, &errorCode);
|
||||||
@ -1465,7 +1465,7 @@ TrieTestGetRangesFixedSurr(void) {
|
|||||||
umutablecptrie_close(mutableTrie);
|
umutablecptrie_close(mutableTrie);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
testGetRangesFixedSurr("fixedLeadSurr2", mutableTrie, UCPTRIE_RANGE_FIXED_LEAD_SURROGATES,
|
testGetRangesFixedSurr("fixedLeadSurr2", mutableTrie, UCPMAP_RANGE_FIXED_LEAD_SURROGATES,
|
||||||
checkRangesFixedLeadSurr1, UPRV_LENGTHOF(checkRangesFixedLeadSurr1));
|
checkRangesFixedLeadSurr1, UPRV_LENGTHOF(checkRangesFixedLeadSurr1));
|
||||||
// Bridge the gap before the lead surrogates.
|
// Bridge the gap before the lead surrogates.
|
||||||
umutablecptrie_set(mutableTrie, 0xd7ff, 5, &errorCode);
|
umutablecptrie_set(mutableTrie, 0xd7ff, 5, &errorCode);
|
||||||
@ -1474,9 +1474,9 @@ TrieTestGetRangesFixedSurr(void) {
|
|||||||
umutablecptrie_close(mutableTrie);
|
umutablecptrie_close(mutableTrie);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
testGetRangesFixedSurr("fixedLeadSurr3", mutableTrie, UCPTRIE_RANGE_FIXED_LEAD_SURROGATES,
|
testGetRangesFixedSurr("fixedLeadSurr3", mutableTrie, UCPMAP_RANGE_FIXED_LEAD_SURROGATES,
|
||||||
checkRangesFixedLeadSurr3, UPRV_LENGTHOF(checkRangesFixedLeadSurr3));
|
checkRangesFixedLeadSurr3, UPRV_LENGTHOF(checkRangesFixedLeadSurr3));
|
||||||
testGetRangesFixedSurr("fixedAllSurr3", mutableTrie, UCPTRIE_RANGE_FIXED_ALL_SURROGATES,
|
testGetRangesFixedSurr("fixedAllSurr3", mutableTrie, UCPMAP_RANGE_FIXED_ALL_SURROGATES,
|
||||||
checkRangesFixedAllSurr3, UPRV_LENGTHOF(checkRangesFixedAllSurr3));
|
checkRangesFixedAllSurr3, UPRV_LENGTHOF(checkRangesFixedAllSurr3));
|
||||||
// Bridge the gap after the trail surrogates.
|
// Bridge the gap after the trail surrogates.
|
||||||
umutablecptrie_set(mutableTrie, 0xe000, 5, &errorCode);
|
umutablecptrie_set(mutableTrie, 0xe000, 5, &errorCode);
|
||||||
@ -1485,7 +1485,7 @@ TrieTestGetRangesFixedSurr(void) {
|
|||||||
umutablecptrie_close(mutableTrie);
|
umutablecptrie_close(mutableTrie);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
testGetRangesFixedSurr("fixedSurr4", mutableTrie, UCPTRIE_RANGE_FIXED_ALL_SURROGATES,
|
testGetRangesFixedSurr("fixedSurr4", mutableTrie, UCPMAP_RANGE_FIXED_ALL_SURROGATES,
|
||||||
checkRangesFixedSurr4, UPRV_LENGTHOF(checkRangesFixedSurr4));
|
checkRangesFixedSurr4, UPRV_LENGTHOF(checkRangesFixedSurr4));
|
||||||
umutablecptrie_close(mutableTrie);
|
umutablecptrie_close(mutableTrie);
|
||||||
}
|
}
|
||||||
|
@ -7,13 +7,16 @@
|
|||||||
|
|
||||||
#include "unicode/ustring.h"
|
#include "unicode/ustring.h"
|
||||||
#include "unicode/uchar.h"
|
#include "unicode/uchar.h"
|
||||||
|
#include "unicode/ucpmap.h"
|
||||||
#include "unicode/uniset.h"
|
#include "unicode/uniset.h"
|
||||||
#include "unicode/putil.h"
|
#include "unicode/putil.h"
|
||||||
#include "unicode/uscript.h"
|
#include "unicode/uscript.h"
|
||||||
|
#include "unicode/uset.h"
|
||||||
#include "cstring.h"
|
#include "cstring.h"
|
||||||
#include "hash.h"
|
#include "hash.h"
|
||||||
#include "patternprops.h"
|
#include "patternprops.h"
|
||||||
#include "normalizer2impl.h"
|
#include "normalizer2impl.h"
|
||||||
|
#include "testutil.h"
|
||||||
#include "uparse.h"
|
#include "uparse.h"
|
||||||
#include "ucdtest.h"
|
#include "ucdtest.h"
|
||||||
|
|
||||||
@ -67,6 +70,8 @@ void UnicodeTest::runIndexedTest( int32_t index, UBool exec, const char* &name,
|
|||||||
TESTCASE_AUTO(TestVerticalOrientation);
|
TESTCASE_AUTO(TestVerticalOrientation);
|
||||||
TESTCASE_AUTO(TestDefaultScriptExtensions);
|
TESTCASE_AUTO(TestDefaultScriptExtensions);
|
||||||
TESTCASE_AUTO(TestInvalidCodePointFolding);
|
TESTCASE_AUTO(TestInvalidCodePointFolding);
|
||||||
|
TESTCASE_AUTO(TestBinaryCharacterProperties);
|
||||||
|
TESTCASE_AUTO(TestIntCharacterProperties);
|
||||||
TESTCASE_AUTO_END;
|
TESTCASE_AUTO_END;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -615,3 +620,73 @@ void UnicodeTest::TestInvalidCodePointFolding(void) {
|
|||||||
cp, u_foldCase(cp, U_FOLD_CASE_EXCLUDE_SPECIAL_I));
|
cp, u_foldCase(cp, U_FOLD_CASE_EXCLUDE_SPECIAL_I));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void UnicodeTest::TestBinaryCharacterProperties() {
|
||||||
|
IcuTestErrorCode errorCode(*this, "TestBinaryCharacterProperties()");
|
||||||
|
// Spot-check getBinaryPropertySet() vs. hasBinaryProperty().
|
||||||
|
for (int32_t prop = 0; prop < UCHAR_BINARY_LIMIT; ++prop) {
|
||||||
|
const USet *uset = u_getBinaryPropertySet((UProperty)prop, errorCode);
|
||||||
|
if (errorCode.errIfFailureAndReset("u_getBinaryPropertySet(%d)", (int)prop)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const UnicodeSet &set = *UnicodeSet::fromUSet(uset);
|
||||||
|
int32_t size = set.size();
|
||||||
|
if (size == 0) {
|
||||||
|
assertFalse(UnicodeString("!hasBinaryProperty(U+0020, ") + prop + u")",
|
||||||
|
u_hasBinaryProperty(0x20, (UProperty)prop));
|
||||||
|
assertFalse(UnicodeString("!hasBinaryProperty(U+0061, ") + prop + u")",
|
||||||
|
u_hasBinaryProperty(0x61, (UProperty)prop));
|
||||||
|
assertFalse(UnicodeString("!hasBinaryProperty(U+4E00, ") + prop + u")",
|
||||||
|
u_hasBinaryProperty(0x4e00, (UProperty)prop));
|
||||||
|
} else {
|
||||||
|
UChar32 c = set.charAt(0);
|
||||||
|
if (c > 0) {
|
||||||
|
assertFalse(
|
||||||
|
UnicodeString("!hasBinaryProperty(") + TestUtility::hex(c - 1) +
|
||||||
|
u", " + prop + u")",
|
||||||
|
u_hasBinaryProperty(c - 1, (UProperty)prop));
|
||||||
|
}
|
||||||
|
assertTrue(
|
||||||
|
UnicodeString("hasBinaryProperty(") + TestUtility::hex(c) +
|
||||||
|
u", " + prop + u")",
|
||||||
|
u_hasBinaryProperty(c, (UProperty)prop));
|
||||||
|
c = set.charAt(size - 1);
|
||||||
|
assertTrue(
|
||||||
|
UnicodeString("hasBinaryProperty(") + TestUtility::hex(c) +
|
||||||
|
u", " + prop + u")",
|
||||||
|
u_hasBinaryProperty(c, (UProperty)prop));
|
||||||
|
if (c < 0x10ffff) {
|
||||||
|
assertFalse(
|
||||||
|
UnicodeString("!hasBinaryProperty(") + TestUtility::hex(c + 1) +
|
||||||
|
u", " + prop + u")",
|
||||||
|
u_hasBinaryProperty(c + 1, (UProperty)prop));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void UnicodeTest::TestIntCharacterProperties() {
|
||||||
|
IcuTestErrorCode errorCode(*this, "TestIntCharacterProperties()");
|
||||||
|
// Spot-check getIntPropertyMap() vs. getIntPropertyValue().
|
||||||
|
for (int32_t prop = UCHAR_INT_START; prop < UCHAR_INT_LIMIT; ++prop) {
|
||||||
|
const UCPMap *map = u_getIntPropertyMap((UProperty)prop, errorCode);
|
||||||
|
if (errorCode.errIfFailureAndReset("u_getIntPropertyMap(%d)", (int)prop)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
uint32_t value;
|
||||||
|
UChar32 end = ucpmap_getRange(map, 0, UCPMAP_RANGE_NORMAL, 0, nullptr, nullptr, &value);
|
||||||
|
assertTrue("int property first range", end >= 0);
|
||||||
|
UChar32 c = end / 2;
|
||||||
|
assertEquals(UnicodeString("int property first range value at ") + TestUtility::hex(c),
|
||||||
|
u_getIntPropertyValue(c, (UProperty)prop), value);
|
||||||
|
end = ucpmap_getRange(map, 0x5000, UCPMAP_RANGE_NORMAL, 0, nullptr, nullptr, &value);
|
||||||
|
assertTrue("int property later range", end >= 0);
|
||||||
|
assertEquals(UnicodeString("int property later range value at ") + TestUtility::hex(end),
|
||||||
|
u_getIntPropertyValue(end, (UProperty)prop), value);
|
||||||
|
// ucpmap_get() API coverage
|
||||||
|
// TODO: move to cucdtst.c
|
||||||
|
assertEquals(
|
||||||
|
"int property upcmap_get(U+0061)",
|
||||||
|
u_getIntPropertyValue(0x61, (UProperty)prop), ucpmap_get(map, 0x61));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -46,6 +46,8 @@ public:
|
|||||||
void TestVerticalOrientation();
|
void TestVerticalOrientation();
|
||||||
void TestDefaultScriptExtensions();
|
void TestDefaultScriptExtensions();
|
||||||
void TestInvalidCodePointFolding();
|
void TestInvalidCodePointFolding();
|
||||||
|
void TestBinaryCharacterProperties();
|
||||||
|
void TestIntCharacterProperties();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
|
@ -650,7 +650,7 @@ LocalUCPTriePointer Normalizer2DataBuilder::processData() {
|
|||||||
// First check that surrogate code *points* are inert.
|
// First check that surrogate code *points* are inert.
|
||||||
// The parser should have rejected values/mappings for them.
|
// The parser should have rejected values/mappings for them.
|
||||||
uint32_t value;
|
uint32_t value;
|
||||||
UChar32 end = umutablecptrie_getRange(norm16Trie, 0xd800, UCPTRIE_RANGE_NORMAL, 0,
|
UChar32 end = umutablecptrie_getRange(norm16Trie, 0xd800, UCPMAP_RANGE_NORMAL, 0,
|
||||||
nullptr, nullptr, &value);
|
nullptr, nullptr, &value);
|
||||||
if (value != Normalizer2Impl::INERT || end < 0xdfff) {
|
if (value != Normalizer2Impl::INERT || end < 0xdfff) {
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
@ -665,7 +665,7 @@ LocalUCPTriePointer Normalizer2DataBuilder::processData() {
|
|||||||
end = 0;
|
end = 0;
|
||||||
for (UChar32 start = 0x10000;;) {
|
for (UChar32 start = 0x10000;;) {
|
||||||
if (start > end) {
|
if (start > end) {
|
||||||
end = umutablecptrie_getRange(norm16Trie, start, UCPTRIE_RANGE_NORMAL, 0,
|
end = umutablecptrie_getRange(norm16Trie, start, UCPMAP_RANGE_NORMAL, 0,
|
||||||
nullptr, nullptr, &value);
|
nullptr, nullptr, &value);
|
||||||
if (end < 0) { break; }
|
if (end < 0) { break; }
|
||||||
}
|
}
|
||||||
|
@ -156,7 +156,7 @@ UBool Norms::combinesWithCCBetween(const Norm &norm, uint8_t lowCC, int32_t high
|
|||||||
void Norms::enumRanges(Enumerator &e) {
|
void Norms::enumRanges(Enumerator &e) {
|
||||||
UChar32 start = 0, end;
|
UChar32 start = 0, end;
|
||||||
uint32_t i;
|
uint32_t i;
|
||||||
while ((end = umutablecptrie_getRange(normTrie, start, UCPTRIE_RANGE_NORMAL, 0,
|
while ((end = umutablecptrie_getRange(normTrie, start, UCPMAP_RANGE_NORMAL, 0,
|
||||||
nullptr, nullptr, &i)) >= 0) {
|
nullptr, nullptr, &i)) >= 0) {
|
||||||
if (i > 0) {
|
if (i > 0) {
|
||||||
e.rangeHandler(start, end, norms[i]);
|
e.rangeHandler(start, end, norms[i]);
|
||||||
|
@ -0,0 +1,86 @@
|
|||||||
|
// © 2018 and later: Unicode, Inc. and others.
|
||||||
|
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||||
|
package com.ibm.icu.impl;
|
||||||
|
|
||||||
|
import com.ibm.icu.text.UnicodeSet;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Properties functionality above class UCharacterProperty
|
||||||
|
* but below class CharacterProperties and class UnicodeSet.
|
||||||
|
*/
|
||||||
|
public final class CharacterPropertiesImpl {
|
||||||
|
/**
|
||||||
|
* A set of all characters _except_ the second through last characters of
|
||||||
|
* certain ranges. These ranges are ranges of characters whose
|
||||||
|
* properties are all exactly alike, e.g. CJK Ideographs from
|
||||||
|
* U+4E00 to U+9FA5.
|
||||||
|
*/
|
||||||
|
private static final UnicodeSet inclusions[] = new UnicodeSet[UCharacterProperty.SRC_COUNT];
|
||||||
|
|
||||||
|
/** For {@link UnicodeSet#setDefaultXSymbolTable}. */
|
||||||
|
public static synchronized void clear() {
|
||||||
|
for (int i = 0; i < inclusions.length; ++i) {
|
||||||
|
inclusions[i] = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static synchronized UnicodeSet getInclusionsForSource(int src) {
|
||||||
|
if (inclusions[src] == null) {
|
||||||
|
UnicodeSet incl = new UnicodeSet();
|
||||||
|
switch(src) {
|
||||||
|
case UCharacterProperty.SRC_CHAR:
|
||||||
|
UCharacterProperty.INSTANCE.addPropertyStarts(incl);
|
||||||
|
break;
|
||||||
|
case UCharacterProperty.SRC_PROPSVEC:
|
||||||
|
UCharacterProperty.INSTANCE.upropsvec_addPropertyStarts(incl);
|
||||||
|
break;
|
||||||
|
case UCharacterProperty.SRC_CHAR_AND_PROPSVEC:
|
||||||
|
UCharacterProperty.INSTANCE.addPropertyStarts(incl);
|
||||||
|
UCharacterProperty.INSTANCE.upropsvec_addPropertyStarts(incl);
|
||||||
|
break;
|
||||||
|
case UCharacterProperty.SRC_CASE_AND_NORM:
|
||||||
|
Norm2AllModes.getNFCInstance().impl.addPropertyStarts(incl);
|
||||||
|
UCaseProps.INSTANCE.addPropertyStarts(incl);
|
||||||
|
break;
|
||||||
|
case UCharacterProperty.SRC_NFC:
|
||||||
|
Norm2AllModes.getNFCInstance().impl.addPropertyStarts(incl);
|
||||||
|
break;
|
||||||
|
case UCharacterProperty.SRC_NFKC:
|
||||||
|
Norm2AllModes.getNFKCInstance().impl.addPropertyStarts(incl);
|
||||||
|
break;
|
||||||
|
case UCharacterProperty.SRC_NFKC_CF:
|
||||||
|
Norm2AllModes.getNFKC_CFInstance().impl.addPropertyStarts(incl);
|
||||||
|
break;
|
||||||
|
case UCharacterProperty.SRC_NFC_CANON_ITER:
|
||||||
|
Norm2AllModes.getNFCInstance().impl.addCanonIterPropertyStarts(incl);
|
||||||
|
break;
|
||||||
|
case UCharacterProperty.SRC_CASE:
|
||||||
|
UCaseProps.INSTANCE.addPropertyStarts(incl);
|
||||||
|
break;
|
||||||
|
case UCharacterProperty.SRC_BIDI:
|
||||||
|
UBiDiProps.INSTANCE.addPropertyStarts(incl);
|
||||||
|
break;
|
||||||
|
case UCharacterProperty.SRC_INPC:
|
||||||
|
case UCharacterProperty.SRC_INSC:
|
||||||
|
case UCharacterProperty.SRC_VO:
|
||||||
|
UCharacterProperty.INSTANCE.ulayout_addPropertyStarts(src, incl);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new IllegalStateException("getInclusions(unknown src " + src + ")");
|
||||||
|
}
|
||||||
|
// We do not freeze() the set because we only iterate over it,
|
||||||
|
// rather than testing contains(),
|
||||||
|
// so the extra time and memory to optimize that are not necessary.
|
||||||
|
inclusions[src] = incl;
|
||||||
|
}
|
||||||
|
return inclusions[src];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a mutable UnicodeSet -- do not modify!
|
||||||
|
*/
|
||||||
|
public static UnicodeSet getInclusionsForProperty(int prop) {
|
||||||
|
int src = UCharacterProperty.INSTANCE.getSource(prop);
|
||||||
|
return getInclusionsForSource(src);
|
||||||
|
}
|
||||||
|
}
|
@ -1535,7 +1535,7 @@ public final class UCharacterProperty
|
|||||||
return -1; // undefined
|
return -1; // undefined
|
||||||
}
|
}
|
||||||
|
|
||||||
public final int getSource(int which) {
|
final int getSource(int which) {
|
||||||
if(which<UProperty.BINARY_START) {
|
if(which<UProperty.BINARY_START) {
|
||||||
return SRC_NONE; /* undefined */
|
return SRC_NONE; /* undefined */
|
||||||
} else if(which<UProperty.BINARY_LIMIT) {
|
} else if(which<UProperty.BINARY_LIMIT) {
|
||||||
|
@ -0,0 +1,158 @@
|
|||||||
|
// © 2018 and later: Unicode, Inc. and others.
|
||||||
|
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||||
|
|
||||||
|
package com.ibm.icu.lang;
|
||||||
|
|
||||||
|
import com.ibm.icu.impl.CharacterPropertiesImpl;
|
||||||
|
import com.ibm.icu.text.UnicodeSet;
|
||||||
|
import com.ibm.icu.util.CodePointMap;
|
||||||
|
import com.ibm.icu.util.CodePointTrie;
|
||||||
|
import com.ibm.icu.util.MutableCodePointTrie;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets and maps for Unicode properties.
|
||||||
|
* The methods here return an object per property:
|
||||||
|
* A set for each ICU-supported binary property with all code points for which the property is true.
|
||||||
|
* A map for each ICU-supported enumerated/catalog/int-valued property
|
||||||
|
* which maps all Unicode code points to their values for that property.
|
||||||
|
*
|
||||||
|
* <p>For details see the method descriptions.
|
||||||
|
* For lookup of property values by code point see class {@link UCharacter}.
|
||||||
|
*
|
||||||
|
* @draft ICU 63
|
||||||
|
* @provisional This API might change or be removed in a future release.
|
||||||
|
*/
|
||||||
|
public final class CharacterProperties {
|
||||||
|
private CharacterProperties() {} // all-static
|
||||||
|
|
||||||
|
private static final UnicodeSet sets[] = new UnicodeSet[UProperty.BINARY_LIMIT];
|
||||||
|
private static final CodePointMap maps[] = new CodePointMap[UProperty.INT_LIMIT - UProperty.INT_START];
|
||||||
|
|
||||||
|
private static UnicodeSet makeSet(int property) {
|
||||||
|
UnicodeSet set = new UnicodeSet();
|
||||||
|
UnicodeSet inclusions = CharacterPropertiesImpl.getInclusionsForProperty(property);
|
||||||
|
int numRanges = inclusions.getRangeCount();
|
||||||
|
int startHasProperty = -1;
|
||||||
|
|
||||||
|
for (int i = 0; i < numRanges; ++i) {
|
||||||
|
int rangeEnd = inclusions.getRangeEnd(i);
|
||||||
|
for (int c = inclusions.getRangeStart(i); c <= rangeEnd; ++c) {
|
||||||
|
// TODO: Get a UCharacterProperty.BinaryProperty to avoid the property dispatch.
|
||||||
|
if (UCharacter.hasBinaryProperty(c, property)) {
|
||||||
|
if (startHasProperty < 0) {
|
||||||
|
// Transition from false to true.
|
||||||
|
startHasProperty = c;
|
||||||
|
}
|
||||||
|
} else if (startHasProperty >= 0) {
|
||||||
|
// Transition from true to false.
|
||||||
|
set.add(startHasProperty, c - 1);
|
||||||
|
startHasProperty = -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (startHasProperty >= 0) {
|
||||||
|
set.add(startHasProperty, 0x10FFFF);
|
||||||
|
}
|
||||||
|
|
||||||
|
return set.freeze();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static CodePointMap makeMap(int property) {
|
||||||
|
int nullValue = property == UProperty.SCRIPT ? UScript.UNKNOWN : 0;
|
||||||
|
MutableCodePointTrie mutableTrie = new MutableCodePointTrie(nullValue, nullValue);
|
||||||
|
UnicodeSet inclusions = CharacterPropertiesImpl.getInclusionsForProperty(property);
|
||||||
|
int numRanges = inclusions.getRangeCount();
|
||||||
|
int start = 0;
|
||||||
|
int value = nullValue;
|
||||||
|
|
||||||
|
for (int i = 0; i < numRanges; ++i) {
|
||||||
|
int rangeEnd = inclusions.getRangeEnd(i);
|
||||||
|
for (int c = inclusions.getRangeStart(i); c <= rangeEnd; ++c) {
|
||||||
|
// TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch.
|
||||||
|
int nextValue = UCharacter.getIntPropertyValue(c, property);
|
||||||
|
if (value != nextValue) {
|
||||||
|
if (value != nullValue) {
|
||||||
|
mutableTrie.setRange(start, c - 1, value);
|
||||||
|
}
|
||||||
|
start = c;
|
||||||
|
value = nextValue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (value != 0) {
|
||||||
|
mutableTrie.setRange(start, 0x10FFFF, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
CodePointTrie.Type type;
|
||||||
|
if (property == UProperty.BIDI_CLASS || property == UProperty.GENERAL_CATEGORY) {
|
||||||
|
type = CodePointTrie.Type.FAST;
|
||||||
|
} else {
|
||||||
|
type = CodePointTrie.Type.SMALL;
|
||||||
|
}
|
||||||
|
CodePointTrie.ValueWidth valueWidth;
|
||||||
|
// TODO: UCharacterProperty.IntProperty
|
||||||
|
int max = UCharacter.getIntPropertyMaxValue(property);
|
||||||
|
if (max <= 0xff) {
|
||||||
|
valueWidth = CodePointTrie.ValueWidth.BITS_8;
|
||||||
|
} else if (max <= 0xffff) {
|
||||||
|
valueWidth = CodePointTrie.ValueWidth.BITS_16;
|
||||||
|
} else {
|
||||||
|
valueWidth = CodePointTrie.ValueWidth.BITS_32;
|
||||||
|
}
|
||||||
|
return mutableTrie.buildImmutable(type, valueWidth);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a frozen UnicodeSet for a binary property.
|
||||||
|
* Throws an exception if the property number is not one for a binary property.
|
||||||
|
*
|
||||||
|
* <p>The returned set contains all code points for which the property is true.
|
||||||
|
*
|
||||||
|
* @param property {@link UProperty#BINARY_START}..{@link UProperty#BINARY_LIMIT}-1
|
||||||
|
* @return the property as a set
|
||||||
|
* @see UProperty
|
||||||
|
* @see UCharacter#hasBinaryProperty
|
||||||
|
*/
|
||||||
|
public static final UnicodeSet getBinaryPropertySet(int property) {
|
||||||
|
if (property < 0 || UProperty.BINARY_LIMIT <= property) {
|
||||||
|
throw new IllegalArgumentException("" + property +
|
||||||
|
" is not a constant for a UProperty binary property");
|
||||||
|
}
|
||||||
|
synchronized(sets) {
|
||||||
|
UnicodeSet set = sets[property];
|
||||||
|
if (set == null) {
|
||||||
|
sets[property] = set = makeSet(property);
|
||||||
|
}
|
||||||
|
return set;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns an immutable CodePointMap for an enumerated/catalog/int-valued property.
|
||||||
|
* Throws an exception if the property number is not one for an "int property".
|
||||||
|
*
|
||||||
|
* <p>The returned object maps all Unicode code points to their values for that property.
|
||||||
|
* For documentation of the integer values see {@link UCharacter#getIntPropertyValue(int, int)}.
|
||||||
|
*
|
||||||
|
* <p>The actual type of the returned object differs between properties
|
||||||
|
* and may change over time.
|
||||||
|
*
|
||||||
|
* @param property {@link UProperty#INT_START}..{@link UProperty#INT_LIMIT}-1
|
||||||
|
* @return the property as a map
|
||||||
|
* @see UProperty
|
||||||
|
* @see UCharacter#getIntPropertyValue
|
||||||
|
*/
|
||||||
|
public static final CodePointMap getIntPropertyMap(int property) {
|
||||||
|
if (property < UProperty.INT_START || UProperty.INT_LIMIT <= property) {
|
||||||
|
throw new IllegalArgumentException("" + property +
|
||||||
|
" is not a constant for a UProperty int property");
|
||||||
|
}
|
||||||
|
synchronized(maps) {
|
||||||
|
CodePointMap map = maps[property - UProperty.INT_START];
|
||||||
|
if (map == null) {
|
||||||
|
maps[property - UProperty.INT_START] = map = makeMap(property);
|
||||||
|
}
|
||||||
|
return map;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -5698,7 +5698,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* {@icu} <p>Check a binary Unicode property for a code point.
|
* {@icu} Check a binary Unicode property for a code point.
|
||||||
* <p>Unicode, especially in version 3.2, defines many more properties
|
* <p>Unicode, especially in version 3.2, defines many more properties
|
||||||
* than the original set in UnicodeData.txt.
|
* than the original set in UnicodeData.txt.
|
||||||
* <p>This API is intended to reflect Unicode properties as defined in
|
* <p>This API is intended to reflect Unicode properties as defined in
|
||||||
@ -5720,6 +5720,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||||||
* Unicode version does not have data for the property at all, or
|
* Unicode version does not have data for the property at all, or
|
||||||
* not for this code point.
|
* not for this code point.
|
||||||
* @see com.ibm.icu.lang.UProperty
|
* @see com.ibm.icu.lang.UProperty
|
||||||
|
* @see CharacterProperties#getBinaryPropertySet(int)
|
||||||
* @stable ICU 2.6
|
* @stable ICU 2.6
|
||||||
*/
|
*/
|
||||||
public static boolean hasBinaryProperty(int ch, int property)
|
public static boolean hasBinaryProperty(int ch, int property)
|
||||||
@ -5777,7 +5778,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* {@icu} <p>Returns the property value for an Unicode property type of a code point.
|
* {@icu} Returns the property value for a Unicode property type of a code point.
|
||||||
* Also returns binary and mask property values.
|
* Also returns binary and mask property values.
|
||||||
* <p>Unicode, especially in version 3.2, defines many more properties than
|
* <p>Unicode, especially in version 3.2, defines many more properties than
|
||||||
* the original set in UnicodeData.txt.
|
* the original set in UnicodeData.txt.
|
||||||
@ -5801,8 +5802,9 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||||||
* UProperty.MASK_START <= type < UProperty.MASK_LIMIT.
|
* UProperty.MASK_START <= type < UProperty.MASK_LIMIT.
|
||||||
* @return numeric value that is directly the property value or,
|
* @return numeric value that is directly the property value or,
|
||||||
* for enumerated properties, corresponds to the numeric value of
|
* for enumerated properties, corresponds to the numeric value of
|
||||||
* the enumerated constant of the respective property value
|
* the enumerated constant of the respective property value type
|
||||||
* enumeration type (cast to enum type if necessary).
|
* ({@link ECharacterCategory}, {@link ECharacterDirection},
|
||||||
|
* {@link DecompositionType}, etc.).
|
||||||
* Returns 0 or 1 (for false / true) for binary Unicode properties.
|
* Returns 0 or 1 (for false / true) for binary Unicode properties.
|
||||||
* Returns a bit-mask for mask properties.
|
* Returns a bit-mask for mask properties.
|
||||||
* Returns 0 if 'type' is out of bounds or if the Unicode version
|
* Returns 0 if 'type' is out of bounds or if the Unicode version
|
||||||
@ -5812,6 +5814,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||||||
* @see #hasBinaryProperty
|
* @see #hasBinaryProperty
|
||||||
* @see #getIntPropertyMinValue
|
* @see #getIntPropertyMinValue
|
||||||
* @see #getIntPropertyMaxValue
|
* @see #getIntPropertyMaxValue
|
||||||
|
* @see CharacterProperties#getIntPropertyMap(int)
|
||||||
* @see #getUnicodeVersion
|
* @see #getUnicodeVersion
|
||||||
* @stable ICU 2.4
|
* @stable ICU 2.4
|
||||||
*/
|
*/
|
||||||
|
@ -18,21 +18,21 @@ import java.util.NoSuchElementException;
|
|||||||
import java.util.TreeSet;
|
import java.util.TreeSet;
|
||||||
|
|
||||||
import com.ibm.icu.impl.BMPSet;
|
import com.ibm.icu.impl.BMPSet;
|
||||||
import com.ibm.icu.impl.Norm2AllModes;
|
import com.ibm.icu.impl.CharacterPropertiesImpl;
|
||||||
import com.ibm.icu.impl.PatternProps;
|
import com.ibm.icu.impl.PatternProps;
|
||||||
import com.ibm.icu.impl.RuleCharacterIterator;
|
import com.ibm.icu.impl.RuleCharacterIterator;
|
||||||
import com.ibm.icu.impl.SortedSetRelation;
|
import com.ibm.icu.impl.SortedSetRelation;
|
||||||
import com.ibm.icu.impl.StringRange;
|
import com.ibm.icu.impl.StringRange;
|
||||||
import com.ibm.icu.impl.UBiDiProps;
|
|
||||||
import com.ibm.icu.impl.UCaseProps;
|
import com.ibm.icu.impl.UCaseProps;
|
||||||
import com.ibm.icu.impl.UCharacterProperty;
|
|
||||||
import com.ibm.icu.impl.UPropertyAliases;
|
import com.ibm.icu.impl.UPropertyAliases;
|
||||||
import com.ibm.icu.impl.UnicodeSetStringSpan;
|
import com.ibm.icu.impl.UnicodeSetStringSpan;
|
||||||
import com.ibm.icu.impl.Utility;
|
import com.ibm.icu.impl.Utility;
|
||||||
import com.ibm.icu.lang.CharSequences;
|
import com.ibm.icu.lang.CharSequences;
|
||||||
|
import com.ibm.icu.lang.CharacterProperties;
|
||||||
import com.ibm.icu.lang.UCharacter;
|
import com.ibm.icu.lang.UCharacter;
|
||||||
import com.ibm.icu.lang.UProperty;
|
import com.ibm.icu.lang.UProperty;
|
||||||
import com.ibm.icu.lang.UScript;
|
import com.ibm.icu.lang.UScript;
|
||||||
|
import com.ibm.icu.util.CodePointMap;
|
||||||
import com.ibm.icu.util.Freezable;
|
import com.ibm.icu.util.Freezable;
|
||||||
import com.ibm.icu.util.ICUUncheckedIOException;
|
import com.ibm.icu.util.ICUUncheckedIOException;
|
||||||
import com.ibm.icu.util.OutputInt;
|
import com.ibm.icu.util.OutputInt;
|
||||||
@ -346,14 +346,6 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||||||
private static final String ASCII_ID = "ASCII"; // [\u0000-\u007F]
|
private static final String ASCII_ID = "ASCII"; // [\u0000-\u007F]
|
||||||
private static final String ASSIGNED = "Assigned"; // [:^Cn:]
|
private static final String ASSIGNED = "Assigned"; // [:^Cn:]
|
||||||
|
|
||||||
/**
|
|
||||||
* A set of all characters _except_ the second through last characters of
|
|
||||||
* certain ranges. These ranges are ranges of characters whose
|
|
||||||
* properties are all exactly alike, e.g. CJK Ideographs from
|
|
||||||
* U+4E00 to U+9FA5.
|
|
||||||
*/
|
|
||||||
private static UnicodeSet INCLUSIONS[] = null;
|
|
||||||
|
|
||||||
private volatile BMPSet bmpSet; // The set is frozen if bmpSet or stringSpan is not null.
|
private volatile BMPSet bmpSet; // The set is frozen if bmpSet or stringSpan is not null.
|
||||||
private volatile UnicodeSetStringSpan stringSpan;
|
private volatile UnicodeSetStringSpan stringSpan;
|
||||||
//----------------------------------------------------------------
|
//----------------------------------------------------------------
|
||||||
@ -520,8 +512,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Make this object represent the range <code>start - end</code>.
|
* Make this object represent the range <code>start - end</code>.
|
||||||
* If <code>end > start</code> then this object is set to an
|
* If <code>start > end</code> then this object is set to an empty range.
|
||||||
* an empty range.
|
|
||||||
*
|
*
|
||||||
* @param start first character in the set, inclusive
|
* @param start first character in the set, inclusive
|
||||||
* @param end last character in the set, inclusive
|
* @param end last character in the set, inclusive
|
||||||
@ -3186,7 +3177,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||||||
boolean contains(int codePoint);
|
boolean contains(int codePoint);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class NumericValueFilter implements Filter {
|
private static final class NumericValueFilter implements Filter {
|
||||||
double value;
|
double value;
|
||||||
NumericValueFilter(double value) { this.value = value; }
|
NumericValueFilter(double value) { this.value = value; }
|
||||||
@Override
|
@Override
|
||||||
@ -3195,29 +3186,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class GeneralCategoryMaskFilter implements Filter {
|
private static final class ScriptExtensionsFilter implements Filter {
|
||||||
int mask;
|
|
||||||
GeneralCategoryMaskFilter(int mask) { this.mask = mask; }
|
|
||||||
@Override
|
|
||||||
public boolean contains(int ch) {
|
|
||||||
return ((1 << UCharacter.getType(ch)) & mask) != 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static class IntPropertyFilter implements Filter {
|
|
||||||
int prop;
|
|
||||||
int value;
|
|
||||||
IntPropertyFilter(int prop, int value) {
|
|
||||||
this.prop = prop;
|
|
||||||
this.value = value;
|
|
||||||
}
|
|
||||||
@Override
|
|
||||||
public boolean contains(int ch) {
|
|
||||||
return UCharacter.getIntPropertyValue(ch, prop) == value;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static class ScriptExtensionsFilter implements Filter {
|
|
||||||
int script;
|
int script;
|
||||||
ScriptExtensionsFilter(int script) { this.script = script; }
|
ScriptExtensionsFilter(int script) { this.script = script; }
|
||||||
@Override
|
@Override
|
||||||
@ -3229,7 +3198,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||||||
// VersionInfo for unassigned characters
|
// VersionInfo for unassigned characters
|
||||||
private static final VersionInfo NO_VERSION = VersionInfo.getInstance(0, 0, 0, 0);
|
private static final VersionInfo NO_VERSION = VersionInfo.getInstance(0, 0, 0, 0);
|
||||||
|
|
||||||
private static class VersionFilter implements Filter {
|
private static final class VersionFilter implements Filter {
|
||||||
VersionInfo version;
|
VersionInfo version;
|
||||||
VersionFilter(VersionInfo version) { this.version = version; }
|
VersionFilter(VersionInfo version) { this.version = version; }
|
||||||
@Override
|
@Override
|
||||||
@ -3242,62 +3211,10 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static synchronized UnicodeSet getInclusions(int src) {
|
|
||||||
if (INCLUSIONS == null) {
|
|
||||||
INCLUSIONS = new UnicodeSet[UCharacterProperty.SRC_COUNT];
|
|
||||||
}
|
|
||||||
if(INCLUSIONS[src] == null) {
|
|
||||||
UnicodeSet incl = new UnicodeSet();
|
|
||||||
switch(src) {
|
|
||||||
case UCharacterProperty.SRC_CHAR:
|
|
||||||
UCharacterProperty.INSTANCE.addPropertyStarts(incl);
|
|
||||||
break;
|
|
||||||
case UCharacterProperty.SRC_PROPSVEC:
|
|
||||||
UCharacterProperty.INSTANCE.upropsvec_addPropertyStarts(incl);
|
|
||||||
break;
|
|
||||||
case UCharacterProperty.SRC_CHAR_AND_PROPSVEC:
|
|
||||||
UCharacterProperty.INSTANCE.addPropertyStarts(incl);
|
|
||||||
UCharacterProperty.INSTANCE.upropsvec_addPropertyStarts(incl);
|
|
||||||
break;
|
|
||||||
case UCharacterProperty.SRC_CASE_AND_NORM:
|
|
||||||
Norm2AllModes.getNFCInstance().impl.addPropertyStarts(incl);
|
|
||||||
UCaseProps.INSTANCE.addPropertyStarts(incl);
|
|
||||||
break;
|
|
||||||
case UCharacterProperty.SRC_NFC:
|
|
||||||
Norm2AllModes.getNFCInstance().impl.addPropertyStarts(incl);
|
|
||||||
break;
|
|
||||||
case UCharacterProperty.SRC_NFKC:
|
|
||||||
Norm2AllModes.getNFKCInstance().impl.addPropertyStarts(incl);
|
|
||||||
break;
|
|
||||||
case UCharacterProperty.SRC_NFKC_CF:
|
|
||||||
Norm2AllModes.getNFKC_CFInstance().impl.addPropertyStarts(incl);
|
|
||||||
break;
|
|
||||||
case UCharacterProperty.SRC_NFC_CANON_ITER:
|
|
||||||
Norm2AllModes.getNFCInstance().impl.addCanonIterPropertyStarts(incl);
|
|
||||||
break;
|
|
||||||
case UCharacterProperty.SRC_CASE:
|
|
||||||
UCaseProps.INSTANCE.addPropertyStarts(incl);
|
|
||||||
break;
|
|
||||||
case UCharacterProperty.SRC_BIDI:
|
|
||||||
UBiDiProps.INSTANCE.addPropertyStarts(incl);
|
|
||||||
break;
|
|
||||||
case UCharacterProperty.SRC_INPC:
|
|
||||||
case UCharacterProperty.SRC_INSC:
|
|
||||||
case UCharacterProperty.SRC_VO:
|
|
||||||
UCharacterProperty.INSTANCE.ulayout_addPropertyStarts(src, incl);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
throw new IllegalStateException("UnicodeSet.getInclusions(unknown src "+src+")");
|
|
||||||
}
|
|
||||||
INCLUSIONS[src] = incl;
|
|
||||||
}
|
|
||||||
return INCLUSIONS[src];
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generic filter-based scanning code for UCD property UnicodeSets.
|
* Generic filter-based scanning code for UCD property UnicodeSets.
|
||||||
*/
|
*/
|
||||||
private UnicodeSet applyFilter(Filter filter, int src) {
|
private void applyFilter(Filter filter, UnicodeSet inclusions) {
|
||||||
// Logically, walk through all Unicode characters, noting the start
|
// Logically, walk through all Unicode characters, noting the start
|
||||||
// and end of each range for which filter.contain(c) is
|
// and end of each range for which filter.contain(c) is
|
||||||
// true. Add each range to a set.
|
// true. Add each range to a set.
|
||||||
@ -3305,13 +3222,12 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||||||
// To improve performance, use an inclusions set which
|
// To improve performance, use an inclusions set which
|
||||||
// encodes information about character ranges that are known
|
// encodes information about character ranges that are known
|
||||||
// to have identical properties.
|
// to have identical properties.
|
||||||
// getInclusions(src) contains exactly the first characters of
|
// inclusions contains the first characters of
|
||||||
// same-value ranges for the given properties "source".
|
// same-value ranges for the given property.
|
||||||
|
|
||||||
clear();
|
clear();
|
||||||
|
|
||||||
int startHasProperty = -1;
|
int startHasProperty = -1;
|
||||||
UnicodeSet inclusions = getInclusions(src);
|
|
||||||
int limitRange = inclusions.getRangeCount();
|
int limitRange = inclusions.getRangeCount();
|
||||||
|
|
||||||
for (int j=0; j<limitRange; ++j) {
|
for (int j=0; j<limitRange; ++j) {
|
||||||
@ -3336,10 +3252,39 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||||||
if (startHasProperty >= 0) {
|
if (startHasProperty >= 0) {
|
||||||
add_unchecked(startHasProperty, 0x10FFFF);
|
add_unchecked(startHasProperty, 0x10FFFF);
|
||||||
}
|
}
|
||||||
|
|
||||||
return this;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Maps map values to 1 if the mask contains their value'th bit, all others to 0. */
|
||||||
|
private static final class GeneralCategoryMaskFilter implements CodePointMap.ValueFilter {
|
||||||
|
int mask;
|
||||||
|
GeneralCategoryMaskFilter(int mask) { this.mask = mask; }
|
||||||
|
@Override
|
||||||
|
public int apply(int value) {
|
||||||
|
value = (1 << value) & mask;
|
||||||
|
if (value != 0) { value = 1; }
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Maps one map value to 1, all others to 0. */
|
||||||
|
private static final class IntValueFilter implements CodePointMap.ValueFilter {
|
||||||
|
int v;
|
||||||
|
IntValueFilter(int value) { v = value; }
|
||||||
|
@Override
|
||||||
|
public int apply(int value) { return value == v ? 1 : 0; }
|
||||||
|
}
|
||||||
|
|
||||||
|
private void applyIntPropertyValue(CodePointMap map, CodePointMap.ValueFilter filter) {
|
||||||
|
clear();
|
||||||
|
CodePointMap.Range range = new CodePointMap.Range();
|
||||||
|
for (int start = 0; map.getRange(start, filter, range);) {
|
||||||
|
int end = range.getEnd();
|
||||||
|
if (range.getValue() != 0) {
|
||||||
|
add_unchecked(start, end);
|
||||||
|
}
|
||||||
|
start = end + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Remove leading and trailing Pattern_White_Space and compress
|
* Remove leading and trailing Pattern_White_Space and compress
|
||||||
@ -3393,13 +3338,31 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||||||
* @stable ICU 2.4
|
* @stable ICU 2.4
|
||||||
*/
|
*/
|
||||||
public UnicodeSet applyIntPropertyValue(int prop, int value) {
|
public UnicodeSet applyIntPropertyValue(int prop, int value) {
|
||||||
checkFrozen();
|
// All of the following include checkFrozen() before modifying this set.
|
||||||
if (prop == UProperty.GENERAL_CATEGORY_MASK) {
|
if (prop == UProperty.GENERAL_CATEGORY_MASK) {
|
||||||
applyFilter(new GeneralCategoryMaskFilter(value), UCharacterProperty.SRC_CHAR);
|
CodePointMap map = CharacterProperties.getIntPropertyMap(UProperty.GENERAL_CATEGORY);
|
||||||
|
applyIntPropertyValue(map, new GeneralCategoryMaskFilter(value));
|
||||||
} else if (prop == UProperty.SCRIPT_EXTENSIONS) {
|
} else if (prop == UProperty.SCRIPT_EXTENSIONS) {
|
||||||
applyFilter(new ScriptExtensionsFilter(value), UCharacterProperty.SRC_PROPSVEC);
|
UnicodeSet inclusions = CharacterPropertiesImpl.getInclusionsForProperty(prop);
|
||||||
|
applyFilter(new ScriptExtensionsFilter(value), inclusions);
|
||||||
|
} else if (0 <= prop && prop < UProperty.BINARY_LIMIT) {
|
||||||
|
if (value == 0 || value == 1) {
|
||||||
|
set(CharacterProperties.getBinaryPropertySet(prop));
|
||||||
|
if (value == 0) {
|
||||||
|
complement();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
clear();
|
||||||
|
}
|
||||||
|
} else if (UProperty.INT_START <= prop && prop < UProperty.INT_LIMIT) {
|
||||||
|
CodePointMap map = CharacterProperties.getIntPropertyMap(prop);
|
||||||
|
applyIntPropertyValue(map, new IntValueFilter(value));
|
||||||
} else {
|
} else {
|
||||||
applyFilter(new IntPropertyFilter(prop, value), UCharacterProperty.INSTANCE.getSource(prop));
|
// This code used to always call getInclusions(property source)
|
||||||
|
// which throws an exception for an unsupported property.
|
||||||
|
throw new IllegalArgumentException("unsupported property " + prop);
|
||||||
|
// Otherwise we would just clear() this set because
|
||||||
|
// getIntPropertyValue(c, prop) returns 0 for all code points.
|
||||||
}
|
}
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
@ -3499,7 +3462,8 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||||||
case UProperty.NUMERIC_VALUE:
|
case UProperty.NUMERIC_VALUE:
|
||||||
{
|
{
|
||||||
double value = Double.parseDouble(PatternProps.trimWhiteSpace(valueAlias));
|
double value = Double.parseDouble(PatternProps.trimWhiteSpace(valueAlias));
|
||||||
applyFilter(new NumericValueFilter(value), UCharacterProperty.SRC_CHAR);
|
applyFilter(new NumericValueFilter(value),
|
||||||
|
CharacterPropertiesImpl.getInclusionsForProperty(p));
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
case UProperty.NAME:
|
case UProperty.NAME:
|
||||||
@ -3525,7 +3489,8 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||||||
// VersionInfo.getInstance() does not do
|
// VersionInfo.getInstance() does not do
|
||||||
// 'loose' matching.
|
// 'loose' matching.
|
||||||
VersionInfo version = VersionInfo.getInstance(mungeCharName(valueAlias));
|
VersionInfo version = VersionInfo.getInstance(mungeCharName(valueAlias));
|
||||||
applyFilter(new VersionFilter(version), UCharacterProperty.SRC_PROPSVEC);
|
applyFilter(new VersionFilter(version),
|
||||||
|
CharacterPropertiesImpl.getInclusionsForProperty(p));
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
case UProperty.SCRIPT_EXTENSIONS:
|
case UProperty.SCRIPT_EXTENSIONS:
|
||||||
@ -4881,7 +4846,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||||||
* of UnicodeSets.
|
* of UnicodeSets.
|
||||||
* <p>
|
* <p>
|
||||||
* WARNING: If this function is used with a UnicodeProperty, and the
|
* WARNING: If this function is used with a UnicodeProperty, and the
|
||||||
* Unassigned characters (gc=Cn) are different than in ICU other than in ICU, you MUST call
|
* Unassigned characters (gc=Cn) are different than in ICU, you MUST call
|
||||||
* {@code UnicodeProperty.ResetCacheProperties} afterwards. If you then call {@code UnicodeSet.setDefaultXSymbolTable}
|
* {@code UnicodeProperty.ResetCacheProperties} afterwards. If you then call {@code UnicodeSet.setDefaultXSymbolTable}
|
||||||
* with null to clear the value, you MUST also call {@code UnicodeProperty.ResetCacheProperties}.
|
* with null to clear the value, you MUST also call {@code UnicodeProperty.ResetCacheProperties}.
|
||||||
*
|
*
|
||||||
@ -4891,7 +4856,9 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||||||
*/
|
*/
|
||||||
@Deprecated
|
@Deprecated
|
||||||
public static void setDefaultXSymbolTable(XSymbolTable xSymbolTable) {
|
public static void setDefaultXSymbolTable(XSymbolTable xSymbolTable) {
|
||||||
INCLUSIONS = null; // If the properties override inclusions, these have to be regenerated.
|
// If the properties override inclusions, these have to be regenerated.
|
||||||
|
// TODO: Check if the Unicode Tools or Unicode Utilities really need this.
|
||||||
|
CharacterPropertiesImpl.clear();
|
||||||
XSYMBOL_TABLE = xSymbolTable;
|
XSYMBOL_TABLE = xSymbolTable;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -25,6 +25,7 @@ import com.ibm.icu.impl.Normalizer2Impl;
|
|||||||
import com.ibm.icu.impl.PatternProps;
|
import com.ibm.icu.impl.PatternProps;
|
||||||
import com.ibm.icu.impl.UCharacterName;
|
import com.ibm.icu.impl.UCharacterName;
|
||||||
import com.ibm.icu.impl.Utility;
|
import com.ibm.icu.impl.Utility;
|
||||||
|
import com.ibm.icu.lang.CharacterProperties;
|
||||||
import com.ibm.icu.lang.UCharacter;
|
import com.ibm.icu.lang.UCharacter;
|
||||||
import com.ibm.icu.lang.UCharacterCategory;
|
import com.ibm.icu.lang.UCharacterCategory;
|
||||||
import com.ibm.icu.lang.UCharacterDirection;
|
import com.ibm.icu.lang.UCharacterDirection;
|
||||||
@ -35,6 +36,7 @@ import com.ibm.icu.text.Normalizer2;
|
|||||||
import com.ibm.icu.text.UTF16;
|
import com.ibm.icu.text.UTF16;
|
||||||
import com.ibm.icu.text.UnicodeSet;
|
import com.ibm.icu.text.UnicodeSet;
|
||||||
import com.ibm.icu.text.UnicodeSetIterator;
|
import com.ibm.icu.text.UnicodeSetIterator;
|
||||||
|
import com.ibm.icu.util.CodePointMap;
|
||||||
import com.ibm.icu.util.RangeValueIterator;
|
import com.ibm.icu.util.RangeValueIterator;
|
||||||
import com.ibm.icu.util.ULocale;
|
import com.ibm.icu.util.ULocale;
|
||||||
import com.ibm.icu.util.ValueIterator;
|
import com.ibm.icu.util.ValueIterator;
|
||||||
@ -3641,4 +3643,67 @@ public final class UCharacterTest extends TestFmwk
|
|||||||
int output = UCharacter.getCharFromNameAlias(alias);
|
int output = UCharacter.getCharFromNameAlias(alias);
|
||||||
assertEquals("alias for '" + input + "'", input, output);
|
assertEquals("alias for '" + input + "'", input, output);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void TestBinaryCharacterProperties() {
|
||||||
|
try {
|
||||||
|
CharacterProperties.getBinaryPropertySet(-1);
|
||||||
|
fail("getBinaryPropertySet(-1) did not throw an exception");
|
||||||
|
CharacterProperties.getBinaryPropertySet(UProperty.BINARY_LIMIT);
|
||||||
|
fail("getBinaryPropertySet(BINARY_LIMIT) did not throw an exception");
|
||||||
|
} catch(Exception expected) {
|
||||||
|
}
|
||||||
|
// Spot-check getBinaryPropertySet() vs. hasBinaryProperty().
|
||||||
|
for (int prop = 0; prop < UProperty.BINARY_LIMIT; ++prop) {
|
||||||
|
UnicodeSet set = CharacterProperties.getBinaryPropertySet(prop);
|
||||||
|
int size = set.size();
|
||||||
|
if (size == 0) {
|
||||||
|
assertFalse("!hasBinaryProperty(U+0020, " + prop + ')',
|
||||||
|
UCharacter.hasBinaryProperty(0x20, prop));
|
||||||
|
assertFalse("!hasBinaryProperty(U+0061, " + prop + ')',
|
||||||
|
UCharacter.hasBinaryProperty(0x61, prop));
|
||||||
|
assertFalse("!hasBinaryProperty(U+4E00, " + prop + ')',
|
||||||
|
UCharacter.hasBinaryProperty(0x4e00, prop));
|
||||||
|
} else {
|
||||||
|
int c = set.charAt(0);
|
||||||
|
if (c > 0) {
|
||||||
|
assertFalse("!hasBinaryProperty(" + Utility.hex(c - 1) + ", " + prop + ')',
|
||||||
|
UCharacter.hasBinaryProperty(c - 1, prop));
|
||||||
|
}
|
||||||
|
assertTrue("hasBinaryProperty(" + Utility.hex(c) + ", " + prop + ')',
|
||||||
|
UCharacter.hasBinaryProperty(c, prop));
|
||||||
|
c = set.charAt(size - 1);
|
||||||
|
assertTrue("hasBinaryProperty(" + Utility.hex(c) + ", " + prop + ')',
|
||||||
|
UCharacter.hasBinaryProperty(c, prop));
|
||||||
|
if (c < 0x10ffff) {
|
||||||
|
assertFalse("!hasBinaryProperty(" + Utility.hex(c + 1) + ", " + prop + ')',
|
||||||
|
UCharacter.hasBinaryProperty(c + 1, prop));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void TestIntCharacterProperties() {
|
||||||
|
try {
|
||||||
|
CharacterProperties.getIntPropertyMap(UProperty.INT_START - 1);
|
||||||
|
fail("getIntPropertyMap(INT_START-1) did not throw an exception");
|
||||||
|
CharacterProperties.getIntPropertyMap(UProperty.INT_LIMIT);
|
||||||
|
fail("getIntPropertyMap(INT_LIMIT) did not throw an exception");
|
||||||
|
} catch(Exception expected) {
|
||||||
|
}
|
||||||
|
// Spot-check getIntPropertyMap() vs. getIntPropertyValue().
|
||||||
|
CodePointMap.Range range = new CodePointMap.Range();
|
||||||
|
for (int prop = UProperty.INT_START; prop < UProperty.INT_LIMIT; ++prop) {
|
||||||
|
CodePointMap map = CharacterProperties.getIntPropertyMap(prop);
|
||||||
|
assertTrue("int property first range", map.getRange(0, null, range));
|
||||||
|
int c = (range.getStart() + range.getEnd()) / 2;
|
||||||
|
assertEquals("int property first range value at " + Utility.hex(c),
|
||||||
|
UCharacter.getIntPropertyValue(c, prop), range.getValue());
|
||||||
|
assertTrue("int property later range", map.getRange(0x5000, null, range));
|
||||||
|
int end = range.getEnd();
|
||||||
|
assertEquals("int property later range value at " + Utility.hex(end),
|
||||||
|
UCharacter.getIntPropertyValue(end, prop), range.getValue());
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user