ICU-4078 use USetAdder interface to remove dependencies of low-level code on the USet/UnicodeSet implementation
X-SVN-Rev: 16265
This commit is contained in:
parent
02ef1c94e9
commit
8a3a93deed
@ -63,11 +63,11 @@ udata.o ucmndata.o udatamem.o udataswp.o umapfile.o ucol_swp.o \
|
||||
uresbund.o ures_cnv.o uresdata.o resbund.o resbund_cnv.o \
|
||||
ucat.o locmap.o uloc.o locid.o \
|
||||
uhash.o uhash_us.o \
|
||||
ucnv.o ucnv_bld.o ucnv_cb.o ucnv_cnv.o ucnv_err.o ucnv_ext.o ucnv_io.o ucnvlat1.o \
|
||||
ucnv.o ucnv_set.o ucnv_bld.o ucnv_cb.o ucnv_cnv.o ucnv_err.o ucnv_ext.o ucnv_io.o ucnvlat1.o \
|
||||
ucnv_u7.o ucnv_u8.o ucnv_u16.o ucnv_u32.o ucnvscsu.o ucnvbocu.o \
|
||||
ucnvmbcs.o ucnv2022.o ucnvhz.o ucnv_lmb.o ucnvisci.o \
|
||||
unistr.o unistr_case.o unistr_cnv.o unistr_props.o \
|
||||
utf_impl.o ustring.o ustr_cnv.o ustrcase.o cstring.o ustrfmt.o ustrtrns.o \
|
||||
utf_impl.o ustring.o ustr_cnv.o ustrcase.o cstring.o ustrfmt.o ustrtrns.o ustr_wcs.o \
|
||||
normlzr.o unorm.o unorm_it.o chariter.o schriter.o uchriter.o uiter.o \
|
||||
uchar.o uprops.o ucase.o propname.o ubidi.o ubidiwrt.o ubidiln.o ushape.o unames.o \
|
||||
ucln_cmn.o uscript.o usc_impl.o uvector.o ustack.o uvectr32.o ucmp8.o \
|
||||
|
@ -1516,6 +1516,10 @@ SOURCE=.\ucnv_lmb.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\ucnv_set.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\ucnv_u16.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
@ -2911,6 +2915,10 @@ InputPath=.\unicode\uset.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\uset_imp.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\uset_props.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
@ -3458,6 +3466,10 @@ SOURCE=.\ustr_imp.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\ustr_wcs.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\ustrcase.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
@ -755,6 +755,9 @@
|
||||
<File
|
||||
RelativePath=".\ucnv.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\ucnv_set.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\unicode\ucnv.h">
|
||||
<FileConfiguration
|
||||
@ -1519,6 +1522,9 @@
|
||||
RelativePath=".\uset.cpp">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\uset_imp.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\uset_props.cpp">
|
||||
</File>
|
||||
<File
|
||||
@ -1798,6 +1804,9 @@
|
||||
<File
|
||||
RelativePath=".\ustr_imp.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\ustr_wcs.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\ustrcase.c">
|
||||
</File>
|
||||
|
@ -340,15 +340,15 @@ ucase_swap(const UDataSwapper *ds,
|
||||
static UBool U_CALLCONV
|
||||
_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 limit, uint32_t value) {
|
||||
/* add the start code point to the USet */
|
||||
uset_add((USet *)context, start);
|
||||
USetAdder *sa=(USetAdder *)context;
|
||||
sa->add(sa->set, start);
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* TODO define/use USetAdder */
|
||||
U_CAPI void U_EXPORT2
|
||||
ucase_addPropertyStarts(const UCaseProps *csp, USet *set, UErrorCode *pErrorCode) {
|
||||
ucase_addPropertyStarts(const UCaseProps *csp, USetAdder *sa, UErrorCode *pErrorCode) {
|
||||
/* add the start code point of each same-value range of the trie */
|
||||
utrie_enum(&csp->trie, NULL, _enumPropertyStartsRange, set);
|
||||
utrie_enum(&csp->trie, NULL, _enumPropertyStartsRange, sa);
|
||||
|
||||
/* add code points with hardcoded properties, plus the ones following them */
|
||||
|
||||
|
@ -21,6 +21,7 @@
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uset.h"
|
||||
#include "uset_imp.h"
|
||||
#include "udataswp.h"
|
||||
|
||||
U_CDECL_BEGIN
|
||||
@ -53,7 +54,7 @@ ucase_swap(const UDataSwapper *ds,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ucase_addPropertyStarts(const UCaseProps *csp, USet *set, UErrorCode *pErrorCode);
|
||||
ucase_addPropertyStarts(const UCaseProps *csp, USetAdder *sa, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Bit mask for getting just the options from a string compare options word
|
||||
|
@ -982,14 +982,15 @@ uprv_getMaxValues(int32_t column) {
|
||||
static UBool U_CALLCONV
|
||||
_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 limit, uint32_t value) {
|
||||
/* add the start code point to the USet */
|
||||
uset_add((USet *)context, start);
|
||||
USetAdder *sa=(USetAdder *)context;
|
||||
sa->add(sa->set, start);
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
#define USET_ADD_CP_AND_NEXT(set, cp) uset_add(set, cp); uset_add(set, cp+1)
|
||||
/* Add cp and cp+1 through the USetAdder; one statement, so safe under an unbraced if/else. Evaluates sa and cp twice. */
#define USET_ADD_CP_AND_NEXT(sa, cp) do { (sa)->add((sa)->set, (cp)); (sa)->add((sa)->set, (cp)+1); } while(0)
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
uchar_addPropertyStarts(USet *set, UErrorCode *pErrorCode) {
|
||||
uchar_addPropertyStarts(USetAdder *sa, UErrorCode *pErrorCode) {
|
||||
UChar32 c;
|
||||
int32_t value, value2;
|
||||
|
||||
@ -999,62 +1000,62 @@ uchar_addPropertyStarts(USet *set, UErrorCode *pErrorCode) {
|
||||
}
|
||||
|
||||
/* add the start code point of each same-value range of each trie */
|
||||
utrie_enum(&propsTrie, NULL, _enumPropertyStartsRange, set);
|
||||
utrie_enum(&propsVectorsTrie, NULL, _enumPropertyStartsRange, set);
|
||||
utrie_enum(&propsTrie, NULL, _enumPropertyStartsRange, sa);
|
||||
utrie_enum(&propsVectorsTrie, NULL, _enumPropertyStartsRange, sa);
|
||||
|
||||
/* add code points with hardcoded properties, plus the ones following them */
|
||||
|
||||
/* add for IS_THAT_CONTROL_SPACE() */
|
||||
uset_add(set, TAB); /* range TAB..CR */
|
||||
uset_add(set, CR+1);
|
||||
uset_add(set, 0x1c);
|
||||
uset_add(set, 0x1f+1);
|
||||
USET_ADD_CP_AND_NEXT(set, NL);
|
||||
sa->add(sa->set, TAB); /* range TAB..CR */
|
||||
sa->add(sa->set, CR+1);
|
||||
sa->add(sa->set, 0x1c);
|
||||
sa->add(sa->set, 0x1f+1);
|
||||
USET_ADD_CP_AND_NEXT(sa, NL);
|
||||
|
||||
/* add for u_isIDIgnorable() what was not added above */
|
||||
uset_add(set, DEL); /* range DEL..NBSP-1, NBSP added below */
|
||||
uset_add(set, HAIRSP);
|
||||
uset_add(set, RLM+1);
|
||||
uset_add(set, INHSWAP);
|
||||
uset_add(set, NOMDIG+1);
|
||||
USET_ADD_CP_AND_NEXT(set, ZWNBSP);
|
||||
sa->add(sa->set, DEL); /* range DEL..NBSP-1, NBSP added below */
|
||||
sa->add(sa->set, HAIRSP);
|
||||
sa->add(sa->set, RLM+1);
|
||||
sa->add(sa->set, INHSWAP);
|
||||
sa->add(sa->set, NOMDIG+1);
|
||||
USET_ADD_CP_AND_NEXT(sa, ZWNBSP);
|
||||
|
||||
/* add no-break spaces for u_isWhitespace() what was not added above */
|
||||
USET_ADD_CP_AND_NEXT(set, NBSP);
|
||||
USET_ADD_CP_AND_NEXT(set, FIGURESP);
|
||||
USET_ADD_CP_AND_NEXT(set, NNBSP);
|
||||
USET_ADD_CP_AND_NEXT(sa, NBSP);
|
||||
USET_ADD_CP_AND_NEXT(sa, FIGURESP);
|
||||
USET_ADD_CP_AND_NEXT(sa, NNBSP);
|
||||
|
||||
/* add for u_charDigitValue() */
|
||||
USET_ADD_CP_AND_NEXT(set, 0x3007);
|
||||
USET_ADD_CP_AND_NEXT(set, 0x4e00);
|
||||
USET_ADD_CP_AND_NEXT(set, 0x4e8c);
|
||||
USET_ADD_CP_AND_NEXT(set, 0x4e09);
|
||||
USET_ADD_CP_AND_NEXT(set, 0x56db);
|
||||
USET_ADD_CP_AND_NEXT(set, 0x4e94);
|
||||
USET_ADD_CP_AND_NEXT(set, 0x516d);
|
||||
USET_ADD_CP_AND_NEXT(set, 0x4e03);
|
||||
USET_ADD_CP_AND_NEXT(set, 0x516b);
|
||||
USET_ADD_CP_AND_NEXT(set, 0x4e5d);
|
||||
USET_ADD_CP_AND_NEXT(sa, 0x3007);
|
||||
USET_ADD_CP_AND_NEXT(sa, 0x4e00);
|
||||
USET_ADD_CP_AND_NEXT(sa, 0x4e8c);
|
||||
USET_ADD_CP_AND_NEXT(sa, 0x4e09);
|
||||
USET_ADD_CP_AND_NEXT(sa, 0x56db);
|
||||
USET_ADD_CP_AND_NEXT(sa, 0x4e94);
|
||||
USET_ADD_CP_AND_NEXT(sa, 0x516d);
|
||||
USET_ADD_CP_AND_NEXT(sa, 0x4e03);
|
||||
USET_ADD_CP_AND_NEXT(sa, 0x516b);
|
||||
USET_ADD_CP_AND_NEXT(sa, 0x4e5d);
|
||||
|
||||
/* add for u_digit() */
|
||||
uset_add(set, U_a);
|
||||
uset_add(set, U_z+1);
|
||||
uset_add(set, U_A);
|
||||
uset_add(set, U_Z+1);
|
||||
sa->add(sa->set, U_a);
|
||||
sa->add(sa->set, U_z+1);
|
||||
sa->add(sa->set, U_A);
|
||||
sa->add(sa->set, U_Z+1);
|
||||
|
||||
/* add for UCHAR_DEFAULT_IGNORABLE_CODE_POINT what was not added above */
|
||||
uset_add(set, WJ); /* range WJ..NOMDIG */
|
||||
uset_add(set, 0xfff0);
|
||||
uset_add(set, 0xfffb+1);
|
||||
uset_add(set, 0xe0000);
|
||||
uset_add(set, 0xe0fff+1);
|
||||
sa->add(sa->set, WJ); /* range WJ..NOMDIG */
|
||||
sa->add(sa->set, 0xfff0);
|
||||
sa->add(sa->set, 0xfffb+1);
|
||||
sa->add(sa->set, 0xe0000);
|
||||
sa->add(sa->set, 0xe0fff+1);
|
||||
|
||||
/* add for UCHAR_GRAPHEME_BASE and others */
|
||||
USET_ADD_CP_AND_NEXT(set, CGJ);
|
||||
USET_ADD_CP_AND_NEXT(sa, CGJ);
|
||||
|
||||
/* add for UCHAR_JOINING_TYPE */
|
||||
uset_add(set, ZWNJ); /* range ZWNJ..ZWJ */
|
||||
uset_add(set, ZWJ+1);
|
||||
sa->add(sa->set, ZWNJ); /* range ZWNJ..ZWJ */
|
||||
sa->add(sa->set, ZWJ+1);
|
||||
|
||||
/*
|
||||
* Add Jamo type boundaries for UCHAR_HANGUL_SYLLABLE_TYPE.
|
||||
@ -1064,33 +1065,33 @@ uchar_addPropertyStarts(USet *set, UErrorCode *pErrorCode) {
|
||||
* at the end of the per-Jamo-block assignments in Unicode 4 or earlier.
|
||||
* (These have not changed since Unicode 2.)
|
||||
*/
|
||||
uset_add(set, 0x1100);
|
||||
sa->add(sa->set, 0x1100);
|
||||
value=U_HST_LEADING_JAMO;
|
||||
for(c=0x115a; c<=0x115f; ++c) {
|
||||
value2=u_getIntPropertyValue(c, UCHAR_HANGUL_SYLLABLE_TYPE);
|
||||
if(value!=value2) {
|
||||
value=value2;
|
||||
uset_add(set, c);
|
||||
sa->add(sa->set, c);
|
||||
}
|
||||
}
|
||||
|
||||
uset_add(set, 0x1160);
|
||||
sa->add(sa->set, 0x1160);
|
||||
value=U_HST_VOWEL_JAMO;
|
||||
for(c=0x11a3; c<=0x11a7; ++c) {
|
||||
value2=u_getIntPropertyValue(c, UCHAR_HANGUL_SYLLABLE_TYPE);
|
||||
if(value!=value2) {
|
||||
value=value2;
|
||||
uset_add(set, c);
|
||||
sa->add(sa->set, c);
|
||||
}
|
||||
}
|
||||
|
||||
uset_add(set, 0x11a8);
|
||||
sa->add(sa->set, 0x11a8);
|
||||
value=U_HST_TRAILING_JAMO;
|
||||
for(c=0x11fa; c<=0x11ff; ++c) {
|
||||
value2=u_getIntPropertyValue(c, UCHAR_HANGUL_SYLLABLE_TYPE);
|
||||
if(value!=value2) {
|
||||
value=value2;
|
||||
uset_add(set, c);
|
||||
sa->add(sa->set, c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -626,33 +626,6 @@ ucnv_getPlatform (const UConverter * converter,
|
||||
return (UConverterPlatform)converter->sharedData->staticData->platform;
|
||||
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ucnv_getUnicodeSet(const UConverter *cnv,
|
||||
USet *setFillIn,
|
||||
UConverterUnicodeSet whichSet,
|
||||
UErrorCode *pErrorCode) {
|
||||
/* argument checking */
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
if(cnv==NULL || setFillIn==NULL || whichSet<UCNV_ROUNDTRIP_SET || UCNV_SET_COUNT<=whichSet) {
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
/* does this converter support this function? */
|
||||
if(cnv->sharedData->impl->getUnicodeSet==NULL) {
|
||||
*pErrorCode=U_UNSUPPORTED_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
/* empty the set */
|
||||
uset_clear(setFillIn);
|
||||
|
||||
/* call the converter to add the code points it supports */
|
||||
cnv->sharedData->impl->getUnicodeSet(cnv, setFillIn, whichSet, pErrorCode);
|
||||
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ucnv_getToUCallBack (const UConverter * converter,
|
||||
UConverterToUCallback *action,
|
||||
|
@ -2985,7 +2985,7 @@ _ISO_2022_SafeClone(
|
||||
|
||||
static void
|
||||
_ISO_2022_GetUnicodeSet(const UConverter *cnv,
|
||||
USet *set,
|
||||
USetAdder *sa,
|
||||
UConverterUnicodeSet which,
|
||||
UErrorCode *pErrorCode)
|
||||
{
|
||||
@ -2998,8 +2998,8 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
|
||||
#ifdef U_ENABLE_GENERIC_ISO_2022
|
||||
if (cnv->sharedData == &_ISO2022Data) {
|
||||
/* We use UTF-8 in this case */
|
||||
uset_addRange(set, 0, 0xd7FF);
|
||||
uset_addRange(set, 0xE000, 0x10FFFF);
|
||||
sa->addRange(sa->set, 0, 0xd7FF);
|
||||
sa->addRange(sa->set, 0xE000, 0x10FFFF);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
@ -3011,24 +3011,25 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
|
||||
case 'j':
|
||||
if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
|
||||
/* include Latin-1 for some variants of JP */
|
||||
uset_addRange(set, 0, 0xff);
|
||||
sa->addRange(sa->set, 0, 0xff);
|
||||
} else {
|
||||
/* include ASCII for JP */
|
||||
uset_addRange(set, 0, 0x7f);
|
||||
sa->addRange(sa->set, 0, 0x7f);
|
||||
}
|
||||
if(jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT)) {
|
||||
/* include half-width Katakana for JP */
|
||||
uset_addRange(set, 0xff61, 0xff9f);
|
||||
sa->addRange(sa->set, 0xff61, 0xff9f);
|
||||
}
|
||||
break;
|
||||
case 'c':
|
||||
case 'z':
|
||||
/* include ASCII for CN */
|
||||
uset_addRange(set, 0, 0x7f);
|
||||
sa->addRange(sa->set, 0, 0x7f);
|
||||
break;
|
||||
case 'k':
|
||||
/* there is only one converter for KR, and it is not in the myConverterArray[] */
|
||||
ucnv_getUnicodeSet(cnvData->currentConverter, set, which, pErrorCode);
|
||||
cnvData->currentConverter->sharedData->impl->getUnicodeSet(
|
||||
cnvData->currentConverter, sa, which, pErrorCode);
|
||||
return;
|
||||
default:
|
||||
break;
|
||||
@ -3049,11 +3050,11 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
|
||||
/* special handling for non-EXT ISO-2022-CN: add only code points for CNS planes 1 and 2 */
|
||||
_MBCSGetUnicodeSetForBytes(
|
||||
cnvData->myConverterArray[i],
|
||||
set, UCNV_ROUNDTRIP_SET,
|
||||
sa, UCNV_ROUNDTRIP_SET,
|
||||
0, 0x81, 0x82,
|
||||
pErrorCode);
|
||||
} else {
|
||||
_MBCSGetUnicodeSetForUnicode(cnvData->myConverterArray[i], set, which, pErrorCode);
|
||||
_MBCSGetUnicodeSetForUnicode(cnvData->myConverterArray[i], sa, which, pErrorCode);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -28,19 +28,19 @@
|
||||
|
||||
U_CFUNC void
|
||||
ucnv_getCompleteUnicodeSet(const UConverter *cnv,
|
||||
USet *set,
|
||||
USetAdder *sa,
|
||||
UConverterUnicodeSet which,
|
||||
UErrorCode *pErrorCode) {
|
||||
uset_addRange(set, 0, 0x10ffff);
|
||||
sa->addRange(sa->set, 0, 0x10ffff);
|
||||
}
|
||||
|
||||
U_CFUNC void
|
||||
ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv,
|
||||
USet *set,
|
||||
USetAdder *sa,
|
||||
UConverterUnicodeSet which,
|
||||
UErrorCode *pErrorCode) {
|
||||
uset_addRange(set, 0, 0xd7ff);
|
||||
uset_addRange(set, 0xe000, 0x10ffff);
|
||||
sa->addRange(sa->set, 0, 0xd7ff);
|
||||
sa->addRange(sa->set, 0xe000, 0x10ffff);
|
||||
}
|
||||
|
||||
U_CFUNC void
|
||||
|
@ -24,6 +24,8 @@
|
||||
|
||||
#include "unicode/ucnv.h"
|
||||
#include "unicode/ucnv_err.h"
|
||||
#include "unicode/uset.h"
|
||||
#include "uset_imp.h"
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
@ -169,7 +171,7 @@ typedef UConverter * (*UConverterSafeClone) (const UConverter *cnv,
|
||||
* For more documentation, see ucnv_getUnicodeSet() in ucnv.h.
|
||||
*/
|
||||
typedef void (*UConverterGetUnicodeSet) (const UConverter *cnv,
|
||||
USet *set,
|
||||
USetAdder *sa,
|
||||
UConverterUnicodeSet which,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
@ -244,13 +246,13 @@ U_CDECL_END
|
||||
|
||||
U_CFUNC void
|
||||
ucnv_getCompleteUnicodeSet(const UConverter *cnv,
|
||||
USet *set,
|
||||
USetAdder *sa,
|
||||
UConverterUnicodeSet which,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
U_CFUNC void
|
||||
ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv,
|
||||
USet *set,
|
||||
USetAdder *sa,
|
||||
UConverterUnicodeSet which,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
|
@ -932,7 +932,7 @@ ucnv_extContinueMatchFromU(UConverter *cnv,
|
||||
static void
|
||||
ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData,
|
||||
const int32_t *cx,
|
||||
USet *set,
|
||||
USetAdder *sa,
|
||||
UConverterUnicodeSet which,
|
||||
int32_t minLength,
|
||||
UChar32 c,
|
||||
@ -958,10 +958,10 @@ ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData,
|
||||
) {
|
||||
if(c>=0) {
|
||||
/* add the initial code point */
|
||||
uset_add(set, c);
|
||||
sa->add(sa->set, c);
|
||||
} else {
|
||||
/* add the string so far */
|
||||
uset_addString(set, s, length);
|
||||
sa->addString(sa->set, s, length);
|
||||
}
|
||||
}
|
||||
|
||||
@ -974,7 +974,7 @@ ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData,
|
||||
/* no mapping, do nothing */
|
||||
} else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) {
|
||||
ucnv_extGetUnicodeSetString(
|
||||
sharedData, cx, set, which, minLength,
|
||||
sharedData, cx, sa, which, minLength,
|
||||
U_SENTINEL, s, length+1,
|
||||
(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value),
|
||||
pErrorCode);
|
||||
@ -982,14 +982,14 @@ ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData,
|
||||
UCNV_EXT_FROM_U_ROUNDTRIP_FLAG) &&
|
||||
UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength
|
||||
) {
|
||||
uset_addString(set, s, length+1);
|
||||
sa->addString(sa->set, s, length+1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
U_CFUNC void
|
||||
ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData,
|
||||
USet *set,
|
||||
USetAdder *sa,
|
||||
UConverterUnicodeSet which,
|
||||
UErrorCode *pErrorCode) {
|
||||
const int32_t *cx;
|
||||
@ -1051,7 +1051,7 @@ ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData,
|
||||
length=0;
|
||||
U16_APPEND_UNSAFE(s, length, c);
|
||||
ucnv_extGetUnicodeSetString(
|
||||
sharedData, cx, set, which, minLength,
|
||||
sharedData, cx, sa, which, minLength,
|
||||
c, s, length,
|
||||
(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value),
|
||||
pErrorCode);
|
||||
@ -1059,7 +1059,7 @@ ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData,
|
||||
UCNV_EXT_FROM_U_ROUNDTRIP_FLAG) &&
|
||||
UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength
|
||||
) {
|
||||
uset_add(set, c);
|
||||
sa->add(sa->set, c);
|
||||
}
|
||||
} while((++c&0xf)!=0);
|
||||
} else {
|
||||
|
@ -384,7 +384,7 @@ ucnv_extContinueMatchFromU(UConverter *cnv,
|
||||
|
||||
U_CFUNC void
|
||||
ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData,
|
||||
USet *set,
|
||||
USetAdder *sa,
|
||||
UConverterUnicodeSet which,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
|
@ -664,12 +664,12 @@ _LMBCSSafeClone(const UConverter *cnv,
|
||||
|
||||
static void
|
||||
_LMBCSGetUnicodeSet(const UConverter *cnv,
|
||||
USet *set,
|
||||
USetAdder *sa,
|
||||
UConverterUnicodeSet which,
|
||||
UErrorCode *pErrorCode) {
|
||||
/* all but U+F6xx, see LMBCS explanation above (search for F6xx) */
|
||||
uset_addRange(set, 0, 0xf5ff);
|
||||
uset_addRange(set, 0xf700, 0x10ffff);
|
||||
sa->addRange(sa->set, 0, 0xf5ff);
|
||||
sa->addRange(sa->set, 0xf700, 0x10ffff);
|
||||
}
|
||||
|
||||
/*
|
||||
|
62
icu4c/source/common/ucnv_set.c
Normal file
62
icu4c/source/common/ucnv_set.c
Normal file
@ -0,0 +1,62 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2003-2004, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: ucnv_set.c
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2004sep07
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* Conversion API functions using USet (ucnv_getUnicodeSet())
|
||||
* moved here from ucnv.c for removing the dependency of other ucnv_
|
||||
* implementation functions on the USet implementation.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uset.h"
|
||||
#include "unicode/ucnv.h"
|
||||
#include "ucnv_bld.h"
|
||||
#include "uset_imp.h"
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ucnv_getUnicodeSet(const UConverter *cnv,
|
||||
USet *setFillIn,
|
||||
UConverterUnicodeSet whichSet,
|
||||
UErrorCode *pErrorCode) {
|
||||
/* argument checking */
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
if(cnv==NULL || setFillIn==NULL || whichSet<UCNV_ROUNDTRIP_SET || UCNV_SET_COUNT<=whichSet) {
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
/* does this converter support this function? */
|
||||
if(cnv->sharedData->impl->getUnicodeSet==NULL) {
|
||||
*pErrorCode=U_UNSUPPORTED_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
{
|
||||
USetAdder sa={
|
||||
NULL,
|
||||
uset_add,
|
||||
uset_addRange,
|
||||
uset_addString
|
||||
};
|
||||
sa.set=setFillIn;
|
||||
|
||||
/* empty the set */
|
||||
uset_clear(setFillIn);
|
||||
|
||||
/* call the converter to add the code points it supports */
|
||||
cnv->sharedData->impl->getUnicodeSet(cnv, &sa, whichSet, pErrorCode);
|
||||
}
|
||||
}
|
@ -510,17 +510,17 @@ _HZ_SafeClone(const UConverter *cnv,
|
||||
|
||||
static void
|
||||
_HZ_GetUnicodeSet(const UConverter *cnv,
|
||||
USet *set,
|
||||
USetAdder *sa,
|
||||
UConverterUnicodeSet which,
|
||||
UErrorCode *pErrorCode) {
|
||||
/* the tilde '~' is hardcoded in the converter */
|
||||
uset_add(set, 0x7e);
|
||||
sa->add(sa->set, 0x7e);
|
||||
|
||||
/* add all of the code points that the sub-converter handles */
|
||||
((UConverterDataHZ*)cnv->extraInfo)->
|
||||
gbConverter->sharedData->impl->
|
||||
getUnicodeSet(((UConverterDataHZ*)cnv->extraInfo)->gbConverter,
|
||||
set, which, pErrorCode);
|
||||
sa, which, pErrorCode);
|
||||
}
|
||||
|
||||
static const UConverterImpl _HZImpl={
|
||||
|
@ -1332,7 +1332,7 @@ _ISCII_SafeClone(const UConverter *cnv,
|
||||
|
||||
static void
|
||||
_ISCIIGetUnicodeSet(const UConverter *cnv,
|
||||
USet *set,
|
||||
USetAdder *sa,
|
||||
UConverterUnicodeSet which,
|
||||
UErrorCode *pErrorCode)
|
||||
{
|
||||
@ -1341,19 +1341,19 @@ _ISCIIGetUnicodeSet(const UConverter *cnv,
|
||||
|
||||
/* Since all ISCII versions allow switching to other ISCII
|
||||
scripts, we add all roundtrippable characters to this set. */
|
||||
uset_addRange(set, 0, ASCII_END);
|
||||
sa->addRange(sa->set, 0, ASCII_END);
|
||||
for (script = DEVANAGARI; script <= MALAYALAM; script++) {
|
||||
mask = (uint8_t)(lookupInitialData[script][1]);
|
||||
for (idx = 0; idx < DELTA; idx++) {
|
||||
if (validityTable[idx] & mask) {
|
||||
uset_add(set, idx + (script * DELTA) + INDIC_BLOCK_BEGIN);
|
||||
sa->add(sa->set, idx + (script * DELTA) + INDIC_BLOCK_BEGIN);
|
||||
}
|
||||
}
|
||||
}
|
||||
uset_add(set, DANDA);
|
||||
uset_add(set, DOUBLE_DANDA);
|
||||
uset_add(set, ZWNJ);
|
||||
uset_add(set, ZWJ);
|
||||
sa->add(sa->set, DANDA);
|
||||
sa->add(sa->set, DOUBLE_DANDA);
|
||||
sa->add(sa->set, ZWNJ);
|
||||
sa->add(sa->set, ZWJ);
|
||||
}
|
||||
|
||||
static const UConverterImpl _ISCIIImpl={
|
||||
|
@ -332,10 +332,10 @@ noMoreInput:
|
||||
|
||||
static void
|
||||
_Latin1GetUnicodeSet(const UConverter *cnv,
|
||||
USet *set,
|
||||
USetAdder *sa,
|
||||
UConverterUnicodeSet which,
|
||||
UErrorCode *pErrorCode) {
|
||||
uset_addRange(set, 0, 0xff);
|
||||
sa->addRange(sa->set, 0, 0xff);
|
||||
}
|
||||
|
||||
static const UConverterImpl _Latin1Impl={
|
||||
@ -534,10 +534,10 @@ _ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs,
|
||||
|
||||
static void
|
||||
_ASCIIGetUnicodeSet(const UConverter *cnv,
|
||||
USet *set,
|
||||
USetAdder *sa,
|
||||
UConverterUnicodeSet which,
|
||||
UErrorCode *pErrorCode) {
|
||||
uset_addRange(set, 0, 0x7f);
|
||||
sa->addRange(sa->set, 0, 0x7f);
|
||||
}
|
||||
|
||||
static const UConverterImpl _ASCIIImpl={
|
||||
|
@ -430,7 +430,7 @@ _MBCSSizeofFromUBytes(UConverterMBCSTable *mbcsTable) {
|
||||
static void
|
||||
_getUnicodeSetForBytes(const UConverterSharedData *sharedData,
|
||||
const int32_t (*stateTable)[256], const uint16_t *unicodeCodeUnits,
|
||||
USet *set,
|
||||
USetAdder *sa,
|
||||
UConverterUnicodeSet which,
|
||||
uint8_t state, uint32_t offset, int32_t lowByte, int32_t highByte,
|
||||
|
||||
@ -442,7 +442,7 @@ _getUnicodeSetForBytes(const UConverterSharedData *sharedData,
|
||||
if(MBCS_ENTRY_IS_TRANSITION(entry)) {
|
||||
_getUnicodeSetForBytes(
|
||||
sharedData, stateTable, unicodeCodeUnits,
|
||||
set, which,
|
||||
sa, which,
|
||||
(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry),
|
||||
offset+MBCS_ENTRY_TRANSITION_OFFSET(entry),
|
||||
0, 0xff,
|
||||
@ -490,7 +490,7 @@ _getUnicodeSetForBytes(const UConverterSharedData *sharedData,
|
||||
}
|
||||
|
||||
if(c>=0) {
|
||||
uset_add(set, c);
|
||||
sa->add(sa->set, c);
|
||||
}
|
||||
offset=rowOffset;
|
||||
}
|
||||
@ -507,20 +507,20 @@ _getUnicodeSetForBytes(const UConverterSharedData *sharedData,
|
||||
*/
|
||||
U_CFUNC void
|
||||
_MBCSGetUnicodeSetForBytes(const UConverterSharedData *sharedData,
|
||||
USet *set,
|
||||
USetAdder *sa,
|
||||
UConverterUnicodeSet which,
|
||||
uint8_t state, int32_t lowByte, int32_t highByte,
|
||||
UErrorCode *pErrorCode) {
|
||||
_getUnicodeSetForBytes(
|
||||
sharedData, sharedData->mbcs.stateTable, sharedData->mbcs.unicodeCodeUnits,
|
||||
set, which,
|
||||
sa, which,
|
||||
state, 0, lowByte, highByte,
|
||||
pErrorCode);
|
||||
}
|
||||
|
||||
U_CFUNC void
|
||||
_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
|
||||
USet *set,
|
||||
USetAdder *sa,
|
||||
UConverterUnicodeSet which,
|
||||
UErrorCode *pErrorCode) {
|
||||
const UConverterMBCSTable *mbcsTable;
|
||||
@ -565,7 +565,7 @@ _MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
|
||||
*/
|
||||
do {
|
||||
if(*stage3++>=0xf00) {
|
||||
uset_add(set, c);
|
||||
sa->add(sa->set, c);
|
||||
}
|
||||
} while((++c&0xf)!=0);
|
||||
} else {
|
||||
@ -605,7 +605,7 @@ _MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
|
||||
*/
|
||||
do {
|
||||
if((st3&1)!=0 && *stage3>=0x100) {
|
||||
uset_add(set, c);
|
||||
sa->add(sa->set, c);
|
||||
}
|
||||
st3>>=1;
|
||||
++stage3;
|
||||
@ -638,7 +638,7 @@ _MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
|
||||
*/
|
||||
do {
|
||||
if(st3&1) {
|
||||
uset_add(set, c);
|
||||
sa->add(sa->set, c);
|
||||
}
|
||||
st3>>=1;
|
||||
} while((++c&0xf)!=0);
|
||||
@ -652,19 +652,19 @@ _MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
|
||||
}
|
||||
}
|
||||
|
||||
ucnv_extGetUnicodeSet(sharedData, set, which, pErrorCode);
|
||||
ucnv_extGetUnicodeSet(sharedData, sa, which, pErrorCode);
|
||||
}
|
||||
|
||||
static void
|
||||
_MBCSGetUnicodeSet(const UConverter *cnv,
|
||||
USet *set,
|
||||
USetAdder *sa,
|
||||
UConverterUnicodeSet which,
|
||||
UErrorCode *pErrorCode) {
|
||||
if(cnv->options&_MBCS_OPTION_GB18030) {
|
||||
uset_addRange(set, 0, 0xd7ff);
|
||||
uset_addRange(set, 0xe000, 0x10ffff);
|
||||
sa->addRange(sa->set, 0, 0xd7ff);
|
||||
sa->addRange(sa->set, 0xe000, 0x10ffff);
|
||||
} else {
|
||||
_MBCSGetUnicodeSetForUnicode(cnv->sharedData, set, which, pErrorCode);
|
||||
_MBCSGetUnicodeSetForUnicode(cnv->sharedData, sa, which, pErrorCode);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -373,7 +373,7 @@ _MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
|
||||
*/
|
||||
U_CFUNC void
|
||||
_MBCSGetUnicodeSetForBytes(const UConverterSharedData *sharedData,
|
||||
USet *set,
|
||||
USetAdder *sa,
|
||||
UConverterUnicodeSet which,
|
||||
uint8_t state, int32_t lowByte, int32_t highByte,
|
||||
UErrorCode *pErrorCode);
|
||||
@ -388,7 +388,7 @@ _MBCSGetUnicodeSetForBytes(const UConverterSharedData *sharedData,
|
||||
*/
|
||||
U_CFUNC void
|
||||
_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
|
||||
USet *set,
|
||||
USetAdder *sa,
|
||||
UConverterUnicodeSet which,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2003, International Business Machines
|
||||
* Copyright (C) 1999-2004, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
@ -23,6 +23,7 @@
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "unicode/uset.h"
|
||||
#include "uset_imp.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "umutex.h"
|
||||
#include "cmemory.h"
|
||||
@ -1723,7 +1724,7 @@ uprv_getMaxISOCommentLength() {
|
||||
* @param sa USetAdder to receive characters. Existing contents are not cleared here.
|
||||
*/
|
||||
static void
|
||||
charSetToUSet(uint32_t cset[8], USet* uset) {
|
||||
charSetToUSet(uint32_t cset[8], USetAdder *sa) {
|
||||
UChar us[256];
|
||||
char cs[256];
|
||||
|
||||
@ -1731,7 +1732,6 @@ charSetToUSet(uint32_t cset[8], USet* uset) {
|
||||
UErrorCode errorCode;
|
||||
|
||||
errorCode=U_ZERO_ERROR;
|
||||
uset_clear(uset);
|
||||
|
||||
if(!calcNameSetsLengths(&errorCode)) {
|
||||
return;
|
||||
@ -1751,18 +1751,18 @@ charSetToUSet(uint32_t cset[8], USet* uset) {
|
||||
/* add each UChar to the USet */
|
||||
for(i=0; i<length; ++i) {
|
||||
if(us[i]!=0 || cs[i]==0) { /* non-invariant chars become (UChar)0 */
|
||||
uset_add(uset, us[i]);
|
||||
sa->add(sa->set, us[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Fills set with characters that are used in Unicode character names.
|
||||
* @param set USet to receive characters. Existing contents are deleted.
|
||||
* @param sa USetAdder to receive characters.
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uprv_getCharNameCharacters(USet* set) {
|
||||
charSetToUSet(gNameSet, set);
|
||||
uprv_getCharNameCharacters(USetAdder *sa) {
|
||||
charSetToUSet(gNameSet, sa);
|
||||
}
|
||||
|
||||
#if 0
|
||||
@ -1772,11 +1772,11 @@ urename.h and uprops.h changed accordingly.
|
||||
*/
|
||||
/**
|
||||
* Fills set with characters that are used in ISO comments of Unicode characters.
|
||||
* @param set USet to receive characters. Existing contents are deleted.
|
||||
* @param sa USetAdder to receive characters.
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uprv_getISOCommentCharacters(USet* set) {
|
||||
charSetToUSet(gISOCommentSet, set);
|
||||
uprv_getISOCommentCharacters(USetAdder *sa) {
|
||||
charSetToUSet(gISOCommentSet, sa);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -22,6 +22,8 @@
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/uscript.h"
|
||||
#include "unicode/symtable.h"
|
||||
#include "unicode/uset.h"
|
||||
#include "uset_imp.h"
|
||||
#include "ruleiter.h"
|
||||
#include "cmemory.h"
|
||||
#include "uhash.h"
|
||||
@ -1466,14 +1468,38 @@ void UnicodeSet::applyPropertyPattern(RuleCharacterIterator& chars,
|
||||
// Inclusions list
|
||||
//----------------------------------------------------------------
|
||||
|
||||
// USetAdder implementation
|
||||
// Does not use uset.h to reduce code dependencies
|
||||
static void U_CALLCONV
|
||||
_set_add(USet *set, UChar32 c) {
|
||||
((UnicodeSet *)set)->add(c);
|
||||
}
|
||||
|
||||
static void U_CALLCONV
|
||||
_set_addRange(USet *set, UChar32 start, UChar32 end) {
|
||||
((UnicodeSet *)set)->add(start, end);
|
||||
}
|
||||
|
||||
static void U_CALLCONV
|
||||
_set_addString(USet *set, const UChar *str, int32_t length) {
|
||||
((UnicodeSet *)set)->add(UnicodeString((UBool)(length<0), str, length));
|
||||
}
|
||||
|
||||
const UnicodeSet* UnicodeSet::getInclusions(UErrorCode &status) {
|
||||
umtx_lock(NULL);
|
||||
UBool f = (INCLUSIONS == NULL);
|
||||
umtx_unlock(NULL);
|
||||
if (f) {
|
||||
UnicodeSet* incl = new UnicodeSet();
|
||||
USetAdder sa = {
|
||||
(USet *)incl,
|
||||
_set_add,
|
||||
_set_addRange,
|
||||
_set_addString
|
||||
};
|
||||
|
||||
if (incl != NULL) {
|
||||
uprv_getInclusions((USet*)incl, &status);
|
||||
uprv_getInclusions(&sa, &status);
|
||||
if (U_SUCCESS(status)) {
|
||||
umtx_lock(NULL);
|
||||
if (INCLUSIONS == NULL) {
|
||||
|
@ -262,7 +262,8 @@ isAcceptable(void * /* context */,
|
||||
static UBool U_CALLCONV
|
||||
_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*limit*/, uint32_t /*value*/) {
|
||||
/* add the start code point to the USet */
|
||||
uset_add((USet *)context, start);
|
||||
USetAdder *sa=(USetAdder *)context;
|
||||
sa->add(sa->set, start);
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
@ -1153,7 +1154,7 @@ unorm_isNFSkippable(UChar32 c, UNormalizationMode mode) {
|
||||
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
unorm_addPropertyStarts(USet *set, UErrorCode *pErrorCode) {
|
||||
unorm_addPropertyStarts(USetAdder *sa, UErrorCode *pErrorCode) {
|
||||
UChar c;
|
||||
|
||||
if(!_haveData(*pErrorCode)) {
|
||||
@ -1161,18 +1162,18 @@ unorm_addPropertyStarts(USet *set, UErrorCode *pErrorCode) {
|
||||
}
|
||||
|
||||
/* add the start code point of each same-value range of each trie */
|
||||
utrie_enum(&normTrie, NULL, _enumPropertyStartsRange, set);
|
||||
utrie_enum(&fcdTrie, NULL, _enumPropertyStartsRange, set);
|
||||
utrie_enum(&normTrie, NULL, _enumPropertyStartsRange, sa);
|
||||
utrie_enum(&fcdTrie, NULL, _enumPropertyStartsRange, sa);
|
||||
if(formatVersion_2_1) {
|
||||
utrie_enum(&auxTrie, NULL, _enumPropertyStartsRange, set);
|
||||
utrie_enum(&auxTrie, NULL, _enumPropertyStartsRange, sa);
|
||||
}
|
||||
|
||||
/* add Hangul LV syllables and LV+1 because of skippables */
|
||||
for(c=HANGUL_BASE; c<HANGUL_BASE+HANGUL_COUNT; c+=JAMO_T_COUNT) {
|
||||
uset_add(set, c);
|
||||
uset_add(set, c+1);
|
||||
sa->add(sa->set, c);
|
||||
sa->add(sa->set, c+1);
|
||||
}
|
||||
uset_add(set, HANGUL_BASE+HANGUL_COUNT); /* add Hangul+1 to continue with other properties */
|
||||
sa->add(sa->set, HANGUL_BASE+HANGUL_COUNT); /* add Hangul+1 to continue with other properties */
|
||||
}
|
||||
|
||||
U_CAPI UNormalizationCheckResult U_EXPORT2
|
||||
|
@ -395,7 +395,7 @@ unorm_isNFSkippable(UChar32 c, UNormalizationMode mode);
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
unorm_addPropertyStarts(USet *set, UErrorCode *pErrorCode);
|
||||
unorm_addPropertyStarts(USetAdder *sa, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Swap unorm.icu. See udataswp.h.
|
||||
|
@ -27,10 +27,6 @@
|
||||
|
||||
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
|
||||
|
||||
#ifdef DEBUG
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Get the next non-ignorable ASCII character from a property name
|
||||
* and lowercases it.
|
||||
@ -584,76 +580,14 @@ strrch(const char* source,uint32_t sourceLen,char find){
|
||||
#endif
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
uprv_getInclusions(USet* set, UErrorCode *pErrorCode) {
|
||||
uprv_getInclusions(USetAdder *sa, UErrorCode *pErrorCode) {
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
uset_clear(set);
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
unorm_addPropertyStarts(set, pErrorCode);
|
||||
#endif
|
||||
uchar_addPropertyStarts(set, pErrorCode);
|
||||
ucase_addPropertyStarts(uchar_getCaseProps(pErrorCode), set, pErrorCode);
|
||||
|
||||
#ifdef DEBUG
|
||||
{
|
||||
UChar* result=NULL;
|
||||
int32_t resultCapacity=0;
|
||||
int32_t bufLen = uset_toPattern(set,result,resultCapacity,TRUE,pErrorCode);
|
||||
char* resultChars = NULL;
|
||||
if(*pErrorCode == U_BUFFER_OVERFLOW_ERROR){
|
||||
uint32_t len = 0, add=0;
|
||||
char *buf=NULL, *current = NULL;
|
||||
*pErrorCode = U_ZERO_ERROR;
|
||||
resultCapacity = bufLen;
|
||||
result = (UChar*) uprv_malloc(resultCapacity * U_SIZEOF_UCHAR);
|
||||
bufLen = uset_toPattern(set,result,resultCapacity,TRUE,pErrorCode);
|
||||
resultChars = (char*) uprv_malloc(len+1);
|
||||
u_UCharsToChars(result,resultChars,bufLen);
|
||||
resultChars[bufLen] = 0;
|
||||
buf = resultChars;
|
||||
/*printf(resultChars);*/
|
||||
while(len < bufLen){
|
||||
add = 70-5/* for ", +\n */;
|
||||
current = buf +len;
|
||||
if (add < (bufLen-len)) {
|
||||
uint32_t index = strrch(current,add,'\\');
|
||||
if (index > add) {
|
||||
index = add;
|
||||
} else {
|
||||
int32_t num =index-1;
|
||||
uint32_t seqLen;
|
||||
while(num>0){
|
||||
if(current[num]=='\\'){
|
||||
num--;
|
||||
}else{
|
||||
break;
|
||||
}
|
||||
}
|
||||
if ((index-num)%2==0) {
|
||||
index--;
|
||||
}
|
||||
seqLen = (current[index+1]=='u') ? 6 : 2;
|
||||
if ((add-index) < seqLen) {
|
||||
add = index + seqLen;
|
||||
}
|
||||
}
|
||||
}
|
||||
fwrite("\"",1,1,stdout);
|
||||
if(len+add<bufLen){
|
||||
fwrite(current,1,add,stdout);
|
||||
fwrite("\" +\n",1,4,stdout);
|
||||
}else{
|
||||
fwrite(current,1,bufLen-len,stdout);
|
||||
}
|
||||
len+=add;
|
||||
}
|
||||
|
||||
}
|
||||
uprv_free(result);
|
||||
uprv_free(resultChars);
|
||||
}
|
||||
unorm_addPropertyStarts(sa, pErrorCode);
|
||||
#endif
|
||||
uchar_addPropertyStarts(sa, pErrorCode);
|
||||
ucase_addPropertyStarts(uchar_getCaseProps(pErrorCode), sa, pErrorCode);
|
||||
}
|
||||
|
@ -22,6 +22,7 @@
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uset.h"
|
||||
#include "uset_imp.h"
|
||||
#include "ucase.h"
|
||||
#include "udataswp.h"
|
||||
|
||||
@ -348,10 +349,10 @@ uprv_getMaxISOCommentLength();
|
||||
* Fills set with characters that are used in Unicode character names.
|
||||
* Includes all characters that are used in regular/Unicode 1.0/extended names.
|
||||
* Just empties the set if no character names are available.
|
||||
* @param set USet to receive characters. Existing contents are deleted.
|
||||
* @param sa USetAdder to receive characters.
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uprv_getCharNameCharacters(USet* set);
|
||||
uprv_getCharNameCharacters(USetAdder *sa);
|
||||
|
||||
#if 0
|
||||
/*
|
||||
@ -361,10 +362,10 @@ urename.h and unames.c changed accordingly.
|
||||
/**
|
||||
* Fills set with characters that are used in ISO comments.
|
||||
* Just empties the set if no ISO comments are available.
|
||||
* @param set USet to receive characters. Existing contents are deleted.
|
||||
* @param sa USetAdder to receive characters.
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uprv_getISOCommentCharacters(USet* set);
|
||||
uprv_getISOCommentCharacters(USetAdder *sa);
|
||||
*/
|
||||
#endif
|
||||
|
||||
@ -374,18 +375,18 @@ uprv_getISOCommentCharacters(USet* set);
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uchar_addPropertyStarts(USet *set, UErrorCode *pErrorCode);
|
||||
uchar_addPropertyStarts(USetAdder *sa, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Return a set of characters for property enumeration.
|
||||
* For each two consecutive characters (start, limit) in the set,
|
||||
* all of the properties for start..limit-1 are all the same.
|
||||
*
|
||||
* @param set USet to receive result. Existing contents are lost.
|
||||
* @param sa USetAdder to receive result; items are only added, the set is not cleared first.
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uprv_getInclusions(USet* set, UErrorCode *pErrorCode);
|
||||
uprv_getInclusions(USetAdder *sa, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Swap the ICU Unicode properties file. See uchar.c.
|
||||
|
51
icu4c/source/common/uset_imp.h
Normal file
51
icu4c/source/common/uset_imp.h
Normal file
@ -0,0 +1,51 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2004, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: uset_imp.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2004sep07
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* Internal USet definitions.
|
||||
*/
|
||||
|
||||
#ifndef __USET_IMP_H__
|
||||
#define __USET_IMP_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uset.h"
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
typedef void U_CALLCONV
|
||||
USetAdd(USet *set, UChar32 c);
|
||||
|
||||
typedef void U_CALLCONV
|
||||
USetAddRange(USet *set, UChar32 start, UChar32 end);
|
||||
|
||||
typedef void U_CALLCONV
|
||||
USetAddString(USet *set, const UChar *str, int32_t length);
|
||||
|
||||
/**
|
||||
* Interface for adding items to a USet, to keep low-level code from
|
||||
* statically depending on the USet implementation.
|
||||
* Calls will look like sa->add(sa->set, c);
|
||||
*/
|
||||
struct USetAdder {
|
||||
USet *set;
|
||||
USetAdd *add;
|
||||
USetAddRange *addRange;
|
||||
USetAddString *addString;
|
||||
};
|
||||
typedef struct USetAdder USetAdder;
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user