ICU-4078 use USetAdder interface to remove dependencies of low-level code on the USet/UnicodeSet implementation

X-SVN-Rev: 16265
This commit is contained in:
Markus Scherer 2004-09-07 17:59:53 +00:00
parent 02ef1c94e9
commit 8a3a93deed
26 changed files with 312 additions and 238 deletions

View File

@ -63,11 +63,11 @@ udata.o ucmndata.o udatamem.o udataswp.o umapfile.o ucol_swp.o \
uresbund.o ures_cnv.o uresdata.o resbund.o resbund_cnv.o \
ucat.o locmap.o uloc.o locid.o \
uhash.o uhash_us.o \
ucnv.o ucnv_bld.o ucnv_cb.o ucnv_cnv.o ucnv_err.o ucnv_ext.o ucnv_io.o ucnvlat1.o \
ucnv.o ucnv_set.o ucnv_bld.o ucnv_cb.o ucnv_cnv.o ucnv_err.o ucnv_ext.o ucnv_io.o ucnvlat1.o \
ucnv_u7.o ucnv_u8.o ucnv_u16.o ucnv_u32.o ucnvscsu.o ucnvbocu.o \
ucnvmbcs.o ucnv2022.o ucnvhz.o ucnv_lmb.o ucnvisci.o \
unistr.o unistr_case.o unistr_cnv.o unistr_props.o \
utf_impl.o ustring.o ustr_cnv.o ustrcase.o cstring.o ustrfmt.o ustrtrns.o \
utf_impl.o ustring.o ustr_cnv.o ustrcase.o cstring.o ustrfmt.o ustrtrns.o ustr_wcs.o \
normlzr.o unorm.o unorm_it.o chariter.o schriter.o uchriter.o uiter.o \
uchar.o uprops.o ucase.o propname.o ubidi.o ubidiwrt.o ubidiln.o ushape.o unames.o \
ucln_cmn.o uscript.o usc_impl.o uvector.o ustack.o uvectr32.o ucmp8.o \

View File

@ -1516,6 +1516,10 @@ SOURCE=.\ucnv_lmb.c
# End Source File
# Begin Source File
SOURCE=.\ucnv_set.c
# End Source File
# Begin Source File
SOURCE=.\ucnv_u16.c
# End Source File
# Begin Source File
@ -2911,6 +2915,10 @@ InputPath=.\unicode\uset.h
# End Source File
# Begin Source File
SOURCE=.\uset_imp.h
# End Source File
# Begin Source File
SOURCE=.\uset_props.cpp
# End Source File
# Begin Source File
@ -3458,6 +3466,10 @@ SOURCE=.\ustr_imp.h
# End Source File
# Begin Source File
SOURCE=.\ustr_wcs.c
# End Source File
# Begin Source File
SOURCE=.\ustrcase.c
# End Source File
# Begin Source File

View File

@ -755,6 +755,9 @@
<File
RelativePath=".\ucnv.c">
</File>
<File
RelativePath=".\ucnv_set.c">
</File>
<File
RelativePath=".\unicode\ucnv.h">
<FileConfiguration
@ -1519,6 +1522,9 @@
RelativePath=".\uset.cpp">
</File>
<File
RelativePath=".\uset_imp.h">
</File>
<File
RelativePath=".\uset_props.cpp">
</File>
<File
@ -1798,6 +1804,9 @@
<File
RelativePath=".\ustr_imp.h">
</File>
<File
RelativePath=".\ustr_wcs.c">
</File>
<File
RelativePath=".\ustrcase.c">
</File>

View File

@ -340,15 +340,15 @@ ucase_swap(const UDataSwapper *ds,
static UBool U_CALLCONV
_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 limit, uint32_t value) {
/* add the start code point to the USet */
uset_add((USet *)context, start);
USetAdder *sa=(USetAdder *)context;
sa->add(sa->set, start);
return TRUE;
}
/* TODO define/use USetAdder */
U_CAPI void U_EXPORT2
ucase_addPropertyStarts(const UCaseProps *csp, USet *set, UErrorCode *pErrorCode) {
ucase_addPropertyStarts(const UCaseProps *csp, USetAdder *sa, UErrorCode *pErrorCode) {
/* add the start code point of each same-value range of the trie */
utrie_enum(&csp->trie, NULL, _enumPropertyStartsRange, set);
utrie_enum(&csp->trie, NULL, _enumPropertyStartsRange, sa);
/* add code points with hardcoded properties, plus the ones following them */

View File

@ -21,6 +21,7 @@
#include "unicode/utypes.h"
#include "unicode/uset.h"
#include "uset_imp.h"
#include "udataswp.h"
U_CDECL_BEGIN
@ -53,7 +54,7 @@ ucase_swap(const UDataSwapper *ds,
UErrorCode *pErrorCode);
U_CAPI void U_EXPORT2
ucase_addPropertyStarts(const UCaseProps *csp, USet *set, UErrorCode *pErrorCode);
ucase_addPropertyStarts(const UCaseProps *csp, USetAdder *sa, UErrorCode *pErrorCode);
/**
* Bit mask for getting just the options from a string compare options word

View File

@ -982,14 +982,15 @@ uprv_getMaxValues(int32_t column) {
static UBool U_CALLCONV
_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 limit, uint32_t value) {
/* add the start code point to the USet */
uset_add((USet *)context, start);
USetAdder *sa=(USetAdder *)context;
sa->add(sa->set, start);
return TRUE;
}
#define USET_ADD_CP_AND_NEXT(set, cp) uset_add(set, cp); uset_add(set, cp+1)
/*
 * Add the code point cp and its successor cp+1 through the USetAdder sa.
 * Wrapped in do { } while(0) so that the two calls behave as one statement
 * (for example as the body of an unbraced if/else); arguments are
 * parenthesized so that expressions can be passed safely.
 * Note: sa and cp are each evaluated more than once - pass side-effect-free
 * expressions only.
 */
#define USET_ADD_CP_AND_NEXT(sa, cp) do { (sa)->add((sa)->set, (cp)); (sa)->add((sa)->set, (cp)+1); } while(0)
U_CAPI void U_EXPORT2
uchar_addPropertyStarts(USet *set, UErrorCode *pErrorCode) {
uchar_addPropertyStarts(USetAdder *sa, UErrorCode *pErrorCode) {
UChar32 c;
int32_t value, value2;
@ -999,62 +1000,62 @@ uchar_addPropertyStarts(USet *set, UErrorCode *pErrorCode) {
}
/* add the start code point of each same-value range of each trie */
utrie_enum(&propsTrie, NULL, _enumPropertyStartsRange, set);
utrie_enum(&propsVectorsTrie, NULL, _enumPropertyStartsRange, set);
utrie_enum(&propsTrie, NULL, _enumPropertyStartsRange, sa);
utrie_enum(&propsVectorsTrie, NULL, _enumPropertyStartsRange, sa);
/* add code points with hardcoded properties, plus the ones following them */
/* add for IS_THAT_CONTROL_SPACE() */
uset_add(set, TAB); /* range TAB..CR */
uset_add(set, CR+1);
uset_add(set, 0x1c);
uset_add(set, 0x1f+1);
USET_ADD_CP_AND_NEXT(set, NL);
sa->add(sa->set, TAB); /* range TAB..CR */
sa->add(sa->set, CR+1);
sa->add(sa->set, 0x1c);
sa->add(sa->set, 0x1f+1);
USET_ADD_CP_AND_NEXT(sa, NL);
/* add for u_isIDIgnorable() what was not added above */
uset_add(set, DEL); /* range DEL..NBSP-1, NBSP added below */
uset_add(set, HAIRSP);
uset_add(set, RLM+1);
uset_add(set, INHSWAP);
uset_add(set, NOMDIG+1);
USET_ADD_CP_AND_NEXT(set, ZWNBSP);
sa->add(sa->set, DEL); /* range DEL..NBSP-1, NBSP added below */
sa->add(sa->set, HAIRSP);
sa->add(sa->set, RLM+1);
sa->add(sa->set, INHSWAP);
sa->add(sa->set, NOMDIG+1);
USET_ADD_CP_AND_NEXT(sa, ZWNBSP);
/* add no-break spaces for u_isWhitespace() what was not added above */
USET_ADD_CP_AND_NEXT(set, NBSP);
USET_ADD_CP_AND_NEXT(set, FIGURESP);
USET_ADD_CP_AND_NEXT(set, NNBSP);
USET_ADD_CP_AND_NEXT(sa, NBSP);
USET_ADD_CP_AND_NEXT(sa, FIGURESP);
USET_ADD_CP_AND_NEXT(sa, NNBSP);
/* add for u_charDigitValue() */
USET_ADD_CP_AND_NEXT(set, 0x3007);
USET_ADD_CP_AND_NEXT(set, 0x4e00);
USET_ADD_CP_AND_NEXT(set, 0x4e8c);
USET_ADD_CP_AND_NEXT(set, 0x4e09);
USET_ADD_CP_AND_NEXT(set, 0x56db);
USET_ADD_CP_AND_NEXT(set, 0x4e94);
USET_ADD_CP_AND_NEXT(set, 0x516d);
USET_ADD_CP_AND_NEXT(set, 0x4e03);
USET_ADD_CP_AND_NEXT(set, 0x516b);
USET_ADD_CP_AND_NEXT(set, 0x4e5d);
USET_ADD_CP_AND_NEXT(sa, 0x3007);
USET_ADD_CP_AND_NEXT(sa, 0x4e00);
USET_ADD_CP_AND_NEXT(sa, 0x4e8c);
USET_ADD_CP_AND_NEXT(sa, 0x4e09);
USET_ADD_CP_AND_NEXT(sa, 0x56db);
USET_ADD_CP_AND_NEXT(sa, 0x4e94);
USET_ADD_CP_AND_NEXT(sa, 0x516d);
USET_ADD_CP_AND_NEXT(sa, 0x4e03);
USET_ADD_CP_AND_NEXT(sa, 0x516b);
USET_ADD_CP_AND_NEXT(sa, 0x4e5d);
/* add for u_digit() */
uset_add(set, U_a);
uset_add(set, U_z+1);
uset_add(set, U_A);
uset_add(set, U_Z+1);
sa->add(sa->set, U_a);
sa->add(sa->set, U_z+1);
sa->add(sa->set, U_A);
sa->add(sa->set, U_Z+1);
/* add for UCHAR_DEFAULT_IGNORABLE_CODE_POINT what was not added above */
uset_add(set, WJ); /* range WJ..NOMDIG */
uset_add(set, 0xfff0);
uset_add(set, 0xfffb+1);
uset_add(set, 0xe0000);
uset_add(set, 0xe0fff+1);
sa->add(sa->set, WJ); /* range WJ..NOMDIG */
sa->add(sa->set, 0xfff0);
sa->add(sa->set, 0xfffb+1);
sa->add(sa->set, 0xe0000);
sa->add(sa->set, 0xe0fff+1);
/* add for UCHAR_GRAPHEME_BASE and others */
USET_ADD_CP_AND_NEXT(set, CGJ);
USET_ADD_CP_AND_NEXT(sa, CGJ);
/* add for UCHAR_JOINING_TYPE */
uset_add(set, ZWNJ); /* range ZWNJ..ZWJ */
uset_add(set, ZWJ+1);
sa->add(sa->set, ZWNJ); /* range ZWNJ..ZWJ */
sa->add(sa->set, ZWJ+1);
/*
* Add Jamo type boundaries for UCHAR_HANGUL_SYLLABLE_TYPE.
@ -1064,33 +1065,33 @@ uchar_addPropertyStarts(USet *set, UErrorCode *pErrorCode) {
* at the end of the per-Jamo-block assignments in Unicode 4 or earlier.
* (These have not changed since Unicode 2.)
*/
uset_add(set, 0x1100);
sa->add(sa->set, 0x1100);
value=U_HST_LEADING_JAMO;
for(c=0x115a; c<=0x115f; ++c) {
value2=u_getIntPropertyValue(c, UCHAR_HANGUL_SYLLABLE_TYPE);
if(value!=value2) {
value=value2;
uset_add(set, c);
sa->add(sa->set, c);
}
}
uset_add(set, 0x1160);
sa->add(sa->set, 0x1160);
value=U_HST_VOWEL_JAMO;
for(c=0x11a3; c<=0x11a7; ++c) {
value2=u_getIntPropertyValue(c, UCHAR_HANGUL_SYLLABLE_TYPE);
if(value!=value2) {
value=value2;
uset_add(set, c);
sa->add(sa->set, c);
}
}
uset_add(set, 0x11a8);
sa->add(sa->set, 0x11a8);
value=U_HST_TRAILING_JAMO;
for(c=0x11fa; c<=0x11ff; ++c) {
value2=u_getIntPropertyValue(c, UCHAR_HANGUL_SYLLABLE_TYPE);
if(value!=value2) {
value=value2;
uset_add(set, c);
sa->add(sa->set, c);
}
}
}

View File

@ -626,33 +626,6 @@ ucnv_getPlatform (const UConverter * converter,
return (UConverterPlatform)converter->sharedData->staticData->platform;
}
U_CAPI void U_EXPORT2
ucnv_getUnicodeSet(const UConverter *cnv,
USet *setFillIn,
UConverterUnicodeSet whichSet,
UErrorCode *pErrorCode) {
/* argument checking */
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
return;
}
if(cnv==NULL || setFillIn==NULL || whichSet<UCNV_ROUNDTRIP_SET || UCNV_SET_COUNT<=whichSet) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return;
}
/* does this converter support this function? */
if(cnv->sharedData->impl->getUnicodeSet==NULL) {
*pErrorCode=U_UNSUPPORTED_ERROR;
return;
}
/* empty the set */
uset_clear(setFillIn);
/* call the converter to add the code points it supports */
cnv->sharedData->impl->getUnicodeSet(cnv, setFillIn, whichSet, pErrorCode);
}
U_CAPI void U_EXPORT2
ucnv_getToUCallBack (const UConverter * converter,
UConverterToUCallback *action,

View File

@ -2985,7 +2985,7 @@ _ISO_2022_SafeClone(
static void
_ISO_2022_GetUnicodeSet(const UConverter *cnv,
USet *set,
USetAdder *sa,
UConverterUnicodeSet which,
UErrorCode *pErrorCode)
{
@ -2998,8 +2998,8 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
#ifdef U_ENABLE_GENERIC_ISO_2022
if (cnv->sharedData == &_ISO2022Data) {
/* We use UTF-8 in this case */
uset_addRange(set, 0, 0xd7FF);
uset_addRange(set, 0xE000, 0x10FFFF);
sa->addRange(sa->set, 0, 0xd7FF);
sa->addRange(sa->set, 0xE000, 0x10FFFF);
return;
}
#endif
@ -3011,24 +3011,25 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
case 'j':
if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
/* include Latin-1 for some variants of JP */
uset_addRange(set, 0, 0xff);
sa->addRange(sa->set, 0, 0xff);
} else {
/* include ASCII for JP */
uset_addRange(set, 0, 0x7f);
sa->addRange(sa->set, 0, 0x7f);
}
if(jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT)) {
/* include half-width Katakana for JP */
uset_addRange(set, 0xff61, 0xff9f);
sa->addRange(sa->set, 0xff61, 0xff9f);
}
break;
case 'c':
case 'z':
/* include ASCII for CN */
uset_addRange(set, 0, 0x7f);
sa->addRange(sa->set, 0, 0x7f);
break;
case 'k':
/* there is only one converter for KR, and it is not in the myConverterArray[] */
ucnv_getUnicodeSet(cnvData->currentConverter, set, which, pErrorCode);
cnvData->currentConverter->sharedData->impl->getUnicodeSet(
cnvData->currentConverter, sa, which, pErrorCode);
return;
default:
break;
@ -3049,11 +3050,11 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
/* special handling for non-EXT ISO-2022-CN: add only code points for CNS planes 1 and 2 */
_MBCSGetUnicodeSetForBytes(
cnvData->myConverterArray[i],
set, UCNV_ROUNDTRIP_SET,
sa, UCNV_ROUNDTRIP_SET,
0, 0x81, 0x82,
pErrorCode);
} else {
_MBCSGetUnicodeSetForUnicode(cnvData->myConverterArray[i], set, which, pErrorCode);
_MBCSGetUnicodeSetForUnicode(cnvData->myConverterArray[i], sa, which, pErrorCode);
}
}
}

View File

@ -28,19 +28,19 @@
U_CFUNC void
ucnv_getCompleteUnicodeSet(const UConverter *cnv,
USet *set,
USetAdder *sa,
UConverterUnicodeSet which,
UErrorCode *pErrorCode) {
uset_addRange(set, 0, 0x10ffff);
sa->addRange(sa->set, 0, 0x10ffff);
}
U_CFUNC void
ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv,
USet *set,
USetAdder *sa,
UConverterUnicodeSet which,
UErrorCode *pErrorCode) {
uset_addRange(set, 0, 0xd7ff);
uset_addRange(set, 0xe000, 0x10ffff);
sa->addRange(sa->set, 0, 0xd7ff);
sa->addRange(sa->set, 0xe000, 0x10ffff);
}
U_CFUNC void

View File

@ -24,6 +24,8 @@
#include "unicode/ucnv.h"
#include "unicode/ucnv_err.h"
#include "unicode/uset.h"
#include "uset_imp.h"
U_CDECL_BEGIN
@ -169,7 +171,7 @@ typedef UConverter * (*UConverterSafeClone) (const UConverter *cnv,
* For more documentation, see ucnv_getUnicodeSet() in ucnv.h.
*/
typedef void (*UConverterGetUnicodeSet) (const UConverter *cnv,
USet *set,
USetAdder *sa,
UConverterUnicodeSet which,
UErrorCode *pErrorCode);
@ -244,13 +246,13 @@ U_CDECL_END
U_CFUNC void
ucnv_getCompleteUnicodeSet(const UConverter *cnv,
USet *set,
USetAdder *sa,
UConverterUnicodeSet which,
UErrorCode *pErrorCode);
U_CFUNC void
ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv,
USet *set,
USetAdder *sa,
UConverterUnicodeSet which,
UErrorCode *pErrorCode);

View File

@ -932,7 +932,7 @@ ucnv_extContinueMatchFromU(UConverter *cnv,
static void
ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData,
const int32_t *cx,
USet *set,
USetAdder *sa,
UConverterUnicodeSet which,
int32_t minLength,
UChar32 c,
@ -958,10 +958,10 @@ ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData,
) {
if(c>=0) {
/* add the initial code point */
uset_add(set, c);
sa->add(sa->set, c);
} else {
/* add the string so far */
uset_addString(set, s, length);
sa->addString(sa->set, s, length);
}
}
@ -974,7 +974,7 @@ ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData,
/* no mapping, do nothing */
} else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) {
ucnv_extGetUnicodeSetString(
sharedData, cx, set, which, minLength,
sharedData, cx, sa, which, minLength,
U_SENTINEL, s, length+1,
(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value),
pErrorCode);
@ -982,14 +982,14 @@ ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData,
UCNV_EXT_FROM_U_ROUNDTRIP_FLAG) &&
UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength
) {
uset_addString(set, s, length+1);
sa->addString(sa->set, s, length+1);
}
}
}
U_CFUNC void
ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData,
USet *set,
USetAdder *sa,
UConverterUnicodeSet which,
UErrorCode *pErrorCode) {
const int32_t *cx;
@ -1051,7 +1051,7 @@ ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData,
length=0;
U16_APPEND_UNSAFE(s, length, c);
ucnv_extGetUnicodeSetString(
sharedData, cx, set, which, minLength,
sharedData, cx, sa, which, minLength,
c, s, length,
(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value),
pErrorCode);
@ -1059,7 +1059,7 @@ ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData,
UCNV_EXT_FROM_U_ROUNDTRIP_FLAG) &&
UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength
) {
uset_add(set, c);
sa->add(sa->set, c);
}
} while((++c&0xf)!=0);
} else {

View File

@ -384,7 +384,7 @@ ucnv_extContinueMatchFromU(UConverter *cnv,
U_CFUNC void
ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData,
USet *set,
USetAdder *sa,
UConverterUnicodeSet which,
UErrorCode *pErrorCode);

View File

@ -664,12 +664,12 @@ _LMBCSSafeClone(const UConverter *cnv,
static void
_LMBCSGetUnicodeSet(const UConverter *cnv,
USet *set,
USetAdder *sa,
UConverterUnicodeSet which,
UErrorCode *pErrorCode) {
/* all but U+F6xx, see LMBCS explanation above (search for F6xx) */
uset_addRange(set, 0, 0xf5ff);
uset_addRange(set, 0xf700, 0x10ffff);
sa->addRange(sa->set, 0, 0xf5ff);
sa->addRange(sa->set, 0xf700, 0x10ffff);
}
/*

View File

@ -0,0 +1,62 @@
/*
*******************************************************************************
*
* Copyright (C) 2003-2004, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: ucnv_set.c
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2004sep07
* created by: Markus W. Scherer
*
* Conversion API functions using USet (ucnv_getUnicodeSet())
* moved here from ucnv.c for removing the dependency of other ucnv_
* implementation functions on the USet implementation.
*/
#include "unicode/utypes.h"
#include "unicode/uset.h"
#include "unicode/ucnv.h"
#include "ucnv_bld.h"
#include "uset_imp.h"
/*
 * Fill setFillIn with what the converter cnv reports for whichSet.
 *
 * The set is emptied first; the converter implementation then adds its
 * items through a USetAdder that forwards to the uset_add(),
 * uset_addRange() and uset_addString() functions.
 *
 * Sets U_ILLEGAL_ARGUMENT_ERROR for a NULL cnv or setFillIn or an
 * out-of-range whichSet, and U_UNSUPPORTED_ERROR if the converter
 * implementation has no getUnicodeSet function.
 * Does nothing if pErrorCode is NULL or already indicates a failure.
 */
U_CAPI void U_EXPORT2
ucnv_getUnicodeSet(const UConverter *cnv,
                   USet *setFillIn,
                   UConverterUnicodeSet whichSet,
                   UErrorCode *pErrorCode) {
    USetAdder sa;

    /* nothing to do without a usable, non-failure error code */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    /* both objects are required */
    if(cnv==NULL || setFillIn==NULL) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    /* the selector must be one of the defined set types */
    if(whichSet<UCNV_ROUNDTRIP_SET || whichSet>=UCNV_SET_COUNT) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    /* converters without a getUnicodeSet implementation cannot answer */
    if(cnv->sharedData->impl->getUnicodeSet==NULL) {
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return;
    }

    /* route all additions to the caller's set via the uset_ functions */
    sa.set=setFillIn;
    sa.add=uset_add;
    sa.addRange=uset_addRange;
    sa.addString=uset_addString;

    /* start from an empty set */
    uset_clear(setFillIn);

    /* let the converter add the code points it supports */
    cnv->sharedData->impl->getUnicodeSet(cnv, &sa, whichSet, pErrorCode);
}

View File

@ -510,17 +510,17 @@ _HZ_SafeClone(const UConverter *cnv,
static void
_HZ_GetUnicodeSet(const UConverter *cnv,
USet *set,
USetAdder *sa,
UConverterUnicodeSet which,
UErrorCode *pErrorCode) {
/* the tilde '~' is hardcoded in the converter */
uset_add(set, 0x7e);
sa->add(sa->set, 0x7e);
/* add all of the code points that the sub-converter handles */
((UConverterDataHZ*)cnv->extraInfo)->
gbConverter->sharedData->impl->
getUnicodeSet(((UConverterDataHZ*)cnv->extraInfo)->gbConverter,
set, which, pErrorCode);
sa, which, pErrorCode);
}
static const UConverterImpl _HZImpl={

View File

@ -1332,7 +1332,7 @@ _ISCII_SafeClone(const UConverter *cnv,
static void
_ISCIIGetUnicodeSet(const UConverter *cnv,
USet *set,
USetAdder *sa,
UConverterUnicodeSet which,
UErrorCode *pErrorCode)
{
@ -1341,19 +1341,19 @@ _ISCIIGetUnicodeSet(const UConverter *cnv,
/* Since all ISCII versions allow switching to other ISCII
scripts, we add all roundtrippable characters to this set. */
uset_addRange(set, 0, ASCII_END);
sa->addRange(sa->set, 0, ASCII_END);
for (script = DEVANAGARI; script <= MALAYALAM; script++) {
mask = (uint8_t)(lookupInitialData[script][1]);
for (idx = 0; idx < DELTA; idx++) {
if (validityTable[idx] & mask) {
uset_add(set, idx + (script * DELTA) + INDIC_BLOCK_BEGIN);
sa->add(sa->set, idx + (script * DELTA) + INDIC_BLOCK_BEGIN);
}
}
}
uset_add(set, DANDA);
uset_add(set, DOUBLE_DANDA);
uset_add(set, ZWNJ);
uset_add(set, ZWJ);
sa->add(sa->set, DANDA);
sa->add(sa->set, DOUBLE_DANDA);
sa->add(sa->set, ZWNJ);
sa->add(sa->set, ZWJ);
}
static const UConverterImpl _ISCIIImpl={

View File

@ -332,10 +332,10 @@ noMoreInput:
static void
_Latin1GetUnicodeSet(const UConverter *cnv,
USet *set,
USetAdder *sa,
UConverterUnicodeSet which,
UErrorCode *pErrorCode) {
uset_addRange(set, 0, 0xff);
sa->addRange(sa->set, 0, 0xff);
}
static const UConverterImpl _Latin1Impl={
@ -534,10 +534,10 @@ _ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs,
static void
_ASCIIGetUnicodeSet(const UConverter *cnv,
USet *set,
USetAdder *sa,
UConverterUnicodeSet which,
UErrorCode *pErrorCode) {
uset_addRange(set, 0, 0x7f);
sa->addRange(sa->set, 0, 0x7f);
}
static const UConverterImpl _ASCIIImpl={

View File

@ -430,7 +430,7 @@ _MBCSSizeofFromUBytes(UConverterMBCSTable *mbcsTable) {
static void
_getUnicodeSetForBytes(const UConverterSharedData *sharedData,
const int32_t (*stateTable)[256], const uint16_t *unicodeCodeUnits,
USet *set,
USetAdder *sa,
UConverterUnicodeSet which,
uint8_t state, uint32_t offset, int32_t lowByte, int32_t highByte,
@ -442,7 +442,7 @@ _getUnicodeSetForBytes(const UConverterSharedData *sharedData,
if(MBCS_ENTRY_IS_TRANSITION(entry)) {
_getUnicodeSetForBytes(
sharedData, stateTable, unicodeCodeUnits,
set, which,
sa, which,
(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry),
offset+MBCS_ENTRY_TRANSITION_OFFSET(entry),
0, 0xff,
@ -490,7 +490,7 @@ _getUnicodeSetForBytes(const UConverterSharedData *sharedData,
}
if(c>=0) {
uset_add(set, c);
sa->add(sa->set, c);
}
offset=rowOffset;
}
@ -507,20 +507,20 @@ _getUnicodeSetForBytes(const UConverterSharedData *sharedData,
*/
U_CFUNC void
_MBCSGetUnicodeSetForBytes(const UConverterSharedData *sharedData,
USet *set,
USetAdder *sa,
UConverterUnicodeSet which,
uint8_t state, int32_t lowByte, int32_t highByte,
UErrorCode *pErrorCode) {
_getUnicodeSetForBytes(
sharedData, sharedData->mbcs.stateTable, sharedData->mbcs.unicodeCodeUnits,
set, which,
sa, which,
state, 0, lowByte, highByte,
pErrorCode);
}
U_CFUNC void
_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
USet *set,
USetAdder *sa,
UConverterUnicodeSet which,
UErrorCode *pErrorCode) {
const UConverterMBCSTable *mbcsTable;
@ -565,7 +565,7 @@ _MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
*/
do {
if(*stage3++>=0xf00) {
uset_add(set, c);
sa->add(sa->set, c);
}
} while((++c&0xf)!=0);
} else {
@ -605,7 +605,7 @@ _MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
*/
do {
if((st3&1)!=0 && *stage3>=0x100) {
uset_add(set, c);
sa->add(sa->set, c);
}
st3>>=1;
++stage3;
@ -638,7 +638,7 @@ _MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
*/
do {
if(st3&1) {
uset_add(set, c);
sa->add(sa->set, c);
}
st3>>=1;
} while((++c&0xf)!=0);
@ -652,19 +652,19 @@ _MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
}
}
ucnv_extGetUnicodeSet(sharedData, set, which, pErrorCode);
ucnv_extGetUnicodeSet(sharedData, sa, which, pErrorCode);
}
static void
_MBCSGetUnicodeSet(const UConverter *cnv,
USet *set,
USetAdder *sa,
UConverterUnicodeSet which,
UErrorCode *pErrorCode) {
if(cnv->options&_MBCS_OPTION_GB18030) {
uset_addRange(set, 0, 0xd7ff);
uset_addRange(set, 0xe000, 0x10ffff);
sa->addRange(sa->set, 0, 0xd7ff);
sa->addRange(sa->set, 0xe000, 0x10ffff);
} else {
_MBCSGetUnicodeSetForUnicode(cnv->sharedData, set, which, pErrorCode);
_MBCSGetUnicodeSetForUnicode(cnv->sharedData, sa, which, pErrorCode);
}
}

View File

@ -373,7 +373,7 @@ _MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
*/
U_CFUNC void
_MBCSGetUnicodeSetForBytes(const UConverterSharedData *sharedData,
USet *set,
USetAdder *sa,
UConverterUnicodeSet which,
uint8_t state, int32_t lowByte, int32_t highByte,
UErrorCode *pErrorCode);
@ -388,7 +388,7 @@ _MBCSGetUnicodeSetForBytes(const UConverterSharedData *sharedData,
*/
U_CFUNC void
_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
USet *set,
USetAdder *sa,
UConverterUnicodeSet which,
UErrorCode *pErrorCode);

View File

@ -1,7 +1,7 @@
/*
******************************************************************************
*
* Copyright (C) 1999-2003, International Business Machines
* Copyright (C) 1999-2004, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
@ -23,6 +23,7 @@
#include "unicode/uchar.h"
#include "unicode/udata.h"
#include "unicode/uset.h"
#include "uset_imp.h"
#include "ustr_imp.h"
#include "umutex.h"
#include "cmemory.h"
@ -1723,7 +1724,7 @@ uprv_getMaxISOCommentLength() {
* @param sa USetAdder to receive characters.
*/
static void
charSetToUSet(uint32_t cset[8], USet* uset) {
charSetToUSet(uint32_t cset[8], USetAdder *sa) {
UChar us[256];
char cs[256];
@ -1731,7 +1732,6 @@ charSetToUSet(uint32_t cset[8], USet* uset) {
UErrorCode errorCode;
errorCode=U_ZERO_ERROR;
uset_clear(uset);
if(!calcNameSetsLengths(&errorCode)) {
return;
@ -1751,18 +1751,18 @@ charSetToUSet(uint32_t cset[8], USet* uset) {
/* add each UChar to the USet */
for(i=0; i<length; ++i) {
if(us[i]!=0 || cs[i]==0) { /* non-invariant chars become (UChar)0 */
uset_add(uset, us[i]);
sa->add(sa->set, us[i]);
}
}
}
/**
* Fills set with characters that are used in Unicode character names.
* @param set USet to receive characters. Existing contents are deleted.
* @param sa USetAdder to receive characters.
*/
U_CAPI void U_EXPORT2
uprv_getCharNameCharacters(USet* set) {
charSetToUSet(gNameSet, set);
uprv_getCharNameCharacters(USetAdder *sa) {
charSetToUSet(gNameSet, sa);
}
#if 0
@ -1772,11 +1772,11 @@ urename.h and uprops.h changed accordingly.
*/
/**
* Fills set with characters that are used in Unicode character names.
* @param set USet to receive characters. Existing contents are deleted.
* @param sa USetAdder to receive characters.
*/
U_CAPI void U_EXPORT2
uprv_getISOCommentCharacters(USet* set) {
charSetToUSet(gISOCommentSet, set);
uprv_getISOCommentCharacters(USetAdder *sa) {
charSetToUSet(gISOCommentSet, sa);
}
#endif

View File

@ -22,6 +22,8 @@
#include "unicode/uchar.h"
#include "unicode/uscript.h"
#include "unicode/symtable.h"
#include "unicode/uset.h"
#include "uset_imp.h"
#include "ruleiter.h"
#include "cmemory.h"
#include "uhash.h"
@ -1466,14 +1468,38 @@ void UnicodeSet::applyPropertyPattern(RuleCharacterIterator& chars,
// Inclusions list
//----------------------------------------------------------------
// USetAdder implementation
// Does not use uset.h to reduce code dependencies
// USetAdder callback: add the single code point c to the UnicodeSet
// that is passed in as an opaque USet pointer.
static void U_CALLCONV
_set_add(USet *set, UChar32 c) {
    UnicodeSet *target=(UnicodeSet *)set;
    target->add(c);
}
// USetAdder callback: add the range start..end to the UnicodeSet
// that is passed in as an opaque USet pointer.
static void U_CALLCONV
_set_addRange(USet *set, UChar32 start, UChar32 end) {
    UnicodeSet *target=(UnicodeSet *)set;
    target->add(start, end);
}
// USetAdder callback: add the string str/length to the UnicodeSet
// that is passed in as an opaque USet pointer.
static void U_CALLCONV
_set_addString(USet *set, const UChar *str, int32_t length) {
    // The first constructor argument is TRUE exactly when length is negative.
    const UBool lengthIsNegative=(UBool)(length<0);
    UnicodeString text(lengthIsNegative, str, length);
    ((UnicodeSet *)set)->add(text);
}
const UnicodeSet* UnicodeSet::getInclusions(UErrorCode &status) {
umtx_lock(NULL);
UBool f = (INCLUSIONS == NULL);
umtx_unlock(NULL);
if (f) {
UnicodeSet* incl = new UnicodeSet();
USetAdder sa = {
(USet *)incl,
_set_add,
_set_addRange,
_set_addString
};
if (incl != NULL) {
uprv_getInclusions((USet*)incl, &status);
uprv_getInclusions(&sa, &status);
if (U_SUCCESS(status)) {
umtx_lock(NULL);
if (INCLUSIONS == NULL) {

View File

@ -262,7 +262,8 @@ isAcceptable(void * /* context */,
static UBool U_CALLCONV
_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*limit*/, uint32_t /*value*/) {
/* add the start code point to the USet */
uset_add((USet *)context, start);
USetAdder *sa=(USetAdder *)context;
sa->add(sa->set, start);
return TRUE;
}
@ -1153,7 +1154,7 @@ unorm_isNFSkippable(UChar32 c, UNormalizationMode mode) {
}
U_CAPI void U_EXPORT2
unorm_addPropertyStarts(USet *set, UErrorCode *pErrorCode) {
unorm_addPropertyStarts(USetAdder *sa, UErrorCode *pErrorCode) {
UChar c;
if(!_haveData(*pErrorCode)) {
@ -1161,18 +1162,18 @@ unorm_addPropertyStarts(USet *set, UErrorCode *pErrorCode) {
}
/* add the start code point of each same-value range of each trie */
utrie_enum(&normTrie, NULL, _enumPropertyStartsRange, set);
utrie_enum(&fcdTrie, NULL, _enumPropertyStartsRange, set);
utrie_enum(&normTrie, NULL, _enumPropertyStartsRange, sa);
utrie_enum(&fcdTrie, NULL, _enumPropertyStartsRange, sa);
if(formatVersion_2_1) {
utrie_enum(&auxTrie, NULL, _enumPropertyStartsRange, set);
utrie_enum(&auxTrie, NULL, _enumPropertyStartsRange, sa);
}
/* add Hangul LV syllables and LV+1 because of skippables */
for(c=HANGUL_BASE; c<HANGUL_BASE+HANGUL_COUNT; c+=JAMO_T_COUNT) {
uset_add(set, c);
uset_add(set, c+1);
sa->add(sa->set, c);
sa->add(sa->set, c+1);
}
uset_add(set, HANGUL_BASE+HANGUL_COUNT); /* add Hangul+1 to continue with other properties */
sa->add(sa->set, HANGUL_BASE+HANGUL_COUNT); /* add Hangul+1 to continue with other properties */
}
U_CAPI UNormalizationCheckResult U_EXPORT2

View File

@ -395,7 +395,7 @@ unorm_isNFSkippable(UChar32 c, UNormalizationMode mode);
* @internal
*/
U_CAPI void U_EXPORT2
unorm_addPropertyStarts(USet *set, UErrorCode *pErrorCode);
unorm_addPropertyStarts(USetAdder *sa, UErrorCode *pErrorCode);
/**
* Swap unorm.icu. See udataswp.h.

View File

@ -27,10 +27,6 @@
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
#ifdef DEBUG
#include <stdio.h>
#endif
/**
* Get the next non-ignorable ASCII character from a property name
* and lowercases it.
@ -584,76 +580,14 @@ strrch(const char* source,uint32_t sourceLen,char find){
#endif
U_CAPI void U_EXPORT2
uprv_getInclusions(USet* set, UErrorCode *pErrorCode) {
uprv_getInclusions(USetAdder *sa, UErrorCode *pErrorCode) {
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
return;
}
uset_clear(set);
#if !UCONFIG_NO_NORMALIZATION
unorm_addPropertyStarts(set, pErrorCode);
#endif
uchar_addPropertyStarts(set, pErrorCode);
ucase_addPropertyStarts(uchar_getCaseProps(pErrorCode), set, pErrorCode);
#ifdef DEBUG
{
UChar* result=NULL;
int32_t resultCapacity=0;
int32_t bufLen = uset_toPattern(set,result,resultCapacity,TRUE,pErrorCode);
char* resultChars = NULL;
if(*pErrorCode == U_BUFFER_OVERFLOW_ERROR){
uint32_t len = 0, add=0;
char *buf=NULL, *current = NULL;
*pErrorCode = U_ZERO_ERROR;
resultCapacity = bufLen;
result = (UChar*) uprv_malloc(resultCapacity * U_SIZEOF_UCHAR);
bufLen = uset_toPattern(set,result,resultCapacity,TRUE,pErrorCode);
resultChars = (char*) uprv_malloc(len+1);
u_UCharsToChars(result,resultChars,bufLen);
resultChars[bufLen] = 0;
buf = resultChars;
/*printf(resultChars);*/
while(len < bufLen){
add = 70-5/* for ", +\n */;
current = buf +len;
if (add < (bufLen-len)) {
uint32_t index = strrch(current,add,'\\');
if (index > add) {
index = add;
} else {
int32_t num =index-1;
uint32_t seqLen;
while(num>0){
if(current[num]=='\\'){
num--;
}else{
break;
}
}
if ((index-num)%2==0) {
index--;
}
seqLen = (current[index+1]=='u') ? 6 : 2;
if ((add-index) < seqLen) {
add = index + seqLen;
}
}
}
fwrite("\"",1,1,stdout);
if(len+add<bufLen){
fwrite(current,1,add,stdout);
fwrite("\" +\n",1,4,stdout);
}else{
fwrite(current,1,bufLen-len,stdout);
}
len+=add;
}
}
uprv_free(result);
uprv_free(resultChars);
}
unorm_addPropertyStarts(sa, pErrorCode);
#endif
uchar_addPropertyStarts(sa, pErrorCode);
ucase_addPropertyStarts(uchar_getCaseProps(pErrorCode), sa, pErrorCode);
}

View File

@ -22,6 +22,7 @@
#include "unicode/utypes.h"
#include "unicode/uset.h"
#include "uset_imp.h"
#include "ucase.h"
#include "udataswp.h"
@ -348,10 +349,10 @@ uprv_getMaxISOCommentLength();
* Fills set with characters that are used in Unicode character names.
* Includes all characters that are used in regular/Unicode 1.0/extended names.
* Adds nothing if no character names are available.
* @param set USet to receive characters. Existing contents are deleted.
* @param sa USetAdder to receive characters.
*/
U_CAPI void U_EXPORT2
uprv_getCharNameCharacters(USet* set);
uprv_getCharNameCharacters(USetAdder *sa);
#if 0
/*
@ -361,10 +362,10 @@ urename.h and unames.c changed accordingly.
/**
* Fills set with characters that are used in Unicode character names.
* Just empties the set if no ISO comments are available.
* @param set USet to receive characters. Existing contents are deleted.
* @param sa USetAdder to receive characters.
*/
U_CAPI void U_EXPORT2
uprv_getISOCommentCharacters(USet* set);
uprv_getISOCommentCharacters(USetAdder *sa);
*/
#endif
@ -374,18 +375,18 @@ uprv_getISOCommentCharacters(USet* set);
* @internal
*/
U_CAPI void U_EXPORT2
uchar_addPropertyStarts(USet *set, UErrorCode *pErrorCode);
uchar_addPropertyStarts(USetAdder *sa, UErrorCode *pErrorCode);
/**
* Return a set of characters for property enumeration.
* For each two consecutive characters (start, limit) in the set,
* all of the properties for start..limit-1 are all the same.
*
* @param set USet to receive result. Existing contents are lost.
* @param sa USetAdder to receive result.
* @internal
*/
U_CAPI void U_EXPORT2
uprv_getInclusions(USet* set, UErrorCode *pErrorCode);
uprv_getInclusions(USetAdder *sa, UErrorCode *pErrorCode);
/**
* Swap the ICU Unicode properties file. See uchar.c.

View File

@ -0,0 +1,51 @@
/*
*******************************************************************************
*
* Copyright (C) 2004, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: uset_imp.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2004sep07
* created by: Markus W. Scherer
*
* Internal USet definitions.
*/
#ifndef __USET_IMP_H__
#define __USET_IMP_H__
#include "unicode/utypes.h"
#include "unicode/uset.h"
U_CDECL_BEGIN
/* Function type for adding the single code point c to set. */
typedef void U_CALLCONV
USetAdd(USet *set, UChar32 c);
/* Function type for adding the code point range start..end to set. */
typedef void U_CALLCONV
USetAddRange(USet *set, UChar32 start, UChar32 end);
/* Function type for adding the string str with the given length to set. */
typedef void U_CALLCONV
USetAddString(USet *set, const UChar *str, int32_t length);
/**
* Interface for adding items to a USet, to keep low-level code from
* statically depending on the USet implementation.
* Calls will look like sa->add(sa->set, c);
*
* The creator of a USetAdder fills in the set pointer and the three
* function pointers; low-level code only calls through the pointers
* and passes sa->set as the first argument.
*/
struct USetAdder {
/* the set that receives the items; passed as the first argument to each function below */
USet *set;
/* adds one code point: sa->add(sa->set, c) */
USetAdd *add;
/* adds a code point range: sa->addRange(sa->set, start, end) */
USetAddRange *addRange;
/* adds a string: sa->addString(sa->set, str, length) */
USetAddString *addString;
};
typedef struct USetAdder USetAdder;
U_CDECL_END
#endif