ICU-7273 remove now-unused unorm.icu, and small changes parallel with Java

X-SVN-Rev: 27562
This commit is contained in:
Markus Scherer 2010-02-13 23:15:05 +00:00
parent b15f884b16
commit 7a3a89e61f
11 changed files with 38 additions and 63 deletions

1
.gitattributes vendored
View File

@ -52,7 +52,6 @@ icu4c/icu4c.css -text
icu4c/source/data/in/nfc.nrm -text
icu4c/source/data/in/nfkc.nrm -text
icu4c/source/data/in/nfkc_cf.nrm -text
icu4c/source/data/in/unorm.icu -text
icu4c/source/data/locales/pool.res -text
icu4c/source/i18n/regextxt.cpp -text
icu4c/source/i18n/regextxt.h -text

View File

@ -824,7 +824,7 @@ void Normalizer2Impl::recompose(ReorderingBuffer &buffer, int32_t recomposeStart
// If c did not combine, then check if it is a starter.
if(cc==0) {
// Found a new starter.
if((compositionsList=getCompositionsListForDecompYesAndZeroCC(norm16))!=NULL) {
if((compositionsList=getCompositionsListForDecompYes(norm16))!=NULL) {
// It may combine with something, prepare for it.
if(U_IS_BMP(c)) {
starterIsSupplementary=FALSE;
@ -1344,7 +1344,7 @@ public:
U_CDECL_BEGIN
// Set the FCD value for a range of same-norm16 charcters.
// Set the FCD value for a range of same-norm16 characters.
static UBool U_CALLCONV
enumRangeHandler(const void *context, UChar32 start, UChar32 end, uint32_t value) {
return ((FCDTrieSingleton *)context)->rangeHandler(start, end, value);

View File

@ -45,6 +45,8 @@ public:
JAMO_V_COUNT=21,
JAMO_T_COUNT=28,
JAMO_VT_COUNT=JAMO_V_COUNT*JAMO_T_COUNT,
HANGUL_COUNT=JAMO_L_COUNT*JAMO_V_COUNT*JAMO_T_COUNT,
HANGUL_LIMIT=HANGUL_BASE+HANGUL_COUNT
};
@ -408,7 +410,7 @@ private:
// Requires minYesNo<norm16<limitNoNo.
// Returns the address extraData+norm16, i.e. norm16 is used directly as an
// element offset into extraData. No range check is performed here, so the
// caller is responsible for the precondition above.
const uint16_t *getMapping(uint16_t norm16) const { return extraData+norm16; }
const uint16_t *getCompositionsListForDecompYesAndZeroCC(uint16_t norm16) const {
const uint16_t *getCompositionsListForDecompYes(uint16_t norm16) const {
if(norm16==0 || MIN_NORMAL_MAYBE_YES<=norm16) {
return NULL;
} else if(norm16<minMaybeYes) {

View File

@ -287,14 +287,6 @@ unorm_isCanonSafeStart(UChar32 c) {
}
}
/**
 * Copies 4 bytes from dataVersion into *versionInfo via uprv_memcpy, but only
 * when unorm_haveData(pErrorCode) returns a nonzero value. Otherwise
 * *versionInfo is left untouched.
 *
 * @param versionInfo output; receives the 4 copied bytes
 * @param pErrorCode  passed through unchanged to unorm_haveData()
 */
U_CAPI void U_EXPORT2
unorm_getUnicodeVersion(UVersionInfo *versionInfo, UErrorCode *pErrorCode){
/* Only fill the output if unorm_haveData() reports success. */
if(unorm_haveData(pErrorCode)){
uprv_memcpy(*versionInfo, dataVersion, 4);
}
}
U_CAPI UBool U_EXPORT2
unorm_getCanonStartSet(UChar32 c, USerializedSet *fillSet) {
#if !UNORM_HARDCODE_DATA

View File

@ -186,13 +186,6 @@ enum {
U_CAPI UBool U_EXPORT2
unorm_haveData(UErrorCode *pErrorCode);
/**
* internal API, used by StringPrep
* @internal
*/
U_CAPI void U_EXPORT2
unorm_getUnicodeVersion(UVersionInfo *versionInfo, UErrorCode *pErrorCode);
/**
* Internal API, used by enumeration of canonically equivalent strings
* @internal

View File

@ -183,28 +183,31 @@ u_hasBinaryProperty(UChar32 c, UProperty which) {
break;
}
#endif
} else if(column==UPROPS_SRC_NFC || column==UPROPS_SRC_NFKC) {
} else if(column==UPROPS_SRC_NFC) {
#if !UCONFIG_NO_NORMALIZATION
UErrorCode errorCode=U_ZERO_ERROR;
switch(which) {
case UCHAR_FULL_COMPOSITION_EXCLUSION: {
// By definition, Full_Composition_Exclusion is the same as NFC_QC=No.
const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
if(U_SUCCESS(errorCode)) {
return impl->isCompNo(impl->getNorm16(c));
}
return U_SUCCESS(errorCode) && impl->isCompNo(impl->getNorm16(c));
break;
}
default: {
// UCHAR_NF..._INERT properties
// UCHAR_NF[CD]_INERT properties
const Normalizer2 *norm2=Normalizer2Factory::getInstance(
(UNormalizationMode)(which-UCHAR_NFD_INERT+UNORM_NFD), errorCode);
if(U_SUCCESS(errorCode)) {
return norm2->isInert(c);
}
break;
return U_SUCCESS(errorCode) && norm2->isInert(c);
}
}
#endif
} else if(column==UPROPS_SRC_NFKC) {
#if !UCONFIG_NO_NORMALIZATION
// UCHAR_NFK[CD]_INERT properties
UErrorCode errorCode=U_ZERO_ERROR;
const Normalizer2 *norm2=Normalizer2Factory::getInstance(
(UNormalizationMode)(which-UCHAR_NFD_INERT+UNORM_NFD), errorCode);
return U_SUCCESS(errorCode) && norm2->isInert(c);
#endif
} else if(column==UPROPS_SRC_NFKC_CF) {
// currently only for UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED

View File

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2002-2009, International Business Machines
* Copyright (C) 2002-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -494,13 +494,12 @@ uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
if(rangeIndex<bmpLength) {
*pStart=array[rangeIndex++];
if(rangeIndex<bmpLength) {
*pEnd=array[rangeIndex];
*pEnd=array[rangeIndex]-1;
} else if(rangeIndex<length) {
*pEnd=(((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1];
*pEnd=((((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1])-1;
} else {
*pEnd=0x110000;
*pEnd=0x10ffff;
}
--*pEnd;
return TRUE;
} else {
rangeIndex-=bmpLength;
@ -511,11 +510,10 @@ uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
*pStart=(((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1];
rangeIndex+=2;
if(rangeIndex<length) {
*pEnd=(((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1];
*pEnd=((((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1])-1;
} else {
*pEnd=0x110000;
*pEnd=0x10ffff;
}
--*pEnd;
return TRUE;
} else {
return FALSE;

View File

@ -277,7 +277,7 @@ loadData(UStringPrepProfile* profile,
/* initialize some variables */
profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
unorm_getUnicodeVersion(&normUnicodeVersion, errorCode);
u_getUnicodeVersion(normUnicodeVersion);
normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
(normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +

View File

@ -411,7 +411,7 @@ ALL_INDEX_SRC_FILES = $(PKGDATA_LIST) $(INDEX_FILE) $(CURR_INDEX_FILE) $(LANG_IN
# a list to use in the .lst files (package-relative)
ALL_FILES_LIST = $(DAT_FILES_SHORT) $(CNV_FILES_SHORT) $(BRK_FILES_SHORT) $(CTD_FILES_SHORT) $(RES_FILES_SHORT) $(INDEX_RES_FILE_SHORT) $(CURR_FILES_SHORT) $(CURR_INDEX_RES_SHORT) $(LANG_FILES_SHORT) $(LANG_INDEX_RES_SHORT) $(REGION_FILES_SHORT) $(REGION_INDEX_RES_SHORT) $(ZONE_FILES_SHORT) $(ZONE_INDEX_RES_SHORT) $(COLLATION_FILES_SHORT) $(COLLATION_INDEX_RES_SHORT) $(BRK_RES_FILES_SHORT) $(BRK_RES_INDEX_RES_SHORT) $(RBNF_FILES_SHORT) $(RBNF_INDEX_RES_SHORT) $(TRANSLIT_FILES_SHORT) $(SPREP_FILES_SHORT) $(CFU_FILES_SHORT)
UNI_CORE_DATA=uprops.icu ucase.icu ubidi.icu unorm.icu
UNI_CORE_DATA=uprops.icu ucase.icu ubidi.icu
UNI_CORE_TARGET_DATA=$(UNI_CORE_DATA:%=$(BUILDDIR)/%)
ifneq ($(INCLUDE_UNI_CORE_DATA),)
@ -487,16 +487,12 @@ $(BUILDDIR)/ucase.icu: $(UNICODEDATADIR)/UnicodeData.txt $(TOOLBINDIR)/gencase$(
$(BUILDDIR)/pnames.icu: $(UNICODEDATADIR)/PropertyAliases.txt $(UNICODEDATADIR)/PropertyValueAliases.txt $(UNICODEDATADIR)/Blocks.txt $(COMINCDIR)/uscript.h $(COMINCDIR)/uchar.h $(TOOLBINDIR)/genpname$(TOOLEXEEXT)
$(INVOKE) $(TOOLBINDIR)/genpname -d $(BUILDDIR)
# unorm.icu
# ICU 4.4: $(BUILDDIR)/unorm.icu is now prebuilt, see below.
# normalization
$(OUTTMPDIR)/unorm_props_data.c: $(UNICODEDATADIR)/UnicodeData.txt $(UNICODEDATADIR)/DerivedNormalizationProps.txt $(UNICODEDATADIR)/BidiMirroring.txt $(TOOLBINDIR)/gennorm$(TOOLEXEEXT) $(BUILDDIR)/$(ICUDT)pnames.icu $(BUILDDIR)/$(ICUDT)uprops.icu $(BUILDDIR)/$(ICUDT)ucase.icu
$(INVOKE) $(TOOLBINDIR)/gennorm --csource -s $(UNICODEDATADIR) -i $(BUILDDIR) -d $(OUTTMPDIR) -u $(UNICODE_VERSION)
# unorm.icu used to be built like this:
# $(INVOKE) $(TOOLBINDIR)/gennorm -s $(UNICODEDATADIR) -i $(BUILDDIR) -d $(BUILDDIR) -u $(UNICODE_VERSION)
# ucadata.icu
# used to depend on $(BUILDDIR)/$(ICUDT)unorm.icu $(BUILDDIR)/$(ICUDT)ucase.icu
# used to depend on $(BUILDDIR)/$(ICUDT)ucase.icu
# see Jitterbug 4497
$(COLBLDDIR)/ucadata.icu $(COLBLDDIR)/invuca.icu: $(UNICODEDATADIR)/FractionalUCA.txt $(TOOLBINDIR)/genuca$(TOOLEXEEXT) $(BUILDDIR)/$(ICUDT)nfc.nrm
$(INVOKE) $(TOOLBINDIR)/genuca -s $(UNICODEDATADIR) -d $(COLBLDDIR) -i $(BUILDDIR)
@ -510,9 +506,6 @@ $(BUILDDIR)/cnvalias.icu: $(UCMSRCDIR)/convrtrs.txt $(TOOLBINDIR)/gencnval$(TOOL
$(INVOKE) $(TOOLBINDIR)/gencnval -d $(BUILDDIR) $(UCMSRCDIR)/convrtrs.txt
# Targets for prebuilt Unicode data
$(BUILDDIR)/unorm.icu: $(SRCDATADIR)/in/unorm.icu
$(INVOKE) $(TOOLBINDIR)/icupkg -t$(ICUDATA_CHAR) $< $@
$(BUILDDIR)/%.nrm: $(SRCDATADIR)/in/%.nrm
$(INVOKE) $(TOOLBINDIR)/icupkg -t$(ICUDATA_CHAR) $< $@
@ -756,7 +749,7 @@ clean-resindex:
$(INDEX_RES_FILE): $(INDEX_FILE) $(TOOLBINDIR)/genrb$(TOOLEXEEXT)
$(INVOKE) $(TOOLBINDIR)/genrb $(GENRBOPTS) -i $(BUILDDIR) -d $(BUILDDIR) $(INDEX_FILE)
# Starting with ICU4C 3.4, the core Unicode properties files (uprops.icu, ucase.icu, ubidi.icu, unorm.icu)
# The core Unicode properties files (uprops.icu, ucase.icu, ubidi.icu)
# are hardcoded in the common DLL and therefore not included in the data package any more.
# They are not built by default but need to be built for ICU4J data and for getting the .c source files
# when updating the Unicode data.
@ -782,7 +775,7 @@ JAR=jar
# - package them into the .jar file
$(OUTDIR)/icu4j/icudata.jar: build-dir packagedata $(OUTTMPDIR)/$(ICUDATA_PLATFORM_NAME).dat uni-core-data
mkdir -p $(OUTDIR)/icu4j/com/ibm/icu/impl/data/$(ICUDATA_BASENAME_VERSION)b
echo ubidi.icu ucase.icu uprops.icu unorm.icu > $(OUTDIR)/icu4j/add.txt
echo ubidi.icu ucase.icu uprops.icu > $(OUTDIR)/icu4j/add.txt
$(INVOKE) $(TOOLBINDIR)/icupkg $(OUTTMPDIR)/$(ICUDATA_PLATFORM_NAME).dat $(OUTDIR)/icu4j/$(ICUDATA_BASENAME_VERSION)b.dat -a $(OUTDIR)/icu4j/add.txt -s $(BUILDDIR) -x '*' -tb -d $(OUTDIR)/icu4j/com/ibm/icu/impl/data/$(ICUDATA_BASENAME_VERSION)b
$(JAR) cf $(OUTDIR)/icu4j/icudata.jar -C $(OUTDIR)/icu4j com/ibm/icu/impl/data/$(ICUDATA_BASENAME_VERSION)b
@ -825,7 +818,7 @@ pkgdataMakefile:
###########
########### 390 (z/OS) support
UCMFILES390=ebcdic-xml-us.ucm ibm-37_P100-1995.ucm ibm-1047_P100-1995.ucm ibm-4909_P100-1999.ucm
# used to depend on uprops.icu ucase.icu ubidi.icu unorm.icu
# used to depend on uprops.icu ucase.icu ubidi.icu
# see Jitterbug 4497
ALLFILES390=pnames.icu cnvalias.icu $(UCMFILES390:.ucm=.cnv)

Binary file not shown.

View File

@ -468,14 +468,14 @@ COMMON_ICUDATA_ARGUMENTS=-f -e $(U_ICUDATA_NAME) -v $(ICU_PACKAGE_MODE) -c -p $(
ALL : GODATA "$(ICU_LIB_TARGET)" "$(TESTDATAOUT)\testdata.dat"
@echo All targets are up to date
# Starting with ICU4C 3.4, the core Unicode properties files (uprops.icu, ucase.icu, ubidi.icu, unorm.icu)
# The core Unicode properties files (uprops.icu, ucase.icu, ubidi.icu)
# are hardcoded in the common DLL and therefore not included in the data package any more.
# They are not built by default but need to be built for ICU4J data and for getting the .c source files
# when updating the Unicode data.
# Changed in makedata.mak revision 1.117. See Jitterbug 4497.
# Command line:
# C:\svn\icuproj\icu\trunk\source\data>nmake -f makedata.mak ICUMAKE=C:\svn\icuproj\icu\trunk\source\data\ CFG=x86\Debug uni-core-data
uni-core-data: GODATA "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu" "$(ICUBLD_PKG)\ubidi.icu" "$(ICUBLD_PKG)\unorm.icu" "$(ICUTMP)\unorm_props_data.c"
uni-core-data: GODATA "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu" "$(ICUBLD_PKG)\ubidi.icu" "$(ICUTMP)\unorm_props_data.c"
@echo Unicode .icu files built to "$(ICUBLD_PKG)"
@echo Unicode .c source files built to "$(ICUTMP)"
@ -489,7 +489,7 @@ uni-core-data: GODATA "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu" "$(IC
# - package them into the .jar file
"$(ICUOUT)\icu4j\icudata.jar": GODATA "$(ICUOUT)\$(ICUPKG).dat" uni-core-data
if not exist "$(ICUOUT)\icu4j\com\ibm\icu\impl\data\$(U_ICUDATA_NAME)b" mkdir "$(ICUOUT)\icu4j\com\ibm\icu\impl\data\$(U_ICUDATA_NAME)b"
echo ubidi.icu ucase.icu uprops.icu unorm.icu > "$(ICUOUT)\icu4j\add.txt"
echo ubidi.icu ucase.icu uprops.icu > "$(ICUOUT)\icu4j\add.txt"
"$(ICUPBIN)\icupkg" "$(ICUOUT)\$(ICUPKG).dat" "$(ICUOUT)\icu4j\$(U_ICUDATA_NAME)b.dat" -a "$(ICUOUT)\icu4j\add.txt" -s "$(ICUBLD_PKG)" -x * -tb -d "$(ICUOUT)\icu4j\com\ibm\icu\impl\data\$(U_ICUDATA_NAME)b"
"$(JAR)" cf "$(ICUOUT)\icu4j\icudata.jar" -C "$(ICUOUT)\icu4j" com\ibm\icu\impl\data\$(U_ICUDATA_NAME)b
@ -902,11 +902,9 @@ res_index:table(nofallback) {
@"$(ICUTOOLS)\gencase\$(CFG)\gencase" -u $(UNICODE_VERSION) -i "$(ICUBLD_PKG)" -s "$(ICUUNIDATA)" -d "$(ICUBLD_PKG)"
@"$(ICUTOOLS)\gencase\$(CFG)\gencase" --csource -u $(UNICODE_VERSION) -i "$(ICUBLD_PKG)" -s "$(ICUUNIDATA)" -d "$(ICUTMP)"
# Targets for unorm.icu
# ICU 4.4: "$(ICUBLD_PKG)\unorm.icu" is now prebuilt, see below.
# Targets for normalization
"$(ICUTMP)\unorm_props_data.c": "$(ICUUNIDATA)\*.txt" "$(ICUTOOLS)\gennorm\$(CFG)\gennorm.exe" "$(ICUBLD_PKG)\pnames.icu" "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu"
@echo Creating data file for Unicode Normalization
@rem @"$(ICUTOOLS)\gennorm\$(CFG)\gennorm" -u $(UNICODE_VERSION) -i "$(ICUBLD_PKG)" -s "$(ICUUNIDATA)" -d "$(ICUBLD_PKG)"
@"$(ICUTOOLS)\gennorm\$(CFG)\gennorm" --csource -u $(UNICODE_VERSION) -i "$(ICUBLD_PKG)" -s "$(ICUUNIDATA)" -d "$(ICUTMP)"
# Targets for converters
@ -915,16 +913,13 @@ res_index:table(nofallback) {
@"$(ICUTOOLS)\gencnval\$(CFG)\gencnval" -d "$(ICUBLD_PKG)" "$(ICUSRCDATA)\$(ICUUCM)\convrtrs.txt"
# Targets for ucadata.icu & invuca.icu
# used to depend on "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu" "$(ICUBLD_PKG)\unorm.icu"
# used to depend on "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu" "$(ICUBLD_PKG)\nfc.nrm"
# see Jitterbug 4497
"$(ICUBLD_PKG)\$(ICUCOL)\invuca.icu" "$(ICUBLD_PKG)\$(ICUCOL)\ucadata.icu": "$(ICUUNIDATA)\FractionalUCA.txt" "$(ICUTOOLS)\genuca\$(CFG)\genuca.exe" "$(ICUBLD_PKG)\nfc.nrm"
@echo Creating UCA data files
@"$(ICUTOOLS)\genuca\$(CFG)\genuca" -d "$(ICUBLD_PKG)\$(ICUCOL)" -i "$(ICUBLD_PKG)" -s "$(ICUUNIDATA)"
# Targets for prebuilt Unicode data
"$(ICUBLD_PKG)\unorm.icu": $(ICUSRCDATA_RELATIVE_PATH)\in\unorm.icu
"$(ICUPBIN)\icupkg" -tl $? $@
"$(ICUBLD_PKG)\nfc.nrm": $(ICUSRCDATA_RELATIVE_PATH)\in\nfc.nrm
"$(ICUPBIN)\icupkg" -tl $? $@
@ -955,12 +950,12 @@ res_index:table(nofallback) {
!IFNDEF ICUDATA_SOURCE_ARCHIVE
$(UCM_SOURCE) : {"$(ICUTOOLS)\makeconv\$(CFG)"}makeconv.exe
# This used to depend on "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu" "$(ICUBLD_PKG)\ubidi.icu" "$(ICUBLD_PKG)\unorm.icu"
# This used to depend on "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu" "$(ICUBLD_PKG)\ubidi.icu"
# This data is now hard coded as a part of the library.
# See Jitterbug 4497 for details.
$(MISC_SOURCE) $(RB_FILES) $(CURR_FILES) $(LANG_FILES) $(REGION_FILES) $(ZONE_FILES) $(COL_COL_FILES) $(RBNF_RES_FILES) $(BRK_RES_FILES) $(TRANSLIT_RES_FILES): {"$(ICUTOOLS)\genrb\$(CFG)"}genrb.exe "$(ICUBLD_PKG)\$(ICUCOL)\ucadata.icu"
$(MISC_SOURCE) $(RB_FILES) $(CURR_FILES) $(LANG_FILES) $(REGION_FILES) $(ZONE_FILES) $(COL_COL_FILES) $(RBNF_RES_FILES) $(BRK_RES_FILES) $(TRANSLIT_RES_FILES): {"$(ICUTOOLS)\genrb\$(CFG)"}genrb.exe "$(ICUBLD_PKG)\nfc.nrm" "$(ICUBLD_PKG)\$(ICUCOL)\ucadata.icu"
# This used to depend on "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu" "$(ICUBLD_PKG)\ubidi.icu" "$(ICUBLD_PKG)\unorm.icu"
# This used to depend on "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu" "$(ICUBLD_PKG)\ubidi.icu"
# This data is now hard coded as a part of the library.
# See Jitterbug 4497 for details.
$(BRK_SOURCE) : "$(ICUBLD_PKG)\unames.icu" "$(ICUBLD_PKG)\pnames.icu" "$(ICUBLD_PKG)\nfc.nrm"