ICU-4790 spoof impl merged to trunk.

X-SVN-Rev: 25534
2009-03-09 23:40:15 +00:00 · 2009-03-09 23:40:15 +00:00 · 9715eae02c
commit 9715eae02c
parent a5894c4401
33 changed files with 36784 additions and 293 deletions
--- a/icu4c/source/allinone/allinone.sln
+++ b/icu4c/source/allinone/allinone.sln
@ -239,6 +239,13 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "letest", "..\test\letest\le
 		{37FC2C7F-1904-4811-8955-2F478830EAD1} = {37FC2C7F-1904-4811-8955-2F478830EAD1}
 	EndProjectSection
 EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gencfu", "..\tools\gencfu\gencfu.vcproj", "{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}"
+	ProjectSection(ProjectDependencies) = postProject
+		{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
+		{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
+		{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
+	EndProjectSection
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Win32 = Debug|Win32
@ -511,6 +518,12 @@ Global
 		{67351485-4D18-4245-BE39-A7EF0675ACD2}.Release|Win32.Build.0 = Release|Win32
 		{67351485-4D18-4245-BE39-A7EF0675ACD2}.Release|x64.ActiveCfg = Release|x64
 		{67351485-4D18-4245-BE39-A7EF0675ACD2}.Release|x64.Build.0 = Release|x64
+		{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Debug|Win32.ActiveCfg = Debug|Win32
+		{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Debug|Win32.Build.0 = Debug|Win32
+		{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Debug|x64.ActiveCfg = Debug|Win32
+		{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Release|Win32.ActiveCfg = Release|Win32
+		{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Release|Win32.Build.0 = Release|Win32
+		{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Release|x64.ActiveCfg = Release|Win32
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
--- a/icu4c/source/common/uhash.c
+++ b/icu4c/source/common/uhash.c
@ -1,6 +1,6 @@
 /*
 ******************************************************************************
-*   Copyright (C) 1997-2008, International Business Machines
+*   Copyright (C) 1997-2009, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 ******************************************************************************
 *   Date        Name        Description
@ -569,7 +569,9 @@ uhash_init(UHashtable *fillinResult,

 U_CAPI void U_EXPORT2
 uhash_close(UHashtable *hash) {
-    U_ASSERT(hash != NULL);
+    if (hash == NULL) {
+        return;
+    }
    if (hash->elements != NULL) {
        if (hash->keyDeleter != NULL || hash->valueDeleter != NULL) {
            int32_t pos=-1;
--- a/icu4c/source/common/uhash.h
+++ b/icu4c/source/common/uhash.h
@ -1,6 +1,6 @@
 /*
 ******************************************************************************
-*   Copyright (C) 1997-2007, International Business Machines
+*   Copyright (C) 1997-2009, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 ******************************************************************************
 *   Date        Name        Description
@ -246,7 +246,7 @@ uhash_init(UHashtable *hash,

 /**
 * Close a UHashtable, releasing the memory used.
- * @param hash The UHashtable to close.
+ * @param hash The UHashtable to close. If hash is NULL no operation is performed.
 */
 U_CAPI void U_EXPORT2 
 uhash_close(UHashtable *hash);
--- a/icu4c/source/common/uvector.cpp
+++ b/icu4c/source/common/uvector.cpp
@ -1,6 +1,6 @@
 /*
 ******************************************************************************
-* Copyright (C) 1999-2004, International Business Machines Corporation and   *
+* Copyright (C) 1999-2009, International Business Machines Corporation and   *
 * others. All Rights Reserved.                                               *
 ******************************************************************************
 *   Date        Name        Description
@ -10,6 +10,7 @@

 #include "uvector.h"
 #include "cmemory.h"
+#include "uarrsort.h"

 U_NAMESPACE_BEGIN

@ -466,5 +467,74 @@ void UVector::sortedInsert(UHashTok tok, USortComparator *compare, UErrorCode& e
    }
 }

+/**
+  *  Array sort comparator function.
+  *  Used from UVector::sort()
+  *  Conforms to function signature required for uprv_sortArray().
+  *  This function is essentially just a wrapper, to make a
+  *  UVector style comparator function usable with uprv_sortArray().
+  *
+  *  The context pointer to this function is a pointer back
+  *  (with some extra indirection) to the user supplied comparator.
+  *  
+  */
+static int32_t U_CALLCONV
+sortComparator(const void *context, const void *left, const void *right) {
+    // context is the address of a variable that holds the caller's comparator.
+    USortComparator * const *userFn = static_cast<USortComparator * const *>(context);
+    // Copy both elements out; they are handed to the comparator by value.
+    const UHashTok lhs = *static_cast<const UHashTok *>(left);
+    const UHashTok rhs = *static_cast<const UHashTok *>(right);
+    return (**userFn)(lhs, rhs);
+}
+
+
+/**
+  *  Array sort comparison function for use from UVector::sorti()
+  *  Compares int32_t vector elements.
+  */
+static int32_t U_CALLCONV
+sortiComparator(const void * /*context */, const void *left, const void *right) {
+    // Three-way compare of the integer members: -1, 0 or 1.
+    const UHashTok *lhs = static_cast<const UHashTok *>(left);
+    const UHashTok *rhs = static_cast<const UHashTok *>(right);
+    if (lhs->integer < rhs->integer) { return -1; }
+    return (lhs->integer == rhs->integer) ? 0 : 1;
+}
+
+/**
+  * Sort the vector, assuming it contains ints.
+  *     (A more general sort would take a comparison function, but it's
+  *     not clear whether UVector's USortComparator or
+  *     UComparator from uprv_sortArray would be more appropriate.)
+  */
+void UVector::sorti(UErrorCode &ec) {
+    if (U_FAILURE(ec)) {
+        return;   // an error is already pending; do not sort
+    }
+    uprv_sortArray(elements, count, sizeof(UHashTok), sortiComparator, NULL, FALSE, &ec);
+}
+
+
+/**
+ *  Sort with a user supplied comparator.
+ *
+ *    The comparator function handling is confusing because the function type
+ *    for UVector  (as defined for sortedInsert()) is different from the signature
+ *    required by uprv_sortArray().  This is handled by passing
+ *    the UVector sort function pointer via the context pointer to a
+ *    sortArray() comparator function, which can then call back to
+ *    the original user function.
+ *
+ *    An additional twist is that it's not safe to pass a pointer-to-function
+ *    as  a (void *) data pointer, so instead we pass a (data) pointer to a
+ *    pointer-to-function variable.
+ */
+void UVector::sort(USortComparator *compare, UErrorCode &ec) {
+    if (U_FAILURE(ec)) {
+        return;   // an error is already pending; do not sort
+    }
+    uprv_sortArray(elements, count, sizeof(UHashTok), sortComparator, &compare, FALSE, &ec);
+}
+
 U_NAMESPACE_END

--- a/icu4c/source/common/uvector.h
+++ b/icu4c/source/common/uvector.h
@ -1,6 +1,6 @@
 /*
 **********************************************************************
-*   Copyright (C) 1999-2006, International Business Machines
+*   Copyright (C) 1999-2009, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   Date        Name        Description
@ -245,6 +245,20 @@ public:
     */
    void sortedInsert(int32_t obj, USortComparator *compare, UErrorCode& ec);

+    /**
+     * Sort the contents of the vector, assuming that the contents of the
+     * vector are of type int32_t.
+     */
+    void sorti(UErrorCode &ec);
+
+    /**
+      * Sort the contents of this vector, using a caller-supplied function
+      * to do the comparisons.  (It's confusing that
+      *  UVector's USortComparator function is different from the
+      *  UComparator function type defined in uarrsort.h)
+      */
+    void sort(USortComparator *compare, UErrorCode &ec);
+
    /**
     * ICU "poor man's RTTI", returns a UClassID for this class.
     */
--- a/icu4c/source/configure
+++ b/icu4c/source/configure
@ -10170,7 +10170,7 @@ then
 fi

 # output the Makefiles
-ac_config_files="$ac_config_files icudefs.mk Makefile data/pkgdataMakefile config/Makefile.inc config/pkgdataMakefile data/Makefile stubdata/Makefile common/Makefile i18n/Makefile layout/Makefile layoutex/Makefile io/Makefile extra/Makefile extra/uconv/Makefile extra/uconv/pkgdataMakefile extra/scrptrun/Makefile tools/Makefile tools/ctestfw/Makefile tools/toolutil/Makefile tools/makeconv/Makefile tools/genrb/Makefile tools/genuca/Makefile tools/genccode/Makefile tools/gencmn/Makefile tools/gencnval/Makefile tools/genctd/Makefile tools/gennames/Makefile tools/gentest/Makefile tools/gennorm/Makefile tools/genprops/Makefile tools/gencase/Makefile tools/genbidi/Makefile tools/genpname/Makefile tools/genbrk/Makefile tools/gensprep/Makefile tools/icupkg/Makefile tools/icuswap/Makefile tools/pkgdata/Makefile tools/tzcode/Makefile test/Makefile test/compat/Makefile test/testdata/Makefile test/testdata/pkgdataMakefile test/hdrtst/Makefile test/intltest/Makefile test/cintltst/Makefile test/iotest/Makefile test/letest/Makefile test/perf/Makefile test/perf/collationperf/Makefile test/perf/ubrkperf/Makefile test/perf/charperf/Makefile test/perf/convperf/Makefile test/perf/normperf/Makefile test/perf/strsrchperf/Makefile test/perf/unisetperf/Makefile test/perf/usetperf/Makefile test/perf/ustrperf/Makefile test/perf/utfperf/Makefile test/perf/utrie2perf/Makefile samples/Makefile samples/date/Makefile samples/cal/Makefile samples/layout/Makefile common/unicode/platform.h"
+ac_config_files="$ac_config_files icudefs.mk Makefile data/pkgdataMakefile config/Makefile.inc config/pkgdataMakefile data/Makefile stubdata/Makefile common/Makefile i18n/Makefile layout/Makefile layoutex/Makefile io/Makefile extra/Makefile extra/uconv/Makefile extra/uconv/pkgdataMakefile extra/scrptrun/Makefile tools/Makefile tools/ctestfw/Makefile tools/toolutil/Makefile tools/makeconv/Makefile tools/genrb/Makefile tools/genuca/Makefile tools/genccode/Makefile tools/gencmn/Makefile tools/gencnval/Makefile tools/genctd/Makefile tools/gennames/Makefile tools/gentest/Makefile tools/gennorm/Makefile tools/genprops/Makefile tools/gencase/Makefile tools/genbidi/Makefile tools/genpname/Makefile tools/genbrk/Makefile tools/gensprep/Makefile tools/icupkg/Makefile tools/icuswap/Makefile tools/pkgdata/Makefile tools/tzcode/Makefile tools/gencfu/Makefile test/Makefile test/compat/Makefile test/testdata/Makefile test/testdata/pkgdataMakefile test/hdrtst/Makefile test/intltest/Makefile test/cintltst/Makefile test/iotest/Makefile test/letest/Makefile test/perf/Makefile test/perf/collationperf/Makefile test/perf/ubrkperf/Makefile test/perf/charperf/Makefile test/perf/convperf/Makefile test/perf/normperf/Makefile test/perf/strsrchperf/Makefile test/perf/unisetperf/Makefile test/perf/usetperf/Makefile test/perf/ustrperf/Makefile test/perf/utfperf/Makefile test/perf/utrie2perf/Makefile samples/Makefile samples/date/Makefile samples/cal/Makefile samples/layout/Makefile common/unicode/platform.h"

 cat >confcache <<\_ACEOF
 # This file is a shell script that caches the results of configure
@ -10807,6 +10807,7 @@ do
    "tools/icuswap/Makefile") CONFIG_FILES="$CONFIG_FILES tools/icuswap/Makefile" ;;
    "tools/pkgdata/Makefile") CONFIG_FILES="$CONFIG_FILES tools/pkgdata/Makefile" ;;
    "tools/tzcode/Makefile") CONFIG_FILES="$CONFIG_FILES tools/tzcode/Makefile" ;;
+    "tools/gencfu/Makefile") CONFIG_FILES="$CONFIG_FILES tools/gencfu/Makefile" ;;
    "test/Makefile") CONFIG_FILES="$CONFIG_FILES test/Makefile" ;;
    "test/compat/Makefile") CONFIG_FILES="$CONFIG_FILES test/compat/Makefile" ;;
    "test/testdata/Makefile") CONFIG_FILES="$CONFIG_FILES test/testdata/Makefile" ;;
--- a/icu4c/source/configure.in
+++ b/icu4c/source/configure.in
@ -1116,6 +1116,7 @@ AC_CONFIG_FILES([icudefs.mk \
 		tools/icuswap/Makefile \
 		tools/pkgdata/Makefile \
 		tools/tzcode/Makefile \
+		tools/gencfu/Makefile \
 		test/Makefile \
 		test/compat/Makefile \
 		test/testdata/Makefile \
--- a/icu4c/source/data/Makefile.in
+++ b/icu4c/source/data/Makefile.in
@ -230,6 +230,11 @@ BRS_SRC_FILES = $(BRS_SRC:%=$(BRKSRCDIR)/%)
 INSTALLED_BRS_FILES = $(BRK_RES_SOURCE:%.txt=%)  $(BRK_RES_SOURCE_LOCAL:%.txt=%)
 endif

+## Confusables (Spoofing) files
+ALL_CFU_SOURCE=$(UNICODEDATADIR)/confusables.txt $(UNICODEDATADIR)/confusablesWholeScript.txt
+CFU_FILES_SHORT=confusables.cfu
+CFU_FILES=$(BUILDDIR)/$(CFU_FILES_SHORT)
+
 ## UCM files
 -include $(UCMSRCDIR)/ucmcore.mk
 -include $(UCMSRCDIR)/ucmfiles.mk
@ -331,10 +336,10 @@ SPREP_FILES = $(ALL_SPREP_SOURCE:%.txt=$(BUILDDIR)/%.spp)
 SPREP_FILES_SHORT = $(ALL_SPREP_SOURCE:%.txt=%.spp)

 ## All generated files
-ALL_FILES = $(DAT_FILES) $(CNV_FILES) $(BRK_FILES) $(CTD_FILES) $(RES_FILES) $(INDEX_RES_FILE) $(COLLATION_FILES) $(BRK_RES_FILES) $(RBNF_FILES) $(TRANSLIT_FILES) $(SPREP_FILES)
+ALL_FILES = $(DAT_FILES) $(CNV_FILES) $(BRK_FILES) $(CTD_FILES) $(RES_FILES) $(INDEX_RES_FILE) $(COLLATION_FILES) $(BRK_RES_FILES) $(RBNF_FILES) $(TRANSLIT_FILES) $(SPREP_FILES) $(CFU_FILES)
 ALL_INDEX_SRC_FILES = $(PKGDATA_LIST) $(INDEX_FILE) $(COLLATION_INDEX_FILE) $(BRK_RES_INDEX_FILE) $(RBNF_INDEX_FILE)
 # a list to use in the .lst files (package-relative)
-ALL_FILES_LIST = $(DAT_FILES_SHORT) $(CNV_FILES_SHORT) $(BRK_FILES_SHORT) $(CTD_FILES_SHORT) $(RES_FILES_SHORT) $(INDEX_RES_FILE_SHORT) $(COLLATION_FILES_SHORT) $(COLLATION_INDEX_RES_SHORT) $(BRK_RES_FILES_SHORT) $(BRK_RES_INDEX_RES_SHORT) $(RBNF_FILES_SHORT) $(RBNF_INDEX_RES_SHORT) $(TRANSLIT_FILES_SHORT) $(SPREP_FILES_SHORT)
+ALL_FILES_LIST = $(DAT_FILES_SHORT) $(CNV_FILES_SHORT) $(BRK_FILES_SHORT) $(CTD_FILES_SHORT) $(RES_FILES_SHORT) $(INDEX_RES_FILE_SHORT) $(COLLATION_FILES_SHORT) $(COLLATION_INDEX_RES_SHORT) $(BRK_RES_FILES_SHORT) $(BRK_RES_INDEX_RES_SHORT) $(RBNF_FILES_SHORT) $(RBNF_INDEX_RES_SHORT) $(TRANSLIT_FILES_SHORT) $(SPREP_FILES_SHORT) $(CFU_FILES_SHORT)

 UNI_CORE_DATA=uprops.icu ucase.icu ubidi.icu unorm.icu
 UNI_CORE_TARGET_DATA=$(UNI_CORE_DATA:%=$(BUILDDIR)/%)
@ -452,6 +457,20 @@ $(BRKBLDDIR)/%.brk: $(BRKSRCDIR)/%.txt $(BINDIR)/genbrk$(EXEEXT) $(DAT_FILES)
 $(BRKBLDDIR)/%.ctd: $(BRKSRCDIR)/%.txt $(BINDIR)/genctd$(EXEEXT) $(DAT_FILES)
 	$(INVOKE) $(BINDIR)/genctd -c -i $(BUILDDIR) -o $@ $<

+####################################################    CFU
+# CFU FILES
+#    Note: gencfu requires two input files to produce a single output file.
+#          There will be exactly one target file and two source files.
+#          The $(word n, ...) selects the nth word from the following stuff.
+#          There must be a nicer way to do this.
+
+$(CFU_FILES): $(ALL_CFU_SOURCE) $(BINDIR)/gencfu$(EXEEXT) $(DAT_FILES)
+	$(INVOKE) echo ALL_CFU_SOURCE: $(ALL_CFU_SOURCE)
+	$(INVOKE) echo CFU_FILES: $(CFU_FILES)
+	$(INVOKE) echo CFU_FILES_SHORT: $(CFU_FILES_SHORT)
+	$(INVOKE) $(BINDIR)/gencfu -c -i $(BUILDDIR) -r $(word 1,$(ALL_CFU_SOURCE)) -w $(word 2,$(ALL_CFU_SOURCE)) -o $@
+
+
 ####################################################    CNV
 # CNV FILES
 $(BUILDDIR)/%.cnv: $(UCMSRCDIR)/%.ucm $(BINDIR)/makeconv$(EXEEXT)
--- a/icu4c/source/data/makedata.mak
+++ b/icu4c/source/data/makedata.mak
@ -422,12 +422,13 @@ uni-core-data: GODATA "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu" "$(IC
 	copy "$(ICUTMP)\$(ICUPKG).dat" "$(ICUOUT)\$(U_ICUDATA_NAME)$(U_ICUDATA_ENDIAN_SUFFIX).dat"
 	-@erase "$(ICUTMP)\$(ICUPKG).dat"
 !ELSE
-"$(ICU_LIB_TARGET)" : $(COMMON_ICUDATA_DEPENDENCIES) $(CNV_FILES) "$(ICUBLD_PKG)\unames.icu" "$(ICUBLD_PKG)\pnames.icu" "$(ICUBLD_PKG)\cnvalias.icu" "$(ICUBLD_PKG)\$(ICUCOL)\ucadata.icu" "$(ICUBLD_PKG)\$(ICUCOL)\invuca.icu" $(BRK_FILES) $(BRK_CTD_FILES) $(BRK_RES_FILES) $(COL_COL_FILES) $(RBNF_RES_FILES) $(TRANSLIT_RES_FILES) $(ALL_RES) $(SPREP_FILES)
+"$(ICU_LIB_TARGET)" : $(COMMON_ICUDATA_DEPENDENCIES) $(CNV_FILES) "$(ICUBLD_PKG)\unames.icu" "$(ICUBLD_PKG)\pnames.icu" "$(ICUBLD_PKG)\cnvalias.icu" "$(ICUBLD_PKG)\$(ICUCOL)\ucadata.icu" "$(ICUBLD_PKG)\$(ICUCOL)\invuca.icu" $(BRK_FILES) $(BRK_CTD_FILES) $(BRK_RES_FILES) $(COL_COL_FILES) $(RBNF_RES_FILES) $(TRANSLIT_RES_FILES) $(ALL_RES) $(SPREP_FILES) "$(ICUBLD_PKG)\confusables.cfu"
 	@echo Building icu data
 	cd "$(ICUBLD_PKG)"
 	"$(ICUPBIN)\pkgdata" $(COMMON_ICUDATA_ARGUMENTS) <<"$(ICUTMP)\icudata.lst"
 pnames.icu
 unames.icu
+confusables.cfu
 $(ICUCOL)\ucadata.icu
 $(ICUCOL)\invuca.icu
 cnvalias.icu
@ -486,6 +487,7 @@ CLEAN : GODATA
 	-@erase "*.res"
 	-@erase "*.spp"
 	-@erase "*.txt"
+	-@erase "*.cfu"
 	@cd "$(ICUBLD_PKG)\$(ICUBRK)"
 	-@erase "*.brk"
 	-@erase "*.ctd"
@ -673,6 +675,12 @@ res_index:table(nofallback) {
 	@echo Creating $@
 	@"$(ICUTOOLS)\gensprep\$(CFG)\gensprep" -s $(<D) -d "$(ICUBLD_PKG)" -b $(@B) -m "$(ICUUNIDATA)" -u 3.2.0 $(<F)

+# Confusables .cfu file generation
+#     Can't use an inference rule because two .txt source files combine to produce a single .cfu output file
+"$(ICUBLD_PKG)\confusables.cfu": "$(ICUUNIDATA)\confusables.txt" "$(ICUUNIDATA)\confusablesWholeScript.txt" "$(ICUTOOLS)\gencfu\$(CFG)\gencfu.exe"
+	@echo Creating $@
+	@"$(ICUTOOLS)\gencfu\$(CFG)\gencfu" -c -r "$(ICUUNIDATA)\confusables.txt" -w "$(ICUUNIDATA)\confusablesWholeScript.txt" -o $@ -i "$(ICUBLD_PKG)"
+
 !IFDEF ICUDATA_ARCHIVE
 "$(ICUDATA_SOURCE_ARCHIVE)": CREATE_DIRS $(ICUDATA_ARCHIVE) "$(ICUTOOLS)\icupkg\$(CFG)\icupkg.exe"
 	"$(ICUTOOLS)\icupkg\$(CFG)\icupkg" -t$(U_ICUDATA_ENDIAN_SUFFIX) "$(ICUDATA_ARCHIVE)" "$(ICUDATA_SOURCE_ARCHIVE)"
--- a/icu4c/source/data/unidata/confusables.txt
+++ b/icu4c/source/data/unidata/confusables.txt
--- a/icu4c/source/data/unidata/confusablesWholeScript.txt
+++ b/icu4c/source/data/unidata/confusablesWholeScript.txt
--- a/icu4c/source/i18n/Makefile.in
+++ b/icu4c/source/i18n/Makefile.in
@ -81,7 +81,8 @@ ulocdata.o measfmt.o currfmt.o curramt.o currunit.o measure.o utmscale.o \
 csdetect.o csmatch.o csr2022.o csrecog.o csrmbcs.o csrsbcs.o csrucode.o csrutf8.o inputext.o \
 wintzimpl.o windtfmt.o winnmfmt.o basictz.o dtrule.o rbtz.o tzrule.o tztrans.o vtzone.o \
 zonemeta.o zstrfmt.o plurrule.o plurfmt.o dtitvfmt.o dtitvinf.o \
-tmunit.o tmutamt.o tmutfmt.o colldata.o bmsearch.o bms.o currpinf.o
+tmunit.o tmutamt.o tmutfmt.o colldata.o bmsearch.o bms.o currpinf.o \
+uspoof.o uspoof_impl.o uspoof_build.o uspoof_buildconf.o uspoof_buildwsconf.o

 ## Header files to install
 HEADERS = $(srcdir)/unicode/*.h
--- a/icu4c/source/i18n/i18n.vcproj
+++ b/icu4c/source/i18n/i18n.vcproj
--- a/icu4c/source/i18n/unicode/uspoof.h
+++ b/icu4c/source/i18n/unicode/uspoof.h
@ -29,6 +29,8 @@
 #ifdef XP_CPLUSPLUS
 #include "unicode/unistr.h"
 #include "unicode/uniset.h"
+
+U_NAMESPACE_USE
 #endif


@ -133,8 +135,8 @@ typedef enum USpoofChecks {
    USPOOF_WHOLE_SCRIPT_CONFUSABLE  =   4,
    
    /** Modifier for single, mixed & whole script checks.
-        Selects between Lower Case Confusable (0) and
-        Any Case Confusable (1).  */
+        Selects between Lower Case Confusable and
+        Any Case Confusable.   */
    USPOOF_ANY_CASE                 =   8,

    /** Check that an identifer contains only characters from a
@ -146,15 +148,13 @@ typedef enum USpoofChecks {
    /** Check that an identifier for the presence of invisble characters,
      * characters, such as zero-width spaces, or character sequences that are
      * likely not to display, such as multiple occurences of the same
-      * non-spacing mark.  This does not test the input string as a whole
+      * non-spacing mark.  This check does not test the input string as a whole
      * for conformance to any particular syntax for identifiers.
      */
    USPOOF_INVISIBLE                =  32,
-    
-    USPOOF_LOCALE_LIMIT             =  64,
-    USPOOF_CHAR_LIMIT               = 128,
+    USPOOF_CHAR_LIMIT               =  64,
    USPOOF_ALL_CHECKS               = 0x7f
-    };
+    } USpoofChecks;
    
    
 /**
@ -298,10 +298,20 @@ uspoof_getChecks(const USpoofChecker *sc, UErrorCode *status);
 * Supplying an empty string removes all restrictions;
 * characters from any script will be allowed.
 *
- * The USPOOF_LOCALE_LIMIT test is automatically enabled for this
+ * The USPOOF_CHAR_LIMIT test is automatically enabled for this
 * USpoofChecker when calling this function with a non-empty set
 * of locales.
 *
+ * The Unicode Set of characters that will be allowed is accessible
+ * via the uspoof_getAllowedChars() function.  uspoof_setAllowedLocales()
+ * will <i>replace</i> any previously applied set of allowed characters.
+ *
+ * Adjustments, such as additions or deletions of certain classes of characters,
+ * can be made to the result of uspoof_setAllowedLocales() by
+ * fetching the resulting set with uspoof_getAllowedChars(),
+ * manipulating it with the Unicode Set API, then resetting the
+ * spoof detectors limits with uspoof_setAllowedChars()
+ *
 * @param sc           The USpoofChecker 
 * @param localesList  A list list of locales, from which the language
 *                     and associated script are extracted.  The list
@ -318,6 +328,8 @@ uspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode
 *  to be checked.  If no limitations on scripts have been specified,
 *  an empty string will be returned.
 *
+ *  uspoof_setAllowedChars() will reset the list of allowed locales to be empty.
+ *
 *  The format of the returned list is that of an HTTP Accept-Language
 *  header field, but it may not be identical to the original string passed
 *  to uspoof_setAllowedLocales();  the string may be
@ -339,7 +351,8 @@ uspoof_getAllowedLocales(USpoofChecker *sc, UErrorCode *status);
 /**
 * Limit the acceptable characters to those specified by a Unicode Set.
 *   Any previously specified character limit is
- *   is replaced by the new settings.
+ *   replaced by the new settings.  This includes limits on
+ *   characters that were set with the uspoof_setAllowedLocales() function.
 *
 * The USPOOF_CHAR_LIMIT test is automatically enabled for this
 * USpoofChecker by this function.
@ -381,14 +394,15 @@ uspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status)
 *                 the USPOOF_CHAR_LIMIT test.
 */
 U_DRAFT const USet * U_EXPORT2
-uspoof_getAllowedChars(USpoofChecker *sc, UErrorCode *status);
+uspoof_getAllowedChars(const USpoofChecker *sc, UErrorCode *status);


 #ifdef XP_CPLUSPLUS
 /**
 * Limit the acceptable characters to those specified by a Unicode Set.
 *   Any previously specified character limit is
- *   is replaced by the new settings.
+ *   replaced by the new settings.  This includes limits on
+ *   characters that were set with the uspoof_setAllowedLocales() function.
 *
 * The USPOOF_CHAR_LIMIT test is automatically enabled for this
 * USoofChecker by this function.
@ -425,7 +439,7 @@ uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const UnicodeSet *chars, UErrorCo
 *                 the USPOOF_CHAR_LIMIT test.
 */
 U_DRAFT const UnicodeSet * U_EXPORT2
-uspoof_getAllowedUnicodeSet(USpoofChecker *sc, UErrorCode *status);
+uspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status);
 #endif


@ -441,10 +455,11 @@ uspoof_getAllowedUnicodeSet(USpoofChecker *sc, UErrorCode *status);
 *                16 bit UTF-16 code units, or -1 if the string is 
 *                zero terminated.
 * @position      An out parameter that receives the index of the
- *                first string position that fails one of the checks.
+ *                first string position that fails the allowed character
+ *                limitation checks.
 *                This parameter may be null if the position information
 *                is not needed.
- *                If the string passes all of the requested checks the 
+ *                If the string passes the requested checks the
 *                parameter value will not be set.
 * @param status  The error code, set if an error occured while attempting to
 *                perform the check.
@ -473,15 +488,18 @@ uspoof_check(const USpoofChecker *sc,
 * @param length  the length of the string to be checked, or -1 if the string is 
 *                zero terminated.
 * @position      An out parameter that receives the index of the
- *                first string position that fails one of the checks.
+ *                first string position that fails the allowed character
+ *                limitation checks.
 *                This parameter may be null if the position information
 *                is not needed.
- *                If the string passes all of the requested checks the 
+ *                If the string passes the requested checks the
 *                parameter value will not be set.
 * @param status  The error code, set if an error occured while attempting to
 *                perform the check.
 *                Spoofing or security issues detected with the input string are
 *                not reported here, but through the function's return value.
+ *                If the input contains invalid UTF-8 sequences,
+ *                a status of U_INVALID_CHAR_FOUND will be returned.
 * @return        An integer value with bits set for any potential security
 *                or spoofing issues detected.  The bits are defined by
 *                enum USpoofChecks.  Zero is returned if no issues
@ -504,10 +522,11 @@ uspoof_checkUTF8(const USpoofChecker *sc,
 * @param sc      The USpoofChecker 
 * @param text    A UnicodeString to be checked for possible security issues.
 * @position      An out parameter that receives the index of the
- *                first string position that fails one of the checks.
+ *                first string position that fails the allowed character
+ *                limitation checks.
 *                This parameter may be null if the position information
 *                is not needed.
- *                If the string passes all of the requested checks the 
+ *                If the string passes the requested checks the
 *                parameter value will not be set.
 * @param status  The error code, set if an error occured while attempting to
 *                perform the check.
@ -684,7 +703,7 @@ uspoof_areConfusableUnicodeString(const USpoofChecker *sc,
  */
 U_DRAFT int32_t U_EXPORT2
 uspoof_getSkeleton(const USpoofChecker *sc,
-                   USpoofChecks type,
+                   uint32_t type,
                   const UChar *s,  int32_t length,
                   UChar *dest, int32_t destCapacity,
                   UErrorCode *status);
@ -726,7 +745,7 @@ uspoof_getSkeleton(const USpoofChecker *sc,
  */   
 U_DRAFT int32_t U_EXPORT2
 uspoof_getSkeletonUTF8(const USpoofChecker *sc,
-                       USpoofChecks type,
+                       uint32_t type,
                       const char *s,  int32_t length,
                       char *dest, int32_t destCapacity,
                       UErrorCode *status);
@ -762,7 +781,7 @@ uspoof_getSkeletonUTF8(const USpoofChecker *sc,
  */   
 U_DRAFT UnicodeString & U_EXPORT2
 uspoof_getSkeletonUnicodeString(const USpoofChecker *sc,
-                                USpoofChecks type,
+                                uint32_t type,
                                const UnicodeString &s,
                                UnicodeString &dest,
                                UErrorCode *status);
--- a/icu4c/source/i18n/uspoof.cpp
+++ b/icu4c/source/i18n/uspoof.cpp
@ -0,0 +1,540 @@
+/*
+***************************************************************************
+* Copyright (C) 2008-2009, International Business Machines Corporation
+* and others. All Rights Reserved.
+***************************************************************************
+*   file name:  uspoof.cpp
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2008Feb13
+*   created by: Andy Heninger
+*
+*   Unicode Spoof Detection
+*/
+#include "unicode/utypes.h"
+#include "unicode/uspoof.h"
+#include "unicode/unorm.h"
+#include "unicode/ustring.h"
+#include "cmemory.h"
+#include "uspoof_impl.h"
+#include "uassert.h"
+
+#include <stdio.h>      // debug
+
+U_NAMESPACE_USE
+
+
+/**
+ * Create a spoof checker backed by the default spoof data
+ * (SpoofData::getDefault()).
+ *
+ * @param status  In/out error code.  Nothing is done if it already indicates
+ *                a failure on entry.  Set to U_MEMORY_ALLOCATION_ERROR if
+ *                the checker object could not be allocated.
+ * @return        The new checker, or NULL if status indicates a failure.
+ */
+U_CAPI USpoofChecker * U_EXPORT2
+uspoof_open(UErrorCode *status) {
+    if (U_FAILURE(*status)) {
+        return NULL;
+    }
+    SpoofImpl *si = new SpoofImpl(SpoofData::getDefault(*status), *status);
+    if (si == NULL && U_SUCCESS(*status)) {
+        // Do not hand back NULL together with a success status; report the
+        // failed allocation the same way uspoof_openFromSerialized() does.
+        *status = U_MEMORY_ALLOCATION_ERROR;
+    }
+    if (U_FAILURE(*status)) {
+        delete si;
+        si = NULL;
+    }
+    return (USpoofChecker *)si;
+}
+
+
+/**
+ * Create a spoof checker from spoof data in serialized form.
+ *
+ * @param data           The serialized data.
+ * @param length         Passed to the SpoofData constructor together with data.
+ * @param pActualLength  Optional (may be NULL).  Receives the fLength field
+ *                       of the raw data header on success.
+ * @param status         In/out error code.  U_MEMORY_ALLOCATION_ERROR is set
+ *                       when either internal object could not be allocated.
+ * @return               The new checker, or NULL on failure.
+ */
+U_CAPI USpoofChecker * U_EXPORT2
+uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLength,
+                          UErrorCode *status) {
+    if (U_FAILURE(*status)) {
+        return NULL;
+    }
+    SpoofData *spoofData = new SpoofData(data, length, *status);
+    SpoofImpl *checker   = new SpoofImpl(spoofData, *status);
+
+    // A NULL from either allocation becomes a memory error, unless some
+    // other failure has already been recorded.
+    if (U_SUCCESS(*status) && (spoofData == NULL || checker == NULL)) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+    }
+    if (U_FAILURE(*status)) {
+        delete spoofData;
+        delete checker;
+        return NULL;
+    }
+
+    if (pActualLength != NULL) {
+        *pActualLength = spoofData->fRawData->fLength;
+    }
+    return reinterpret_cast<USpoofChecker *>(checker);
+}
+
+
+/**
+ * Make a copy of a spoof checker using the SpoofImpl copy constructor.
+ *
+ * @param sc      The checker to copy.
+ * @param status  In/out error code.  Set to U_MEMORY_ALLOCATION_ERROR if the
+ *                copy could not be allocated.
+ * @return        The copy, or NULL if sc does not validate or status
+ *                indicates a failure.
+ */
+U_CAPI USpoofChecker * U_EXPORT2
+uspoof_clone(const USpoofChecker *sc, UErrorCode *status) {
+    const SpoofImpl *src = SpoofImpl::validateThis(sc, *status);
+    if (src == NULL) {
+        return NULL;
+    }
+    SpoofImpl *result = new SpoofImpl(*src, *status);   // copy constructor
+    if (result == NULL && U_SUCCESS(*status)) {
+        // A failed allocation must not be returned as NULL-with-success.
+        *status = U_MEMORY_ALLOCATION_ERROR;
+    }
+    if (U_FAILURE(*status)) {
+        delete result;
+        result = NULL;
+    }
+    return (USpoofChecker *)result;
+}
+
+
+/**
+ * Delete a spoof checker.
+ *
+ * @param sc  The checker to delete.  Only the pointer returned by
+ *            SpoofImpl::validateThis() is deleted.
+ */
+U_CAPI void U_EXPORT2
+uspoof_close(USpoofChecker *sc) {
+    // validateThis() needs an error code; this function has no way to
+    // report it, so a throw-away local is used.
+    UErrorCode localStatus = U_ZERO_ERROR;
+    SpoofImpl *impl = SpoofImpl::validateThis(sc, localStatus);
+    delete impl;
+}
+
+
+/**
+ * Select the set of checks a spoof checker will perform.
+ *
+ * @param sc      The checker to modify.
+ * @param checks  Bit set of USPOOF_ check values.
+ * @param status  In/out error code.  U_ILLEGAL_ARGUMENT_ERROR is set, and the
+ *                checker left unchanged, if checks has any bit outside
+ *                USPOOF_ALL_CHECKS.
+ */
+U_CAPI void U_EXPORT2
+uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status) {
+    SpoofImpl *impl = SpoofImpl::validateThis(sc, *status);
+    if (impl == NULL) {
+        return;
+    }
+
+    // Accept the request only when every requested bit is a known check.
+    if ((checks & ~USPOOF_ALL_CHECKS) == 0) {
+        impl->fChecks = checks;
+    } else {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+    }
+}
+
+
+/**
+ * Get the set of checks a spoof checker is configured to perform.
+ *
+ * @param sc      The checker to query.
+ * @param status  In/out error code, passed to SpoofImpl::validateThis().
+ * @return        The checker's fChecks value, or 0 if sc does not validate.
+ */
+U_CAPI int32_t U_EXPORT2
+uspoof_getChecks(const USpoofChecker *sc, UErrorCode *status) {
+    const SpoofImpl *impl = SpoofImpl::validateThis(sc, *status);
+    int32_t checks = 0;
+    if (impl != NULL) {
+        checks = impl->fChecks;
+    }
+    return checks;
+}
+
+/**
+ * Not implemented yet: the locale list is ignored and only the checker
+ * handle is validated.
+ *
+ * @param sc      The checker.
+ * @param status  In/out error code, passed to SpoofImpl::validateThis().
+ */
+U_CAPI void U_EXPORT2
+uspoof_setAllowedLocales(USpoofChecker *sc, const char * /*localesList*/, UErrorCode *status) {
+    if (SpoofImpl::validateThis(sc, *status) == NULL) {
+        return;
+    }
+    // TODO:
+}
+
+
+/**
+ * C API wrapper: returns the result of uspoof_getAllowedUnicodeSet()
+ * viewed as a USet.
+ */
+U_CAPI const USet * U_EXPORT2
+uspoof_getAllowedChars(const USpoofChecker *sc, UErrorCode *status) {
+    return reinterpret_cast<const USet *>(uspoof_getAllowedUnicodeSet(sc, status));
+}
+
+/**
+ * Get the set of characters a spoof checker allows.
+ *
+ * @param sc      The checker to query.
+ * @param status  In/out error code, passed to SpoofImpl::validateThis().
+ * @return        The checker's fAllowedCharsSet (still owned by the checker),
+ *                or NULL if sc does not validate.
+ */
+U_CAPI const UnicodeSet * U_EXPORT2
+uspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status) {
+    const SpoofImpl *impl = SpoofImpl::validateThis(sc, *status);
+    const UnicodeSet *allowed = NULL;
+    if (impl != NULL) {
+        allowed = impl->fAllowedCharsSet;
+    }
+    return allowed;
+}
+
+
+/**
+ * C API wrapper: views the USet as a UnicodeSet and forwards to
+ * uspoof_setAllowedUnicodeSet().
+ */
+U_CAPI void U_EXPORT2
+uspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status) {
+    uspoof_setAllowedUnicodeSet(sc, reinterpret_cast<const UnicodeSet *>(chars), status);
+}
+
+
+/**
+ * Replace the set of characters a spoof checker allows, and turn on the
+ * USPOOF_CHAR_LIMIT check.
+ *
+ * The checker stores a frozen clone of the set; the caller keeps ownership
+ * of chars.
+ *
+ * @param sc      The checker to modify.
+ * @param chars   The allowed characters.
+ * @param status  In/out error code.  U_ILLEGAL_ARGUMENT_ERROR if chars is
+ *                bogus; U_MEMORY_ALLOCATION_ERROR if it cannot be cloned.
+ *                The checker is left unchanged in both cases.
+ */
+U_CAPI void U_EXPORT2
+uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const UnicodeSet *chars, UErrorCode *status) {
+    SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
+    if (This == NULL) {
+        return;
+    }
+    if (chars->isBogus()) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    UnicodeSet *clonedSet = static_cast<UnicodeSet *>(chars->clone());
+    if (clonedSet == NULL || clonedSet->isBogus()) {
+        // A bogus clone is still a live object; release it rather than
+        // leaking it.  (delete of NULL is a no-op.)
+        delete clonedSet;
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    clonedSet->freeze();
+    delete This->fAllowedCharsSet;
+    This->fAllowedCharsSet = clonedSet;
+    This->fChecks |= USPOOF_CHAR_LIMIT;
+}
+
+
+/**
+ * Run the checks enabled on a spoof checker against a UTF-16 string.
+ *
+ * @param sc        The spoof checker.
+ * @param text      The string to examine.
+ * @param length    Number of UChars in text, or -1 if text is NUL terminated.
+ * @param position  Optional (may be NULL).  Written only when the script scan
+ *                  or the allowed-characters scan recorded a failure index.
+ * @param status    In/out error code.  U_ILLEGAL_ARGUMENT_ERROR if
+ *                  length < -1.
+ * @return          Bitwise OR of the USPOOF_ check values that failed;
+ *                  0 if none failed or the arguments were rejected.
+ */
+U_CAPI int32_t U_EXPORT2
+uspoof_check(const USpoofChecker *sc,
+             const UChar *text, int32_t length,
+             int32_t *position,
+             UErrorCode *status) {
+
+    const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
+    if (This == NULL) {
+        return 0;
+    }
+    if (length < -1) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    if (length == -1) {
+        // Measure NUL terminated input once, up front, so that nothing
+        // below needs to deal with it.
+        length = u_strlen(text);
+    }
+
+    // Sentinel meaning "no failure position has been recorded".
+    // TODO: do we have a #define for max int32?
+    const int32_t kNoFailPos = 0x7fffffff;
+
+    int32_t failedChecks = 0;
+    int32_t firstFailPos = kNoFailPos;
+
+    // Result of scriptScan(): a count of the scripts in the input other than
+    // Common and Inherited.  Both the SINGLE_SCRIPT test and the WHOLE/MIXED
+    // script confusable tests need it, so it is computed at most once.
+    // -1 means "not computed yet".
+    int32_t numScripts = -1;
+
+    if (This->fChecks & USPOOF_SINGLE_SCRIPT) {
+        numScripts = This->scriptScan(text, length, firstFailPos, *status);
+        // A value of 2 stands for every case of two or more scripts.
+        if (numScripts >= 2) {
+            failedChecks |= USPOOF_SINGLE_SCRIPT;
+        }
+    }
+
+    if (This->fChecks & USPOOF_CHAR_LIMIT) {
+        // Stop at the first code point outside the allowed set.  The
+        // recorded index is the one U16_NEXT leaves behind, that is, just
+        // past the offending code point.
+        int32_t idx = 0;
+        while (idx < length) {
+            UChar32 cp;
+            U16_NEXT(text, idx, length, cp);
+            if (This->fAllowedCharsSet->contains(cp)) {
+                continue;
+            }
+            failedChecks |= USPOOF_CHAR_LIMIT;
+            if (idx < firstFailPos) {
+                firstFailPos = idx;
+            }
+            break;
+        }
+    }
+
+    // TODO:  add USPOOF_INVISIBLE check
+
+    if (This->fChecks & (USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE)) {
+        // Whole and mixed script confusables share one basic test:
+        // find the set of scripts in which every input character has a
+        // confusable, where a character always counts as confusable with
+        // itself in its own script.
+        //   - Input from a single script, two or more such scripts:
+        //     whole script confusable (the original script plus the
+        //     confusable one).
+        //   - Input from more than one script, at least one such script:
+        //     mixed script confusable (some characters can be transformed to
+        //     give a visually similar string all in one script).
+
+        NFKDBuffer   nfkd(text, length, *status);
+        const UChar *nfkdText   = nfkd.getBuffer();
+        int32_t      nfkdLength = nfkd.getLength();
+
+        if (numScripts == -1) {
+            int32_t unusedPos;
+            numScripts = This->scriptScan(text, length, unusedPos, *status);
+        }
+
+        ScriptSet confusableScripts;
+        This->wholeScriptCheck(nfkdText, nfkdLength, &confusableScripts, *status);
+        int32_t numConfusableScripts = confusableScripts.countMembers();
+
+        if (numScripts == 1 && numConfusableScripts >= 2 &&
+            (This->fChecks & USPOOF_WHOLE_SCRIPT_CONFUSABLE)) {
+            failedChecks |= USPOOF_WHOLE_SCRIPT_CONFUSABLE;
+        }
+
+        if (numScripts > 1 && numConfusableScripts >= 1 &&
+            (This->fChecks & USPOOF_MIXED_SCRIPT_CONFUSABLE)) {
+            failedChecks |= USPOOF_MIXED_SCRIPT_CONFUSABLE;
+        }
+    }
+
+    if (position != NULL && firstFailPos != kNoFailPos) {
+        *position = firstFailPos;
+    }
+    return failedChecks;
+}
+
+
+/**
+ * UTF-8 variant of uspoof_check().  The input is converted to UTF-16,
+ * checked with uspoof_check(), and any reported position is converted back
+ * to a UTF-8 offset.
+ *
+ * @param sc        The spoof checker.
+ * @param text      The UTF-8 string to examine.
+ * @param length    Length of text in bytes, passed to u_strFromUTF8().
+ * @param position  Optional (may be NULL).  Receives the UTF-8 length of the
+ *                  text that precedes the UTF-16 position reported by
+ *                  uspoof_check(), when that position is greater than zero.
+ * @param status    In/out error code.
+ * @return          The result of uspoof_check(), or 0 on failure.
+ */
+U_CAPI int32_t U_EXPORT2
+uspoof_checkUTF8(const USpoofChecker *sc,
+                 const char *text, int32_t length,
+                 int32_t *position,
+                 UErrorCode *status) {
+
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+    UChar stackBuf[USPOOF_STACK_BUFFER_SIZE];
+    UChar* text16 = stackBuf;
+    int32_t len16;
+
+    u_strFromUTF8(text16, USPOOF_STACK_BUFFER_SIZE, &len16, text, length, status);
+    if (U_FAILURE(*status) && *status != U_BUFFER_OVERFLOW_ERROR) {
+        return 0;
+    }
+    if (*status == U_BUFFER_OVERFLOW_ERROR) {
+        // Too long for the stack buffer; redo the conversion on the heap.
+        text16 = static_cast<UChar *>(uprv_malloc(len16 * sizeof(UChar) + 2));
+        if (text16 == NULL) {
+            *status = U_MEMORY_ALLOCATION_ERROR;
+            return 0;
+        }
+        *status = U_ZERO_ERROR;
+        u_strFromUTF8(text16, len16+1, NULL, text, length, status);
+    }
+
+    int32_t position16 = -1;
+    int32_t result = uspoof_check(sc, text16, len16, &position16, status);
+    if (U_FAILURE(*status)) {
+        // Fall through to the common exit so that a heap buffer is released.
+        result = 0;
+    } else if (position16 > 0) {
+        // Translate a UTF-16 based error position back to a UTF-8 offset.
+        // u_strToUTF8() in preflight mode is an easy way to do it.
+        // Preflighting reports U_BUFFER_OVERFLOW_ERROR by design, so it runs
+        // on a private error code and only unexpected failures are passed
+        // on to the caller.
+        U_ASSERT(position16 <= len16);
+        UErrorCode preflightStatus = U_ZERO_ERROR;
+        u_strToUTF8(NULL, 0, position, text16, position16, &preflightStatus);
+        if (U_FAILURE(preflightStatus) && preflightStatus != U_BUFFER_OVERFLOW_ERROR) {
+            *status = preflightStatus;
+            result = 0;
+        }
+    }
+
+    if (text16 != stackBuf) {
+        uprv_free(text16);
+    }
+    return result;
+
+}
+
+
+
+/**
+ * UnicodeString variant of uspoof_check(): forwards the string's buffer and
+ * length to uspoof_check() and returns its result.
+ */
+U_CAPI int32_t U_EXPORT2
+uspoof_checkUnicodeString(const USpoofChecker *sc,
+                          const U_NAMESPACE_QUALIFIER UnicodeString &text, 
+                          int32_t *position,
+                          UErrorCode *status) {
+    return uspoof_check(sc, text.getBuffer(), text.length(), position, status);
+}
+
+
+/**
+ * Compute the "skeleton" of a string: NFKD normalize it, then replace each
+ * code point with its mapping from the confusables table selected by type.
+ *
+ * @param sc            The spoof checker whose confusable data is used.
+ * @param type          0, USPOOF_SINGLE_SCRIPT_CONFUSABLE, USPOOF_ANY_CASE, or
+ *                      both flags ORed together; selects the ML, SL, MA or SA
+ *                      table respectively.
+ * @param s             The input string.
+ * @param length        Length of s in UChars, or -1 if NUL terminated.
+ * @param dest          Output buffer.
+ * @param destCapacity  Capacity of dest in UChars.
+ * @param status        In/out error code.  U_ILLEGAL_ARGUMENT_ERROR for bad
+ *                      arguments, U_BUFFER_OVERFLOW_ERROR if dest is too
+ *                      small, U_STRING_NOT_TERMINATED_WARNING if the result
+ *                      fills dest exactly.
+ * @return              The length of the skeleton in UChars; on overflow, the
+ *                      length that would be required.  0 on other errors.
+ */
+U_CAPI int32_t U_EXPORT2
+uspoof_getSkeleton(const USpoofChecker *sc,
+                   uint32_t type,
+                   const UChar *s,  int32_t length,
+                   UChar *dest, int32_t destCapacity,
+                   UErrorCode *status) {
+
+    const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+    if (length<-1 || destCapacity<0 || (destCapacity==0 && dest!=NULL) ||
+        (type & ~(USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE)) != 0) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    int32_t tableMask = 0;
+    switch (type) {
+      case 0:
+        tableMask = USPOOF_ML_TABLE_FLAG;
+        break;
+      case USPOOF_SINGLE_SCRIPT_CONFUSABLE:
+        tableMask = USPOOF_SL_TABLE_FLAG;
+        break;
+      case USPOOF_ANY_CASE:
+        tableMask = USPOOF_MA_TABLE_FLAG;
+        break;
+      case USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE:
+        tableMask = USPOOF_SA_TABLE_FLAG;
+        break;
+      default:
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    // NFKD transform of the user supplied input
+
+    UChar nfkdBuf[USPOOF_STACK_BUFFER_SIZE];
+    UChar *nfkdInput = nfkdBuf;
+    int32_t normalizedLen = unorm_normalize(
+        s, length, UNORM_NFKD, 0, nfkdInput, USPOOF_STACK_BUFFER_SIZE, status);
+    if (*status == U_BUFFER_OVERFLOW_ERROR) {
+        nfkdInput = (UChar *)uprv_malloc((normalizedLen+1)*sizeof(UChar));
+        if (nfkdInput == NULL) {
+            *status = U_MEMORY_ALLOCATION_ERROR;
+            return 0;
+        }
+        // The overflow was only a request for a bigger buffer.  Clear it,
+        // otherwise the retry below returns at once without doing any work.
+        *status = U_ZERO_ERROR;
+        normalizedLen = unorm_normalize(s, length, UNORM_NFKD, 0,
+                                        nfkdInput, normalizedLen+1, status);
+    }
+    if (U_FAILURE(*status)) {
+        if (nfkdInput != nfkdBuf) {
+            uprv_free(nfkdInput);
+        }
+        return 0;
+    }
+
+    // buffer to hold the Unicode defined mappings for a single code point
+    UChar buf[USPOOF_MAX_SKELETON_EXPANSION];
+
+    // Apply the mapping to the NFKD form string
+
+    int32_t inputIndex = 0;
+    int32_t resultLen = 0;
+    while (inputIndex < normalizedLen) {
+        UChar32 c;
+        U16_NEXT(nfkdInput, inputIndex, normalizedLen, c);
+        int32_t replaceLen = This->confusableLookup(c, tableMask, buf);
+        // "<=" so that output which fills dest exactly is actually stored;
+        // that case is reported as U_STRING_NOT_TERMINATED_WARNING below,
+        // which promises complete (if unterminated) contents.
+        if (resultLen + replaceLen <= destCapacity) {
+            int i;
+            for (i=0; i<replaceLen; i++) {
+                dest[resultLen++] = buf[i];
+            }
+        } else {
+            // Storing the transformed string would overflow the dest buffer.
+            //   Don't bother storing anything, just sum up the required buffer size.
+            //   (We don't guarantee that a truncated buffer is filled to its end)
+            resultLen += replaceLen;
+        }
+    }
+
+    if (resultLen < destCapacity) {
+        dest[resultLen] = 0;
+    } else if (resultLen == destCapacity) {
+        *status = U_STRING_NOT_TERMINATED_WARNING;
+    } else {
+        *status = U_BUFFER_OVERFLOW_ERROR;
+    }
+    if (nfkdInput != nfkdBuf) {
+        uprv_free(nfkdInput);
+    }
+    return resultLen;
+}
+
+
+/**
+ * UnicodeString variant of uspoof_getSkeleton().
+ *
+ * @param sc      The spoof checker whose confusable data is used.
+ * @param type    Table selector, as for uspoof_getSkeleton().
+ * @param s       The input string.
+ * @param dest    Receives the skeleton.  It is emptied first and stays empty
+ *                if the skeleton cannot be computed.
+ * @param status  In/out error code.
+ * @return        A reference to dest.
+ */
+U_CAPI UnicodeString &  U_EXPORT2
+uspoof_getSkeletonUnicodeString(const USpoofChecker *sc,
+                                uint32_t type,
+                                const UnicodeString &s,
+                                UnicodeString &dest,
+                                UErrorCode *status) {
+    if (U_FAILURE(*status)) {
+        return dest;
+    }
+    dest.remove();
+
+    const UChar *str = s.getBuffer();
+    int32_t      strLen = s.length();
+    UChar        smallBuf[100];
+    UChar       *buf = smallBuf;
+    int32_t outputSize = uspoof_getSkeleton(sc, type, str, strLen, smallBuf, 100, status);
+    if (*status == U_BUFFER_OVERFLOW_ERROR) {
+        // The buffer holds UChars, so the size must be scaled by
+        // sizeof(UChar); outputSize+1 bytes would be overrun by the retry.
+        buf = static_cast<UChar *>(uprv_malloc((outputSize+1)*sizeof(UChar)));
+        if (buf == NULL) {
+            *status = U_MEMORY_ALLOCATION_ERROR;
+            return dest;
+        }
+        // Clear the overflow so that the retry actually runs.
+        *status = U_ZERO_ERROR;
+        outputSize = uspoof_getSkeleton(sc, type, str, strLen, buf, outputSize+1, status);
+    }
+    if (U_SUCCESS(*status)) {
+        dest.setTo(buf, outputSize);
+    }
+
+    if (buf != smallBuf) {
+        uprv_free(buf);
+    }
+    return dest;
+}
+
+
+/**
+ * UTF-8 variant of uspoof_getSkeleton().  The input is converted to UTF-16,
+ * the skeleton is computed with uspoof_getSkeleton(), and the result is
+ * converted back to UTF-8.
+ *
+ * @param sc            The spoof checker whose confusable data is used.
+ * @param type          Table selector, as for uspoof_getSkeleton().
+ * @param s             The UTF-8 input string.
+ * @param length        Length of s in bytes, passed to u_strFromUTF8().
+ * @param dest          Output buffer for the UTF-8 skeleton.
+ * @param destCapacity  Capacity of dest in bytes.
+ * @param status        In/out error code.
+ * @return              The length in bytes reported by u_strToUTF8() for the
+ *                      skeleton; 0 if that conversion was never reached.
+ */
+U_CAPI int32_t U_EXPORT2
+uspoof_getSkeletonUTF8(const USpoofChecker *sc,
+                       uint32_t type,
+                       const char *s,  int32_t length,
+                       char *dest, int32_t destCapacity,
+                       UErrorCode *status) {
+    // Lacking a UTF-8 normalization API, just converting the input to
+    // UTF-16 seems as good an approach as any.  In typical use, input will
+    // be an identifier, which is to say not too long for stack buffers.
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+    // Buffers for the UChar form of the input and skeleton strings.
+    UChar    smallInBuf[USPOOF_STACK_BUFFER_SIZE];
+    UChar   *inBuf = smallInBuf;
+    UChar    smallOutBuf[USPOOF_STACK_BUFFER_SIZE];
+    UChar   *outBuf = smallOutBuf;
+
+    int32_t  lengthInUChars = 0;
+    int32_t  skelLengthInUChars = 0;
+    int32_t  skelLengthInUTF8 = 0;
+
+    u_strFromUTF8(inBuf, USPOOF_STACK_BUFFER_SIZE, &lengthInUChars,
+                  s, length, status);
+    if (*status == U_BUFFER_OVERFLOW_ERROR) {
+        inBuf = static_cast<UChar *>(uprv_malloc((lengthInUChars+1)*sizeof(UChar)));
+        if (inBuf == NULL) {
+            *status = U_MEMORY_ALLOCATION_ERROR;
+            goto cleanup;
+        }
+        *status = U_ZERO_ERROR;
+        // Retry with the real capacity of the new buffer.
+        u_strFromUTF8(inBuf, lengthInUChars+1, &lengthInUChars,
+                      s, length, status);
+    }
+
+    // The source is the converted input (inBuf); outBuf is only the
+    // destination.
+    skelLengthInUChars = uspoof_getSkeleton(sc, type, inBuf, lengthInUChars,
+                                         outBuf, USPOOF_STACK_BUFFER_SIZE, status);
+    if (*status == U_BUFFER_OVERFLOW_ERROR) {
+        outBuf = static_cast<UChar *>(uprv_malloc((skelLengthInUChars+1)*sizeof(UChar)));
+        if (outBuf == NULL) {
+            *status = U_MEMORY_ALLOCATION_ERROR;
+            goto cleanup;
+        }
+        *status = U_ZERO_ERROR;
+        skelLengthInUChars = uspoof_getSkeleton(sc, type, inBuf, lengthInUChars,
+                                         outBuf, skelLengthInUChars+1, status);
+    }
+
+    u_strToUTF8(dest, destCapacity, &skelLengthInUTF8,
+                outBuf, skelLengthInUChars, status);
+
+  cleanup:
+    // Both heap buffers come from uprv_malloc(), so they are released with
+    // uprv_free(), not delete.
+    if (inBuf != smallInBuf) {
+        uprv_free(inBuf);
+    }
+    if (outBuf != smallOutBuf) {
+        uprv_free(outBuf);
+    }
+    return skelLengthInUTF8;
+}
+
+
+/**
+ * Copy a spoof checker's raw data into a caller supplied buffer.
+ *
+ * @param sc        The checker to serialize.
+ * @param buf       Destination buffer.
+ * @param capacity  Size of buf, compared against the fLength field of the
+ *                  raw data.
+ * @param status    In/out error code.  U_BUFFER_OVERFLOW_ERROR is set, and
+ *                  nothing copied, if capacity is too small.
+ * @return          The fLength field of the raw data, whether or not the
+ *                  copy was made; 0 if sc does not validate.
+ */
+U_CAPI int32_t U_EXPORT2
+uspoof_serialize(USpoofChecker *sc,void *buf, int32_t capacity, UErrorCode *status) {
+    SpoofImpl *impl = SpoofImpl::validateThis(sc, *status);
+    if (impl == NULL) {
+        U_ASSERT(U_FAILURE(*status));
+        return 0;
+    }
+    int32_t bytesNeeded = impl->fSpoofData->fRawData->fLength;
+    if (capacity >= bytesNeeded) {
+        uprv_memcpy(buf, impl->fSpoofData->fRawData, bytesNeeded);
+    } else {
+        *status = U_BUFFER_OVERFLOW_ERROR;
+    }
+    return bytesNeeded;
+}
+
--- a/icu4c/source/i18n/uspoof_build.cpp
+++ b/icu4c/source/i18n/uspoof_build.cpp
@ -0,0 +1,81 @@
+/*
+ ***************************************************************************
+ * Copyright (C) 2008-2009, International Business Machines Corporation
+ * and others. All Rights Reserved.
+ ***************************************************************************
+ *   file name:  uspoof_build.cpp
+ *   encoding:   US-ASCII
+ *   tab size:   8 (not used)
+ *   indentation:4
+ *
+ *   created on: 2008 Dec 8
+ *   created by: Andy Heninger
+ *
+ *   Unicode Spoof Detection Data Builder
+ *   Builder-related functions are kept in separate files so that applications not needing
+ *   the builder can more easily exclude them, typically by means of static linking.
+ *
+ *   There are three relatively independent sets of Spoof data,
+ *      Confusables,
+ *      Whole Script Confusables
+ *      ID character extensions.
+ *
+ *   The data tables for each are built separately, each from its own definitions
+ */
+
+#include "unicode/utypes.h"
+#include "unicode/uspoof.h"
+#include "unicode/unorm.h"
+#include "unicode/uregex.h"
+#include "unicode/ustring.h"
+#include "cmemory.h"
+#include "uspoof_impl.h"
+#include "uhash.h"
+#include "uvector.h"
+#include "uassert.h"
+#include "uarrsort.h"
+#include "uspoof_buildconf.h"
+#include "uspoof_buildwsconf.h"
+
+
+#include <stdio.h>   // DEBUG
+
+U_NAMESPACE_USE
+
+
+
+// The main data building function
+
+/**
+ * Build a spoof checker from the source (text) form of the confusables and
+ * whole-script confusables data.
+ *
+ * @param confusables                The confusables source text.
+ * @param confusablesLen             Its length.
+ * @param confusablesWholeScript     The whole-script confusables source text.
+ * @param confusablesWholeScriptLen  Its length.
+ * @param errorType                  Optional (may be NULL).  Zeroed here and
+ *                                   passed on to the confusables builder.
+ * @param pe                         Optional (may be NULL).  Reset here and
+ *                                   passed on to both builders.
+ * @param status                     In/out error code.
+ * @return                           The new checker, or NULL if status
+ *                                   indicates a failure.
+ */
+U_CAPI USpoofChecker * U_EXPORT2
+uspoof_openFromSource(const char *confusables,  int32_t confusablesLen,
+                      const char *confusablesWholeScript, int32_t confusablesWholeScriptLen,
+                      int32_t *errorType, UParseError *pe, UErrorCode *status) {
+
+    if (U_FAILURE(*status)) {
+        return NULL;
+    }
+
+    // Start the optional error reporting outputs from a clean state.
+    if (errorType != NULL) {
+        *errorType = 0;
+    }
+    if (pe != NULL) {
+        pe->line           = 0;
+        pe->offset         = 0;
+        pe->preContext[0]  = 0;
+        pe->postContext[0] = 0;
+    }
+
+    // A shell of a spoof detector, holding empty data for the builders to
+    // fill in.
+    SpoofData *emptyData = new SpoofData(*status);
+    SpoofImpl *checker   = new SpoofImpl(emptyData, *status);
+
+    // Compile the two text sources into the checker's binary data.
+    ConfusabledataBuilder::buildConfusableData(checker, confusables, confusablesLen, errorType, pe, *status);
+    buildWSConfusableData(checker, confusablesWholeScript, confusablesWholeScriptLen, pe, *status);
+
+    if (U_FAILURE(*status)) {
+        delete checker;
+        checker = NULL;
+    }
+    return (USpoofChecker *)checker;
+}
+
--- a/icu4c/source/i18n/uspoof_buildconf.cpp
+++ b/icu4c/source/i18n/uspoof_buildconf.cpp
@ -0,0 +1,593 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 2008-2009, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*   file name:  uspoof_buildconf.cpp
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2009Jan05  (refactoring earlier files)
+*   created by: Andy Heninger
+*
+*   Internal classes for compiling confusable data into its binary (runtime) form.
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/uspoof.h"
+#include "unicode/unorm.h"
+#include "unicode/uregex.h"
+#include "unicode/ustring.h"
+#include "cmemory.h"
+#include "uspoof_impl.h"
+#include "uhash.h"
+#include "uvector.h"
+#include "uassert.h"
+#include "uarrsort.h"
+#include "uspoof_buildconf.h"
+
+#include "stdio.h"    // DEBUG.  Remove.
+
+U_NAMESPACE_USE
+
+
+//---------------------------------------------------------------------
+//
+//  buildConfusableData   Compile the source confusable data, as defined by
+//                        the Unicode data file confusables.txt, into the binary
+//                        structures used by the confusable detector.
+//
+//                        The binary structures are described in uspoof_impl.h
+//
+//     1.  parse the data, building 4 hash tables, one each for the SL, SA, ML and MA
+//         tables.  Each maps from a UChar32 to a String.
+//
+//     2.  Sort all of the strings encountered by length, since they will need to
+//         be stored in that order in the final string table.
+//
+//     3.  Build a list of keys (UChar32s) from the four mapping tables.  Sort the
+//         list because that will be the ordering of our runtime table.
+//
+//     4.  Generate the run time string table.  This is generated before the key & value
+//         tables because we need the string indexes when building those tables.
+//
+//     5.  Build the run-time key and value tables.  These are parallel tables, and are built
+//         at the same time
+//
+
+// Wrap a string for the string pool.  The SPUString stores the pointer and
+// deletes the string in its destructor; the table index starts out as 0.
+SPUString::SPUString(UnicodeString *s)
+    : fStr(s), fStrTableIndex(0) {
+}
+
+
+// Deletes the wrapped string.
+SPUString::~SPUString() {
+    delete fStr;
+}
+
+
+// Create an empty pool: a vector of the pooled SPUStrings plus a hash table
+// keyed by UnicodeString.
+// status is set to U_MEMORY_ALLOCATION_ERROR if the vector cannot be
+// allocated.
+SPUStringPool::SPUStringPool(UErrorCode &status) : fVec(NULL), fHash(NULL) {
+    fVec = new UVector(status);
+    if (fVec == NULL && U_SUCCESS(status)) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+    }
+    fHash = uhash_open(uhash_hashUnicodeString,           // key hash function
+                       uhash_compareUnicodeString,        // Key Comparator
+                       NULL,                              // Value Comparator
+                       &status);
+}
+
+
+// Delete every pooled SPUString, then the containers themselves.
+SPUStringPool::~SPUStringPool() {
+    // fVec is still NULL if its allocation failed in the constructor.
+    if (fVec != NULL) {
+        int i;
+        for (i=fVec->size()-1; i>=0; i--) {
+            SPUString *s = static_cast<SPUString *>(fVec->elementAt(i));
+            delete s;
+        }
+        delete fVec;
+    }
+    uhash_close(fHash);
+}
+
+
+// Number of strings in the pool.
+int32_t SPUStringPool::size() {
+    int32_t count = fVec->size();
+    return count;
+}
+
+// The pooled string stored at the given position of the pool's vector.
+SPUString *SPUStringPool::getByIndex(int32_t index) {
+    return (SPUString *)fVec->elementAt(index);
+}
+
+
+// Ordering function for the strings of the string pool:
+// shorter strings sort before longer ones, and strings of equal length
+// are ordered by UnicodeString::compare().
+// Conforms to the type signature for a USortComparator in uvector.h
+
+static int8_t U_CALLCONV SPUStringCompare(UHashTok left, UHashTok right) {
+    const SPUString *lhs = static_cast<const SPUString *>(left.pointer);
+    const SPUString *rhs = static_cast<const SPUString *>(right.pointer);
+    int32_t lhsLen = lhs->fStr->length();
+    int32_t rhsLen = rhs->fStr->length();
+    if (lhsLen != rhsLen) {
+        return lhsLen < rhsLen ? -1 : 1;
+    }
+    return lhs->fStr->compare(*(rhs->fStr));
+}
+
+// Sort the pool's vector with SPUStringCompare.
+void SPUStringPool::sort(UErrorCode &status) {
+    fVec->sort(SPUStringCompare, status);
+}
+
+
+// Add a string to the pool, eliminating duplicates.
+//
+//   src     The string to add.  The pool takes it over: it is deleted here
+//           if an equal string is already pooled, and is otherwise stored
+//           in a new SPUString, which deletes it in its destructor.
+//   status  Set to U_MEMORY_ALLOCATION_ERROR if the new pool entry cannot
+//           be allocated.
+//   return  The pooled entry for the string, or NULL if the entry could not
+//           be allocated.
+SPUString *SPUStringPool::addString(UnicodeString *src, UErrorCode &status) {
+    SPUString *hashedString = static_cast<SPUString *>(uhash_get(fHash, src));
+    if (hashedString != NULL) {
+        delete src;
+        return hashedString;
+    }
+    hashedString = new SPUString(src);
+    if (hashedString == NULL) {
+        // Nothing took over src, so release it here; and do not enter a
+        // NULL value into the hash table and vector.
+        delete src;
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    uhash_put(fHash, src, hashedString, &status);
+    fVec->addElement(hashedString, status);
+    return hashedString;
+}
+
+
+
+// Set up the working collections used while compiling confusable data for
+// the given SpoofImpl: the four mapping hash tables (SL, SA, ML, MA), the
+// key set, the key and value vectors, and the string pool.
+// Nothing is allocated if status already indicates a failure on entry.
+// status is set to U_MEMORY_ALLOCATION_ERROR if any of the objects created
+// with new could not be allocated.
+ConfusabledataBuilder::ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status) :
+    fSpoofImpl(spImpl),
+    fInput(NULL),
+    fSLTable(NULL),
+    fSATable(NULL), 
+    fMLTable(NULL),
+    fMATable(NULL),
+    fKeySet(NULL), 
+    fKeyVec(NULL),
+    fValueVec(NULL),
+    fStringTable(NULL),
+    fStringLengthsTable(NULL),
+    stringPool(NULL),
+    fParseLine(NULL),
+    fParseHexNum(NULL),
+    fLineNum(0)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+    fSLTable    = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &status);
+    fSATable    = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &status);
+    fMLTable    = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &status);
+    fMATable    = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &status);
+    fKeySet     = new UnicodeSet();
+    fKeyVec     = new UVector(status);
+    fValueVec   = new UVector(status);
+    stringPool = new SPUStringPool(status);
+
+    // build() only looks at status before using these objects, so a failed
+    // allocation has to be reported through it.
+    if (U_SUCCESS(status) &&
+        (fKeySet == NULL || fKeyVec == NULL || fValueVec == NULL || stringPool == NULL)) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+    }
+}
+
+
+// Release everything the builder allocated.
+ConfusabledataBuilder::~ConfusabledataBuilder() {
+    // UTF-16 copy of the input text, and the regular expressions over it.
+    uprv_free(fInput);
+    uregex_close(fParseLine);
+    uregex_close(fParseHexNum);
+
+    // The four mapping tables.
+    uhash_close(fSLTable);
+    uhash_close(fSATable);
+    uhash_close(fMLTable);
+    uhash_close(fMATable);
+
+    // Key and value collections.
+    delete fKeySet;
+    delete fKeyVec;
+    delete fValueVec;
+
+    // String table, string lengths table and the string pool.
+    delete fStringTable;
+    delete fStringLengthsTable;
+    delete stringPool;
+}
+
+
+// Compile confusables source text into the binary data of a SpoofImpl,
+// using a temporary ConfusabledataBuilder.
+//
+//   spImpl          The spoof checker implementation that receives the data.
+//   confusables     The confusables source text.
+//   confusablesLen  Its length.
+//   errorType       Optional (may be NULL).  Set to
+//                   USPOOF_SINGLE_SCRIPT_CONFUSABLE if the build fails.
+//   pe              Optional (may be NULL).  On failure its line field is
+//                   set to the builder's line counter.
+//   status          In/out error code.
+void ConfusabledataBuilder::buildConfusableData(SpoofImpl * spImpl, const char * confusables,
+    int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status) {
+
+    if (U_FAILURE(status)) {
+        return;
+    }
+    ConfusabledataBuilder builder(spImpl, status);
+    builder.build(confusables, confusablesLen, status);
+    if (U_FAILURE(status)) {
+        // The two outputs are independently optional; check each one on its
+        // own before writing through it.
+        if (errorType != NULL) {
+            *errorType = USPOOF_SINGLE_SCRIPT_CONFUSABLE;
+        }
+        if (pe != NULL) {
+            pe->line = builder.fLineNum;
+        }
+    }
+}
+
+
+void ConfusabledataBuilder::build(const char * confusables, int32_t confusablesLen,
+               UErrorCode &status) {
+
+    // Convert the user input data from UTF-8 to UChar (UTF-16)
+    int32_t inputLen = 0;
+    if (U_FAILURE(status)) {
+        return;
+    }
+    u_strFromUTF8(NULL, 0, &inputLen, confusables, confusablesLen, &status);
+    if (status != U_BUFFER_OVERFLOW_ERROR) {
+        return;
+    }
+    status = U_ZERO_ERROR;
+    fInput = static_cast<UChar *>(uprv_malloc((inputLen+1) * sizeof(UChar)));
+    if (fInput == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+    }
+    u_strFromUTF8(fInput, inputLen+1, NULL, confusables, confusablesLen, &status);
+
+
+    // Regular Expression to parse a line from Confusables.txt.  The expression will match
+    // any line.  What was matched is determined by examining which capture groups have a match.
+    //   Capture Group 1:  the source char
+    //   Capture Group 2:  the replacement chars
+    //   Capture Group 3-6  the table type, SL, SA, ML, or MA
+    //   Capture Group 7:  A blank or comment only line.
+    //   Capture Group 8:  A syntactically invalid line.  Anything that didn't match before.
+    // Example Line from the confusables.txt source file:
+    //   "1D702 ;	006E 0329 ;	SL	# MATHEMATICAL ITALIC SMALL ETA ... "
+    fParseLine = uregex_openC(
+        "(?m)^[ \\t]*([0-9A-Fa-f]+)[ \\t]+;"      // Match the source char
+        "[ \\t]*([0-9A-Fa-f]+"                    // Match the replacement char(s)
+           "(?:[ \\t]+[0-9A-Fa-f]+)*)[ \\t]*;"    //     (continued)
+        "\\s*(?:(SL)|(SA)|(ML)|(MA))"             // Match the table type
+        "[ \\t]*(?:#.*?)?$"                       // Match any trailing #comment
+        "|^([ \\t]*(?:#.*?)?)$"       // OR match empty lines or lines with only a #comment
+        "|^(.*?)$",                   // OR match any line, which catches illegal lines.
+        0, NULL, &status);
+        
+    // Regular expression for parsing a hex number out of a space-separated list of them.
+    //   Capture group 1 gets the number, with spaces removed.
+    fParseHexNum = uregex_openC("\\s*([0-9A-F]+)", 0, NULL, &status);
+
+    // Zap any Byte Order Mark at the start of input.  Changing it to a space is benign
+    //   given the syntax of the input.
+    if (*fInput == 0xfeff) {
+        *fInput = 0x20;
+    }
+
+    // Parse the input, one line per iteration of this loop.
+    uregex_setText(fParseLine, fInput, inputLen, &status);
+    while (uregex_findNext(fParseLine, &status)) {
+        fLineNum++;
+        if (uregex_start(fParseLine, 7, &status) >= 0) {
+            // this was a blank or comment line.
+            continue;
+        }
+        if (uregex_start(fParseLine, 8, &status) >= 0) {
+            // input file syntax error.
+            status = U_PARSE_ERROR;
+            return;
+        }
+
+        // We have a good input line.  Extract the key character and mapping string, and
+        //    put them into the appropriate mapping table.
+        UChar32 keyChar = SpoofImpl::ScanHex(fInput, uregex_start(fParseLine, 1, &status),
+                          uregex_end(fParseLine, 1, &status), status);
+                          
+        int32_t mapStringStart = uregex_start(fParseLine, 2, &status);
+        int32_t mapStringLength = uregex_end(fParseLine, 2, &status) - mapStringStart;
+        uregex_setText(fParseHexNum, &fInput[mapStringStart], mapStringLength, &status);
+        
+        UnicodeString  *mapString = new UnicodeString();
+        if (mapString == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        while (uregex_findNext(fParseHexNum, &status)) {
+            UChar32 c = SpoofImpl::ScanHex(&fInput[mapStringStart], uregex_start(fParseHexNum, 1, &status),
+                                 uregex_end(fParseHexNum, 1, &status), status);
+            mapString->append(c);
+        }
+        U_ASSERT(mapString->length() >= 1);
+        
+        // Put the map (value) string into the string pool
+        // This is a little like a Java intern() - any duplicates will be eliminated.
+        SPUString *smapString = stringPool->addString(mapString, status);
+        
+        // Add the UChar -> string mapping to the appropriate table.
+        UHashtable *table = uregex_start(fParseLine, 3, &status) >= 0 ? fSLTable :
+                            uregex_start(fParseLine, 4, &status) >= 0 ? fSATable :
+                            uregex_start(fParseLine, 5, &status) >= 0 ? fMLTable :
+                            uregex_start(fParseLine, 6, &status) >= 0 ? fMATable :
+                            NULL;
+        U_ASSERT(table != NULL);
+        uhash_iput(table, keyChar, smapString, &status);
+        fKeySet->add(keyChar);
+        if (U_FAILURE(status)) {
+            return;
+        }
+    }
+
+    // Input data is now all parsed and collected.
+    // Now create the run-time binary form of the data.
+    //
+    // This is done in two steps.  First the data is assembled into vectors and strings,
+    //   for ease of construction, then the contents of these collections are dumped
+    //   into the actual raw-bytes data storage.
+
+    // Build up the string array, and record the index of each string therein
+    //  in the (build time only) string pool.
+    // Strings of length one are not entered into the strings array.
+    // At the same time, build up the string lengths table, which records the
+    // position in the string table of the first string of each length >= 4.
+    // (Strings in the table are sorted by length)
+    stringPool->sort(status);
+    fStringTable = new UnicodeString();
+    fStringLengthsTable = new UVector(status);
+    int32_t previousStringLength = 0;
+    int32_t previousStringIndex  = 0;
+    int32_t poolSize = stringPool->size();
+    int32_t i;
+    for (i=0; i<poolSize; i++) {
+        SPUString *s = stringPool->getByIndex(i);
+        int32_t strLen = s->fStr->length();
+        int32_t strIndex = fStringTable->length();
+        U_ASSERT(strLen >= previousStringLength);
+        if (strLen == 1) {
+            // strings of length one do not get an entry in the string table.
+            // Keep the single string character itself here, which is the same
+            //  convention that is used in the final run-time string table index.
+            s->fStrTableIndex = s->fStr->charAt(0);
+        } else {
+            if ((strLen > previousStringLength) && (previousStringLength >= 4)) {
+                fStringLengthsTable->addElement(previousStringIndex, status);
+                fStringLengthsTable->addElement(previousStringLength, status);
+            }
+            s->fStrTableIndex = strIndex;
+            fStringTable->append(*(s->fStr));
+        }
+        previousStringLength = strLen;
+        previousStringIndex  = strIndex;
+    }
+    // Make the final entry to the string lengths table.
+    //   (it holds an entry for the _last_ string of each length, so adding the
+    //    final one doesn't happen in the main loop because no longer string was encountered.)
+    if (previousStringLength >= 4) {
+        fStringLengthsTable->addElement(previousStringIndex, status);
+        fStringLengthsTable->addElement(previousStringLength, status);
+    }
+
+    // Construct the compile-time Key and Value tables
+    //
+    // For each key code point, check which mapping tables it applies to,
+    //   and create the final data for the key & value structures.
+    //
+    //   The four logical mapping tables are conflated into one combined table.
+    //   If multiple logical tables have the same mapping for some key, they
+    //     share a single entry in the combined table.
+    //   If more than one mapping exists for the same key code point, multiple
+    //     entries will be created in the table
+
+    for (int32_t range=0; range<fKeySet->getRangeCount(); range++) {
+        // It is an oddity of the UnicodeSet API that simply enumerating the contained
+        //   code points requires a nested loop.
+        for (UChar32 keyChar=fKeySet->getRangeStart(range);
+                keyChar <= fKeySet->getRangeEnd(range); keyChar++) {
+            addKeyEntry(keyChar, fSLTable, USPOOF_SL_TABLE_FLAG, status);
+            addKeyEntry(keyChar, fSATable, USPOOF_SA_TABLE_FLAG, status);
+            addKeyEntry(keyChar, fMLTable, USPOOF_ML_TABLE_FLAG, status);
+            addKeyEntry(keyChar, fMATable, USPOOF_MA_TABLE_FLAG, status);
+        }
+    }
+
+    // Put the assembled data into the flat runtime array
+    outputData(status);
+
+    // All of the intermediate allocated data belongs to the ConfusabledataBuilder
+    //  object  (this), and is deleted in the destructor. 
+    return;
+}
+
+//
+// outputData     The confusable data has been compiled and stored in intermediate
+//                collections and strings.  Copy it from there to the final flat
+//                binary array.
+//
+//                Four sections are appended, in this order: the key table, the value
+//                table (parallel to the key table), the string table and the string
+//                lengths table.  For each one, its byte offset from the start of the
+//                raw data and its size are recorded in the raw data header.
+//
+//                Note that as each section is added to the output data, the
+//                expand (reserveSpace()) function will likely relocate it in memory.
+//                Be careful with pointers: fRawData is fetched again after every
+//                reserveSpace() call rather than being cached across calls.
+//
+//                status is set by the functions called from here; this function
+//                returns immediately when a reserveSpace() call reports a failure.
+//
+void ConfusabledataBuilder::outputData(UErrorCode &status) {
+
+    U_ASSERT(fSpoofImpl->fSpoofData->fDataOwned == TRUE);
+    
+    //  The Key Table
+    //     While copying the keys to the runtime array,
+    //       also sanity check that they are sorted.
+    
+    int32_t numKeys = fKeyVec->size();
+    int32_t *keys =
+        static_cast<int32_t *>(fSpoofImpl->fSpoofData->reserveSpace(numKeys*sizeof(int32_t), status));
+    if (U_FAILURE(status)) {
+        return;
+    }
+    int32_t i;
+    int32_t previousKey = 0;
+    for (i=0; i<numKeys; i++) {
+        int32_t key =  fKeyVec->elementAti(i);
+        // Ordering is checked on the low 24 bits only; the high 8 bits must not be all zero.
+        U_ASSERT((key & 0x00ffffff) >= (previousKey & 0x00ffffff));
+        U_ASSERT((key & 0xff000000) != 0);
+        keys[i] = key;
+        previousKey = key;
+    }
+    SpoofDataHeader *rawData = fSpoofImpl->fSpoofData->fRawData;
+    rawData->fCFUKeys = (char *)keys - (char *)rawData;
+    rawData->fCFUKeysSize = numKeys;
+    fSpoofImpl->fSpoofData->fCFUKeys = keys;
+
+
+    // The Value Table, parallels the key table
+    //    The space reserved is sized from numValues, the same count that bounds the
+    //    copy loop below, so the loop cannot write past the reservation even if the
+    //    two vectors were ever to differ in size in a build without assertions.
+    int32_t numValues = fValueVec->size();
+    U_ASSERT(numKeys == numValues);
+    uint16_t *values =
+        static_cast<uint16_t *>(fSpoofImpl->fSpoofData->reserveSpace(numValues*sizeof(uint16_t), status));
+    if (U_FAILURE(status)) {
+        return;
+    }
+    for (i=0; i<numValues; i++) {
+        uint32_t value = static_cast<uint32_t>(fValueVec->elementAti(i));
+        U_ASSERT(value < 0xffff);
+        values[i] = static_cast<uint16_t>(value);
+    }
+    rawData = fSpoofImpl->fSpoofData->fRawData;
+    rawData->fCFUStringIndex = (char *)values - (char *)rawData;
+    rawData->fCFUStringIndexSize = numValues;
+    fSpoofImpl->fSpoofData->fCFUValues = values;
+
+    // The Strings Table.
+    
+    uint32_t stringsLength = fStringTable->length();
+    // Reserve an extra space so the string will be nul-terminated.  This is
+    // only a convenience, for when debugging; it is not needed otherwise.
+    UChar *strings =
+        static_cast<UChar *>(fSpoofImpl->fSpoofData->reserveSpace(stringsLength*sizeof(UChar)+2, status));
+    if (U_FAILURE(status)) {
+        return;
+    }
+    fStringTable->extract(strings, stringsLength+1, status);
+    rawData = fSpoofImpl->fSpoofData->fRawData;
+    U_ASSERT(rawData->fCFUStringTable == 0);
+    rawData->fCFUStringTable = (char *)strings - (char *)rawData;
+    rawData->fCFUStringTableLen = stringsLength;
+    fSpoofImpl->fSpoofData->fCFUStrings = strings;
+    
+    // The String Lengths Table
+    //    While copying into the runtime array do some sanity checks on the values
+    //    Each complete entry contains two fields, a string table offset and a string length,
+    //    stored as consecutive elements of fStringLengthsTable.
+    //    Lengths should increase with each entry.
+    //    Offsets should be less than the size of the string table.
+    int32_t lengthTableLength = fStringLengthsTable->size();
+    uint16_t *stringLengths =
+        static_cast<uint16_t *>(fSpoofImpl->fSpoofData->reserveSpace(lengthTableLength*sizeof(uint16_t), status));
+    if (U_FAILURE(status)) {
+        return;
+    }
+    int32_t destIndex = 0;
+    uint32_t previousLength = 0;
+    for (i=0; i<lengthTableLength; i+=2) {
+        uint32_t offset = static_cast<uint32_t>(fStringLengthsTable->elementAti(i));
+        uint32_t length = static_cast<uint32_t>(fStringLengthsTable->elementAti(i+1));
+        U_ASSERT(offset < stringsLength);
+        U_ASSERT(length < 40);
+        U_ASSERT(length > previousLength);
+        stringLengths[destIndex++] = static_cast<uint16_t>(offset);
+        stringLengths[destIndex++] = static_cast<uint16_t>(length);
+        previousLength = length;
+    }
+    rawData = fSpoofImpl->fSpoofData->fRawData;
+    rawData->fCFUStringLengths = (char *)stringLengths - (char *)rawData;
+    // Note: StringLengthsSize in the raw data is the number of complete entries,
+    //       each consisting of a pair of 16 bit values, hence the divide by 2.
+    rawData->fCFUStringLengthsSize = lengthTableLength / 2;
+    fSpoofImpl->fSpoofData->fCFUStringLengths =
+        reinterpret_cast<SpoofStringLengthsElement *>(stringLengths);
+}
+
+
+    
+//  addKeyEntry   Construction of the confusable Key and Mapping Values tables.
+//                This is an intermediate point in the building process.
+//                We already have the mappings in the hash tables fSLTable, etc.
+//                This function builds corresponding run-time style table entries into
+//                  fKeyVec and fValueVec
+//
+//                If keyChar has no mapping in the given table, nothing is done.
+//                If an entry for keyChar with the same mapping string already exists,
+//                  tableFlag is or-ed into the key of that existing entry.
+//                Otherwise a new key / value pair is appended, and the multiple-values
+//                  flag is set on both the new entry and the entry preceding it when
+//                  keyChar already had an entry with a different mapping.
+//
+//                Nothing is done when status already indicates a failure on entry.
+//                On a failure to append to either vector the function returns at once,
+//                  leaving the failure in status.
+
+void ConfusabledataBuilder::addKeyEntry(
+    UChar32     keyChar,     // The key character
+    UHashtable *table,       // The table, one of SATable, MATable, etc.
+    int32_t     tableFlag,   // One of USPOOF_SA_TABLE_FLAG, etc.
+    UErrorCode &status) {
+
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    SPUString *targetMapping = static_cast<SPUString *>(uhash_iget(table, keyChar));
+    if (targetMapping == NULL) {
+        // No mapping for this key character.
+        //   (This function is called for all four tables for each key char that
+        //    is seen anywhere, so these no-entry cases are very much expected.)
+        return;
+    }
+    
+    // Check whether there is already an entry with the correct mapping.
+    // If so, simply set the flag in the keyTable saying that the existing entry
+    // applies to the table that we're doing now.
+    //
+    // The scan runs backwards from the end of fKeyVec and stops at the first entry
+    // for a different key character, so only the trailing run of entries for
+    // keyChar is examined.
+
+    UBool keyHasMultipleValues = FALSE;
+    int32_t i;
+    for (i=fKeyVec->size()-1; i>=0 ; i--) {
+        int32_t key = fKeyVec->elementAti(i);
+        if ((key & 0x0ffffff) != keyChar) {
+            // We have now checked all existing key entries for this key char (if any)
+            //  without finding one with the same mapping.
+            break;
+        }
+        UnicodeString mapping = getMapping(i);
+        if (mapping == *(targetMapping->fStr)) {
+            // The run time entry we are currently testing has the correct mapping.
+            // Set the flag in it indicating that it applies to the new table also.
+            key |= tableFlag;
+            fKeyVec->setElementAt(key, i);
+            return;
+        }
+        keyHasMultipleValues = TRUE;
+    }
+
+    // Need to add a new entry to the binary data being built for this mapping.
+    // Includes adding entries to both the key table and the parallel values table.
+
+    int32_t newKey = keyChar | tableFlag;
+    if (keyHasMultipleValues) {
+        newKey |= USPOOF_KEY_MULTIPLE_VALUES;
+    }
+    // The length field of the key holds (mapping length - 1), saturating at 3.
+    int32_t adjustedMappingLength = targetMapping->fStr->length() - 1;
+    if (adjustedMappingLength>3) {
+        adjustedMappingLength = 3;
+    }
+    newKey |= adjustedMappingLength << USPOOF_KEY_LENGTH_SHIFT;
+    
+    int32_t newData = targetMapping->fStrTableIndex;
+
+    fKeyVec->addElement(newKey, status);
+    fValueVec->addElement(newData, status);
+    if (U_FAILURE(status)) {
+        // The new entry may not have been appended, so size()-2 below would not
+        //   reliably identify the preceding entry for this key character.
+        return;
+    }
+
+    // If the preceding key entry is for the same key character (but with a different mapping)
+    //   set the multiple-values flag on it.
+    if (keyHasMultipleValues) {
+        int32_t previousKeyIndex = fKeyVec->size() - 2;
+        int32_t previousKey = fKeyVec->elementAti(previousKeyIndex);
+        previousKey |= USPOOF_KEY_MULTIPLE_VALUES;
+        fKeyVec->setElementAt(previousKey, previousKeyIndex);
+    }
+}
+
+
+
+// getMapping    Return the mapping string for the entry at position 'index' of the
+//               parallel fKeyVec / fValueVec tables.
+//
+//               The length field of the key selects how the value is interpreted:
+//                 0     the value is itself the single UChar of the mapping.
+//                 1, 2  the value is an index into fStringTable; the mapping length is
+//                       the field value plus one.
+//                 3     the value is an index into fStringTable; the mapping length is
+//                       taken from the first fStringLengthsTable entry whose index is
+//                       not less than the value.
+//               Any other field value is unexpected and yields an empty string.
+UnicodeString ConfusabledataBuilder::getMapping(int32_t index) {
+    const int32_t keyEntry   = fKeyVec->elementAti(index);
+    const int32_t valueEntry = fValueVec->elementAti(index);
+    const int32_t lengthCode = USPOOF_KEY_LENGTH_FIELD(keyEntry);
+
+    if (lengthCode == 0) {
+        return UnicodeString(static_cast<UChar>(valueEntry));
+    }
+
+    if (lengthCode == 1 || lengthCode == 2) {
+        return UnicodeString(*fStringTable, valueEntry, lengthCode+1);
+    }
+
+    if (lengthCode == 3) {
+        // The lengths table holds (index, length) pairs in consecutive elements.
+        int32_t mappingLength = 0;
+        for (int32_t pos=0; pos<fStringLengthsTable->size(); pos+=2) {
+            const int32_t lastIndexWithLen = fStringLengthsTable->elementAti(pos);
+            if (valueEntry <= lastIndexWithLen) {
+                mappingLength = fStringLengthsTable->elementAti(pos+1);
+                break;
+            }
+        }
+        U_ASSERT(mappingLength>=3);
+        return UnicodeString(*fStringTable, valueEntry, mappingLength);
+    }
+
+    U_ASSERT(FALSE);
+    return UnicodeString();
+}
--- a/icu4c/source/i18n/uspoof_buildconf.h
+++ b/icu4c/source/i18n/uspoof_buildconf.h
@ -0,0 +1,123 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 2008-2009, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*   file name:  uspoof_buildconf.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2009Jan05
+*   created by: Andy Heninger
+*
+*   Internal classes for compiling confusable data into its binary (runtime) form.
+*/
+
+#ifndef __USPOOF_BUILDCONF_H__
+#define __USPOOF_BUILDCONF_H__
+
+#include "uspoof_impl.h"
+
+// SPUString
+//              Holds a string that is the result of one of the mappings defined
+//              by the confusable mapping data (confusables.txt from Unicode.org).
+//              Instances of SPUString exist during the compilation process only.
+
+struct SPUString : public UMemory {
+    UnicodeString  *fStr;             // The actual string.
+    int32_t         fStrTableIndex;   // Index into the final runtime data for this string.
+                                      //  (or, for length 1, the single string char itself,
+                                      //   there being no string table entry for it.)
+    SPUString(UnicodeString *s);      // NOTE(review): presumably adopts s - confirm in the .cpp file.
+    ~SPUString();
+};
+
+
+//  String Pool   A utility class for holding the strings that are the result of
+//                the spoof mappings.  These strings will ultimately end up in the
+//                run-time String Table.
+//                This is sort of like a sorted set of strings, except that ICU's anemic
+//                built-in collections don't support those, so it is implemented with a
+//                combination of a uhash and a UVector.
+
+
+class SPUStringPool : public UMemory {
+  public:
+    SPUStringPool(UErrorCode &status);
+    ~SPUStringPool();
+    
+    // Add a string. Return the string from the table.
+    // If the input parameter string is already in the table, delete the
+    //  input parameter and return the existing string.
+    SPUString *addString(UnicodeString *src, UErrorCode &status);
+
+
+    // Get the n-th string in the collection (in sorted order once sort() has been called).
+    SPUString *getByIndex(int32_t i);
+
+    // Sort the contents; affects the ordering of getByIndex().
+    void sort(UErrorCode &status);
+
+    int32_t size();       // Number of strings in the pool.
+
+  private:
+    UVector     *fVec;    // Elements are SPUString *
+    UHashtable  *fHash;   // Key: UnicodeString  Value: SPUString
+};
+
+
+// class ConfusabledataBuilder
+//     An instance of this class exists while the confusable data is being built from source.
+//     It encapsulates the intermediate data structures that are used for building.
+//     It exports one static function, to do a confusable data build.
+
+class ConfusabledataBuilder : public UMemory {
+  private:
+    SpoofImpl  *fSpoofImpl;  // The spoof detector whose fSpoofData receives the compiled data.
+    UChar      *fInput;      // The source text being parsed, as UChars.
+
+    // The four logical mapping tables collected from the source text.
+    //   Key: key code point (UChar32)   Value: SPUString * from the string pool.
+    UHashtable *fSLTable;
+    UHashtable *fSATable; 
+    UHashtable *fMLTable; 
+    UHashtable *fMATable;
+    UnicodeSet *fKeySet;     // A set of all keys (UChar32s) that go into the four mapping tables.
+
+    // The binary data is first assembled into the following four collections, then
+    //   copied to its final raw-memory destination.
+    UVector            *fKeyVec;              // Key code point combined with table flags and a length field.
+    UVector            *fValueVec;            // Parallel to fKeyVec; string table index, or the char itself
+                                              //   for mappings of length one.
+    UnicodeString      *fStringTable;         // All mapping strings of length > 1, concatenated.
+    UVector            *fStringLengthsTable;  // Pairs of (string table index, string length) elements.
+    
+    SPUStringPool      *stringPool;           // The distinct mapping strings seen in the source.
+    URegularExpression *fParseLine;           // Matches one line of the source text.
+    URegularExpression *fParseHexNum;         // Matches one hex number within a mapping string.
+    int32_t             fLineNum;             // NOTE(review): presumably the current source line number - confirm.
+
+    ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status);
+    ~ConfusabledataBuilder();
+    void build(const char * confusables, int32_t confusablesLen, UErrorCode &status);
+
+    // Add an entry to the key and value tables being built
+    //   input:  data from SLTable, MATable, etc.
+    //   output: entry added to fKeyVec and fValueVec
+    void addKeyEntry(UChar32     keyChar,     // The key character
+                     UHashtable *table,       // The table, one of SATable, MATable, etc.
+                     int32_t     tableFlag,   // One of USPOOF_SA_TABLE_FLAG, etc.
+                     UErrorCode &status);
+
+    // From an index into fKeyVec & fValueVec
+    //   get a UnicodeString with the corresponding mapping.
+    UnicodeString getMapping(int32_t index);
+
+    // Populate the final binary output data array with the compiled data.
+    void outputData(UErrorCode &status);
+
+  public:
+    static void buildConfusableData(SpoofImpl *spImpl, const char * confusables,
+        int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status);
+};
+
+#endif
--- a/icu4c/source/i18n/uspoof_buildwsconf.cpp
+++ b/icu4c/source/i18n/uspoof_buildwsconf.cpp
@ -0,0 +1,431 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 2008-2009, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*   file name:  uspoof_buildwsconf.cpp
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2009Jan05  (refactoring earlier files)
+*   created by: Andy Heninger
+*
+*   Internal functions for compiling Whole Script confusable source data
+*   into its binary (runtime) form.  The binary data format is described
+*   in uspoof_impl.h
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/uspoof.h"
+#include "unicode/unorm.h"
+#include "unicode/uregex.h"
+#include "unicode/ustring.h"
+#include "cmemory.h"
+#include "uspoof_impl.h"
+#include "uhash.h"
+#include "uvector.h"
+#include "uassert.h"
+#include "uspoof_buildwsconf.h"
+
+
+//#include <stdio.h>       // TODO:  debug.  remove.
+U_NAMESPACE_USE
+
+
+// Regular expression for parsing a line from the Unicode file confusablesWholeScript.txt
+// Example Lines:
+//   006F          ; Latn; Deva; A #      (o)  LATIN SMALL LETTER O
+//   0048..0049    ; Latn; Grek; A #  [2] (H..I)  LATIN CAPITAL LETTER H..LATIN CAPITAL LETTER I
+//    |               |     |    |
+//    |               |     |    |---- Which table, Any Case or Lower Case (A or L)
+//    |               |     |----------Target script.   We need this.
+//    |               |----------------Src script.  Should match the script of the source
+//    |                                code points.  Beyond checking that, we don't keep it.
+//    |--------------------------------Source code points or range.
+//
+// The expression will match _all_ lines, including erroneous lines.
+// The result of the parse is returned via the contents of the (match) groups.
+//
+// The ".." separating the two ends of a code point range is matched literally; the dots
+// are escaped so that a line with any other two characters in that position falls through
+// to the error alternative (group 8) instead of being accepted as a range.
+static const char *parseExp = 
+        
+        "(?m)"                                         // Multi-line mode
+        "^([ \\t]*(?:#.*?)?)$"                         // A blank or comment line.  Matches Group 1.
+        "|^(?:"                                        //   OR
+        "\\s*([0-9A-F]{4,})(?:\\.\\.([0-9A-F]{4,}))?\\s*;" // Code point range.  Groups 2 and 3.
+        "\\s*([A-Za-z]+)\\s*;"                         // The source script.  Group 4.
+        "\\s*([A-Za-z]+)\\s*;"                         // The target script.  Group 5.
+        "\\s*(?:(A)|(L))"                              // The table A or L.   Group 6 or 7
+        "[ \\t]*(?:#.*?)?"                             // Trailing comment
+        ")$|"                                          //   OR
+        "^(.*?)$";                                     // An error line.      Group 8.
+                                                       //    Any line not matching the preceding
+                                                       //    parts of the expression will match
+                                                       //    this, and thus be flagged as an error
+
+
+// Copy the text of one match group of a regular expression into a char * buffer.
+//    The group text must consist of invariant characters only.
+//    Used for script names.
+//
+//    destBuf is always set to the empty string first.  It is left empty when
+//    uregex_group() fails or returns -1, or when the group text would not fit in
+//    destCapacity chars together with a terminating nul.
+// 
+static void extractGroup(
+    URegularExpression *e, int32_t group, char *destBuf, int32_t destCapacity, UErrorCode &status) {
+
+    UChar groupText[50];
+    groupText[0] = 0;
+    destBuf[0] = 0;
+
+    const int32_t groupLen = uregex_group(e, group, groupText, 50, &status);
+    const UBool usable = U_SUCCESS(status) && groupLen != -1 && groupLen < destCapacity;
+    if (usable) {
+        // Read-only alias of the local buffer; no copy of the UChars is made.
+        UnicodeString alias(FALSE, groupText, groupLen);
+        alias.extract(0, groupLen, destBuf, destCapacity, US_INV);
+    }
+}
+
+
+
+//  Build the Whole Script Confusable data
+//
+//     Parses the source text (in the format of the Unicode file confusablesWholeScript.txt),
+//     builds the any-case and lower-case Tries and the array of script sets that they
+//     index, and appends all three, in serialized form, to the data of the spoof detector.
+//
+//     spImpl            The spoof detector whose fSpoofData receives the compiled data.
+//     confusablesWS     The source text, UTF-8.
+//     confusablesWSLen  Length of the source text, as passed on to u_strFromUTF8().
+//     pe                On failure, pe->line receives the number of the last input line
+//                       that was read.  A NULL pe is tolerated.
+//     status            ICU error code.  Set to U_PARSE_ERROR for a line with a syntax error,
+//                       and to U_INVALID_FORMAT_ERROR for an unrecognized script name or for a
+//                       code point whose script is not the source script given on its line.
+//
+//     TODO:  Reorganize.  Either get rid of the WSConfusableDataBuilder class,
+//                         because everything is local to this one build function anyhow,
+//                           OR
+//                         break this function into more reasonably sized pieces, with
+//                         state in WSConfusableDataBuilder.
+//
+void buildWSConfusableData(SpoofImpl *spImpl, const char * confusablesWS,
+          int32_t confusablesWSLen, UParseError *pe, UErrorCode &status) 
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+    // Note: every local that is still in scope at the cleanup label is declared and
+    //       initialized here, ahead of the first goto.
+    URegularExpression *parseRegexp = NULL;
+    int32_t             inputLen    = 0;
+    UChar              *input       = NULL;
+    int32_t             lineNum     = 0;
+    
+    UVector            *scriptSets        = NULL;
+    uint32_t            rtScriptSetsCount = 2;
+
+    UTrie2             *anyCaseTrie   = NULL;
+    UTrie2             *lowerCaseTrie = NULL;
+
+    anyCaseTrie = utrie2_open(0, 0, &status);
+    lowerCaseTrie = utrie2_open(0, 0, &status);
+    
+
+    // The scriptSets vector provides a mapping from TRIE values to the set of scripts.
+    //
+    // Reserved TRIE values:
+    //   0:  Code point has no whole script confusables.
+    //   1:  Code point is of script Common or Inherited.
+    //       These code points do not participate in whole script confusable detection.
+    //       (This is logically equivalent to saying that they contain confusables in
+    //        all scripts)
+    //
+    // Because Trie values are indexes into the ScriptSets vector, pre-fill
+    // vector positions 0 and 1 to avoid conflicts with the reserved values.
+    
+    scriptSets = new UVector(status);
+    if (scriptSets == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        goto cleanup;
+    }
+    scriptSets->addElement((void *)NULL, status);
+    scriptSets->addElement((void *)NULL, status);
+
+    // Convert the user input data from UTF-8 to UChar (UTF-16)
+    //   The first call only computes the required length, and is expected to
+    //   report a buffer overflow.
+    u_strFromUTF8(NULL, 0, &inputLen, confusablesWS, confusablesWSLen, &status);
+    if (status != U_BUFFER_OVERFLOW_ERROR) {
+        goto cleanup;
+    }
+    status = U_ZERO_ERROR;
+    input = static_cast<UChar *>(uprv_malloc((inputLen+1) * sizeof(UChar)));
+    if (input == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        goto cleanup;
+    }
+    u_strFromUTF8(input, inputLen+1, NULL, confusablesWS, confusablesWSLen, &status);
+
+
+
+    parseRegexp = uregex_openC(parseExp, 0, NULL, &status);
+    
+    // Zap any Byte Order Mark at the start of input.  Changing it to a space is benign
+    //   given the syntax of the input.
+    if (*input == 0xfeff) {
+        *input = 0x20;
+    }
+
+    // Parse the input, one line per iteration of this loop.
+    //   The text of the whole line is not extracted; only the match groups are needed.
+    //   (Copying each line into a fixed size buffer would make any longer input line,
+    //    including a long comment, fail with a buffer overflow error.)
+    uregex_setText(parseRegexp, input, inputLen, &status);
+    while (uregex_findNext(parseRegexp, &status)) {
+        lineNum++;
+        if (uregex_start(parseRegexp, 1, &status) >= 0) {
+            // this was a blank or comment line.
+            continue;
+        }
+        if (uregex_start(parseRegexp, 8, &status) >= 0) {
+            // input file syntax error.
+            status = U_PARSE_ERROR;
+            goto cleanup;
+        }
+        if (U_FAILURE(status)) {
+            goto cleanup;
+        }
+
+        // Pick up the start and optional range end code points from the parsed line.
+        UChar32  startCodePoint = SpoofImpl::ScanHex(
+            input, uregex_start(parseRegexp, 2, &status), uregex_end(parseRegexp, 2, &status), status);
+        UChar32  endCodePoint = startCodePoint;
+        if (uregex_start(parseRegexp, 3, &status) >=0) {
+            endCodePoint = SpoofImpl::ScanHex(
+                input, uregex_start(parseRegexp, 3, &status), uregex_end(parseRegexp, 3, &status), status);
+        }
+
+        // Extract the two script names from the source line.  We need these in an 8 bit
+        //   default encoding (will be EBCDIC on IBM mainframes) in order to pass them on
+        //   to the ICU u_getPropertyValueEnum() function.  Ugh.
+        char  srcScriptName[20];
+        char  targScriptName[20];
+        extractGroup(parseRegexp, 4, srcScriptName, sizeof(srcScriptName), status);
+        extractGroup(parseRegexp, 5, targScriptName, sizeof(targScriptName), status);
+        UScriptCode srcScript  =
+            static_cast<UScriptCode>(u_getPropertyValueEnum(UCHAR_SCRIPT, srcScriptName));
+        UScriptCode targScript =
+            static_cast<UScriptCode>(u_getPropertyValueEnum(UCHAR_SCRIPT, targScriptName));
+        if (U_FAILURE(status)) {
+            goto cleanup;
+        }
+        if (srcScript == USCRIPT_INVALID_CODE || targScript == USCRIPT_INVALID_CODE) {
+            status = U_INVALID_FORMAT_ERROR;
+            goto cleanup;
+        }
+
+        // select the table - (A) any case or (L) lower case only
+        UTrie2 *table = anyCaseTrie;
+        if (uregex_start(parseRegexp, 7, &status) >= 0) {
+            table = lowerCaseTrie;
+        }
+
+        // Build the set of scripts containing confusable characters for
+        //   the code point(s) specified in this input line.
+        // Sanity check that the script of the source code point is the same
+        //   as the source script indicated in the input file.  Failure of this check is
+        //   an error in the input file.
+        // Include the source script in the set (needed for Mixed Script Confusable detection).
+        //
+        UChar32 cp;
+        for (cp=startCodePoint; cp<=endCodePoint; cp++) {
+            int32_t setIndex = utrie2_get32(table, cp);
+            BuilderScriptSet *bsset = NULL;
+            if (setIndex > 0) {
+                U_ASSERT(setIndex < scriptSets->size());
+                bsset = static_cast<BuilderScriptSet *>(scriptSets->elementAt(setIndex));
+            } else {
+                bsset = new BuilderScriptSet();
+                if (bsset == NULL) {
+                    status = U_MEMORY_ALLOCATION_ERROR;
+                    goto cleanup;
+                }
+                bsset->codePoint = cp;
+                bsset->trie = table;
+                bsset->sset = new ScriptSet();
+                setIndex = scriptSets->size();
+                bsset->index = setIndex;
+                bsset->rindex = 0;
+                if (bsset->sset == NULL) {
+                    // bsset has not yet been added to scriptSets, so the cleanup code
+                    //   will not find it; it must be deleted here.
+                    delete bsset;
+                    status = U_MEMORY_ALLOCATION_ERROR;
+                    goto cleanup;
+                }
+                scriptSets->addElement(bsset, status);
+                utrie2_set32(table, cp, setIndex, &status);
+            }
+            bsset->sset->Union(targScript);
+            bsset->sset->Union(srcScript);
+
+            if (U_FAILURE(status)) {
+                goto cleanup;
+            }
+            UScriptCode cpScript = uscript_getScript(cp, &status);
+            if (cpScript != srcScript) {
+                status = U_INVALID_FORMAT_ERROR;
+                goto cleanup;
+            }
+        }
+    }
+
+    // Eliminate duplicate script sets.  At this point we have a separate
+    // script set for every code point that had data in the input file.
+    //
+    // We eliminate underlying ScriptSet objects, not the BuildScriptSets that wrap them
+    //
+    // printf("Number of scriptSets: %d\n", scriptSets->size());
+    {
+        rtScriptSetsCount = 2;
+        for (int32_t outeri=2; outeri<scriptSets->size(); outeri++) {
+            BuilderScriptSet *outerSet = static_cast<BuilderScriptSet *>(scriptSets->elementAt(outeri));
+            if (outerSet->index != static_cast<uint32_t>(outeri)) {
+                // This set was already identified as a duplicate.
+                //   It will not be allocated a position in the runtime array of ScriptSets.
+                continue;
+            }
+            outerSet->rindex = rtScriptSetsCount++;
+            for (int32_t inneri=outeri+1; inneri<scriptSets->size(); inneri++) {
+                BuilderScriptSet *innerSet = static_cast<BuilderScriptSet *>(scriptSets->elementAt(inneri));
+                if (*(outerSet->sset) == *(innerSet->sset) && outerSet->sset != innerSet->sset) {
+                    // Make the inner set share the outer set's ScriptSet; the outer
+                    //   set remains the owner of the shared object.
+                    delete innerSet->sset;
+                    innerSet->scriptSetOwned = FALSE;
+                    innerSet->sset = outerSet->sset;
+                    innerSet->index = outeri;
+                    innerSet->rindex = outerSet->rindex;
+                }
+                // But this doesn't get all.  We need to fix the TRIE.
+            }
+        }
+        // printf("Number of distinct script sets: %d\n", rtScriptSetsCount);
+    }
+
+    
+
+    // Update the Trie values to reflect the run time script indexes (after duplicate merging).
+    //    (Trie Values 0 and 1 are reserved, and the corresponding slots in scriptSets
+    //     are unused, which is why the loop index starts at 2.)
+    {
+        for (int32_t i=2; i<scriptSets->size(); i++) {
+            BuilderScriptSet *bSet = static_cast<BuilderScriptSet *>(scriptSets->elementAt(i));
+            if (bSet->rindex != (uint32_t)i) {
+                utrie2_set32(bSet->trie, bSet->codePoint, bSet->rindex, &status);
+            }
+        }
+    }
+
+    // For code points with script==Common or script==Inherited,
+    //   Set the reserved value of 1 into both Tries.  These characters do not participate
+    //   in Whole Script Confusable detection; this reserved value is the means
+    //   by which they are detected.
+    {
+        UnicodeSet ignoreSet;
+        ignoreSet.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_COMMON, status);
+        UnicodeSet inheritedSet;
+        inheritedSet.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_INHERITED, status);
+        ignoreSet.addAll(inheritedSet);
+        for (int32_t rn=0; rn<ignoreSet.getRangeCount(); rn++) {
+            UChar32 rangeStart = ignoreSet.getRangeStart(rn);
+            UChar32 rangeEnd   = ignoreSet.getRangeEnd(rn);
+            utrie2_setRange32(anyCaseTrie,   rangeStart, rangeEnd, 1, TRUE, &status);
+            utrie2_setRange32(lowerCaseTrie, rangeStart, rangeEnd, 1, TRUE, &status);
+        }
+    }
+
+    // Serialize the data to the Spoof Detector
+    //   For each Trie, a first utrie2_serialize() call with no buffer obtains the size,
+    //   and is expected to report a buffer overflow.
+    {
+        utrie2_freeze(anyCaseTrie,   UTRIE2_16_VALUE_BITS, &status);
+        int32_t size = utrie2_serialize(anyCaseTrie, NULL, 0, &status);
+        // printf("Any case Trie size: %d\n", size);
+        if (status != U_BUFFER_OVERFLOW_ERROR) {
+            goto cleanup;
+        }
+        status = U_ZERO_ERROR;
+        spImpl->fSpoofData->fRawData->fAnyCaseTrie = spImpl->fSpoofData->fMemLimit;
+        spImpl->fSpoofData->fRawData->fAnyCaseTrieLength = size;
+        spImpl->fSpoofData->fAnyCaseTrie = anyCaseTrie;
+        void *where = spImpl->fSpoofData->reserveSpace(size, status);
+        utrie2_serialize(anyCaseTrie, where, size, &status);
+        
+        utrie2_freeze(lowerCaseTrie, UTRIE2_16_VALUE_BITS, &status);
+        size = utrie2_serialize(lowerCaseTrie, NULL, 0, &status);
+        // printf("Lower case Trie size: %d\n", size);
+        if (status != U_BUFFER_OVERFLOW_ERROR) {
+            goto cleanup;
+        }
+        status = U_ZERO_ERROR;
+        spImpl->fSpoofData->fRawData->fLowerCaseTrie = spImpl->fSpoofData->fMemLimit;
+        spImpl->fSpoofData->fRawData->fLowerCaseTrieLength = size;
+        spImpl->fSpoofData->fLowerCaseTrie = lowerCaseTrie;
+        where = spImpl->fSpoofData->reserveSpace(size, status);
+        utrie2_serialize(lowerCaseTrie, where, size, &status);
+
+        spImpl->fSpoofData->fRawData->fScriptSets = spImpl->fSpoofData->fMemLimit;
+        spImpl->fSpoofData->fRawData->fScriptSetsLength = rtScriptSetsCount;
+        ScriptSet *rtScriptSets =  static_cast<ScriptSet *>
+            (spImpl->fSpoofData->reserveSpace(rtScriptSetsCount * sizeof(ScriptSet), status));
+        // Only write through rtScriptSets when the space was successfully reserved.
+        if (U_SUCCESS(status)) {
+            uint32_t rindex = 2;
+            for (int32_t i=2; i<scriptSets->size(); i++) {
+                BuilderScriptSet *bSet = static_cast<BuilderScriptSet *>(scriptSets->elementAt(i));
+                if (bSet->rindex < rindex) {
+                    // We have already copied this script set to the serialized data.
+                    continue;
+                }
+                U_ASSERT(rindex == bSet->rindex);
+                rtScriptSets[rindex] = *bSet->sset;   // Assignment of a ScriptSet just copies the bits.
+                rindex++;
+            }
+        }
+    }
+
+    // Open new utrie2s from the serialized data.  We don't want to keep the ones
+    //   we just built because we would then have two copies of the data, one internal to
+    //   the utries that we have already constructed, and one in the serialized data area.
+    //   An alternative would be to not pre-serialize the Trie data, but that makes the
+    //   spoof detector data different, depending on how the detector was constructed.
+    //   It's simpler to keep the data always the same.
+    
+    spImpl->fSpoofData->fAnyCaseTrie = utrie2_openFromSerialized(
+            UTRIE2_16_VALUE_BITS,
+            (const char *)spImpl->fSpoofData->fRawData + spImpl->fSpoofData->fRawData->fAnyCaseTrie,
+            spImpl->fSpoofData->fRawData->fAnyCaseTrieLength,
+            NULL,
+            &status);
+
+    // The lower case Trie is opened with its own serialized length, not that of the
+    //   any case Trie.
+    spImpl->fSpoofData->fLowerCaseTrie = utrie2_openFromSerialized(
+            UTRIE2_16_VALUE_BITS,
+            (const char *)spImpl->fSpoofData->fRawData + spImpl->fSpoofData->fRawData->fLowerCaseTrie,
+            spImpl->fSpoofData->fRawData->fLowerCaseTrieLength,
+            NULL,
+            &status);
+
+    
+
+cleanup:
+    if (U_FAILURE(status) && pe != NULL) {
+        pe->line = lineNum;
+    }
+    uregex_close(parseRegexp);
+    uprv_free(input);
+
+    // scriptSets is still NULL when its own allocation was what failed.
+    if (scriptSets != NULL) {
+        for (int32_t i=0; i<scriptSets->size(); i++) {
+            BuilderScriptSet *bsset = static_cast<BuilderScriptSet *>(scriptSets->elementAt(i));
+            delete bsset;
+        }
+        delete scriptSets;
+    }
+
+    // The Tries built here are closed below.  If a failure occurred after they were
+    //   stored in the spoof data, but before they were replaced there by the Tries
+    //   opened from the serialized data, do not leave the spoof data pointing at them.
+    if (spImpl != NULL && spImpl->fSpoofData != NULL) {
+        if (spImpl->fSpoofData->fAnyCaseTrie == anyCaseTrie) {
+            spImpl->fSpoofData->fAnyCaseTrie = NULL;
+        }
+        if (spImpl->fSpoofData->fLowerCaseTrie == lowerCaseTrie) {
+            spImpl->fSpoofData->fLowerCaseTrie = NULL;
+        }
+    }
+    utrie2_close(anyCaseTrie);
+    utrie2_close(lowerCaseTrie);
+    return;
+}
+
+
+
+
+
+// Construct an empty BuilderScriptSet: no code point (-1), no Trie, no script set,
+//   both indexes zero, and marked as the owner of whatever sset is later attached.
+BuilderScriptSet::BuilderScriptSet()
+    : codePoint(-1),
+      trie(NULL),
+      sset(NULL),
+      index(0),
+      rindex(0),
+      scriptSetOwned(TRUE) {
+}
+
+// Delete the underlying script set, but only when this object owns it.
+BuilderScriptSet::~BuilderScriptSet() {
+    if (!scriptSetOwned) {
+        return;
+    }
+    delete sset;
+}
+
+
+
--- a/icu4c/source/i18n/uspoof_buildwsconf.h
+++ b/icu4c/source/i18n/uspoof_buildwsconf.h
@ -0,0 +1,56 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 2008-2009, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*   file name:  uspoof_buildwsconf.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2009Jan19
+*   created by: Andy Heninger
+*
+*   Internal classes and functions
+*   for compiling whole script confusable data into its binary (runtime) form.
+*/
+
+#ifndef __USPOOF_BUILDWSCONF_H__
+#define __USPOOF_BUILDWSCONF_H__
+
+#include "uspoof_impl.h"
+#include "utrie2.h"
+
+//
+// class BuilderScriptSet.   Represents the set of scripts (Script Codes)
+//             containing characters that are confusable with one specific
+//             code point.
+//
+//             Build-time only helper.  The destructor deletes sset when
+//             scriptSetOwned is true.
+//
+class BuilderScriptSet: public UMemory {
+  public:
+    UChar32      codePoint;       // The source code point.  Initialized to -1.
+    UTrie2      *trie;            // Any-case or Lower-case Trie.
+                                  //   These Trie tables are the final result of the
+                                  //   build.  This pointer indicates which of the two
+                                  //   this set of data is for.
+    ScriptSet   *sset;            // The set of scripts itself.
+
+                                  // Positions of this set in the build-time and run-time collections:
+    uint32_t     index;           // Index of this set in the Build Time vector
+                                  //   of script sets.
+    uint32_t     rindex;          // Index of this set in the final (runtime)
+                                  //   array of sets.
+    UBool        scriptSetOwned;  // True if this BuilderScriptSet owns (should delete)
+                                  //   its underlying sset.
+
+    BuilderScriptSet();           // Creates an empty entry that owns its sset.
+    ~BuilderScriptSet();          // Deletes sset only if scriptSetOwned.
+};
+
+//
+// buildWSConfusableData()   Compile whole script confusable source data into its
+//                           binary (runtime) form, stored in the SpoofData of spImpl.
+//
+//     spImpl            the spoof checker whose fSpoofData receives the built data.
+//     confusablesWS     the whole script confusables source text.
+//     confusablesWSLen  length of confusablesWS (presumably in bytes - confirm
+//                       against the implementation).
+//     pe                on failure, pe->line is set to the source line being processed.
+//     status            in/out ICU error code.
+//
+void buildWSConfusableData(SpoofImpl *spImpl, const char * confusablesWS,
+          int32_t confusablesWSLen, UParseError *pe, UErrorCode &status); 
+
+
+#endif
--- a/icu4c/source/i18n/uspoof_impl.cpp
+++ b/icu4c/source/i18n/uspoof_impl.cpp
@ -0,0 +1,841 @@
+/*
+**********************************************************************
+*   Copyright (C) 2008-2009, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/uspoof.h"
+#include "unicode/unorm.h"
+#include "utrie2.h"
+#include "cmemory.h"
+#include "udatamem.h"
+#include "umutex.h"
+#include "udataswp.h"
+#include "uassert.h"
+#include "uspoof_impl.h"
+
+
+U_NAMESPACE_BEGIN
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SpoofImpl)
+
+// Construct a spoof checker that uses the supplied spoof data.
+//   data     the SpoofData to check against.  The pointer is stored as-is; no
+//            additional reference is taken here.
+//   status   in/out error code.  If it is already a failure on entry the object is
+//            left with a zero magic number and no data.  Set to
+//            U_MEMORY_ALLOCATION_ERROR if the allowed-characters set cannot be created.
+// All checks are enabled and all code points (0..0x10ffff) are allowed by default.
+SpoofImpl::SpoofImpl(SpoofData *data, UErrorCode &status) :
+    fMagic(0), fSpoofData(NULL), fAllowedCharsSet(NULL) {
+    if (U_FAILURE(status)) {
+        return;
+    }
+    fMagic = USPOOF_MAGIC;
+    fSpoofData = data;
+    fChecks = USPOOF_ALL_CHECKS;
+    UnicodeSet *allowedCharsSet = new UnicodeSet(0, 0x10ffff);
+    if (allowedCharsSet == NULL) {
+        // Must return here: falling through would call freeze() through a NULL pointer.
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    allowedCharsSet->freeze();
+    fAllowedCharsSet = allowedCharsSet;
+}
+
+
+// Default constructor.  Creates a checker with no spoof data attached, all checks
+//   enabled, and all code points (0..0x10ffff) allowed.
+//   There is no status parameter, so an allocation failure for the allowed-characters
+//   set cannot be reported; in that case fAllowedCharsSet is left NULL rather than
+//   dereferencing a NULL pointer.
+SpoofImpl::SpoofImpl() {
+    fMagic = USPOOF_MAGIC;
+    fSpoofData = NULL;
+    fChecks = USPOOF_ALL_CHECKS;
+    UnicodeSet *allowedCharsSet = new UnicodeSet(0, 0x10ffff);
+    if (allowedCharsSet != NULL) {
+        allowedCharsSet->freeze();
+    }
+    fAllowedCharsSet = allowedCharsSet;
+}
+
+
+// Copy Constructor, used by the user level clone() function.
+//   The spoof data is shared with the source (one more reference is added);
+//   the allowed-characters set is cloned.  Sets U_MEMORY_ALLOCATION_ERROR if
+//   the clone of the set could not be created.
+SpoofImpl::SpoofImpl(const SpoofImpl &src, UErrorCode &status)  :
+    fMagic(0), fSpoofData(NULL), fAllowedCharsSet(NULL) {
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    // Plain value members.
+    fMagic     = src.fMagic;
+    fChecks    = src.fChecks;
+    fCheckMask = src.fCheckMask;
+
+    // Shared, reference counted data.
+    SpoofData *sharedData = src.fSpoofData;
+    if (sharedData != NULL) {
+        fSpoofData = sharedData->addReference();
+    }
+
+    // Private copy of the set of allowed characters.
+    const UnicodeSet *setCopy = static_cast<const UnicodeSet *>(src.fAllowedCharsSet->clone());
+    fAllowedCharsSet = setCopy;
+    if (setCopy == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+    }
+}
+
+// Destructor.  Drops this checker's reference to the shared spoof data and
+//   deletes the allowed-characters set.
+SpoofImpl::~SpoofImpl() {
+    // Clear the magic number first, to head off application errors by
+    //   preventing the use of deleted objects.
+    fMagic = 0;
+
+    SpoofData *data = fSpoofData;
+    if (data != NULL) {
+        // The data deletes itself when its reference count goes to zero.
+        data->removeReference();
+    }
+    delete fAllowedCharsSet;
+}
+
+//
+//  Incoming parameter check on Status and the SpoofChecker object
+//    received from the C API.
+//
+//    Returns the checker viewed as a SpoofImpl, or NULL with status set:
+//      U_ILLEGAL_ARGUMENT_ERROR  if sc is NULL,
+//      U_INVALID_FORMAT_ERROR    if the magic number is wrong, there is no spoof data,
+//                                or the data fails the version check.
+//    An incoming failure status is left untouched and NULL is returned.
+//
+const SpoofImpl *SpoofImpl::validateThis(const USpoofChecker *sc, UErrorCode &status) {
+    if (U_FAILURE(status)) {
+        return NULL;
+    }
+    if (sc == NULL) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+
+    SpoofImpl *This = (SpoofImpl *)sc;
+    if (This->fMagic == USPOOF_MAGIC && This->fSpoofData != NULL) {
+        if (SpoofData::validateDataVersion(This->fSpoofData->fRawData, status)) {
+            return This;
+        }
+        return NULL;
+    }
+    status = U_INVALID_FORMAT_ERROR;
+    return NULL;
+}
+
+// Non-const flavor of validateThis().  Delegates to the const version and
+//   casts the const-ness of the result away again.
+SpoofImpl *SpoofImpl::validateThis(USpoofChecker *sc, UErrorCode &status) {
+    const USpoofChecker *constChecker = sc;
+    const SpoofImpl *validated = SpoofImpl::validateThis(constChecker, status);
+    return const_cast<SpoofImpl *>(validated);
+}
+
+
+
+//--------------------------------------------------------------------------------------
+//
+//  confusableLookup()    This is the heart of the confusable skeleton generation
+//                        implementation.
+//
+//                        Given a source character, produce the corresponding
+//                        replacement character(s)
+//
+//    inChar      the code point to look up.
+//    tableMask   flag bit(s) selecting the confusable table the mapping must come from.
+//    destBuf     receives the replacement UChars (not NUL terminated).  The caller
+//                must supply a buffer big enough for the longest replacement.
+//    returns     the number of UChars written to destBuf.  A character with no
+//                applicable entry maps to itself.
+//
+//    Each key is an int32_t: flags in the top 8 bits, the code point in the low bits.
+//
+//---------------------------------------------------------------------------------------
+int32_t SpoofImpl::confusableLookup(UChar32 inChar, int32_t tableMask, UChar *destBuf) const {
+
+    // Fixed bounds of the key table.  The scans for adjacent keys below must not
+    //   leave this range.
+    int32_t * const keysStart = fSpoofData->fCFUKeys;
+    int32_t * const keysLimit = keysStart + fSpoofData->fRawData->fCFUKeysSize;
+
+    // Binary search the spoof data key table for the inChar
+    int32_t  *low   = keysStart;
+    int32_t  *mid   = NULL;
+    int32_t  *limit = keysLimit;
+    // midc must be a full UChar32.  A 16 bit UChar would truncate the code points
+    //   of supplementary characters and break the comparisons below.
+    UChar32   midc;
+    do {
+        int32_t delta = (int32_t)(limit-low)/2;
+        mid = low + delta;
+        midc = *mid & 0x1fffff;
+        if (inChar == midc) {
+            goto foundChar;
+        } else if (inChar < midc) {
+            limit = mid;
+        } else {
+            low = mid;
+        }
+    } while (low < limit-1);
+    mid = low;
+    midc = *mid & 0x1fffff;
+    if (inChar != midc) {
+        // Char not found.  It maps to itself.
+        int i = 0;
+        U16_APPEND_UNSAFE(destBuf, i, inChar)
+        return i;
+    } 
+  foundChar:
+    int32_t keyFlags = *mid & 0xff000000;
+    if ((keyFlags & tableMask) == 0) {
+        // We found the right key char, but the entry doesn't pertain to the
+        //  table we need.  See if there is an adjacent key that does
+        if (keyFlags & USPOOF_KEY_MULTIPLE_VALUES) {
+            int32_t *altMid;
+            // Scan backwards over preceding keys for the same char, stopping at the table start.
+            for (altMid = mid; altMid > keysStart && (altMid[-1]&0x00ffffff) == inChar; ) {
+                altMid--;
+                keyFlags = *altMid & 0xff000000;
+                if (keyFlags & tableMask) {
+                    mid = altMid;
+                    goto foundKey;
+                }
+            }
+            // Scan forwards over following keys for the same char, stopping at the table end.
+            for (altMid = mid+1; altMid < keysLimit && (*altMid&0x00ffffff) == inChar; altMid++) {
+                keyFlags = *altMid & 0xff000000;
+                if (keyFlags & tableMask) {
+                    mid = altMid;
+                    goto foundKey;
+                }
+            }
+        }
+        // No key entry for this char & table.
+        // The input char maps to itself.
+        int i = 0;
+        U16_APPEND_UNSAFE(destBuf, i, inChar)
+        return i;
+    }
+
+  foundKey:
+    int32_t  stringLen = USPOOF_KEY_LENGTH_FIELD(keyFlags) + 1;
+    int32_t keyTableIndex = (int32_t)(mid - fSpoofData->fCFUKeys);
+
+    // Value is either a UChar  (for strings of length 1) or
+    //                 an index into the string table (for longer strings)
+    uint16_t value = fSpoofData->fCFUValues[keyTableIndex];
+    if (stringLen == 1) {
+        destBuf[0] = value;
+        return 1;
+    }
+
+    // String length of 4 from the above lookup is used for all strings of length >= 4.
+    // For these, get the real length from the string lengths table,
+    //   which maps string table indexes to lengths.
+    //   All strings of the same length are stored contiguously in the string table.
+    //   'value' from the lookup above is the starting index for the desired string.
+
+    int32_t ix;
+    if (stringLen == 4) {
+        // Linear scan: the first entry whose fLastString is >= value gives the length.
+        int32_t stringLengthsLimit = fSpoofData->fRawData->fCFUStringLengthsSize;
+        for (ix = 0; ix < stringLengthsLimit; ix++) {
+            if (fSpoofData->fCFUStringLengths[ix].fLastString >= value) {
+                stringLen = fSpoofData->fCFUStringLengths[ix].fStrLength;
+                break;
+            }
+        }
+        U_ASSERT(ix < stringLengthsLimit);
+    }
+
+    // The string may end exactly at the end of the string table, hence <=.
+    U_ASSERT(value + stringLen <= fSpoofData->fRawData->fCFUStringTableLen);
+    UChar *src = &fSpoofData->fCFUStrings[value];
+    for (ix=0; ix<stringLen; ix++) {
+        destBuf[ix] = src[ix];
+    }
+    return stringLen;
+}
+
+
+//---------------------------------------------------------------------------------------
+//
+//  wholeScriptCheck()
+//
+//      Input text is already normalized to NFKD
+//      Return the set of scripts, each of which can represent something that is
+//             confusable with the input text.  The script of the input text
+//             is included; input consisting of characters from a single script will
+//             always produce a result consisting of a set containing that script.
+//
+//      The result starts out as the set of all scripts and is narrowed by
+//      intersection, one input code point at a time.
+//
+//---------------------------------------------------------------------------------------
+void SpoofImpl::wholeScriptCheck(
+    const UChar *text, int32_t length, ScriptSet *result, UErrorCode &status) const {
+
+    // Pick the trie that matches the case handling requested for this checker.
+    UTrie2 *table = fSpoofData->fLowerCaseTrie;
+    if (fChecks & USPOOF_ANY_CASE) {
+        table = fSpoofData->fAnyCaseTrie;
+    }
+
+    result->setAll();
+
+    UChar32 c;
+    for (int32_t inputIdx = 0; inputIdx < length; ) {
+        U16_NEXT(text, inputIdx, length, c);
+        const uint32_t index = utrie2_get32(table, c);
+
+        if (index == 1) {
+            // Script == Common or Inherited.  Nothing to do.
+            continue;
+        }
+        if (index != 0) {
+            // The trie value is an index into the array of script sets.
+            result->intersect(fSpoofData->fScriptSets[index]);
+            continue;
+        }
+
+        // index == 0:  No confusables in another script for this char.
+        // TODO:  we should change the data to have sets with just the single script
+        //        bit for the script of this char.  Gets rid of this special case.
+        //        Until then, grab the script from the char and intersect it with the set.
+        UScriptCode cpScript = uscript_getScript(c, &status);
+        U_ASSERT(cpScript > USCRIPT_INHERITED);
+        result->intersect(cpScript);
+    }
+}
+
+
+
+// scriptScan()   Count script changes in the text, stopping as soon as two have been seen.
+//                Characters whose script is Common, Inherited or Unknown are ignored.
+//    text, length   the input; a length of -1 means the text is NUL terminated.
+//    pos            set to the input index just past the char that produced the
+//                   second count.  Not modified when the count stays below 2.
+//    returns        0, 1 or 2.
+int32_t SpoofImpl::scriptScan
+        (const UChar *text, int32_t length, int32_t &pos, UErrorCode &status) const {
+    if (U_FAILURE(status)) {
+        return 0;
+    }
+
+    const UBool   nulTerminated = (length == -1);
+    int32_t       inputIdx      = 0;
+    int32_t       scriptCount   = 0;
+    UScriptCode   lastScript    = USCRIPT_INVALID_CODE;
+
+    while (scriptCount < 2 && (nulTerminated || inputIdx < length)) {
+        UChar32 c;
+        U16_NEXT(text, inputIdx, length, c);
+        if (nulTerminated && c == 0) {
+            break;
+        }
+        UScriptCode sc = uscript_getScript(c, &status);
+        const UBool ignorable =
+            (sc == USCRIPT_COMMON || sc == USCRIPT_INHERITED || sc == USCRIPT_UNKNOWN);
+        if (!ignorable && sc != lastScript) {
+            scriptCount++;
+            lastScript = sc;
+        }
+    }
+
+    if (scriptCount == 2) {
+        pos = inputIdx;
+    }
+    return scriptCount;
+}
+
+
+// Convert a text format hex number.  Utility function used by builder code.  Static.
+//   s              the text containing the number.
+//   start, limit   the range [start, limit) of s holding the hex digits.
+//   status         set to U_PARSE_ERROR if the value is above 0x10ffff.
+//   returns        the value as a UChar32, or 0 on error.
+// The input has been pre-checked, and will contain no non-hex chars.
+UChar32 SpoofImpl::ScanHex(const UChar *s, int32_t start, int32_t limit, UErrorCode &status) {
+    if (U_FAILURE(status)) {
+        return 0;
+    }
+    U_ASSERT(limit-start > 0);
+
+    uint32_t val = 0;
+    for (int pos = start; pos < limit; ++pos) {
+        const UChar ch = s[pos];
+        int digitVal = ch - 0x30;                // '0'..'9'
+        if (digitVal > 9) {
+            digitVal = 0xa + (ch - 0x41);        // Upper Case 'A'
+        }
+        if (digitVal > 15) {
+            digitVal = 0xa + (ch - 0x61);        // Lower Case 'a'
+        }
+        U_ASSERT(digitVal <= 0xf);
+        val = (val << 4) + digitVal;
+    }
+
+    if (val > 0x10ffff) {
+        status = U_PARSE_ERROR;
+        return 0;
+    }
+    return (UChar32)val;
+}
+
+
+
+//----------------------------------------------------------------------------------------------
+//
+//   class SpoofData Implementation
+//
+//----------------------------------------------------------------------------------------------
+
+
+// Check that a raw spoof data header is usable by this code.
+//   Acceptable data is non-NULL, carries the spoof magic number, and has a
+//   format version no greater than 1.0 in its first two fields.
+//   Returns TRUE if acceptable.  Otherwise, including when status is already a
+//   failure on entry, sets status to U_INVALID_FORMAT_ERROR and returns FALSE.
+UBool SpoofData::validateDataVersion(const SpoofDataHeader *rawData, UErrorCode &status) {
+    if (!U_FAILURE(status) && rawData != NULL) {
+        if (rawData->fMagic == USPOOF_MAGIC &&
+            !(rawData->fFormatVersion[0] > 1) &&
+            !(rawData->fFormatVersion[1] > 0)) {
+            return TRUE;
+        }
+    }
+    status = U_INVALID_FORMAT_ERROR;
+    return FALSE;
+}
+
+//
+//  SpoofData::getDefault() - return a wrapper around the spoof data that is
+//                           baked into the default ICU data.
+//
+//      Returns a new SpoofData, or NULL with status set on failure.
+//      The opened UDataMemory is handed to the SpoofData, which closes it in its
+//      destructor.  If the SpoofData object itself cannot be allocated, the
+//      UDataMemory is closed here so that it does not leak.
+//
+SpoofData *SpoofData::getDefault(UErrorCode &status) {
+    // TODO:  Cache it.  Lazy create, keep until cleanup.
+
+    UDataMemory *udm = udata_open(NULL, "cfu", "confusables", &status);
+    if (U_FAILURE(status)) {
+        return NULL;
+    }
+    SpoofData *This = new SpoofData(udm, status);
+    if (This == NULL) {
+        // Nobody took ownership of udm; release it before reporting the failure.
+        udata_close(udm);
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    if (U_FAILURE(status)) {
+        // The SpoofData destructor closes udm.
+        delete This;
+        return NULL;
+    }
+    return This;
+}
+
+
+// Construct a SpoofData wrapper around data obtained through udata_open().
+//   The spoof data header starts right after the ICU data header of udm.
+//   The UDataMemory is remembered in fUDM and closed by the destructor.
+//   Nothing is adopted if status is already a failure on entry.
+SpoofData::SpoofData(UDataMemory *udm, UErrorCode &status)
+{
+    reset();
+    if (U_FAILURE(status)) {
+        return;
+    }
+    fUDM = udm;
+
+    char *icuHeader = (char *)(udm->pHeader);
+    fRawData = reinterpret_cast<SpoofDataHeader *>
+                   (icuHeader + udm->pHeader->dataHeader.headerSize);
+
+    validateDataVersion(fRawData, status);
+    initPtrs(status);
+}
+
+
+// Construct a SpoofData wrapper around caller supplied, already built, spoof data.
+//   data     start of the spoof data (a SpoofDataHeader).  Not copied and not owned.
+//   length   number of bytes available at data.
+//   status   U_ILLEGAL_ARGUMENT_ERROR if data is NULL;
+//            U_INVALID_FORMAT_ERROR if length is negative, smaller than the header,
+//            smaller than the length recorded in the header, or if the header
+//            fails the version check.
+SpoofData::SpoofData(const void *data, int32_t length, UErrorCode &status)
+{
+    reset();
+    if (U_FAILURE(status)) {
+        return;
+    }
+    if (data == NULL) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    // Test for a negative length explicitly: the cast to size_t would turn it into
+    //   a huge positive value that passes the size comparison.
+    if (length < 0 || (size_t)length < sizeof(SpoofDataHeader)) {
+        status = U_INVALID_FORMAT_ERROR;
+        return;
+    }
+    void *ncData = const_cast<void *>(data);
+    fRawData = static_cast<SpoofDataHeader *>(ncData);
+    if (length < fRawData->fLength) {
+        status = U_INVALID_FORMAT_ERROR;
+        return;
+    }
+    validateDataVersion(fRawData, status);
+    initPtrs(status);
+}
+
+
+// Spoof Data constructor for use from data builder.
+//   Allocates a new, owned data area consisting of just a zero-filled header with
+//   the magic number and format version 1.0.0.0 filled in.  The rest of the data
+//   is added later.  Sets U_MEMORY_ALLOCATION_ERROR if the header cannot be allocated.
+SpoofData::SpoofData(UErrorCode &status) {
+    reset();
+    if (U_FAILURE(status)) {
+        return;
+    }
+    fRefCount = 1;
+    fDataOwned = true;
+
+    // The spoof header is expected to already be a multiple of 16 bytes in size.
+    // Round up anyway, just in case it is not.
+    const uint32_t headerBytes = (sizeof(SpoofDataHeader) + 15) & ~15;
+    U_ASSERT(headerBytes == sizeof(SpoofDataHeader));
+
+    fMemLimit = headerBytes;
+    fRawData = static_cast<SpoofDataHeader *>(uprv_malloc(headerBytes));
+    if (fRawData == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    uprv_memset(fRawData, 0, headerBytes);
+
+    fRawData->fMagic = USPOOF_MAGIC;
+    fRawData->fFormatVersion[0] = 1;
+    for (int32_t v = 1; v < 4; v++) {
+        fRawData->fFormatVersion[v] = 0;
+    }
+    initPtrs(status);
+}
+
+// reset() - put every field into a known initial state.
+//           Called first thing by each constructor.
+//           Must be kept up to date when new fields are added.
+void SpoofData::reset() {
+    // Ownership and bookkeeping.
+    fDataOwned = FALSE;
+    fRefCount  = 1;
+    fMemLimit  = 0;
+    fUDM       = NULL;
+    fRawData   = NULL;
+
+    // Pointers into the sections of the raw data.
+    fCFUKeys          = NULL;
+    fCFUValues        = NULL;
+    fCFUStringLengths = NULL;
+    fCFUStrings       = NULL;
+    fScriptSets       = NULL;
+
+    // Tries opened over the serialized data.
+    fAnyCaseTrie   = NULL;
+    fLowerCaseTrie = NULL;
+}
+
+
+//  SpoofData::initPtrs()
+//            Initialize the pointers to the various sections of the raw data.
+//
+//            This function is used both during the Trie building process (multiple
+//            times, as the individual data sections are added), and
+//            during the opening of a Spoof Checker from prebuilt data.
+//
+//            The pointers for the confusable (CFU) sections that do not exist
+//            (identified by an offset of 0) are set to NULL.
+//
+//            A trie is opened from its serialized form only if it is not open
+//            already and its section exists.
+//
+//            Note:  During building the data, adding each new data section
+//            reallocs the raw data area, which likely relocates it, which
+//            in turn requires reinitializing all of the pointers into it, hence
+//            multiple calls to this function during building.
+//
+void SpoofData::initPtrs(UErrorCode &status) {
+    fCFUStrings = NULL;
+    fCFUStringLengths = NULL;
+    fCFUValues = NULL;
+    fCFUKeys = NULL;
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    // All section offsets in the header are relative to the start of the raw data.
+    char *base = (char *)fRawData;
+
+    if (fRawData->fCFUKeys != 0) {
+        fCFUKeys = (int32_t *)(base + fRawData->fCFUKeys);
+    }
+    if (fRawData->fCFUStringIndex != 0) {
+        fCFUValues = (uint16_t *)(base + fRawData->fCFUStringIndex);
+    }
+    if (fRawData->fCFUStringLengths != 0) {
+        fCFUStringLengths = (SpoofStringLengthsElement *)(base + fRawData->fCFUStringLengths);
+    }
+    if (fRawData->fCFUStringTable != 0) {
+        fCFUStrings = (UChar *)(base + fRawData->fCFUStringTable);
+    }
+
+    if (fAnyCaseTrie ==  NULL && fRawData->fAnyCaseTrie != 0) {
+        fAnyCaseTrie = utrie2_openFromSerialized(
+            UTRIE2_16_VALUE_BITS,
+            base + fRawData->fAnyCaseTrie,
+            fRawData->fAnyCaseTrieLength,
+            NULL,
+            &status);
+    }
+    if (fLowerCaseTrie ==  NULL && fRawData->fLowerCaseTrie != 0) {
+        fLowerCaseTrie = utrie2_openFromSerialized(
+            UTRIE2_16_VALUE_BITS,
+            base + fRawData->fLowerCaseTrie,
+            fRawData->fLowerCaseTrieLength,
+            NULL,
+            &status);
+    }
+
+    if (fRawData->fScriptSets != 0) {
+        fScriptSets = (ScriptSet *)(base + fRawData->fScriptSets);
+    }
+}
+
+
+// Destructor.  Closes both tries, frees the raw data if this object owns it,
+//   and closes the UDataMemory if there is one.
+SpoofData::~SpoofData() {
+    // The tries are closed before the raw data that they were opened from is released.
+    utrie2_close(fAnyCaseTrie);
+    utrie2_close(fLowerCaseTrie);
+    fAnyCaseTrie = NULL;
+    fLowerCaseTrie = NULL;
+
+    if (fDataOwned) {
+        uprv_free(fRawData);
+    }
+    fRawData = NULL;
+
+    if (fUDM != NULL) {
+        udata_close(fUDM);
+        fUDM = NULL;
+    }
+}
+
+
+// Drop one reference to this data.  The object deletes itself when the
+//   decremented count reaches zero.
+void SpoofData::removeReference() {
+    const int32_t remaining = umtx_atomic_dec(&fRefCount);
+    if (remaining != 0) {
+        return;
+    }
+    delete this;
+}
+
+
+// Add one reference to this data.  Returns this, for convenient use in assignments.
+SpoofData *SpoofData::addReference() {
+    (void)umtx_atomic_inc(&fRefCount);
+    return this;
+}
+
+
+// reserveSpace()  Grow the owned raw data area by numBytes, rounded up to a multiple
+//                 of 16, and return a pointer to the new, zero-filled space.
+//
+//                 Growing may relocate the data, so all section pointers are
+//                 re-initialized and previously returned pointers become invalid.
+//
+//                 Returns NULL with status set on failure:
+//                   U_INTERNAL_PROGRAM_ERROR   if this object does not own its data,
+//                   U_MEMORY_ALLOCATION_ERROR  if the data could not be grown.  The
+//                                              existing data is left untouched.
+void *SpoofData::reserveSpace(int32_t numBytes,  UErrorCode &status) {
+    if (U_FAILURE(status)) {
+        return NULL;
+    }
+    if (!fDataOwned) {
+        U_ASSERT(FALSE);
+        status = U_INTERNAL_PROGRAM_ERROR;
+        return NULL;
+    }
+
+    numBytes = (numBytes + 15) & ~15;   // Round up to a multiple of 16
+    uint32_t returnOffset = fMemLimit;
+    uint32_t newLimit = returnOffset + numBytes;
+
+    // Keep the old pointer until the realloc is known to have succeeded, so that
+    //   a failure neither leaks the old block nor leads to a NULL dereference.
+    void *newData = uprv_realloc(fRawData, newLimit);
+    if (newData == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    fRawData = static_cast<SpoofDataHeader *>(newData);
+    fMemLimit = newLimit;
+    fRawData->fLength = fMemLimit;
+    uprv_memset((char *)fRawData + returnOffset, 0, numBytes);
+    initPtrs(status);
+    return (char *)fRawData + returnOffset;
+}
+
+
+//----------------------------------------------------------------------------
+//
+//  ScriptSet implementation
+//
+//----------------------------------------------------------------------------
+// Construct an empty set: every bit cleared.
+ScriptSet::ScriptSet() {
+    const uint32_t wordCount = sizeof(bits)/sizeof(uint32_t);
+    uint32_t w = 0;
+    while (w < wordCount) {
+        bits[w] = 0;
+        ++w;
+    }
+}
+
+// Nothing to release.
+ScriptSet::~ScriptSet() {}
+
+// Two sets are equal when every word of their bit arrays matches.
+UBool ScriptSet::operator == (const ScriptSet &other) {
+    const uint32_t wordCount = sizeof(bits)/sizeof(uint32_t);
+    uint32_t w = 0;
+    // Advance over the matching words; stopping early means a mismatch was found.
+    while (w < wordCount && bits[w] == other.bits[w]) {
+        ++w;
+    }
+    return (w == wordCount) ? TRUE : FALSE;
+}
+
+// Add a single script to this set.
+//   The script code selects word (script / 32) and bit (script % 32) of the bit array.
+void ScriptSet::Union(UScriptCode script) {
+    uint32_t index = script / 32;
+    // Shift an unsigned 1; shifting a signed int into bit 31 is not well defined.
+    uint32_t bit   = (uint32_t)1 << (script & 31);
+    // index must be a valid word index of bits[].
+    U_ASSERT(index < sizeof(bits)/sizeof(uint32_t));
+    bits[index] |= bit;
+}
+
+
+// Add all of the scripts of another set to this set.
+void ScriptSet::Union(const ScriptSet &other) {
+    const uint32_t wordCount = sizeof(bits)/sizeof(uint32_t);
+    for (uint32_t w = 0; w != wordCount; ++w) {
+        bits[w] = bits[w] | other.bits[w];
+    }
+}
+
+// Keep only those scripts that are also members of the other set.
+void ScriptSet::intersect(const ScriptSet &other) {
+    const uint32_t wordCount = sizeof(bits)/sizeof(uint32_t);
+    for (uint32_t w = 0; w != wordCount; ++w) {
+        bits[w] = bits[w] & other.bits[w];
+    }
+}
+
+// Intersect this set with a single script: afterwards the set contains at most
+//   that one script, and only if it was a member before.
+void ScriptSet::intersect(UScriptCode script) {
+    uint32_t index = script / 32;
+    // Shift an unsigned 1; shifting a signed int into bit 31 is not well defined.
+    uint32_t bit   = (uint32_t)1 << (script & 31);
+    // index must be a valid word index of bits[].
+    U_ASSERT(index < sizeof(bits)/sizeof(uint32_t));
+    uint32_t i;
+    // Clear every word before the one holding the script's bit ...
+    for (i=0; i<index; i++) {
+        bits[i] = 0;
+    }
+    // ... keep at most the script's own bit in its word ...
+    bits[index] &= bit;
+    // ... and clear every word after it.
+    for (i=index+1; i<sizeof(bits)/sizeof(uint32_t); i++) {
+        bits[i] = 0;
+    }
+}
+
+
+// Assignment just copies the bits, word by word.
+ScriptSet & ScriptSet::operator =(const ScriptSet &other) {
+    const uint32_t wordCount = sizeof(bits)/sizeof(uint32_t);
+    uint32_t w = 0;
+    while (w < wordCount) {
+        bits[w] = other.bits[w];
+        ++w;
+    }
+    return *this;
+}
+
+
+// Make this the set of all scripts: every bit of every word is turned on.
+void ScriptSet::setAll() {
+    const uint32_t wordCount = sizeof(bits)/sizeof(uint32_t);
+    uint32_t w = 0;
+    while (w < wordCount) {
+        bits[w] = 0xffffffffu;
+        ++w;
+    }
+}
+
+
+// Make this the empty set: every bit of every word is turned off.
+void ScriptSet::resetAll() {
+    const uint32_t wordCount = sizeof(bits)/sizeof(uint32_t);
+    uint32_t w = 0;
+    while (w < wordCount) {
+        bits[w] = 0;
+        ++w;
+    }
+}
+
+// Return the number of scripts in the set, that is, the number of one bits.
+int32_t ScriptSet::countMembers() {
+    // Each pass of the inner loop removes the least significant one bit, so the
+    //  cost is proportional to the number of bits set.  That is good for sparse
+    //  sets, which is very much the case that we will usually have.
+    const uint32_t wordCount = sizeof(bits)/sizeof(uint32_t);
+    int32_t total = 0;
+    for (uint32_t w = 0; w < wordCount; ++w) {
+        for (uint32_t remaining = bits[w]; remaining != 0; remaining &= (remaining - 1)) {
+            ++total;
+        }
+    }
+    return total;
+}
+
+
+
+//-----------------------------------------------------------------------------
+//
+//  NFKDBuffer Implementation.
+//
+//-----------------------------------------------------------------------------
+
+// Normalize the input text to NFKD.
+//   The built-in small buffer is tried first.  If the result does not fit, a buffer
+//   of the required size (plus one UChar) is allocated and the normalization is redone.
+//   status is set to U_MEMORY_ALLOCATION_ERROR if that allocation fails.
+NFKDBuffer::NFKDBuffer(const UChar *text, int32_t length, UErrorCode &status) {
+    fOriginalText = text;
+    fNormalizedText = NULL;
+    fNormalizedTextLength = 0;
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    // First attempt, into the small buffer that is part of this object.
+    fNormalizedText = fSmallBuf;
+    fNormalizedTextLength = unorm_normalize(
+        text, length, UNORM_NFKD, 0, fNormalizedText, USPOOF_STACK_BUFFER_SIZE, &status);
+    if (status != U_BUFFER_OVERFLOW_ERROR) {
+        return;
+    }
+
+    // Overflow.  The first attempt reported the length that is needed.
+    status = U_ZERO_ERROR;
+    const int32_t capacity = fNormalizedTextLength + 1;
+    fNormalizedText = (UChar *)uprv_malloc(capacity*sizeof(UChar));
+    if (fNormalizedText == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    fNormalizedTextLength = unorm_normalize(text, length, UNORM_NFKD, 0,
+                                fNormalizedText, capacity, &status);
+}
+
+
+// Release the normalized text if it lives in a separately allocated buffer.
+//   That buffer comes from uprv_malloc() in the constructor, so it must be released
+//   with uprv_free(), not with delete.
+NFKDBuffer::~NFKDBuffer() {
+    if (fNormalizedText != NULL && fNormalizedText != fSmallBuf) {
+        uprv_free(fNormalizedText);
+    }
+    fNormalizedText = 0;
+}
+
+// The normalized text.  NULL if the constructor was entered with a failure status
+//   or could not allocate its buffer.
+const UChar *NFKDBuffer::getBuffer() {
+    const UChar *normalized = fNormalizedText;
+    return normalized;
+}
+
+// The length, in UChars, of the normalized text, as reported by unorm_normalize().
+int32_t NFKDBuffer::getLength() {
+    const int32_t normalizedLength = fNormalizedTextLength;
+    return normalizedLength;
+}
+
+
+
+
+
+U_NAMESPACE_END
+
+U_NAMESPACE_USE
+
+//-----------------------------------------------------------------------------
+//
+//  uspoof_swap   -  byte swap and char encoding swap of spoof data
+//
+//     ds        the swapper, describing the input and output byte order.
+//     inData    the data to swap, beginning with the generic ICU data header.
+//     length    number of bytes at inData, or -1 for a preflight that only
+//               returns the total size.
+//     outData   receives the swapped data; may be the same as inData for an
+//               in-place swap.
+//     status    in/out ICU error code.
+//     returns   the total size (ICU data header plus spoof data), or 0 on error.
+//
+//-----------------------------------------------------------------------------
+U_CAPI int32_t U_EXPORT2
+uspoof_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData,
+           UErrorCode *status) {
+
+    if (status == NULL || U_FAILURE(*status)) {
+        return 0;
+    }
+    if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {
+        *status=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    //
+    //  Check that the data header is for spoof data.
+    //    (Header contents are defined in gencfu.cpp)
+    //
+    const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData+4);
+    if(!(  pInfo->dataFormat[0]==0x43 &&   /* dataFormat="Cfu " */
+           pInfo->dataFormat[1]==0x66 &&
+           pInfo->dataFormat[2]==0x75 &&
+           pInfo->dataFormat[3]==0x20 &&
+           pInfo->formatVersion[0]==1  )) {
+        udata_printError(ds, "uspoof_swap(): data format %02x.%02x.%02x.%02x "
+                             "(format version %02x %02x %02x %02x) is not recognized\n",
+                         pInfo->dataFormat[0], pInfo->dataFormat[1],
+                         pInfo->dataFormat[2], pInfo->dataFormat[3],
+                         pInfo->formatVersion[0], pInfo->formatVersion[1],
+                         pInfo->formatVersion[2], pInfo->formatVersion[3]);
+        *status=U_UNSUPPORTED_ERROR;
+        return 0;
+    }
+
+    //
+    // Swap the data header.  (This is the generic ICU Data Header, not the uspoof Specific
+    //                         header).  This swap also conveniently gets us
+    //                         the size of the ICU d.h., which lets us locate the start
+    //                         of the uspoof specific data.
+    //
+    int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, status);
+    if (U_FAILURE(*status)) {
+        // Without a valid header size the spoof data cannot be located.
+        return 0;
+    }
+
+
+    //
+    // Get the Spoof Data Header, and check that it appears to be OK.
+    //
+    //
+    const uint8_t   *inBytes =(const uint8_t *)inData+headerSize;
+    SpoofDataHeader *spoofDH = (SpoofDataHeader *)inBytes;
+    if (ds->readUInt32(spoofDH->fMagic)   != USPOOF_MAGIC ||
+        ds->readUInt32(spoofDH->fLength)  <  sizeof(SpoofDataHeader)) 
+    {
+        udata_printError(ds, "uspoof_swap(): Spoof Data header is invalid.\n");
+        *status=U_UNSUPPORTED_ERROR;
+        return 0;
+    }
+
+    //
+    // Preflight operation?  Just return the size
+    //
+    int32_t spoofDataLength = ds->readUInt32(spoofDH->fLength);
+    int32_t totalSize = headerSize + spoofDataLength;
+    if (length < 0) {
+        return totalSize;
+    }
+
+    //
+    // Check that length passed in is consistent with length from Spoof data header.
+    //
+    if (length < totalSize) {
+        udata_printError(ds, "uspoof_swap(): too few bytes (%d after ICU Data header) for spoof data.\n",
+                            spoofDataLength);
+        *status=U_INDEX_OUTOFBOUNDS_ERROR;
+        return 0;
+    }
+
+
+    //
+    // Swap the Data.  Do the data itself first, then the Spoof Data Header, because
+    //                 we need to reference the header to locate the data, and an
+    //                 inplace swap of the header leaves it unusable.
+    //
+    uint8_t          *outBytes = (uint8_t *)outData + headerSize;
+    SpoofDataHeader  *outputDH = (SpoofDataHeader *)outBytes;
+
+    int32_t   sectionStart;
+    int32_t   sectionLength;
+
+    //
+    // If not swapping in place, zero out the output buffer before starting.
+    //    Gaps may exist between the individual sections, and these must be zeroed in
+    //    the output buffer.  The simplest way to do that is to just zero the whole thing.
+    //
+    if (inBytes != outBytes) {
+        uprv_memset(outBytes, 0, spoofDataLength);
+    }
+
+    // Confusables Keys Section   (fCFUKeys)
+    sectionStart  = ds->readUInt32(spoofDH->fCFUKeys);
+    sectionLength = ds->readUInt32(spoofDH->fCFUKeysSize) * 4;
+    ds->swapArray32(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);
+
+    // String Index Section
+    sectionStart  = ds->readUInt32(spoofDH->fCFUStringIndex);
+    sectionLength = ds->readUInt32(spoofDH->fCFUStringIndexSize) * 2;
+    ds->swapArray16(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);
+
+    // String Table Section
+    sectionStart  = ds->readUInt32(spoofDH->fCFUStringTable);
+    sectionLength = ds->readUInt32(spoofDH->fCFUStringTableLen) * 2;
+    ds->swapArray16(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);
+
+    // String Lengths Section
+    sectionStart  = ds->readUInt32(spoofDH->fCFUStringLengths);
+    sectionLength = ds->readUInt32(spoofDH->fCFUStringLengthsSize) * 4;
+    ds->swapArray16(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);
+
+    // Any Case Trie
+    sectionStart  = ds->readUInt32(spoofDH->fAnyCaseTrie);
+    sectionLength = ds->readUInt32(spoofDH->fAnyCaseTrieLength);
+    utrie2_swap(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);
+
+    // Lower Case Trie
+    sectionStart  = ds->readUInt32(spoofDH->fLowerCaseTrie);
+    sectionLength = ds->readUInt32(spoofDH->fLowerCaseTrieLength);
+    utrie2_swap(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);
+
+    // Script Sets.  The data is an array of int32_t
+    sectionStart  = ds->readUInt32(spoofDH->fScriptSets);
+    sectionLength = ds->readUInt32(spoofDH->fScriptSetsLength) * 4;
+    ds->swapArray32(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);
+
+    // And, last, swap the header itself.
+    //   int32_t   fMagic             // swap this
+    //   uint8_t   fFormatVersion[4]  // Do not swap this, but do copy it when
+    //                                //   the output is a separate buffer.
+    //   int32_t   all the rest       // Swap the rest, all is 32 bit stuff.
+    //
+    uint32_t magic = ds->readUInt32(spoofDH->fMagic);
+    ds->writeUInt32((uint32_t *)&outputDH->fMagic, magic);
+    if (inBytes != outBytes) {
+        // The output buffer was zeroed above and nothing else writes the format
+        //   version, so copy the bytes over unchanged.
+        uprv_memcpy(outputDH->fFormatVersion, spoofDH->fFormatVersion,
+                    sizeof(spoofDH->fFormatVersion));
+    }
+    ds->swapArray32(ds, &spoofDH->fLength, sizeof(SpoofDataHeader)-8, &outputDH->fLength, status);
+
+    return totalSize;
+}
+
+
--- a/icu4c/source/i18n/uspoof_impl.h
+++ b/icu4c/source/i18n/uspoof_impl.h
@ -0,0 +1,397 @@
+/*
+***************************************************************************
+* Copyright (C) 2008-2009, International Business Machines Corporation
+* and others. All Rights Reserved.
+***************************************************************************
+*
+*  uspoof_impl.h
+*
+*    Implementation header for spoof detection
+*
+*/
+
+#ifndef USPOOFIM_H
+#define USPOOFIM_H
+
+#include "unicode/utypes.h"
+#include "unicode/uspoof.h"
+#include "utrie2.h"
+#include "unicode/uscript.h"
+#include "unicode/udata.h"
+
+
+U_NAMESPACE_BEGIN
+
+// The maximum length (in UTF-16 UChars) of the skeleton replacement string resulting from
+//   a single input code point.  This is a function of the unicode.org data.
+#define USPOOF_MAX_SKELETON_EXPANSION 20
+
+// The default stack buffer size for copies or conversions or normalizations
+// of input strings being checked.  (Used in multiple places.)
+#define USPOOF_STACK_BUFFER_SIZE 100
+
+// Magic number for sanity checking spoof data.
+#define USPOOF_MAGIC 0x3845fdef
+
+class SpoofData;
+struct SpoofDataHeader;
+struct SpoofStringLengthsElement;
+class ScriptSet;
+
+/**
+  *  Class SpoofImpl corresponds directly to the plain C API opaque type
+  *  USpoofChecker.  One can be cast to the other.
+  *
+  *  All members, including the data members, are public.
+  */
+class SpoofImpl : public UObject  {
+public:
+	SpoofImpl(SpoofData *data, UErrorCode &status);
+	SpoofImpl();
+	virtual ~SpoofImpl();
+
+    /** Copy constructor, used by the user level uspoof_clone() function.
+     */
+    SpoofImpl(const SpoofImpl &src, UErrorCode &status);
+    
+	/** Convert a C API USpoofChecker pointer to the SpoofImpl it refers to.
+	 *  Provided in const and non-const flavors.
+	 *  NOTE(review): presumably checks fMagic and reports problems through
+	 *                status - confirm in the implementation.
+	 */
+	static SpoofImpl *validateThis(USpoofChecker *sc, UErrorCode &status);
+	static const SpoofImpl *validateThis(const USpoofChecker *sc, UErrorCode &status);
+
+	/** Get the confusable skeleton transform for a single code point.
+	 *  The result is a string with a length between 1 and 18.
+	 *  NOTE(review): USPOOF_MAX_SKELETON_EXPANSION is defined as 20; confirm
+	 *                which of the two bounds destBuf must accommodate.
+	 *  @param    inChar     The code point to look up.
+	 *  @param    tableMask  bit flag specifying which confusable table to use.
+	 *                       One of USPOOF_SL_TABLE_FLAG, USPOOF_MA_TABLE_FLAG, etc.
+	 *  @param    destBuf    Receives the substitution string.
+	 *  @return   The length in UTF-16 code units of the substitution string.
+	 */  
+	int32_t confusableLookup(UChar32 inChar, int32_t tableMask, UChar *destBuf) const;
+
+    /** Parse a hex number.  Utility used by the builders.   */
+	static UChar32 ScanHex(const UChar *s, int32_t start, int32_t limit, UErrorCode &status);
+
+	// Implementation for Whole Script tests.
+	// Return the test bit flag to be ORed into the eventual user return value
+	//    if a Spoof opportunity is detected.
+	// NOTE(review): the function is declared void and takes a ScriptSet *result
+	//    out-parameter, so the sentence above about returning a bit flag looks
+	//    stale - confirm against the implementation.
+	void wholeScriptCheck(
+	    const UChar *text, int32_t length, ScriptSet *result, UErrorCode &status) const;
+	    
+    /** Scan a string to determine how many scripts it includes.
+     * Ignore characters with script=Common and script=Inherited.
+     * @param    text     The UChar text to be scanned
+     * @param    length   The length of the input text, -1 for nul terminated.
+     * @param    pos      An out parameter, set to the first input position at which
+     *                    a second script was encountered, ignoring Common and Inherited.
+     * @param    status   For errors.
+     * @return            the number of (non-common,inherited) scripts encountered,
+     *                    clipped to a max of two.
+     */
+    int32_t scriptScan(const UChar *text, int32_t length, int32_t &pos, UErrorCode &status) const;
+
+
+    // WholeScript and MixedScript check implementation.
+    // NOTE(review): this name differs from wholeScriptCheck() above only in the
+    //    case of its first letter; the two are easy to confuse.
+    //
+    ScriptSet *WholeScriptCheck(const UChar *text, int32_t length, UErrorCode &status) const;
+    
+    static UClassID U_EXPORT2 getStaticClassID(void);
+    virtual UClassID getDynamicClassID(void) const;
+
+	//
+	// Data Members
+	//
+
+	int32_t           fMagic;             // Internal sanity check.
+	int32_t           fChecks;            // Bit vector of checks to perform.
+
+	SpoofData        *fSpoofData;         // The (possibly shared) spoof data wrapper.
+	
+	int32_t           fCheckMask;         // Spoof table selector.  f(Check Type)
+	
+    const UnicodeSet *fAllowedCharsSet;   // The UnicodeSet of allowed characters
+                                          //   for this Spoof Checker.  Defaults to all chars. 
+
+};
+
+
+
+//
+//  Confusable Mappings Data Structures
+//
+//    For the confusable data, we are essentially implementing a map,
+//       key:    a code point
+//       value:  a string.  Most commonly one char in length, but can be more.
+//
+//    The keys are stored as a sorted array of 32 bit ints.
+//             bits 0-23    a code point value
+//             bits 24-31   flags
+//                24:  1 if entry applies to SL table
+//                25:  1 if entry applies to SA table
+//                26:  1 if entry applies to ML table
+//                27:  1 if entry applies to MA table
+//                28:  1 if there are multiple entries for this code point.
+//                29-30:  length of value string, in UChars.
+//                         values are (1, 2, 3, other)
+//        The key table is sorted in ascending code point order.  (not on the
+//        32 bit int value, the flag bits do not participate in the sorting.)
+//
+//        Lookup is done by means of a binary search in the key table.
+//
+//    The corresponding values are kept in a parallel array of 16 bit ints.
+//        If the value string is of length 1, it is literally in the value array.
+//        For longer strings, the value array contains an index into the strings table.
+//
+//    String Table:
+//       The strings table contains all of the value strings (those of length two or greater)
+//       concatenated together into one long UChar (UTF-16) array.
+//
+//       The array is arranged by length of the strings - all strings of the same length
+//       are stored together.  The sections are ordered by length of the strings -
+//       all two char strings first, followed by all of the three Char strings, etc.
+//
+//       There is no nul character or other mark between adjacent strings.
+//
+//    String Lengths table
+//       The length of strings from 1 to 3 is flagged in the key table.
+//       For strings of length 4 or longer, the string length table provides a
+//       mapping between an index into the string table and the corresponding length.
+//       Strings of these lengths are rare, so lookup time is not an issue.
+//       Each entry consists of
+//            uint16_t      index of the _last_ string with this length
+//            uint16_t      the length
+//
+
+// Flag bits in the Key entries
+#define USPOOF_SL_TABLE_FLAG (1<<24)
+#define USPOOF_SA_TABLE_FLAG (1<<25)
+#define USPOOF_ML_TABLE_FLAG (1<<26)
+#define USPOOF_MA_TABLE_FLAG (1<<27)
+#define USPOOF_KEY_MULTIPLE_VALUES (1<<28)
+#define USPOOF_KEY_LENGTH_SHIFT 29
+#define USPOOF_KEY_LENGTH_FIELD(x) (((x)>>29) & 3)
+
+
+// One entry of the String Lengths table: a pair of 16 bit values.
+// The field order is part of the binary data format.
+struct SpoofStringLengthsElement {
+    uint16_t      fLastString;         // index in string table of last string with this length
+    uint16_t      fStrLength;           // Length of strings
+};
+
+
+//-------------------------------------------------------------------------------
+//
+//  ScriptSet - Wrapper class for the Script code bit sets that are part of the
+//              whole script confusable data.
+//
+//              This class is used both at data build and at run time.
+//              The constructor is only used at build time.
+//              At run time, just point at the prebuilt data and go.
+//
+//              Storage is six 32 bit words (192 bits, 24 bytes) per set.
+//              NOTE(review): presumably one bit per UScriptCode value -
+//              confirm against the implementation.
+//  
+//-------------------------------------------------------------------------------
+class ScriptSet: public UMemory {
+  public:
+    ScriptSet();
+    ~ScriptSet();
+
+    // NOTE(review): operator == is not declared const, so it cannot be
+    //               invoked on a const ScriptSet.
+    UBool operator == (const ScriptSet &other);
+    ScriptSet & operator = (const ScriptSet &other);
+
+    // Set operations.  Each comes in two flavors, taking either another
+    //   ScriptSet or a single script code.
+    //   (Union is capitalized because "union" is a C++ keyword.)
+    void Union(const ScriptSet &other);
+    void Union(UScriptCode script);
+    void intersect(const ScriptSet &other);
+    void intersect(UScriptCode script);
+    void setAll();
+    void resetAll();
+    int32_t countMembers();
+
+  private:
+    uint32_t  bits[6];     // The bit set itself, 6 x 32 = 192 bits.
+};
+
+
+
+
+//-------------------------------------------------------------------------------
+//
+//  NFKDBuffer   A little class to handle the NFKD normalization that is
+//               needed on incoming identifiers to be checked.
+//               Takes care of buffer handling and normalization
+//
+//               Instances of this class are intended to be stack-allocated.
+//               Each instance embeds a buffer of USPOOF_STACK_BUFFER_SIZE UChars.
+//
+//               TODO:  how to map position offsets back to user values?
+//
+//--------------------------------------------------------------------------------
+class NFKDBuffer: public UMemory {
+public:
+    // text:    the input string.
+    // length:  its length.
+    //          NOTE(review): presumably -1 means nul terminated, as elsewhere
+    //          in this header - confirm.
+    // status:  for errors.
+    NFKDBuffer(const UChar *text, int32_t length, UErrorCode &status);
+    ~NFKDBuffer();
+    const UChar *getBuffer();
+    int32_t getLength();
+
+  private:
+    const UChar *fOriginalText;
+    UChar       *fNormalizedText;
+    int32_t      fNormalizedTextLength;
+    UChar        fSmallBuf[USPOOF_STACK_BUFFER_SIZE];   // Embedded buffer.
+                                  // NOTE(review): presumably used when the
+                                  //   normalized text fits, avoiding a heap
+                                  //   allocation - confirm.
+};
+
+
+
+
+
+//-------------------------------------------------------------------------------------
+//
+//  SpoofData
+//
+//    A small class that wraps the raw (usually memory mapped) spoof data.
+//    Serves two primary functions:
+//      1.  Convenience.  Contains real pointers to the data, to avoid dealing with
+//          the offsets in the raw data.
+//      2.  Reference counting.  When a spoof checker is cloned, the raw data is shared
+//          and must be retained until all checkers using the data are closed.
+//    Nothing in this struct includes state that is specific to any particular
+//    USpoofChecker object.
+//
+//---------------------------------------------------------------------------------------
+class SpoofData: public UMemory {
+  public:
+    static SpoofData *getDefault(UErrorCode &status);   // Load standard ICU spoof data.
+    SpoofData(UErrorCode &status);   // Create new spoof data wrapper.
+                                     // Only used when building new data from rules.
+    
+    // Constructor for use when creating from prebuilt default data.
+    //   A UDataMemory is what the ICU internal data loading functions provide.
+    //   The udm is adopted by the SpoofData.
+    SpoofData(UDataMemory *udm, UErrorCode &status);
+
+    // Constructor for use when creating from serialized data.
+    //
+    SpoofData(const void *serializedData, int32_t length, UErrorCode &status);
+
+    //  Check raw Spoof Data Version compatibility.
+    //  Return TRUE if it looks good.
+    static UBool validateDataVersion(const SpoofDataHeader *rawData, UErrorCode &status);
+    ~SpoofData();                    // Destructor not normally used.
+                                     // Use removeReference() instead.
+    // Reference Counting functions.
+    //    Clone of a user-level spoof detector increments the ref count on the data.
+    //    Close of a user-level spoof detector decrements the ref count.
+    //    If the data is owned by us, it will be deleted when count goes to zero.
+    SpoofData *addReference(); 
+    void removeReference();
+
+    // Reserve space in the raw data.  For use by builder when putting together a
+    //   new set of data.  Init the new storage to zero, to prevent inconsistent
+    //   results if it is not all otherwise set by the requester.
+    //  Return:
+    //    pointer to the new space that was added by this function.
+    void *reserveSpace(int32_t numBytes, UErrorCode &status);
+
+    // Initialize the pointers from this object to the raw data.
+    void initPtrs(UErrorCode &status);
+
+    // Reset all fields to an initial state.
+    // Called from the top of all constructors.
+    void reset();
+    
+    SpoofDataHeader             *fRawData;          // Ptr to the raw memory-mapped data
+    UBool                       fDataOwned;         // True if the raw data is owned, and needs
+                                                    //  to be deleted when refcount goes to zero.
+    UDataMemory                 *fUDM;              // If not NULL, our data came from a
+                                                    //   UDataMemory, which we must close when
+                                                    //   we're done.
+
+    uint32_t                    fMemLimit;          // Limit of available raw data space
+    int32_t                     fRefCount;
+
+    // Confusable data
+    //   Real pointers into the raw data, corresponding to the byte offsets
+    //   stored in SpoofDataHeader.
+    int32_t                     *fCFUKeys;
+    uint16_t                    *fCFUValues;
+    SpoofStringLengthsElement   *fCFUStringLengths;
+    UChar                       *fCFUStrings;
+
+    // Whole Script Confusable Data
+    UTrie2                      *fAnyCaseTrie;
+    UTrie2                      *fLowerCaseTrie;
+    ScriptSet                   *fScriptSets;
+    };
+    
+
+//---------------------------------------------------------------------------------------
+//
+//  Raw Binary Data Formats, as loaded from the ICU data file,
+//    or as built by the builder.
+//
+//  The order and size of the fields below define the binary format;
+//    every field other than fFormatVersion is a 32 bit int.
+//
+//---------------------------------------------------------------------------------------
+struct SpoofDataHeader {
+    int32_t       fMagic;                // Expected to be USPOOF_MAGIC (0x3845fdef)
+    uint8_t       fFormatVersion[4];     // Data Format. Same as the value in struct UDataInfo
+                                         //   if there is one associated with this data.
+    int32_t       fLength;               // Total length in bytes of this spoof data,
+                                         //   including all sections, not just the header.
+
+    // The following four sections refer to data representing the confusable data
+    //   from the Unicode.org data from "confusables.txt"
+
+    int32_t       fCFUKeys;               // byte offset to Keys table (from SpoofDataHeader *)
+    int32_t       fCFUKeysSize;           // number of entries in keys table  (32 bits each)
+
+    // TODO: change name to fCFUValues, for consistency.
+    int32_t       fCFUStringIndex;        // byte offset to String Indexes table
+    int32_t       fCFUStringIndexSize;    // number of entries in String Indexes table (16 bits each)
+                                          //     (number of entries must be same as in Keys table)
+
+    int32_t       fCFUStringTable;        // byte offset of String table
+    int32_t       fCFUStringTableLen;     // length of string table (in 16 bit UChars)
+
+    int32_t       fCFUStringLengths;      // byte offset to String Lengths table
+    int32_t       fCFUStringLengthsSize;  // number of entries in lengths table. (2 x 16 bits each)
+
+
+    // The following sections are for data from confusablesWholeScript.txt
+    
+    int32_t       fAnyCaseTrie;           // byte offset to the serialized Any Case Trie
+    int32_t       fAnyCaseTrieLength;     // Length (bytes) of the serialized Any Case Trie
+    
+    int32_t       fLowerCaseTrie;         // byte offset to the serialized Lower Case Trie
+    int32_t       fLowerCaseTrieLength;   // Length (bytes) of the serialized Lower Case Trie
+
+    int32_t       fScriptSets;            // byte offset to array of ScriptSets
+    int32_t       fScriptSetsLength;      // Number of ScriptSets (24 bytes each)
+    
+
+    // The following sections are for data from xidmodifications.txt
+    //   (no fields are defined for it yet)
+    
+    
+    int32_t       unused[15];              // Padding, Room for Expansion
+    
+ }; 
+
+
+
+    
+//
+//  Structure for the Whole Script Confusable Data
+//    See Unicode UAX-39, Unicode Security Mechanisms, for a description of the
+//    Whole Script confusable data
+//
+//  The data provides mappings from code points to a set of scripts
+//    that contain characters that might be confused with the code point.
+//  There are two mappings, one for lower case only, and one for characters
+//    of any case.
+//
+//  The actual data consists of a utrie2 to map from a code point to an offset,
+//  and an array of UScriptSets (essentially bit maps) that is indexed
+//  by the offsets obtained from the Trie.
+//
+//
+
+
+U_NAMESPACE_END
+
+/**
+  * Endianness swap function for binary spoof data.
+  * @internal
+  */
+U_CAPI int32_t U_EXPORT2
+uspoof_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData,
+            UErrorCode *status);
+
+
+#endif  /* USPOOFIM_H */
+
--- a/icu4c/source/test/cintltst/Makefile.in
+++ b/icu4c/source/test/cintltst/Makefile.in
@ -1,6 +1,6 @@
 #******************************************************************************
 #
-#   Copyright (C) 1999-2008, International Business Machines
+#   Copyright (C) 1999-2009, International Business Machines
 #   Corporation and others.  All Rights Reserved.
 #
 #******************************************************************************
@ -52,7 +52,7 @@ ncnvfbts.o ncnvtst.o putiltst.o cstrtest.o udatpg_test.o utf8tst.o \
 stdnmtst.o usrchtst.o custrtrn.o sorttest.o trietest.o trie2test.o usettest.o \
 uenumtst.o utmstest.o currtest.o \
 idnatest.o nfsprep.o spreptst.o sprpdata.o \
-hpmufn.o tracetst.o reapits.o utexttst.o ucsdetst.o
+hpmufn.o tracetst.o reapits.o utexttst.o ucsdetst.o spooftest.o

 DEPS = $(OBJECTS:.o=.d)

--- a/icu4c/source/test/cintltst/calltest.c
+++ b/icu4c/source/test/cintltst/calltest.c
@ -1,6 +1,6 @@
 /********************************************************************
 * COPYRIGHT: 
- * Copyright (c) 1996-2008, International Business Machines Corporation and
+ * Copyright (c) 1996-2009, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************/
 /********************************************************************************
@ -38,6 +38,7 @@ void addURegexTest(TestNode** root);
 void addUTextTest(TestNode** root);
 void addUCsdetTest(TestNode** root);
 void addCnvSelTest(TestNode** root);
+void addUSpoofTest(TestNode** root);

 void addAllTests(TestNode** root)
 {
@ -75,6 +76,7 @@ void addAllTests(TestNode** root)
 #if !UCONFIG_NO_TRANSLITERATION
    addUTransTest(root);
 #endif
+    addUSpoofTest(root);

 }

--- a/icu4c/source/test/cintltst/cintltst.vcproj
+++ b/icu4c/source/test/cintltst/cintltst.vcproj
@ -931,6 +931,14 @@
 				>
 			</File>
 		</Filter>
+		<Filter
+			Name="spoof"
+			>
+			<File
+				RelativePath=".\spooftest.c"
+				>
+			</File>
+		</Filter>
 	</Files>
 	<Globals>
 	</Globals>
--- a/icu4c/source/test/cintltst/cnmdptst.c
+++ b/icu4c/source/test/cintltst/cnmdptst.c
@ -886,14 +886,7 @@ static void TestGetKeywordValuesForLocale(void) {
            1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1
    };
    UErrorCode status = U_ZERO_ERROR;
-    int32_t i, j, size;
-    UEnumeration *pref, *all;
-    const char *loc = NULL;
-    UBool matchPref, matchAll;
-    const char *value = NULL;
-    int32_t valueLength = 0;
-    
-    UList *ALLList = NULL;
+    int32_t i;
    
    UEnumeration *ALL = ucurr_getKeywordValuesForLocale("currency", uloc_getDefault(), FALSE, &status);
    if (ALL == NULL) {
@ -902,12 +895,15 @@ static void TestGetKeywordValuesForLocale(void) {
    }
    
    for (i = 0; i < PREFERRED_SIZE; i++) {
-        pref = NULL;
-        all = NULL;
-        loc = PREFERRED[i][0];
+        UEnumeration *pref = NULL;
+        UEnumeration *all = NULL;
+        const char *loc = PREFERRED[i][0];
        pref = ucurr_getKeywordValuesForLocale("currency", loc, TRUE, &status);
-        matchPref = FALSE;
-        matchAll = FALSE;
+        UBool matchPref = FALSE;
+        UBool matchAll = FALSE;
+        int32_t size = 0, j;
+        const char *value = NULL, *allValue = NULL;
+        int32_t valueLength = 0, allValueLength = 0;
        
        size = uenum_count(pref, &status);
        
@ -939,7 +935,7 @@ static void TestGetKeywordValuesForLocale(void) {
        
        if (U_SUCCESS(status) && size == uenum_count(ALL, &status)) {
            matchAll = TRUE;
-            ALLList = ulist_getListFromEnum(ALL);
+            UList *ALLList = ulist_getListFromEnum(ALL);
            for (j = 0; j < size; j++) {
                if ((value = uenum_next(all, &valueLength, &status)) != NULL && U_SUCCESS(status)) {
                    if (!ulist_containsString(ALLList, value, uprv_strlen(value))) {
--- a/icu4c/source/test/cintltst/spooftest.c
+++ b/icu4c/source/test/cintltst/spooftest.c
@ -0,0 +1,152 @@
+/********************************************************************
+ * COPYRIGHT: 
+ * Copyright (c) 2009, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ ********************************************************************/
+/********************************************************************************
+*
+* File spooftest.c
+*
+*********************************************************************************/
+/* C API test for the uspoof Unicode Identifier Spoofing and Security API */
+/**
+*   This is an API test for ICU spoof detection in plain C.  It doesn't test very many cases, and doesn't
+*   try to test the full functionality.  It just calls each function and verifies that it
+*   works on a basic level.
+*
+*   More complete testing of spoof detection functionality is done with the C++ tests.
+**/
+
+#include "unicode/utypes.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "unicode/uspoof.h"
+#include "unicode/ustring.h"
+#include "cintltst.h"
+
+#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \
+    log_err("Failure at file %s, line %d, error = %s\n", __FILE__, __LINE__, u_errorName(status));}}
+
+#define TEST_CHECK_SUCCESS(status) {if (U_FAILURE(status)) { \
+    log_err("Failure at file %s, line %d, error = %s\n", __FILE__, __LINE__, u_errorName(status)); \
+    goto bailout;} \
+}
+
+#define TEST_ASSERT_TRUE(expr) {if ((expr)==FALSE) { \
+log_err("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr);}}
+
+#define TEST_ASSERT_EQ(a, b) { if ((a) != (b)) { \
+    log_err("Test Failure at file %s, line %d: \"%s\" (%d) != \"%s\" (%d) \n", \
+             __FILE__, __LINE__, #a, (a), #b, (b)); }}
+
+#define TEST_ASSERT_NE(a, b) { if ((a) == (b)) { \
+    log_err("Test Failure at file %s, line %d: \"%s\" (%d) == \"%s\" (%d) \n", \
+             __FILE__, __LINE__, #a, (a), #b, (b)); }}
+
+
+/*
+ *   TEST_SETUP and TEST_TEARDOWN
+ *         macros to handle the boilerplate around setting up test case.
+ *         Put arbitrary test code between SETUP and TEARDOWN.
+ *         "sc" is the ready-to-go  SpoofChecker for use in the tests.
+ */
+#define TEST_SETUP {  \
+    UErrorCode status = U_ZERO_ERROR; \
+    USpoofChecker *sc;     \
+    sc = uspoof_open(&status);  \
+    TEST_CHECK_SUCCESS(status);   \
+    {
+
+#define TEST_TEARDOWN  \
+    }  \
+    TEST_ASSERT_SUCCESS(status);  \
+ bailout: \
+    uspoof_close(sc);  \
+}
+
+
+/*
+ *  Compare an expected (char *) string with actual (UChar *) output and log a
+ *  test failure if they differ.  Normally invoked through TEST_ASSERT_STRING,
+ *  which supplies file and line.
+ *
+ *    expected   The expected text, as a nul terminated char string.
+ *    actual     The UChar text produced by the code under test.
+ *    nulTerm    If TRUE, actual must match expected exactly, including ending
+ *               at the same length.  If FALSE, only the first strlen(expected)
+ *               characters of actual are compared.
+ *    file, line Source position reported with a failure.
+ */
+static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
+     char     buf_inside_macro[120];
+     int32_t  len = (int32_t)strlen(expected);
+     UBool    success;
+
+     /* Up to len+1 converted chars plus a terminator must fit in the buffer. */
+     if (len > (int32_t)sizeof(buf_inside_macro) - 2) {
+         log_err("Failure at file %s, line %d, expected string of length %d is too long to check.\n",
+             file, line, len);
+         return;
+     }
+     if (nulTerm) {
+         /* Convert one char more than expected, so that an actual string that
+          * is too long shows up as a mismatch.  u_austrncpy() does not
+          * terminate the output when it fills all len+1 slots, so terminate
+          * immediately after them.
+          */
+         u_austrncpy(buf_inside_macro, (actual), len+1);
+         buf_inside_macro[len+1] = 0;
+         success = (strcmp((expected), buf_inside_macro) == 0);
+     } else {
+         u_austrncpy(buf_inside_macro, (actual), len);
+         buf_inside_macro[len] = 0;
+         success = (strncmp((expected), buf_inside_macro, len) == 0);
+     }
+     if (success == FALSE) {
+         log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
+             file, line, (expected), buf_inside_macro);
+     }
+}
+
+#define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)
+
+
+
+static void TestUSpoofCAPI(void);
+
+void addUSpoofTest(TestNode** root);
+
+/* Hook the uspoof C API test into the cintltst test tree under "uspoof/". */
+void addUSpoofTest(TestNode** root) {
+    addTest(root, TestUSpoofCAPI, "uspoof/TestUSpoofCAPI");
+}
+
+
+/*
+ *   Spoof Detection C API Tests
+ *
+ *   Reads confusables.txt and confusablesWholeScript.txt from the unidata
+ *   source directory and checks that uspoof_openFromSource() succeeds on them.
+ *
+ *   A data file that cannot be opened, or a failed allocation, is logged as a
+ *   test failure and the remaining steps are skipped, rather than handing a
+ *   NULL FILE * or buffer to the C library.
+ */
+static void TestUSpoofCAPI(void) {
+
+    TEST_SETUP
+    const char *dataSrcDir;
+    char       *fileName;
+    char       *confusables;
+    int         confusablesLength = 0;
+    char       *confusablesWholeScript;
+    int         confusablesWholeScriptLength = 0;
+    FILE       *f;
+    UParseError pe;
+    int32_t     errType;
+    USpoofChecker *rsc = NULL;
+    UBool       inputOK = TRUE;     /* FALSE once any input could not be obtained. */
+    
+    dataSrcDir = ctest_dataSrcDir();
+    fileName = malloc(strlen(dataSrcDir) + 100);
+    confusables = malloc(3000000);
+    confusablesWholeScript = malloc(1000000);
+    if (fileName == NULL || confusables == NULL || confusablesWholeScript == NULL) {
+        log_err("Failure at file %s, line %d, memory allocation failed.\n", __FILE__, __LINE__);
+        inputOK = FALSE;
+    }
+
+    if (inputOK) {
+        strcpy(fileName, dataSrcDir);
+        strcat(fileName, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "confusables.txt");
+        f = fopen(fileName, "r");
+        if (f == NULL) {
+            log_err("Failure at file %s, line %d, unable to open \"%s\"\n", __FILE__, __LINE__, fileName);
+            inputOK = FALSE;
+        } else {
+            confusablesLength = (int)fread(confusables, 1, 3000000, f);
+            fclose(f);
+        }
+    }
+
+    if (inputOK) {
+        strcpy(fileName, dataSrcDir);
+        strcat(fileName, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "confusablesWholeScript.txt");
+        f = fopen(fileName, "r");
+        if (f == NULL) {
+            log_err("Failure at file %s, line %d, unable to open \"%s\"\n", __FILE__, __LINE__, fileName);
+            inputOK = FALSE;
+        } else {
+            confusablesWholeScriptLength = (int)fread(confusablesWholeScript, 1, 1000000, f);
+            fclose(f);
+        }
+    }
+
+    if (inputOK) {
+        rsc = uspoof_openFromSource(confusables, confusablesLength,
+                                    confusablesWholeScript, confusablesWholeScriptLength,
+                                    &errType, &pe, &status);
+        TEST_ASSERT_SUCCESS(status);
+    }
+
+    /* free(NULL) is a no-op, so this is safe after a partial allocation failure. */
+    free(confusablesWholeScript);
+    free(confusables);
+    free(fileName);
+    if (rsc != NULL) {
+        uspoof_close(rsc);
+    }
+    /*  printf("ParseError Line is %d\n", pe.line);  */
+    TEST_TEARDOWN;
+    
+}
+
--- a/icu4c/source/tools/Makefile.in
+++ b/icu4c/source/tools/Makefile.in
@ -1,5 +1,5 @@
 ## Makefile.in for ICU tools
-## Copyright (c) 1999-2008, International Business Machines Corporation and
+## Copyright (c) 1999-2009, International Business Machines Corporation and
 ## others. All Rights Reserved.

 ## Source directory information
@ -15,7 +15,7 @@ subdir = tools

 SUBDIRS = toolutil ctestfw makeconv genrb genuca genbrk genctd \
 gennames genpname gencnval gensprep genccode gencmn icupkg pkgdata \
-gentest genprops gencase genbidi gennorm 
+gentest genprops gencase genbidi gennorm gencfu

 ## List of phony targets
 .PHONY : all all-local all-recursive install install-local	\
--- a/icu4c/source/tools/gencfu/Makefile.in
+++ b/icu4c/source/tools/gencfu/Makefile.in
@ -0,0 +1,96 @@
+## Makefile.in for ICU - tools/gencfu
+## Copyright (c) 2009 International Business Machines Corporation and
+## others. All Rights Reserved.
+
+## Source directory information
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+
+top_builddir = ../..
+
+include $(top_builddir)/icudefs.mk
+
+## Build directory information
+subdir = tools/gencfu
+
+TARGET_STUB_NAME = gencfu
+
+SECTION = 1
+
+# MAN_FILES = $(TARGET_STUB_NAME).$(SECTION)
+
+
+## Extra files to remove for 'make clean'
+CLEANFILES = *~ $(DEPS) $(MAN_FILES)
+
+## Target information
+TARGET = $(BINDIR)/$(TARGET_STUB_NAME)$(EXEEXT)
+
+ifneq ($(top_builddir),$(top_srcdir))
+CPPFLAGS += -I$(top_builddir)/common
+endif
+CPPFLAGS += -I$(top_srcdir)/common -I$(top_srcdir)/i18n -I$(srcdir)/../toolutil
+LIBS = $(LIBICUTOOLUTIL) $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M)
+
+OBJECTS = gencfu.o
+
+DEPS = $(OBJECTS:.o=.d)
+
+## List of phony targets
+.PHONY : all all-local install install-local clean clean-local	\
+distclean distclean-local dist dist-local check check-local install-man
+
+## Clear suffix list
+.SUFFIXES :
+
+## List of standard targets
+all: all-local
+install: install-local
+clean: clean-local
+distclean : distclean-local
+dist: dist-local
+check: all check-local
+
+all-local: $(TARGET) $(MAN_FILES)
+
+install-local: all-local install-man
+	$(MKINSTALLDIRS) $(DESTDIR)$(bindir)
+	$(INSTALL) $(TARGET) $(DESTDIR)$(bindir)
+
+# Install the man pages, if there are any.  MAN_FILES is currently empty for
+# this tool, which leaves $? empty; without the guard $(INSTALL_DATA) would be
+# run with a destination but no source file, and "make install" would fail.
+install-man: $(MAN_FILES)
+	test -z "$(MAN_FILES)" || $(MKINSTALLDIRS) $(DESTDIR)$(mandir)/man$(SECTION)
+	test -z "$(MAN_FILES)" || $(INSTALL_DATA) $? $(DESTDIR)$(mandir)/man$(SECTION)
+
+dist-local:
+
+clean-local:
+	test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
+	$(RMV) $(TARGET) $(OBJECTS)
+
+distclean-local: clean-local
+	$(RMV) Makefile
+
+check-local: all-local
+
+Makefile: $(srcdir)/Makefile.in  $(top_builddir)/config.status
+	cd $(top_builddir) \
+	 && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
+
+$(TARGET) : $(OBJECTS)
+	$(LINK.cc) $(OUTOPT)$@ $^ $(LIBS)
+	$(POST_BUILD_STEP)
+
+
+%.$(SECTION): $(srcdir)/%.$(SECTION).in
+	cd $(top_builddir) \
+	 && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
+
+
+ifeq (,$(MAKECMDGOALS))
+-include $(DEPS)
+else
+ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
+-include $(DEPS)
+endif
+endif
+
--- a/icu4c/source/tools/gencfu/gencfu.cpp
+++ b/icu4c/source/tools/gencfu/gencfu.cpp
@ -0,0 +1,326 @@
+/*
+**********************************************************************
+*   Copyright (C) 2009, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*
+* File gencfu.cpp
+*/
+
+//--------------------------------------------------------------------
+//
+//   Tool for generating Unicode Confusable data files (.cfu files).
+//   .cfu files contain the compiled form of the confusable data
+//   derived from the Unicode Consortium data described in
+//   Unicode UAX 39.
+//
+//   Usage:  gencfu [options] -r confusables-file.txt -w whole-script-confusables.txt  -o output-file.cfu
+//
+//       options:   -v         verbose
+//                  -? or -h   help
+//
+//   The input rule files are plain text files containing confusable character
+//    definitions in the input format defined by Unicode UAX39 for the files
+//    confusables.txt and confusablesWholeScript.txt.  This source (.txt) format
+//    is also accepted directly by ICU spoof detectors.  The
+//    files must be encoded in utf-8 format, with or without a BOM.
+//
+//--------------------------------------------------------------------
+
+#include "unicode/utypes.h"
+#include "unicode/unistr.h"
+#include "unicode/uclean.h"
+#include "unicode/udata.h"
+#include "unicode/putil.h"
+
+#include "uoptions.h"
+#include "unewdata.h"
+#include "ucmndata.h"
+#include "uspoof_impl.h"
+#include "cmemory.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+U_NAMESPACE_USE
+
+// Program name as invoked (argv[0]); set in main() and printed by usageAndDie().
+static char *progName;
+
+// Command line option table handed to u_parseArgs().
+// main() reads the entries by position (options[0], options[3], ...), so the
+// order of the entries must stay in step with the index comments on the right.
+// Entry 2 (verbose) is accepted on the command line but not consulted by main().
+static UOption options[]={
+    UOPTION_HELP_H,             /* 0 */
+    UOPTION_HELP_QUESTION_MARK, /* 1 */
+    UOPTION_VERBOSE,            /* 2 */
+    { "rules", NULL, NULL, NULL, 'r', UOPT_REQUIRES_ARG, 0 },   /* 3 */
+    { "wsrules", NULL, NULL, NULL, 'w', UOPT_REQUIRES_ARG, 0},  /* 4 */
+    { "out",   NULL, NULL, NULL, 'o', UOPT_REQUIRES_ARG, 0 },   /* 5 */
+    UOPTION_ICUDATADIR,         /* 6 */
+    UOPTION_DESTDIR,            /* 7 */
+    UOPTION_COPYRIGHT,          /* 8 */
+};
+
+//
+//  Print the command line usage summary to stdout and terminate the process.
+//
+//     retCode   exit status passed to exit().
+//
+//  Does not return.  The option list mirrors the options[] table: the
+//  unsupported -V/--version entry is gone (the table has no such option, so the
+//  argument parser rejects it) and the required -r, -w and -o options are listed.
+//
+void usageAndDie(int retCode) {
+    printf("Usage: %s [-v] [-options] -r confusablesRules.txt -w wholeScriptConfusables.txt -o output-file\n", progName);
+    printf("\tRead in Unicode confusable character definitions and write out the binary data\n"
+        "options:\n"
+        "\t-h or -? or --help  this usage text\n"
+        "\t-c or --copyright   include a copyright notice\n"
+        "\t-v or --verbose     turn on verbose output\n"
+        "\t-r or --rules       confusables source file, followed by the file name (required)\n"
+        "\t-w or --wsrules     whole script confusables source file, followed by the file name (required)\n"
+        "\t-o or --out         output file, followed by the file name (required)\n"
+        "\t-i or --icudatadir  directory for locating any needed intermediate data files,\n"
+        "\t                    followed by path, defaults to %s\n"
+        "\t-d or --destdir     destination directory, followed by the path\n",
+        u_getDataDirectory());
+    exit (retCode);
+}
+
+
+//
+//  Data headers for the output file.
+//
+//  The guard is the same UCONFIG_NO_SPOOF_DETECTION switch that main() tests:
+//  main() uses dummyDataInfo when the switch is set and dh otherwise, so each
+//  object has to be defined under exactly the condition in which it is used.
+//  (Guarding on UCONFIG_NO_BREAK_ITERATION left dh undefined, and main()
+//  uncompilable, whenever break iteration was configured out.)
+//
+#if UCONFIG_NO_SPOOF_DETECTION
+
+/* dummy UDataInfo cf. udata.h */
+static UDataInfo dummyDataInfo = {
+    sizeof(UDataInfo),
+    0,
+
+    U_IS_BIG_ENDIAN,
+    U_CHARSET_FAMILY,
+    U_SIZEOF_UCHAR,
+    0,
+
+    { 0, 0, 0, 0 },                 /* dummy dataFormat */
+    { 0, 0, 0, 0 },                 /* dummy formatVersion */
+    { 0, 0, 0, 0 }                  /* dummy dataVersion */
+};
+
+#else
+
+//
+//  Set up the ICU data header, defined in ucmndata.h
+//
+DataHeader dh ={
+    {                               // struct MappedData
+        sizeof(DataHeader),         //     headerSize
+        0xda,                       //     magic1
+        0x27                        //     magic2
+    },
+
+    {                               // struct UDataInfo
+        sizeof(UDataInfo),          //     size
+        0,                          //     reserved
+        U_IS_BIG_ENDIAN,
+        U_CHARSET_FAMILY,
+        U_SIZEOF_UCHAR,
+        0,                          //     reserved
+
+        { 0x43, 0x66, 0x75, 0x20 }, //     dataFormat="Cfu "
+        { 0xff, 0, 0, 0 },          //     formatVersion.  Filled in later with values
+                                    //      from the  builder.  The  values declared
+                                    //      here should never appear in any real data.
+        { 5, 1, 0, 0 }              //     dataVersion (Unicode version)
+    }};
+
+#endif
+
+// Forward declaration for function for reading source files.
+static const char *readFile(const char *fileName, int32_t *len);
+
+//----------------------------------------------------------------------------
+//
+//  main      for gencfu
+//
+//----------------------------------------------------------------------------
+//
+//  Parse the command line, read the two confusables source files, compile them
+//  into a USpoofChecker, serialize the compiled data and write it out as an ICU
+//  data file.
+//
+//  Returns 0 on success.  Every failure prints a message and terminates the
+//  process through exit() with a non-zero status (the UErrorCode where one is
+//  available, -1 otherwise).
+//
+int  main(int argc, char **argv) {
+    UErrorCode  status = U_ZERO_ERROR;
+    const char *confFileName;
+    const char *confWSFileName;
+    const char *outFileName;
+    const char *outDir = NULL;
+    const char *copyright = NULL;
+
+    //
+    // Pick up and check the command line arguments,
+    //    using the standard ICU tool utils option handling.
+    //
+    U_MAIN_INIT_ARGS(argc, argv);
+    progName = argv[0];
+    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
+    if(argc<0) {
+        // Unrecognized option; argv[-argc] is the offending argument.
+        fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]);
+        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
+    }
+
+    if(options[0].doesOccur || options[1].doesOccur) {
+        //  -? or -h for help.
+        usageAndDie(0);
+    }
+
+    if (!(options[3].doesOccur && options[4].doesOccur && options[5].doesOccur)) {
+        fprintf(stderr, "confusables file, whole script confusables file and output file must all be specified.\n");
+        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
+    }
+    confFileName   = options[3].value;
+    confWSFileName = options[4].value;
+    outFileName    = options[5].value;
+
+    if (options[6].doesOccur) {
+        u_setDataDirectory(options[6].value);
+    }
+
+    /* Initialize ICU */
+    u_init(&status);
+    if (U_FAILURE(status)) {
+        fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
+            argv[0], u_errorName(status));
+        exit(1);
+    }
+    status = U_ZERO_ERROR;
+
+    /* Combine the directory with the file name */
+    if(options[7].doesOccur) {
+        outDir = options[7].value;
+    }
+    if (options[8].doesOccur) {
+        copyright = U_COPYRIGHT_STRING;
+    }
+
+#if UCONFIG_NO_SPOOF_DETECTION
+    // TODO:  implement  UCONFIG_NO_SPOOF_DETECTION in uconfig.h, or decide we don't want it and take this out.
+
+    UNewDataMemory *pData;
+    static const char msgFormat[] =
+        "gencfu writes dummy %s because of UCONFIG_NO_SPOOF_DETECTION, see uconfig.h";
+
+    /* write message with just the name */
+    // The output file name comes from the command line and has no length limit,
+    // so size the buffer from it.  sizeof(msgFormat) already counts the "%s"
+    // and the terminating NUL, which leaves room to spare.
+    char *msg = new char[sizeof(msgFormat) + strlen(outFileName)];
+    sprintf(msg, msgFormat, outFileName);
+    fprintf(stderr, "%s\n", msg);
+
+    /* write the dummy data file */
+    pData = udata_create(outDir, NULL, outFileName, &dummyDataInfo, NULL, &status);
+    if (U_FAILURE(status)) {
+        fprintf(stderr, "gencfu: Could not open output file \"%s\", \"%s\"\n",
+                         outFileName, u_errorName(status));
+        exit(status);
+    }
+    udata_writeBlock(pData, msg, (int32_t)strlen(msg));
+    udata_finish(pData, &status);
+    delete[] msg;
+    return (int)status;
+
+#else
+
+    //  Read in the confusables source files.
+    //  Read errors go to stderr, like every other error reported here.
+
+    int32_t      confusablesLen = 0;
+    const char  *confusables = readFile(confFileName, &confusablesLen);
+    if (confusables == NULL) {
+        fprintf(stderr, "gencfu: error reading file  \"%s\"\n", confFileName);
+        exit(-1);
+    }
+
+    int32_t     wsConfusablesLen = 0;
+    const char *wsConfsables =  readFile(confWSFileName, &wsConfusablesLen);
+    if (wsConfsables == NULL) {
+        // Name the whole script file here, not the confusables file.
+        fprintf(stderr, "gencfu: error reading file  \"%s\"\n", confWSFileName);
+        exit(-1);
+    }
+
+    //
+    //  Create the Spoof Detector from the source confusables files.
+    //     This will compile the data.
+    //
+    UParseError parseError;
+    parseError.line = 0;
+    parseError.offset = 0;
+    int32_t errType = 0;     // read below even if the open fails before setting it
+    USpoofChecker *sc = uspoof_openFromSource(confusables, confusablesLen,
+                                              wsConfsables, wsConfusablesLen,
+                                              &errType, &parseError, &status);
+    if (U_FAILURE(status)) {
+        const char *errFile =
+            (errType == USPOOF_WHOLE_SCRIPT_CONFUSABLE)? confWSFileName : confFileName;
+        fprintf(stderr, "gencfu: uspoof_openFromSource error \"%s\"  at file %s, line %d, column %d\n",
+                u_errorName(status), errFile, (int)parseError.line, (int)parseError.offset);
+        exit(status);
+    }
+
+
+    //
+    //  Get the compiled rule data from the USpoofChecker.
+    //  The first call, with no buffer, is expected to fail with a buffer
+    //  overflow error and report the size needed; the second call fills the buffer.
+    //
+    uint32_t        outDataSize;
+    uint8_t        *outData;
+    outDataSize = uspoof_serialize(sc, NULL, 0, &status);
+    if (status != U_BUFFER_OVERFLOW_ERROR) {
+        fprintf(stderr, "gencfu: uspoof_serialize() returned %s\n", u_errorName(status));
+        exit(status);
+    }
+    status = U_ZERO_ERROR;
+    outData = new uint8_t[outDataSize];
+    uspoof_serialize(sc, outData, outDataSize, &status);
+    if (U_FAILURE(status)) {
+        // Do not write out a buffer that was never filled in.
+        fprintf(stderr, "gencfu: uspoof_serialize() returned %s\n", u_errorName(status));
+        exit(status);
+    }
+
+    // Copy the data format version numbers from the spoof data header into the UDataMemory header.
+
+    uprv_memcpy(dh.info.formatVersion,
+                reinterpret_cast<SpoofDataHeader *>(outData)->fFormatVersion,
+                sizeof(dh.info.formatVersion));
+
+    //
+    //  Create the output file
+    //
+    size_t bytesWritten;
+    UNewDataMemory *pData;
+    pData = udata_create(outDir, NULL, outFileName, &(dh.info), copyright, &status);
+    if(U_FAILURE(status)) {
+        fprintf(stderr, "gencfu: Could not open output file \"%s\", \"%s\"\n",
+                         outFileName, u_errorName(status));
+        exit(status);
+    }
+
+
+    //  Write the data itself.
+    udata_writeBlock(pData, outData, outDataSize);
+    // finish up
+    bytesWritten = udata_finish(pData, &status);
+    if(U_FAILURE(status)) {
+        fprintf(stderr, "gencfu: Error %d writing the output file\n", status);
+        exit(status);
+    }
+
+    if (bytesWritten != outDataSize) {
+        fprintf(stderr, "gencfu: Error writing to output file \"%s\"\n", outFileName);
+        exit(-1);
+    }
+
+    // All three buffers were allocated with new[], so release them with delete[].
+    uspoof_close(sc);
+    delete[] outData;
+    delete[] confusables;
+    delete[] wsConfsables;
+    u_cleanup();
+    printf("gencfu: tool completed successfully.\n");
+    return 0;
+
+#endif /* #if UCONFIG_NO_SPOOF_DETECTION */
+}
+
+
+ //
+ //  Read an entire confusables source file into memory.
+ //
+ //     fileName   path of the file to read; it is opened in binary mode.
+ //     len        receives the number of bytes read; only set on success.
+ //
+ //  Returns a buffer allocated with new[] that holds the file contents followed
+ //  by a terminating NUL, or NULL if the file cannot be opened, measured or read
+ //  completely.  The caller owns the buffer and releases it with delete[].
+ //  The file is closed on every path.
+ //
+ static const char *readFile(const char *fileName, int32_t *len) {
+    FILE *file = fopen(fileName, "rb");
+    if (file == NULL) {
+        return NULL;
+    }
+
+    // Find the size by seeking to the end.  ftell() reports failure as -1, and
+    // the size (plus the NUL) must also fit the int32_t handed back through len.
+    long fileSize = -1;
+    if (fseek(file, 0, SEEK_END) == 0) {
+        fileSize = ftell(file);
+    }
+    if (fileSize < 0 || fileSize >= 0x7fffffffL || fseek(file, 0, SEEK_SET) != 0) {
+        fclose(file);
+        return NULL;
+    }
+
+    char *result = new char[fileSize + 1];      // +1 for the terminating NUL
+    if (result == NULL) {
+        // Only reachable where operator new returns NULL instead of throwing.
+        fclose(file);
+        return NULL;
+    }
+
+    size_t bytesRead = fread(result, 1, (size_t)fileSize, file);
+    fclose(file);                               // done with the file either way
+    if (bytesRead != (size_t)fileSize) {
+        delete[] result;
+        return NULL;
+    }
+
+    result[fileSize] = 0;
+    *len = static_cast<int32_t>(fileSize);
+    return result;
+ }
--- a/icu4c/source/tools/gencfu/gencfu.vcproj
+++ b/icu4c/source/tools/gencfu/gencfu.vcproj
@ -0,0 +1,404 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<VisualStudioProject
+	ProjectType="Visual C++"
+	Version="9.00"
+	Name="gencfu"
+	ProjectGUID="{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}"
+	Keyword="Win32Proj"
+	TargetFrameworkVersion="0"
+	>
+	<Platforms>
+		<Platform
+			Name="Win32"
+		/>
+		<Platform
+			Name="x64"
+		/>
+	</Platforms>
+	<ToolFiles>
+	</ToolFiles>
+	<Configurations>
+		<Configuration
+			Name="Debug|Win32"
+			OutputDirectory="Debug"
+			IntermediateDirectory="Debug"
+			ConfigurationType="1"
+			InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
+			CharacterSet="2"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+				CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin"
+				Outputs="..\..\..\bin\$(TargetFileName)"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="0"
+				AdditionalIncludeDirectories="..\..\common;..\..\i18n;..\toolutil"
+				PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_DEPRECATE"
+				MinimalRebuild="false"
+				BasicRuntimeChecks="3"
+				RuntimeLibrary="3"
+				BufferSecurityCheck="true"
+				DisableLanguageExtensions="true"
+				UsePrecompiledHeader="0"
+				AssemblerListingLocation=".\x86\Debug/"
+				ObjectFile=".\x86\Debug/"
+				ProgramDataBaseFileName=".\x86\Debug/"
+				BrowseInformation="1"
+				WarningLevel="3"
+				Detect64BitPortabilityProblems="true"
+				DebugInformationFormat="4"
+				CompileAs="0"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				OutputFile=".\x86\Debug\gencfu.exe"
+				LinkIncremental="2"
+				SuppressStartupBanner="true"
+				GenerateDebugInformation="true"
+				SubSystem="1"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release|Win32"
+			OutputDirectory="Release"
+			IntermediateDirectory="Release"
+			ConfigurationType="1"
+			InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
+			UseOfMFC="0"
+			ATLMinimizesCRunTimeLibraryUsage="false"
+			CharacterSet="2"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+				CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin"
+				Outputs="..\..\..\bin\$(TargetFileName)"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				AdditionalIncludeDirectories="..\..\common;..\..\i18n;..\toolutil"
+				PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE;"
+				StringPooling="true"
+				MinimalRebuild="false"
+				RuntimeLibrary="2"
+				EnableFunctionLevelLinking="true"
+				DisableLanguageExtensions="true"
+				TreatWChar_tAsBuiltInType="true"
+				UsePrecompiledHeader="0"
+				AssemblerListingLocation=".\x86\Release/"
+				ObjectFile=".\x86\Release/"
+				ProgramDataBaseFileName=".\x86\Release/"
+				WarningLevel="3"
+				Detect64BitPortabilityProblems="true"
+				DebugInformationFormat="3"
+				CompileAs="0"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				OutputFile=".\x86\Release\gencfu.exe"
+				LinkIncremental="1"
+				GenerateDebugInformation="true"
+				SubSystem="1"
+				RandomizedBaseAddress="1"
+				DataExecutionPrevention="0"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+				<Configuration
+			Name="Release|x64"
+			OutputDirectory=".\x64\Release"
+			IntermediateDirectory=".\x64\Release"
+			ConfigurationType="1"
+			InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
+			UseOfMFC="0"
+			ATLMinimizesCRunTimeLibraryUsage="false"
+			CharacterSet="2"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+				CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin64&#x0D;&#x0A;"
+				Outputs="..\..\..\bin64\$(TargetFileName)"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				TargetEnvironment="3"
+				TypeLibraryName=".\x64\Release/gencfu.tlb"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				AdditionalIncludeDirectories="..\..\common;..\..\i18n;..\toolutil"
+				PreprocessorDefinitions="WIN64;WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE"
+				StringPooling="true"
+				RuntimeLibrary="2"
+				EnableFunctionLevelLinking="true"
+				DisableLanguageExtensions="true"
+				TreatWChar_tAsBuiltInType="true"
+				PrecompiledHeaderFile=".\x64\Release/gencfu.pch"
+				AssemblerListingLocation=".\x64\Release/"
+				ObjectFile=".\x64\Release/"
+				ProgramDataBaseFileName=".\x64\Release/"
+				WarningLevel="3"
+				SuppressStartupBanner="true"
+				CompileAs="0"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+				PreprocessorDefinitions="NDEBUG"
+				Culture="1033"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				OutputFile=".\x64\Release/gencfu.exe"
+				LinkIncremental="1"
+				SuppressStartupBanner="true"
+				ProgramDatabaseFile=".\x64\Release/gencfu.pdb"
+				SubSystem="1"
+				TargetMachine="17"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCWebDeploymentTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Debug|x64"
+			OutputDirectory=".\x64\Debug"
+			IntermediateDirectory=".\x64\Debug"
+			ConfigurationType="1"
+			InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
+			UseOfMFC="0"
+			ATLMinimizesCRunTimeLibraryUsage="false"
+			CharacterSet="2"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+				CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin64&#x0D;&#x0A;"
+				Outputs="..\..\..\bin64\$(TargetFileName)"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				TargetEnvironment="3"
+				TypeLibraryName=".\x64\Debug/gencfu.tlb"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="0"
+				AdditionalIncludeDirectories="..\..\common;..\..\i18n;..\toolutil"
+				PreprocessorDefinitions="WIN64;WIN32;_DEBUG;_CRT_SECURE_NO_DEPRECATE"
+				BasicRuntimeChecks="3"
+				RuntimeLibrary="3"
+				BufferSecurityCheck="true"
+				DisableLanguageExtensions="true"
+				TreatWChar_tAsBuiltInType="true"
+				PrecompiledHeaderFile=".\x64\Debug/gencfu.pch"
+				AssemblerListingLocation=".\x64\Debug/"
+				ObjectFile=".\x64\Debug/"
+				ProgramDataBaseFileName=".\x64\Debug/"
+				BrowseInformation="1"
+				WarningLevel="3"
+				SuppressStartupBanner="true"
+				DebugInformationFormat="3"
+				CompileAs="0"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+				PreprocessorDefinitions="_DEBUG"
+				Culture="1033"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				OutputFile=".\x64\Debug/gencfu.exe"
+				LinkIncremental="2"
+				SuppressStartupBanner="true"
+				GenerateDebugInformation="true"
+				ProgramDatabaseFile=".\x64\Debug/gencfu.pdb"
+				SubSystem="1"
+				TargetMachine="17"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+				UseFAT32Workaround="true"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCWebDeploymentTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+	</Configurations>
+	<References>
+	</References>
+	<Files>
+		<Filter
+			Name="Header Files"
+			Filter="h;hpp;hxx;hm;inl;inc;xsd"
+			UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
+			>
+		</Filter>
+		<Filter
+			Name="Resource Files"
+			Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx"
+			UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
+			>
+		</Filter>
+		<Filter
+			Name="Source Files"
+			Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
+			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
+			>
+			<File
+				RelativePath=".\gencfu.cpp"
+				>
+			</File>
+		</Filter>
+	</Files>
+	<Globals>
+	</Globals>
+</VisualStudioProject>
--- a/icu4c/source/tools/toolutil/swapimpl.cpp
+++ b/icu4c/source/tools/toolutil/swapimpl.cpp
@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 2005-2008, International Business Machines
+*   Copyright (C) 2005-2009, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@ -51,6 +51,8 @@

 /* swapping implementations in i18n */

+#include "uspoof_impl.h"
+
 /* definitions */

 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
@ -556,7 +558,9 @@ static const struct {
    { { 0x54, 0x72, 0x44, 0x63 }, triedict_swap },      /* dataFormat="TrDc " */
 #endif
    { { 0x70, 0x6e, 0x61, 0x6d }, upname_swap },        /* dataFormat="pnam" */
-    { { 0x75, 0x6e, 0x61, 0x6d }, uchar_swapNames }     /* dataFormat="unam" */
+    { { 0x75, 0x6e, 0x61, 0x6d }, uchar_swapNames },    /* dataFormat="unam" */
+
+    { { 0x43, 0x66, 0x75, 0x20 }, uspoof_swap }         /* dataFormat="Cfu " */
 };

 U_CAPI int32_t U_EXPORT2
--- a/icu4c/source/tools/toolutil/toolutil.vcproj
+++ b/icu4c/source/tools/toolutil/toolutil.vcproj
@ -50,7 +50,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				WholeProgramOptimization="true"
-				AdditionalIncludeDirectories="..\..\..\include,..\..\common"
+				AdditionalIncludeDirectories="..\..\..\include,..\..\common,..\..\i18n"
 				PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE;U_TOOLUTIL_IMPLEMENTATION"
 				StringPooling="true"
 				RuntimeLibrary="2"
@ -145,7 +145,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories="..\..\..\include,..\..\common"
+				AdditionalIncludeDirectories="..\..\..\include,..\..\common,..\..\i18n"
 				PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_DEPRECATE;U_TOOLUTIL_IMPLEMENTATION"
 				BasicRuntimeChecks="3"
 				RuntimeLibrary="3"