ICU-5585 fast, freezable UnicodeSet with span() with string support; svn merge -r 21018:22033 http://source.icu-project.org/repos/icu/icu/branches/markus/fastset and revert source/test/perf/unisetperf/draft/* and source/common/utrie.* and source/allinone/allinone.sln (experimental code)

X-SVN-Rev: 22053
2007-07-19 04:33:20 +00:00 · 2007-07-19 04:33:20 +00:00 · 19446aeeba
commit 19446aeeba
parent 4d282a7e02
32 changed files with 6562 additions and 231 deletions
--- a/.gitignore
+++ b/.gitignore
@ -47,7 +47,9 @@ icu4c/source/config/Makefile.inc
 icu4c/source/config/icu-config
 icu4c/source/config/icu-config.1
 icu4c/source/data/*.plg
+icu4c/source/data/Debug
 icu4c/source/data/Makefile
+icu4c/source/data/Release
 icu4c/source/data/icupkg.inc
 icu4c/source/data/in
 icu4c/source/data/makedata.vcproj.*.*.user
@ -305,6 +307,15 @@ icu4c/source/test/perf/ubrkperf/debug
 icu4c/source/test/perf/ubrkperf/release
 icu4c/source/test/perf/ubrkperf/ubrkperf
 icu4c/source/test/perf/ubrkperf/ubrkperf.vcproj.*.*.user
+icu4c/source/test/perf/unisetperf/*.d
+icu4c/source/test/perf/unisetperf/*.o
+icu4c/source/test/perf/unisetperf/Debug
+icu4c/source/test/perf/unisetperf/Makefile
+icu4c/source/test/perf/unisetperf/Release
+icu4c/source/test/perf/unisetperf/debug
+icu4c/source/test/perf/unisetperf/release
+icu4c/source/test/perf/unisetperf/unisetperf
+icu4c/source/test/perf/unisetperf/unisetperf.vcproj.*.*.user
 icu4c/source/test/perf/usetperf/*.d
 icu4c/source/test/perf/usetperf/*.o
 icu4c/source/test/perf/usetperf/Debug
--- a/icu4c/source/common/Makefile.in
+++ b/icu4c/source/common/Makefile.in
@ -80,7 +80,7 @@ utf_impl.o ustring.o ustrcase.o ucasemap.o cstring.o ustrfmt.o ustrtrns.o ustr_w
 normlzr.o unorm.o unormcmp.o unorm_it.o chariter.o schriter.o uchriter.o uiter.o \
 uchar.o uprops.o ucase.o propname.o ubidi_props.o ubidi.o ubidiwrt.o ubidiln.o ushape.o \
 uscript.o usc_impl.o unames.o \
-utrie.o uset_props.o uniset_props.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \
+utrie.o bmpset.o unisetspan.o uset_props.o uniset_props.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \
 uarrsort.o brkiter.o ubrk.o brkeng.o dictbe.o triedict.o \
 rbbi.o rbbidata.o rbbinode.o rbbirb.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb.o \
 serv.o servnotf.o servls.o servlk.o servlkf.o servrbf.o servslkf.o \
--- a/icu4c/source/common/bmpset.cpp
+++ b/icu4c/source/common/bmpset.cpp
@ -0,0 +1,714 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 2007, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*   file name:  bmpset.cpp
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2007jan29
+*   created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/uniset.h"
+#include "cmemory.h"
+#include "bmpset.h"
+
+U_NAMESPACE_BEGIN
+
+BMPSet::BMPSet(const int32_t *parentList, int32_t parentListLength) :  // Builds all lookup tables from the parent's inversion list.
+        list(parentList), listLength(parentListLength) {  // Only the pointer is stored; the list itself is not copied.
+    uprv_memset(asciiBytes, 0, sizeof(asciiBytes));  // Start from all-zero tables; initBits() only turns entries on.
+    uprv_memset(table7FF, 0, sizeof(table7FF));
+    uprv_memset(bmpBlockBits, 0, sizeof(bmpBlockBits));
+
+    /*
+     * Set the list indexes for binary searches for
+     * U+0800, U+1000, U+2000, .., U+F000, U+10000.
+     * U+0800 is the first 3-byte-UTF-8 code point. Lower code points are
+     * looked up in the bit tables.
+     * The last pair of indexes is for finding supplementary code points.
+     *
+     * Each search starts at the index found for the previous boundary,
+     * so the indexes are computed in ascending order.
+     * list4kStarts[] must be filled in before overrideIllegal() runs
+     * because that function looks up U+FFFD via list4kStarts[0xf..0x10].
+     */
+    list4kStarts[0]=findCodePoint(0x800, 0, listLength-1);
+    int32_t i;
+    for(i=1; i<=0x10; ++i) {  // i<<12 is U+1000, U+2000, .., U+10000.
+        list4kStarts[i]=findCodePoint(i<<12, list4kStarts[i-1], listLength-1);
+    }
+    list4kStarts[0x11]=listLength-1;  // Index of the last list entry.
+
+    initBits();         // Fill the bit tables from the inversion list.
+    overrideIllegal();  // Then patch the entries used for illegal UTF-8.
+}
+
+BMPSet::BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength) :  // Copies the tables of otherBMPSet but refers to a different list.
+        list(newParentList), listLength(newParentListLength) {  // Only the pointer is stored; nothing is recomputed from the new list.
+    uprv_memcpy(asciiBytes, otherBMPSet.asciiBytes, sizeof(asciiBytes));  // NOTE(review): presumably newParentList must have the same contents as otherBMPSet.list -- confirm against callers.
+    uprv_memcpy(table7FF, otherBMPSet.table7FF, sizeof(table7FF));
+    uprv_memcpy(bmpBlockBits, otherBMPSet.bmpBlockBits, sizeof(bmpBlockBits));
+    uprv_memcpy(list4kStarts, otherBMPSet.list4kStarts, sizeof(list4kStarts));  // Indexes into the list are copied verbatim.
+}
+
+/*
+ * Set bits in a bit rectangle in "vertical" bit organization:
+ * for every value v in start..limit-1, set bit (v>>6) in table[v&0x3f].
+ * Precondition: 0<=start<limit<=0x800, so that all bit indexes are 0..31.
+ *
+ * All shifts are performed on uint32_t so that setting bit 31 does not
+ * overflow a signed int, and no shift by 32 is ever evaluated
+ * (limit==0x800 yields limitLead==0x20).
+ */
+static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) {
+    int32_t lead=start>>6;     // Bit index ("column") of the first value.
+    int32_t trail=start&0x3f;  // Table word index ("row") of the first value.
+
+    // Set one bit indicating an all-one block.
+    uint32_t bits=(uint32_t)1<<lead;
+    if((start+1)==limit) {  // Single-character shortcut.
+        table[trail]|=bits;
+        return;
+    }
+
+    int32_t limitLead=limit>>6;
+    int32_t limitTrail=limit&0x3f;
+
+    if(lead==limitLead) {
+        // Partial vertical bit column.
+        while(trail<limitTrail) {
+            table[trail++]|=bits;
+        }
+    } else {
+        // Partial vertical bit column,
+        // followed by a bit rectangle,
+        // followed by another partial vertical bit column.
+        if(trail>0) {
+            do {
+                table[trail++]|=bits;
+            } while(trail<64);
+            ++lead;
+        }
+        if(lead<limitLead) {
+            // Here lead<=31, so the unsigned shift is well-defined.
+            bits=~(((uint32_t)1<<lead)-1);
+            if(limitLead<0x20) {
+                bits&=((uint32_t)1<<limitLead)-1;
+            }
+            for(trail=0; trail<64; ++trail) {
+                table[trail]|=bits;
+            }
+        }
+        // If limit==0x800 then limitLead==0x20 and limitTrail==0:
+        // there is no final partial column, and 1<<32 must not be computed.
+        // limitTrail>0 implies limit<0x800 and therefore limitLead<=31.
+        if(limitTrail>0) {
+            bits=(uint32_t)1<<limitLead;
+            for(trail=0; trail<limitTrail; ++trail) {
+                table[trail]|=bits;
+            }
+        }
+    }
+}
+
+void BMPSet::initBits() {  // Fills asciiBytes[], table7FF[] and bmpBlockBits[] from list[].
+    UChar32 start, limit;  // Current range [start, limit) taken from consecutive list entries.
+    int32_t listIndex=0;
+
+    /*
+     * Set asciiBytes[].
+     * The inversion list is read as pairs (start, limit).
+     * When there is no further list entry for a limit,
+     * limit keeps whatever value it had before.
+     */
+    do {
+        start=list[listIndex++];
+        if(listIndex<listLength) {
+            limit=list[listIndex++];
+        }
+        if(start>=0x80) {
+            break;  // This range is entirely above ASCII; the next loops handle it.
+        }
+        do {
+            asciiBytes[start++]=1;
+        } while(start<limit && start<0x80);  // start stops at min(limit, 0x80).
+    } while(limit<=0x80);  // Fetch another range only if this one ended within ASCII.
+
+    /*
+     * Set table7FF[].
+     * On entry, [start, limit) is the remainder of the range that left the ASCII loop.
+     */
+    while(start<0x800) {
+        set32x64Bits(table7FF, start, limit<=0x800 ? limit : 0x800);  // Clip the range to U+07FF.
+        if(limit>0x800) {
+            start=0x800;  // The rest of this range belongs to the bmpBlockBits[] loop.
+            break;
+        }
+
+        start=list[listIndex++];
+        if(listIndex<listLength) {
+            limit=list[listIndex++];
+        }
+    }
+
+    /*
+     * Set bmpBlockBits[].
+     * minStart is the first code point after the last block that was
+     * already marked as mixed; ranges below it need no further work.
+     */
+    int32_t minStart=0x800;
+    while(start<0x10000) {
+        if(limit>0x10000) {
+            limit=0x10000;  // Clip the range to the BMP.
+        }
+
+        if(start<minStart) {
+            start=minStart;
+        }
+        if(start<limit) {  // Else: Another range entirely in a known mixed-value block.
+            if(start&0x3f) {
+                // Mixed-value block of 64 code points.
+                start>>=6;  // start is now a 64-block number.
+                bmpBlockBits[start&0x3f]|=0x10001<<(start>>6);  // Sets both the value bit and the "mixed" bit 16 positions above it.
+                start=(start+1)<<6;  // Round up to the next block boundary.
+                minStart=start;      // Ignore further ranges in this block.
+            }
+            if(start<limit) {
+                if(start<(limit&~0x3f)) {
+                    // Multiple all-ones blocks of 64 code points each.
+                    set32x64Bits(bmpBlockBits, start>>6, limit>>6);  // Arguments are block numbers here, not code points.
+                }
+
+                if(limit&0x3f) {
+                    // Mixed-value block of 64 code points.
+                    limit>>=6;  // limit is now a 64-block number.
+                    bmpBlockBits[limit&0x3f]|=0x10001<<(limit>>6);
+                    limit=(limit+1)<<6;  // Round up to the next block boundary.
+                    minStart=limit;      // Ignore further ranges in this block.
+                }
+            }
+        }
+
+        if(limit==0x10000) {
+            break;  // The whole BMP has been handled.
+        }
+
+        start=list[listIndex++];
+        if(listIndex<listLength) {
+            limit=list[listIndex++];
+        }
+    }
+}
+
+/*
+ * Override some bits and bytes to the result of contains(FFFD)
+ * for faster validity checking at runtime.
+ * No need to set 0 values where they were reset to 0 in the constructor
+ * and not modified by initBits().
+ * (asciiBytes[] trail bytes, table7FF[] 0..7F, bmpBlockBits[] 0..7FF)
+ * Need to set 0 values for surrogates D800..DFFF.
+ *
+ * U+FFFD is looked up with containsSlow() between list4kStarts[0xf] and
+ * list4kStarts[0x10], so list4kStarts[] must already be filled in.
+ *
+ * In bmpBlockBits[], bit 0xd is the value bit and bit 0xd+16 is the "mixed"
+ * bit for the U+D000..U+DFFF column; words 32..63 of that column
+ * correspond to D800..DFFF.
+ */
+void BMPSet::overrideIllegal() {
+    uint32_t bits, mask;
+    int32_t i;
+
+    if(containsSlow(0xfffd, list4kStarts[0xf], list4kStarts[0x10])) {
+        // contains(FFFD)==TRUE
+        for(i=0x80; i<0xc0; ++i) {  // Entries for UTF-8 trail bytes in lead position.
+            asciiBytes[i]=1;
+        }
+
+        bits=3;                 // Lead bytes 0xC0 and 0xC1.
+        for(i=0; i<64; ++i) {
+            table7FF[i]|=bits;
+        }
+
+        bits=1;                 // Lead byte 0xE0.
+        for(i=0; i<32; ++i) {   // First half of 4k block.
+            bmpBlockBits[i]|=bits;
+        }
+
+        mask=~(0x10001<<0xd);   // Lead byte 0xED.
+        bits=1<<0xd;
+        for(i=32; i<64; ++i) {  // Second half of 4k block.
+            bmpBlockBits[i]=(bmpBlockBits[i]&mask)|bits;  // Clear the "mixed" bit and set the value bit.
+        }
+    } else {
+        // contains(FFFD)==FALSE
+        mask=~(0x10001<<0xd);   // Lead byte 0xED.
+        for(i=32; i<64; ++i) {  // Second half of 4k block.
+            bmpBlockBits[i]&=mask;  // Clear both the value bit and the "mixed" bit.
+        }
+    }
+}
+
+int32_t BMPSet::findCodePoint(UChar32 c, int32_t lo, int32_t hi) const {
+    /*
+     * Binary search in the inversion list, restricted to indexes lo..hi:
+     * returns the smallest index i in lo..hi for which c < list[i].
+     * The list is expected to end with HIGH (list[len-1]), and c is
+     * expected to be a legal code point (0..HIGH-1).
+     *
+     * Results for an unrestricted search (lo=0, hi=len-1):
+     *
+     *   set              list[]           c=0 1 3 4 7 8
+     *   ---------------  ---------------    -----------
+     *   []               [110000]           0 0 0 0 0 0
+     *   [\u0000-\u0003]  [0, 4, 110000]     1 1 1 2 2 2
+     *   [\u0004-\u0007]  [4, 8, 110000]     0 0 0 1 1 2
+     *   [:Any:]          [0, 110000]        1 1 1 1 1 1
+     */
+
+    // c lies before the first candidate entry.
+    if (c < list[lo]) {
+        return lo;
+    }
+    // Frequent case: c lies at or after the last entry before hi,
+    // or the window is empty. Checking this up front pays off.
+    if (hi <= lo || list[hi-1] <= c) {
+        return hi;
+    }
+    // From here on: list[lo] <= c < list[hi].
+    // Halve the window until lo and hi are adjacent.
+    int32_t mid;
+    while ((mid = (lo + hi) >> 1) != lo) {
+        if (c < list[mid]) {
+            hi = mid;
+        } else {
+            lo = mid;
+        }
+    }
+    return hi;
+}
+
+UBool  // Returns whether c is in the set; FALSE for values outside 0..0x10ffff.
+BMPSet::contains(UChar32 c) const {
+    if((uint32_t)c<=0x7f) {  // The unsigned cast also rejects negative c in each range test.
+        return (UBool)asciiBytes[c];
+    } else if((uint32_t)c<=0x7ff) {
+        return (UBool)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0);  // Word index = low 6 bits, bit index = bits 10..6.
+    } else if((uint32_t)c<0xd800 || (c>=0xe000 && c<=0xffff)) {  // BMP code point that is not a surrogate.
+        int lead=c>>12;
+        uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;  // Value bit (bit 0) and "mixed" bit (bit 16) of the 64-block.
+        if(twoBits<=1) {
+            // All 64 code points with the same bits 15..6
+            // are either in the set or not.
+            return (UBool)twoBits;
+        } else {
+            // Look up the code point in its 4k block of code points.
+            return containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]);
+        }
+    } else if((uint32_t)c<=0x10ffff) {
+        // surrogate or supplementary code point
+        return containsSlow(c, list4kStarts[0xd], list4kStarts[0x11]);  // Search window from the U+D000 index to the end of the list.
+    } else {
+        // Out-of-range code points get FALSE, consistent with long-standing
+        // behavior of UnicodeSet::contains(c).
+        return FALSE;
+    }
+}
+
+/*
+ * Span the initial part of the UTF-16 string [s, limit) in which every
+ * code point c satisfies contains(c)==(spanCondition!=0).
+ * The first code unit is read before any comparison with limit,
+ * so it must be s<limit.
+ * Returns a pointer to the first code unit of the first code point that
+ * ends the span, or limit if the whole string is spanned.
+ *
+ * Check for sufficient length for trail unit for each surrogate pair.
+ * Handle single surrogates as surrogate code points as usual in ICU.
+ *
+ * The two loops are identical except for the inverted tests.
+ */
+const UChar *
+BMPSet::span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const {
+    UChar c, c2;
+
+    if(spanCondition) {  // Any non-zero condition selects the "contained" loop.
+        // span
+        do {
+            c=*s;
+            if(c<=0x7f) {
+                if(!asciiBytes[c]) {
+                    break;
+                }
+            } else if(c<=0x7ff) {
+                if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
+                    break;
+                }
+            } else if(c<0xd800 || c>=0xe000) {
+                int lead=c>>12;
+                uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;  // Value bit and "mixed" bit of the 64-block.
+                if(twoBits<=1) {
+                    // All 64 code points with the same bits 15..6
+                    // are either in the set or not.
+                    if(twoBits==0) {
+                        break;
+                    }
+                } else {
+                    // Look up the code point in its 4k block of code points.
+                    if(!containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
+                        break;
+                    }
+                }
+            } else if(c>=0xdc00 || (s+1)==limit || (c2=s[1])<0xdc00 || c2>=0xe000) {  // Trail surrogate, or lead surrogate not followed by a trail surrogate.
+                // surrogate code point
+                if(!containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
+                    break;
+                }
+            } else {
+                // surrogate pair
+                if(!containsSlow(U16_GET_SUPPLEMENTARY(c, c2), list4kStarts[0x10], list4kStarts[0x11])) {
+                    break;
+                }
+                ++s;  // Skip the lead surrogate; the loop increment skips the trail surrogate.
+            }
+        } while(++s<limit);
+    } else {
+        // span not
+        do {
+            c=*s;
+            if(c<=0x7f) {
+                if(asciiBytes[c]) {
+                    break;
+                }
+            } else if(c<=0x7ff) {
+                if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
+                    break;
+                }
+            } else if(c<0xd800 || c>=0xe000) {
+                int lead=c>>12;
+                uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;  // Value bit and "mixed" bit of the 64-block.
+                if(twoBits<=1) {
+                    // All 64 code points with the same bits 15..6
+                    // are either in the set or not.
+                    if(twoBits!=0) {
+                        break;
+                    }
+                } else {
+                    // Look up the code point in its 4k block of code points.
+                    if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
+                        break;
+                    }
+                }
+            } else if(c>=0xdc00 || (s+1)==limit || (c2=s[1])<0xdc00 || c2>=0xe000) {  // Trail surrogate, or lead surrogate not followed by a trail surrogate.
+                // surrogate code point
+                if(containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
+                    break;
+                }
+            } else {
+                // surrogate pair
+                if(containsSlow(U16_GET_SUPPLEMENTARY(c, c2), list4kStarts[0x10], list4kStarts[0x11])) {
+                    break;
+                }
+                ++s;  // Skip the lead surrogate; the loop increment skips the trail surrogate.
+            }
+        } while(++s<limit);
+    }
+    return s;  // Either the position of the code point that ended the span, or limit.
+}
+
+/*
+ * Symmetrical with span().
+ * Span the trailing part of the UTF-16 string [s, limit) in which every
+ * code point c satisfies contains(c)==(spanCondition!=0).
+ * The last code unit is read before any comparison with s,
+ * so it must be s<limit.
+ * Returns a pointer to the start of the span: s if the whole string is
+ * spanned, otherwise the position just after the code point that ended it.
+ */
+const UChar *
+BMPSet::spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const {
+    UChar c, c2;
+
+    if(spanCondition) {  // Any non-zero condition selects the "contained" loop.
+        // span
+        for(;;) {
+            c=*(--limit);  // limit now points at the code unit being tested.
+            if(c<=0x7f) {
+                if(!asciiBytes[c]) {
+                    break;
+                }
+            } else if(c<=0x7ff) {
+                if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
+                    break;
+                }
+            } else if(c<0xd800 || c>=0xe000) {
+                int lead=c>>12;
+                uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;  // Value bit and "mixed" bit of the 64-block.
+                if(twoBits<=1) {
+                    // All 64 code points with the same bits 15..6
+                    // are either in the set or not.
+                    if(twoBits==0) {
+                        break;
+                    }
+                } else {
+                    // Look up the code point in its 4k block of code points.
+                    if(!containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
+                        break;
+                    }
+                }
+            } else if(c<0xdc00 || s==limit || (c2=*(limit-1))<0xd800 || c2>=0xdc00) {  // Lead surrogate, or trail surrogate not preceded by a lead surrogate.
+                // surrogate code point
+                if(!containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
+                    break;
+                }
+            } else {
+                // surrogate pair
+                if(!containsSlow(U16_GET_SUPPLEMENTARY(c2, c), list4kStarts[0x10], list4kStarts[0x11])) {
+                    break;
+                }
+                --limit;  // Also step over the lead surrogate.
+            }
+            if(s==limit) {
+                return s;  // The whole string is part of the span.
+            }
+        }
+    } else {
+        // span not
+        for(;;) {
+            c=*(--limit);  // limit now points at the code unit being tested.
+            if(c<=0x7f) {
+                if(asciiBytes[c]) {
+                    break;
+                }
+            } else if(c<=0x7ff) {
+                if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
+                    break;
+                }
+            } else if(c<0xd800 || c>=0xe000) {
+                int lead=c>>12;
+                uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;  // Value bit and "mixed" bit of the 64-block.
+                if(twoBits<=1) {
+                    // All 64 code points with the same bits 15..6
+                    // are either in the set or not.
+                    if(twoBits!=0) {
+                        break;
+                    }
+                } else {
+                    // Look up the code point in its 4k block of code points.
+                    if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
+                        break;
+                    }
+                }
+            } else if(c<0xdc00 || s==limit || (c2=*(limit-1))<0xd800 || c2>=0xdc00) {  // Lead surrogate, or trail surrogate not preceded by a lead surrogate.
+                // surrogate code point
+                if(containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
+                    break;
+                }
+            } else {
+                // surrogate pair
+                if(containsSlow(U16_GET_SUPPLEMENTARY(c2, c), list4kStarts[0x10], list4kStarts[0x11])) {
+                    break;
+                }
+                --limit;  // Also step over the lead surrogate.
+            }
+            if(s==limit) {
+                return s;  // The whole string is part of the span.
+            }
+        }
+    }
+    return limit+1;  // limit points at the last unit of the code point that ended the span.
+}
+
+/*
+ * Span the initial part of the UTF-8 string s[0..length[ in which every
+ * code point c satisfies contains(c)==spanCondition.
+ * The first and last bytes are read unconditionally, so it must be length>0.
+ * Returns a pointer to the first byte of the first sequence that ends
+ * the span, or the (possibly adjusted) end of the string.
+ *
+ * Precheck for sufficient trail bytes at end of string only once per span.
+ * Check validity.
+ *
+ * Illegal sequences are treated, byte by byte, like contains(FFFD);
+ * asciiBytes[0x80] holds that value.
+ */
+const uint8_t *
+BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
+    const uint8_t *limit=s+length;
+    uint8_t b=*s;
+    if((int8_t)b>=0) {  // b<=0x7f
+        // Initial all-ASCII span.
+        if(spanCondition) {
+            do {
+                if(!asciiBytes[b] || ++s==limit) {
+                    return s;
+                }
+                b=*s;
+            } while((int8_t)b>=0);
+        } else {
+            do {
+                if(asciiBytes[b] || ++s==limit) {
+                    return s;
+                }
+                b=*s;
+            } while((int8_t)b>=0);
+        }
+        length=(int32_t)(limit-s);  // Remaining length after the ASCII prefix.
+    }
+
+    if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
+        spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
+    }
+
+    const uint8_t *limit0=limit;  // The value to return when the loop reaches the (shortened) limit.
+
+    /*
+     * Make sure that the last 1/2/3/4-byte sequence before limit is complete
+     * or runs into a lead byte.
+     * In the span loop compare s with limit only once
+     * per multi-byte character.
+     *
+     * Give a trailing illegal sequence the same value as the result of contains(FFFD),
+     * including it if that is part of the span, otherwise set limit0 to before
+     * the truncated sequence.
+     */
+    b=*(limit-1);
+    if((int8_t)b<0) {
+        // b>=0x80: lead or trail byte
+        if(b<0xc0) {
+            // single trail byte, check for preceding 3- or 4-byte lead byte
+            if(length>=2 && (b=*(limit-2))>=0xe0) {
+                limit-=2;
+                if(asciiBytes[0x80]!=spanCondition) {
+                    limit0=limit;
+                }
+            } else if(b<0xc0 && b>=0x80 && length>=3 && (b=*(limit-3))>=0xf0) {  // b is the second-to-last byte here if length>=2, else still the last byte.
+                // 4-byte lead byte with only two trail bytes
+                limit-=3;
+                if(asciiBytes[0x80]!=spanCondition) {
+                    limit0=limit;
+                }
+            }
+        } else {
+            // lead byte with no trail bytes
+            --limit;
+            if(asciiBytes[0x80]!=spanCondition) {
+                limit0=limit;
+            }
+        }
+    }
+
+    uint8_t t1, t2, t3;  // Trail byte values with 0x80 subtracted; valid ones are 0..0x3f.
+
+    while(s<limit) {
+        b=*s;
+        if(b<0xc0) {
+            // ASCII; or trail bytes with the result of contains(FFFD).
+            if(spanCondition) {
+                do {
+                    if(!asciiBytes[b]) {
+                        return s;
+                    } else if(++s==limit) {
+                        return limit0;
+                    }
+                    b=*s;
+                } while(b<0xc0);
+            } else {
+                do {
+                    if(asciiBytes[b]) {
+                        return s;
+                    } else if(++s==limit) {
+                        return limit0;
+                    }
+                    b=*s;
+                } while(b<0xc0);
+            }
+        }
+        ++s;  // Advance past the lead byte.
+        if(b>=0xe0) {
+            if(b<0xf0) {
+                if( /* handle U+0000..U+FFFF inline */
+                    (t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
+                    (t2=(uint8_t)(s[1]-0x80)) <= 0x3f
+                ) {
+                    b&=0xf;  // b is now bits 15..12 of the code point.
+                    uint32_t twoBits=(bmpBlockBits[t1]>>b)&0x10001;  // Value bit and "mixed" bit of the 64-block.
+                    if(twoBits<=1) {
+                        // All 64 code points with this lead byte and middle trail byte
+                        // are either in the set or not.
+                        if(twoBits!=spanCondition) {
+                            return s-1;  // s-1 is the lead byte of this sequence.
+                        }
+                    } else {
+                        // Look up the code point in its 4k block of code points.
+                        UChar32 c=(b<<12)|(t1<<6)|t2;
+                        if(containsSlow(c, list4kStarts[b], list4kStarts[b+1]) != spanCondition) {
+                            return s-1;
+                        }
+                    }
+                    s+=2;
+                    continue;
+                }
+            } else if( /* handle U+10000..U+10FFFF inline */
+                (t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
+                (t2=(uint8_t)(s[1]-0x80)) <= 0x3f &&
+                (t3=(uint8_t)(s[2]-0x80)) <= 0x3f
+            ) {
+                // Give an illegal sequence the same value as the result of contains(FFFD).
+                UChar32 c=((UChar32)(b-0xf0)<<18)|((UChar32)t1<<12)|(t2<<6)|t3;
+                if( (   (0x10000<=c && c<=0x10ffff) ?
+                            containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) :
+                            asciiBytes[0x80]
+                    ) != spanCondition
+                ) {
+                    return s-1;
+                }
+                s+=3;
+                continue;
+            }
+        } else /* 0xc0<=b<0xe0 */ {
+            if( /* handle U+0000..U+07FF inline */
+                (t1=(uint8_t)(*s-0x80)) <= 0x3f
+            ) {
+                if(((table7FF[t1]&((uint32_t)1<<(b&0x1f)))!=0) != spanCondition) {  // Word index = trail byte value, bit index = low 5 bits of the lead byte.
+                    return s-1;
+                }
+                ++s;
+                continue;
+            }
+        }
+
+        // Give an illegal sequence the same value as the result of contains(FFFD).
+        // Handle each byte of an illegal sequence separately to simplify the code;
+        // no need to optimize error handling.
+        if(asciiBytes[0x80]!=spanCondition) {
+            return s-1;
+        }
+    }
+
+    return limit0;
+}
+
+/*
+ * Span the trailing part of the UTF-8 string s[0..length[ in which every
+ * code point c satisfies contains(c)==spanCondition.
+ * The last byte is read unconditionally, so it must be length>0.
+ * Returns the start index of the span: 0 if the whole string is spanned,
+ * otherwise the index just after the sequence that ended the span.
+ *
+ * While going backwards through UTF-8 optimize only for ASCII.
+ * Unlike UTF-16, UTF-8 is not forward-backward symmetrical, that is, it is not
+ * possible to tell from the last byte in a multi-byte sequence how many
+ * preceding bytes there should be. Therefore, going backwards through UTF-8
+ * is much harder than going forward.
+ */
+int32_t
+BMPSet::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
+    if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
+        spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
+    }
+
+    uint8_t b;
+
+    do {
+        b=s[--length];  // length is now the index of the byte being tested.
+        if((int8_t)b>=0) {
+            // ASCII sub-span
+            if(spanCondition) {
+                do {
+                    if(!asciiBytes[b]) {
+                        return length+1;
+                    } else if(length==0) {
+                        return 0;
+                    }
+                    b=s[--length];
+                } while((int8_t)b>=0);
+            } else {
+                do {
+                    if(asciiBytes[b]) {
+                        return length+1;
+                    } else if(length==0) {
+                        return 0;
+                    }
+                    b=s[--length];
+                } while((int8_t)b>=0);
+            }
+        }
+
+        int32_t prev=length;  // Index of the last byte of the non-ASCII sequence.
+        UChar32 c;
+        if(b<0xc0) {
+            // trail byte: collect a multi-byte character
+            c=utf8_prevCharSafeBody(s, 0, &length, b, -1);  // NOTE(review): presumably moves length back to the start of the sequence and returns a negative value for an illegal one -- confirm in utf8.h.
+            if(c<0) {
+                c=0xfffd;  // Treat an illegal sequence like U+FFFD.
+            }
+        } else {
+            // lead byte in last-trail position
+            c=0xfffd;
+        }
+        // c is a valid code point, not ASCII, not a surrogate
+        if(c<=0x7ff) {
+            if(((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) != spanCondition) {
+                return prev+1;
+            }
+        } else if(c<=0xffff) {
+            int lead=c>>12;
+            uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;  // Value bit and "mixed" bit of the 64-block.
+            if(twoBits<=1) {
+                // All 64 code points with the same bits 15..6
+                // are either in the set or not.
+                if(twoBits!=spanCondition) {
+                    return prev+1;
+                }
+            } else {
+                // Look up the code point in its 4k block of code points.
+                if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]) != spanCondition) {
+                    return prev+1;
+                }
+            }
+        } else {
+            if(containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) != spanCondition) {  // Supplementary code point.
+                return prev+1;
+            }
+        }
+    } while(length>0);
+    return 0;  // The whole string is part of the span.
+}
+
+U_NAMESPACE_END
--- a/icu4c/source/common/bmpset.h
+++ b/icu4c/source/common/bmpset.h
@ -0,0 +1,160 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 2007, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*   file name:  bmpset.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2007jan29
+*   created by: Markus W. Scherer
+*/
+
+#ifndef __BMPSET_H__
+#define __BMPSET_H__
+
+#include "unicode/utypes.h"
+#include "unicode/uniset.h"
+
+U_NAMESPACE_BEGIN
+
+/*
+ * Helper class for frozen UnicodeSets, implements contains() and span()
+ * optimized for BMP code points. Structured to be UTF-8-friendly.
+ *
+ * ASCII: Look up bytes.
+ * 2-byte characters: Bits organized vertically.
+ * 3-byte characters: Use zero/one/mixed data per 64-block in U+0000..U+FFFF,
+ *                    with mixed for illegal ranges.
+ * Supplementary characters: Binary search in the parent set's inversion list
+ *                    (containsSlow()), restricted via list4kStarts[].
+ *
+ * The object does not own the inversion list; it only keeps a pointer to it.
+ *
+ * NOTE(review): contains() is virtual but no virtual destructor is declared
+ * in this class -- confirm that a BMPSet is never deleted through a pointer
+ * to a different static type.
+ */
+class BMPSet : public UMemory {
+public:
+    BMPSet(const int32_t *parentList, int32_t parentListLength);  // Builds all tables from the given inversion list.
+    BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength);  // Copies the tables of otherBMPSet and refers to the new list.
+
+    virtual UBool contains(UChar32 c) const;  // FALSE for c outside 0..0x10ffff.
+
+    /*
+     * Span the initial substring for which each character c has spanCondition==contains(c).
+     * It must be s<limit and spanCondition==0 or 1.
+     * @return The string pointer which limits the span.
+     */
+    const UChar *span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const;
+
+    /*
+     * Span the trailing substring for which each character c has spanCondition==contains(c).
+     * It must be s<limit and spanCondition==0 or 1.
+     * @return The string pointer which starts the span.
+     */
+    const UChar *spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const;
+
+    /*
+     * Span the initial substring for which each character c has spanCondition==contains(c).
+     * It must be length>0 and spanCondition==0 or 1.
+     * Illegal UTF-8 is treated like U+FFFD.
+     * @return The string pointer which limits the span.
+     */
+    const uint8_t *spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const;
+
+    /*
+     * Span the trailing substring for which each character c has spanCondition==contains(c).
+     * It must be length>0 and spanCondition==0 or 1.
+     * Illegal UTF-8 is treated like U+FFFD.
+     * @return The start of the span, as an index into s.
+     */
+    int32_t spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const;
+
+private:
+    void initBits();         // Fills asciiBytes[], table7FF[] and bmpBlockBits[] from list[].
+    void overrideIllegal();  // Sets the table entries for illegal UTF-8 to contains(FFFD).
+
+    /**
+     * Same as UnicodeSet::findCodePoint(UChar32 c) const except that the
+     * binary search is restricted for finding code points in a certain range.
+     *
+     * For restricting the search for finding in the range start..end,
+     * pass in
+     *   lo=findCodePoint(start) and
+     *   hi=findCodePoint(end)
+     * with 0<=lo<=hi<len.
+     * findCodePoint(c) defaults to lo=0 and hi=len-1.
+     *
+     * @param c a character in a subrange of MIN_VALUE..MAX_VALUE
+     * @param lo The lowest index to be returned.
+     * @param hi The highest index to be returned.
+     * @return the smallest integer i in the range lo..hi,
+     *         inclusive, such that c < list[i]
+     */
+    int32_t findCodePoint(UChar32 c, int32_t lo, int32_t hi) const;
+
+    inline UBool containsSlow(UChar32 c, int32_t lo, int32_t hi) const;  // TRUE if findCodePoint(c, lo, hi) is odd.
+
+    /*
+     * One byte per ASCII character, or trail byte in lead position.
+     * 0 or 1 for ASCII characters.
+     * The value for trail bytes is the result of contains(FFFD)
+     * for faster validity checking at runtime.
+     * Indexes 0..0x7f are for ASCII, 0x80..0xbf for trail bytes.
+     */
+    UBool asciiBytes[0xc0];
+
+    /*
+     * One bit per code point from U+0000..U+07FF.
+     * The bits are organized vertically; consecutive code points
+     * correspond to the same bit positions in consecutive table words.
+     * With code point parts
+     *   lead=c{10..6}
+     *   trail=c{5..0}
+     * it is set.contains(c)==(table7FF[trail] bit lead)
+     *
+     * Bits for 0..7F (non-shortest forms) are set to the result of contains(FFFD)
+     * for faster validity checking at runtime.
+     */
+    uint32_t table7FF[64];
+
+    /*
+     * One bit per 64 BMP code points.
+     * The bits are organized vertically; consecutive 64-code point blocks
+     * correspond to the same bit position in consecutive table words.
+     * With code point parts
+     *   lead=c{15..12}
+     *   t1=c{11..6}
+     * test bits (lead+16) and lead in bmpBlockBits[t1].
+     * If the upper bit is 0, then the lower bit indicates if contains(c)
+     * for all code points in the 64-block.
+     * If the upper bit is 1, then the block is mixed and set.contains(c)
+     * must be called.
+     * (The code extracts both bits at once with
+     * (bmpBlockBits[t1]>>lead)&0x10001; a result of 0 or 1 is the answer,
+     * a larger result means "mixed".)
+     *
+     * Bits for 0..7FF (non-shortest forms) and D800..DFFF are set to
+     * the result of contains(FFFD) for faster validity checking at runtime.
+     */
+    uint32_t bmpBlockBits[64];
+
+    /*
+     * Inversion list indexes for restricted binary searches in
+     * findCodePoint(), from
+     * findCodePoint(U+0800, U+1000, U+2000, .., U+F000, U+10000).
+     * U+0800 is the first 3-byte-UTF-8 code point. Code points below U+0800 are
+     * always looked up in the bit tables.
+     * The last pair of indexes is for finding supplementary code points.
+     * list4kStarts[0x11] is listLength-1.
+     */
+    int32_t list4kStarts[18];
+
+    /*
+     * The inversion list of the parent set, for the slower contains() implementation
+     * for mixed BMP blocks and for supplementary code points.
+     * The list is terminated with list[listLength-1]=0x110000.
+     */
+    const int32_t *list;
+    int32_t listLength;
+};
+
+inline UBool BMPSet::containsSlow(UChar32 c, int32_t lo, int32_t hi) const {
+    // In an inversion list, c is inside a range exactly when the index of
+    // the first entry greater than c is odd.
+    const int32_t index=findCodePoint(c, lo, hi);
+    return (UBool)(index & 1);
+}
+
+U_NAMESPACE_END
+
+#endif
--- a/icu4c/source/common/common.vcproj
+++ b/icu4c/source/common/common.vcproj
@ -1771,6 +1771,14 @@
 		<Filter
 			Name="properties &amp; sets"
 			>
+			<File
+				RelativePath=".\bmpset.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\bmpset.h"
+				>
+			</File>
 			<File
 				RelativePath=".\propname.cpp"
 				>
@ -1951,6 +1959,14 @@
 				RelativePath=".\uniset_props.cpp"
 				>
 			</File>
+			<File
+				RelativePath=".\unisetspan.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\unisetspan.h"
+				>
+			</File>
 			<File
 				RelativePath=".\uprops.c"
 				>
--- a/icu4c/source/common/unicode/uniset.h
+++ b/icu4c/source/common/unicode/uniset.h
@ -22,8 +22,10 @@
 
 U_NAMESPACE_BEGIN

+class BMPSet;
 class ParsePosition;
 class SymbolTable;
+class UnicodeSetStringSpan;
 class UVector;
 class RuleCharacterIterator;

@ -263,6 +265,7 @@ class U_COMMON_API UnicodeSet : public UnicodeFilter {
    int32_t len; // length of list used; 0 <= len <= capacity
    int32_t capacity; // capacity of list
    UChar32* list; // MUST be terminated with HIGH
+    BMPSet *bmpSet; // The set is frozen iff either bmpSet or stringSpan is not NULL.
    UChar32* buffer; // internal buffer, may be NULL
    int32_t bufferCapacity; // capacity of buffer
    int32_t patLen;
@ -278,6 +281,7 @@ class U_COMMON_API UnicodeSet : public UnicodeFilter {
     */
    UChar *pat;
    UVector* strings; // maintained in sorted order
+    UnicodeSetStringSpan *stringSpan;

 public:

@ -377,6 +381,7 @@ public:

    /**
     * Assigns this object to be a copy of another.
+     * A frozen set will not be modified.
     * @stable ICU 2.0
     */
    UnicodeSet& operator=(const UnicodeSet& o);
@ -405,6 +410,9 @@ public:
     * Returns a copy of this object.  All UnicodeFunctor objects have
     * to support cloning in order to allow classes using
     * UnicodeFunctors, such as Transliterator, to implement cloning.
+     * If this set is frozen, then the clone will be frozen as well.
+     * Use cloneAsThawed() for a mutable clone of a frozen set.
+     * @see cloneAsThawed
     * @stable ICU 2.0
     */
    virtual UnicodeFunctor* clone() const;
@ -418,6 +426,45 @@ public:
     */
    virtual int32_t hashCode(void) const;

+    //----------------------------------------------------------------
+    // Freezable API
+    //----------------------------------------------------------------
+
+    /**
+     * Determines whether the set has been frozen (made immutable) or not.
+     * See the ICU4J Freezable interface for details.
+     * @return TRUE/FALSE for whether the set has been frozen
+     * @see freeze
+     * @see cloneAsThawed
+     * @draft ICU 3.8
+     */
+    inline UBool isFrozen() const;
+
+    /**
+     * Freeze the set (make it immutable).
+     * Once frozen, it cannot be unfrozen and is therefore thread-safe
+     * until it is deleted.
+     * See the ICU4J Freezable interface for details.
+     * Freezing the set may also make some operations faster, for example
+     * contains() and span().
+     * A frozen set will not be modified. (It remains frozen.)
+     * @return this set.
+     * @see isFrozen
+     * @see cloneAsThawed
+     * @draft ICU 3.8
+     */
+    UnicodeFunctor *freeze();
+
+    /**
+     * Clone the set and make the clone mutable.
+     * See the ICU4J Freezable interface for details.
+     * @return the mutable clone
+     * @see freeze
+     * @see isFrozen
+     * @draft ICU 3.8
+     */
+    UnicodeFunctor *cloneAsThawed() const;
+
    //----------------------------------------------------------------
    // Public API
    //----------------------------------------------------------------
@ -426,6 +473,7 @@ public:
     * Make this object represent the range <code>start - end</code>.
     * If <code>end > start</code> then this object is set to an
     * an empty range.
+     * A frozen set will not be modified.
     *
     * @param start first character in the set, inclusive
     * @param end last character in the set, inclusive
@ -445,6 +493,7 @@ public:
     * Modifies this set to represent the set specified by the given
     * pattern, optionally ignoring white space.  See the class
     * description for the syntax of the pattern language.
+     * A frozen set will not be modified.
     * @param pattern a string specifying what characters are in the set
     * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
     * contains a syntax error.
@ -459,6 +508,7 @@ public:
     * Modifies this set to represent the set specified by the given
     * pattern, optionally ignoring white space.  See the class
     * description for the syntax of the pattern language.
+     * A frozen set will not be modified.
     * @param pattern a string specifying what characters are in the set
     * @param options bitmask for options to apply to the pattern.
     * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
@ -486,6 +536,7 @@ public:
     * pairs list for the parsed pattern is returned.  This method calls
     * itself recursively to parse embedded subpatterns.
     *<em> Empties the set passed before applying the pattern.</em>
+     * A frozen set will not be modified.
     *
     * @param pattern the string containing the pattern to be parsed.
     * The portion of the string from pos.getIndex(), which must be a
@ -515,6 +566,7 @@ public:
     * Returns a string representation of this set.  If the result of
     * calling this function is passed to a UnicodeSet constructor, it
     * will produce another set that is equal to this one.
+     * A frozen set will not be modified.
     * @param result the string to receive the rules.  Previous
     * contents will be deleted.
     * @param escapeUnprintable if TRUE then convert unprintable
@ -530,6 +582,7 @@ public:
     * Modifies this set to contain those code points which have the given value
     * for the given binary or enumerated property, as returned by
     * u_getIntPropertyValue.  Prior contents of this set are lost.
+     * A frozen set will not be modified.
     *
     * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1
     * or UCHAR_INT_START..UCHAR_INT_LIMIT-1
@ -555,6 +608,7 @@ public:
     * Modifies this set to contain those code points which have the
     * given value for the given property.  Prior contents of this
     * set are lost.
+     * A frozen set will not be modified.
     *
     * @param prop a property alias, either short or long.  The name is matched
     * loosely.  See PropertyAliases.txt for names and a description of loose
@ -603,6 +657,7 @@ public:

    /**
     * Returns true if this set contains the given character.
+     * This function works faster with a frozen set.
     * @param c character to be checked for containment
     * @return true if the test condition is met
     * @stable ICU 2.0
@ -702,6 +757,84 @@ public:
     */
    inline UBool containsSome(const UnicodeString& s) const;

+    /**
+     * Returns the length of the initial substring of the input string which
+     * consists only of characters and strings that are contained in this set
+     * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+     * or only of characters and strings that are not contained
+     * in this set (USET_SPAN_NOT_CONTAINED).
+     * See USetSpanCondition for details.
+     * Similar to the strspn() C library function.
+     * Unpaired surrogates are treated according to contains() of their surrogate code points.
+     * This function works faster with a frozen set and with a non-negative string length argument.
+     * @param s start of the string
+     * @param length of the string; can be -1 for NUL-terminated
+     * @param spanCondition specifies the containment condition
+     * @return the length of the initial substring according to the spanCondition;
+     *         0 if the start of the string does not fit the spanCondition
+     * @draft ICU 3.8
+     * @see USetSpanCondition
+     */
+    int32_t span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
+
+    /**
+     * Returns the start of the trailing substring of the input string which
+     * consists only of characters and strings that are contained in this set
+     * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+     * or only of characters and strings that are not contained
+     * in this set (USET_SPAN_NOT_CONTAINED).
+     * See USetSpanCondition for details.
+     * Unpaired surrogates are treated according to contains() of their surrogate code points.
+     * This function works faster with a frozen set and with a non-negative string length argument.
+     * @param s start of the string
+     * @param length of the string; can be -1 for NUL-terminated
+     * @param spanCondition specifies the containment condition
+     * @return the start of the trailing substring according to the spanCondition;
+     *         the string length if the end of the string does not fit the spanCondition
+     * @draft ICU 3.8
+     * @see USetSpanCondition
+     */
+    int32_t spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
+
+    /**
+     * Returns the length of the initial substring of the input string which
+     * consists only of characters and strings that are contained in this set
+     * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+     * or only of characters and strings that are not contained
+     * in this set (USET_SPAN_NOT_CONTAINED).
+     * See USetSpanCondition for details.
+     * Similar to the strspn() C library function.
+     * Malformed byte sequences are treated according to contains(0xfffd).
+     * This function works faster with a frozen set and with a non-negative string length argument.
+     * @param s start of the string (UTF-8)
+     * @param length of the string; can be -1 for NUL-terminated
+     * @param spanCondition specifies the containment condition
+     * @return the length of the initial substring according to the spanCondition;
+     *         0 if the start of the string does not fit the spanCondition
+     * @draft ICU 3.8
+     * @see USetSpanCondition
+     */
+    int32_t spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
+
+    /**
+     * Returns the start of the trailing substring of the input string which
+     * consists only of characters and strings that are contained in this set
+     * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+     * or only of characters and strings that are not contained
+     * in this set (USET_SPAN_NOT_CONTAINED).
+     * See USetSpanCondition for details.
+     * Malformed byte sequences are treated according to contains(0xfffd).
+     * This function works faster with a frozen set and with a non-negative string length argument.
+     * @param s start of the string (UTF-8)
+     * @param length of the string; can be -1 for NUL-terminated
+     * @param spanCondition specifies the containment condition
+     * @return the start of the trailing substring according to the spanCondition;
+     *         the string length if the end of the string does not fit the spanCondition
+     * @draft ICU 3.8
+     * @see USetSpanCondition
+     */
+    int32_t spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
+
    /**
     * Implement UnicodeMatcher::matches()
     * @stable ICU 2.4
@ -786,6 +919,7 @@ public:
     * the call leaves this set unchanged.  If <code>end > start</code>
     * then an empty range is added, leaving the set unchanged.
     * This is equivalent to a boolean logic OR, or a set UNION.
+     * A frozen set will not be modified.
     *
     * @param start first character, inclusive, of range to be added
     * to this set.
@ -799,6 +933,7 @@ public:
     * Adds the specified character to this set if it is not already
     * present.  If this set already contains the specified character,
     * the call leaves this set unchanged.
+     * A frozen set will not be modified.
     * @stable ICU 2.0
     */
    UnicodeSet& add(UChar32 c);
@ -809,6 +944,7 @@ public:
     * the call leaves this set unchanged.
     * Thus "ch" => {"ch"}
     * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
+     * A frozen set will not be modified.
     * @param s the source string
     * @return this object, for chaining
     * @stable ICU 2.4
@ -829,6 +965,7 @@ public:
    /**
     * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
     * If this set already any particular character, it has no effect on that character.
+     * A frozen set will not be modified.
     * @param s the source string
     * @return this object, for chaining
     * @stable ICU 2.4
@ -838,6 +975,7 @@ public:
    /**
     * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
     * If this set already any particular character, it has no effect on that character.
+     * A frozen set will not be modified.
     * @param s the source string
     * @return this object, for chaining
     * @stable ICU 2.4
@ -847,6 +985,7 @@ public:
    /**
     * Complement EACH of the characters in this string. Note: "ch" == {"c", "h"}
     * If this set already any particular character, it has no effect on that character.
+     * A frozen set will not be modified.
     * @param s the source string
     * @return this object, for chaining
     * @stable ICU 2.4
@ -856,6 +995,7 @@ public:
    /**
     * Remove EACH of the characters in this string. Note: "ch" == {"c", "h"}
     * If this set already any particular character, it has no effect on that character.
+     * A frozen set will not be modified.
     * @param s the source string
     * @return this object, for chaining
     * @stable ICU 2.4
@ -887,6 +1027,7 @@ public:
     * specified range.  If <code>end > start</code> then an empty range is
     * retained, leaving the set empty.  This is equivalent to
     * a boolean logic AND, or a set INTERSECTION.
+     * A frozen set will not be modified.
     *
     * @param start first character, inclusive, of range to be retained
     * to this set.
@ -899,6 +1040,7 @@ public:

    /**
     * Retain the specified character from this set if it is present.
+     * A frozen set will not be modified.
     * @stable ICU 2.0
     */
    UnicodeSet& retain(UChar32 c);
@ -908,6 +1050,7 @@ public:
     * The set will not contain the specified range once the call
     * returns.  If <code>end > start</code> then an empty range is
     * removed, leaving the set unchanged.
+     * A frozen set will not be modified.
     *
     * @param start first character, inclusive, of range to be removed
     * from this set.
@ -921,6 +1064,7 @@ public:
     * Removes the specified character from this set if it is present.
     * The set will not contain the specified range once the call
     * returns.
+     * A frozen set will not be modified.
     * @stable ICU 2.0
     */
    UnicodeSet& remove(UChar32 c);
@ -929,6 +1073,7 @@ public:
     * Removes the specified string from this set if it is present.
     * The set will not contain the specified character once the call
     * returns.
+     * A frozen set will not be modified.
     * @param s the source string
     * @return this object, for chaining
     * @stable ICU 2.4
@ -939,6 +1084,7 @@ public:
     * Inverts this set.  This operation modifies this set so that
     * its value is its complement.  This is equivalent to
     * <code>complement(MIN_VALUE, MAX_VALUE)</code>.
+     * A frozen set will not be modified.
     * @stable ICU 2.0
     */
    virtual UnicodeSet& complement(void);
@ -949,6 +1095,7 @@ public:
     * added if it is not in this set.  If <code>end > start</code>
     * then an empty range is complemented, leaving the set unchanged.
     * This is equivalent to a boolean logic XOR.
+     * A frozen set will not be modified.
     *
     * @param start first character, inclusive, of range to be removed
     * from this set.
@ -962,6 +1109,7 @@ public:
     * Complements the specified character in this set.  The character
     * will be removed if it is in this set, or will be added if it is
     * not in this set.
+     * A frozen set will not be modified.
     * @stable ICU 2.0
     */
    UnicodeSet& complement(UChar32 c);
@ -971,6 +1119,7 @@ public:
     * The set will not contain the specified string once the call
     * returns.
     * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
+     * A frozen set will not be modified.
     * @param s the string to complement
     * @return this object, for chaining
     * @stable ICU 2.4
@ -983,6 +1132,7 @@ public:
     * modifies this set so that its value is the <i>union</i> of the two
     * sets.  The behavior of this operation is unspecified if the specified
     * collection is modified while the operation is in progress.
+     * A frozen set will not be modified.
     *
     * @param c set whose elements are to be added to this set.
     * @see #add(char, char)
@ -996,6 +1146,7 @@ public:
     * its elements that are not contained in the specified set.  This
     * operation effectively modifies this set so that its value is
     * the <i>intersection</i> of the two sets.
+     * A frozen set will not be modified.
     *
     * @param c set that defines which elements this set will retain.
     * @stable ICU 2.0
@ -1007,6 +1158,7 @@ public:
     * specified set.  This operation effectively modifies this
     * set so that its value is the <i>asymmetric set difference</i> of
     * the two sets.
+     * A frozen set will not be modified.
     *
     * @param c set that defines which elements will be removed from
     *          this set.
@ -1018,6 +1170,7 @@ public:
     * Complements in this set all elements contained in the specified
     * set.  Any character in the other set will be removed if it is
     * in this set, or will be added if it is not in this set.
+     * A frozen set will not be modified.
     *
     * @param c set that defines which elements will be xor'ed from
     *          this set.
@ -1028,6 +1181,7 @@ public:
    /**
     * Removes all of the elements from this set.  This set will be
     * empty after this call returns.
+     * A frozen set will not be modified.
     * @stable ICU 2.0
     */
    virtual UnicodeSet& clear(void);
@ -1049,6 +1203,8 @@ public:
     * == b denotes that the contents are the same, not pointer
     * comparison.)
     *
+     * A frozen set will not be modified.
+     *
     * @param attribute bitmask for attributes to close over.
     * Currently only the USET_CASE bit is supported.  Any undefined bits
     * are ignored.
@ -1137,6 +1293,7 @@ public:
    /**
     * Reallocate this objects internal structures to take up the least
     * possible space, without changing this object's value.
+     * A frozen set will not be modified.
     * @stable ICU 2.4
     */
    virtual UnicodeSet& compact();
@ -1189,6 +1346,12 @@ private:

 private:

+    //----------------------------------------------------------------
+    // Implementation: Clone as thawed (see ICU4J Freezable)
+    //----------------------------------------------------------------
+
+    /**
+     * Private copy-constructor variant; judging by the section title it
+     * presumably backs cloneAsThawed() (confirm against the implementation).
+     * The unnamed UBool parameter only serves to select this overload.
+     *
+     * Declared without a "UnicodeSet::" qualifier: a qualified name on a
+     * member declaration inside its own class is ill-formed C++ and is
+     * rejected by GCC ("extra qualification"), even though MSVC accepts it.
+     *
+     * @param o the set to copy
+     */
+    UnicodeSet(const UnicodeSet& o, UBool /* asThawed */);
+
    //----------------------------------------------------------------
    // Implementation: Pattern parsing
    //----------------------------------------------------------------
@ -1324,6 +1487,10 @@ inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const {
    return !operator==(o);
 }

+/* Frozen iff at least one of bmpSet / stringSpan is non-NULL. */
+inline UBool UnicodeSet::isFrozen() const {
+    return (UBool)(!(bmpSet==NULL && stringSpan==NULL));
+}
+
 inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const {
    return !containsNone(start, end);
 }
--- a/icu4c/source/common/unicode/uset.h
+++ b/icu4c/source/common/unicode/uset.h
@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 2002-2006, International Business Machines
+*   Copyright (C) 2002-2007, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@ -97,6 +97,120 @@ enum {
    USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8
 };

+#ifndef U_HIDE_DRAFT_API
+
+/**
+ * Argument values for whether span() and similar functions continue while
+ * the current character is contained vs. not contained in the set.
+ *
+ * The functionality is straightforward for sets with only single code points,
+ * without strings (which is the common case):
+ * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE
+ *   work the same.
+ * - span() and spanBack() partition any string the same way when
+ *   alternating between span(USET_SPAN_NOT_CONTAINED) and
+ *   span(either "contained" condition).
+ * - Using a complemented (inverted) set and the opposite span conditions
+ *   yields the same results.
+ *
+ * When a set contains multi-code point strings, then these statements may not
+ * be true, depending on the strings in the set (for example, whether they
+ * overlap with each other) and the string that is processed.
+ * For a set with strings:
+ * - The complement of the set contains the opposite set of code points,
+ *   but the same set of strings.
+ *   Therefore, complementing both the set and the span conditions
+ *   may yield different results.
+ * - When starting spans at different positions in a string
+ *   (span(s, ...) vs. span(s+1, ...)) the ends of the spans may be different
+ *   because a set string may start before the later position.
+ * - span(USET_SPAN_SIMPLE) may be shorter than
+ *   span(USET_SPAN_CONTAINED) because it will not recursively try
+ *   all possible paths.
+ *   For example, with a set which contains the three strings "xy", "xya" and "ax",
+ *   span("xyax", USET_SPAN_CONTAINED) will return 4 but
+ *   span("xyax", USET_SPAN_SIMPLE) will return 3.
+ *   span(USET_SPAN_SIMPLE) will never be longer than
+ *   span(USET_SPAN_CONTAINED).
+ * - With either "contained" condition, span() and spanBack() may partition
+ *   a string in different ways.
+ *   For example, with a set which contains the two strings "ab" and "ba",
+ *   and when processing the string "aba",
+ *   span() will yield contained/not-contained boundaries of { 0, 2, 3 }
+ *   while spanBack() will yield boundaries of { 0, 1, 3 }.
+ *
+ * Note: If it is important to get the same boundaries whether iterating forward
+ * or backward through a string, then either only span() should be used and
+ * the boundaries cached for backward operation, or an ICU BreakIterator
+ * could be used.
+ *
+ * Note: Unpaired surrogates are treated like surrogate code points.
+ * Similarly, set strings match only on code point boundaries,
+ * never in the middle of a surrogate pair.
+ * Illegal UTF-8 sequences are treated like U+FFFD.
+ * When processing UTF-8 strings, malformed set strings
+ * (strings with unpaired surrogates which cannot be converted to UTF-8)
+ * are ignored.
+ *
+ * @draft ICU 3.8
+ */
+enum USetSpanCondition {
+    /**
+     * Continue a span() while there is no set element at the current position.
+     * Stops before the first set element (character or string).
+     * (For code points only, this is like while contains(current)==FALSE).
+     *
+     * When span() returns, the substring between where it started and the position
+     * it returned consists only of characters that are not in the set,
+     * and none of the set's strings overlap with the span.
+     *
+     * @draft ICU 3.8
+     */
+    USET_SPAN_NOT_CONTAINED = 0,
+    /**
+     * Continue a span() while there is a set element at the current position.
+     * (For characters only, this is like while contains(current)==TRUE).
+     *
+     * When span() returns, the substring between where it started and the position
+     * it returned consists only of set elements (characters or strings) that are in the set.
+     *
+     * If a set contains strings, then the span will be the longest substring
+     * matching any of the possible concatenations of set elements (characters or strings).
+     * (There must be a single, non-overlapping concatenation of characters or strings.)
+     * This is equivalent to a POSIX regular expression for (OR of each set element)*.
+     *
+     * @draft ICU 3.8
+     */
+    USET_SPAN_CONTAINED = 1,
+    /**
+     * Continue a span() while there is a set element at the current position.
+     * (For characters only, this is like while contains(current)==TRUE).
+     *
+     * When span() returns, the substring between where it started and the position
+     * it returned consists only of set elements (characters or strings) that are in the set.
+     *
+     * If a set only contains single characters, then this is the same
+     * as USET_SPAN_CONTAINED.
+     *
+     * If a set contains strings, then the span will be the longest substring
+     * with a match at each position with the longest single set element (character or string).
+     * Because alternative paths are not tried, the result may be shorter than
+     * with USET_SPAN_CONTAINED, but it is never longer.
+     *
+     * Use this span condition together with other longest-match algorithms,
+     * such as ICU converters (ucnv_getUnicodeSet()).
+     *
+     * @draft ICU 3.8
+     */
+    USET_SPAN_SIMPLE = 2,
+    /**
+     * One more than the last span condition.
+     * With the values above this is 3, the number of span conditions.
+     * @draft ICU 3.8
+     */
+    USET_SPAN_CONDITION_COUNT
+};
+typedef enum USetSpanCondition USetSpanCondition;
+
+#endif /* U_HIDE_DRAFT_API */
+
 /**
 * A serialized form of a Unicode set.  Limited manipulations are
 * possible directly on a serialized set.  See below.
@ -179,9 +293,72 @@ uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
 U_STABLE void U_EXPORT2
 uset_close(USet* set);

+#ifndef U_HIDE_DRAFT_API
+
+/**
+ * Returns a copy of this object.
+ * If this set is frozen, then the clone will be frozen as well.
+ * Use uset_cloneAsThawed() for a mutable clone of a frozen set.
+ * @param set the original set
+ * @return the newly allocated copy of the set
+ * @see uset_cloneAsThawed
+ * @draft ICU 3.8
+ */
+U_DRAFT USet * U_EXPORT2
+uset_clone(const USet *set);
+
+/*
+ * Freezable API
+ */
+
+/**
+ * Determines whether the set has been frozen (made immutable) or not.
+ * See the ICU4J Freezable interface for details.
+ * @param set the set
+ * @return TRUE/FALSE for whether the set has been frozen
+ * @see uset_freeze
+ * @see uset_cloneAsThawed
+ * @draft ICU 3.8
+ */
+U_DRAFT UBool U_EXPORT2
+uset_isFrozen(const USet *set);
+
+/**
+ * Freeze the set (make it immutable).
+ * Once frozen, it cannot be unfrozen and is therefore thread-safe
+ * until it is deleted.
+ * See the ICU4J Freezable interface for details.
+ * Freezing the set may also make some operations faster, for example
+ * uset_contains() and uset_span().
+ * A frozen set will not be modified. (It remains frozen.)
+ * @param set the set to freeze; it is frozen in place,
+ *            and the function itself returns no value
+ * @see uset_isFrozen
+ * @see uset_cloneAsThawed
+ * @draft ICU 3.8
+ */
+U_DRAFT void U_EXPORT2
+uset_freeze(USet *set);
+
+/**
+ * Clone the set and make the clone mutable.
+ * See the ICU4J Freezable interface for details.
+ * @param set the set
+ * @return the mutable clone
+ * @see uset_freeze
+ * @see uset_isFrozen
+ * @see uset_clone
+ * @draft ICU 3.8
+ */
+U_DRAFT USet * U_EXPORT2
+uset_cloneAsThawed(const USet *set);
+
+#endif /* U_HIDE_DRAFT_API */
+
 /**
 * Causes the USet object to represent the range <code>start - end</code>.
 * If <code>start > end</code> then this USet is set to an empty range.
+ * A frozen set will not be modified.
 * @param set the object to set to the given range
 * @param start first character in the set, inclusive
 * @param end last character in the set, inclusive
@ -196,6 +373,7 @@ uset_set(USet* set,
 * pattern. See the UnicodeSet class description for the syntax of 
 * the pattern language. See also the User Guide chapter about UnicodeSet.
 * <em>Empties the set passed before applying the pattern.</em>
+ * A frozen set will not be modified.
 * @param set               The set to which the pattern is to be applied. 
 * @param pattern           A pointer to UChar string specifying what characters are in the set.
 *                          The character at pattern[0] must be a '['.
@ -221,6 +399,7 @@ uset_applyPattern(USet *set,
 * Modifies the set to contain those code points which have the given value
 * for the given binary or enumerated property, as returned by
 * u_getIntPropertyValue.  Prior contents of this set are lost.
+ * A frozen set will not be modified.
 *
 * @param set the object to contain the code points defined by the property
 *
@ -246,6 +425,7 @@ uset_applyIntPropertyValue(USet* set,
 * Modifies the set to contain those code points which have the
 * given value for the given property.  Prior contents of this
 * set are lost.
+ * A frozen set will not be modified.
 *
 * @param set the object to contain the code points defined by the given
 * property and value alias
@ -319,6 +499,7 @@ uset_toPattern(const USet* set,
 /**
 * Adds the given character to the given USet.  After this call,
 * uset_contains(set, c) will return TRUE.
+ * A frozen set will not be modified.
 * @param set the object to which to add the character
 * @param c the character to add
 * @stable ICU 2.4
@ -332,6 +513,7 @@ uset_add(USet* set, UChar32 c);
 * modifies this set so that its value is the <i>union</i> of the two
 * sets.  The behavior of this operation is unspecified if the specified
 * collection is modified while the operation is in progress.
+ * A frozen set will not be modified.
 *
 * @param set the object to which to add the set
 * @param additionalSet the source set whose elements are to be added to this set.
@ -343,6 +525,7 @@ uset_addAll(USet* set, const USet *additionalSet);
 /**
 * Adds the given range of characters to the given USet.  After this call,
 * uset_contains(set, start, end) will return TRUE.
+ * A frozen set will not be modified.
 * @param set the object to which to add the character
 * @param start the first character of the range to add, inclusive
 * @param end the last character of the range to add, inclusive
@ -354,6 +537,7 @@ uset_addRange(USet* set, UChar32 start, UChar32 end);
 /**
 * Adds the given string to the given USet.  After this call,
 * uset_containsString(set, str, strLen) will return TRUE.
+ * A frozen set will not be modified.
 * @param set the object to which to add the character
 * @param str the string to add
 * @param strLen the length of the string or -1 if null terminated.
@ -365,6 +549,7 @@ uset_addString(USet* set, const UChar* str, int32_t strLen);
 /**
 * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
 * If this set already any particular character, it has no effect on that character.
+ * A frozen set will not be modified.
 * @param set the object to which to add the character
 * @param str the source string
 * @param strLen the length of the string or -1 if null terminated.
@ -376,6 +561,7 @@ uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
 /**
 * Removes the given character from the given USet.  After this call,
 * uset_contains(set, c) will return FALSE.
+ * A frozen set will not be modified.
 * @param set the object from which to remove the character
 * @param c the character to remove
 * @stable ICU 2.4
@ -386,6 +572,7 @@ uset_remove(USet* set, UChar32 c);
 /**
 * Removes the given range of characters from the given USet.  After this call,
 * uset_contains(set, start, end) will return FALSE.
+ * A frozen set will not be modified.
 * @param set the object to which to add the character
 * @param start the first character of the range to remove, inclusive
 * @param end the last character of the range to remove, inclusive
@ -397,6 +584,7 @@ uset_removeRange(USet* set, UChar32 start, UChar32 end);
 /**
 * Removes the given string to the given USet.  After this call,
 * uset_containsString(set, str, strLen) will return FALSE.
+ * A frozen set will not be modified.
 * @param set the object to which to add the character
 * @param str the string to remove
 * @param strLen the length of the string or -1 if null terminated.
@ -410,6 +598,7 @@ uset_removeString(USet* set, const UChar* str, int32_t strLen);
 * specified set.  This operation effectively modifies this
 * set so that its value is the <i>asymmetric set difference</i> of
 * the two sets.
+ * A frozen set will not be modified.
 * @param set the object from which the elements are to be removed
 * @param removeSet the object that defines which elements will be
 * removed from this set
@ -423,6 +612,7 @@ uset_removeAll(USet* set, const USet* removeSet);
 * specified range.  If <code>start > end</code> then an empty range is
 * retained, leaving the set empty.  This is equivalent to
 * a boolean logic AND, or a set INTERSECTION.
+ * A frozen set will not be modified.
 *
 * @param set the object for which to retain only the specified range
 * @param start first character, inclusive, of range to be retained
@ -440,6 +630,7 @@ uset_retain(USet* set, UChar32 start, UChar32 end);
 * its elements that are not contained in the specified set.  This
 * operation effectively modifies this set so that its value is
 * the <i>intersection</i> of the two sets.
+ * A frozen set will not be modified.
 *
 * @param set the object on which to perform the retain
 * @param retain set that defines which elements this set will retain
@ -451,6 +642,7 @@ uset_retainAll(USet* set, const USet* retain);
 /**
 * Reallocate this objects internal structures to take up the least
 * possible space, without changing this object's value.
+ * A frozen set will not be modified.
 *
 * @param set the object on which to perfrom the compact
 * @stable ICU 3.2
@ -462,6 +654,7 @@ uset_compact(USet* set);
 * Inverts this set.  This operation modifies this set so that
 * its value is its complement.  This operation does not affect
 * the multicharacter strings, if any.
+ * A frozen set will not be modified.
 * @param set the set
 * @stable ICU 2.4
 */
@ -472,6 +665,7 @@ uset_complement(USet* set);
 * Complements in this set all elements contained in the specified
 * set.  Any character in the other set will be removed if it is
 * in this set, or will be added if it is not in this set.
+ * A frozen set will not be modified.
 *
 * @param set the set with which to complement
 * @param complement set that defines which elements will be xor'ed
@ -484,6 +678,7 @@ uset_complementAll(USet* set, const USet* complement);
 /**
 * Removes all of the elements from this set.  This set will be
 * empty after this call returns.
+ * A frozen set will not be modified.
 * @param set the set
 * @stable ICU 2.4
 */
@ -502,6 +697,7 @@ uset_isEmpty(const USet* set);

 /**
 * Returns TRUE if the given USet contains the given character.
+ * This function works faster with a frozen set.
 * @param set the set
 * @param c The codepoint to check for within the set
 * @return true if set contains c
@ -651,6 +847,96 @@ uset_containsNone(const USet* set1, const USet* set2);
 U_STABLE UBool U_EXPORT2
 uset_containsSome(const USet* set1, const USet* set2);

+#ifndef U_HIDE_DRAFT_API
+
+/**
+ * Returns the length of the initial substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Similar to the strspn() C library function.
+ * Unpaired surrogates are treated according to contains() of their surrogate code points.
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param set the set
+ * @param s start of the string
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the length of the initial substring according to the spanCondition;
+ *         0 if the start of the string does not fit the spanCondition
+ * @draft ICU 3.8
+ * @see USetSpanCondition
+ */
+U_DRAFT int32_t U_EXPORT2
+uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
+
+/**
+ * Returns the start of the trailing substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Unpaired surrogates are treated according to contains() of their surrogate code points.
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param set the set
+ * @param s start of the string
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the start of the trailing substring according to the spanCondition;
+ *         the string length if the end of the string does not fit the spanCondition
+ * @draft ICU 3.8
+ * @see USetSpanCondition
+ */
+U_DRAFT int32_t U_EXPORT2
+uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
+
+/**
+ * Returns the length of the initial substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Similar to the strspn() C library function.
+ * Malformed byte sequences are treated according to contains(0xfffd).
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param set the set
+ * @param s start of the string (UTF-8)
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the length of the initial substring according to the spanCondition;
+ *         0 if the start of the string does not fit the spanCondition
+ * @draft ICU 3.8
+ * @see USetSpanCondition
+ */
+U_DRAFT int32_t U_EXPORT2
+uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
+
+/**
+ * Returns the start of the trailing substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Malformed byte sequences are treated according to contains(0xfffd).
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param set the set
+ * @param s start of the string (UTF-8)
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the start of the trailing substring according to the spanCondition;
+ *         the string length if the end of the string does not fit the spanCondition
+ * @draft ICU 3.8
+ * @see USetSpanCondition
+ */
+U_DRAFT int32_t U_EXPORT2
+uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
+
+#endif /* U_HIDE_DRAFT_API */
+
 /**
 * Returns true if set1 contains all of the characters and strings
 * of set2, and vis versa. It answers the question, 'Is set1 equal to set2?'
--- a/icu4c/source/common/uniset.cpp
+++ b/icu4c/source/common/uniset.cpp
@ -14,6 +14,7 @@
 #include "unicode/symtable.h"
 #include "ruleiter.h"
 #include "cmemory.h"
+#include "cstring.h"
 #include "uhash.h"
 #include "util.h"
 #include "uvector.h"
@ -21,6 +22,8 @@
 #include "ustrfmt.h"
 #include "uassert.h"
 #include "hash.h"
+#include "bmpset.h"
+#include "unisetspan.h"

 // Define UChar constants using hex for EBCDIC compatibility
 // Used #define to reduce private static exports and memory access time.
@ -138,8 +141,8 @@ static int8_t U_CALLCONV compareUnicodeString(UHashTok t1, UHashTok t2) {
 * Constructs an empty set.
 */
 UnicodeSet::UnicodeSet() :
-    len(1), capacity(1 + START_EXTRA), list(0), buffer(0),
-    bufferCapacity(0), patLen(0), pat(NULL), strings(NULL)
+    len(1), capacity(1 + START_EXTRA), list(0), bmpSet(0), buffer(0),
+    bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL)
 {
    list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
    if(list!=NULL){
@ -158,8 +161,8 @@ UnicodeSet::UnicodeSet() :
 * @param end last character, inclusive, of range
 */
 UnicodeSet::UnicodeSet(UChar32 start, UChar32 end) :
-    len(1), capacity(1 + START_EXTRA), list(0), buffer(0),
-    bufferCapacity(0), patLen(0), pat(NULL), strings(NULL)
+    len(1), capacity(1 + START_EXTRA), list(0), bmpSet(0), buffer(0),
+    bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL)
 {
    list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
    if(list!=NULL){
@ -177,8 +180,10 @@ UnicodeSet::UnicodeSet(UChar32 start, UChar32 end) :
 */
 UnicodeSet::UnicodeSet(const UnicodeSet& o) :
    UnicodeFilter(o),
-    len(0), capacity(o.len + GROW_EXTRA), list(0), buffer(0),
-    bufferCapacity(0), patLen(0), pat(NULL), strings(NULL)
+    len(0), capacity(o.isFrozen() ? o.len : o.len + GROW_EXTRA), list(0),
+    bmpSet(0),
+    buffer(0), bufferCapacity(0),
+    patLen(0), pat(NULL), strings(NULL), stringSpan(NULL)
 {
    list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
    if(list!=NULL){
@ -189,16 +194,41 @@ UnicodeSet::UnicodeSet(const UnicodeSet& o) :
    _dbgct(this);
 }

+/**
+ * Copy-construct as thawed.
+ * Copies the inversion list, the strings and the pattern of o, but leaves
+ * bmpSet and stringSpan at NULL.
+ * @param o the set to copy
+ * The unnamed UBool parameter only selects this overload; its value is ignored.
+ */
+UnicodeSet::UnicodeSet(const UnicodeSet& o, UBool /* asThawed */) :
+    UnicodeFilter(o),
+    len(0), capacity(o.len + GROW_EXTRA), list(0),
+    bmpSet(0),
+    buffer(0), bufferCapacity(0),
+    patLen(0), pat(NULL), strings(NULL), stringSpan(NULL)
+{
+    list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
+    if(list!=NULL){
+        UErrorCode status = U_ZERO_ERROR;
+        allocateStrings(status);
+        // *this = o except for bmpSet and stringSpan
+        len = o.len;
+        uprv_memcpy(list, o.list, len*sizeof(UChar32));
+        // strings starts out NULL; do not dereference it if allocateStrings()
+        // could not create the vector.
+        // NOTE(review): allocateStrings() is not visible here -- confirm that
+        // it leaves strings NULL on failure.
+        if (strings != NULL) {
+            strings->assign(*o.strings, cloneUnicodeString, status);
+        }
+        if (o.pat) {
+            setPattern(UnicodeString(o.pat, o.patLen));
+        }
+    }
+    _dbgct(this);
+}
+
 /**
 * Destructs the set.
 */
 UnicodeSet::~UnicodeSet() {
    _dbgdt(this); // first!
    uprv_free(list);
+    delete bmpSet;
    if (buffer) {
        uprv_free(buffer);
    }
    delete strings;
+    delete stringSpan;
    releasePattern();
 }

@ -206,11 +236,24 @@ UnicodeSet::~UnicodeSet() {
 * Assigns this object to be a copy of another.
 */
 UnicodeSet& UnicodeSet::operator=(const UnicodeSet& o) {
+    if (isFrozen()) {
+        return *this;
+    }
    ensureCapacity(o.len);
    len = o.len;
    uprv_memcpy(list, o.list, len*sizeof(UChar32));
+    if (o.bmpSet == NULL) {
+        bmpSet = NULL;
+    } else {
+        bmpSet = new BMPSet(*o.bmpSet, list, len);
+    }
    UErrorCode ec = U_ZERO_ERROR;
    strings->assign(*o.strings, cloneUnicodeString, ec);
+    if (o.stringSpan == NULL) {
+        stringSpan = NULL;
+    } else {
+        stringSpan = new UnicodeSetStringSpan(*o.stringSpan, *strings);
+    }
    releasePattern();
    if (o.pat) {
        setPattern(UnicodeString(o.pat, o.patLen));
@ -218,6 +261,19 @@ UnicodeSet& UnicodeSet::operator=(const UnicodeSet& o) {
    return *this;
 }

+/**
+ * Returns a copy of this object.  All UnicodeMatcher objects have
+ * to support cloning in order to allow classes using
+ * UnicodeMatchers, such as Transliterator, to implement cloning.
+ */
+UnicodeFunctor* UnicodeSet::clone() const {
+    return new UnicodeSet(*this);
+}
+
+/**
+ * Returns a newly allocated copy of this object made with the
+ * "copy-construct as thawed" constructor, i.e. a copy that does not
+ * carry over bmpSet or stringSpan.
+ * @return the new UnicodeSet, or NULL if the allocation failed
+ */
+UnicodeFunctor *UnicodeSet::cloneAsThawed() const {
+    return new UnicodeSet(*this, TRUE);
+}
+
 /**
 * Compares the specified object with this set for equality.  Returns
 * <tt>true</tt> if the two sets
@ -237,15 +293,6 @@ UBool UnicodeSet::operator==(const UnicodeSet& o) const {
    return TRUE;
 }

-/**
- * Returns a copy of this object.  All UnicodeMatcher objects have
- * to support cloning in order to allow classes using
- * UnicodeMatchers, such as Transliterator, to implement cloning.
- */
-UnicodeFunctor* UnicodeSet::clone() const {
-    return new UnicodeSet(*this);
-}
-
 /**
 * Returns the hash code value for this set.
 *
@ -265,20 +312,6 @@ int32_t UnicodeSet::hashCode(void) const {
 // Public API
 //----------------------------------------------------------------

-/**
- * Make this object represent the range <code>start - end</code>.
- * If <code>end > start</code> then this object is set to an
- * an empty range.
- *
- * @param start first character in the set, inclusive
- * @rparam end last character in the set, inclusive
- */
-UnicodeSet& UnicodeSet::set(UChar32 start, UChar32 end) {
-    clear();
-    complement(start, end);
-    return *this;
-}
-
 /**
 * Returns the number of elements in this set (its cardinality),
 * Note than the elements of a set may include both individual
@ -317,11 +350,17 @@ UBool UnicodeSet::contains(UChar32 c) const {
    //for (;;) {
    //    if (c < list[++i]) break;
    //}
+    if (bmpSet != NULL) {
+        return bmpSet->contains(c);
+    }
+    if (stringSpan != NULL) {
+        return stringSpan->contains(c);
+    }
    if (c >= UNICODESET_HIGH) { // Don't need to check LOW bound
        return FALSE;
    }
    int32_t i = findCodePoint(c);
-    return ((i & 1) != 0); // return true if odd
+    return (UBool)(i & 1); // return true if odd
 }

 /**
@ -350,10 +389,10 @@ int32_t UnicodeSet::findCodePoint(UChar32 c) const {
        return 0;
    // High runner test.  c is often after the last range, so an
    // initial check for this condition pays off.
-    if (len >= 2 && c >= list[len-2])
-        return len-1;
    int32_t lo = 0;
    int32_t hi = len - 1;
+    if (lo >= hi || c >= list[hi-1])
+        return hi;
    // invariant: c >= list[lo]
    // invariant: c < list[hi]
    for (;;) {
@ -428,12 +467,8 @@ UBool UnicodeSet::containsAll(const UnicodeSet& c) const {
 * @return true if the test condition is met
 */
 UBool UnicodeSet::containsAll(const UnicodeString& s) const {
-    UChar32 cp;
-    for (int32_t i = 0; i < s.length(); i += UTF_CHAR_LENGTH(cp)) {
-        cp = s.char32At(i);
-        if (!contains(cp)) return FALSE;
-    }
-    return TRUE;
+    return (UBool)(span(s.getBuffer(), s.length(), USET_SPAN_CONTAINED) ==
+                   s.length());
 }

 /**
@ -479,12 +514,8 @@ UBool UnicodeSet::containsNone(const UnicodeSet& c) const {
 * @return true if the test condition is met
 */
 UBool UnicodeSet::containsNone(const UnicodeString& s) const {
-    UChar32 cp;
-    for (int32_t i = 0; i < s.length(); i += UTF_CHAR_LENGTH(cp)) {
-        cp = s.char32At(i);
-        if (contains(cp)) return FALSE;
-    }
-    return TRUE;
+    return (UBool)(span(s.getBuffer(), s.length(), USET_SPAN_NOT_CONTAINED) ==
+                   s.length());
 }

 /**
@ -723,6 +754,20 @@ UChar32 UnicodeSet::charAt(int32_t index) const {
    return (UChar32)-1;
 }

+/**
+ * Make this object represent the range <code>start - end</code>.
+ * If <code>start > end</code> then this object is set to
+ * an empty range.
+ *
+ * @param start first character in the set, inclusive
+ * @param end last character in the set, inclusive
+ */
+UnicodeSet& UnicodeSet::set(UChar32 start, UChar32 end) {
+    clear();
+    complement(start, end);
+    return *this;
+}
+
 /**
 * Adds the specified range to this set if it is not already
 * present.  If this set already contains the specified range,
@ -777,7 +822,7 @@ UnicodeSet& UnicodeSet::add(UChar32 c) {
    int32_t i = findCodePoint(pinCodePoint(c));

    // already in set?
-    if ((i & 1) != 0) return *this;
+    if ((i & 1) != 0  || isFrozen()) return *this;

    // HIGH is 0x110000
    // assert(list[len-1] == HIGH);
@ -888,7 +933,7 @@ UnicodeSet& UnicodeSet::add(UChar32 c) {
 * @return the modified set, for chaining
 */
 UnicodeSet& UnicodeSet::add(const UnicodeString& s) {
-    if (s.length() == 0) return *this;
+    if (s.length() == 0 || isFrozen()) return *this;
    int32_t cp = getSingleCP(s);
    if (cp < 0) {
        if (!strings->contains((void*) &s)) {
@ -896,7 +941,7 @@ UnicodeSet& UnicodeSet::add(const UnicodeString& s) {
            releasePattern();
        }
    } else {
-        add((UChar32)cp, (UChar32)cp);
+        add((UChar32)cp);
    }
    return *this;
 }
@ -907,6 +952,9 @@ UnicodeSet& UnicodeSet::add(const UnicodeString& s) {
 * already be in 'strings'.
 */
 void UnicodeSet::_add(const UnicodeString& s) {
+    if (isFrozen()) {
+        return;
+    }
    UnicodeString* t = new UnicodeString(s);
    UErrorCode ec = U_ZERO_ERROR;
    strings->sortedInsert(t, compareUnicodeString, ec);
@ -942,7 +990,7 @@ UnicodeSet& UnicodeSet::addAll(const UnicodeString& s) {
    UChar32 cp;
    for (int32_t i = 0; i < s.length(); i += UTF_CHAR_LENGTH(cp)) {
        cp = s.char32At(i);
-        add(cp, cp);
+        add(cp);
    }
    return *this;
 }
@ -1070,7 +1118,7 @@ UnicodeSet& UnicodeSet::remove(UChar32 c) {
 * @return the modified set, for chaining
 */
 UnicodeSet& UnicodeSet::remove(const UnicodeString& s) {
-    if (s.length() == 0) return *this;
+    if (s.length() == 0 || isFrozen()) return *this;
    int32_t cp = getSingleCP(s);
    if (cp < 0) {
        strings->removeElement((void*) &s);
@ -1093,6 +1141,9 @@ UnicodeSet& UnicodeSet::remove(const UnicodeString& s) {
 * from this set.
 */
 UnicodeSet& UnicodeSet::complement(UChar32 start, UChar32 end) {
+    if (isFrozen()) {
+        return *this;
+    }
    if (pinCodePoint(start) <= pinCodePoint(end)) {
        UChar32 range[3] = { start, end+1, UNICODESET_HIGH };
        exclusiveOr(range, 2, 0);
@ -1110,6 +1161,9 @@ UnicodeSet& UnicodeSet::complement(UChar32 c) {
 * <code>complement(MIN_VALUE, MAX_VALUE)</code>.
 */
 UnicodeSet& UnicodeSet::complement(void) {
+    if (isFrozen()) {
+        return *this;
+    }
    if (list[0] == UNICODESET_LOW) {
        ensureBufferCapacity(len-1);
        uprv_memcpy(buffer, list + 1, (len-1)*sizeof(UChar32));
@ -1134,7 +1188,7 @@ UnicodeSet& UnicodeSet::complement(void) {
 * @return this object, for chaining
 */
 UnicodeSet& UnicodeSet::complement(const UnicodeString& s) {
-    if (s.length() == 0) return *this;
+    if (s.length() == 0 || isFrozen()) return *this;
    int32_t cp = getSingleCP(s);
    if (cp < 0) {
        if (strings->contains((void*) &s)) {
@ -1182,6 +1236,9 @@ UnicodeSet& UnicodeSet::addAll(const UnicodeSet& c) {
 * @param c set that defines which elements this set will retain.
 */
 UnicodeSet& UnicodeSet::retainAll(const UnicodeSet& c) {
+    if (isFrozen()) {
+        return *this;
+    }
    retain(c.list, c.len, 0);
    strings->retainAll(*c.strings);
    return *this;
@ -1197,6 +1254,9 @@ UnicodeSet& UnicodeSet::retainAll(const UnicodeSet& c) {
 *          this set.
 */
 UnicodeSet& UnicodeSet::removeAll(const UnicodeSet& c) {
+    if (isFrozen()) {
+        return *this;
+    }
    retain(c.list, c.len, 2);
    strings->removeAll(*c.strings);
    return *this;
@ -1211,6 +1271,9 @@ UnicodeSet& UnicodeSet::removeAll(const UnicodeSet& c) {
 *          this set.
 */
 UnicodeSet& UnicodeSet::complementAll(const UnicodeSet& c) {
+    if (isFrozen()) {
+        return *this;
+    }
    exclusiveOr(c.list, c.len, 0);

    for (int32_t i=0; i<c.strings->size(); ++i) {
@ -1227,6 +1290,9 @@ UnicodeSet& UnicodeSet::complementAll(const UnicodeSet& c) {
 * empty after this call returns.
 */
 UnicodeSet& UnicodeSet::clear(void) {
+    if (isFrozen()) {
+        return *this;
+    }
    list[0] = UNICODESET_HIGH;
    len = 1;
    releasePattern();
@ -1277,9 +1343,14 @@ const UnicodeString* UnicodeSet::getString(int32_t index) const {
 * possible space, without changing this object's value.
 */
 UnicodeSet& UnicodeSet::compact() {
+    if (isFrozen()) {
+        return *this;
+    }
    // Delete buffer first to defragment memory less.
-    uprv_free(buffer);
-    buffer = NULL;
+    if (buffer != NULL) {
+        uprv_free(buffer);
+        buffer = NULL;
+    }
    if (len < capacity) {
        // Make the capacity equal to len or 1.
        // We don't want to realloc of 0 size.
@ -1437,6 +1508,9 @@ static inline UChar32 max(UChar32 a, UChar32 b) {
 // polarity = 1, 2: x xor ~y == x === y

 void UnicodeSet::exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity) {
+    if (isFrozen()) {
+        return;
+    }
    ensureBufferCapacity(len + otherLen);
    int32_t i = 0, j = 0, k = 0;
    UChar32 a = list[i++];
@ -1479,6 +1553,9 @@ void UnicodeSet::exclusiveOr(const UChar32* other, int32_t otherLen, int8_t pola
 // polarity = 3: ~x union ~y

 void UnicodeSet::add(const UChar32* other, int32_t otherLen, int8_t polarity) {
+    if (isFrozen()) {
+        return;
+    }
    ensureBufferCapacity(len + otherLen);
    int32_t i = 0, j = 0, k = 0;
    UChar32 a = list[i++];
@ -1584,6 +1661,9 @@ void UnicodeSet::add(const UChar32* other, int32_t otherLen, int8_t polarity) {
 // polarity = 3: ~x intersect ~y

 void UnicodeSet::retain(const UChar32* other, int32_t otherLen, int8_t polarity) {
+    if (isFrozen()) {
+        return;
+    }
    ensureBufferCapacity(len + otherLen);
    int32_t i = 0, j = 0, k = 0;
    UChar32 a = list[i++];
@ -1864,4 +1944,199 @@ void UnicodeSet::setPattern(const UnicodeString& newPat) {
    // We can regenerate an equivalent pattern later when requested.
 }

+/**
+ * Freezes this set: releases the scratch buffer, trims spare list capacity
+ * and builds the helper object (stringSpan or bmpSet) that contains() and
+ * the span functions consult.
+ * Does nothing if the set is already frozen.
+ * @return this object
+ */
+UnicodeFunctor *UnicodeSet::freeze() {
+    if(!isFrozen()) {
+        // Do most of what compact() does before freezing because
+        // compact() will not work when the set is frozen.
+        // Small modification: Don't shrink if the savings would be tiny (<=GROW_EXTRA).
+
+        // Delete buffer first to defragment memory less.
+        if (buffer != NULL) {
+            uprv_free(buffer);
+            buffer = NULL;
+        }
+        if (capacity > (len + GROW_EXTRA)) {
+            // Make the capacity equal to len or 1.
+            // We don't want to realloc of 0 size.
+            int32_t newCapacity = len + (len == 0);
+            UChar32 *shrunk = (UChar32*) uprv_realloc(list, sizeof(UChar32) * newCapacity);
+            if (shrunk != NULL) {
+                // Adopt the smaller block only on success. If realloc fails,
+                // the old, larger list is still valid: keep it (and its
+                // capacity) instead of leaking it and leaving list NULL.
+                list = shrunk;
+                capacity = newCapacity;
+            }
+        }
+
+        // Optimize contains() and span() and similar functions.
+        if (!strings->isEmpty()) {
+            stringSpan = new UnicodeSetStringSpan(*this, *strings, UnicodeSetStringSpan::ALL);
+            if (stringSpan != NULL && !stringSpan->needsStringSpanUTF16()) {
+                // All strings are irrelevant for span() etc. because
+                // all of each string's code points are contained in this set.
+                // Do not check needsStringSpanUTF8() because UTF-8 has at most as
+                // many relevant strings as UTF-16.
+                // (Thus needsStringSpanUTF8() implies needsStringSpanUTF16().)
+                delete stringSpan;
+                stringSpan = NULL;
+            }
+        }
+        if (stringSpan == NULL) {
+            // No span-relevant strings: Optimize for code point spans.
+            bmpSet=new BMPSet(list, len);
+        }
+    }
+    return this;
+}
+
+/**
+ * Returns the length of the initial substring of s whose code points
+ * (and, for sets with relevant strings, strings) match spanCondition.
+ * @param s start of the UTF-16 string
+ * @param length number of UChars in s; a negative value means the string
+ *               is NUL-terminated and is measured with u_strlen()
+ * @param spanCondition the containment condition to test
+ * @return the span length; 0 for an empty string
+ */
+int32_t UnicodeSet::span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const {
+    // Fast path: needs a known positive length because bmpSet takes an end pointer.
+    if(length>0 && bmpSet!=NULL) {
+        return (int32_t)(bmpSet->span(s, s+length, spanCondition)-s);
+    }
+    if(length<0) {
+        length=u_strlen(s);
+    }
+    if(length==0) {
+        return 0;
+    }
+    if(stringSpan!=NULL) {
+        // A prebuilt string span object exists (set up by freeze()).
+        return stringSpan->span(s, length, spanCondition);
+    } else if(!strings->isEmpty()) {
+        // The set has strings but no prebuilt helper: build a temporary one
+        // for just this direction, encoding and condition.
+        uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
+                            UnicodeSetStringSpan::FWD_UTF16_NOT_CONTAINED :
+                            UnicodeSetStringSpan::FWD_UTF16_CONTAINED;
+        UnicodeSetStringSpan strSpan(*this, *strings, which);
+        if(strSpan.needsStringSpanUTF16()) {
+            return strSpan.span(s, length, spanCondition);
+        }
+        // Otherwise fall through to the plain code point loop below.
+    }
+
+    if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
+        spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
+    }
+
+    // Walk forward one code point at a time; prev is the end of the last
+    // code point that matched. length is non-zero here, so the do-while
+    // may read the first code point unconditionally.
+    UChar32 c;
+    int32_t start=0, prev=0;
+    do {
+        U16_NEXT(s, start, length, c);
+        // The pinned spanCondition is compared directly with the UBool from contains().
+        if(spanCondition!=contains(c)) {
+            break;
+        }
+    } while((prev=start)<length);
+    return prev;
+}
+
+/**
+ * Returns the start index of the trailing substring of s whose code points
+ * (and, for sets with relevant strings, strings) match spanCondition.
+ * @param s start of the UTF-16 string
+ * @param length number of UChars in s; a negative value means the string
+ *               is NUL-terminated and is measured with u_strlen()
+ * @param spanCondition the containment condition to test
+ * @return the start index of the trailing span; 0 for an empty string
+ */
+int32_t UnicodeSet::spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const {
+    // Fast path: needs a known positive length because bmpSet takes an end pointer.
+    if(length>0 && bmpSet!=NULL) {
+        return (int32_t)(bmpSet->spanBack(s, s+length, spanCondition)-s);
+    }
+    if(length<0) {
+        length=u_strlen(s);
+    }
+    if(length==0) {
+        return 0;
+    }
+    if(stringSpan!=NULL) {
+        // A prebuilt string span object exists (set up by freeze()).
+        return stringSpan->spanBack(s, length, spanCondition);
+    } else if(!strings->isEmpty()) {
+        // The set has strings but no prebuilt helper: build a temporary one
+        // for just this direction, encoding and condition.
+        uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
+                            UnicodeSetStringSpan::BACK_UTF16_NOT_CONTAINED :
+                            UnicodeSetStringSpan::BACK_UTF16_CONTAINED;
+        UnicodeSetStringSpan strSpan(*this, *strings, which);
+        if(strSpan.needsStringSpanUTF16()) {
+            return strSpan.spanBack(s, length, spanCondition);
+        }
+        // Otherwise fall through to the plain code point loop below.
+    }
+
+    if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
+        spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
+    }
+
+    // Walk backward one code point at a time, using length as the moving
+    // index; prev is the start of the last code point that matched.
+    // length is non-zero here, so the do-while may read unconditionally.
+    UChar32 c;
+    int32_t prev=length;
+    do {
+        U16_PREV(s, 0, length, c);
+        // The pinned spanCondition is compared directly with the UBool from contains().
+        if(spanCondition!=contains(c)) {
+            break;
+        }
+    } while((prev=length)>0);
+    return prev;
+}
+
+/**
+ * UTF-8 version of span(): returns the length in bytes of the initial
+ * substring of s whose code points (and, for sets with relevant strings,
+ * strings) match spanCondition.
+ * A sequence for which U8_NEXT yields a negative code point is tested
+ * as U+FFFD.
+ * @param s start of the UTF-8 string
+ * @param length number of bytes in s; a negative value means the string
+ *               is NUL-terminated and is measured with uprv_strlen()
+ * @param spanCondition the containment condition to test
+ * @return the span length in bytes; 0 for an empty string
+ */
+int32_t UnicodeSet::spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const {
+    // Fast path: only taken when the length is already known and positive.
+    if(length>0 && bmpSet!=NULL) {
+        const uint8_t *s0=(const uint8_t *)s;
+        return (int32_t)(bmpSet->spanUTF8(s0, length, spanCondition)-s0);
+    }
+    if(length<0) {
+        // Explicit cast: the string length result is wider than int32_t on
+        // some platforms; make the narrowing visible like the other casts here.
+        length=(int32_t)uprv_strlen(s);
+    }
+    if(length==0) {
+        return 0;
+    }
+    if(stringSpan!=NULL) {
+        // A prebuilt string span object exists (set up by freeze()).
+        return stringSpan->spanUTF8((const uint8_t *)s, length, spanCondition);
+    } else if(!strings->isEmpty()) {
+        // The set has strings but no prebuilt helper: build a temporary one
+        // for just this direction, encoding and condition.
+        uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
+                            UnicodeSetStringSpan::FWD_UTF8_NOT_CONTAINED :
+                            UnicodeSetStringSpan::FWD_UTF8_CONTAINED;
+        UnicodeSetStringSpan strSpan(*this, *strings, which);
+        if(strSpan.needsStringSpanUTF8()) {
+            return strSpan.spanUTF8((const uint8_t *)s, length, spanCondition);
+        }
+        // Otherwise fall through to the plain code point loop below.
+    }
+
+    if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
+        spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
+    }
+
+    // Walk forward one code point at a time; prev is the end of the last
+    // code point that matched. length is non-zero here, so the do-while
+    // may read the first code point unconditionally.
+    UChar32 c;
+    int32_t start=0, prev=0;
+    do {
+        U8_NEXT(s, start, length, c);
+        if(c<0) {
+            c=0xfffd;
+        }
+        if(spanCondition!=contains(c)) {
+            break;
+        }
+    } while((prev=start)<length);
+    return prev;
+}
+
+/**
+ * UTF-8 version of spanBack(): returns the start index in bytes of the
+ * trailing substring of s whose code points (and, for sets with relevant
+ * strings, strings) match spanCondition.
+ * A sequence for which U8_PREV yields a negative code point is tested
+ * as U+FFFD.
+ * @param s start of the UTF-8 string
+ * @param length number of bytes in s; a negative value means the string
+ *               is NUL-terminated and is measured with uprv_strlen()
+ * @param spanCondition the containment condition to test
+ * @return the start index of the trailing span; 0 for an empty string
+ */
+int32_t UnicodeSet::spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const {
+    // Fast path: only taken when the length is already known and positive.
+    // The bmpSet result is returned as is, with no pointer subtraction.
+    if(length>0 && bmpSet!=NULL) {
+        const uint8_t *s0=(const uint8_t *)s;
+        return bmpSet->spanBackUTF8(s0, length, spanCondition);
+    }
+    if(length<0) {
+        // Explicit cast: the string length result is wider than int32_t on
+        // some platforms; make the narrowing visible like the other casts here.
+        length=(int32_t)uprv_strlen(s);
+    }
+    if(length==0) {
+        return 0;
+    }
+    if(stringSpan!=NULL) {
+        // A prebuilt string span object exists (set up by freeze()).
+        return stringSpan->spanBackUTF8((const uint8_t *)s, length, spanCondition);
+    } else if(!strings->isEmpty()) {
+        // The set has strings but no prebuilt helper: build a temporary one
+        // for just this direction, encoding and condition.
+        uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
+                            UnicodeSetStringSpan::BACK_UTF8_NOT_CONTAINED :
+                            UnicodeSetStringSpan::BACK_UTF8_CONTAINED;
+        UnicodeSetStringSpan strSpan(*this, *strings, which);
+        if(strSpan.needsStringSpanUTF8()) {
+            return strSpan.spanBackUTF8((const uint8_t *)s, length, spanCondition);
+        }
+        // Otherwise fall through to the plain code point loop below.
+    }
+
+    if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
+        spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
+    }
+
+    // Walk backward one code point at a time, using length as the moving
+    // index; prev is the start of the last code point that matched.
+    // length is non-zero here, so the do-while may read unconditionally.
+    UChar32 c;
+    int32_t prev=length;
+    do {
+        U8_PREV(s, 0, length, c);
+        if(c<0) {
+            c=0xfffd;
+        }
+        if(spanCondition!=contains(c)) {
+            break;
+        }
+    } while((prev=length)>0);
+    return prev;
+}
+
 U_NAMESPACE_END
--- a/icu4c/source/common/uniset_props.cpp
+++ b/icu4c/source/common/uniset_props.cpp
@ -248,8 +248,8 @@ isPOSIXClose(const UnicodeString &pattern, int32_t pos) {
 */
 UnicodeSet::UnicodeSet(const UnicodeString& pattern,
                       UErrorCode& status) :
-    len(0), capacity(START_EXTRA), list(0), buffer(0),
-    bufferCapacity(0), patLen(0), pat(NULL), strings(NULL)
+    len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0),
+    bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL)
 {   
    if(U_SUCCESS(status)){
        list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
@ -276,8 +276,8 @@ UnicodeSet::UnicodeSet(const UnicodeString& pattern,
                       uint32_t options,
                       const SymbolTable* symbols,
                       UErrorCode& status) :
-    len(0), capacity(START_EXTRA), list(0), buffer(0),
-    bufferCapacity(0), patLen(0), pat(NULL), strings(NULL)
+    len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0),
+    bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL)
 {   
    if(U_SUCCESS(status)){
        list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
@ -296,8 +296,8 @@ UnicodeSet::UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
                       uint32_t options,
                       const SymbolTable* symbols,
                       UErrorCode& status) :
-    len(0), capacity(START_EXTRA), list(0), buffer(0),
-    bufferCapacity(0), patLen(0), pat(NULL), strings(NULL)
+    len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0),
+    bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL)
 {
    if(U_SUCCESS(status)){
        list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
@ -348,7 +348,7 @@ UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
                                     uint32_t options,
                                     const SymbolTable* symbols,
                                     UErrorCode& status) {
-    if (U_FAILURE(status)) {
+    if (U_FAILURE(status) || isFrozen()) {
        return *this;
    }

@ -374,7 +374,7 @@ UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
                              uint32_t options,
                              const SymbolTable* symbols,
                              UErrorCode& status) {
-    if (U_FAILURE(status)) {
+    if (U_FAILURE(status) || isFrozen()) {
        return *this;
    }
    // Need to build the pattern in a temporary string because
@ -938,7 +938,7 @@ static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) {

 UnicodeSet&
 UnicodeSet::applyIntPropertyValue(UProperty prop, int32_t value, UErrorCode& ec) {
-    if (U_FAILURE(ec)) return *this;
+    if (U_FAILURE(ec) || isFrozen()) return *this;

    if (prop == UCHAR_GENERAL_CATEGORY_MASK) {
        applyFilter(generalCategoryMaskFilter, &value, UPROPS_SRC_CHAR, ec);
@ -953,7 +953,7 @@ UnicodeSet&
 UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
                               const UnicodeString& value,
                               UErrorCode& ec) {
-    if (U_FAILURE(ec)) return *this;
+    if (U_FAILURE(ec) || isFrozen()) return *this;

    // prop and value used to be converted to char * using the default
    // converter instead of the invariant conversion.
@ -1293,6 +1293,9 @@ addCaseMapping(UnicodeSet &set, int32_t result, const UChar *full, UnicodeString
 }

 UnicodeSet& UnicodeSet::closeOver(int32_t attribute) {
+    if (isFrozen()) {
+        return *this;
+    }
    if (attribute & (USET_CASE_INSENSITIVE | USET_ADD_CASE_MAPPINGS)) {
        UErrorCode status = U_ZERO_ERROR;
        const UCaseProps *csp = ucase_getSingleton(&status);
--- a/icu4c/source/common/unisetspan.cpp
+++ b/icu4c/source/common/unisetspan.cpp
--- a/icu4c/source/common/unisetspan.h
+++ b/icu4c/source/common/unisetspan.h
@ -0,0 +1,155 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 2007, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*   file name:  unisetspan.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2007mar01
+*   created by: Markus W. Scherer
+*/
+
+#ifndef __UNISETSPAN_H__
+#define __UNISETSPAN_H__
+
+#include "unicode/utypes.h"
+#include "unicode/uniset.h"
+
+U_NAMESPACE_BEGIN
+
+/*
+ * Implement span() etc. for a set with strings.
+ * Avoid recursion because of its exponential complexity.
+ * Instead, try multiple paths at once and track them with an IndexList.
+ */
+class UnicodeSetStringSpan : public UMemory {
+public:
+    /*
+     * Which span() variant will be used?
+     * The object is either built for one variant and used once,
+     * or built for all and may be used many times.
+     *
+     * The basic values are bit flags. A single variant is named by ORing
+     * one of FWD/BACK, one of UTF16/UTF8 and one of CONTAINED/NOT_CONTAINED,
+     * as the combined constants below do; ALL has every flag set.
+     */
+    enum {
+        FWD             = 0x20,
+        BACK            = 0x10,
+        UTF16           = 8,
+        UTF8            = 4,
+        CONTAINED       = 2,
+        NOT_CONTAINED   = 1,
+
+        ALL             = 0x3f,
+
+        FWD_UTF16_CONTAINED     = FWD  | UTF16 |     CONTAINED,
+        FWD_UTF16_NOT_CONTAINED = FWD  | UTF16 | NOT_CONTAINED,
+        FWD_UTF8_CONTAINED      = FWD  | UTF8  |     CONTAINED,
+        FWD_UTF8_NOT_CONTAINED  = FWD  | UTF8  | NOT_CONTAINED,
+        BACK_UTF16_CONTAINED    = BACK | UTF16 |     CONTAINED,
+        BACK_UTF16_NOT_CONTAINED= BACK | UTF16 | NOT_CONTAINED,
+        BACK_UTF8_CONTAINED     = BACK | UTF8  |     CONTAINED,
+        BACK_UTF8_NOT_CONTAINED = BACK | UTF8  | NOT_CONTAINED
+    };
+
+    // Build the helper for the given parent set and its strings.
+    // which is one of the enum constants above, or ALL.
+    UnicodeSetStringSpan(const UnicodeSet &set, const UVector &setStrings, uint32_t which);
+
+    // Copy constructor. Assumes which==ALL for a frozen set.
+    UnicodeSetStringSpan(const UnicodeSetStringSpan &otherStringSpan, const UVector &newParentSetStrings);
+
+    ~UnicodeSetStringSpan();
+
+    /*
+     * Do the strings need to be checked in span() etc.?
+     * These are read-only queries and can be called on a const object.
+     * @return TRUE if strings need to be checked (call span() here),
+     *         FALSE if not (use a BMPSet for best performance).
+     */
+    inline UBool needsStringSpanUTF16() const;
+    inline UBool needsStringSpanUTF8() const;
+
+    // For fast UnicodeSet::contains(c).
+    inline UBool contains(UChar32 c) const;
+
+    int32_t span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
+
+    int32_t spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
+
+    int32_t spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const;
+
+    int32_t spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const;
+
+private:
+    // Special spanLength byte values.
+    enum {
+        // The spanLength is >=0xfe.
+        LONG_SPAN=0xfe,
+        // All code points in the string are contained in the parent set.
+        ALL_CP_CONTAINED=0xff
+    };
+
+    // Add a starting or ending string character to the spanNotSet
+    // so that a character span ends before any string.
+    void addToSpanNotSet(UChar32 c);
+
+    int32_t spanNot(const UChar *s, int32_t length) const;
+    int32_t spanNotBack(const UChar *s, int32_t length) const;
+    int32_t spanNotUTF8(const uint8_t *s, int32_t length) const;
+    int32_t spanNotBackUTF8(const uint8_t *s, int32_t length) const;
+
+    // Set for span(). Same as parent but without strings.
+    UnicodeSet spanSet;
+
+    // Set for span(not contained).
+    // Same as spanSet, plus characters that start or end strings.
+    UnicodeSet *pSpanNotSet;
+
+    // The strings of the parent set.
+    const UVector &strings;
+
+    // Pointer to the UTF-8 string lengths.
+    // Also pointer to further allocated storage for meta data and
+    // UTF-8 string contents as necessary.
+    int32_t *utf8Lengths;
+
+    // Pointer to the part of the (utf8Lengths) memory block that stores
+    // the lengths of span(), spanBack() etc. for each string.
+    uint8_t *spanLengths;
+
+    // Pointer to the part of the (utf8Lengths) memory block that stores
+    // the UTF-8 versions of the parent set's strings.
+    uint8_t *utf8;
+
+    // Number of bytes for all UTF-8 versions of strings together.
+    int32_t utf8Length;
+
+    // Maximum lengths of relevant strings.
+    int32_t maxLength16;
+    int32_t maxLength8;
+
+    // Set up for all variants of span()?
+    UBool all;
+
+    // Memory for small numbers and lengths of strings.
+    // For example, for 8 strings:
+    // 8 UTF-8 lengths, 8*4 bytes span lengths, 8*2 3-byte UTF-8 characters
+    // = 112 bytes = int32_t[28].
+    int32_t staticLengths[32];
+};
+
+// TRUE if maxLength16 is non-zero.
+UBool UnicodeSetStringSpan::needsStringSpanUTF16() const {
+    return (UBool)(maxLength16!=0);
+}
+
+// TRUE if maxLength8 is non-zero.
+UBool UnicodeSetStringSpan::needsStringSpanUTF8() const {
+    return (UBool)(maxLength8!=0);
+}
+
+// Code point lookup in the string-free copy of the parent set.
+UBool UnicodeSetStringSpan::contains(UChar32 c) const {
+    return spanSet.contains(c);
+}
+
+U_NAMESPACE_END
+
+#endif
--- a/icu4c/source/common/uset.cpp
+++ b/icu4c/source/common/uset.cpp
@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 2002-2006, International Business Machines
+*   Copyright (C) 2002-2007, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@ -41,6 +41,26 @@ uset_close(USet* set) {
    delete (UnicodeSet*) set;
 }

+// Returns the result of UnicodeSet::clone() for the given set.
+// The parameter is const, so the cast keeps that qualifier instead of dropping it.
+U_DRAFT USet * U_EXPORT2
+uset_clone(const USet *set) {
+    return (USet*) (((const UnicodeSet*) set)->UnicodeSet::clone());
+}
+
+// Returns the result of UnicodeSet::isFrozen() for the given set.
+// The parameter is const, so the cast keeps that qualifier instead of dropping it.
+U_DRAFT UBool U_EXPORT2
+uset_isFrozen(const USet *set) {
+    return ((const UnicodeSet*) set)->UnicodeSet::isFrozen();
+}
+
+// Forwards to UnicodeSet::freeze() on the underlying C++ set object.
+U_DRAFT void U_EXPORT2
+uset_freeze(USet *set) {
+    ((UnicodeSet*) set)->UnicodeSet::freeze();
+}
+
+// Returns the result of UnicodeSet::cloneAsThawed() for the given set.
+// The parameter is const, so the cast keeps that qualifier instead of dropping it.
+U_DRAFT USet * U_EXPORT2
+uset_cloneAsThawed(const USet *set) {
+    return (USet*) (((const UnicodeSet*) set)->UnicodeSet::cloneAsThawed());
+}
+
 U_CAPI void U_EXPORT2
 uset_set(USet* set,
     UChar32 start, UChar32 end) {
@ -64,12 +84,8 @@ uset_addRange(USet* set, UChar32 start, UChar32 end) {

 U_CAPI void U_EXPORT2
 uset_addString(USet* set, const UChar* str, int32_t strLen) {
-    // WRONG! Do not alias, it will stay aliased, even after 
-    // copying. TODO: do we need a copy ctor that unaliases
-    //UnicodeString s(strLen==-1, str, strLen);
-
    // UnicodeString handles -1 for strLen
-    UnicodeString s(str, strLen);
+    UnicodeString s(strLen<0, str, strLen);
    ((UnicodeSet*) set)->UnicodeSet::add(s);
 }

@ -174,6 +190,26 @@ uset_containsSome(const USet* set1, const USet* set2) {
    return ((const UnicodeSet*) set1)->UnicodeSet::containsSome(* (const UnicodeSet*) set2);
 }

+// Forwards to UnicodeSet::span() for UTF-16 text and returns its result.
+// The set parameter is const, so the cast keeps that qualifier.
+U_DRAFT int32_t U_EXPORT2
+uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition) {
+    return ((const UnicodeSet*) set)->UnicodeSet::span(s, length, spanCondition);
+}
+
+// Forwards to UnicodeSet::spanBack() for UTF-16 text and returns its result.
+// The set parameter is const, so the cast keeps that qualifier.
+U_DRAFT int32_t U_EXPORT2
+uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition) {
+    return ((const UnicodeSet*) set)->UnicodeSet::spanBack(s, length, spanCondition);
+}
+
+// Forwards to UnicodeSet::spanUTF8() for UTF-8 text and returns its result.
+// The set parameter is const, so the cast keeps that qualifier.
+U_DRAFT int32_t U_EXPORT2
+uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition) {
+    return ((const UnicodeSet*) set)->UnicodeSet::spanUTF8(s, length, spanCondition);
+}
+
+// Forwards to UnicodeSet::spanBackUTF8() for UTF-8 text and returns its result.
+// The set parameter is const, so the cast keeps that qualifier.
+U_DRAFT int32_t U_EXPORT2
+uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition) {
+    return ((const UnicodeSet*) set)->UnicodeSet::spanBackUTF8(s, length, spanCondition);
+}
+
 U_CAPI UBool U_EXPORT2
 uset_equals(const USet* set1, const USet* set2) {
    return *(const UnicodeSet*)set1 == *(const UnicodeSet*)set2;
--- a/icu4c/source/test/cintltst/usettest.c
+++ b/icu4c/source/test/cintltst/usettest.c
@ -1,6 +1,6 @@
 /*
 **********************************************************************
-* Copyright (c) 2002-2005, International Business Machines
+* Copyright (c) 2002-2007, International Business Machines
 * Corporation and others.  All Rights Reserved.
 **********************************************************************
 */
@ -19,6 +19,8 @@ static void Testj2269(void);
 static void TestSerialized(void);
 static void TestNonInvariantPattern(void);
 static void TestBadPattern(void);
+static void TestFreezable(void);
+static void TestSpan(void);

 void addUSetTest(TestNode** root);

@ -40,6 +42,8 @@ addUSetTest(TestNode** root) {
    TEST(TestSerialized);
    TEST(TestNonInvariantPattern);
    TEST(TestBadPattern);
+    TEST(TestFreezable);
+    TEST(TestSpan);
 }

 /*------------------------------------------------------------------
@ -529,4 +533,80 @@ static void TestBadPattern(void) {
    }
 }

+/*
+ * Open a set for the pattern [:ID_Continue:].
+ * Returns NULL, after logging the error code, if opening the pattern fails.
+ * Otherwise the caller closes the returned set with uset_close().
+ */
+static USet *openIDSet(void) {
+    UErrorCode errorCode = U_ZERO_ERROR;
+    USet *set;
+    U_STRING_DECL(pattern, "[:ID_Continue:]", 15);
+    U_STRING_INIT(pattern, "[:ID_Continue:]", 15);
+    set = uset_openPattern(pattern, 15, &errorCode);
+    if(U_FAILURE(errorCode)) {
+        /* Report the failure here instead of handing back an unusable set silently. */
+        log_err("uset_openPattern([:ID_Continue:]) failed - %s\n", u_errorName(errorCode));
+        uset_close(set);
+        return NULL;
+    }
+    return set;
+}
+
+/*
+ * Test uset_clone(), uset_freeze(), uset_isFrozen() and uset_cloneAsThawed():
+ * a frozen clone must ignore modifications, a thawed clone must accept them.
+ */
+static void TestFreezable(void) {
+    USet *idSet=openIDSet();
+    USet *frozen;
+    USet *thawed;
+    if(idSet==NULL) {
+        /* Nothing to test; the uset_ calls below would dereference NULL. */
+        log_err("TestFreezable: openIDSet() returned NULL\n");
+        return;
+    }
+    frozen=uset_clone(idSet);
+    if(!uset_equals(frozen, idSet)) {
+        log_err("uset_clone() did not make an equal copy\n");
+    }
+    uset_freeze(frozen);
+    /* Adding to the frozen set must not change it. */
+    uset_addRange(frozen, 0xd802, 0xd805);
+    if(uset_isFrozen(idSet) || !uset_isFrozen(frozen) || !uset_equals(frozen, idSet)) {
+        log_err("uset_freeze() or uset_isFrozen() does not work\n");
+    }
+    thawed=uset_cloneAsThawed(frozen);
+    /* Adding to the thawed clone must work. */
+    uset_addRange(thawed, 0xd802, 0xd805);
+    if(uset_isFrozen(thawed) || uset_equals(thawed, idSet) || !uset_containsRange(thawed, 0xd802, 0xd805)) {
+        log_err("uset_cloneAsThawed() does not work\n");
+    }
+    uset_close(idSet);
+    uset_close(frozen);
+    uset_close(thawed);
+}
+
+/*
+ * Test the uset_span() family on the same two-character text in UTF-16 and
+ * UTF-8, first with a modifiable set and then again after uset_freeze().
+ */
+static void TestSpan(void) {
+    static const UChar s16[2]={ 0xe01, 0x3000 };
+    static const char* s8="\xE0\xB8\x81\xE3\x80\x80";
+
+    USet *idSet=openIDSet();
+
+    if(idSet==NULL) {
+        /* Nothing to test; the uset_ calls below would dereference NULL. */
+        log_err("TestSpan: openIDSet() returned NULL\n");
+        return;
+    }
+
+    if(
+        1!=uset_span(idSet, s16, 2, USET_SPAN_CONTAINED) ||
+        0!=uset_span(idSet, s16, 2, USET_SPAN_NOT_CONTAINED) ||
+        2!=uset_spanBack(idSet, s16, 2, USET_SPAN_CONTAINED) ||
+        1!=uset_spanBack(idSet, s16, 2, USET_SPAN_NOT_CONTAINED)
+    ) {
+        log_err("uset_span() or uset_spanBack() does not work\n");
+    }
+
+    if(
+        3!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) ||
+        0!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED) ||
+        6!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) ||
+        3!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED)
+    ) {
+        log_err("uset_spanUTF8() or uset_spanBackUTF8() does not work\n");
+    }
+
+    /* The same expectations must hold for the frozen set. */
+    uset_freeze(idSet);
+
+    if(
+        1!=uset_span(idSet, s16, 2, USET_SPAN_CONTAINED) ||
+        0!=uset_span(idSet, s16, 2, USET_SPAN_NOT_CONTAINED) ||
+        2!=uset_spanBack(idSet, s16, 2, USET_SPAN_CONTAINED) ||
+        1!=uset_spanBack(idSet, s16, 2, USET_SPAN_NOT_CONTAINED)
+    ) {
+        log_err("uset_span(frozen) or uset_spanBack(frozen) does not work\n");
+    }
+
+    if(
+        3!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) ||
+        0!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED) ||
+        6!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) ||
+        3!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED)
+    ) {
+        log_err("uset_spanUTF8(frozen) or uset_spanBackUTF8(frozen) does not work\n");
+    }
+
+    uset_close(idSet);
+}
+
 /*eof*/
--- a/icu4c/source/test/intltest/ittrans.cpp
+++ b/icu4c/source/test/intltest/ittrans.cpp
@ -1,6 +1,6 @@
 /***************************************************************************
 *
-*   Copyright (C) 2000-2004, International Business Machines
+*   Copyright (C) 2000-2007, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 ************************************************************************
@ -21,7 +21,6 @@
 #include "transapi.h"
 #include "cpdtrtst.h"
 #include "transrt.h"
-#include "usettest.h"
 #include "jamotest.h"
 #include "trnserr.h"
 #include "reptest.h"
@ -29,7 +28,7 @@
 #define CASE(id,test) case id:                                \
                          name = #test;                       \
                          if (exec) {                         \
-                              logln(#test "---"); logln("");  \
+                              logln(#test "---"); logln();    \
                              test t;                         \
                              callTest(t, par);               \
                          }                                   \
@ -43,12 +42,11 @@ void IntlTestTransliterator::runIndexedTest( int32_t index, UBool exec, const ch
        CASE(1, TransliteratorAPITest);
        CASE(2, CompoundTransliteratorTest);
        CASE(3, TransliteratorRoundTripTest);
-        CASE(4, UnicodeSetTest);
-        CASE(5, JamoTest);
-        CASE(6, TransliteratorErrorTest);
-        CASE(7, ReplaceableTest);
+        CASE(4, JamoTest);
+        CASE(5, TransliteratorErrorTest);
+        CASE(6, ReplaceableTest);
 #if !UCONFIG_NO_TRANSLITERATION && defined(U_USE_UNICODE_FILTER_LOGIC_OBSOLETE_2_8)
-        CASE(10, UnicodeFilterLogicTest);
+        CASE(7, UnicodeFilterLogicTest);
 #endif

        default: name=""; break;
--- a/icu4c/source/test/intltest/itutil.cpp
+++ b/icu4c/source/test/intltest/itutil.cpp
@ -1,6 +1,6 @@
 /********************************************************************
 * COPYRIGHT: 
- * Copyright (c) 1997-2005, International Business Machines Corporation and
+ * Copyright (c) 1997-2007, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************/

@ -25,141 +25,41 @@
 #include "v32test.h"
 #include "uvectest.h" 
 #include "aliastst.h"
+#include "usettest.h"
 //#include "custrtest.h"
 //#include "ccitrtst.h"
 //#include "cloctest.h"
 //#include "ctres.h"
 //#include "ctucd.h"

+// Expands to one switch case of runIndexedTest(): sets name to the test
+// class name and, if exec is set, logs the name, constructs a test object
+// and runs it via callTest(). The expansion ends in "break" without a
+// semicolon, so each use is written as a statement: CASE(0, SomeTest);
+#define CASE(id, test) case id:                               \
+                          name = #test;                       \
+                          if (exec) {                         \
+                              logln(#test "---"); logln();    \
+                              test t;                         \
+                              callTest(t, par);               \
+                          }                                   \
+                          break
+
 void IntlTestUtilities::runIndexedTest( int32_t index, UBool exec, const char* &name, char* par )
 {
    if (exec) logln("TestSuite Utilities: ");
    switch (index) {
-        case 0:
-            name = "MultithreadTest"; 
-            if (exec) {
-                logln("MultithreadTest---"); logln("");
-                MultithreadTest test;
-                callTest( test, par );
-            }
-            break;
-
-        case 1:
-            name = "StringTest"; 
-            if (exec) {
-                logln("StringTest---"); logln("");
-                StringTest test;
-                callTest( test, par );
-            }
-            break;
-
-        case 2:
-            name = "UnicodeStringTest"; 
-            if (exec) {
-                logln("UnicodeStringTest---"); logln("");
-                UnicodeStringTest test;
-                callTest( test, par );
-            }
-            break;
-
-        case 3:
-            name = "LocaleTest"; 
-            if (exec) {
-                logln("LocaleTest---"); logln("");
-                LocaleTest test;
-                callTest( test, par );
-            }
-            break;
-
-        case 4:
-            name = "CharIterTest"; 
-            if (exec) {
-                logln("CharIterTest---"); logln("");
-                CharIterTest test;
-                callTest( test, par );
-            }
-            break;
-
-        case 5:
-            name = "UnicodeTest"; 
-            if (exec) {
-                logln("UnicodeTest---"); logln("");
-                UnicodeTest test;
-                callTest( test, par );
-            }
-            break;
-
-        case 6:
-            name = "ResourceBundleTest"; 
-            if (exec) {
-                logln("ResourceBundleTest---"); logln("");
-                ResourceBundleTest test;
-                callTest( test, par );
-            }
-            break;
-        case 7:
-            name = "NewResourceBundleTest"; 
-            if (exec) {
-                logln("NewResourceBundleTest---"); logln("");
-                NewResourceBundleTest test;
-                callTest( test, par );
-            }
-            break;
-
-        case 8:
-            name = "PUtilTest"; 
-            if (exec) {
-                logln("PUtilTest---"); logln("");
-                PUtilTest test;
-                callTest( test, par );
-            }
-            break;
-            
-        case 9:
-            name = "UObjectTest";
-            if(exec) {
-                logln ("UObjectTest---"); logln("");
-                UObjectTest test;
-                callTest( test, par );
-            }
-            break;;
-
-        case 10:
-            name = "UVector32Test";
-            if(exec) {
-                logln ("UVector32Test---"); logln("");
-                UVector32Test test;
-                callTest( test, par );
-            }
-            break;;
-
-        case 11:
-            name = "UVectorTest";
-            if(exec) {
-                logln ("UVectorTest---"); logln("");
-                UVectorTest test;
-                callTest( test, par );
-            }
-            break;;
-
-        case 12:
-            name = "UTextTest";
-            if(exec) {
-                logln ("UTextTest---"); logln("");
-                UTextTest test;
-                callTest( test, par );
-            }
-            break;
-
-         case 13:
-            name = "LocaleAliasTest"; 
-            if (exec) {
-                logln("LocaleAliasTest---"); logln("");
-                LocaleAliasTest test;
-                callTest( test, par );
-            }
-            break;
-
+        // One CASE per test class. Index 13 is LocaleAliasTest, as in the
+        // hand-written case it replaces; MultithreadTest is index 0 only.
+        CASE(0, MultithreadTest);
+        CASE(1, StringTest);
+        CASE(2, UnicodeStringTest);
+        CASE(3, LocaleTest);
+        CASE(4, CharIterTest);
+        CASE(5, UnicodeTest);
+        CASE(6, ResourceBundleTest);
+        CASE(7, NewResourceBundleTest);
+        CASE(8, PUtilTest);
+        CASE(9, UObjectTest);
+        CASE(10, UVector32Test);
+        CASE(11, UVectorTest);
+        CASE(12, UTextTest);
+        CASE(13, LocaleAliasTest);
+        CASE(14, UnicodeSetTest);
        default: name = ""; break; //needed to end loop
    }
 }
--- a/icu4c/source/test/intltest/usettest.cpp
+++ b/icu4c/source/test/intltest/usettest.cpp
--- a/icu4c/source/test/intltest/usettest.h
+++ b/icu4c/source/test/intltest/usettest.h
@ -1,7 +1,7 @@

 /********************************************************************
 * COPYRIGHT: 
- * Copyright (c) 1997-2006, International Business Machines Corporation and
+ * Copyright (c) 1997-2007, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************
 **********************************************************************
@ -16,16 +16,21 @@

 #include "unicode/unistr.h"
 #include "unicode/uniset.h"
+#include "unicode/ucnv_err.h"
 #include "intltest.h"

+class UnicodeSetWithStrings;
+
 /**
 * UnicodeSet test
 */
 class UnicodeSetTest: public IntlTest {
+public:
+    UnicodeSetTest();
+    ~UnicodeSetTest();

-    void runIndexedTest(int32_t index, UBool exec, const char* &name, char* par=NULL);
-    
 private:
+    void runIndexedTest(int32_t index, UBool exec, const char* &name, char* par=NULL);

    void Testj2268();

@ -76,6 +81,12 @@ private:

    void TestPosixClasses();

+    void TestFreezable();
+
+    void TestSpan();
+
+    void TestStringSpan();
+
 private:

    UBool toPatternAux(UChar32 start, UChar32 end);
@ -152,6 +163,26 @@ private:
                     const UnicodeSet& set,
                     UChar32 start, UChar32 end);
    void doAssert(UBool, const char*);
+
+    void testSpan(const UnicodeSetWithStrings *sets[4], const void *s, int32_t length, UBool isUTF16,
+                  uint32_t whichSpans,
+                  int32_t expectLimits[], int32_t &expectCount,
+                  const char *testName, int32_t index);
+    void testSpan(const UnicodeSetWithStrings *sets[4], const void *s, int32_t length, UBool isUTF16,
+                  uint32_t whichSpans,
+                  const char *testName, int32_t index);
+    void testSpanBothUTFs(const UnicodeSetWithStrings *sets[4],
+                          const UChar *s16, int32_t length16,
+                          uint32_t whichSpans,
+                          const char *testName, int32_t index);
+    void testSpanContents(const UnicodeSetWithStrings *sets[4], uint32_t whichSpans, const char *testName);
+    void testSpanUTF16String(const UnicodeSetWithStrings *sets[4], uint32_t whichSpans, const char *testName);
+    void testSpanUTF8String(const UnicodeSetWithStrings *sets[4], uint32_t whichSpans, const char *testName);
+
+    UConverter *openUTF8Converter();
+
+    UConverter *utf8Cnv;
+
 public:
    static UnicodeString escape(const UnicodeString& s);
 };
--- a/icu4c/source/test/letest/letest.vcproj
+++ b/icu4c/source/test/letest/letest.vcproj
@ -70,7 +70,7 @@
 			/>
 			<Tool
 				Name="VCLinkerTool"
-				AdditionalDependencies="../../../lib/icule.lib ../../../lib/icuuc.lib odbc32.lib odbccp32.lib"
+				AdditionalDependencies="../../../lib/icule.lib ../../../lib/icuuc.lib"
 				OutputFile=".\Release/letest.exe"
 				LinkIncremental="1"
 				SuppressStartupBanner="true"
--- a/icu4c/source/test/perf/unisetperf/Makefile.in
+++ b/icu4c/source/test/perf/unisetperf/Makefile.in
@ -0,0 +1,78 @@
+## Makefile.in for ICU - test/perf/unisetperf
+## Copyright (c) 2001-2007, International Business Machines Corporation and
+## others. All Rights Reserved.
+
+## Source directory information
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+
+top_builddir = ../../..
+
+include $(top_builddir)/icudefs.mk
+
+## Build directory information
+subdir = test/perf/unisetperf
+
+## Extra files to remove for 'make clean'
+CLEANFILES = *~ $(DEPS)
+
+## Target information
+TARGET = unisetperf
+
+CPPFLAGS += -I$(top_builddir)/common -I$(top_srcdir)/common -I$(top_srcdir)/tools/toolutil -I$(top_srcdir)/tools/ctestfw
+LIBS = $(LIBCTESTFW) $(LIBICUI18N) $(LIBICUUC) $(LIBICUTOOLUTIL) $(DEFAULT_LIBS) $(LIB_M)
+
+OBJECTS = unisetperf.o
+
+DEPS = $(OBJECTS:.o=.d)
+
+## List of phony targets
+.PHONY : all all-local install install-local clean clean-local	\
+distclean distclean-local dist dist-local check check-local
+
+## Clear suffix list
+.SUFFIXES :
+
+## List of standard targets
+all: all-local
+install: install-local
+clean: clean-local
+distclean : distclean-local
+dist: dist-local
+check: all check-local
+
+all-local: $(TARGET)
+
+install-local:
+
+dist-local:
+
+clean-local:
+	test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
+	$(RMV) $(OBJECTS) $(TARGET)
+
+distclean-local: clean-local
+	$(RMV) Makefile
+
+check-local: all-local
+
+Makefile: $(srcdir)/Makefile.in  $(top_builddir)/config.status
+	cd $(top_builddir) \
+	 && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
+
+$(TARGET) : $(OBJECTS)
+	$(LINK.cc) -o $@ $^ $(LIBS)
+
+invoke:
+	ICU_DATA=$${ICU_DATA:-$(top_builddir)/data/} TZ=PST8PDT $(INVOKE) $(INVOCATION)
+
+ifeq (,$(MAKECMDGOALS))
+-include $(DEPS)
+else
+ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
+ifneq ($(patsubst %install,,$(MAKECMDGOALS)),)
+-include $(DEPS)
+endif
+endif
+endif
+
--- a/icu4c/source/test/perf/unisetperf/draft/bitset.cpp
+++ b/icu4c/source/test/perf/unisetperf/draft/bitset.cpp
@ -0,0 +1,197 @@
+/*  
+**********************************************************************
+*   Copyright (C) 2007, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   file name:  bitset.cpp
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2007jan15
+*   created by: Markus Scherer
+*
+*   Idea for a "compiled", fast, read-only (immutable) version of a UnicodeSet
+*   using a folded bit set consisting of a 1k-entry index table and a
+*   compacted array of 64-bit words.
+*   Uses a simple hash table for compaction.
+*   Uses the original set for supplementary code points.
+*/
+
+#include "unicode/utypes.h"
+#include "unicont.h"
+
+/*
+ * Hash table for up to 1k 64-bit words, for 1 bit per BMP code point.
+ * Hashes 64-bit words and maps them to 16-bit integers which are
+ * assigned in order of new incoming words for subsequent storage
+ * in a contiguous array.
+ */
+struct BMPBitHash : public UObject {
+    int64_t keys[0x800];      // 2k slots: key stored in each used slot
+    uint16_t values[0x800];   // slot -> assigned number, 0xffff marks an unused slot
+    uint16_t reverse[0x400];  // assigned number -> slot
+    uint16_t count;           // number of distinct keys mapped so far
+
+    // Step for moving to another slot after a collision.
+    // An enum constant rather than a const data member: it needs neither
+    // per-object storage nor an in-class member initializer.
+    enum { prime=1301 };  // Less than 2k.
+
+    BMPBitHash() : count(0) {
+        // Fill values[] with 0xffff.
+        uprv_memset(values, 0xff, sizeof(values));
+    }
+
+    /*
+     * Map a key to an integer count.
+     * Map at most 1k=0x400 different keys with this data structure.
+     * A key that was seen before gets the same number again;
+     * a new key gets the next unused number, starting at 0.
+     */
+    uint16_t map(int64_t key) {
+        // Fold all 64 key bits into an 11-bit slot number.
+        int32_t hash=((int32_t)(key>>55))&0x1ff;
+        hash^=((int32_t)(key>>44))&0x7ff;
+        hash^=((int32_t)(key>>33))&0x7ff;
+        hash^=((int32_t)(key>>22))&0x7ff;
+        hash^=((int32_t)(key>>11))&0x7ff;
+        hash^=((int32_t)key)&0x7ff;
+        for(;;) {
+            if(values[hash]==0xffff) {
+                // Unused slot.
+                keys[hash]=key;
+                reverse[count]=(uint16_t)hash;
+                return values[hash]=count++;
+            } else if(keys[hash]==key) {
+                // Found a slot with this key.
+                return values[hash];
+            } else {
+                // Used slot with a different key, move to another slot.
+                hash=(hash+prime)&0x7ff;
+            }
+        }
+    }
+
+    // Number of distinct keys that have been mapped.
+    uint16_t countKeys() const { return count; }
+
+    /*
+     * Invert the hash map: Fill an array of length countKeys() with the keys
+     * indexed by their mapped values.
+     */
+    void invert(int64_t *k) const {
+        uint16_t i;
+
+        for(i=0; i<count; ++i) {
+            k[i]=keys[reverse[i]];
+        }
+    }
+};
+
+/*
+ * Read-only container built from a UnicodeSet.
+ * BMP code points are looked up in 64-bit words that are reached through a
+ * 1k-entry index, so that equal words are stored only once; U+0000..U+00FF
+ * have their own 256-bit table. All other code points are looked up in a
+ * copy of the original set from which the BMP has been removed.
+ */
+class BitSet : public UObject, public UnicodeContainable {
+public:
+    /*
+     * Build the bit set from set.
+     * Sets errorCode to U_MEMORY_ALLOCATION_ERROR if an allocation fails;
+     * the object then reports no BMP code points.
+     */
+    BitSet(const UnicodeSet &set, UErrorCode &errorCode)
+            : bits(shortBits), restSet((UnicodeSet *)set.clone()) {
+        // Start with all-zero tables so that contains() reads defined data
+        // even when construction stops early.
+        uprv_memset(index, 0, sizeof(index));
+        uprv_memset(shortBits, 0, sizeof(shortBits));
+        uprv_memset(latin1Bits, 0, sizeof(latin1Bits));
+        if(U_FAILURE(errorCode)) {
+            return;
+        }
+        // The hash table is large, and is only needed during construction.
+        BMPBitHash *bitHash=new BMPBitHash;
+        if(bitHash==NULL || restSet==NULL) {
+            delete bitHash;
+            errorCode=U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+
+        UnicodeSetIterator iter(set);
+        int64_t b;
+        UChar32 start, end;
+        int32_t prevIndex, i, j;
+
+        b=0;    // Not necessary but makes compilers happy.
+        end=0;  // Same.
+        prevIndex=-1;
+        for(;;) {
+            if(iter.nextRange() && !iter.isString() && iter.getCodepoint()<=0xffff) {
+                start=iter.getCodepoint();
+                end=iter.getCodepointEnd();
+                if(end>0xffff) {
+                    // Only the BMP part of the range belongs into index[].
+                    end=0xffff;
+                }
+            } else {
+                // No more BMP ranges: use the first index past the BMP so
+                // that the remaining entries are finished and filled below.
+                start=0x10000;
+            }
+            i=start>>6;
+            if(prevIndex!=i) {
+                // Finish the end of the previous range.
+                if(prevIndex<0) {
+                    prevIndex=0;
+                } else {
+                    index[prevIndex++]=bitHash->map(b);
+                }
+                // Fill all-zero entries between ranges.
+                if(prevIndex<i) {
+                    uint16_t zero=bitHash->map(0);
+                    do {
+                        index[prevIndex++]=zero;
+                    } while(prevIndex<i);
+                }
+                b=0;
+            }
+            if(start>0xffff) {
+                break;
+            }
+            // Set the bits from start to the end of its word.
+            b|=bitsFrom(start&0x3f);
+            j=end>>6;
+            if(i<j) {
+                // Set bits for the start of the range.
+                index[i++]=bitHash->map(b);
+                // Fill all-one entries inside the range.
+                if(i<j) {
+                    uint16_t all=bitHash->map((int64_t)-1);
+                    do {
+                        index[i++]=all;
+                    } while(i<j);
+                }
+                b=(int64_t)-1;
+            }
+            /* i==j */
+            // Clear the bits above end; end itself is part of the range.
+            b&=bitsThrough(end&0x3f);
+            prevIndex=j;
+        }
+
+        int32_t wordCount=bitHash->countKeys();
+        if(wordCount>(int32_t)(sizeof(shortBits)/sizeof(shortBits[0]))) {
+            bits=(int64_t *)uprv_malloc(wordCount*sizeof(int64_t));
+        }
+        if(bits!=NULL) {
+            bitHash->invert(bits);
+        } else {
+            // index[] refers to words that could not be stored: reset it so
+            // that every entry points to the all-zero shortBits[0].
+            bits=shortBits;
+            uprv_memset(index, 0, sizeof(index));
+            delete bitHash;
+            errorCode=U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        delete bitHash;
+
+        // Copy the four words for U+0000..U+00FF, found via the index,
+        // into eight 32-bit words.
+        for(i=0; i<4; ++i) {
+            int64_t word=bits[index[i]];
+            latin1Bits[2*i]=(uint32_t)word;
+            latin1Bits[2*i+1]=(uint32_t)(word>>32);
+        }
+
+        restSet->remove(0, 0xffff);
+    }
+
+    ~BitSet() {
+        if(bits!=shortBits) {
+            uprv_free(bits);
+        }
+        delete restSet;
+    }
+
+    /*
+     * Is c in the set?
+     * U+0000..U+00FF use latin1Bits, the rest of the BMP uses index[] and
+     * bits[], everything else is looked up in restSet.
+     */
+    UBool contains(UChar32 c) const {
+        if((uint32_t)c<=0xff) {
+            return (UBool)((latin1Bits[c>>5]&((uint32_t)1<<(c&0x1f)))!=0);
+        } else if((uint32_t)c<=0xffff) {
+            return (UBool)((((uint64_t)bits[index[c>>6]]>>(c&0x3f))&1)!=0);
+        } else {
+            // restSet is NULL only if cloning failed in the constructor.
+            return (UBool)(restSet!=NULL && restSet->contains(c));
+        }
+    }
+
+private:
+    // Word with bits n..63 set, for n=0..63.
+    // Unsigned arithmetic avoids signed overflow for n==63.
+    static int64_t bitsFrom(int32_t n) {
+        return (int64_t)~(((uint64_t)1<<n)-1);
+    }
+
+    // Word with bits 0..n set, for n=0..63.
+    // For n==63 the shifted value wraps to 0, which yields all ones.
+    static int64_t bitsThrough(int32_t n) {
+        return (int64_t)((((uint64_t)1<<n)<<1)-1);
+    }
+
+    // For each block of 64 BMP code points, the number of its word in bits[].
+    uint16_t index[0x400];
+    // Storage for bits[] when there are few distinct words.
+    int64_t shortBits[32];
+    // Distinct 64-bit words; points to shortBits or to allocated memory.
+    int64_t *bits;
+
+    // One bit each for U+0000..U+00FF.
+    uint32_t latin1Bits[8];
+
+    // Copy of the original set without the BMP.
+    UnicodeSet *restSet;
+};
--- a/icu4c/source/test/perf/unisetperf/draft/contperf.bat
+++ b/icu4c/source/test/perf/unisetperf/draft/contperf.bat
@ -0,0 +1,19 @@
+rem  Copyright (c) 2007, International Business Machines Corporation and
+rem  others. All Rights Reserved.
+
+set PERF=c:\svn\icuproj\icu\ucnvutf8\source\test\perf\unisetperf\release\unisetperf
+rem types: slow Bv Bv0 B0
+rem --pattern [:White_Space:]
+
+for %%f in (udhr_eng.txt
+            udhr_deu.txt
+            udhr_fra.txt
+            udhr_rus.txt
+            udhr_tha.txt
+            udhr_jpn.txt
+            udhr_cmn.txt
+            udhr_jpn.html) do (
+  for %%t in (slow Bv Bv0 B0) do (
+    %PERF% Contains  --type %%t -f \temp\udhr\%%f --pattern [:White_Space:] -v -e UTF-8 --passes 3 --iterations 10000
+  )
+)
--- a/icu4c/source/test/perf/unisetperf/draft/contperf.sh
+++ b/icu4c/source/test/perf/unisetperf/draft/contperf.sh
@ -0,0 +1,23 @@
+#!/bin/sh
+# Copyright (c) 2007, International Business Machines Corporation and
+# others. All Rights Reserved.
+
+# Echo shell script commands.
+set -ex
+
+PERF=test/perf/unisetperf/unisetperf
+# slow Bv Bv0 B0
+# --pattern [:White_Space:]
+
+for file in udhr_eng.txt \
+            udhr_deu.txt \
+            udhr_fra.txt \
+            udhr_rus.txt \
+            udhr_tha.txt \
+            udhr_jpn.txt \
+            udhr_cmn.txt \
+            udhr_jpn.html; do
+  for type in slow Bv Bv0; do
+    $PERF Contains  --type $type -f ~/udhr/$file -v -e UTF-8 --passes 3 --iterations 10000
+  done
+done
--- a/icu4c/source/test/perf/unisetperf/draft/span16perf.bat
+++ b/icu4c/source/test/perf/unisetperf/draft/span16perf.bat
@ -0,0 +1,19 @@
+rem  Copyright (c) 2007, International Business Machines Corporation and
+rem  others. All Rights Reserved.
+
+rem  Runs the unisetperf SpanUTF16 test on every udhr input file in \temp\udhr,
+rem  once per --type value of the inner loop, with the [:White_Space:] pattern,
+rem  3 passes and 10000 iterations each.
+rem  PERF is a machine-specific absolute path to a release build of unisetperf.
+rem  NOTE review: the unisetperf usage text only lists the types slow and fast;
+rem  confirm that the other type names below are still understood.
+set PERF=c:\svn\icuproj\icu\ucnvutf8\source\test\perf\unisetperf\release\unisetperf
+rem types: slow Bv Bv0 B0
+rem --pattern [:White_Space:]
+
+for %%f in (udhr_eng.txt
+            udhr_deu.txt
+            udhr_fra.txt
+            udhr_rus.txt
+            udhr_tha.txt
+            udhr_jpn.txt
+            udhr_cmn.txt
+            udhr_jpn.html) do (
+  for %%t in (slow Bv Bv0) do (
+    %PERF% SpanUTF16 --type %%t -f \temp\udhr\%%f --pattern [:White_Space:] -v -e UTF-8 --passes 3 --iterations 10000
+  )
+)
--- a/icu4c/source/test/perf/unisetperf/draft/span16perf.sh
+++ b/icu4c/source/test/perf/unisetperf/draft/span16perf.sh
@ -0,0 +1,23 @@
+#!/bin/sh
+# Copyright (c) 2007, International Business Machines Corporation and
+# others. All Rights Reserved.
+
+# Runs the unisetperf "SpanUTF16" test on every udhr input file in ~/udhr,
+# once per --type value of the inner loop, with 3 passes and 10000 iterations.
+# No --pattern option is passed on the command line below.
+
+# Exit on the first failing command (-e) and echo commands as they run (-x).
+set -ex
+
+# Relative path to the test binary.
+# NOTE(review): presumably meant to be run from the ICU source/ directory - confirm.
+PERF=test/perf/unisetperf/unisetperf
+# slow Bv Bv0 B0
+# --pattern [:White_Space:]
+
+for file in udhr_eng.txt \
+            udhr_deu.txt \
+            udhr_fra.txt \
+            udhr_rus.txt \
+            udhr_tha.txt \
+            udhr_jpn.txt \
+            udhr_cmn.txt \
+            udhr_jpn.html; do
+  for type in slow Bv Bv0; do
+    $PERF SpanUTF16 --type $type -f ~/udhr/$file -v -e UTF-8 --passes 3 --iterations 10000
+  done
+done
--- a/icu4c/source/test/perf/unisetperf/draft/span8perf.bat
+++ b/icu4c/source/test/perf/unisetperf/draft/span8perf.bat
@ -0,0 +1,19 @@
+rem  Copyright (c) 2007, International Business Machines Corporation and
+rem  others. All Rights Reserved.
+
+rem  Runs the unisetperf SpanUTF8 test on every udhr input file in \temp\udhr,
+rem  once per --type value of the inner loop, with the [:White_Space:] pattern,
+rem  3 passes and 10000 iterations each.
+rem  PERF is a machine-specific absolute path to a release build of unisetperf.
+rem  NOTE review: the unisetperf usage text only lists the types slow and fast;
+rem  confirm that the other type names below are still understood.
+set PERF=c:\svn\icuproj\icu\ucnvutf8\source\test\perf\unisetperf\release\unisetperf
+rem types: slow Bh bh Bv Bv0 B0 BvF Bvp BvpF L Bvl BvL
+rem --pattern [:White_Space:]
+
+for %%f in (udhr_eng.txt
+            udhr_deu.txt
+            udhr_fra.txt
+            udhr_rus.txt
+            udhr_tha.txt
+            udhr_jpn.txt
+            udhr_cmn.txt
+            udhr_jpn.html) do (
+  for %%t in (slow BvF BvpF Bvl BvL) do (
+    %PERF% SpanUTF8  --type %%t -f \temp\udhr\%%f --pattern [:White_Space:] -v -e UTF-8 --passes 3 --iterations 10000
+  )
+)
--- a/icu4c/source/test/perf/unisetperf/draft/span8perf.sh
+++ b/icu4c/source/test/perf/unisetperf/draft/span8perf.sh
@ -0,0 +1,23 @@
+#!/bin/sh
+# Copyright (c) 2007, International Business Machines Corporation and
+# others. All Rights Reserved.
+
+# Runs the unisetperf "SpanUTF8" test on every udhr input file in ~/udhr,
+# once per --type value of the inner loop, with 3 passes and 10000 iterations.
+# No --pattern option is passed on the command line below.
+
+# Exit on the first failing command (-e) and echo commands as they run (-x).
+set -ex
+
+# Relative path to the test binary.
+# NOTE(review): presumably meant to be run from the ICU source/ directory - confirm.
+PERF=test/perf/unisetperf/unisetperf
+# slow Bh bh Bv Bv0 B0 BvF Bvp BvpF L Bvl BvL
+# --pattern [:White_Space:]
+
+for file in udhr_eng.txt \
+            udhr_deu.txt \
+            udhr_fra.txt \
+            udhr_rus.txt \
+            udhr_tha.txt \
+            udhr_jpn.txt \
+            udhr_cmn.txt \
+            udhr_jpn.html; do
+  for type in slow BvF BvpF Bvl BvL; do
+    $PERF SpanUTF8  --type $type -f ~/udhr/$file -v -e UTF-8 --passes 3 --iterations 10000
+  done
+done
--- a/icu4c/source/test/perf/unisetperf/draft/trieset.cpp
+++ b/icu4c/source/test/perf/unisetperf/draft/trieset.cpp
@ -0,0 +1,111 @@
+/*  
+**********************************************************************
+*   Copyright (C) 2007, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   file name:  trieset.cpp
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2007jan15
+*   created by: Markus Scherer
+*
+*   Idea for a "compiled", fast, read-only (immutable) version of a UnicodeSet
+*   using a UTrie with 8-bit (byte) results per code point.
+*   Modifies the trie index to make the BMP linear, and uses the original set
+*   for supplementary code points.
+*/
+
+#include "unicode/utypes.h"
+#include "unicont.h"
+
+// Byte pointer UTRIE_DATA_BLOCK_LENGTH bytes past the start of the trie's data32 array.
+// TrieSet::contains() indexes the result directly with code points <= 0xff.
+// NOTE(review): assumes that an 8-bit serialized trie stores the Latin-1 results
+// linearly right after the first data block - confirm against the utrie implementation.
+#define UTRIE_GET8_LATIN1(trie) ((const uint8_t *)(trie)->data32+UTRIE_DATA_BLOCK_LENGTH)
+
+// Reads the byte result for a 16-bit code unit c16 from the trie data viewed as bytes:
+// the index entry for c16>>UTRIE_SHIFT, shifted left by UTRIE_INDEX_SHIFT, gives the
+// start of the data block; c16&UTRIE_MASK is the offset inside that block.
+// Both macro arguments are evaluated more than once.
+#define UTRIE_GET8_FROM_LEAD(trie, c16) \
+    ((const uint8_t *)(trie)->data32)[ \
+        ((int32_t)((trie)->index[(c16)>>UTRIE_SHIFT])<<UTRIE_INDEX_SHIFT)+ \
+        ((c16)&UTRIE_MASK) \
+    ]
+
+/**
+ * Experimental read-only UnicodeContainable backed by a UTrie with byte results.
+ * BMP code points (U+0000..U+FFFF) are answered from the trie; all other code
+ * points are delegated to a private clone of the original set from which the
+ * BMP range has been removed.
+ */
+class TrieSet : public UObject, public UnicodeContainable {
+public:
+    /**
+     * Builds the trie from the BMP ranges of set.
+     *
+     * @param set       The set to copy; it is only read.
+     * @param errorCode In/out ICU error code. Nothing is built if it already
+     *                  indicates a failure. If construction fails, contains()
+     *                  must not be called (latin1 may still be NULL).
+     */
+    TrieSet(const UnicodeSet &set, UErrorCode &errorCode)
+            // clone() is declared with the UnicodeFunctor base type, hence the cast.
+            : trieData(NULL), latin1(NULL), restSet((UnicodeSet *)set.clone()) {
+        if(U_FAILURE(errorCode)) {
+            return;
+        }
+        if(restSet==NULL) {
+            errorCode=U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+
+        UNewTrie *newTrie=utrie_open(NULL, NULL, 0x11000, 0, 0, TRUE);
+        if(newTrie==NULL) {
+            errorCode=U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        build(set, newTrie, errorCode);
+        // The build-time trie is only needed until it has been serialized;
+        // release it on every path, including the error returns inside build().
+        utrie_close(newTrie);
+    }
+
+    /** Frees the serialized trie data and the cloned set. */
+    ~TrieSet() {
+        uprv_free(trieData);
+        delete restSet;
+    }
+
+    /**
+     * Tests whether c is in the set.
+     * Precondition: the constructor succeeded.
+     *
+     * @param c Code point to test.
+     * @return TRUE if c is contained.
+     */
+    UBool contains(UChar32 c) const {
+        if((uint32_t)c<=0xff) {
+            return (UBool)latin1[c];
+        } else if((uint32_t)c<=0xffff) {
+            // The whole BMP, including U+FFFF, lives in the trie:
+            // restSet no longer contains any BMP code point.
+            return (UBool)UTRIE_GET8_FROM_LEAD(&trie, c);
+        } else {
+            return restSet->contains(c);
+        }
+    }
+
+private:
+    // Not copyable: the object owns trieData and restSet through raw pointers.
+    // Declared but intentionally not defined.
+    TrieSet(const TrieSet &other);
+    TrieSet &operator=(const TrieSet &other);
+
+    /**
+     * Fills newTrie from the BMP ranges of set, serializes it into trieData,
+     * sets up trie and latin1, and removes the BMP from restSet.
+     * The caller keeps ownership of newTrie.
+     */
+    void build(const UnicodeSet &set, UNewTrie *newTrie, UErrorCode &errorCode) {
+        UChar32 start, end;
+
+        UnicodeSetIterator iter(set);
+
+        // Copy code point ranges until the first string or the first range
+        // that starts above the BMP; clip a range that straddles U+FFFF.
+        while(iter.nextRange() && !iter.isString()) {
+            start=iter.getCodepoint();
+            end=iter.getCodepointEnd();
+            if(start>0xffff) {
+                break;
+            }
+            if(end>0xffff) {
+                end=0xffff;
+            }
+            // The range limit passed to utrie_setRange32() is exclusive.
+            if(!utrie_setRange32(newTrie, start, end+1, TRUE, TRUE)) {
+                errorCode=U_INTERNAL_PROGRAM_ERROR;
+                return;
+            }
+        }
+
+        // Preflight the trie length; the expected outcome is a buffer overflow.
+        int32_t length=utrie_serialize(newTrie, NULL, 0, NULL, 8, &errorCode);
+        if(errorCode!=U_BUFFER_OVERFLOW_ERROR) {
+            return;
+        }
+
+        trieData=(uint32_t *)uprv_malloc(length);
+        if(trieData==NULL) {
+            errorCode=U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+
+        errorCode=U_ZERO_ERROR;
+        utrie_serialize(newTrie, trieData, length, NULL, 8, &errorCode);
+        utrie_unserialize(&trie, trieData, length, &errorCode);  // TODO: Implement for 8-bit UTrie!
+
+        if(U_SUCCESS(errorCode)) {
+            // Copy the indexes for surrogate code points into the BMP range
+            // for simple access across the entire BMP.
+            uprv_memcpy((uint16_t *)trie.index+(0xd800>>UTRIE_SHIFT),
+                        trie.index+UTRIE_BMP_INDEX_LENGTH,
+                        (0x800>>UTRIE_SHIFT)*2);
+            latin1=UTRIE_GET8_LATIN1(&trie);
+        }
+
+        // restSet is a pointer; it keeps only what the trie does not cover.
+        restSet->remove(0, 0xffff);
+    }
+
+    uint32_t *trieData;     // owned buffer holding the serialized trie
+    const uint8_t *latin1;  // points into trieData; NULL until construction succeeds
+    UTrie trie;             // runtime view onto trieData
+    UnicodeSet *restSet;    // owned clone of the input set, minus the BMP
+};
--- a/icu4c/source/test/perf/unisetperf/draft/unicont.h
+++ b/icu4c/source/test/perf/unisetperf/draft/unicont.h
@ -0,0 +1,34 @@
+/*  
+**********************************************************************
+*   Copyright (C) 2007, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   file name:  unicont.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2007jan15
+*   created by: Markus Scherer
+*
+*   Idea for new common interface underneath the normal UnicodeSet
+*   and other classes, such as "compiled", fast, read-only (immutable)
+*   versions of UnicodeSet.
+*/
+
+// Draft interface for objects that can answer "is this code point contained?"
+// and can span over text. Only contains() is pure virtual; the other
+// member functions are declared here without a definition in this header.
+// NOTE(review): the span functions are non-const while contains() is const - confirm intent.
+class UnicodeContainable {
+public:
+    virtual ~UnicodeContainable() {}
+
+    // Returns whether code point c is contained.
+    virtual UBool contains(UChar32 c) const = 0;
+
+    // NOTE(review): presumably returns the length of the initial part of s whose
+    // characters are all contained - confirm; no implementation is visible here.
+    virtual int32_t span(const UChar *s, int32_t length);
+
+    // NOTE(review): presumably the counterpart of span() for characters that are
+    // not contained - confirm.
+    virtual int32_t spanNot(const UChar *s, int32_t length);
+
+    // NOTE(review): takes const UChar * although the name says UTF-8;
+    // looks like this should be const char * - confirm.
+    virtual int32_t spanUTF8(const UChar *s, int32_t length);
+
+    // NOTE(review): same parameter type question as for spanUTF8().
+    virtual int32_t spanNotUTF8(const UChar *s, int32_t length);
+
+    virtual UClassID getDynamicClassID(void) const;
+};
--- a/icu4c/source/test/perf/unisetperf/unisetperf.cpp
+++ b/icu4c/source/test/perf/unisetperf/unisetperf.cpp
@ -0,0 +1,441 @@
+/*  
+**********************************************************************
+*   Copyright (C) 2007, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   file name:  unisetperf.cpp
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2007jan31
+*   created by: Markus Scherer
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "unicode/uperf.h"
+#include "unicode/uniset.h"
+#include "unicode/unistr.h"
+#include "uoptions.h"
+
+// Number of elements of a true array (not a pointer), as an int32_t.
+#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
+
+// Command-line options specific to unisetperf.
+// Options do not have abbreviations: Force readable command lines.
+// (Using U+0001 for abbreviation characters.)
+// The enum constants are the indexes of the matching entries in options[].
+enum {
+    SET_PATTERN,
+    FAST_TYPE,
+    UNISETPERF_OPTIONS_COUNT
+};
+
+// Option table; main() stores the default values into the .value fields
+// before the command line is parsed.
+static UOption options[UNISETPERF_OPTIONS_COUNT]={
+    UOPTION_DEF("pattern", '\x01', UOPT_REQUIRES_ARG),
+    UOPTION_DEF("type",    '\x01', UOPT_REQUIRES_ARG)
+};
+
+// Usage text for the options above.
+// Only the exact value "fast" makes the test freeze the set;
+// every other --type value behaves like "slow".
+static const char *const unisetperf_usage =
+    "\t--pattern   UnicodeSet pattern for instantiation.\n"
+    "\t            Default: [:ID_Continue:]\n"
+    "\t--type      Type of UnicodeSet: slow fast\n"
+    "\t            Default: slow\n";
+
+// Test object with setup data.
+// Parses the unisetperf options, builds the UnicodeSet under test, loads the
+// input text through UPerfTest and precomputes what all Command objects share:
+// the code point count, the span count and a UTF-8 copy of the text.
+class UnicodeSetPerformanceTest : public UPerfTest {
+public:
+    // argc/argv: the command line, passed on to UPerfTest.
+    // status: in/out ICU error code; on failure the object must not be run.
+    UnicodeSetPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status)
+            : UPerfTest(argc, argv, options, LENGTHOF(options), unisetperf_usage, status),
+              utf8(NULL), utf8Length(0), countInputCodePoints(0), spanCount(0) {
+        if (U_SUCCESS(status)) {
+            UnicodeString pattern=UnicodeString(options[SET_PATTERN].value, -1, US_INV).unescape();
+            set.applyPattern(pattern, status);
+            // Keep an unfrozen copy before set is (optionally) frozen.
+            prefrozen=set;
+            if(0==strcmp(options[FAST_TYPE].value, "fast")) {
+                set.freeze();
+            }
+
+            int32_t inputLength;
+            UPerfTest::getBuffer(inputLength, status);
+            if(U_SUCCESS(status) && inputLength>0) {
+                countInputCodePoints = u_countChar32(buffer, bufferLen);
+
+                countSpans();
+
+                // Preflight the UTF-8 length and allocate utf8.
+                // The preflight is expected to end with a buffer overflow.
+                u_strToUTF8(NULL, 0, &utf8Length, buffer, bufferLen, &status);
+                if(status==U_BUFFER_OVERFLOW_ERROR) {
+                    utf8=(char *)malloc(utf8Length);
+                    if(utf8!=NULL) {
+                        status=U_ZERO_ERROR;
+                        u_strToUTF8(utf8, utf8Length, NULL, buffer, bufferLen, &status);
+                    } else {
+                        status=U_MEMORY_ALLOCATION_ERROR;
+                    }
+                }
+
+                if(verbose) {
+                    printf("code points:%ld  len16:%ld  len8:%ld  spans:%ld  "
+                           "cp/span:%.3g  UChar/span:%.3g  B/span:%.3g  B/cp:%.3g\n",
+                           (long)countInputCodePoints, (long)bufferLen, (long)utf8Length, (long)spanCount,
+                           (double)countInputCodePoints/spanCount, (double)bufferLen/spanCount, (double)utf8Length/spanCount,
+                           (double)utf8Length/countInputCodePoints);
+                }
+            }
+        }
+    }
+
+    // Releases the UTF-8 copy of the input text; free(NULL) is a no-op.
+    virtual ~UnicodeSetPerformanceTest() {
+        free(utf8);
+    }
+
+    // Maps a test index to its name and, if exec is set, to a new test function object.
+    virtual UPerfFunction* runIndexedTest(int32_t index, UBool exec, const char* &name, char* par = NULL);
+
+    // Count spans of characters that are in the set,
+    // and spans of characters that are not in the set.
+    // Counting starts with a not-contained span, so if the very first
+    // character is in the set, then one additional (empty) not-span is counted.
+    // The result is accumulated in spanCount.
+    void countSpans() {
+        const UChar *s=getBuffer();
+        int32_t length=getBufferLen();
+        int32_t i=0;
+        UBool tf=FALSE;
+        while(i<length) {
+            i=span(s, length, i, tf);
+            tf=(UBool)(!tf);
+            ++spanCount;
+        }
+    }
+    // Returns the index of the first code point at or after start whose
+    // set membership differs from tf, or the end index if there is none.
+    int32_t span(const UChar *s, int32_t length, int32_t start, UBool tf) const {
+        UChar32 c;
+        int32_t prev;
+        while((prev=start)<length) {
+            U16_NEXT(s, start, length, c);
+            if(tf!=set.contains(c)) {
+                break;
+            }
+        }
+        return prev;
+    }
+
+    // Read access to the UTF-16 input text held by UPerfTest.
+    const UChar *getBuffer() const { return buffer; }
+    int32_t getBufferLen() const { return bufferLen; }
+
+    // UTF-8 copy of the input text: exactly utf8Length bytes, owned by this
+    // object, allocated without room for a terminating NUL.
+    char *utf8;
+    int32_t utf8Length;
+
+    // Number of code points in the input text.
+    int32_t countInputCodePoints;
+    // Number of alternating not-contained/contained spans, see countSpans().
+    int32_t spanCount;
+
+    // The set under test; frozen if --type is "fast".
+    UnicodeSet set;
+    // Copy of the set taken before it was frozen.
+    UnicodeSet prefrozen;
+
+private:
+    // Not copyable: the object owns utf8 and frees it in its destructor.
+    // Declared but intentionally not defined.
+    UnicodeSetPerformanceTest(const UnicodeSetPerformanceTest &other);
+    UnicodeSetPerformanceTest &operator=(const UnicodeSetPerformanceTest &other);
+};
+
+// Base class of the performance test function objects.
+// Keeps a reference to the shared test case and derives the per-iteration
+// operation and event counts from it.
+class Command : public UPerfFunction {
+protected:
+    Command(const UnicodeSetPerformanceTest &test) : testcase(test) {}
+
+public:
+    virtual ~Command() {}
+
+    // Each subclass implements: virtual void call(UErrorCode* pErrorCode)
+
+    // Operations = code points tested per iteration: all input code points,
+    // plus the one code point that ends each span other than the last one.
+    virtual long getOperationsPerIteration() {
+        const int32_t tested=testcase.countInputCodePoints+testcase.spanCount-1;
+        return tested;
+    }
+
+    // Events = spans found per iteration.
+    virtual long getEventsPerIteration() {
+        const int32_t spans=testcase.spanCount;
+        return spans;
+    }
+
+    const UnicodeSetPerformanceTest &testcase;
+};
+
+// Measures UnicodeSet::contains() by walking the UTF-16 input text
+// one code point at a time.
+class Contains : public Command {
+protected:
+    Contains(const UnicodeSetPerformanceTest &test) : Command(test) {
+        // Sanity check: rebuild a set from the contains() answers of the
+        // (possibly frozen) test set and compare it with the test set.
+        UnicodeSet rebuilt;
+        UChar32 cp=0;
+        while(cp<=0x10ffff) {
+            if(test.set.contains(cp)) {
+                rebuilt.add(cp);
+            }
+            ++cp;
+        }
+        if(rebuilt!=test.set) {
+            fprintf(stderr, "error: frozen set != original!\n");
+        }
+    }
+public:
+    // Factory used by runIndexedTest().
+    static UPerfFunction* get(const UnicodeSetPerformanceTest &testcase) {
+        return new Contains(testcase);
+    }
+    // One iteration: split the whole text into alternating not-contained and
+    // contained spans, and check the number of spans against the test case.
+    virtual void call(UErrorCode* pErrorCode) {
+        const UChar *const text=testcase.getBuffer();
+        const int32_t textLength=testcase.getBufferLen();
+        int32_t offset=0;
+        int32_t spans=0;
+        for(UBool contained=FALSE; offset<textLength; contained=(UBool)(!contained)) {
+            offset+=span(testcase.set, text+offset, textLength-offset, contained);
+            ++spans;
+        }
+        if(spans!=testcase.spanCount) {
+            fprintf(stderr, "error: Contains() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
+                    (long)spans, (long)testcase.spanCount);
+        }
+    }
+    // Returns the number of code units at the start of s whose code points
+    // all have the set membership tf.
+    static int32_t span(const UnicodeSet &set, const UChar *s, int32_t length, UBool tf) {
+        int32_t next=0;
+        while(next<length) {
+            const int32_t current=next;
+            UChar32 c;
+            U16_NEXT(s, next, length, c);
+            if(tf!=set.contains(c)) {
+                return current;
+            }
+        }
+        return next;
+    }
+};
+
+// Measures UnicodeSet::span() on the UTF-16 input text.
+class SpanUTF16 : public Command {
+protected:
+    SpanUTF16(const UnicodeSetPerformanceTest &test) : Command(test) {
+        // Sanity check: rebuild a set from the span() answers of the
+        // (possibly frozen) test set and compare it with the test set.
+        UnicodeSet rebuilt;
+        UChar units[2];
+
+        // Every single 16-bit code unit, including unpaired surrogates.
+        UChar32 single=0;
+        while(single<=0xffff) {
+            units[0]=(UChar)single;
+            if(test.set.span(units, 1, USET_SPAN_CONTAINED)>0) {
+                rebuilt.add(single);
+            }
+            ++single;
+        }
+        // Every surrogate pair, that is, every supplementary code point.
+        for(UChar32 lead=0xd800; lead<=0xdbff; ++lead) {
+            units[0]=(UChar)lead;
+            for(UChar32 trail=0xdc00; trail<=0xdfff; ++trail) {
+                units[1]=(UChar)trail;
+                if(test.set.span(units, 2, USET_SPAN_CONTAINED)>0) {
+                    rebuilt.add(U16_GET_SUPPLEMENTARY(lead, trail));
+                }
+            }
+        }
+
+        if(rebuilt!=test.set) {
+            fprintf(stderr, "error: frozen set != original!\n");
+        }
+    }
+public:
+    // Factory used by runIndexedTest().
+    static UPerfFunction* get(const UnicodeSetPerformanceTest &testcase) {
+        return new SpanUTF16(testcase);
+    }
+    // One iteration: split the whole text into alternating not-contained and
+    // contained spans, and check the number of spans against the test case.
+    virtual void call(UErrorCode* pErrorCode) {
+        const UChar *const text=testcase.getBuffer();
+        const int32_t textLength=testcase.getBufferLen();
+        int32_t offset=0;
+        int32_t spans=0;
+        for(UBool contained=FALSE; offset<textLength; contained=(UBool)(!contained)) {
+            offset+=testcase.set.span(text+offset, textLength-offset, (USetSpanCondition)contained);
+            ++spans;
+        }
+        if(spans!=testcase.spanCount) {
+            fprintf(stderr, "error: SpanUTF16() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
+                    (long)spans, (long)testcase.spanCount);
+        }
+    }
+};
+
+// Measures UnicodeSet::spanBack() on the UTF-16 input text.
+class SpanBackUTF16 : public Command {
+protected:
+    SpanBackUTF16(const UnicodeSetPerformanceTest &test) : Command(test) {
+        // Sanity check: rebuild a set from the spanBack() answers of the
+        // (possibly frozen) test set and compare it with the test set.
+        UnicodeSet rebuilt;
+        UChar units[2];
+
+        // Every single 16-bit code unit, including unpaired surrogates.
+        UChar32 single=0;
+        while(single<=0xffff) {
+            units[0]=(UChar)single;
+            if(test.set.spanBack(units, 1, USET_SPAN_CONTAINED)==0) {
+                rebuilt.add(single);
+            }
+            ++single;
+        }
+        // Every surrogate pair, that is, every supplementary code point.
+        for(UChar32 lead=0xd800; lead<=0xdbff; ++lead) {
+            units[0]=(UChar)lead;
+            for(UChar32 trail=0xdc00; trail<=0xdfff; ++trail) {
+                units[1]=(UChar)trail;
+                if(test.set.spanBack(units, 2, USET_SPAN_CONTAINED)==0) {
+                    rebuilt.add(U16_GET_SUPPLEMENTARY(lead, trail));
+                }
+            }
+        }
+
+        if(rebuilt!=test.set) {
+            fprintf(stderr, "error: frozen set != original!\n");
+        }
+    }
+public:
+    // Factory used by runIndexedTest().
+    static UPerfFunction* get(const UnicodeSetPerformanceTest &testcase) {
+        return new SpanBackUTF16(testcase);
+    }
+    // One iteration: walk the text from its end to its start in alternating
+    // spans, and check the number of spans against the test case.
+    virtual void call(UErrorCode* pErrorCode) {
+        const UChar *const text=testcase.getBuffer();
+        int32_t remaining=testcase.getBufferLen();
+        int32_t spans=0;
+        /*
+         * The forward span() tests always begin with a not-contained span, so
+         * an odd testcase.spanCount means that the last forward span was
+         * not-contained. Start backwards with that same kind of span, and do
+         * not stop before a not-contained span has been taken at the very
+         * start of the text, to reproduce the same spans as span().
+         */
+        UBool contained=(UBool)((testcase.spanCount&1)==0);
+        for(;;) {
+            if(remaining<=0 && contained) {
+                break;
+            }
+            remaining=testcase.set.spanBack(text, remaining, (USetSpanCondition)contained);
+            contained=(UBool)(!contained);
+            ++spans;
+        }
+        if(spans!=testcase.spanCount) {
+            fprintf(stderr, "error: SpanBackUTF16() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
+                    (long)spans, (long)testcase.spanCount);
+        }
+    }
+};
+
+// Measures UnicodeSet::spanUTF8() on the UTF-8 copy of the input text.
+class SpanUTF8 : public Command {
+protected:
+    SpanUTF8(const UnicodeSetPerformanceTest &test) : Command(test) {
+        // Sanity check: rebuild a set from the spanUTF8() answers of the
+        // (possibly frozen) test set and compare it with the test set.
+        UnicodeSet rebuilt;
+        char bytes[4];
+
+        // All code points except the surrogates U+D800..U+DFFF.
+        for(UChar32 cp=0; cp<=0x10ffff; cp=(cp==0xd7ff) ? 0xe000 : cp+1) {
+            int32_t byteCount=0;
+            U8_APPEND_UNSAFE(bytes, byteCount, cp);
+            if(test.set.spanUTF8(bytes, byteCount, USET_SPAN_CONTAINED)>0) {
+                rebuilt.add(cp);
+            }
+        }
+        if(rebuilt!=test.set) {
+            fprintf(stderr, "error: frozen set != original!\n");
+        }
+    }
+public:
+    // Factory used by runIndexedTest().
+    static UPerfFunction* get(const UnicodeSetPerformanceTest &testcase) {
+        return new SpanUTF8(testcase);
+    }
+    // One iteration: split the whole UTF-8 text into alternating not-contained
+    // and contained spans, and check the number of spans against the test case.
+    virtual void call(UErrorCode* pErrorCode) {
+        const char *const text=testcase.utf8;
+        const int32_t textLength=testcase.utf8Length;
+        int32_t offset=0;
+        int32_t spans=0;
+        for(UBool contained=FALSE; offset<textLength; contained=(UBool)(!contained)) {
+            offset+=testcase.set.spanUTF8(text+offset, textLength-offset, (USetSpanCondition)contained);
+            ++spans;
+        }
+        if(spans!=testcase.spanCount) {
+            fprintf(stderr, "error: SpanUTF8() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
+                    (long)spans, (long)testcase.spanCount);
+        }
+    }
+};
+
+// Measures UnicodeSet::spanBackUTF8() on the UTF-8 copy of the input text.
+class SpanBackUTF8 : public Command {
+protected:
+    SpanBackUTF8(const UnicodeSetPerformanceTest &test) : Command(test) {
+        // Sanity check: rebuild a set from the spanBackUTF8() answers of the
+        // (possibly frozen) test set and compare it with the test set.
+        UnicodeSet rebuilt;
+        char bytes[4];
+
+        // All code points except the surrogates U+D800..U+DFFF.
+        for(UChar32 cp=0; cp<=0x10ffff; cp=(cp==0xd7ff) ? 0xe000 : cp+1) {
+            int32_t byteCount=0;
+            U8_APPEND_UNSAFE(bytes, byteCount, cp);
+            if(test.set.spanBackUTF8(bytes, byteCount, USET_SPAN_CONTAINED)==0) {
+                rebuilt.add(cp);
+            }
+        }
+        if(rebuilt!=test.set) {
+            fprintf(stderr, "error: frozen set != original!\n");
+        }
+    }
+public:
+    // Factory used by runIndexedTest().
+    static UPerfFunction* get(const UnicodeSetPerformanceTest &testcase) {
+        return new SpanBackUTF8(testcase);
+    }
+    // One iteration: walk the UTF-8 text from its end to its start in
+    // alternating spans, and check the number of spans against the test case.
+    virtual void call(UErrorCode* pErrorCode) {
+        const char *const text=testcase.utf8;
+        int32_t remaining=testcase.utf8Length;
+        int32_t spans=0;
+        /*
+         * The forward span() tests always begin with a not-contained span, so
+         * an odd testcase.spanCount means that the last forward span was
+         * not-contained. Start backwards with that same kind of span, and do
+         * not stop before a not-contained span has been taken at the very
+         * start of the text, to reproduce the same spans as span().
+         */
+        UBool contained=(UBool)((testcase.spanCount&1)==0);
+        for(;;) {
+            if(remaining<=0 && contained) {
+                break;
+            }
+            remaining=testcase.set.spanBackUTF8(text, remaining, (USetSpanCondition)contained);
+            contained=(UBool)(!contained);
+            ++spans;
+        }
+        if(spans!=testcase.spanCount) {
+            fprintf(stderr, "error: SpanBackUTF8() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
+                    (long)spans, (long)testcase.spanCount);
+        }
+    }
+};
+
+// Maps a test index to its name and, when exec is set, to a new function object.
+// For an index outside the table, name is set to "" and NULL is returned.
+// NULL is also returned whenever exec is FALSE. par is not used.
+UPerfFunction* UnicodeSetPerformanceTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* par) {
+    typedef UPerfFunction *(*Factory)(const UnicodeSetPerformanceTest &);
+    static const struct {
+        const char *name;
+        Factory create;
+    } tests[]={
+        { "Contains",      Contains::get },
+        { "SpanUTF16",     SpanUTF16::get },
+        { "SpanBackUTF16", SpanBackUTF16::get },
+        { "SpanUTF8",      SpanUTF8::get },
+        { "SpanBackUTF8",  SpanBackUTF8::get }
+    };
+    const int32_t testCount=(int32_t)(sizeof(tests)/sizeof(tests[0]));
+
+    if(index<0 || index>=testCount) {
+        name = "";
+        return NULL;
+    }
+    name = tests[index].name;
+    if (exec) {
+        return tests[index].create(*this);
+    }
+    return NULL;
+}
+
+// Entry point: installs the option defaults, sets up the test object and runs
+// the tests selected on the command line.
+// Returns 0 on success, the ICU error code if the setup failed,
+// or 1 if the tests could not be run.
+int main(int argc, const char *argv[])
+{
+    // Used when --type / --pattern are not given on the command line.
+    options[FAST_TYPE].value = "slow";
+    options[SET_PATTERN].value = "[:ID_Continue:]";
+
+    UErrorCode errorCode = U_ZERO_ERROR;
+    UnicodeSetPerformanceTest test(argc, argv, errorCode);
+    if (U_FAILURE(errorCode)) {
+        printf("The error is %s\n", u_errorName(errorCode));
+        test.usage();
+        return errorCode;
+    }
+
+    if (!test.run()) {
+        fprintf(stderr, "FAILED: Tests could not be run, please check the "
+                        "arguments.\n");
+        return 1;
+    }
+    return 0;
+}
--- a/icu4c/source/test/perf/unisetperf/unisetperf.pl
+++ b/icu4c/source/test/perf/unisetperf/unisetperf.pl
@ -0,0 +1,76 @@
+#!/usr/bin/perl -w
+#  ********************************************************************
+#  * COPYRIGHT:
+#  * Copyright (c) 2005-2007, International Business Machines Corporation and
+#  * others. All Rights Reserved.
+#  ********************************************************************
+
+# Driver script: runs the unisetperf binary through PerfFramework's runTests(),
+# first for the Contains and SpanUTF16 tests, then for the SpanUTF8 test.
+use strict;
+# Make the PerfFramework module in ../perldriver loadable.
+use lib '../perldriver';
+use PerfFramework;
+
+# Settings handed to runTests() for the first run.
+# NOTE(review): the meaning of the individual keys is defined by PerfFramework,
+# which is not part of this file - confirm there.
+my $options = {
+	       "title"=>"UnicodeSet span()/contains() performance",
+	       "headers"=>"Bv Bv0",
+	       "operationIs"=>"tested Unicode code point",
+	       "passes"=>"3",
+	       "time"=>"2",
+	       #"outputType"=>"HTML",
+	       "dataDir"=>"/temp/udhr",
+	       "outputDir"=>"../results"
+	      };
+
+# programs
+# tests will be done for all the programs. Results will be stored and connected
+# $p is the common command prefix; $pc, $p16 and $p8 append the test name.
+# NOTE(review): the --type values used below (Bv, Bv0, BvF, ...) are not among
+# the types named in the unisetperf usage text (slow, fast) - confirm that they
+# are still meaningful.
+my $p =   "Release/unisetperf.exe -e UTF-8";
+my $pc =  "$p Contains";
+my $p16 = "$p SpanUTF16";
+my $p8 =  "$p SpanUTF8";
+
+# Test name => list of command lines to run for that test.
+my $tests = {
+	     "Contains",  ["$pc  --type Bv",
+	                   "$pc  --type Bv0"
+	                   ],
+	     "SpanUTF16", ["$p16 --type Bv",
+	                   "$p16 --type Bv0"
+	                   ]
+	    };
+
+# Input files, all listed under the single key "".
+# NOTE(review): presumably the key is a subdirectory of dataDir - confirm in PerfFramework.
+my $dataFiles = {
+		 "",
+		 [
+		  "udhr_eng.txt",
+          "udhr_deu.txt",
+          "udhr_fra.txt",
+          "udhr_rus.txt",
+          "udhr_tha.txt",
+          "udhr_jpn.txt",
+          "udhr_cmn.txt",
+          "udhr_jpn.html"
+		 ]
+		};
+
+runTests($options, $tests, $dataFiles);
+
+# Second run: same settings except for the headers, with the SpanUTF8 test only.
+$options = {
+	       "title"=>"UnicodeSet span()/contains() performance",
+	       "headers"=>"Bv BvF Bvp BvpF L Bvl",
+	       "operationIs"=>"tested Unicode code point",
+	       "passes"=>"3",
+	       "time"=>"2",
+	       #"outputType"=>"HTML",
+	       "dataDir"=>"/temp/udhr",
+	       "outputDir"=>"../results"
+	      };
+
+$tests = {
+	     "SpanUTF8",  ["$p8  --type Bv",
+	                   "$p8  --type BvF",
+	                   "$p8  --type Bvp",
+	                   "$p8  --type BvpF",
+	                   "$p8  --type L",
+	                   "$p8  --type Bvl"
+	                   ]
+	    };
+
+runTests($options, $tests, $dataFiles);
--- a/icu4c/source/test/perf/unisetperf/unisetperf.vcproj
+++ b/icu4c/source/test/perf/unisetperf/unisetperf.vcproj
@ -0,0 +1,209 @@
+<?xml version="1.0" encoding="Windows-1252"?>
+<VisualStudioProject
+	ProjectType="Visual C++"
+	Version="8.00"
+	Name="unisetperf"
+	ProjectGUID="{E7728E98-0469-AF37-43F4-4529A3D52C6B}"
+	>
+	<Platforms>
+		<Platform
+			Name="Win32"
+		/>
+	</Platforms>
+	<ToolFiles>
+	</ToolFiles>
+	<Configurations>
+		<Configuration
+			Name="Debug|Win32"
+			OutputDirectory=".\Debug"
+			IntermediateDirectory=".\Debug"
+			ConfigurationType="1"
+			InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
+			UseOfMFC="0"
+			ATLMinimizesCRunTimeLibraryUsage="false"
+			CharacterSet="2"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				TypeLibraryName=".\Debug/unisetperf.tlb"
+				HeaderFileName=""
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="0"
+				AdditionalIncludeDirectories="..\..\..\..\include;..\..\..\tools\toolutil;..\..\..\common;..\..\..\tools\ctestfw"
+				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
+				BasicRuntimeChecks="3"
+				RuntimeLibrary="3"
+				PrecompiledHeaderFile=".\Debug/unisetperf.pch"
+				AssemblerListingLocation=".\Debug/"
+				ObjectFile=".\Debug/"
+				ProgramDataBaseFileName=".\Debug/"
+				WarningLevel="3"
+				SuppressStartupBanner="true"
+				DebugInformationFormat="4"
+				CompileAs="0"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+				PreprocessorDefinitions="_DEBUG"
+				Culture="1033"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				AdditionalDependencies="icuucd.lib icutud.lib winmm.lib icutestd.lib"
+				OutputFile=".\Debug/unisetperf.exe"
+				LinkIncremental="1"
+				SuppressStartupBanner="true"
+				AdditionalLibraryDirectories="..\..\..\..\lib\"
+				GenerateDebugInformation="true"
+				ProgramDatabaseFile=".\Debug/unisetperf.pdb"
+				SubSystem="1"
+				TargetMachine="1"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCWebDeploymentTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release|Win32"
+			OutputDirectory=".\Release"
+			IntermediateDirectory=".\Release"
+			ConfigurationType="1"
+			InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
+			UseOfMFC="0"
+			ATLMinimizesCRunTimeLibraryUsage="false"
+			CharacterSet="2"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				TypeLibraryName=".\Release/unisetperf.tlb"
+				HeaderFileName=""
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="2"
+				InlineFunctionExpansion="1"
+				AdditionalIncludeDirectories="..\..\..\..\include;..\..\..\tools\toolutil;..\..\..\common;..\..\..\tools\ctestfw"
+				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
+				StringPooling="true"
+				RuntimeLibrary="2"
+				EnableFunctionLevelLinking="true"
+				PrecompiledHeaderFile=".\Release/unisetperf.pch"
+				AssemblerListingLocation=".\Release/"
+				ObjectFile=".\Release/"
+				ProgramDataBaseFileName=".\Release/"
+				WarningLevel="3"
+				SuppressStartupBanner="true"
+				CompileAs="0"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+				PreprocessorDefinitions="NDEBUG"
+				Culture="1033"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				AdditionalDependencies="icuuc.lib icutu.lib icutest.lib winmm.lib"
+				OutputFile=".\Release/unisetperf.exe"
+				LinkIncremental="1"
+				SuppressStartupBanner="true"
+				AdditionalLibraryDirectories="..\..\..\..\lib\"
+				ProgramDatabaseFile=".\Release/unisetperf.pdb"
+				SubSystem="1"
+				TargetMachine="1"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCWebDeploymentTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+	</Configurations>
+	<References>
+	</References>
+	<Files>
+		<File
+			RelativePath=".\unisetperf.cpp"
+			>
+		</File>
+	</Files>
+	<Globals>
+	</Globals>
+</VisualStudioProject>
--- a/icu4c/source/tools/ctestfw/uperf.cpp
+++ b/icu4c/source/tools/ctestfw/uperf.cpp
@ -341,6 +341,7 @@ UBool UPerfTest::runTestLoop( char* testname, char* par )
    int32_t loops = 0;
    double t=0;
    int32_t n = 1;
+    long ops;
    do {
        this->runIndexedTest( index, FALSE, name );
        if (!name || (name[0] == 0))
@ -358,7 +359,8 @@ UBool UPerfTest::runTestLoop( char* testname, char* par )
                fprintf(stderr,"%s function returned NULL", name);
                return FALSE;
            }
-            if (testFunction->getOperationsPerIteration() < 1) {
+            ops = testFunction->getOperationsPerIteration();
+            if (ops < 1) {
                fprintf(stderr, "%s returned an illegal operations/iteration()\n", name);
                return FALSE;
            }
@ -396,8 +398,10 @@ UBool UPerfTest::runTestLoop( char* testname, char* par )
                loops = iterations;
            }

+            double min_t=1000000.0, sum_t=0.0;
+            long events = -1;
+
            for(int32_t ps =0; ps < passes; ps++){
-                long events = -1;
                fprintf(stdout,"= %s begin " ,name);
                if(verbose==TRUE){
                    if(iterations > 0) {
@ -413,36 +417,40 @@ UBool UPerfTest::runTestLoop( char* testname, char* par )
                    printf("Performance test failed with error: %s \n", u_errorName(status));
                    break;
                }
+                sum_t+=t;
+                if(t<min_t) {
+                    min_t=t;
+                }
                events = testFunction->getEventsPerIteration();
                //print info only in verbose mode
                if(verbose==TRUE){
-/*
                    if(events == -1){
-                        fprintf(stdout,"= %s end %f %i %i\n",name , t , loops, testFunction->getOperationsPerIteration());
+                        fprintf(stdout, "= %s end: %f loops: %i operations: %li \n", name, t, (int)loops, ops);
                    }else{
-                        fprintf(stdout,"= %s end %f %i %i %i\n",name , t , loops, testFunction->getOperationsPerIteration(), events);
-                    }
-*/
-                    if(events == -1){
-                        fprintf(stdout, "= %s end: %f loops: %i operations: %li \n", name, t, (int)loops, testFunction->getOperationsPerIteration());
-                    }else{
-                        fprintf(stdout, "= %s end: %f loops: %i operations: %li events: %li\n", name, t, (int)loops, testFunction->getOperationsPerIteration(), events);
+                        fprintf(stdout, "= %s end: %f loops: %i operations: %li events: %li\n", name, t, (int)loops, ops, events);
                    }
                }else{
-/*
                    if(events == -1){
-                        fprintf(stdout,"= %f %i %i \n", t , loops, testFunction->getOperationsPerIteration());
+                        fprintf(stdout,"= %s end %f %i %li\n", name, t, (int)loops, ops);
                    }else{
-                        fprintf(stdout,"= %f %i %i %i\n", t , loops, testFunction->getOperationsPerIteration(), events);
-                    }
-*/
-                    if(events == -1){
-                        fprintf(stdout,"= %s end %f %i %li\n", name, t, (int)loops, testFunction->getOperationsPerIteration());
-                    }else{
-                        fprintf(stdout,"= %s end %f %i %li %li\n", name, t, (int)loops, testFunction->getOperationsPerIteration(), events);
+                        fprintf(stdout,"= %s end %f %i %li %li\n", name, t, (int)loops, ops, events);
                    }
                }
            }
+            if(verbose && U_SUCCESS(status)) {
+                double avg_t = sum_t/passes;
+                if(events == -1) {
+                    fprintf(stdout, "%%= %s avg: %.4g loops: %i avg/op: %.4g ns\n",
+                            name, avg_t, (int)loops, (avg_t*1E9)/(loops*ops));
+                    fprintf(stdout, "_= %s min: %.4g loops: %i min/op: %.4g ns\n",
+                            name, min_t, (int)loops, (min_t*1E9)/(loops*ops));
+                } else {
+                    fprintf(stdout, "%%= %s avg: %.4g loops: %i avg/op: %.4g ns avg/event: %.4g ns\n",
+                            name, avg_t, (int)loops, (avg_t*1E9)/(loops*ops), (avg_t*1E9)/(loops*events));
+                    fprintf(stdout, "_= %s min: %.4g loops: %i min/op: %.4g ns min/event: %.4g ns\n",
+                            name, min_t, (int)loops, (min_t*1E9)/(loops*ops), (min_t*1E9)/(loops*events));
+                }
+            }
            delete testFunction;
        }
        index++;