ICU-2235 optimize for inCharset==outCharset

X-SVN-Rev: 13199
2003-09-25 04:17:12 +00:00 · 2003-09-25 04:17:12 +00:00 · 5b97a31dbe
commit 5b97a31dbe
parent 8c303baf8a
3 changed files with 122 additions and 118 deletions
--- a/icu4c/source/common/propname.cpp
+++ b/icu4c/source/common/propname.cpp
@@ -363,12 +363,12 @@ NameToEnum::swap(const UDataSwapper *ds,
        inNameArray=(const Offset *)(inEnumArray+tempMap->count);
        outNameArray=(Offset *)(outEnumArray+tempMap->count);

-        /*
-         * ### TODO optimize
-         * After some testing, add a test
-         * if(inCharset==outCharset) { only swap enums and names, do not sort; }
-         * else { sort/copy/swap/permutate as below; }
-         */
+        if(ds->inCharset==ds->outCharset) {
+            /* no need to sort, just swap the enum/name arrays */
+            ds->swapArray32(ds, inEnumArray, tempMap->count*4, outEnumArray, pErrorCode);
+            ds->swapArray16(ds, inNameArray, tempMap->count*2, outNameArray, pErrorCode);
+            return size;
+        }

        /*
         * The name and enum arrays are sorted by names and must be resorted
@@ -385,9 +385,13 @@ NameToEnum::swap(const UDataSwapper *ds,
            sortArray[i].index=(Offset)i;
        }

+        /*
+         * use a stable sort to avoid shuffling of equal strings,
+         * which makes testing harder
+         */
        uprv_sortArray(sortArray, tempMap->count, sizeof(NameAndIndex),
                       upname_compareRows, outBytes,
-                       FALSE, pErrorCode);
+                       TRUE, pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            udata_printError(ds, "upname_swap(NameToEnum).uprv_sortArray(%d items) failed - %s\n",
                             tempMap->count, u_errorName(*pErrorCode));
--- a/icu4c/source/common/ucnv_io.c
+++ b/icu4c/source/common/ucnv_io.c
@@ -1215,120 +1215,115 @@ ucnv_swapAliases(const UDataSwapper *ds,
            return 0;
        }

-        /*
-         * ### TODO optimize
-         * After some testing, add a test
-         * if(inCharset==outCharset) {
-         *     only swap 16-bit units, do not sort;
-            -- swap all 16-bit values --
+        if(ds->inCharset==ds->outCharset) {
+            /* no need to sort, just swap all 16-bit values together */
            ds->swapArray16(ds,
                            inTable+offsets[converterListIndex],
                            2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]),
                            outTable+offsets[converterListIndex],
                            pErrorCode);
-         * } else { sort/copy/swap/permutate as below; }
-         */
-
-        /* allocate the temporary table for sorting */
-        count=toc[aliasListIndex];
-
-        tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */
-
-        if(count<=STACK_ROW_CAPACITY) {
-            tempTable.rows=rows;
-            tempTable.resort=resort;
        } else {
-            tempTable.rows=(Row *)uprv_malloc(count*sizeof(Row)+count*2);
-            if(tempTable.rows==NULL) {
-                udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n",
-                                 count);
-                *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+            /* allocate the temporary table for sorting */
+            count=toc[aliasListIndex];
+
+            tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */
+
+            if(count<=STACK_ROW_CAPACITY) {
+                tempTable.rows=rows;
+                tempTable.resort=resort;
+            } else {
+                tempTable.rows=(Row *)uprv_malloc(count*sizeof(Row)+count*2);
+                if(tempTable.rows==NULL) {
+                    udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n",
+                                     count);
+                    *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+                    return 0;
+                }
+                tempTable.resort=(uint16_t *)(tempTable.rows+count);
+            }
+
+            if(ds->outCharset==U_ASCII_FAMILY) {
+                tempTable.stripForCompare=ucnv_io_stripASCIIForCompare;
+            } else /* U_EBCDIC_FAMILY */ {
+                tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare;
+            }
+
+            /*
+             * Sort unique aliases+mapped names.
+             *
+             * We need to sort the list again by outCharset strings because they
+             * sort differently for different charset families.
+             * First we set up a temporary table with the string indexes and
+             * sorting indexes and sort that.
+             * Then we permutate and copy/swap the actual values.
+             */
+            p=inTable+offsets[aliasListIndex];
+            q=outTable+offsets[aliasListIndex];
+
+            p2=inTable+offsets[untaggedConvArrayIndex];
+            q2=outTable+offsets[untaggedConvArrayIndex];
+
+            for(i=0; i<count; ++i) {
+                tempTable.rows[i].strIndex=ds->readUInt16(p[i]);
+                tempTable.rows[i].sortIndex=(uint16_t)i;
+            }
+
+            uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(Row),
+                           io_compareRows, &tempTable,
+                           FALSE, pErrorCode);
+
+            if(U_SUCCESS(*pErrorCode)) {
+                /* copy/swap/permutate items */
+                if(p!=q) {
+                    for(i=0; i<count; ++i) {
+                        oldIndex=tempTable.rows[i].sortIndex;
+                        ds->swapArray16(ds, p+oldIndex, 2, q+i, pErrorCode);
+                        ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode);
+                    }
+                } else {
+                    /*
+                     * If we swap in-place, then the permutation must use another
+                     * temporary array (tempTable.resort)
+                     * before the results are copied to the outBundle.
+                     */
+                    uint16_t *r=tempTable.resort;
+
+                    for(i=0; i<count; ++i) {
+                        oldIndex=tempTable.rows[i].sortIndex;
+                        ds->swapArray16(ds, p+oldIndex, 2, r+i, pErrorCode);
+                    }
+                    uprv_memcpy(q, r, 2*count);
+
+                    for(i=0; i<count; ++i) {
+                        oldIndex=tempTable.rows[i].sortIndex;
+                        ds->swapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode);
+                    }
+                    uprv_memcpy(q2, r, 2*count);
+                }
+            }
+
+            if(tempTable.rows!=rows) {
+                uprv_free(tempTable.rows);
+            }
+
+            if(U_FAILURE(*pErrorCode)) {
+                udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed - %s\n",
+                                 count, u_errorName(*pErrorCode));
                return 0;
            }
-            tempTable.resort=(uint16_t *)(tempTable.rows+count);
+
+            /* swap remaining 16-bit values */
+            ds->swapArray16(ds,
+                            inTable+offsets[converterListIndex],
+                            2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]),
+                            outTable+offsets[converterListIndex],
+                            pErrorCode);
+            ds->swapArray16(ds,
+                            inTable+offsets[taggedAliasArrayIndex],
+                            2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]),
+                            outTable+offsets[taggedAliasArrayIndex],
+                            pErrorCode);
        }
-
-        if(ds->outCharset==U_ASCII_FAMILY) {
-            tempTable.stripForCompare=ucnv_io_stripASCIIForCompare;
-        } else /* U_EBCDIC_FAMILY */ {
-            tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare;
-        }
-
-        /*
-         * Sort unique aliases+mapped names.
-         *
-         * We need to sort the list again by outCharset strings because they
-         * sort differently for different charset families.
-         * First we set up a temporary table with the string indexes and
-         * sorting indexes and sort that.
-         * Then we permutate and copy/swap the actual values.
-         */
-        p=inTable+offsets[aliasListIndex];
-        q=outTable+offsets[aliasListIndex];
-
-        p2=inTable+offsets[untaggedConvArrayIndex];
-        q2=outTable+offsets[untaggedConvArrayIndex];
-
-        for(i=0; i<count; ++i) {
-            tempTable.rows[i].strIndex=ds->readUInt16(p[i]);
-            tempTable.rows[i].sortIndex=(uint16_t)i;
-        }
-
-        uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(Row),
-                       io_compareRows, &tempTable,
-                       FALSE, pErrorCode);
-
-        if(U_SUCCESS(*pErrorCode)) {
-            /* copy/swap/permutate items */
-            if(p!=q) {
-                for(i=0; i<count; ++i) {
-                    oldIndex=tempTable.rows[i].sortIndex;
-                    ds->swapArray16(ds, p+oldIndex, 2, q+i, pErrorCode);
-                    ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode);
-                }
-            } else {
-                /*
-                 * If we swap in-place, then the permutation must use another
-                 * temporary array (tempTable.resort)
-                 * before the results are copied to the outBundle.
-                 */
-                uint16_t *r=tempTable.resort;
-
-                for(i=0; i<count; ++i) {
-                    oldIndex=tempTable.rows[i].sortIndex;
-                    ds->swapArray16(ds, p+oldIndex, 2, r+i, pErrorCode);
-                }
-                uprv_memcpy(q, r, 2*count);
-
-                for(i=0; i<count; ++i) {
-                    oldIndex=tempTable.rows[i].sortIndex;
-                    ds->swapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode);
-                }
-                uprv_memcpy(q2, r, 2*count);
-            }
-        }
-
-        if(tempTable.rows!=rows) {
-            uprv_free(tempTable.rows);
-        }
-
-        if(U_FAILURE(*pErrorCode)) {
-            udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed - %s\n",
-                             count, u_errorName(*pErrorCode));
-            return 0;
-        }
-
-        /* swap remaining 16-bit values */
-        ds->swapArray16(ds,
-                        inTable+offsets[converterListIndex],
-                        2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]),
-                        outTable+offsets[converterListIndex],
-                        pErrorCode);
-        ds->swapArray16(ds,
-                        inTable+offsets[taggedAliasArrayIndex],
-                        2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]),
-                        outTable+offsets[taggedAliasArrayIndex],
-                        pErrorCode);
    }

    return headerSize+2*(int32_t)topOffset;
--- a/icu4c/source/common/uresdata.c
+++ b/icu4c/source/common/uresdata.c
@@ -849,12 +849,17 @@ ures_swapResource(const UDataSwapper *ds,
                }
            }

-            /*
-             * ### TODO optimize
-             * After some testing, add a test
-             * if(inCharset==outCharset) { only swap keys and items, do not sort; }
-             * else { sort/copy/swap/permutate as below; }
-             */
+            if(ds->inCharset==ds->outCharset) {
+                /* no need to sort, just swap the offset/value arrays */
+                if(pKey16!=NULL) {
+                    ds->swapArray16(ds, pKey16, count*2, qKey16, pErrorCode);
+                    ds->swapArray32(ds, p, count*4, q, pErrorCode);
+                } else {
+                    /* swap key offsets and items as one array */
+                    ds->swapArray32(ds, pKey32, count*2*4, qKey32, pErrorCode);
+                }
+                break;
+            }

            /*
             * We need to sort tables by outCharset key strings because they