From f8fdc62c1948b4c052fd06618756368e2c85bc72 Mon Sep 17 00:00:00 2001 From: "erik.corry@gmail.com" Date: Wed, 29 Jun 2011 08:35:10 +0000 Subject: [PATCH] Improvement to SmiLexicalCompare. Landing http://codereview.chromium.org/7261008 for Stephen Adams git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@8456 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/misc-intrinsics.h | 89 ++++++++++++++++++++++++++++++++++++++ src/runtime.cc | 80 +++++++++++++++++++++------------- src/runtime.h | 8 ---- test/mjsunit/array-sort.js | 63 +++++++++++++++++++-------- 4 files changed, 185 insertions(+), 55 deletions(-) create mode 100644 src/misc-intrinsics.h diff --git a/src/misc-intrinsics.h b/src/misc-intrinsics.h new file mode 100644 index 0000000000..5393de2c21 --- /dev/null +++ b/src/misc-intrinsics.h @@ -0,0 +1,89 @@ +// Copyright 2011 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef V8_MISC_INTRINSICS_H_ +#define V8_MISC_INTRINSICS_H_ + +#include "../include/v8.h" +#include "globals.h" + +namespace v8 { +namespace internal { + +// Returns the index of the leading 1 bit, counting the least significant bit at +// index 0. (1 << IntegerLog2(x)) is a mask for the most significant bit of x. +// Result is undefined if input is zero. +int IntegerLog2(uint32_t value); + +#if defined(__GNUC__) + +inline int IntegerLog2(uint32_t value) { + return 31 - __builtin_clz(value); +} + +#elif defined(_MSC_VER) + +#pragma intrinsic(_BitScanReverse) + +inline int IntegerLog2(uint32_t value) { + unsigned long result; // NOLINT: MSVC intrinsic demands this type. + _BitScanReverse(&result, value); + return result; +} + +#else + +// Default version using regular operations. Code taken from: +// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog +inline int IntegerLog2(uint32_t value) { + int result, shift; + + shift = (value > 0xFFFF) << 4; + value >>= shift; + result = shift; + + shift = (value > 0xFF) << 3; + value >>= shift; + result |= shift; + + shift = (value > 0xF) << 2; + value >>= shift; + result |= shift; + + shift = (value > 0x3) << 1; + value >>= shift; + result |= shift; + + result |= (value >> 1); + + return result; +} +#endif + +} } // namespace v8::internal + +#endif // V8_MISC_INTRINSICS_H_ diff --git a/src/runtime.cc b/src/runtime.cc index dd49798f05..df99fdc671 100644 --- a/src/runtime.cc +++ b/src/runtime.cc @@ -45,6 +45,7 @@ #include "json-parser.h" #include "liveedit.h" #include "liveobjectlist-inl.h" +#include "misc-intrinsics.h" #include "parser.h" #include "platform.h" #include "runtime-profiler.h" @@ -6642,50 +6643,69 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_SmiLexicographicCompare) { // If the integers are equal so are the string representations. if (x_value == y_value) return Smi::FromInt(EQUAL); - // If one of the integers are zero the normal integer order is the + // If one of the integers is zero the normal integer order is the // same as the lexicographic order of the string representations. - if (x_value == 0 || y_value == 0) return Smi::FromInt(x_value - y_value); + if (x_value == 0 || y_value == 0) + return Smi::FromInt(x_value < y_value ? LESS : GREATER); // If only one of the integers is negative the negative number is // smallest because the char code of '-' is less than the char code // of any digit. Otherwise, we make both values positive. + + // Use unsigned values otherwise the logic is incorrect for -MIN_INT on + // architectures using 32-bit Smis. + uint32_t x_scaled = x_value; + uint32_t y_scaled = y_value; if (x_value < 0 || y_value < 0) { if (y_value >= 0) return Smi::FromInt(LESS); if (x_value >= 0) return Smi::FromInt(GREATER); - x_value = -x_value; - y_value = -y_value; + x_scaled = -x_value; + y_scaled = -y_value; } - // Arrays for the individual characters of the two Smis. Smis are - // 31 bit integers and 10 decimal digits are therefore enough. - // TODO(isolates): maybe we should simply allocate 20 bytes on the stack. - int* x_elms = isolate->runtime_state()->smi_lexicographic_compare_x_elms(); - int* y_elms = isolate->runtime_state()->smi_lexicographic_compare_y_elms(); + static const uint32_t kPowersOf10[] = { + 1, 10, 100, 1000, 10*1000, 100*1000, + 1000*1000, 10*1000*1000, 100*1000*1000, + 1000*1000*1000 + }; + // If the integers have the same number of decimal digits they can be + // compared directly as the numeric order is the same as the + // lexicographic order. If one integer has fewer digits, it is scaled + // by some power of 10 to have the same number of digits as the longer + // integer. If the scaled integers are equal it means the shorter + // integer comes first in the lexicographic order. - // Convert the integers to arrays of their decimal digits. - int x_index = 0; - int y_index = 0; - while (x_value > 0) { - x_elms[x_index++] = x_value % 10; - x_value /= 10; - } - while (y_value > 0) { - y_elms[y_index++] = y_value % 10; - y_value /= 10; + // From http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10 + int x_log2 = IntegerLog2(x_scaled); + int x_log10 = ((x_log2 + 1) * 1233) >> 12; + x_log10 -= x_scaled < kPowersOf10[x_log10]; + + int y_log2 = IntegerLog2(y_scaled); + int y_log10 = ((y_log2 + 1) * 1233) >> 12; + y_log10 -= y_scaled < kPowersOf10[y_log10]; + + int tie = EQUAL; + + if (x_log10 < y_log10) { + // X has fewer digits. We would like to simply scale up X but that + // might overflow, e.g when comparing 9 with 1_000_000_000, 9 would + // be scaled up to 9_000_000_000. So we scale up by the next + // smallest power and scale down Y to drop one digit. It is OK to + // drop one digit from the longer integer since the final digit is + // past the length of the shorter integer. + x_scaled *= kPowersOf10[y_log10 - x_log10 - 1]; + y_scaled /= 10; + tie = LESS; + } else if (y_log10 < x_log10) { + y_scaled *= kPowersOf10[x_log10 - y_log10 - 1]; + x_scaled /= 10; + tie = GREATER; } - // Loop through the arrays of decimal digits finding the first place - // where they differ. - while (--x_index >= 0 && --y_index >= 0) { - int diff = x_elms[x_index] - y_elms[y_index]; - if (diff != 0) return Smi::FromInt(diff); - } - - // If one array is a suffix of the other array, the longest array is - // the representation of the largest of the Smis in the - // lexicographic ordering. - return Smi::FromInt(x_index - y_index); + if (x_scaled < y_scaled) return Smi::FromInt(LESS); + if (x_scaled > y_scaled) return Smi::FromInt(GREATER); + return Smi::FromInt(tie); } diff --git a/src/runtime.h b/src/runtime.h index 6eb6405427..5efc057df9 100644 --- a/src/runtime.h +++ b/src/runtime.h @@ -519,12 +519,6 @@ class RuntimeState { StringInputBuffer* string_locale_compare_buf2() { return &string_locale_compare_buf2_; } - int* smi_lexicographic_compare_x_elms() { - return smi_lexicographic_compare_x_elms_; - } - int* smi_lexicographic_compare_y_elms() { - return smi_lexicographic_compare_y_elms_; - } private: RuntimeState() {} @@ -536,8 +530,6 @@ class RuntimeState { StringInputBuffer string_input_buffer_compare_bufy_; StringInputBuffer string_locale_compare_buf1_; StringInputBuffer string_locale_compare_buf2_; - int smi_lexicographic_compare_x_elms_[10]; - int smi_lexicographic_compare_y_elms_[10]; friend class Isolate; friend class Runtime; diff --git a/test/mjsunit/array-sort.js b/test/mjsunit/array-sort.js index 7060c5f366..fd18a5b2d9 100644 --- a/test/mjsunit/array-sort.js +++ b/test/mjsunit/array-sort.js @@ -70,30 +70,59 @@ function TestNumberSort() { a.sort(); assertArrayEquals([-1000000000, -10000000000, -1000000001, 1000000000, 10000000000, 1000000001], a); + // Other cases are tested implicitly in TestSmiLexicographicCompare. +} - for (var xb = 1; xb <= 1000 * 1000 * 1000; xb *= 10) { +TestNumberSort(); + +function TestSmiLexicographicCompare() { + + assertFalse(%_IsSmi(2147483648), 'Update test for >32 bit Smi'); + + // Collect a list of interesting Smis. + var seen = {}; + var smis = []; + function add(x) { + if (x | 0 == x) { + x = x | 0; // Canonicalizes to Smi if 32-bit signed and fits in Smi. + } + if (%_IsSmi(x) && !seen[x]) { + seen[x] = 1; + smis.push(x); + } + } + function addSigned(x) { + add(x); + add(-x); + } + + var BIGGER_THAN_ANY_SMI = 10 * 1000 * 1000 * 1000; + for (var xb = 1; xb <= BIGGER_THAN_ANY_SMI; xb *= 10) { for (var xf = 0; xf <= 9; xf++) { for (var xo = -1; xo <= 1; xo++) { - for (var yb = 1; yb <= 1000 * 1000 * 1000; yb *= 10) { - for (var yf = 0; yf <= 9; yf++) { - for (var yo = -1; yo <= 1; yo++) { - var x = xb * xf + xo; - var y = yb * yf + yo; - if (!%_IsSmi(x)) continue; - if (!%_IsSmi(y)) continue; - var lex = %SmiLexicographicCompare(x, y); - if (lex < 0) lex = -1; - if (lex > 0) lex = 1; - assertEquals(lex, (x == y) ? 0 : ((x + "") < (y + "") ? -1 : 1), x + " < " + y); - } - } - } + addSigned(xb * xf + xo); } } } + + for (var yb = 1; yb <= BIGGER_THAN_ANY_SMI; yb *= 2) { + for (var yo = -2; yo <= 2; yo++) { + addSigned(yb + yo); + } + } + + for (var i = 0; i < smis.length; i++) { + for (var j = 0; j < smis.length; j++) { + var x = smis[i]; + var y = smis[j]; + var lex = %SmiLexicographicCompare(x, y); + var expected = (x == y) ? 0 : ((x + "") < (y + "") ? -1 : 1); + assertEquals(lex, expected, x + " < " + y); + } + } } -TestNumberSort(); +TestSmiLexicographicCompare(); // Test lexicographical string sorting. @@ -374,4 +403,4 @@ function cmpTest(a, b) { assertTrue(b instanceof o); return a.val - b.val; } -arr.sort(cmpTest); \ No newline at end of file +arr.sort(cmpTest);