Improvement to SmiLexicalCompare. Landing http://codereview.chromium.org/7261008 for Stephen Adams

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@8456 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
erik.corry@gmail.com 2011-06-29 08:35:10 +00:00
parent 81e8b42e1b
commit f8fdc62c19
4 changed files with 185 additions and 55 deletions

89
src/misc-intrinsics.h Normal file
View File

@ -0,0 +1,89 @@
// Copyright 2011 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef V8_MISC_INTRINSICS_H_
#define V8_MISC_INTRINSICS_H_
#include "../include/v8.h"
#include "globals.h"
namespace v8 {
namespace internal {
// Returns the index of the leading 1 bit, counting the least significant bit at
// index 0. (1 << IntegerLog2(x)) is a mask for the most significant bit of x.
// Result is undefined if input is zero.
int IntegerLog2(uint32_t value);
#if defined(__GNUC__)
inline int IntegerLog2(uint32_t value) {
return 31 - __builtin_clz(value);
}
#elif defined(_MSC_VER)
#pragma intrinsic(_BitScanReverse)
inline int IntegerLog2(uint32_t value) {
unsigned long result; // NOLINT: MSVC intrinsic demands this type.
_BitScanReverse(&result, value);
return result;
}
#else
// Default version using regular operations. Code taken from:
// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog
inline int IntegerLog2(uint32_t value) {
int result, shift;
shift = (value > 0xFFFF) << 4;
value >>= shift;
result = shift;
shift = (value > 0xFF) << 3;
value >>= shift;
result |= shift;
shift = (value > 0xF) << 2;
value >>= shift;
result |= shift;
shift = (value > 0x3) << 1;
value >>= shift;
result |= shift;
result |= (value >> 1);
return result;
}
#endif
} } // namespace v8::internal
#endif // V8_MISC_INTRINSICS_H_

View File

@ -45,6 +45,7 @@
#include "json-parser.h"
#include "liveedit.h"
#include "liveobjectlist-inl.h"
#include "misc-intrinsics.h"
#include "parser.h"
#include "platform.h"
#include "runtime-profiler.h"
@ -6642,50 +6643,69 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_SmiLexicographicCompare) {
// If the integers are equal so are the string representations.
if (x_value == y_value) return Smi::FromInt(EQUAL);
// If one of the integers are zero the normal integer order is the
// If one of the integers is zero the normal integer order is the
// same as the lexicographic order of the string representations.
if (x_value == 0 || y_value == 0) return Smi::FromInt(x_value - y_value);
if (x_value == 0 || y_value == 0)
return Smi::FromInt(x_value < y_value ? LESS : GREATER);
// If only one of the integers is negative the negative number is
// smallest because the char code of '-' is less than the char code
// of any digit. Otherwise, we make both values positive.
// Use unsigned values otherwise the logic is incorrect for -MIN_INT on
// architectures using 32-bit Smis.
uint32_t x_scaled = x_value;
uint32_t y_scaled = y_value;
if (x_value < 0 || y_value < 0) {
if (y_value >= 0) return Smi::FromInt(LESS);
if (x_value >= 0) return Smi::FromInt(GREATER);
x_value = -x_value;
y_value = -y_value;
x_scaled = -x_value;
y_scaled = -y_value;
}
// Arrays for the individual characters of the two Smis. Smis are
// 31 bit integers and 10 decimal digits are therefore enough.
// TODO(isolates): maybe we should simply allocate 20 bytes on the stack.
int* x_elms = isolate->runtime_state()->smi_lexicographic_compare_x_elms();
int* y_elms = isolate->runtime_state()->smi_lexicographic_compare_y_elms();
static const uint32_t kPowersOf10[] = {
1, 10, 100, 1000, 10*1000, 100*1000,
1000*1000, 10*1000*1000, 100*1000*1000,
1000*1000*1000
};
// If the integers have the same number of decimal digits they can be
// compared directly as the numeric order is the same as the
// lexicographic order. If one integer has fewer digits, it is scaled
// by some power of 10 to have the same number of digits as the longer
// integer. If the scaled integers are equal it means the shorter
// integer comes first in the lexicographic order.
// Convert the integers to arrays of their decimal digits.
int x_index = 0;
int y_index = 0;
while (x_value > 0) {
x_elms[x_index++] = x_value % 10;
x_value /= 10;
}
while (y_value > 0) {
y_elms[y_index++] = y_value % 10;
y_value /= 10;
// From http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
int x_log2 = IntegerLog2(x_scaled);
int x_log10 = ((x_log2 + 1) * 1233) >> 12;
x_log10 -= x_scaled < kPowersOf10[x_log10];
int y_log2 = IntegerLog2(y_scaled);
int y_log10 = ((y_log2 + 1) * 1233) >> 12;
y_log10 -= y_scaled < kPowersOf10[y_log10];
int tie = EQUAL;
if (x_log10 < y_log10) {
// X has fewer digits. We would like to simply scale up X but that
// might overflow, e.g when comparing 9 with 1_000_000_000, 9 would
// be scaled up to 9_000_000_000. So we scale up by the next
// smallest power and scale down Y to drop one digit. It is OK to
// drop one digit from the longer integer since the final digit is
// past the length of the shorter integer.
x_scaled *= kPowersOf10[y_log10 - x_log10 - 1];
y_scaled /= 10;
tie = LESS;
} else if (y_log10 < x_log10) {
y_scaled *= kPowersOf10[x_log10 - y_log10 - 1];
x_scaled /= 10;
tie = GREATER;
}
// Loop through the arrays of decimal digits finding the first place
// where they differ.
while (--x_index >= 0 && --y_index >= 0) {
int diff = x_elms[x_index] - y_elms[y_index];
if (diff != 0) return Smi::FromInt(diff);
}
// If one array is a suffix of the other array, the longest array is
// the representation of the largest of the Smis in the
// lexicographic ordering.
return Smi::FromInt(x_index - y_index);
if (x_scaled < y_scaled) return Smi::FromInt(LESS);
if (x_scaled > y_scaled) return Smi::FromInt(GREATER);
return Smi::FromInt(tie);
}

View File

@ -519,12 +519,6 @@ class RuntimeState {
StringInputBuffer* string_locale_compare_buf2() {
return &string_locale_compare_buf2_;
}
int* smi_lexicographic_compare_x_elms() {
return smi_lexicographic_compare_x_elms_;
}
int* smi_lexicographic_compare_y_elms() {
return smi_lexicographic_compare_y_elms_;
}
private:
RuntimeState() {}
@ -536,8 +530,6 @@ class RuntimeState {
StringInputBuffer string_input_buffer_compare_bufy_;
StringInputBuffer string_locale_compare_buf1_;
StringInputBuffer string_locale_compare_buf2_;
int smi_lexicographic_compare_x_elms_[10];
int smi_lexicographic_compare_y_elms_[10];
friend class Isolate;
friend class Runtime;

View File

@ -70,30 +70,59 @@ function TestNumberSort() {
a.sort();
assertArrayEquals([-1000000000, -10000000000, -1000000001, 1000000000, 10000000000, 1000000001], a);
// Other cases are tested implicitly in TestSmiLexicographicCompare.
}
for (var xb = 1; xb <= 1000 * 1000 * 1000; xb *= 10) {
TestNumberSort();
function TestSmiLexicographicCompare() {
assertFalse(%_IsSmi(2147483648), 'Update test for >32 bit Smi');
// Collect a list of interesting Smis.
var seen = {};
var smis = [];
function add(x) {
if (x | 0 == x) {
x = x | 0; // Canonicalizes to Smi if 32-bit signed and fits in Smi.
}
if (%_IsSmi(x) && !seen[x]) {
seen[x] = 1;
smis.push(x);
}
}
function addSigned(x) {
add(x);
add(-x);
}
var BIGGER_THAN_ANY_SMI = 10 * 1000 * 1000 * 1000;
for (var xb = 1; xb <= BIGGER_THAN_ANY_SMI; xb *= 10) {
for (var xf = 0; xf <= 9; xf++) {
for (var xo = -1; xo <= 1; xo++) {
for (var yb = 1; yb <= 1000 * 1000 * 1000; yb *= 10) {
for (var yf = 0; yf <= 9; yf++) {
for (var yo = -1; yo <= 1; yo++) {
var x = xb * xf + xo;
var y = yb * yf + yo;
if (!%_IsSmi(x)) continue;
if (!%_IsSmi(y)) continue;
var lex = %SmiLexicographicCompare(x, y);
if (lex < 0) lex = -1;
if (lex > 0) lex = 1;
assertEquals(lex, (x == y) ? 0 : ((x + "") < (y + "") ? -1 : 1), x + " < " + y);
}
}
}
addSigned(xb * xf + xo);
}
}
}
for (var yb = 1; yb <= BIGGER_THAN_ANY_SMI; yb *= 2) {
for (var yo = -2; yo <= 2; yo++) {
addSigned(yb + yo);
}
}
for (var i = 0; i < smis.length; i++) {
for (var j = 0; j < smis.length; j++) {
var x = smis[i];
var y = smis[j];
var lex = %SmiLexicographicCompare(x, y);
var expected = (x == y) ? 0 : ((x + "") < (y + "") ? -1 : 1);
assertEquals(lex, expected, x + " < " + y);
}
}
}
TestNumberSort();
TestSmiLexicographicCompare();
// Test lexicographical string sorting.
@ -374,4 +403,4 @@ function cmpTest(a, b) {
assertTrue(b instanceof o);
return a.val - b.val;
}
arr.sort(cmpTest);
arr.sort(cmpTest);