Optimize bitops with non-Smi inputs. Instead of converting both inputs

to floating point and then converting back we convert directly to a
32 bit integer.  In addition the bit twiddling implementation of float-
to-integer conversion has been ported from ARM.  Testing has shown that
this runs faster than the x87 or SSE3 rounding instructions.  This change
is IA32 only.  There may be a smaller benefit from doing the same on x64.
Review URL: http://codereview.chromium.org/506052

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@3492 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
erik.corry@gmail.com 2009-12-18 09:33:24 +00:00
parent c7c821fdfb
commit 0553c7feb9
2 changed files with 172 additions and 55 deletions

View File

@ -305,7 +305,9 @@ class UnarySubStub : public CodeStub {
int MinorKey() { return overwrite_ ? 1 : 0; }
void Generate(MacroAssembler* masm);
const char* GetName() { return "UnarySubStub"; }
const char* GetName() {
return overwrite_ ? "UnarySubStub_Overwrite" : "UnarySubStub_Alloc";
}
};

View File

@ -761,6 +761,10 @@ class FloatingPointHelper : public AllStatic {
static void CheckFloatOperands(MacroAssembler* masm,
Label* non_float,
Register scratch);
// Takes the operands in edx and eax and loads them as integers in eax
// and ecx.
static void LoadAsIntegers(MacroAssembler* masm,
Label* operand_conversion_failure);
// Test if operands are numbers (smi or HeapNumber objects), and load
// them into xmm0 and xmm1 if they are. Jump to label not_numbers if
// either operand is not a number. Operands are in edx and eax.
@ -771,8 +775,8 @@ class FloatingPointHelper : public AllStatic {
const char* GenericBinaryOpStub::GetName() {
if (name_ != NULL) return name_;
const int len = 100;
name_ = Bootstrapper::AllocateAutoDeletedArray(len);
const int kMaxNameLength = 100;
name_ = Bootstrapper::AllocateAutoDeletedArray(kMaxNameLength);
if (name_ == NULL) return "OOM";
const char* op_name = Token::Name(op_);
const char* overwrite_name;
@ -783,7 +787,7 @@ const char* GenericBinaryOpStub::GetName() {
default: overwrite_name = "UnknownOverwrite"; break;
}
OS::SNPrintF(Vector<char>(name_, len),
OS::SNPrintF(Vector<char>(name_, kMaxNameLength),
"GenericBinaryOpStub_%s_%s%s_%s%s",
op_name,
overwrite_name,
@ -7234,42 +7238,11 @@ void GenericBinaryOpStub::Generate(MacroAssembler* masm) {
case Token::SAR:
case Token::SHL:
case Token::SHR: {
FloatingPointHelper::CheckFloatOperands(masm, &call_runtime, ebx);
FloatingPointHelper::LoadFloatOperands(masm, ecx);
Label skip_allocation, non_smi_result, operand_conversion_failure;
// Reserve space for converted numbers.
__ sub(Operand(esp), Immediate(2 * kPointerSize));
if (use_sse3_) {
// Truncate the operands to 32-bit integers and check for
// exceptions in doing so.
CpuFeatures::Scope scope(SSE3);
__ fisttp_s(Operand(esp, 0 * kPointerSize));
__ fisttp_s(Operand(esp, 1 * kPointerSize));
__ fnstsw_ax();
__ test(eax, Immediate(1));
__ j(not_zero, &operand_conversion_failure);
} else {
// Check if right operand is int32.
__ fist_s(Operand(esp, 0 * kPointerSize));
__ fild_s(Operand(esp, 0 * kPointerSize));
__ FCmp();
__ j(not_zero, &operand_conversion_failure);
__ j(parity_even, &operand_conversion_failure);
// Check if left operand is int32.
__ fist_s(Operand(esp, 1 * kPointerSize));
__ fild_s(Operand(esp, 1 * kPointerSize));
__ FCmp();
__ j(not_zero, &operand_conversion_failure);
__ j(parity_even, &operand_conversion_failure);
}
// Get int32 operands and perform bitop.
__ pop(ecx);
__ pop(eax);
Label non_smi_result, skip_allocation;
Label operand_conversion_failure;
FloatingPointHelper::LoadAsIntegers(
masm,
&operand_conversion_failure);
switch (op_) {
case Token::BIT_OR: __ or_(eax, Operand(ecx)); break;
case Token::BIT_AND: __ and_(eax, Operand(ecx)); break;
@ -7321,22 +7294,8 @@ void GenericBinaryOpStub::Generate(MacroAssembler* masm) {
GenerateReturn(masm);
}
// Clear the FPU exception flag and reset the stack before calling
// the runtime system.
// Go to runtime for non-number inputs.
__ bind(&operand_conversion_failure);
__ add(Operand(esp), Immediate(2 * kPointerSize));
if (use_sse3_) {
// If we've used the SSE3 instructions for truncating the
// floating point values to integers and it failed, we have a
// pending #IA exception. Clear it.
__ fnclex();
} else {
// The non-SSE3 variant does early bailout if the right
// operand isn't a 32-bit integer, so we may have a single
// value on the FPU stack we need to get rid of.
__ ffree(0);
}
// SHR should return uint32 - go to runtime for non-smi/negative result.
if (op_ == Token::SHR) {
__ bind(&non_smi_result);
@ -7460,6 +7419,162 @@ void GenericBinaryOpStub::GenerateReturn(MacroAssembler* masm) {
}
// Get the integer part of a heap number. Surprisingly, all this bit twiddling
// is faster than using the built-in instructions on floating point registers.
// Trashes edi and ebx. Dest is ecx. Source cannot be ecx or one of the
// trashed registers.
void IntegerConvert(MacroAssembler* masm,
Register source,
Label* conversion_failure) {
Label done, right_exponent, normal_exponent;
Register scratch = ebx;
Register scratch2 = edi;
// Get exponent word.
__ mov(scratch, FieldOperand(source, HeapNumber::kExponentOffset));
// Get exponent alone in scratch2.
__ mov(scratch2, scratch);
__ and_(scratch2, HeapNumber::kExponentMask);
// Load ecx with zero. We use this either for the final shift or
// for the answer.
__ xor_(ecx, Operand(ecx));
// Check whether the exponent matches a 32 bit signed int that cannot be
// represented by a Smi. A non-smi 32 bit integer is 1.xxx * 2^30 so the
// exponent is 30 (biased). This is the exponent that we are fastest at and
// also the highest exponent we can handle here.
const uint32_t non_smi_exponent =
(HeapNumber::kExponentBias + 30) << HeapNumber::kExponentShift;
__ cmp(Operand(scratch2), Immediate(non_smi_exponent));
// If we have a match of the int32-but-not-Smi exponent then skip some logic.
__ j(equal, &right_exponent);
// If the exponent is higher than that then go to slow case. This catches
// numbers that don't fit in a signed int32, infinities and NaNs.
__ j(less, &normal_exponent);
{
// Handle a big exponent. The only reason we have this code is that the >>>
// operator has a tendency to generate numbers with an exponent of 31.
const uint32_t big_non_smi_exponent =
(HeapNumber::kExponentBias + 31) << HeapNumber::kExponentShift;
__ cmp(Operand(scratch2), Immediate(big_non_smi_exponent));
__ j(not_equal, conversion_failure);
// We have the big exponent, typically from >>>. This means the number is
// in the range 2^31 to 2^32 - 1. Get the top bits of the mantissa.
__ mov(scratch2, scratch);
__ and_(scratch2, HeapNumber::kMantissaMask);
// Put back the implicit 1.
__ or_(scratch2, 1 << HeapNumber::kExponentShift);
// Shift up the mantissa bits to take up the space the exponent used to
// take. We just orred in the implicit bit so that took care of one and
// we want to use the full unsigned range so we subtract 1 bit from the
// shift distance.
const int big_shift_distance = HeapNumber::kNonMantissaBitsInTopWord - 1;
__ shl(scratch2, big_shift_distance);
// Get the second half of the double.
__ mov(ecx, FieldOperand(source, HeapNumber::kMantissaOffset));
// Shift down 21 bits to get the most significant 11 bits or the low
// mantissa word.
__ shr(ecx, 32 - big_shift_distance);
__ or_(ecx, Operand(scratch2));
// We have the answer in ecx, but we may need to negate it.
__ test(scratch, Operand(scratch));
__ j(positive, &done);
__ neg(ecx);
__ jmp(&done);
}
__ bind(&normal_exponent);
// Exponent word in scratch, exponent part of exponent word in scratch2.
// Zero in ecx.
// We know the exponent is smaller than 30 (biased). If it is less than
// 0 (biased) then the number is smaller in magnitude than 1.0 * 2^0, ie
// it rounds to zero.
const uint32_t zero_exponent =
(HeapNumber::kExponentBias + 0) << HeapNumber::kExponentShift;
__ sub(Operand(scratch2), Immediate(zero_exponent));
// ecx already has a Smi zero.
__ j(less, &done);
// We have a shifted exponent between 0 and 30 in scratch2.
__ shr(scratch2, HeapNumber::kExponentShift);
__ mov(ecx, Immediate(30));
__ sub(ecx, Operand(scratch2));
__ bind(&right_exponent);
// Here ecx is the shift, scratch is the exponent word.
// Get the top bits of the mantissa.
__ and_(scratch, HeapNumber::kMantissaMask);
// Put back the implicit 1.
__ or_(scratch, 1 << HeapNumber::kExponentShift);
// Shift up the mantissa bits to take up the space the exponent used to
// take. We have kExponentShift + 1 significant bits int he low end of the
// word. Shift them to the top bits.
const int shift_distance = HeapNumber::kNonMantissaBitsInTopWord - 2;
__ shl(scratch, shift_distance);
// Get the second half of the double. For some exponents we don't
// actually need this because the bits get shifted out again, but
// it's probably slower to test than just to do it.
__ mov(scratch2, FieldOperand(source, HeapNumber::kMantissaOffset));
// Shift down 22 bits to get the most significant 10 bits or the low mantissa
// word.
__ shr(scratch2, 32 - shift_distance);
__ or_(scratch2, Operand(scratch));
// Move down according to the exponent.
__ shr_cl(scratch2);
// Now the unsigned answer is in scratch2. We need to move it to ecx and
// we may need to fix the sign.
Label negative;
__ xor_(ecx, Operand(ecx));
__ cmp(ecx, FieldOperand(source, HeapNumber::kExponentOffset));
__ j(greater, &negative);
__ mov(ecx, scratch2);
__ jmp(&done);
__ bind(&negative);
__ sub(ecx, Operand(scratch2));
__ bind(&done);
}
// Input: edx, eax are the left and right objects of a bit op.
// Output: eax, ecx are left and right integers for a bit op.
void FloatingPointHelper::LoadAsIntegers(MacroAssembler* masm,
Label* conversion_failure) {
// Check float operands.
Label arg1_is_object, arg2_is_object, load_arg2;
Label done;
__ test(edx, Immediate(kSmiTagMask));
__ j(not_zero, &arg1_is_object);
__ sar(edx, kSmiTagSize);
__ jmp(&load_arg2);
__ bind(&arg1_is_object);
__ mov(ebx, FieldOperand(edx, HeapObject::kMapOffset));
__ cmp(ebx, Factory::heap_number_map());
__ j(not_equal, conversion_failure);
// Get the untagged integer version of the edx heap number in ecx.
IntegerConvert(masm, edx, conversion_failure);
__ mov(edx, ecx);
// Here edx has the untagged integer, eax has a Smi or a heap number.
__ bind(&load_arg2);
// Test if arg2 is a Smi.
__ test(eax, Immediate(kSmiTagMask));
__ j(not_zero, &arg2_is_object);
__ sar(eax, kSmiTagSize);
__ mov(ecx, eax);
__ jmp(&done);
__ bind(&arg2_is_object);
__ mov(ebx, FieldOperand(eax, HeapObject::kMapOffset));
__ cmp(ebx, Factory::heap_number_map());
__ j(not_equal, conversion_failure);
// Get the untagged integer version of the eax heap number in ecx.
IntegerConvert(masm, eax, conversion_failure);
__ bind(&done);
__ mov(eax, edx);
}
void FloatingPointHelper::LoadFloatOperand(MacroAssembler* masm,
Register number) {
Label load_smi, done;