Optimize bitops with non-Smi inputs. Instead of converting both inputs
to floating point and then converting back we convert directly to a 32 bit integer. In addition the bit twiddling implementation of float- to-integer conversion has been ported from ARM. Testing has shown that this runs faster than the x87 or SSE3 rounding instructions. This change is IA32 only. There may be a smaller benefit from doing the same on x64. Review URL: http://codereview.chromium.org/506052 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@3492 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
c7c821fdfb
commit
0553c7feb9
@ -305,7 +305,9 @@ class UnarySubStub : public CodeStub {
|
||||
int MinorKey() { return overwrite_ ? 1 : 0; }
|
||||
void Generate(MacroAssembler* masm);
|
||||
|
||||
const char* GetName() { return "UnarySubStub"; }
|
||||
const char* GetName() {
|
||||
return overwrite_ ? "UnarySubStub_Overwrite" : "UnarySubStub_Alloc";
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
@ -761,6 +761,10 @@ class FloatingPointHelper : public AllStatic {
|
||||
static void CheckFloatOperands(MacroAssembler* masm,
|
||||
Label* non_float,
|
||||
Register scratch);
|
||||
// Takes the operands in edx and eax and loads them as integers in eax
|
||||
// and ecx.
|
||||
static void LoadAsIntegers(MacroAssembler* masm,
|
||||
Label* operand_conversion_failure);
|
||||
// Test if operands are numbers (smi or HeapNumber objects), and load
|
||||
// them into xmm0 and xmm1 if they are. Jump to label not_numbers if
|
||||
// either operand is not a number. Operands are in edx and eax.
|
||||
@ -771,8 +775,8 @@ class FloatingPointHelper : public AllStatic {
|
||||
|
||||
const char* GenericBinaryOpStub::GetName() {
|
||||
if (name_ != NULL) return name_;
|
||||
const int len = 100;
|
||||
name_ = Bootstrapper::AllocateAutoDeletedArray(len);
|
||||
const int kMaxNameLength = 100;
|
||||
name_ = Bootstrapper::AllocateAutoDeletedArray(kMaxNameLength);
|
||||
if (name_ == NULL) return "OOM";
|
||||
const char* op_name = Token::Name(op_);
|
||||
const char* overwrite_name;
|
||||
@ -783,7 +787,7 @@ const char* GenericBinaryOpStub::GetName() {
|
||||
default: overwrite_name = "UnknownOverwrite"; break;
|
||||
}
|
||||
|
||||
OS::SNPrintF(Vector<char>(name_, len),
|
||||
OS::SNPrintF(Vector<char>(name_, kMaxNameLength),
|
||||
"GenericBinaryOpStub_%s_%s%s_%s%s",
|
||||
op_name,
|
||||
overwrite_name,
|
||||
@ -7234,42 +7238,11 @@ void GenericBinaryOpStub::Generate(MacroAssembler* masm) {
|
||||
case Token::SAR:
|
||||
case Token::SHL:
|
||||
case Token::SHR: {
|
||||
FloatingPointHelper::CheckFloatOperands(masm, &call_runtime, ebx);
|
||||
FloatingPointHelper::LoadFloatOperands(masm, ecx);
|
||||
|
||||
Label skip_allocation, non_smi_result, operand_conversion_failure;
|
||||
|
||||
// Reserve space for converted numbers.
|
||||
__ sub(Operand(esp), Immediate(2 * kPointerSize));
|
||||
|
||||
if (use_sse3_) {
|
||||
// Truncate the operands to 32-bit integers and check for
|
||||
// exceptions in doing so.
|
||||
CpuFeatures::Scope scope(SSE3);
|
||||
__ fisttp_s(Operand(esp, 0 * kPointerSize));
|
||||
__ fisttp_s(Operand(esp, 1 * kPointerSize));
|
||||
__ fnstsw_ax();
|
||||
__ test(eax, Immediate(1));
|
||||
__ j(not_zero, &operand_conversion_failure);
|
||||
} else {
|
||||
// Check if right operand is int32.
|
||||
__ fist_s(Operand(esp, 0 * kPointerSize));
|
||||
__ fild_s(Operand(esp, 0 * kPointerSize));
|
||||
__ FCmp();
|
||||
__ j(not_zero, &operand_conversion_failure);
|
||||
__ j(parity_even, &operand_conversion_failure);
|
||||
|
||||
// Check if left operand is int32.
|
||||
__ fist_s(Operand(esp, 1 * kPointerSize));
|
||||
__ fild_s(Operand(esp, 1 * kPointerSize));
|
||||
__ FCmp();
|
||||
__ j(not_zero, &operand_conversion_failure);
|
||||
__ j(parity_even, &operand_conversion_failure);
|
||||
}
|
||||
|
||||
// Get int32 operands and perform bitop.
|
||||
__ pop(ecx);
|
||||
__ pop(eax);
|
||||
Label non_smi_result, skip_allocation;
|
||||
Label operand_conversion_failure;
|
||||
FloatingPointHelper::LoadAsIntegers(
|
||||
masm,
|
||||
&operand_conversion_failure);
|
||||
switch (op_) {
|
||||
case Token::BIT_OR: __ or_(eax, Operand(ecx)); break;
|
||||
case Token::BIT_AND: __ and_(eax, Operand(ecx)); break;
|
||||
@ -7321,22 +7294,8 @@ void GenericBinaryOpStub::Generate(MacroAssembler* masm) {
|
||||
GenerateReturn(masm);
|
||||
}
|
||||
|
||||
// Clear the FPU exception flag and reset the stack before calling
|
||||
// the runtime system.
|
||||
// Go to runtime for non-number inputs.
|
||||
__ bind(&operand_conversion_failure);
|
||||
__ add(Operand(esp), Immediate(2 * kPointerSize));
|
||||
if (use_sse3_) {
|
||||
// If we've used the SSE3 instructions for truncating the
|
||||
// floating point values to integers and it failed, we have a
|
||||
// pending #IA exception. Clear it.
|
||||
__ fnclex();
|
||||
} else {
|
||||
// The non-SSE3 variant does early bailout if the right
|
||||
// operand isn't a 32-bit integer, so we may have a single
|
||||
// value on the FPU stack we need to get rid of.
|
||||
__ ffree(0);
|
||||
}
|
||||
|
||||
// SHR should return uint32 - go to runtime for non-smi/negative result.
|
||||
if (op_ == Token::SHR) {
|
||||
__ bind(&non_smi_result);
|
||||
@ -7460,6 +7419,162 @@ void GenericBinaryOpStub::GenerateReturn(MacroAssembler* masm) {
|
||||
}
|
||||
|
||||
|
||||
// Get the integer part of a heap number. Surprisingly, all this bit twiddling
|
||||
// is faster than using the built-in instructions on floating point registers.
|
||||
// Trashes edi and ebx. Dest is ecx. Source cannot be ecx or one of the
|
||||
// trashed registers.
|
||||
void IntegerConvert(MacroAssembler* masm,
|
||||
Register source,
|
||||
Label* conversion_failure) {
|
||||
Label done, right_exponent, normal_exponent;
|
||||
Register scratch = ebx;
|
||||
Register scratch2 = edi;
|
||||
// Get exponent word.
|
||||
__ mov(scratch, FieldOperand(source, HeapNumber::kExponentOffset));
|
||||
// Get exponent alone in scratch2.
|
||||
__ mov(scratch2, scratch);
|
||||
__ and_(scratch2, HeapNumber::kExponentMask);
|
||||
// Load ecx with zero. We use this either for the final shift or
|
||||
// for the answer.
|
||||
__ xor_(ecx, Operand(ecx));
|
||||
// Check whether the exponent matches a 32 bit signed int that cannot be
|
||||
// represented by a Smi. A non-smi 32 bit integer is 1.xxx * 2^30 so the
|
||||
// exponent is 30 (biased). This is the exponent that we are fastest at and
|
||||
// also the highest exponent we can handle here.
|
||||
const uint32_t non_smi_exponent =
|
||||
(HeapNumber::kExponentBias + 30) << HeapNumber::kExponentShift;
|
||||
__ cmp(Operand(scratch2), Immediate(non_smi_exponent));
|
||||
// If we have a match of the int32-but-not-Smi exponent then skip some logic.
|
||||
__ j(equal, &right_exponent);
|
||||
// If the exponent is higher than that then go to slow case. This catches
|
||||
// numbers that don't fit in a signed int32, infinities and NaNs.
|
||||
__ j(less, &normal_exponent);
|
||||
|
||||
{
|
||||
// Handle a big exponent. The only reason we have this code is that the >>>
|
||||
// operator has a tendency to generate numbers with an exponent of 31.
|
||||
const uint32_t big_non_smi_exponent =
|
||||
(HeapNumber::kExponentBias + 31) << HeapNumber::kExponentShift;
|
||||
__ cmp(Operand(scratch2), Immediate(big_non_smi_exponent));
|
||||
__ j(not_equal, conversion_failure);
|
||||
// We have the big exponent, typically from >>>. This means the number is
|
||||
// in the range 2^31 to 2^32 - 1. Get the top bits of the mantissa.
|
||||
__ mov(scratch2, scratch);
|
||||
__ and_(scratch2, HeapNumber::kMantissaMask);
|
||||
// Put back the implicit 1.
|
||||
__ or_(scratch2, 1 << HeapNumber::kExponentShift);
|
||||
// Shift up the mantissa bits to take up the space the exponent used to
|
||||
// take. We just orred in the implicit bit so that took care of one and
|
||||
// we want to use the full unsigned range so we subtract 1 bit from the
|
||||
// shift distance.
|
||||
const int big_shift_distance = HeapNumber::kNonMantissaBitsInTopWord - 1;
|
||||
__ shl(scratch2, big_shift_distance);
|
||||
// Get the second half of the double.
|
||||
__ mov(ecx, FieldOperand(source, HeapNumber::kMantissaOffset));
|
||||
// Shift down 21 bits to get the most significant 11 bits or the low
|
||||
// mantissa word.
|
||||
__ shr(ecx, 32 - big_shift_distance);
|
||||
__ or_(ecx, Operand(scratch2));
|
||||
// We have the answer in ecx, but we may need to negate it.
|
||||
__ test(scratch, Operand(scratch));
|
||||
__ j(positive, &done);
|
||||
__ neg(ecx);
|
||||
__ jmp(&done);
|
||||
}
|
||||
|
||||
__ bind(&normal_exponent);
|
||||
// Exponent word in scratch, exponent part of exponent word in scratch2.
|
||||
// Zero in ecx.
|
||||
// We know the exponent is smaller than 30 (biased). If it is less than
|
||||
// 0 (biased) then the number is smaller in magnitude than 1.0 * 2^0, ie
|
||||
// it rounds to zero.
|
||||
const uint32_t zero_exponent =
|
||||
(HeapNumber::kExponentBias + 0) << HeapNumber::kExponentShift;
|
||||
__ sub(Operand(scratch2), Immediate(zero_exponent));
|
||||
// ecx already has a Smi zero.
|
||||
__ j(less, &done);
|
||||
|
||||
// We have a shifted exponent between 0 and 30 in scratch2.
|
||||
__ shr(scratch2, HeapNumber::kExponentShift);
|
||||
__ mov(ecx, Immediate(30));
|
||||
__ sub(ecx, Operand(scratch2));
|
||||
|
||||
__ bind(&right_exponent);
|
||||
// Here ecx is the shift, scratch is the exponent word.
|
||||
// Get the top bits of the mantissa.
|
||||
__ and_(scratch, HeapNumber::kMantissaMask);
|
||||
// Put back the implicit 1.
|
||||
__ or_(scratch, 1 << HeapNumber::kExponentShift);
|
||||
// Shift up the mantissa bits to take up the space the exponent used to
|
||||
// take. We have kExponentShift + 1 significant bits int he low end of the
|
||||
// word. Shift them to the top bits.
|
||||
const int shift_distance = HeapNumber::kNonMantissaBitsInTopWord - 2;
|
||||
__ shl(scratch, shift_distance);
|
||||
// Get the second half of the double. For some exponents we don't
|
||||
// actually need this because the bits get shifted out again, but
|
||||
// it's probably slower to test than just to do it.
|
||||
__ mov(scratch2, FieldOperand(source, HeapNumber::kMantissaOffset));
|
||||
// Shift down 22 bits to get the most significant 10 bits or the low mantissa
|
||||
// word.
|
||||
__ shr(scratch2, 32 - shift_distance);
|
||||
__ or_(scratch2, Operand(scratch));
|
||||
// Move down according to the exponent.
|
||||
__ shr_cl(scratch2);
|
||||
// Now the unsigned answer is in scratch2. We need to move it to ecx and
|
||||
// we may need to fix the sign.
|
||||
Label negative;
|
||||
__ xor_(ecx, Operand(ecx));
|
||||
__ cmp(ecx, FieldOperand(source, HeapNumber::kExponentOffset));
|
||||
__ j(greater, &negative);
|
||||
__ mov(ecx, scratch2);
|
||||
__ jmp(&done);
|
||||
__ bind(&negative);
|
||||
__ sub(ecx, Operand(scratch2));
|
||||
__ bind(&done);
|
||||
}
|
||||
|
||||
|
||||
// Input: edx, eax are the left and right objects of a bit op.
|
||||
// Output: eax, ecx are left and right integers for a bit op.
|
||||
void FloatingPointHelper::LoadAsIntegers(MacroAssembler* masm,
|
||||
Label* conversion_failure) {
|
||||
// Check float operands.
|
||||
Label arg1_is_object, arg2_is_object, load_arg2;
|
||||
Label done;
|
||||
|
||||
__ test(edx, Immediate(kSmiTagMask));
|
||||
__ j(not_zero, &arg1_is_object);
|
||||
__ sar(edx, kSmiTagSize);
|
||||
__ jmp(&load_arg2);
|
||||
|
||||
__ bind(&arg1_is_object);
|
||||
__ mov(ebx, FieldOperand(edx, HeapObject::kMapOffset));
|
||||
__ cmp(ebx, Factory::heap_number_map());
|
||||
__ j(not_equal, conversion_failure);
|
||||
// Get the untagged integer version of the edx heap number in ecx.
|
||||
IntegerConvert(masm, edx, conversion_failure);
|
||||
__ mov(edx, ecx);
|
||||
|
||||
// Here edx has the untagged integer, eax has a Smi or a heap number.
|
||||
__ bind(&load_arg2);
|
||||
// Test if arg2 is a Smi.
|
||||
__ test(eax, Immediate(kSmiTagMask));
|
||||
__ j(not_zero, &arg2_is_object);
|
||||
__ sar(eax, kSmiTagSize);
|
||||
__ mov(ecx, eax);
|
||||
__ jmp(&done);
|
||||
|
||||
__ bind(&arg2_is_object);
|
||||
__ mov(ebx, FieldOperand(eax, HeapObject::kMapOffset));
|
||||
__ cmp(ebx, Factory::heap_number_map());
|
||||
__ j(not_equal, conversion_failure);
|
||||
// Get the untagged integer version of the eax heap number in ecx.
|
||||
IntegerConvert(masm, eax, conversion_failure);
|
||||
__ bind(&done);
|
||||
__ mov(eax, edx);
|
||||
}
|
||||
|
||||
|
||||
void FloatingPointHelper::LoadFloatOperand(MacroAssembler* masm,
|
||||
Register number) {
|
||||
Label load_smi, done;
|
||||
|
Loading…
Reference in New Issue
Block a user