Port inlined version of swap primitive for sorting from ia32 to x64.

Original code review for ia32 version: http://codereview.chromium.org/1709008
Review URL: http://codereview.chromium.org/1858002

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@4569 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
ager@chromium.org 2010-05-03 18:18:25 +00:00
parent 130d6adf78
commit cf54120a58
5 changed files with 163 additions and 61 deletions

View File

@ -636,7 +636,9 @@ class CodeGenerator: public AstVisitor {
// Fast support for number to string.
void GenerateNumberToString(ZoneList<Expression*>* args);
// Fast swapping of elements.
// Fast swapping of elements. Takes three expressions, the object and two
// indices. This should only be used if the indices are known to be
// non-negative and within bounds of the elements array at the call site.
void GenerateSwapElements(ZoneList<Expression*>* args);
// Fast call for custom callbacks.

View File

@ -4487,6 +4487,28 @@ void CodeGenerator::GenerateNumberToString(ZoneList<Expression*>* args) {
}
class DeferredSwapElements: public DeferredCode {
public:
DeferredSwapElements(Register object, Register index1, Register index2)
: object_(object), index1_(index1), index2_(index2) {
set_comment("[ DeferredSwapElements");
}
virtual void Generate();
private:
Register object_, index1_, index2_;
};
void DeferredSwapElements::Generate() {
__ push(object_);
__ push(index1_);
__ push(index2_);
__ CallRuntime(Runtime::kSwapElements, 3);
}
void CodeGenerator::GenerateSwapElements(ZoneList<Expression*>* args) {
Comment cmnt(masm_, "[ GenerateSwapElements");
@ -4496,8 +4518,81 @@ void CodeGenerator::GenerateSwapElements(ZoneList<Expression*>* args) {
Load(args->at(1));
Load(args->at(2));
Result result = frame_->CallRuntime(Runtime::kSwapElements, 3);
frame_->Push(&result);
Result index2 = frame_->Pop();
index2.ToRegister();
Result index1 = frame_->Pop();
index1.ToRegister();
Result object = frame_->Pop();
object.ToRegister();
Result tmp1 = allocator()->Allocate();
tmp1.ToRegister();
Result tmp2 = allocator()->Allocate();
tmp2.ToRegister();
frame_->Spill(object.reg());
frame_->Spill(index1.reg());
frame_->Spill(index2.reg());
DeferredSwapElements* deferred = new DeferredSwapElements(object.reg(),
index1.reg(),
index2.reg());
// Fetch the map and check if array is in fast case.
// Check that object doesn't require security checks and
// has no indexed interceptor.
__ CmpObjectType(object.reg(), FIRST_JS_OBJECT_TYPE, tmp1.reg());
deferred->Branch(below);
__ testb(FieldOperand(tmp1.reg(), Map::kBitFieldOffset),
Immediate(KeyedLoadIC::kSlowCaseBitFieldMask));
deferred->Branch(not_zero);
// Check the object's elements are in fast case.
__ movq(tmp1.reg(), FieldOperand(object.reg(), JSObject::kElementsOffset));
__ CompareRoot(FieldOperand(tmp1.reg(), HeapObject::kMapOffset),
Heap::kFixedArrayMapRootIndex);
deferred->Branch(not_equal);
// Check that both indices are smis.
Condition both_smi = __ CheckBothSmi(index1.reg(), index2.reg());
deferred->Branch(NegateCondition(both_smi));
// Bring addresses into index1 and index2.
__ SmiToInteger32(index1.reg(), index1.reg());
__ lea(index1.reg(), FieldOperand(tmp1.reg(),
index1.reg(),
times_pointer_size,
FixedArray::kHeaderSize));
__ SmiToInteger32(index2.reg(), index2.reg());
__ lea(index2.reg(), FieldOperand(tmp1.reg(),
index2.reg(),
times_pointer_size,
FixedArray::kHeaderSize));
// Swap elements.
__ movq(object.reg(), Operand(index1.reg(), 0));
__ movq(tmp2.reg(), Operand(index2.reg(), 0));
__ movq(Operand(index2.reg(), 0), object.reg());
__ movq(Operand(index1.reg(), 0), tmp2.reg());
Label done;
__ InNewSpace(tmp1.reg(), tmp2.reg(), equal, &done);
// Possible optimization: do a check that both values are Smis
// (or them and test against Smi mask.)
__ movq(tmp2.reg(), tmp1.reg());
RecordWriteStub recordWrite1(tmp2.reg(), index1.reg(), object.reg());
__ CallStub(&recordWrite1);
RecordWriteStub recordWrite2(tmp1.reg(), index2.reg(), object.reg());
__ CallStub(&recordWrite2);
__ bind(&done);
deferred->BindExit();
frame_->Push(Factory::undefined_value());
}
@ -8341,6 +8436,12 @@ void NumberToStringStub::Generate(MacroAssembler* masm) {
}
void RecordWriteStub::Generate(MacroAssembler* masm) {
masm->RecordWriteHelper(object_, addr_, scratch_);
masm->ret(0);
}
static int NegativeComparisonResult(Condition cc) {
ASSERT(cc != equal);
ASSERT((cc == less) || (cc == less_equal)

View File

@ -591,7 +591,9 @@ class CodeGenerator: public AstVisitor {
// Fast support for number to string.
void GenerateNumberToString(ZoneList<Expression*>* args);
// Fast swapping of elements.
// Fast swapping of elements. Takes three expressions, the object and two
// indices. This should only be used if the indices are known to be
// non-negative and within bounds of the elements array at the call site.
void GenerateSwapElements(ZoneList<Expression*>* args);
// Fast call for custom callbacks.
@ -1011,6 +1013,42 @@ class NumberToStringStub: public CodeStub {
};
class RecordWriteStub : public CodeStub {
public:
RecordWriteStub(Register object, Register addr, Register scratch)
: object_(object), addr_(addr), scratch_(scratch) { }
void Generate(MacroAssembler* masm);
private:
Register object_;
Register addr_;
Register scratch_;
#ifdef DEBUG
void Print() {
PrintF("RecordWriteStub (object reg %d), (addr reg %d), (scratch reg %d)\n",
object_.code(), addr_.code(), scratch_.code());
}
#endif
// Minor key encoding in 12 bits of three registers (object, address and
// scratch) OOOOAAAASSSS.
class ScratchBits : public BitField<uint32_t, 0, 4> {};
class AddressBits : public BitField<uint32_t, 4, 4> {};
class ObjectBits : public BitField<uint32_t, 8, 4> {};
Major MajorKey() { return RecordWrite; }
int MinorKey() {
// Encode the registers.
return ObjectBits::encode(object_.code()) |
AddressBits::encode(addr_.code()) |
ScratchBits::encode(scratch_.code());
}
};
} } // namespace v8::internal
#endif // V8_X64_CODEGEN_X64_H_

View File

@ -72,8 +72,7 @@ void MacroAssembler::StackLimitCheck(Label* on_stack_overflow) {
}
static void RecordWriteHelper(MacroAssembler* masm,
Register object,
void MacroAssembler::RecordWriteHelper(Register object,
Register addr,
Register scratch) {
Label fast;
@ -81,26 +80,26 @@ static void RecordWriteHelper(MacroAssembler* masm,
// Compute the page start address from the heap object pointer, and reuse
// the 'object' register for it.
ASSERT(is_int32(~Page::kPageAlignmentMask));
masm->and_(object,
and_(object,
Immediate(static_cast<int32_t>(~Page::kPageAlignmentMask)));
Register page_start = object;
// Compute the bit addr in the remembered set/index of the pointer in the
// page. Reuse 'addr' as pointer_offset.
masm->subq(addr, page_start);
masm->shr(addr, Immediate(kPointerSizeLog2));
subq(addr, page_start);
shr(addr, Immediate(kPointerSizeLog2));
Register pointer_offset = addr;
// If the bit offset lies beyond the normal remembered set range, it is in
// the extra remembered set area of a large object.
masm->cmpq(pointer_offset, Immediate(Page::kPageSize / kPointerSize));
masm->j(less, &fast);
cmpq(pointer_offset, Immediate(Page::kPageSize / kPointerSize));
j(less, &fast);
// Adjust 'page_start' so that addressing using 'pointer_offset' hits the
// extra remembered set after the large object.
// Load the array length into 'scratch'.
masm->movl(scratch,
movl(scratch,
Operand(page_start,
Page::kObjectStartOffset + FixedArray::kLengthOffset));
Register array_length = scratch;
@ -111,7 +110,7 @@ static void RecordWriteHelper(MacroAssembler* masm,
// Add the delta between the end of the normal RSet and the start of the
// extra RSet to 'page_start', so that addressing the bit using
// 'pointer_offset' hits the extra RSet words.
masm->lea(page_start,
lea(page_start,
Operand(page_start, array_length, times_pointer_size,
Page::kObjectStartOffset + FixedArray::kHeaderSize
- Page::kRSetEndOffset));
@ -120,50 +119,8 @@ static void RecordWriteHelper(MacroAssembler* masm,
// to limit code size. We should probably evaluate this decision by
// measuring the performance of an equivalent implementation using
// "simpler" instructions
masm->bind(&fast);
masm->bts(Operand(page_start, Page::kRSetOffset), pointer_offset);
}
class RecordWriteStub : public CodeStub {
public:
RecordWriteStub(Register object, Register addr, Register scratch)
: object_(object), addr_(addr), scratch_(scratch) { }
void Generate(MacroAssembler* masm);
private:
Register object_;
Register addr_;
Register scratch_;
#ifdef DEBUG
void Print() {
PrintF("RecordWriteStub (object reg %d), (addr reg %d), (scratch reg %d)\n",
object_.code(), addr_.code(), scratch_.code());
}
#endif
// Minor key encoding in 12 bits of three registers (object, address and
// scratch) OOOOAAAASSSS.
class ScratchBits : public BitField<uint32_t, 0, 4> {};
class AddressBits : public BitField<uint32_t, 4, 4> {};
class ObjectBits : public BitField<uint32_t, 8, 4> {};
Major MajorKey() { return RecordWrite; }
int MinorKey() {
// Encode the registers.
return ObjectBits::encode(object_.code()) |
AddressBits::encode(addr_.code()) |
ScratchBits::encode(scratch_.code());
}
};
void RecordWriteStub::Generate(MacroAssembler* masm) {
RecordWriteHelper(masm, object_, addr_, scratch_);
masm->ret(0);
bind(&fast);
bts(Operand(page_start, Page::kRSetOffset), pointer_offset);
}
@ -279,7 +236,7 @@ void MacroAssembler::RecordWriteNonSmi(Register object,
// If we are already generating a shared stub, not inlining the
// record write code isn't going to save us any memory.
if (generating_stub()) {
RecordWriteHelper(this, object, dst, scratch);
RecordWriteHelper(object, dst, scratch);
} else {
RecordWriteStub stub(object, dst, scratch);
CallStub(&stub);

View File

@ -66,6 +66,10 @@ class MacroAssembler: public Assembler {
// ---------------------------------------------------------------------------
// GC Support
void RecordWriteHelper(Register object,
Register addr,
Register scratch);
// Check if object is in new space. The condition cc can be equal or
// not_equal. If it is equal a jump will be done if the object is on new
// space. The register scratch can be object itself, but it will be clobbered.