From c26f2c32f9dd14e6588ff650d9dac388e510bb7f Mon Sep 17 00:00:00 2001 From: "lrn@chromium.org" Date: Thu, 24 Mar 2011 12:24:28 +0000 Subject: [PATCH] Implemented FastAsciiStringJoin in X64 full codegen. Review URL: http://codereview.chromium.org/6729016 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@7345 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/x64/assembler-x64.cc | 6 + src/x64/assembler-x64.h | 1 + src/x64/full-codegen-x64.cc | 283 ++++++++++++++++++++++++++++++++- src/x64/macro-assembler-x64.cc | 93 +++++++++-- src/x64/macro-assembler-x64.h | 41 +++++ 5 files changed, 409 insertions(+), 15 deletions(-) diff --git a/src/x64/assembler-x64.cc b/src/x64/assembler-x64.cc index 2a6501500a..0744b8a306 100644 --- a/src/x64/assembler-x64.cc +++ b/src/x64/assembler-x64.cc @@ -944,6 +944,12 @@ void Assembler::clc() { emit(0xF8); } +void Assembler::cld() { + EnsureSpace ensure_space(this); + last_pc_ = pc_; + emit(0xFC); +} + void Assembler::cdq() { EnsureSpace ensure_space(this); last_pc_ = pc_; diff --git a/src/x64/assembler-x64.h b/src/x64/assembler-x64.h index 5d91436470..52aca63778 100644 --- a/src/x64/assembler-x64.h +++ b/src/x64/assembler-x64.h @@ -1137,6 +1137,7 @@ class Assembler : public AssemblerBase { // Miscellaneous void clc(); + void cld(); void cpuid(); void hlt(); void int3(); diff --git a/src/x64/full-codegen-x64.cc b/src/x64/full-codegen-x64.cc index dc02bc00eb..90afd85717 100644 --- a/src/x64/full-codegen-x64.cc +++ b/src/x64/full-codegen-x64.cc @@ -3224,7 +3224,288 @@ void FullCodeGenerator::EmitGetCachedArrayIndex(ZoneList<Expression*>* args) { void FullCodeGenerator::EmitFastAsciiArrayJoin(ZoneList<Expression*>* args) { - context()->Plug(Heap::kUndefinedValueRootIndex); + Label bailout, return_result, done, one_char_separator, long_separator, + non_trivial_array, not_size_one_array, loop, + loop_1, loop_1_condition, loop_2, loop_2_entry, loop_3, loop_3_entry; + ASSERT(args->length() == 2); + // We will leave the separator on the stack until the end of 
the function. + VisitForStackValue(args->at(1)); + // Load this to rax (= array) + VisitForAccumulatorValue(args->at(0)); + // All aliases of the same register have disjoint lifetimes. + Register array = rax; + Register elements = no_reg; // Will be rax. + + Register index = rdx; + + Register string_length = rcx; + + Register string = rsi; + + Register scratch = rbx; + + Register array_length = rdi; + Register result_pos = no_reg; // Will be rdi. + + Operand separator_operand = Operand(rsp, 2 * kPointerSize); + Operand result_operand = Operand(rsp, 1 * kPointerSize); + Operand array_length_operand = Operand(rsp, 0 * kPointerSize); + // Separator operand is already pushed. Make room for the two + // other stack fields, and clear the direction flag in anticipation + // of calling CopyBytes. + __ subq(rsp, Immediate(2 * kPointerSize)); + __ cld(); + // Check that the array is a JSArray + __ JumpIfSmi(array, &bailout); + __ CmpObjectType(array, JS_ARRAY_TYPE, scratch); + __ j(not_equal, &bailout); + + // Check that the array has fast elements. + __ testb(FieldOperand(scratch, Map::kBitField2Offset), + Immediate(1 << Map::kHasFastElements)); + __ j(zero, &bailout); + + // Array has fast elements, so its length must be a smi. + // If the array has length zero, return the empty string. + __ movq(array_length, FieldOperand(array, JSArray::kLengthOffset)); + __ SmiCompare(array_length, Smi::FromInt(0)); + __ j(not_zero, &non_trivial_array); + __ LoadRoot(rax, Heap::kEmptyStringRootIndex); + __ jmp(&return_result); + + // Save the array length on the stack. + __ bind(&non_trivial_array); + __ SmiToInteger32(array_length, array_length); + __ movl(array_length_operand, array_length); + + // Save the FixedArray containing array's elements. + // End of array's live range. 
+ elements = array; + __ movq(elements, FieldOperand(array, JSArray::kElementsOffset)); + array = no_reg; + + + // Check that all array elements are sequential ASCII strings, and + // accumulate the sum of their lengths, as a smi-encoded value. + __ Set(index, 0); + __ Set(string_length, 0); + // Loop condition: while (index < array_length). + // Live loop registers: index(int32), array_length(int32), string(String*), + // scratch, string_length(int32), elements(FixedArray*). + if (FLAG_debug_code) { + __ cmpq(index, array_length); + __ Assert(below, "No empty arrays here in EmitFastAsciiArrayJoin"); + } + __ bind(&loop); + __ movq(string, FieldOperand(elements, + index, + times_pointer_size, + FixedArray::kHeaderSize)); + __ JumpIfSmi(string, &bailout); + __ movq(scratch, FieldOperand(string, HeapObject::kMapOffset)); + __ movzxbl(scratch, FieldOperand(scratch, Map::kInstanceTypeOffset)); + __ andb(scratch, Immediate( + kIsNotStringMask | kStringEncodingMask | kStringRepresentationMask)); + __ cmpb(scratch, Immediate(kStringTag | kAsciiStringTag | kSeqStringTag)); + __ j(not_equal, &bailout); + __ AddSmiField(string_length, + FieldOperand(string, SeqAsciiString::kLengthOffset)); + __ j(overflow, &bailout); + __ incl(index); + __ cmpl(index, array_length); + __ j(less, &loop); + + // Live registers: + // string_length: Sum of string lengths. + // elements: FixedArray of strings. + // index: Array length. + // array_length: Array length. + + // If array_length is 1, return elements[0], a string. + __ cmpl(array_length, Immediate(1)); + __ j(not_equal, &not_size_one_array); + __ movq(rax, FieldOperand(elements, FixedArray::kHeaderSize)); + __ jmp(&return_result); + + __ bind(&not_size_one_array); + + // End of array_length live range. + result_pos = array_length; + array_length = no_reg; + + // Live registers: + // string_length: Sum of string lengths. + // elements: FixedArray of strings. + // index: Array length. 
+ + // Check that the separator is a sequential ASCII string. + __ movq(string, separator_operand); + __ JumpIfSmi(string, &bailout); + __ movq(scratch, FieldOperand(string, HeapObject::kMapOffset)); + __ movzxbl(scratch, FieldOperand(scratch, Map::kInstanceTypeOffset)); + __ andb(scratch, Immediate( + kIsNotStringMask | kStringEncodingMask | kStringRepresentationMask)); + __ cmpb(scratch, Immediate(kStringTag | kAsciiStringTag | kSeqStringTag)); + __ j(not_equal, &bailout); + + // Live registers: + // string_length: Sum of string lengths. + // elements: FixedArray of strings. + // index: Array length. + // string: Separator string. + + // Add (separator length times (array_length - 1)) to string_length. + __ SmiToInteger32(scratch, + FieldOperand(string, SeqAsciiString::kLengthOffset)); + __ decl(index); + __ imull(scratch, index); + __ j(overflow, &bailout); + __ addl(string_length, scratch); + __ j(overflow, &bailout); + + // Live registers and stack values: + // string_length: Total length of result string. + // elements: FixedArray of strings. + __ AllocateAsciiString(result_pos, string_length, scratch, + index, string, &bailout); + __ movq(result_operand, result_pos); + __ lea(result_pos, FieldOperand(result_pos, SeqAsciiString::kHeaderSize)); + + __ movq(string, separator_operand); + __ SmiCompare(FieldOperand(string, SeqAsciiString::kLengthOffset), + Smi::FromInt(1)); + __ j(equal, &one_char_separator); + __ j(greater, &long_separator); + + + // Empty separator case: + __ Set(index, 0); + __ movl(scratch, array_length_operand); + __ jmp(&loop_1_condition); + // Loop condition: while (index < array_length). + __ bind(&loop_1); + // Each iteration of the loop concatenates one string to the result. + // Live values in registers: + // index: which element of the elements array we are adding to the result. + // result_pos: the position to which we are currently copying characters. + // elements: the FixedArray of strings we are joining. 
+ // scratch: array length. + + // Get string = array[index]. + __ movq(string, FieldOperand(elements, index, + times_pointer_size, + FixedArray::kHeaderSize)); + __ SmiToInteger32(string_length, + FieldOperand(string, String::kLengthOffset)); + __ lea(string, + FieldOperand(string, SeqAsciiString::kHeaderSize)); + __ CopyBytes(result_pos, string, string_length); + __ incl(index); + __ bind(&loop_1_condition); + __ cmpl(index, scratch); + __ j(less, &loop_1); // Loop while (index < array_length). + __ jmp(&done); + + // Generic bailout code used from several places. + __ bind(&bailout); + __ LoadRoot(rax, Heap::kUndefinedValueRootIndex); + __ jmp(&return_result); + + + // One-character separator case + __ bind(&one_char_separator); + // Get the separator ascii character value. + // Register "string" holds the separator. + __ movzxbl(scratch, FieldOperand(string, SeqAsciiString::kHeaderSize)); + __ Set(index, 0); + // Jump into the loop after the code that copies the separator, so the first + // element is not preceded by a separator + __ jmp(&loop_2_entry); + // Loop condition: while (index < length). + __ bind(&loop_2); + // Each iteration of the loop concatenates one string to the result. + // Live values in registers: + // elements: The FixedArray of strings we are joining. + // index: which element of the elements array we are adding to the result. + // result_pos: the position to which we are currently copying characters. + // scratch: Separator character. + + // Copy the separator character to the result. + __ movb(Operand(result_pos, 0), scratch); + __ incq(result_pos); + + __ bind(&loop_2_entry); + // Get string = array[index]. 
+ __ movq(string, FieldOperand(elements, index, + times_pointer_size, + FixedArray::kHeaderSize)); + __ SmiToInteger32(string_length, + FieldOperand(string, String::kLengthOffset)); + __ lea(string, + FieldOperand(string, SeqAsciiString::kHeaderSize)); + __ CopyBytes(result_pos, string, string_length); + __ incl(index); + __ cmpl(index, array_length_operand); + __ j(less, &loop_2); // End while (index < length). + __ jmp(&done); + + + // Long separator case (separator is more than one character). + __ bind(&long_separator); + + // Make elements point to end of elements array, and index + // count from -array_length to zero, so we don't need to maintain + // a loop limit. + __ movl(index, array_length_operand); + __ lea(elements, FieldOperand(elements, index, times_pointer_size, + FixedArray::kHeaderSize)); + __ neg(index); + + // Replace separator string with pointer to its first character, and + // make scratch be its length. + __ movq(string, separator_operand); + __ SmiToInteger32(scratch, + FieldOperand(string, String::kLengthOffset)); + __ lea(string, + FieldOperand(string, SeqAsciiString::kHeaderSize)); + __ movq(separator_operand, string); + + // Jump into the loop after the code that copies the separator, so the first + // element is not preceded by a separator + __ jmp(&loop_3_entry); + // Loop condition: while (index < length). + __ bind(&loop_3); + // Each iteration of the loop concatenates one string to the result. + // Live values in registers: + // index: which element of the elements array we are adding to the result. + // result_pos: the position to which we are currently copying characters. + // scratch: Separator length. + // separator_operand (rsp[0x10]): Address of first char of separator. + + // Copy the separator to the result. + __ movq(string, separator_operand); + __ movl(string_length, scratch); + __ CopyBytes(result_pos, string, string_length, 2); + + __ bind(&loop_3_entry); + // Get string = array[index]. 
+ __ movq(string, Operand(elements, index, times_pointer_size, 0)); + __ SmiToInteger32(string_length, + FieldOperand(string, String::kLengthOffset)); + __ lea(string, + FieldOperand(string, SeqAsciiString::kHeaderSize)); + __ CopyBytes(result_pos, string, string_length); + __ incq(index); + __ j(not_equal, &loop_3); // Loop while (index < 0). + + __ bind(&done); + __ movq(rax, result_operand); + + __ bind(&return_result); + // Drop temp values from the stack, and restore context register. + __ addq(rsp, Immediate(3 * kPointerSize)); + __ movq(rsi, Operand(rbp, StandardFrameConstants::kContextOffset)); + context()->Plug(rax); } diff --git a/src/x64/macro-assembler-x64.cc b/src/x64/macro-assembler-x64.cc index 837afe81bb..46692bc16c 100644 --- a/src/x64/macro-assembler-x64.cc +++ b/src/x64/macro-assembler-x64.cc @@ -909,9 +909,9 @@ void MacroAssembler::Integer64PlusConstantToSmi(Register dst, Register src, int constant) { if (dst.is(src)) { - addq(dst, Immediate(constant)); + addl(dst, Immediate(constant)); } else { - lea(dst, Operand(src, constant)); + leal(dst, Operand(src, constant)); } shl(dst, Immediate(kSmiShift)); } @@ -1245,12 +1245,10 @@ void MacroAssembler::SmiAdd(Register dst, // No overflow checking. Use only when it's known that // overflowing is impossible. ASSERT(!dst.is(src2)); - if (dst.is(src1)) { - addq(dst, src2); - } else { + if (!dst.is(src1)) { movq(dst, src1); - addq(dst, src2); } + addq(dst, src2); Assert(no_overflow, "Smi addition overflow"); } @@ -1259,12 +1257,10 @@ void MacroAssembler::SmiSub(Register dst, Register src1, Register src2) { // No overflow checking. Use only when it's known that // overflowing is impossible (e.g., subtracting two positive smis). 
ASSERT(!dst.is(src2)); - if (dst.is(src1)) { - subq(dst, src2); - } else { + if (!dst.is(src1)) { movq(dst, src1); - subq(dst, src2); } + subq(dst, src2); Assert(no_overflow, "Smi subtraction overflow"); } @@ -1274,12 +1270,10 @@ void MacroAssembler::SmiSub(Register dst, const Operand& src2) { // No overflow checking. Use only when it's known that // overflowing is impossible (e.g., subtracting two positive smis). - if (dst.is(src1)) { - subq(dst, src2); - } else { + if (!dst.is(src1)) { movq(dst, src1); - subq(dst, src2); } + subq(dst, src2); Assert(no_overflow, "Smi subtraction overflow"); } @@ -1466,6 +1460,13 @@ SmiIndex MacroAssembler::SmiToNegativeIndex(Register dst, } +void MacroAssembler::AddSmiField(Register dst, const Operand& src) { + ASSERT_EQ(0, kSmiShift % kBitsPerByte); + addl(dst, Operand(src, kSmiShift / kBitsPerByte)); +} + + + void MacroAssembler::Move(Register dst, Register src) { if (!dst.is(src)) { movq(dst, src); @@ -2701,6 +2702,70 @@ void MacroAssembler::AllocateAsciiConsString(Register result, } +// Copy memory, byte-by-byte, from source to destination. Not optimized for +// long or aligned copies. The contents of scratch and length are destroyed. +// Destination is incremented by length, source, length and scratch are +// clobbered. +// A simpler loop is faster on small copies, but slower on large ones. +// The cld() instruction must have been emitted, to clear the direction flag, +// before calling this function. 
+void MacroAssembler::CopyBytes(Register destination, + Register source, + Register length, + int min_length, + Register scratch) { + ASSERT(min_length >= 0); + if (FLAG_debug_code) { + cmpl(length, Immediate(min_length)); + Assert(greater_equal, "Invalid min_length"); + } + Label loop, done, short_string, short_loop; + + const int kLongStringLimit = 20; + if (min_length <= kLongStringLimit) { + cmpl(length, Immediate(kLongStringLimit)); + j(less_equal, &short_string); + } + + ASSERT(source.is(rsi)); + ASSERT(destination.is(rdi)); + ASSERT(length.is(rcx)); + + // Because source is 8-byte aligned in our uses of this function, + // we keep source aligned for the rep movs operation by copying the odd bytes + // at the end of the ranges. + movq(scratch, length); + shrl(length, Immediate(3)); + repmovsq(); + // Move remaining bytes of length. + andl(scratch, Immediate(0x7)); + movq(length, Operand(source, scratch, times_1, -8)); + movq(Operand(destination, scratch, times_1, -8), length); + addq(destination, scratch); + + if (min_length <= kLongStringLimit) { + jmp(&done); + + bind(&short_string); + if (min_length == 0) { + testl(length, length); + j(zero, &done); + } + lea(scratch, Operand(destination, length, times_1, 0)); + + bind(&short_loop); + movb(length, Operand(source, 0)); + movb(Operand(destination, 0), length); + incq(source); + incq(destination); + cmpq(destination, scratch); + j(not_equal, &short_loop); + + bind(&done); + } +} + + void MacroAssembler::LoadContext(Register dst, int context_chain_length) { if (context_chain_length > 0) { // Move up the chain of contexts to the context containing the slot. 
diff --git a/src/x64/macro-assembler-x64.h b/src/x64/macro-assembler-x64.h index 384073bb5e..1ee0fe0204 100644 --- a/src/x64/macro-assembler-x64.h +++ b/src/x64/macro-assembler-x64.h @@ -470,6 +470,11 @@ class MacroAssembler: public Assembler { Register src1, Register src2, LabelType* on_not_smi_result); + template <typename LabelType> + void SmiAdd(Register dst, + Register src1, + const Operand& src2, + LabelType* on_not_smi_result); void SmiAdd(Register dst, Register src1, @@ -590,6 +595,10 @@ class MacroAssembler: public Assembler { // Converts a positive smi to a negative index. SmiIndex SmiToNegativeIndex(Register dst, Register src, int shift); + // Add the value of a smi in memory to an int32 register. + // Sets flags as a normal add. + void AddSmiField(Register dst, const Operand& src); + // Basic Smi operations. void Move(Register dst, Smi* source) { LoadSmiConstant(dst, source); @@ -1022,6 +1031,18 @@ class MacroAssembler: public Assembler { Handle<Object> CodeObject() { return code_object_; } + // Copy length bytes from source to destination. + // Uses scratch register internally (if you have a low-eight register + // free, do use it, otherwise kScratchRegister will be used). + // The min_length is a minimum limit on the value that length will have. + // The algorithm has some special cases that might be omitted if the string + // is known to always be long. 
+ void CopyBytes(Register destination, + Register source, + Register length, + int min_length = 0, + Register scratch = kScratchRegister); + // --------------------------------------------------------------------------- // StatsCounter support @@ -1272,6 +1293,26 @@ void MacroAssembler::SmiAdd(Register dst, } +template <typename LabelType> +void MacroAssembler::SmiAdd(Register dst, + Register src1, + const Operand& src2, + LabelType* on_not_smi_result) { + ASSERT_NOT_NULL(on_not_smi_result); + if (dst.is(src1)) { + movq(kScratchRegister, src1); + addq(kScratchRegister, src2); + j(overflow, on_not_smi_result); + movq(dst, kScratchRegister); + } else { + ASSERT(!src2.AddressUsesRegister(dst)); + movq(dst, src1); + addq(dst, src2); + j(overflow, on_not_smi_result); + } +} + + template <typename LabelType> void MacroAssembler::SmiSub(Register dst, Register src1,