From c887e40c9ae7cb09b96fd018999608ff4a36bddd Mon Sep 17 00:00:00 2001 From: Clemens Hammacher Date: Tue, 11 Sep 2018 15:47:29 +0200 Subject: [PATCH] [assembler][ia32] Don't clobber random registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fallback for {Pinsrd} and {Pextrd} for the non-avx and non-sse variant clobbered the {xmm0} register. This CL fixes this by storing the values on the stack and modifying them there instead. The alternative would have been to pass in a scratch register. But this path is not commonly used and we cannot express in the API whether the scratch register is needed or not. So we would sometimes have to spill a register to pass it as scratch register even though it is then unused. R=titzer@chromium.org CC=​mstarzinger@chromium.org Bug: v8:6600 Change-Id: Ieae53b892cc55eed4fcfa3d0e7f82f3e1afe72be Reviewed-on: https://chromium-review.googlesource.com/1219633 Reviewed-by: Michael Starzinger Reviewed-by: Ben Titzer Commit-Queue: Clemens Hammacher Cr-Commit-Position: refs/heads/master@{#55800} --- src/compiler/ia32/code-generator-ia32.cc | 4 +- src/ia32/macro-assembler-ia32.cc | 51 ++++++++++++++---------- src/ia32/macro-assembler-ia32.h | 8 ++-- 3 files changed, 34 insertions(+), 29 deletions(-) diff --git a/src/compiler/ia32/code-generator-ia32.cc b/src/compiler/ia32/code-generator-ia32.cc index a81e2a110e..1feaee3f59 100644 --- a/src/compiler/ia32/code-generator-ia32.cc +++ b/src/compiler/ia32/code-generator-ia32.cc @@ -1463,10 +1463,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } break; case kSSEFloat64InsertLowWord32: - __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0, true); + __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0); break; case kSSEFloat64InsertHighWord32: - __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1, true); + __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1); break; case kSSEFloat64LoadLowWord32: __ movd(i.OutputDoubleRegister(), i.InputOperand(0)); diff --git a/src/ia32/macro-assembler-ia32.cc b/src/ia32/macro-assembler-ia32.cc index aab9b3d3fc..88ed5f9df0 100644 --- a/src/ia32/macro-assembler-ia32.cc +++ b/src/ia32/macro-assembler-ia32.cc @@ -1509,37 +1509,44 @@ void TurboAssembler::Pextrd(Register dst, XMMRegister src, int8_t imm8) { pextrd(dst, src, imm8); return; } - DCHECK_LT(imm8, 4); - pshufd(xmm0, src, imm8); - movd(dst, xmm0); + // Without AVX or SSE, we can only have 64-bit values in xmm registers. + // We don't have an xmm scratch register, so move the data via the stack. This + // path is rarely required, so it's acceptable to be slow. + DCHECK_LT(imm8, 2); + sub(esp, Immediate(kDoubleSize)); + movsd(Operand(esp, 0), src); + mov(dst, Operand(esp, imm8 * kUInt32Size)); + add(esp, Immediate(kDoubleSize)); } -void TurboAssembler::Pinsrd(XMMRegister dst, Operand src, int8_t imm8, - bool is_64_bits) { +void TurboAssembler::Pinsrd(XMMRegister dst, Operand src, int8_t imm8) { + if (CpuFeatures::IsSupported(AVX)) { + CpuFeatureScope scope(this, AVX); + vpinsrd(dst, dst, src, imm8); + return; + } if (CpuFeatures::IsSupported(SSE4_1)) { CpuFeatureScope sse_scope(this, SSE4_1); pinsrd(dst, src, imm8); return; } - if (is_64_bits) { - movd(xmm0, src); - if (imm8 == 1) { - punpckldq(dst, xmm0); - } else { - DCHECK_EQ(0, imm8); - psrlq(dst, 32); - punpckldq(xmm0, dst); - movaps(dst, xmm0); - } + // Without AVX or SSE, we can only have 64-bit values in xmm registers. + // We don't have an xmm scratch register, so move the data via the stack. This + // path is rarely required, so it's acceptable to be slow. + DCHECK_LT(imm8, 2); + sub(esp, Immediate(kDoubleSize)); + // Write original content of {dst} to the stack. + movsd(Operand(esp, 0), dst); + // Overwrite the portion specified in {imm8}. + if (src.is_reg_only()) { + mov(Operand(esp, imm8 * kUInt32Size), src.reg()); } else { - DCHECK_LT(imm8, 4); - push(eax); - mov(eax, src); - pinsrw(dst, eax, imm8 * 2); - shr(eax, 16); - pinsrw(dst, eax, imm8 * 2 + 1); - pop(eax); + movss(dst, src); + movss(Operand(esp, imm8 * kUInt32Size), dst); } + // Load back the full value into {dst}. + movsd(dst, Operand(esp, 0)); + add(esp, Immediate(kDoubleSize)); } void TurboAssembler::Lzcnt(Register dst, Operand src) { diff --git a/src/ia32/macro-assembler-ia32.h b/src/ia32/macro-assembler-ia32.h index e3e89f3d4b..c48d1cec34 100644 --- a/src/ia32/macro-assembler-ia32.h +++ b/src/ia32/macro-assembler-ia32.h @@ -401,12 +401,10 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { void Pextrb(Register dst, XMMRegister src, int8_t imm8); void Pextrw(Register dst, XMMRegister src, int8_t imm8); void Pextrd(Register dst, XMMRegister src, int8_t imm8); - void Pinsrd(XMMRegister dst, Register src, int8_t imm8, - bool is_64_bits = false) { - Pinsrd(dst, Operand(src), imm8, is_64_bits); + void Pinsrd(XMMRegister dst, Register src, int8_t imm8) { + Pinsrd(dst, Operand(src), imm8); } - void Pinsrd(XMMRegister dst, Operand src, int8_t imm8, - bool is_64_bits = false); + void Pinsrd(XMMRegister dst, Operand src, int8_t imm8); // Expression support // cvtsi2sd instruction only writes to the low 64-bit of dst register, which