diff --git a/src/compiler/backend/ia32/code-generator-ia32.cc b/src/compiler/backend/ia32/code-generator-ia32.cc index 0ceb8a8ca9..bf4ae33a7c 100644 --- a/src/compiler/backend/ia32/code-generator-ia32.cc +++ b/src/compiler/backend/ia32/code-generator-ia32.cc @@ -3149,39 +3149,62 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kIA32I8x16Shl: { XMMRegister dst = i.OutputSimd128Register(); DCHECK_EQ(dst, i.InputSimd128Register(0)); - Register shift = i.InputRegister(1); Register tmp = i.ToRegister(instr->TempAt(0)); XMMRegister tmp_simd = i.TempSimd128Register(1); - // Take shift value modulo 8. - __ and_(shift, 7); - // Mask off the unwanted bits before word-shifting. - __ Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg); - __ mov(tmp, shift); - __ add(tmp, Immediate(8)); - __ Movd(tmp_simd, tmp); - __ Psrlw(kScratchDoubleReg, kScratchDoubleReg, tmp_simd); - __ Packuswb(kScratchDoubleReg, kScratchDoubleReg); - __ Pand(dst, kScratchDoubleReg); - __ Movd(tmp_simd, shift); - __ Psllw(dst, dst, tmp_simd); + + if (HasImmediateInput(instr, 1)) { + // Perform 16-bit shift, then mask away low bits. + uint8_t shift = i.InputInt3(1); + __ Psllw(dst, dst, static_cast<uint8_t>(shift)); + + uint8_t bmask = static_cast<uint8_t>(0xff << shift); + uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask; + __ mov(tmp, mask); + __ Movd(tmp_simd, tmp); + __ Pshufd(tmp_simd, tmp_simd, 0); + __ Pand(dst, tmp_simd); + } else { + Register shift = i.InputRegister(1); + // Take shift value modulo 8. + __ and_(shift, 7); + // Mask off the unwanted bits before word-shifting.
+ __ Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg); + __ mov(tmp, shift); + __ add(tmp, Immediate(8)); + __ Movd(tmp_simd, tmp); + __ Psrlw(kScratchDoubleReg, kScratchDoubleReg, tmp_simd); + __ Packuswb(kScratchDoubleReg, kScratchDoubleReg); + __ Pand(dst, kScratchDoubleReg); + __ Movd(tmp_simd, shift); + __ Psllw(dst, dst, tmp_simd); + } break; } case kIA32I8x16ShrS: { XMMRegister dst = i.OutputSimd128Register(); DCHECK_EQ(dst, i.InputSimd128Register(0)); - Register tmp = i.ToRegister(instr->TempAt(0)); - XMMRegister tmp_simd = i.TempSimd128Register(1); - // Unpack the bytes into words, do arithmetic shifts, and repack. - __ punpckhbw(kScratchDoubleReg, dst); - __ punpcklbw(dst, dst); - __ mov(tmp, i.InputRegister(1)); - // Take shift value modulo 8. - __ and_(tmp, 7); - __ add(tmp, Immediate(8)); - __ movd(tmp_simd, tmp); - __ psraw(kScratchDoubleReg, tmp_simd); - __ psraw(dst, tmp_simd); - __ packsswb(dst, kScratchDoubleReg); + if (HasImmediateInput(instr, 1)) { + __ punpckhbw(kScratchDoubleReg, dst); + __ punpcklbw(dst, dst); + uint8_t shift = i.InputInt3(1) + 8; + __ psraw(kScratchDoubleReg, shift); + __ psraw(dst, shift); + __ packsswb(dst, kScratchDoubleReg); + } else { + Register tmp = i.ToRegister(instr->TempAt(0)); + XMMRegister tmp_simd = i.TempSimd128Register(1); + // Unpack the bytes into words, do arithmetic shifts, and repack. + __ punpckhbw(kScratchDoubleReg, dst); + __ punpcklbw(dst, dst); + __ mov(tmp, i.InputRegister(1)); + // Take shift value modulo 8. 
+ __ and_(tmp, 7); + __ add(tmp, Immediate(8)); + __ movd(tmp_simd, tmp); + __ psraw(kScratchDoubleReg, tmp_simd); + __ psraw(dst, tmp_simd); + __ packsswb(dst, kScratchDoubleReg); + } break; } case kSSEI8x16Add: { @@ -3423,21 +3446,35 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kIA32I8x16ShrU: { - DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); XMMRegister dst = i.OutputSimd128Register(); + DCHECK_EQ(dst, i.InputSimd128Register(0)); Register tmp = i.ToRegister(instr->TempAt(0)); XMMRegister tmp_simd = i.TempSimd128Register(1); - // Unpack the bytes into words, do logical shifts, and repack. - __ punpckhbw(kScratchDoubleReg, dst); - __ punpcklbw(dst, dst); - __ mov(tmp, i.InputRegister(1)); - // Take shift value modulo 8. - __ and_(tmp, 7); - __ add(tmp, Immediate(8)); - __ movd(tmp_simd, tmp); - __ psrlw(kScratchDoubleReg, tmp_simd); - __ psrlw(dst, tmp_simd); - __ packuswb(dst, kScratchDoubleReg); + + if (HasImmediateInput(instr, 1)) { + // Perform 16-bit shift, then mask away high bits. + uint8_t shift = i.InputInt3(1); + __ Psrlw(dst, dst, static_cast<uint8_t>(shift)); + + uint8_t bmask = 0xff >> shift; + uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask; + __ mov(tmp, mask); + __ Movd(tmp_simd, tmp); + __ Pshufd(tmp_simd, tmp_simd, 0); + __ Pand(dst, tmp_simd); + } else { + // Unpack the bytes into words, do logical shifts, and repack. + __ punpckhbw(kScratchDoubleReg, dst); + __ punpcklbw(dst, dst); + __ mov(tmp, i.InputRegister(1)); + // Take shift value modulo 8.
+ __ and_(tmp, 7); + __ add(tmp, Immediate(8)); + __ movd(tmp_simd, tmp); + __ psrlw(kScratchDoubleReg, tmp_simd); + __ psrlw(dst, tmp_simd); + __ packuswb(dst, kScratchDoubleReg); + } break; } case kSSEI8x16MinU: { diff --git a/src/compiler/backend/ia32/instruction-selector-ia32.cc b/src/compiler/backend/ia32/instruction-selector-ia32.cc index 33365ba1d2..f7a18f95cc 100644 --- a/src/compiler/backend/ia32/instruction-selector-ia32.cc +++ b/src/compiler/backend/ia32/instruction-selector-ia32.cc @@ -320,8 +320,8 @@ void VisitRROSimdShift(InstructionSelector* selector, Node* node, } } -void VisitRROI8x16SimdRightShift(InstructionSelector* selector, Node* node, - ArchOpcode opcode) { +void VisitRROI8x16SimdShift(InstructionSelector* selector, Node* node, + ArchOpcode opcode) { IA32OperandGenerator g(selector); InstructionOperand operand0 = g.UseUniqueRegister(node->InputAt(0)); InstructionOperand operand1 = g.UseUniqueRegister(node->InputAt(1)); @@ -329,7 +329,6 @@ void VisitRROI8x16SimdRightShift(InstructionSelector* selector, Node* node, selector->Emit(opcode, g.DefineSameAsFirst(node), operand0, operand1, arraysize(temps), temps); } - } // namespace void InstructionSelector::VisitStackSlot(Node* node) { @@ -2135,10 +2134,6 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) { V(I16x8ShrS) \ V(I16x8ShrU) -#define SIMD_I8X16_RIGHT_SHIFT_OPCODES(V) \ - V(I8x16ShrS) \ - V(I8x16ShrU) - void InstructionSelector::VisitF64x2Min(Node* node) { IA32OperandGenerator g(this); InstructionOperand temps[] = {g.TempSimd128Register()}; @@ -2361,14 +2356,6 @@ SIMD_SHIFT_OPCODES_UNIFED_SSE_AVX(VISIT_SIMD_SHIFT_UNIFIED_SSE_AVX) #undef VISIT_SIMD_SHIFT_UNIFIED_SSE_AVX #undef SIMD_SHIFT_OPCODES_UNIFED_SSE_AVX -#define VISIT_SIMD_I8x16_RIGHT_SHIFT(Opcode) \ - void InstructionSelector::Visit##Opcode(Node* node) { \ - VisitRROI8x16SimdRightShift(this, node, kIA32##Opcode); \ - } -SIMD_I8X16_RIGHT_SHIFT_OPCODES(VISIT_SIMD_I8x16_RIGHT_SHIFT) -#undef 
SIMD_I8X16_RIGHT_SHIFT_OPCODES -#undef VISIT_SIMD_I8x16_RIGHT_SHIFT - #define VISIT_SIMD_UNOP(Opcode) \ void InstructionSelector::Visit##Opcode(Node* node) { \ IA32OperandGenerator g(this); \ @@ -2448,11 +2435,37 @@ void InstructionSelector::VisitI8x16UConvertI16x8(Node* node) { void InstructionSelector::VisitI8x16Shl(Node* node) { IA32OperandGenerator g(this); - InstructionOperand operand0 = g.UseUniqueRegister(node->InputAt(0)); - InstructionOperand operand1 = g.UseUniqueRegister(node->InputAt(1)); - InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()}; - Emit(kIA32I8x16Shl, g.DefineSameAsFirst(node), operand0, operand1, - arraysize(temps), temps); + if (g.CanBeImmediate(node->InputAt(1))) { + InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()}; + this->Emit(kIA32I8x16Shl, g.DefineSameAsFirst(node), + g.UseRegister(node->InputAt(0)), + g.UseImmediate(node->InputAt(1)), arraysize(temps), temps); + } else { + VisitRROI8x16SimdShift(this, node, kIA32I8x16Shl); + } +} + +void InstructionSelector::VisitI8x16ShrS(Node* node) { + IA32OperandGenerator g(this); + if (g.CanBeImmediate(node->InputAt(1))) { + this->Emit(kIA32I8x16ShrS, g.DefineSameAsFirst(node), + g.UseRegister(node->InputAt(0)), + g.UseImmediate(node->InputAt(1))); + } else { + VisitRROI8x16SimdShift(this, node, kIA32I8x16ShrS); + } +} + +void InstructionSelector::VisitI8x16ShrU(Node* node) { + IA32OperandGenerator g(this); + if (g.CanBeImmediate(node->InputAt(1))) { + InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()}; + this->Emit(kIA32I8x16ShrU, g.DefineSameAsFirst(node), + g.UseRegister(node->InputAt(0)), + g.UseImmediate(node->InputAt(1)), arraysize(temps), temps); + } else { + VisitRROI8x16SimdShift(this, node, kIA32I8x16ShrU); + } } void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) {