PPC: [wasm-simd] Use P9 vector extract to implement ExtractLane

Power 9 offers new Vector Extract instructions, which can now be used
to implement the ExtractLane opcodes.

Change-Id: Ie81960a5cc9ca3f5af4bf248a720859951f43ed3
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2521361
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Fa <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/master@{#70996}
This commit is contained in:
Milad Fa 2020-11-05 14:20:18 -05:00 committed by Commit Bot
parent 289d25c1ac
commit 62eb001935
2 changed files with 37 additions and 56 deletions

View File

@ -2214,9 +2214,13 @@ using Instr = uint32_t;
/* Vector Splat Halfword */ \ /* Vector Splat Halfword */ \
V(vsplth, VSPLTH, 0x1000024C) \ V(vsplth, VSPLTH, 0x1000024C) \
/* Vector Extract Unsigned Byte */ \ /* Vector Extract Unsigned Byte */ \
V(vextractub, VEXTRACTUB, 0x1000020d) \ V(vextractub, VEXTRACTUB, 0x1000020D) \
/* Vector Extract Unsigned Halfword */ \ /* Vector Extract Unsigned Halfword */ \
V(vextractuh, VEXTRACTUH, 0x1000024D) \ V(vextractuh, VEXTRACTUH, 0x1000024D) \
/* Vector Extract Unsigned Word */ \
V(vextractuw, VEXTRACTUW, 0x1000028D) \
/* Vector Extract Doubleword */ \
V(vextractd, VEXTRACTD, 0x100002CD) \
/* Vector Insert Byte */ \ /* Vector Insert Byte */ \
V(vinsertb, VINSERTB, 0x1000030D) \ V(vinsertb, VINSERTB, 0x1000030D) \
/* Vector Insert Halfword */ \ /* Vector Insert Halfword */ \

View File

@ -2269,87 +2269,64 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vspltb(dst, dst, Operand(7)); __ vspltb(dst, dst, Operand(7));
break; break;
} }
// Emits code that shifts `input_register` by a whole number of lanes;
// judging by the name, the intent is to bring lane `lane_input` to the
// position of lane `starting_lane_nummber` (NOTE(review): confirm).
//
// The shift amount in bits is
//   |lane_input - starting_lane_nummber| * lane_width_in_bytes * kBitsPerByte.
// When that amount is non-zero it is loaded into `ip`, moved into
// kScratchDoubleReg with mtvsrd and splatted with vspltb. The input is then
// shifted into kScratchDoubleReg with vsro when `lane_input` is below the
// starting lane, or with vslo when it is above. Finally `input_register` is
// reassigned to kScratchDoubleReg. When the amount is zero nothing is emitted
// and `input_register` is left unchanged.
//
// Usage constraints that follow from the expansion:
//  - it declares a local `int shift_bits`, so it can be expanded at most once
//    per scope;
//  - `input_register` must be an assignable variable, not an expression;
//  - `ip` and kScratchDoubleReg are overwritten whenever a shift is emitted.
#define SHIFT_TO_CORRECT_LANE(starting_lane_nummber, lane_input, \
lane_width_in_bytes, input_register) \
int shift_bits = abs(lane_input - starting_lane_nummber) * \
lane_width_in_bytes * kBitsPerByte; \
if (shift_bits > 0) { \
__ li(ip, Operand(shift_bits)); \
__ mtvsrd(kScratchDoubleReg, ip); \
__ vspltb(kScratchDoubleReg, kScratchDoubleReg, Operand(7)); \
if (lane_input < starting_lane_nummber) { \
__ vsro(kScratchDoubleReg, input_register, kScratchDoubleReg); \
} else { \
DCHECK(lane_input > starting_lane_nummber); \
__ vslo(kScratchDoubleReg, input_register, kScratchDoubleReg); \
} \
input_register = kScratchDoubleReg; \
}
case kPPC_F64x2ExtractLane: { case kPPC_F64x2ExtractLane: {
int32_t lane = 1 - i.InputInt8(1); constexpr int lane_width_in_bytes = 8;
Simd128Register src = i.InputSimd128Register(0); __ vextractd(kScratchDoubleReg, i.InputSimd128Register(0),
SHIFT_TO_CORRECT_LANE(0, lane, 8, src); Operand((1 - i.InputInt8(1)) * lane_width_in_bytes));
__ mfvsrd(kScratchReg, src); __ mfvsrd(kScratchReg, kScratchDoubleReg);
__ MovInt64ToDouble(i.OutputDoubleRegister(), kScratchReg); __ MovInt64ToDouble(i.OutputDoubleRegister(), kScratchReg);
break; break;
} }
case kPPC_F32x4ExtractLane: { case kPPC_F32x4ExtractLane: {
int32_t lane = 3 - i.InputInt8(1); constexpr int lane_width_in_bytes = 4;
Simd128Register src = i.InputSimd128Register(0); __ vextractuw(kScratchDoubleReg, i.InputSimd128Register(0),
SHIFT_TO_CORRECT_LANE(1, lane, 4, src) Operand((3 - i.InputInt8(1)) * lane_width_in_bytes));
__ mfvsrwz(kScratchReg, src); __ mfvsrd(kScratchReg, kScratchDoubleReg);
__ MovIntToFloat(i.OutputDoubleRegister(), kScratchReg); __ MovIntToFloat(i.OutputDoubleRegister(), kScratchReg);
break; break;
} }
case kPPC_I64x2ExtractLane: { case kPPC_I64x2ExtractLane: {
int32_t lane = 1 - i.InputInt8(1); constexpr int lane_width_in_bytes = 8;
Simd128Register src = i.InputSimd128Register(0); __ vextractd(kScratchDoubleReg, i.InputSimd128Register(0),
SHIFT_TO_CORRECT_LANE(0, lane, 8, src) Operand((1 - i.InputInt8(1)) * lane_width_in_bytes));
__ mfvsrd(i.OutputRegister(), src); __ mfvsrd(i.OutputRegister(), kScratchDoubleReg);
break; break;
} }
case kPPC_I32x4ExtractLane: { case kPPC_I32x4ExtractLane: {
int32_t lane = 3 - i.InputInt8(1); constexpr int lane_width_in_bytes = 4;
Simd128Register src = i.InputSimd128Register(0); __ vextractuw(kScratchDoubleReg, i.InputSimd128Register(0),
SHIFT_TO_CORRECT_LANE(1, lane, 4, src) Operand((3 - i.InputInt8(1)) * lane_width_in_bytes));
__ mfvsrwz(i.OutputRegister(), src); __ mfvsrd(i.OutputRegister(), kScratchDoubleReg);
break; break;
} }
case kPPC_I16x8ExtractLaneU: { case kPPC_I16x8ExtractLaneU: {
int32_t lane = 7 - i.InputInt8(1); constexpr int lane_width_in_bytes = 2;
Simd128Register src = i.InputSimd128Register(0); __ vextractuh(kScratchDoubleReg, i.InputSimd128Register(0),
SHIFT_TO_CORRECT_LANE(2, lane, 2, src) Operand((7 - i.InputInt8(1)) * lane_width_in_bytes));
__ mfvsrwz(r0, src); __ mfvsrd(i.OutputRegister(), kScratchDoubleReg);
__ li(ip, Operand(16));
__ srd(i.OutputRegister(), r0, ip);
break; break;
} }
case kPPC_I16x8ExtractLaneS: { case kPPC_I16x8ExtractLaneS: {
int32_t lane = 7 - i.InputInt8(1); constexpr int lane_width_in_bytes = 2;
Simd128Register src = i.InputSimd128Register(0); __ vextractuh(kScratchDoubleReg, i.InputSimd128Register(0),
SHIFT_TO_CORRECT_LANE(2, lane, 2, src) Operand((7 - i.InputInt8(1)) * lane_width_in_bytes));
__ mfvsrwz(kScratchReg, src); __ mfvsrd(kScratchReg, kScratchDoubleReg);
__ sradi(i.OutputRegister(), kScratchReg, 16); __ extsh(i.OutputRegister(), kScratchReg);
break; break;
} }
case kPPC_I8x16ExtractLaneU: { case kPPC_I8x16ExtractLaneU: {
int32_t lane = 15 - i.InputInt8(1); __ vextractub(kScratchDoubleReg, i.InputSimd128Register(0),
Simd128Register src = i.InputSimd128Register(0); Operand(15 - i.InputInt8(1)));
SHIFT_TO_CORRECT_LANE(4, lane, 1, src) __ mfvsrd(i.OutputRegister(), kScratchDoubleReg);
__ mfvsrwz(r0, src);
__ li(ip, Operand(24));
__ srd(i.OutputRegister(), r0, ip);
break; break;
} }
case kPPC_I8x16ExtractLaneS: { case kPPC_I8x16ExtractLaneS: {
int32_t lane = 15 - i.InputInt8(1); __ vextractub(kScratchDoubleReg, i.InputSimd128Register(0),
Simd128Register src = i.InputSimd128Register(0); Operand(15 - i.InputInt8(1)));
SHIFT_TO_CORRECT_LANE(4, lane, 1, src) __ mfvsrd(kScratchReg, kScratchDoubleReg);
__ mfvsrwz(kScratchReg, src); __ extsb(i.OutputRegister(), kScratchReg);
__ sradi(i.OutputRegister(), kScratchReg, 24);
break; break;
} }
#undef SHIFT_TO_CORRECT_LANE
case kPPC_F64x2ReplaceLane: { case kPPC_F64x2ReplaceLane: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
constexpr int lane_width_in_bytes = 8; constexpr int lane_width_in_bytes = 8;