PPC: [wasm-simd] Use P9 vector extract to implement ExtractLane
Power 9 offers new Vector Extract instructions which can now be used to implement the ExtractLane opcodes.

Change-Id: Ie81960a5cc9ca3f5af4bf248a720859951f43ed3
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2521361
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Fa <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/master@{#70996}
This commit is contained in:
parent
289d25c1ac
commit
62eb001935
@ -2214,9 +2214,13 @@ using Instr = uint32_t;
|
|||||||
/* Vector Splat Halfword */ \
|
/* Vector Splat Halfword */ \
|
||||||
V(vsplth, VSPLTH, 0x1000024C) \
|
V(vsplth, VSPLTH, 0x1000024C) \
|
||||||
/* Vector Extract Unsigned Byte */ \
|
/* Vector Extract Unsigned Byte */ \
|
||||||
V(vextractub, VEXTRACTUB, 0x1000020d) \
|
V(vextractub, VEXTRACTUB, 0x1000020D) \
|
||||||
/* Vector Extract Unsigned Halfword */ \
|
/* Vector Extract Unsigned Halfword */ \
|
||||||
V(vextractuh, VEXTRACTUH, 0x1000024D) \
|
V(vextractuh, VEXTRACTUH, 0x1000024D) \
|
||||||
|
/* Vector Extract Unsigned Word */ \
|
||||||
|
V(vextractuw, VEXTRACTUW, 0x1000028D) \
|
||||||
|
/* Vector Extract Doubleword */ \
|
||||||
|
V(vextractd, VEXTRACTD, 0x100002CD) \
|
||||||
/* Vector Insert Byte */ \
|
/* Vector Insert Byte */ \
|
||||||
V(vinsertb, VINSERTB, 0x1000030D) \
|
V(vinsertb, VINSERTB, 0x1000030D) \
|
||||||
/* Vector Insert Halfword */ \
|
/* Vector Insert Halfword */ \
|
||||||
|
@ -2269,87 +2269,64 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
|||||||
__ vspltb(dst, dst, Operand(7));
|
__ vspltb(dst, dst, Operand(7));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
#define SHIFT_TO_CORRECT_LANE(starting_lane_nummber, lane_input, \
|
|
||||||
lane_width_in_bytes, input_register) \
|
|
||||||
int shift_bits = abs(lane_input - starting_lane_nummber) * \
|
|
||||||
lane_width_in_bytes * kBitsPerByte; \
|
|
||||||
if (shift_bits > 0) { \
|
|
||||||
__ li(ip, Operand(shift_bits)); \
|
|
||||||
__ mtvsrd(kScratchDoubleReg, ip); \
|
|
||||||
__ vspltb(kScratchDoubleReg, kScratchDoubleReg, Operand(7)); \
|
|
||||||
if (lane_input < starting_lane_nummber) { \
|
|
||||||
__ vsro(kScratchDoubleReg, input_register, kScratchDoubleReg); \
|
|
||||||
} else { \
|
|
||||||
DCHECK(lane_input > starting_lane_nummber); \
|
|
||||||
__ vslo(kScratchDoubleReg, input_register, kScratchDoubleReg); \
|
|
||||||
} \
|
|
||||||
input_register = kScratchDoubleReg; \
|
|
||||||
}
|
|
||||||
case kPPC_F64x2ExtractLane: {
|
case kPPC_F64x2ExtractLane: {
|
||||||
int32_t lane = 1 - i.InputInt8(1);
|
constexpr int lane_width_in_bytes = 8;
|
||||||
Simd128Register src = i.InputSimd128Register(0);
|
__ vextractd(kScratchDoubleReg, i.InputSimd128Register(0),
|
||||||
SHIFT_TO_CORRECT_LANE(0, lane, 8, src);
|
Operand((1 - i.InputInt8(1)) * lane_width_in_bytes));
|
||||||
__ mfvsrd(kScratchReg, src);
|
__ mfvsrd(kScratchReg, kScratchDoubleReg);
|
||||||
__ MovInt64ToDouble(i.OutputDoubleRegister(), kScratchReg);
|
__ MovInt64ToDouble(i.OutputDoubleRegister(), kScratchReg);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kPPC_F32x4ExtractLane: {
|
case kPPC_F32x4ExtractLane: {
|
||||||
int32_t lane = 3 - i.InputInt8(1);
|
constexpr int lane_width_in_bytes = 4;
|
||||||
Simd128Register src = i.InputSimd128Register(0);
|
__ vextractuw(kScratchDoubleReg, i.InputSimd128Register(0),
|
||||||
SHIFT_TO_CORRECT_LANE(1, lane, 4, src)
|
Operand((3 - i.InputInt8(1)) * lane_width_in_bytes));
|
||||||
__ mfvsrwz(kScratchReg, src);
|
__ mfvsrd(kScratchReg, kScratchDoubleReg);
|
||||||
__ MovIntToFloat(i.OutputDoubleRegister(), kScratchReg);
|
__ MovIntToFloat(i.OutputDoubleRegister(), kScratchReg);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kPPC_I64x2ExtractLane: {
|
case kPPC_I64x2ExtractLane: {
|
||||||
int32_t lane = 1 - i.InputInt8(1);
|
constexpr int lane_width_in_bytes = 8;
|
||||||
Simd128Register src = i.InputSimd128Register(0);
|
__ vextractd(kScratchDoubleReg, i.InputSimd128Register(0),
|
||||||
SHIFT_TO_CORRECT_LANE(0, lane, 8, src)
|
Operand((1 - i.InputInt8(1)) * lane_width_in_bytes));
|
||||||
__ mfvsrd(i.OutputRegister(), src);
|
__ mfvsrd(i.OutputRegister(), kScratchDoubleReg);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kPPC_I32x4ExtractLane: {
|
case kPPC_I32x4ExtractLane: {
|
||||||
int32_t lane = 3 - i.InputInt8(1);
|
constexpr int lane_width_in_bytes = 4;
|
||||||
Simd128Register src = i.InputSimd128Register(0);
|
__ vextractuw(kScratchDoubleReg, i.InputSimd128Register(0),
|
||||||
SHIFT_TO_CORRECT_LANE(1, lane, 4, src)
|
Operand((3 - i.InputInt8(1)) * lane_width_in_bytes));
|
||||||
__ mfvsrwz(i.OutputRegister(), src);
|
__ mfvsrd(i.OutputRegister(), kScratchDoubleReg);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kPPC_I16x8ExtractLaneU: {
|
case kPPC_I16x8ExtractLaneU: {
|
||||||
int32_t lane = 7 - i.InputInt8(1);
|
constexpr int lane_width_in_bytes = 2;
|
||||||
Simd128Register src = i.InputSimd128Register(0);
|
__ vextractuh(kScratchDoubleReg, i.InputSimd128Register(0),
|
||||||
SHIFT_TO_CORRECT_LANE(2, lane, 2, src)
|
Operand((7 - i.InputInt8(1)) * lane_width_in_bytes));
|
||||||
__ mfvsrwz(r0, src);
|
__ mfvsrd(i.OutputRegister(), kScratchDoubleReg);
|
||||||
__ li(ip, Operand(16));
|
|
||||||
__ srd(i.OutputRegister(), r0, ip);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kPPC_I16x8ExtractLaneS: {
|
case kPPC_I16x8ExtractLaneS: {
|
||||||
int32_t lane = 7 - i.InputInt8(1);
|
constexpr int lane_width_in_bytes = 2;
|
||||||
Simd128Register src = i.InputSimd128Register(0);
|
__ vextractuh(kScratchDoubleReg, i.InputSimd128Register(0),
|
||||||
SHIFT_TO_CORRECT_LANE(2, lane, 2, src)
|
Operand((7 - i.InputInt8(1)) * lane_width_in_bytes));
|
||||||
__ mfvsrwz(kScratchReg, src);
|
__ mfvsrd(kScratchReg, kScratchDoubleReg);
|
||||||
__ sradi(i.OutputRegister(), kScratchReg, 16);
|
__ extsh(i.OutputRegister(), kScratchReg);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kPPC_I8x16ExtractLaneU: {
|
case kPPC_I8x16ExtractLaneU: {
|
||||||
int32_t lane = 15 - i.InputInt8(1);
|
__ vextractub(kScratchDoubleReg, i.InputSimd128Register(0),
|
||||||
Simd128Register src = i.InputSimd128Register(0);
|
Operand(15 - i.InputInt8(1)));
|
||||||
SHIFT_TO_CORRECT_LANE(4, lane, 1, src)
|
__ mfvsrd(i.OutputRegister(), kScratchDoubleReg);
|
||||||
__ mfvsrwz(r0, src);
|
|
||||||
__ li(ip, Operand(24));
|
|
||||||
__ srd(i.OutputRegister(), r0, ip);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case kPPC_I8x16ExtractLaneS: {
|
case kPPC_I8x16ExtractLaneS: {
|
||||||
int32_t lane = 15 - i.InputInt8(1);
|
__ vextractub(kScratchDoubleReg, i.InputSimd128Register(0),
|
||||||
Simd128Register src = i.InputSimd128Register(0);
|
Operand(15 - i.InputInt8(1)));
|
||||||
SHIFT_TO_CORRECT_LANE(4, lane, 1, src)
|
__ mfvsrd(kScratchReg, kScratchDoubleReg);
|
||||||
__ mfvsrwz(kScratchReg, src);
|
__ extsb(i.OutputRegister(), kScratchReg);
|
||||||
__ sradi(i.OutputRegister(), kScratchReg, 24);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
#undef SHIFT_TO_CORRECT_LANE
|
|
||||||
case kPPC_F64x2ReplaceLane: {
|
case kPPC_F64x2ReplaceLane: {
|
||||||
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||||
constexpr int lane_width_in_bytes = 8;
|
constexpr int lane_width_in_bytes = 8;
|
||||||
|
Loading…
Reference in New Issue
Block a user