PPC: [wasm-simd] Use P9 vector extract to implement ExtractLane
Power 9 offers new Vector Extract instructions, which can now be used to implement the ExtractLane opcodes. Change-Id: Ie81960a5cc9ca3f5af4bf248a720859951f43ed3 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2521361 Reviewed-by: Junliang Yan <junyan@redhat.com> Commit-Queue: Milad Fa <mfarazma@redhat.com> Cr-Commit-Position: refs/heads/master@{#70996}
This commit is contained in:
parent
289d25c1ac
commit
62eb001935
@ -2214,9 +2214,13 @@ using Instr = uint32_t;
|
||||
/* Vector Splat Halfword */ \
|
||||
V(vsplth, VSPLTH, 0x1000024C) \
|
||||
/* Vector Extract Unsigned Byte */ \
|
||||
V(vextractub, VEXTRACTUB, 0x1000020d) \
|
||||
V(vextractub, VEXTRACTUB, 0x1000020D) \
|
||||
/* Vector Extract Unsigned Halfword */ \
|
||||
V(vextractuh, VEXTRACTUH, 0x1000024D) \
|
||||
/* Vector Extract Unsigned Word */ \
|
||||
V(vextractuw, VEXTRACTUW, 0x1000028D) \
|
||||
/* Vector Extract Doubleword */ \
|
||||
V(vextractd, VEXTRACTD, 0x100002CD) \
|
||||
/* Vector Insert Byte */ \
|
||||
V(vinsertb, VINSERTB, 0x1000030D) \
|
||||
/* Vector Insert Halfword */ \
|
||||
|
@ -2269,87 +2269,64 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ vspltb(dst, dst, Operand(7));
|
||||
break;
|
||||
}
|
||||
#define SHIFT_TO_CORRECT_LANE(starting_lane_nummber, lane_input, \
|
||||
lane_width_in_bytes, input_register) \
|
||||
int shift_bits = abs(lane_input - starting_lane_nummber) * \
|
||||
lane_width_in_bytes * kBitsPerByte; \
|
||||
if (shift_bits > 0) { \
|
||||
__ li(ip, Operand(shift_bits)); \
|
||||
__ mtvsrd(kScratchDoubleReg, ip); \
|
||||
__ vspltb(kScratchDoubleReg, kScratchDoubleReg, Operand(7)); \
|
||||
if (lane_input < starting_lane_nummber) { \
|
||||
__ vsro(kScratchDoubleReg, input_register, kScratchDoubleReg); \
|
||||
} else { \
|
||||
DCHECK(lane_input > starting_lane_nummber); \
|
||||
__ vslo(kScratchDoubleReg, input_register, kScratchDoubleReg); \
|
||||
} \
|
||||
input_register = kScratchDoubleReg; \
|
||||
}
|
||||
case kPPC_F64x2ExtractLane: {
|
||||
int32_t lane = 1 - i.InputInt8(1);
|
||||
Simd128Register src = i.InputSimd128Register(0);
|
||||
SHIFT_TO_CORRECT_LANE(0, lane, 8, src);
|
||||
__ mfvsrd(kScratchReg, src);
|
||||
constexpr int lane_width_in_bytes = 8;
|
||||
__ vextractd(kScratchDoubleReg, i.InputSimd128Register(0),
|
||||
Operand((1 - i.InputInt8(1)) * lane_width_in_bytes));
|
||||
__ mfvsrd(kScratchReg, kScratchDoubleReg);
|
||||
__ MovInt64ToDouble(i.OutputDoubleRegister(), kScratchReg);
|
||||
break;
|
||||
}
|
||||
case kPPC_F32x4ExtractLane: {
|
||||
int32_t lane = 3 - i.InputInt8(1);
|
||||
Simd128Register src = i.InputSimd128Register(0);
|
||||
SHIFT_TO_CORRECT_LANE(1, lane, 4, src)
|
||||
__ mfvsrwz(kScratchReg, src);
|
||||
constexpr int lane_width_in_bytes = 4;
|
||||
__ vextractuw(kScratchDoubleReg, i.InputSimd128Register(0),
|
||||
Operand((3 - i.InputInt8(1)) * lane_width_in_bytes));
|
||||
__ mfvsrd(kScratchReg, kScratchDoubleReg);
|
||||
__ MovIntToFloat(i.OutputDoubleRegister(), kScratchReg);
|
||||
break;
|
||||
}
|
||||
case kPPC_I64x2ExtractLane: {
|
||||
int32_t lane = 1 - i.InputInt8(1);
|
||||
Simd128Register src = i.InputSimd128Register(0);
|
||||
SHIFT_TO_CORRECT_LANE(0, lane, 8, src)
|
||||
__ mfvsrd(i.OutputRegister(), src);
|
||||
constexpr int lane_width_in_bytes = 8;
|
||||
__ vextractd(kScratchDoubleReg, i.InputSimd128Register(0),
|
||||
Operand((1 - i.InputInt8(1)) * lane_width_in_bytes));
|
||||
__ mfvsrd(i.OutputRegister(), kScratchDoubleReg);
|
||||
break;
|
||||
}
|
||||
case kPPC_I32x4ExtractLane: {
|
||||
int32_t lane = 3 - i.InputInt8(1);
|
||||
Simd128Register src = i.InputSimd128Register(0);
|
||||
SHIFT_TO_CORRECT_LANE(1, lane, 4, src)
|
||||
__ mfvsrwz(i.OutputRegister(), src);
|
||||
constexpr int lane_width_in_bytes = 4;
|
||||
__ vextractuw(kScratchDoubleReg, i.InputSimd128Register(0),
|
||||
Operand((3 - i.InputInt8(1)) * lane_width_in_bytes));
|
||||
__ mfvsrd(i.OutputRegister(), kScratchDoubleReg);
|
||||
break;
|
||||
}
|
||||
case kPPC_I16x8ExtractLaneU: {
|
||||
int32_t lane = 7 - i.InputInt8(1);
|
||||
Simd128Register src = i.InputSimd128Register(0);
|
||||
SHIFT_TO_CORRECT_LANE(2, lane, 2, src)
|
||||
__ mfvsrwz(r0, src);
|
||||
__ li(ip, Operand(16));
|
||||
__ srd(i.OutputRegister(), r0, ip);
|
||||
constexpr int lane_width_in_bytes = 2;
|
||||
__ vextractuh(kScratchDoubleReg, i.InputSimd128Register(0),
|
||||
Operand((7 - i.InputInt8(1)) * lane_width_in_bytes));
|
||||
__ mfvsrd(i.OutputRegister(), kScratchDoubleReg);
|
||||
break;
|
||||
}
|
||||
case kPPC_I16x8ExtractLaneS: {
|
||||
int32_t lane = 7 - i.InputInt8(1);
|
||||
Simd128Register src = i.InputSimd128Register(0);
|
||||
SHIFT_TO_CORRECT_LANE(2, lane, 2, src)
|
||||
__ mfvsrwz(kScratchReg, src);
|
||||
__ sradi(i.OutputRegister(), kScratchReg, 16);
|
||||
constexpr int lane_width_in_bytes = 2;
|
||||
__ vextractuh(kScratchDoubleReg, i.InputSimd128Register(0),
|
||||
Operand((7 - i.InputInt8(1)) * lane_width_in_bytes));
|
||||
__ mfvsrd(kScratchReg, kScratchDoubleReg);
|
||||
__ extsh(i.OutputRegister(), kScratchReg);
|
||||
break;
|
||||
}
|
||||
case kPPC_I8x16ExtractLaneU: {
|
||||
int32_t lane = 15 - i.InputInt8(1);
|
||||
Simd128Register src = i.InputSimd128Register(0);
|
||||
SHIFT_TO_CORRECT_LANE(4, lane, 1, src)
|
||||
__ mfvsrwz(r0, src);
|
||||
__ li(ip, Operand(24));
|
||||
__ srd(i.OutputRegister(), r0, ip);
|
||||
__ vextractub(kScratchDoubleReg, i.InputSimd128Register(0),
|
||||
Operand(15 - i.InputInt8(1)));
|
||||
__ mfvsrd(i.OutputRegister(), kScratchDoubleReg);
|
||||
break;
|
||||
}
|
||||
case kPPC_I8x16ExtractLaneS: {
|
||||
int32_t lane = 15 - i.InputInt8(1);
|
||||
Simd128Register src = i.InputSimd128Register(0);
|
||||
SHIFT_TO_CORRECT_LANE(4, lane, 1, src)
|
||||
__ mfvsrwz(kScratchReg, src);
|
||||
__ sradi(i.OutputRegister(), kScratchReg, 24);
|
||||
__ vextractub(kScratchDoubleReg, i.InputSimd128Register(0),
|
||||
Operand(15 - i.InputInt8(1)));
|
||||
__ mfvsrd(kScratchReg, kScratchDoubleReg);
|
||||
__ extsb(i.OutputRegister(), kScratchReg);
|
||||
break;
|
||||
}
|
||||
#undef SHIFT_TO_CORRECT_LANE
|
||||
case kPPC_F64x2ReplaceLane: {
|
||||
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
constexpr int lane_width_in_bytes = 8;
|
||||
|
Loading…
Reference in New Issue
Block a user