PPC: [wasm-simd] Use P9 vector extract to implement ExtractLane

Power 9 offers new Vector Extract instructions (vextractub, vextractuh,
vextractuw, vextractd), which can now be used to implement the
ExtractLane opcodes.

Change-Id: Ie81960a5cc9ca3f5af4bf248a720859951f43ed3
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2521361
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Fa <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/master@{#70996}
This commit is contained in:
Milad Fa 2020-11-05 14:20:18 -05:00 committed by Commit Bot
parent 289d25c1ac
commit 62eb001935
2 changed files with 37 additions and 56 deletions

View File

@ -2214,9 +2214,13 @@ using Instr = uint32_t;
/* Vector Splat Halfword */ \
V(vsplth, VSPLTH, 0x1000024C) \
/* Vector Extract Unsigned Byte */ \
V(vextractub, VEXTRACTUB, 0x1000020d) \
V(vextractub, VEXTRACTUB, 0x1000020D) \
/* Vector Extract Unsigned Halfword */ \
V(vextractuh, VEXTRACTUH, 0x1000024D) \
/* Vector Extract Unsigned Word */ \
V(vextractuw, VEXTRACTUW, 0x1000028D) \
/* Vector Extract Doubleword */ \
V(vextractd, VEXTRACTD, 0x100002CD) \
/* Vector Insert Byte */ \
V(vinsertb, VINSERTB, 0x1000030D) \
/* Vector Insert Halfword */ \

View File

@ -2269,87 +2269,64 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vspltb(dst, dst, Operand(7));
break;
}
// SHIFT_TO_CORRECT_LANE(starting_lane_nummber, lane_input,
//                       lane_width_in_bytes, input_register)
//
// Statement macro expanded inside the ExtractLane cases below. It computes
//   shift_bits = abs(lane_input - starting_lane_nummber)
//                * lane_width_in_bytes * kBitsPerByte
// and, only when shift_bits is greater than zero:
//   1. passes shift_bits to li() with ip as the target, moves ip into
//      kScratchDoubleReg via mtvsrd(), then calls vspltb() on
//      kScratchDoubleReg with Operand(7);
//   2. calls vsro() when lane_input < starting_lane_nummber, otherwise
//      vslo() (the else branch DCHECKs lane_input > starting_lane_nummber);
//      in both calls input_register is the source and kScratchDoubleReg is
//      both the shift operand and the destination;
//   3. assigns kScratchDoubleReg to input_register, so the argument bound to
//      input_register must be an assignable variable, not a temporary.
// When shift_bits is zero no assembler calls are made and input_register is
// left as passed in.
//
// The expansion declares a local named `shift_bits` and is not wrapped in
// its own scope, so it can appear at most once per enclosing block. It uses
// ip and kScratchDoubleReg as temporaries. Arguments are substituted without
// added parentheses.
// NOTE(review): the parameter name `starting_lane_nummber` is misspelled
// ("number"); it is only used inside this macro.
#define SHIFT_TO_CORRECT_LANE(starting_lane_nummber, lane_input, \
lane_width_in_bytes, input_register) \
int shift_bits = abs(lane_input - starting_lane_nummber) * \
lane_width_in_bytes * kBitsPerByte; \
if (shift_bits > 0) { \
__ li(ip, Operand(shift_bits)); \
__ mtvsrd(kScratchDoubleReg, ip); \
__ vspltb(kScratchDoubleReg, kScratchDoubleReg, Operand(7)); \
if (lane_input < starting_lane_nummber) { \
__ vsro(kScratchDoubleReg, input_register, kScratchDoubleReg); \
} else { \
DCHECK(lane_input > starting_lane_nummber); \
__ vslo(kScratchDoubleReg, input_register, kScratchDoubleReg); \
} \
input_register = kScratchDoubleReg; \
}
case kPPC_F64x2ExtractLane: {
int32_t lane = 1 - i.InputInt8(1);
Simd128Register src = i.InputSimd128Register(0);
SHIFT_TO_CORRECT_LANE(0, lane, 8, src);
__ mfvsrd(kScratchReg, src);
constexpr int lane_width_in_bytes = 8;
__ vextractd(kScratchDoubleReg, i.InputSimd128Register(0),
Operand((1 - i.InputInt8(1)) * lane_width_in_bytes));
__ mfvsrd(kScratchReg, kScratchDoubleReg);
__ MovInt64ToDouble(i.OutputDoubleRegister(), kScratchReg);
break;
}
case kPPC_F32x4ExtractLane: {
int32_t lane = 3 - i.InputInt8(1);
Simd128Register src = i.InputSimd128Register(0);
SHIFT_TO_CORRECT_LANE(1, lane, 4, src)
__ mfvsrwz(kScratchReg, src);
constexpr int lane_width_in_bytes = 4;
__ vextractuw(kScratchDoubleReg, i.InputSimd128Register(0),
Operand((3 - i.InputInt8(1)) * lane_width_in_bytes));
__ mfvsrd(kScratchReg, kScratchDoubleReg);
__ MovIntToFloat(i.OutputDoubleRegister(), kScratchReg);
break;
}
case kPPC_I64x2ExtractLane: {
int32_t lane = 1 - i.InputInt8(1);
Simd128Register src = i.InputSimd128Register(0);
SHIFT_TO_CORRECT_LANE(0, lane, 8, src)
__ mfvsrd(i.OutputRegister(), src);
constexpr int lane_width_in_bytes = 8;
__ vextractd(kScratchDoubleReg, i.InputSimd128Register(0),
Operand((1 - i.InputInt8(1)) * lane_width_in_bytes));
__ mfvsrd(i.OutputRegister(), kScratchDoubleReg);
break;
}
case kPPC_I32x4ExtractLane: {
int32_t lane = 3 - i.InputInt8(1);
Simd128Register src = i.InputSimd128Register(0);
SHIFT_TO_CORRECT_LANE(1, lane, 4, src)
__ mfvsrwz(i.OutputRegister(), src);
constexpr int lane_width_in_bytes = 4;
__ vextractuw(kScratchDoubleReg, i.InputSimd128Register(0),
Operand((3 - i.InputInt8(1)) * lane_width_in_bytes));
__ mfvsrd(i.OutputRegister(), kScratchDoubleReg);
break;
}
case kPPC_I16x8ExtractLaneU: {
int32_t lane = 7 - i.InputInt8(1);
Simd128Register src = i.InputSimd128Register(0);
SHIFT_TO_CORRECT_LANE(2, lane, 2, src)
__ mfvsrwz(r0, src);
__ li(ip, Operand(16));
__ srd(i.OutputRegister(), r0, ip);
constexpr int lane_width_in_bytes = 2;
__ vextractuh(kScratchDoubleReg, i.InputSimd128Register(0),
Operand((7 - i.InputInt8(1)) * lane_width_in_bytes));
__ mfvsrd(i.OutputRegister(), kScratchDoubleReg);
break;
}
case kPPC_I16x8ExtractLaneS: {
int32_t lane = 7 - i.InputInt8(1);
Simd128Register src = i.InputSimd128Register(0);
SHIFT_TO_CORRECT_LANE(2, lane, 2, src)
__ mfvsrwz(kScratchReg, src);
__ sradi(i.OutputRegister(), kScratchReg, 16);
constexpr int lane_width_in_bytes = 2;
__ vextractuh(kScratchDoubleReg, i.InputSimd128Register(0),
Operand((7 - i.InputInt8(1)) * lane_width_in_bytes));
__ mfvsrd(kScratchReg, kScratchDoubleReg);
__ extsh(i.OutputRegister(), kScratchReg);
break;
}
case kPPC_I8x16ExtractLaneU: {
int32_t lane = 15 - i.InputInt8(1);
Simd128Register src = i.InputSimd128Register(0);
SHIFT_TO_CORRECT_LANE(4, lane, 1, src)
__ mfvsrwz(r0, src);
__ li(ip, Operand(24));
__ srd(i.OutputRegister(), r0, ip);
__ vextractub(kScratchDoubleReg, i.InputSimd128Register(0),
Operand(15 - i.InputInt8(1)));
__ mfvsrd(i.OutputRegister(), kScratchDoubleReg);
break;
}
case kPPC_I8x16ExtractLaneS: {
int32_t lane = 15 - i.InputInt8(1);
Simd128Register src = i.InputSimd128Register(0);
SHIFT_TO_CORRECT_LANE(4, lane, 1, src)
__ mfvsrwz(kScratchReg, src);
__ sradi(i.OutputRegister(), kScratchReg, 24);
__ vextractub(kScratchDoubleReg, i.InputSimd128Register(0),
Operand(15 - i.InputInt8(1)));
__ mfvsrd(kScratchReg, kScratchDoubleReg);
__ extsb(i.OutputRegister(), kScratchReg);
break;
}
#undef SHIFT_TO_CORRECT_LANE
case kPPC_F64x2ReplaceLane: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
constexpr int lane_width_in_bytes = 8;