PPC[liftoff]: Implement simd shuffle
Change-Id: Id40e71c5c18b7003452d250f9b545304e880ee8d Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4102760 Commit-Queue: Milad Farazmand <mfarazma@redhat.com> Reviewed-by: Vasili Skurydzin <vasili.skurydzin@ibm.com> Cr-Commit-Position: refs/heads/main@{#84825}
This commit is contained in:
parent
770e66c483
commit
c3568fdb87
@ -4559,6 +4559,16 @@ void TurboAssembler::I8x16Swizzle(Simd128Register dst, Simd128Register src1,
|
||||
vperm(dst, dst, kSimd128RegZero, scratch);
|
||||
}
|
||||
|
||||
// Emits the instruction sequence for a 16-byte shuffle of |src1| and |src2|
// into |dst|. The shuffle control is passed as two 64-bit immediates, |high|
// and |low|; callers are expected to have already remapped the lane indices
// to the target's lane numbering (see the Liftoff caller in this change).
// Clobbers |scratch1|, |scratch2| and |scratch3|.
void TurboAssembler::I8x16Shuffle(Simd128Register dst, Simd128Register src1,
|
||||
Simd128Register src2, uint64_t high,
|
||||
uint64_t low, Register scratch1,
|
||||
Register scratch2, Simd128Register scratch3) {
|
||||
// Materialize both halves of the control mask in general-purpose registers.
mov(scratch1, Operand(low));
|
||||
mov(scratch2, Operand(high));
|
||||
// Combine the two GPRs into one vector register; |scratch2| (high) is the
// first source operand and |scratch1| (low) the second.
mtvsrdd(scratch3, scratch2, scratch1);
|
||||
// Permute using |scratch3| as the control vector. NOTE(review): presumably
// selects bytes from the src1:src2 pair per the Power ISA vperm definition.
vperm(dst, src1, src2, scratch3);
|
||||
}
|
||||
|
||||
void TurboAssembler::V128AnyTrue(Register dst, Simd128Register src,
|
||||
Register scratch1, Register scratch2,
|
||||
Simd128Register scratch3) {
|
||||
|
@ -1380,6 +1380,10 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
||||
Register scratch1, Simd128Register scratch2);
|
||||
void I8x16BitMask(Register dst, Simd128Register src, Register scratch1,
|
||||
Register scratch2, Simd128Register scratch3);
|
||||
// Shuffles the bytes of |src1| and |src2| into |dst| using a control mask
// supplied as two 64-bit immediates (|high|, |low|). |scratch1|, |scratch2|
// and |scratch3| are overwritten while building the mask.
void I8x16Shuffle(Simd128Register dst, Simd128Register src1,
|
||||
Simd128Register src2, uint64_t high, uint64_t low,
|
||||
Register scratch1, Register scratch2,
|
||||
Simd128Register scratch3);
|
||||
void V128AnyTrue(Register dst, Simd128Register src, Register scratch1,
|
||||
Register scratch2, Simd128Register scratch3);
|
||||
void S128Const(Simd128Register dst, uint64_t high, uint64_t low,
|
||||
|
@ -2634,15 +2634,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
break;
|
||||
}
|
||||
case kPPC_I8x16Shuffle: {
|
||||
Simd128Register dst = i.OutputSimd128Register(),
|
||||
src0 = i.InputSimd128Register(0),
|
||||
src1 = i.InputSimd128Register(1);
|
||||
uint64_t low = make_uint64(i.InputUint32(3), i.InputUint32(2));
|
||||
uint64_t high = make_uint64(i.InputUint32(5), i.InputUint32(4));
|
||||
__ mov(r0, Operand(low));
|
||||
__ mov(ip, Operand(high));
|
||||
__ mtvsrdd(kScratchSimd128Reg, ip, r0);
|
||||
__ vperm(dst, src0, src1, kScratchSimd128Reg);
|
||||
__ I8x16Shuffle(i.OutputSimd128Register(), i.InputSimd128Register(0),
|
||||
i.InputSimd128Register(1), high, low, r0, ip,
|
||||
kScratchSimd128Reg);
|
||||
break;
|
||||
}
|
||||
case kPPC_I64x2BitMask: {
|
||||
|
@ -2391,7 +2391,26 @@ void LiftoffAssembler::emit_i8x16_shuffle(LiftoffRegister dst,
|
||||
LiftoffRegister rhs,
|
||||
const uint8_t shuffle[16],
|
||||
bool is_swizzle) {
|
||||
bailout(kSimd, "i8x16_shuffle");
|
||||
// Remap the shuffle indices to match IBM lane numbering.
|
||||
// TODO(miladfarca): Put this in a function and share it with the instruction
|
||||
// selector.
|
||||
int max_index = 15;
|
||||
int total_lane_count = 2 * kSimd128Size;
|
||||
uint8_t shuffle_remapped[kSimd128Size];
|
||||
for (int i = 0; i < kSimd128Size; i++) {
|
||||
uint8_t current_index = shuffle[i];
|
||||
shuffle_remapped[i] = (current_index <= max_index
|
||||
? max_index - current_index
|
||||
: total_lane_count - current_index + max_index);
|
||||
}
|
||||
uint64_t vals[2];
|
||||
memcpy(vals, shuffle_remapped, sizeof(shuffle_remapped));
|
||||
#ifdef V8_TARGET_BIG_ENDIAN
|
||||
vals[0] = ByteReverse(vals[0]);
|
||||
vals[1] = ByteReverse(vals[1]);
|
||||
#endif
|
||||
I8x16Shuffle(dst.fp().toSimd(), lhs.fp().toSimd(), rhs.fp().toSimd(), vals[1],
|
||||
vals[0], r0, ip, kScratchSimd128Reg);
|
||||
}
|
||||
|
||||
void LiftoffAssembler::emit_v128_anytrue(LiftoffRegister dst,
|
||||
|
Loading…
Reference in New Issue
Block a user