PPC[liftoff]: Implement simd shuffle

Change-Id: Id40e71c5c18b7003452d250f9b545304e880ee8d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4102760
Commit-Queue: Milad Farazmand <mfarazma@redhat.com>
Reviewed-by: Vasili Skurydzin <vasili.skurydzin@ibm.com>
Cr-Commit-Position: refs/heads/main@{#84825}
This commit is contained in:
Milad Fa 2022-12-13 12:46:12 -05:00 committed by V8 LUCI CQ
parent 770e66c483
commit c3568fdb87
4 changed files with 37 additions and 8 deletions

View File

@ -4559,6 +4559,16 @@ void TurboAssembler::I8x16Swizzle(Simd128Register dst, Simd128Register src1,
vperm(dst, dst, kSimd128RegZero, scratch);
}
// Emits a byte shuffle that selects bytes from |src1| and |src2| into |dst|
// with a single vperm. |high| and |low| are the two 64-bit halves of the
// permute control vector: they are loaded into |scratch1| / |scratch2|,
// combined into |scratch3|, and |scratch3| is used as the vperm selector.
// |scratch1|, |scratch2| and |scratch3| are overwritten.
void TurboAssembler::I8x16Shuffle(Simd128Register dst, Simd128Register src1,
Simd128Register src2, uint64_t high,
uint64_t low, Register scratch1,
Register scratch2, Simd128Register scratch3) {
// Materialize both halves of the control vector in general-purpose scratches.
mov(scratch1, Operand(low));
mov(scratch2, Operand(high));
// Combine the two GPRs into one vector register; the register holding |high|
// is passed first, the one holding |low| second.
// NOTE(review): the Power ISA appears to read mtvsrdd's RA operand as zero
// when RA is r0, so |scratch2| presumably must not be r0 -- confirm against
// the ISA and the callers.
mtvsrdd(scratch3, scratch2, scratch1);
// Pick bytes out of |src1| / |src2| as directed by the control vector.
vperm(dst, src1, src2, scratch3);
}
void TurboAssembler::V128AnyTrue(Register dst, Simd128Register src,
Register scratch1, Register scratch2,
Simd128Register scratch3) {

View File

@ -1380,6 +1380,10 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
Register scratch1, Simd128Register scratch2);
void I8x16BitMask(Register dst, Simd128Register src, Register scratch1,
Register scratch2, Simd128Register scratch3);
// Shuffles bytes of |src1| and |src2| into |dst|. |high| and |low| are the
// two 64-bit halves of the shuffle control vector. |scratch1|, |scratch2| and
// |scratch3| are used as temporaries and are overwritten.
void I8x16Shuffle(Simd128Register dst, Simd128Register src1,
Simd128Register src2, uint64_t high, uint64_t low,
Register scratch1, Register scratch2,
Simd128Register scratch3);
void V128AnyTrue(Register dst, Simd128Register src, Register scratch1,
Register scratch2, Simd128Register scratch3);
void S128Const(Simd128Register dst, uint64_t high, uint64_t low,

View File

@ -2634,15 +2634,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kPPC_I8x16Shuffle: {
Simd128Register dst = i.OutputSimd128Register(),
src0 = i.InputSimd128Register(0),
src1 = i.InputSimd128Register(1);
uint64_t low = make_uint64(i.InputUint32(3), i.InputUint32(2));
uint64_t high = make_uint64(i.InputUint32(5), i.InputUint32(4));
__ mov(r0, Operand(low));
__ mov(ip, Operand(high));
__ mtvsrdd(kScratchSimd128Reg, ip, r0);
__ vperm(dst, src0, src1, kScratchSimd128Reg);
__ I8x16Shuffle(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1), high, low, r0, ip,
kScratchSimd128Reg);
break;
}
case kPPC_I64x2BitMask: {

View File

@ -2391,7 +2391,26 @@ void LiftoffAssembler::emit_i8x16_shuffle(LiftoffRegister dst,
LiftoffRegister rhs,
const uint8_t shuffle[16],
bool is_swizzle) {
// NOTE(review): this bailout call sits directly ahead of the implementation
// below. The +/- markers are not visible in this view, so it is most likely
// the line this change removes -- confirm it is gone in the resulting file.
bailout(kSimd, "i8x16_shuffle");
// Remap the shuffle indices to match IBM lane numbering.
// TODO(miladfarca): Put this in a function and share it with the instruction
// selector.
// Indices up to max_index map to (15 - index); larger indices map to
// (total_lane_count - index + 15). Assuming kSimd128Size is 16, that is
// (47 - index), so each 16-lane half is reversed within itself.
int max_index = 15;
int total_lane_count = 2 * kSimd128Size;
uint8_t shuffle_remapped[kSimd128Size];
for (int i = 0; i < kSimd128Size; i++) {
uint8_t current_index = shuffle[i];
shuffle_remapped[i] = (current_index <= max_index
? max_index - current_index
: total_lane_count - current_index + max_index);
}
// Reinterpret the remapped bytes as two 64-bit values (host byte order).
uint64_t vals[2];
memcpy(vals, shuffle_remapped, sizeof(shuffle_remapped));
#ifdef V8_TARGET_BIG_ENDIAN
// Presumably makes each 64-bit value equal to what a little-endian host would
// have read from the same bytes -- confirm ByteReverse semantics.
vals[0] = ByteReverse(vals[0]);
vals[1] = ByteReverse(vals[1]);
#endif
// vals[1] is passed as the |high| half and vals[0] as the |low| half; r0, ip
// and kScratchSimd128Reg serve as the scratch registers.
I8x16Shuffle(dst.fp().toSimd(), lhs.fp().toSimd(), rhs.fp().toSimd(), vals[1],
vals[0], r0, ip, kScratchSimd128Reg);
}
void LiftoffAssembler::emit_v128_anytrue(LiftoffRegister dst,