S390 [liftoff]: Implement simd bitmask
Implementations are added to the macro-assembler so they can be shared between Liftoff and the code generator.

Change-Id: I6bde65dc50f1e52b8fbca150854e0b0863dff301
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3416190
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Farazmand <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/main@{#78760}
parent 019be504c7
commit 83ad3be6fc
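For readers unfamiliar with the operation: each bitmask op packs the most significant (sign) bit of every lane into the low bits of a general-purpose register, which is what the vbperm-based helpers below compute. A minimal scalar sketch of that semantics follows (illustration only; ScalarBitMask is not part of this change, and lanes are read in memory order):

// Scalar reference for the lane-MSB extraction implemented below with vbperm.
#include <cstdint>
#include <cstring>

// Bit i of the result is the most significant bit of lane i.
template <typename LaneT, int kLanes>
uint32_t ScalarBitMask(const uint8_t bytes[16]) {
  static_assert(sizeof(LaneT) * kLanes == 16, "one 128-bit vector");
  uint32_t mask = 0;
  for (int i = 0; i < kLanes; ++i) {
    LaneT lane;
    std::memcpy(&lane, bytes + i * sizeof(LaneT), sizeof(LaneT));
    // Shift the lane's most significant bit down to bit 0 and place it at bit i.
    mask |= static_cast<uint32_t>((lane >> (8 * sizeof(LaneT) - 1)) & 1) << i;
  }
  return mask;
}

// Usage: ScalarBitMask<uint64_t, 2>(v)  ~ i64x2.bitmask
//        ScalarBitMask<uint8_t, 16>(v)  ~ i8x16.bitmask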
@@ -5487,6 +5487,40 @@ void TurboAssembler::I8x16GeU(Simd128Register dst, Simd128Register src1,
   vo(dst, dst, kScratchDoubleReg, Condition(0), Condition(0), Condition(0));
 }
 
+void TurboAssembler::I64x2BitMask(Register dst, Simd128Register src,
+                                  Register scratch1, Simd128Register scratch2) {
+  mov(scratch1, Operand(0x8080808080800040));
+  vlvg(scratch2, scratch1, MemOperand(r0, 1), Condition(3));
+  vbperm(scratch2, src, scratch2, Condition(0), Condition(0), Condition(0));
+  vlgv(dst, scratch2, MemOperand(r0, 7), Condition(0));
+}
+
+void TurboAssembler::I32x4BitMask(Register dst, Simd128Register src,
+                                  Register scratch1, Simd128Register scratch2) {
+  mov(scratch1, Operand(0x8080808000204060));
+  vlvg(scratch2, scratch1, MemOperand(r0, 1), Condition(3));
+  vbperm(scratch2, src, scratch2, Condition(0), Condition(0), Condition(0));
+  vlgv(dst, scratch2, MemOperand(r0, 7), Condition(0));
+}
+
+void TurboAssembler::I16x8BitMask(Register dst, Simd128Register src,
+                                  Register scratch1, Simd128Register scratch2) {
+  mov(scratch1, Operand(0x10203040506070));
+  vlvg(scratch2, scratch1, MemOperand(r0, 1), Condition(3));
+  vbperm(scratch2, src, scratch2, Condition(0), Condition(0), Condition(0));
+  vlgv(dst, scratch2, MemOperand(r0, 7), Condition(0));
+}
+
+void TurboAssembler::I8x16BitMask(Register dst, Simd128Register src,
+                                  Register scratch1, Register scratch2,
+                                  Simd128Register scratch3) {
+  mov(scratch1, Operand(0x4048505860687078));
+  mov(scratch2, Operand(0x8101820283038));
+  vlvgp(scratch3, scratch2, scratch1);
+  vbperm(scratch3, src, scratch3, Condition(0), Condition(0), Condition(0));
+  vlgv(dst, scratch3, MemOperand(r0, 3), Condition(1));
+}
+
 // Vector LE Load and Transform instructions.
 #ifdef V8_TARGET_BIG_ENDIAN
 #define IS_BIG_ENDIAN true
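A note on the selection constants (my reading, not text from the change): each byte of the vector handed to vbperm is treated as a bit index into the 128-bit source, so the constants above simply list the bit position of every lane's sign bit, and bytes of 0x80 and above are assumed to select nothing so the unused result bits stay zero. A small sketch decoding the I32x4 constant under that assumption:

// Decode the I32x4BitMask selection constant from the hunk above.
// Assumption: vbperm reads each selection byte as a bit index 0..127 into the
// source vector, and indices >= 0x80 contribute a 0 bit.
#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t kSelect = 0x8080808000204060;  // low doubleword set via vlvg
  for (int byte = 0; byte < 8; ++byte) {
    uint8_t idx = static_cast<uint8_t>(kSelect >> (56 - 8 * byte));
    if (idx < 0x80) {
      // 0x00, 0x20, 0x40, 0x60 -> source bits 0, 32, 64, 96:
      // the most significant bit of each 32-bit lane.
      std::printf("selection byte %d -> source bit %u (lane %u MSB)\n", byte,
                  static_cast<unsigned>(idx), static_cast<unsigned>(idx) / 32u);
    }
  }
  return 0;
}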
@@ -1105,6 +1105,14 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
                         Register src2, uint8_t imm_lane_idx);
   void I8x16ReplaceLane(Simd128Register dst, Simd128Register src1,
                         Register src2, uint8_t imm_lane_idx);
+  void I64x2BitMask(Register dst, Simd128Register src, Register scratch1,
+                    Simd128Register scratch2);
+  void I32x4BitMask(Register dst, Simd128Register src, Register scratch1,
+                    Simd128Register scratch2);
+  void I16x8BitMask(Register dst, Simd128Register src, Register scratch1,
+                    Simd128Register scratch2);
+  void I8x16BitMask(Register dst, Simd128Register src, Register scratch1,
+                    Register scratch2, Simd128Register scratch3);
 
 #define SIMD_UNOP_LIST(V) \
   V(F64x2Abs) \
@@ -3136,45 +3136,23 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kS390_I64x2BitMask: {
-      __ mov(kScratchReg, Operand(0x80800040));
-      __ iihf(kScratchReg, Operand(0x80808080));  // Zeroing the high bits.
-      __ vlvg(kScratchDoubleReg, kScratchReg, MemOperand(r0, 1), Condition(3));
-      __ vbperm(kScratchDoubleReg, i.InputSimd128Register(0), kScratchDoubleReg,
-                Condition(0), Condition(0), Condition(0));
-      __ vlgv(i.OutputRegister(), kScratchDoubleReg, MemOperand(r0, 7),
-              Condition(0));
+      __ I64x2BitMask(i.OutputRegister(), i.InputSimd128Register(0),
+                      kScratchReg, kScratchDoubleReg);
       break;
     }
     case kS390_I32x4BitMask: {
-      __ mov(kScratchReg, Operand(0x204060));
-      __ iihf(kScratchReg, Operand(0x80808080));  // Zeroing the high bits.
-      __ vlvg(kScratchDoubleReg, kScratchReg, MemOperand(r0, 1), Condition(3));
-      __ vbperm(kScratchDoubleReg, i.InputSimd128Register(0), kScratchDoubleReg,
-                Condition(0), Condition(0), Condition(0));
-      __ vlgv(i.OutputRegister(), kScratchDoubleReg, MemOperand(r0, 7),
-              Condition(0));
+      __ I32x4BitMask(i.OutputRegister(), i.InputSimd128Register(0),
+                      kScratchReg, kScratchDoubleReg);
       break;
    }
     case kS390_I16x8BitMask: {
-      __ mov(kScratchReg, Operand(0x40506070));
-      __ iihf(kScratchReg, Operand(0x102030));
-      __ vlvg(kScratchDoubleReg, kScratchReg, MemOperand(r0, 1), Condition(3));
-      __ vbperm(kScratchDoubleReg, i.InputSimd128Register(0), kScratchDoubleReg,
-                Condition(0), Condition(0), Condition(0));
-      __ vlgv(i.OutputRegister(), kScratchDoubleReg, MemOperand(r0, 7),
-              Condition(0));
+      __ I16x8BitMask(i.OutputRegister(), i.InputSimd128Register(0),
+                      kScratchReg, kScratchDoubleReg);
       break;
     }
     case kS390_I8x16BitMask: {
-      __ mov(r0, Operand(0x60687078));
-      __ iihf(r0, Operand(0x40485058));
-      __ mov(ip, Operand(0x20283038));
-      __ iihf(ip, Operand(0x81018));
-      __ vlvgp(kScratchDoubleReg, ip, r0);
-      __ vbperm(kScratchDoubleReg, i.InputSimd128Register(0), kScratchDoubleReg,
-                Condition(0), Condition(0), Condition(0));
-      __ vlgv(i.OutputRegister(), kScratchDoubleReg, MemOperand(r0, 3),
-              Condition(1));
+      __ I8x16BitMask(i.OutputRegister(), i.InputSimd128Register(0), r0, ip,
+                      kScratchDoubleReg);
       break;
     }
     case kS390_I32x4DotI16x8S: {
@@ -2530,7 +2530,7 @@ void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_s(LiftoffRegister dst,
 
 void LiftoffAssembler::emit_i64x2_bitmask(LiftoffRegister dst,
                                           LiftoffRegister src) {
-  bailout(kSimd, "i64x2_bitmask");
+  I64x2BitMask(dst.gp(), src.fp(), r0, kScratchDoubleReg);
 }
 
 void LiftoffAssembler::emit_i64x2_sconvert_i32x4_low(LiftoffRegister dst,
@@ -2566,7 +2566,7 @@ void LiftoffAssembler::emit_i32x4_alltrue(LiftoffRegister dst,
 
 void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst,
                                           LiftoffRegister src) {
-  bailout(kSimd, "i32x4_bitmask");
+  I32x4BitMask(dst.gp(), src.fp(), r0, kScratchDoubleReg);
 }
 
 void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst,
@@ -2616,7 +2616,7 @@ void LiftoffAssembler::emit_i16x8_alltrue(LiftoffRegister dst,
 
 void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst,
                                           LiftoffRegister src) {
-  bailout(kSimd, "i16x8_bitmask");
+  I16x8BitMask(dst.gp(), src.fp(), r0, kScratchDoubleReg);
 }
 
 void LiftoffAssembler::emit_i16x8_add_sat_s(LiftoffRegister dst,
@@ -2708,7 +2708,7 @@ void LiftoffAssembler::emit_i8x16_alltrue(LiftoffRegister dst,
 
 void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst,
                                           LiftoffRegister src) {
-  bailout(kSimd, "i8x16_bitmask");
+  I8x16BitMask(dst.gp(), src.fp(), r0, ip, kScratchDoubleReg);
 }
 
 void LiftoffAssembler::emit_i8x16_add_sat_s(LiftoffRegister dst,