S390 [liftoff]: Implement simd bitmask

The implementations are added to the macro-assembler so that they can be
shared between Liftoff and the code generator.

Change-Id: I6bde65dc50f1e52b8fbca150854e0b0863dff301
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3416190
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Farazmand <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/main@{#78760}
This commit is contained in:
Milad Fa 2022-01-25 16:45:55 -05:00 committed by V8 LUCI CQ
parent 019be504c7
commit 83ad3be6fc
4 changed files with 54 additions and 34 deletions

View File

@ -5487,6 +5487,40 @@ void TurboAssembler::I8x16GeU(Simd128Register dst, Simd128Register src1,
vo(dst, dst, kScratchDoubleReg, Condition(0), Condition(0), Condition(0));
}
// Emits the instruction sequence for the i64x2 bitmask of |src| into |dst|.
// |scratch1| (general purpose) and |scratch2| (SIMD) are overwritten.
void TurboAssembler::I64x2BitMask(Register dst, Simd128Register src,
                                  Register scratch1, Simd128Register scratch2) {
  // Selector constant consumed by vbperm. `auto` keeps the literal's own type
  // so the Operand built from it is unchanged.
  // NOTE(review): presumably every byte is a bit index into |src| (0x00 and
  // 0x40 being the sign bits of the two 64-bit lanes) and 0x80 produces a zero
  // bit -- confirm against the z/Architecture definition of vbperm.
  constexpr auto kBitSelectors = 0x8080808080800040;
  // The three trailing vbperm operands are all Condition(0).
  const auto kZeroCondition = Condition(0);

  mov(scratch1, Operand(kBitSelectors));
  // Place the selectors into element 1 of |scratch2|.
  // NOTE(review): Condition(3) presumably selects doubleword elements -- confirm.
  vlvg(scratch2, scratch1, MemOperand(r0, 1), Condition(3));
  vbperm(scratch2, src, scratch2, kZeroCondition, kZeroCondition,
         kZeroCondition);
  // Read element 7 of the vbperm result into |dst|.
  // NOTE(review): Condition(0) presumably selects byte elements -- confirm.
  vlgv(dst, scratch2, MemOperand(r0, 7), Condition(0));
}
// Emits the instruction sequence for the i32x4 bitmask of |src| into |dst|.
// |scratch1| (general purpose) and |scratch2| (SIMD) are overwritten.
void TurboAssembler::I32x4BitMask(Register dst, Simd128Register src,
                                  Register scratch1, Simd128Register scratch2) {
  // Selector constant consumed by vbperm. `auto` keeps the literal's own type
  // so the Operand built from it is unchanged.
  // NOTE(review): presumably 0x00/0x20/0x40/0x60 are the bit indices of the
  // four 32-bit lane sign bits and 0x80 produces a zero bit -- confirm against
  // the z/Architecture definition of vbperm.
  constexpr auto kBitSelectors = 0x8080808000204060;
  // The three trailing vbperm operands are all Condition(0).
  const auto kZeroCondition = Condition(0);

  mov(scratch1, Operand(kBitSelectors));
  // Place the selectors into element 1 of |scratch2|.
  // NOTE(review): Condition(3) presumably selects doubleword elements -- confirm.
  vlvg(scratch2, scratch1, MemOperand(r0, 1), Condition(3));
  vbperm(scratch2, src, scratch2, kZeroCondition, kZeroCondition,
         kZeroCondition);
  // Read element 7 of the vbperm result into |dst|.
  // NOTE(review): Condition(0) presumably selects byte elements -- confirm.
  vlgv(dst, scratch2, MemOperand(r0, 7), Condition(0));
}
// Emits the instruction sequence for the i16x8 bitmask of |src| into |dst|.
// |scratch1| (general purpose) and |scratch2| (SIMD) are overwritten.
void TurboAssembler::I16x8BitMask(Register dst, Simd128Register src,
                                  Register scratch1, Simd128Register scratch2) {
  // Selector constant consumed by vbperm. `auto` keeps the literal's own type
  // so the Operand built from it is unchanged.
  // NOTE(review): presumably the bytes 0x00, 0x10, ... 0x70 are the bit indices
  // of the eight 16-bit lane sign bits -- confirm against the z/Architecture
  // definition of vbperm.
  constexpr auto kBitSelectors = 0x10203040506070;
  // The three trailing vbperm operands are all Condition(0).
  const auto kZeroCondition = Condition(0);

  mov(scratch1, Operand(kBitSelectors));
  // Place the selectors into element 1 of |scratch2|.
  // NOTE(review): Condition(3) presumably selects doubleword elements -- confirm.
  vlvg(scratch2, scratch1, MemOperand(r0, 1), Condition(3));
  vbperm(scratch2, src, scratch2, kZeroCondition, kZeroCondition,
         kZeroCondition);
  // Read element 7 of the vbperm result into |dst|.
  // NOTE(review): Condition(0) presumably selects byte elements -- confirm.
  vlgv(dst, scratch2, MemOperand(r0, 7), Condition(0));
}
// Emits the instruction sequence for the i8x16 bitmask of |src| into |dst|.
// |scratch1| and |scratch2| (general purpose) and |scratch3| (SIMD) are
// overwritten.
void TurboAssembler::I8x16BitMask(Register dst, Simd128Register src,
                                  Register scratch1, Register scratch2,
                                  Simd128Register scratch3) {
  // Two selector constants consumed by vbperm, one per general-purpose scratch.
  // `auto` keeps each literal's own type so the Operands are unchanged.
  // NOTE(review): presumably the sixteen bytes 0x00, 0x08, ... 0x78 are the bit
  // indices of the sixteen 8-bit lane sign bits -- confirm against the
  // z/Architecture definition of vbperm.
  constexpr auto kSelectorsForScratch1 = 0x4048505860687078;
  constexpr auto kSelectorsForScratch2 = 0x8101820283038;
  // The three trailing vbperm operands are all Condition(0).
  const auto kZeroCondition = Condition(0);

  mov(scratch1, Operand(kSelectorsForScratch1));
  mov(scratch2, Operand(kSelectorsForScratch2));
  // Combine both scratch registers into |scratch3|; |scratch2| is passed ahead
  // of |scratch1|.
  vlvgp(scratch3, scratch2, scratch1);
  vbperm(scratch3, src, scratch3, kZeroCondition, kZeroCondition,
         kZeroCondition);
  // Read element 3 of the vbperm result into |dst|.
  // NOTE(review): Condition(1) presumably selects halfword elements -- confirm.
  vlgv(dst, scratch3, MemOperand(r0, 3), Condition(1));
}
// Vector LE Load and Transform instructions.
#ifdef V8_TARGET_BIG_ENDIAN
#define IS_BIG_ENDIAN true

View File

@ -1105,6 +1105,14 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
Register src2, uint8_t imm_lane_idx);
void I8x16ReplaceLane(Simd128Register dst, Simd128Register src1,
Register src2, uint8_t imm_lane_idx);
// SIMD bitmask helpers. Each one takes a general-purpose destination |dst|,
// a SIMD source |src|, and caller-supplied scratch registers that the
// implementation overwrites.
// I64x2/I32x4/I16x8 need one general-purpose and one SIMD scratch register.
void I64x2BitMask(Register dst, Simd128Register src, Register scratch1,
Simd128Register scratch2);
void I32x4BitMask(Register dst, Simd128Register src, Register scratch1,
Simd128Register scratch2);
void I16x8BitMask(Register dst, Simd128Register src, Register scratch1,
Simd128Register scratch2);
// I8x16 needs two general-purpose scratch registers plus one SIMD scratch.
void I8x16BitMask(Register dst, Simd128Register src, Register scratch1,
Register scratch2, Simd128Register scratch3);
#define SIMD_UNOP_LIST(V) \
V(F64x2Abs) \

View File

@ -3136,45 +3136,23 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kS390_I64x2BitMask: {
__ mov(kScratchReg, Operand(0x80800040));
__ iihf(kScratchReg, Operand(0x80808080)); // Zeroing the high bits.
__ vlvg(kScratchDoubleReg, kScratchReg, MemOperand(r0, 1), Condition(3));
__ vbperm(kScratchDoubleReg, i.InputSimd128Register(0), kScratchDoubleReg,
Condition(0), Condition(0), Condition(0));
__ vlgv(i.OutputRegister(), kScratchDoubleReg, MemOperand(r0, 7),
Condition(0));
__ I64x2BitMask(i.OutputRegister(), i.InputSimd128Register(0),
kScratchReg, kScratchDoubleReg);
break;
}
case kS390_I32x4BitMask: {
__ mov(kScratchReg, Operand(0x204060));
__ iihf(kScratchReg, Operand(0x80808080)); // Zeroing the high bits.
__ vlvg(kScratchDoubleReg, kScratchReg, MemOperand(r0, 1), Condition(3));
__ vbperm(kScratchDoubleReg, i.InputSimd128Register(0), kScratchDoubleReg,
Condition(0), Condition(0), Condition(0));
__ vlgv(i.OutputRegister(), kScratchDoubleReg, MemOperand(r0, 7),
Condition(0));
__ I32x4BitMask(i.OutputRegister(), i.InputSimd128Register(0),
kScratchReg, kScratchDoubleReg);
break;
}
case kS390_I16x8BitMask: {
__ mov(kScratchReg, Operand(0x40506070));
__ iihf(kScratchReg, Operand(0x102030));
__ vlvg(kScratchDoubleReg, kScratchReg, MemOperand(r0, 1), Condition(3));
__ vbperm(kScratchDoubleReg, i.InputSimd128Register(0), kScratchDoubleReg,
Condition(0), Condition(0), Condition(0));
__ vlgv(i.OutputRegister(), kScratchDoubleReg, MemOperand(r0, 7),
Condition(0));
__ I16x8BitMask(i.OutputRegister(), i.InputSimd128Register(0),
kScratchReg, kScratchDoubleReg);
break;
}
case kS390_I8x16BitMask: {
__ mov(r0, Operand(0x60687078));
__ iihf(r0, Operand(0x40485058));
__ mov(ip, Operand(0x20283038));
__ iihf(ip, Operand(0x81018));
__ vlvgp(kScratchDoubleReg, ip, r0);
__ vbperm(kScratchDoubleReg, i.InputSimd128Register(0), kScratchDoubleReg,
Condition(0), Condition(0), Condition(0));
__ vlgv(i.OutputRegister(), kScratchDoubleReg, MemOperand(r0, 3),
Condition(1));
__ I8x16BitMask(i.OutputRegister(), i.InputSimd128Register(0), r0, ip,
kScratchDoubleReg);
break;
}
case kS390_I32x4DotI16x8S: {

View File

@ -2530,7 +2530,7 @@ void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_s(LiftoffRegister dst,
// Emits the i64x2 bitmask of |src| into the general-purpose register of |dst|
// by delegating to the shared macro-assembler helper. r0 and kScratchDoubleReg
// are passed as scratch registers.
// The former bailout(kSimd, ...) call is dropped: the operation is now
// implemented, so it must no longer be reported as unsupported.
void LiftoffAssembler::emit_i64x2_bitmask(LiftoffRegister dst,
                                          LiftoffRegister src) {
  I64x2BitMask(dst.gp(), src.fp(), r0, kScratchDoubleReg);
}
void LiftoffAssembler::emit_i64x2_sconvert_i32x4_low(LiftoffRegister dst,
@ -2566,7 +2566,7 @@ void LiftoffAssembler::emit_i32x4_alltrue(LiftoffRegister dst,
// Emits the i32x4 bitmask of |src| into the general-purpose register of |dst|
// by delegating to the shared macro-assembler helper. r0 and kScratchDoubleReg
// are passed as scratch registers.
// The former bailout(kSimd, ...) call is dropped: the operation is now
// implemented, so it must no longer be reported as unsupported.
void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst,
                                          LiftoffRegister src) {
  I32x4BitMask(dst.gp(), src.fp(), r0, kScratchDoubleReg);
}
void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst,
@ -2616,7 +2616,7 @@ void LiftoffAssembler::emit_i16x8_alltrue(LiftoffRegister dst,
// Emits the i16x8 bitmask of |src| into the general-purpose register of |dst|
// by delegating to the shared macro-assembler helper. r0 and kScratchDoubleReg
// are passed as scratch registers.
// The former bailout(kSimd, ...) call is dropped: the operation is now
// implemented, so it must no longer be reported as unsupported.
void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst,
                                          LiftoffRegister src) {
  I16x8BitMask(dst.gp(), src.fp(), r0, kScratchDoubleReg);
}
void LiftoffAssembler::emit_i16x8_add_sat_s(LiftoffRegister dst,
@ -2708,7 +2708,7 @@ void LiftoffAssembler::emit_i8x16_alltrue(LiftoffRegister dst,
// Emits the i8x16 bitmask of |src| into the general-purpose register of |dst|
// by delegating to the shared macro-assembler helper. r0 and ip are passed as
// general-purpose scratch registers and kScratchDoubleReg as the SIMD scratch.
// The former bailout(kSimd, ...) call is dropped: the operation is now
// implemented, so it must no longer be reported as unsupported.
void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst,
                                          LiftoffRegister src) {
  I8x16BitMask(dst.gp(), src.fp(), r0, ip, kScratchDoubleReg);
}
void LiftoffAssembler::emit_i8x16_add_sat_s(LiftoffRegister dst,