S390: [wasm-simd] Prototype extended pairwise addition

Bug: v8:11086
Change-Id: Ic59e270282b5b7f3d2f8e8b46586964c69e4447a
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2618289
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Fa <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/master@{#71991}
This commit is contained in:
Milad Fa 2021-01-08 15:30:24 -05:00 committed by Commit Bot
parent 4d90b88285
commit ff0b78bbfd
4 changed files with 86 additions and 22 deletions

View File

@ -4187,6 +4187,40 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
#undef ASSEMBLE_SIMD_I32X4_I16X8_EXT_MUL
// Emits the instruction sequence for an extended pairwise addition.
//   lane_size: element-size selector passed as the Condition mask to vrepi
//              and to both multiplies; the final add uses lane_size + 1.
//   mul_even / mul_odd: assembler multiply-even / multiply-odd emitters
//              (vme/vmo or vmle/vmlo at the call sites).
// Sequence: vrepi fills kScratchDoubleReg with the immediate 1, mul_even
// multiplies the input by it into the temp register, mul_odd does the same
// into kScratchDoubleReg (overwriting the constant, which is no longer needed),
// and va adds the two products into the output register.
// The macro declares locals (src, dst, tempFPReg1), so each expansion must sit
// in its own brace scope, and the instruction must provide one temp register
// (instr->TempAt(0)).
#define EXT_ADD_PAIRWISE(lane_size, mul_even, mul_odd) \
Simd128Register src = i.InputSimd128Register(0); \
Simd128Register dst = i.OutputSimd128Register(); \
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0)); \
__ vrepi(kScratchDoubleReg, Operand(1), Condition(lane_size)); \
__ mul_even(tempFPReg1, src, kScratchDoubleReg, Condition(0), Condition(0), \
Condition(lane_size)); \
__ mul_odd(kScratchDoubleReg, src, kScratchDoubleReg, Condition(0), \
Condition(0), Condition(lane_size)); \
__ va(dst, tempFPReg1, kScratchDoubleReg, Condition(0), Condition(0), \
Condition(lane_size + 1));
// Signed variant: expands EXT_ADD_PAIRWISE with lane_size 1 and the
// multiply-even/odd pair vme/vmo.
case kS390_I32x4ExtAddPairwiseI16x8S: {
EXT_ADD_PAIRWISE(1, vme, vmo)
break;
}
// Unsigned variant: does not use EXT_ADD_PAIRWISE. It XORs kScratchDoubleReg
// with itself (vx) and passes that register as the second source of vsum
// (VECTOR SUM ACROSS WORD) alongside the input, with element-size mask 1.
// No temp register is read here.
// NOTE(review): presumably the cleared scratch means vsum adds nothing beyond
// the input's own elements in each word -- confirm against the vsum
// definition.
case kS390_I32x4ExtAddPairwiseI16x8U: {
Simd128Register src0 = i.InputSimd128Register(0);
Simd128Register dst = i.OutputSimd128Register();
__ vx(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg,
Condition(0), Condition(0), Condition(3));
__ vsum(dst, src0, kScratchDoubleReg, Condition(0), Condition(0),
Condition(1));
break;
}
// Signed variant: expands EXT_ADD_PAIRWISE with lane_size 0 and the
// multiply-even/odd pair vme/vmo.
case kS390_I16x8ExtAddPairwiseI8x16S: {
EXT_ADD_PAIRWISE(0, vme, vmo)
break;
}
// Unsigned variant: expands EXT_ADD_PAIRWISE with lane_size 0 and the
// logical (unsigned) multiply-even/odd pair vmle/vmlo.
case kS390_I16x8ExtAddPairwiseI8x16U: {
EXT_ADD_PAIRWISE(0, vmle, vmlo)
break;
}
#undef EXT_ADD_PAIRWISE
case kS390_StoreCompressTagged: {
CHECK(!instr->HasOutput());
size_t index = 0;

View File

@ -294,6 +294,8 @@ namespace compiler {
V(S390_I32x4ExtMulHighI16x8S) \
V(S390_I32x4ExtMulLowI16x8U) \
V(S390_I32x4ExtMulHighI16x8U) \
V(S390_I32x4ExtAddPairwiseI16x8S) \
V(S390_I32x4ExtAddPairwiseI16x8U) \
V(S390_I16x8Splat) \
V(S390_I16x8ExtractLaneU) \
V(S390_I16x8ExtractLaneS) \
@ -333,6 +335,8 @@ namespace compiler {
V(S390_I16x8ExtMulHighI8x16S) \
V(S390_I16x8ExtMulLowI8x16U) \
V(S390_I16x8ExtMulHighI8x16U) \
V(S390_I16x8ExtAddPairwiseI8x16S) \
V(S390_I16x8ExtAddPairwiseI8x16U) \
V(S390_I8x16Splat) \
V(S390_I8x16ExtractLaneU) \
V(S390_I8x16ExtractLaneS) \

View File

@ -240,6 +240,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kS390_I32x4ExtMulHighI16x8S:
case kS390_I32x4ExtMulLowI16x8U:
case kS390_I32x4ExtMulHighI16x8U:
case kS390_I32x4ExtAddPairwiseI16x8S:
case kS390_I32x4ExtAddPairwiseI16x8U:
case kS390_I16x8Splat:
case kS390_I16x8ExtractLaneU:
case kS390_I16x8ExtractLaneS:
@ -279,6 +281,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kS390_I16x8ExtMulHighI8x16S:
case kS390_I16x8ExtMulLowI8x16U:
case kS390_I16x8ExtMulHighI8x16U:
case kS390_I16x8ExtAddPairwiseI8x16S:
case kS390_I16x8ExtAddPairwiseI8x16U:
case kS390_I8x16Splat:
case kS390_I8x16ExtractLaneU:
case kS390_I8x16ExtractLaneS:

View File

@ -765,7 +765,9 @@ void Simulator::EvalTableInit() {
V(vs, VS, 0xE7F7) /* type = VRR_C VECTOR SUBTRACT */ \
V(vml, VML, 0xE7A2) /* type = VRR_C VECTOR MULTIPLY LOW */ \
V(vme, VME, 0xE7A6) /* type = VRR_C VECTOR MULTIPLY EVEN */ \
V(vmle, VMLE, 0xE7A4) /* type = VRR_C VECTOR MULTIPLY LOGICAL EVEN */ \
V(vmo, VMO, 0xE7A7) /* type = VRR_C VECTOR MULTIPLY ODD */ \
V(vmlo, VMLO, 0xE7A5) /* type = VRR_C VECTOR MULTIPLY LOGICAL ODD */ \
V(vnc, VNC, 0xE769) /* type = VRR_C VECTOR AND WITH COMPLEMENT */ \
V(vsum, VSUM, 0xE764) /* type = VRR_C VECTOR SUM ACROSS WORD */ \
V(vsumg, VSUMG, 0xE765) /* type = VRR_C VECTOR SUM ACROSS DOUBLEWORD */ \
@ -3220,29 +3222,32 @@ EVALUATE(VML) {
input_type src1 = get_simd_register_by_lane<input_type>(r3, i); \
set_simd_register_by_lane<result_type>(r1, k, src0 * src1); \
}
#define VECTOR_MULTIPLY_EVEN_ODD(r1, r2, r3, is_odd) \
switch (m4) { \
case 0: { \
VECTOR_MULTIPLY_EVEN_ODD_TYPE(r1, r2, r3, int8_t, int16_t, is_odd) \
break; \
} \
case 1: { \
VECTOR_MULTIPLY_EVEN_ODD_TYPE(r1, r2, r3, int16_t, int32_t, is_odd) \
break; \
} \
case 2: { \
VECTOR_MULTIPLY_EVEN_ODD_TYPE(r1, r2, r3, int32_t, int64_t, is_odd) \
break; \
} \
default: \
UNREACHABLE(); \
#define VECTOR_MULTIPLY_EVEN_ODD(r1, r2, r3, is_odd, sign) \
switch (m4) { \
case 0: { \
VECTOR_MULTIPLY_EVEN_ODD_TYPE(r1, r2, r3, sign##int8_t, sign##int16_t, \
is_odd) \
break; \
} \
case 1: { \
VECTOR_MULTIPLY_EVEN_ODD_TYPE(r1, r2, r3, sign##int16_t, sign##int32_t, \
is_odd) \
break; \
} \
case 2: { \
VECTOR_MULTIPLY_EVEN_ODD_TYPE(r1, r2, r3, sign##int32_t, sign##int64_t, \
is_odd) \
break; \
} \
default: \
UNREACHABLE(); \
}
EVALUATE(VME) {
DCHECK_OPCODE(VME);
DECODE_VRR_C_INSTRUCTION(r1, r2, r3, m6, m5, m4);
USE(m5);
USE(m6);
VECTOR_MULTIPLY_EVEN_ODD(r1, r2, r3, false)
VECTOR_MULTIPLY_EVEN_ODD(r1, r2, r3, false, )
return length;
}
@ -3251,7 +3256,24 @@ EVALUATE(VMO) {
DECODE_VRR_C_INSTRUCTION(r1, r2, r3, m6, m5, m4);
USE(m5);
USE(m6);
VECTOR_MULTIPLY_EVEN_ODD(r1, r2, r3, true)
VECTOR_MULTIPLY_EVEN_ODD(r1, r2, r3, true, )
return length;
}
// Simulator handler for VMLE (the unsigned counterpart of VME).
// Decodes the VRR_C operands, then runs the even-element multiply
// (is_odd = false) with sign prefix `u`, so VECTOR_MULTIPLY_EVEN_ODD selects
// uint8_t/uint16_t/uint32_t inputs and the next-wider unsigned result type
// based on m4. m5 and m6 are decoded but not used. Returns `length`.
// NOTE(review): the product is formed as `src0 * src1` inside
// VECTOR_MULTIPLY_EVEN_ODD_TYPE; if both operands are held as input_type, the
// multiply happens before widening (uint16_t operands promote to int, 32-bit
// operands stay 32-bit), which can overflow or truncate -- confirm and, if so,
// cast the operands to result_type first.
EVALUATE(VMLE) {
DCHECK_OPCODE(VMLE);
DECODE_VRR_C_INSTRUCTION(r1, r2, r3, m6, m5, m4);
USE(m5);
USE(m6);
VECTOR_MULTIPLY_EVEN_ODD(r1, r2, r3, false, u)
return length;
}
// Simulator handler for VMLO (the unsigned counterpart of VMO).
// Decodes the VRR_C operands, then runs the odd-element multiply
// (is_odd = true) with sign prefix `u`, so VECTOR_MULTIPLY_EVEN_ODD selects
// unsigned input/result lane types based on m4. m5 and m6 are decoded but not
// used. Returns `length`.
// NOTE(review): the multiply in VECTOR_MULTIPLY_EVEN_ODD_TYPE appears to be
// done on input_type operands before widening to result_type; for 16-bit and
// 32-bit unsigned lanes that may overflow or truncate -- confirm.
EVALUATE(VMLO) {
DCHECK_OPCODE(VMLO);
DECODE_VRR_C_INSTRUCTION(r1, r2, r3, m6, m5, m4);
USE(m5);
USE(m6);
VECTOR_MULTIPLY_EVEN_ODD(r1, r2, r3, true, u)
return length;
}
#undef VECTOR_MULTIPLY_EVEN_ODD
@ -3295,8 +3317,8 @@ EVALUATE(VSUM) {
USE(m6);
USE(m5);
switch (m4) {
CASE(1, int8_t, int32_t);
CASE(2, int16_t, int32_t);
CASE(0, uint8_t, uint32_t);
CASE(1, uint16_t, uint32_t);
default:
UNREACHABLE();
}
@ -3309,8 +3331,8 @@ EVALUATE(VSUMG) {
USE(m6);
USE(m5);
switch (m4) {
CASE(1, int16_t, int64_t);
CASE(2, int32_t, int64_t);
CASE(1, uint16_t, uint64_t);
CASE(2, uint32_t, uint64_t);
default:
UNREACHABLE();
}