[wasm simd] Handle more shuffles
- Improves shuffle canonicalization to reverse operands to match more architectural shuffles.
- Handles shuffles where the order of operands is reversed.
- Adds tests for non-canonical shuffles and for swizzles.
- Improves the TryMatchConcat method.
- Substantially rewrites shuffles on ia32 to better handle swizzles and to fix bugs on reversed shuffles where source registers are overwritten.
- Adds Palignr macro-assembler instructions for ia32.

Bug: v8:6020
Change-Id: I8e43a1e7650057c66690af1504b67509a1437d75
Reviewed-on: https://chromium-review.googlesource.com/1070934
Commit-Queue: Bill Budge <bbudge@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Reviewed-by: Martyn Capewell <martyn.capewell@arm.com>
Reviewed-by: Jaroslav Sevcik <jarin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#53648}
This commit is contained in:
parent
43886bc305
commit
bcb4fbd4ad
@ -2445,7 +2445,9 @@ static const ShuffleEntry arch_shuffles[] = {
|
||||
{{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, kArmS8x2Reverse}};
|
||||
|
||||
bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table,
|
||||
size_t num_entries, uint8_t mask, ArchOpcode* opcode) {
|
||||
size_t num_entries, bool is_swizzle,
|
||||
ArchOpcode* opcode) {
|
||||
uint8_t mask = is_swizzle ? kSimd128Size - 1 : 2 * kSimd128Size - 1;
|
||||
for (size_t i = 0; i < num_entries; ++i) {
|
||||
const ShuffleEntry& entry = table[i];
|
||||
int j = 0;
|
||||
@ -2477,48 +2479,48 @@ void ArrangeShuffleTable(ArmOperandGenerator* g, Node* input0, Node* input1,
|
||||
} // namespace
|
||||
|
||||
void InstructionSelector::VisitS8x16Shuffle(Node* node) {
|
||||
const uint8_t* shuffle = OpParameter<uint8_t*>(node->op());
|
||||
uint8_t mask = CanonicalizeShuffle(node);
|
||||
uint8_t shuffle[kSimd128Size];
|
||||
bool is_swizzle;
|
||||
CanonicalizeShuffle(node, shuffle, &is_swizzle);
|
||||
Node* input0 = node->InputAt(0);
|
||||
Node* input1 = node->InputAt(1);
|
||||
uint8_t shuffle32x4[4];
|
||||
ArmOperandGenerator g(this);
|
||||
int index = 0;
|
||||
if (TryMatch32x4Shuffle(shuffle, shuffle32x4)) {
|
||||
if (TryMatchDup<4>(shuffle, &index)) {
|
||||
InstructionOperand src = index < 4 ? g.UseRegister(node->InputAt(0))
|
||||
: g.UseRegister(node->InputAt(1));
|
||||
InstructionOperand src =
|
||||
index < 4 ? g.UseRegister(input0) : g.UseRegister(input1);
|
||||
Emit(kArmS128Dup, g.DefineAsRegister(node), src, g.UseImmediate(Neon32),
|
||||
g.UseImmediate(index % 4));
|
||||
} else {
|
||||
Emit(kArmS32x4Shuffle, g.DefineAsRegister(node),
|
||||
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
|
||||
g.UseImmediate(Pack4Lanes(shuffle32x4, mask)));
|
||||
Emit(kArmS32x4Shuffle, g.DefineAsRegister(node), g.UseRegister(input0),
|
||||
g.UseRegister(input1), g.UseImmediate(Pack4Lanes(shuffle32x4)));
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (TryMatchDup<8>(shuffle, &index)) {
|
||||
InstructionOperand src = index < 8 ? g.UseRegister(node->InputAt(0))
|
||||
: g.UseRegister(node->InputAt(1));
|
||||
InstructionOperand src =
|
||||
index < 8 ? g.UseRegister(input0) : g.UseRegister(input1);
|
||||
Emit(kArmS128Dup, g.DefineAsRegister(node), src, g.UseImmediate(Neon16),
|
||||
g.UseImmediate(index % 8));
|
||||
return;
|
||||
}
|
||||
if (TryMatchDup<16>(shuffle, &index)) {
|
||||
InstructionOperand src = index < 16 ? g.UseRegister(node->InputAt(0))
|
||||
: g.UseRegister(node->InputAt(1));
|
||||
InstructionOperand src =
|
||||
index < 16 ? g.UseRegister(input0) : g.UseRegister(input1);
|
||||
Emit(kArmS128Dup, g.DefineAsRegister(node), src, g.UseImmediate(Neon8),
|
||||
g.UseImmediate(index % 16));
|
||||
return;
|
||||
}
|
||||
ArchOpcode opcode;
|
||||
if (TryMatchArchShuffle(shuffle, arch_shuffles, arraysize(arch_shuffles),
|
||||
mask, &opcode)) {
|
||||
is_swizzle, &opcode)) {
|
||||
VisitRRRShuffle(this, opcode, node);
|
||||
return;
|
||||
}
|
||||
Node* input0 = node->InputAt(0);
|
||||
Node* input1 = node->InputAt(1);
|
||||
uint8_t offset;
|
||||
if (TryMatchConcat(shuffle, mask, &offset)) {
|
||||
if (TryMatchConcat(shuffle, &offset)) {
|
||||
Emit(kArmS8x16Concat, g.DefineAsRegister(node), g.UseRegister(input0),
|
||||
g.UseRegister(input1), g.UseImmediate(offset));
|
||||
return;
|
||||
@ -2527,10 +2529,10 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) {
|
||||
InstructionOperand src0, src1;
|
||||
ArrangeShuffleTable(&g, input0, input1, &src0, &src1);
|
||||
Emit(kArmS8x16Shuffle, g.DefineAsRegister(node), src0, src1,
|
||||
g.UseImmediate(Pack4Lanes(shuffle, mask)),
|
||||
g.UseImmediate(Pack4Lanes(shuffle + 4, mask)),
|
||||
g.UseImmediate(Pack4Lanes(shuffle + 8, mask)),
|
||||
g.UseImmediate(Pack4Lanes(shuffle + 12, mask)));
|
||||
g.UseImmediate(Pack4Lanes(shuffle)),
|
||||
g.UseImmediate(Pack4Lanes(shuffle + 4)),
|
||||
g.UseImmediate(Pack4Lanes(shuffle + 8)),
|
||||
g.UseImmediate(Pack4Lanes(shuffle + 12)));
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitSignExtendWord8ToInt32(Node* node) {
|
||||
|
@ -3088,7 +3088,9 @@ static const ShuffleEntry arch_shuffles[] = {
|
||||
kArm64S8x2Reverse}};
|
||||
|
||||
bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table,
|
||||
size_t num_entries, uint8_t mask, ArchOpcode* opcode) {
|
||||
size_t num_entries, bool is_swizzle,
|
||||
ArchOpcode* opcode) {
|
||||
uint8_t mask = is_swizzle ? kSimd128Size - 1 : 2 * kSimd128Size - 1;
|
||||
for (size_t i = 0; i < num_entries; i++) {
|
||||
const ShuffleEntry& entry = table[i];
|
||||
int j = 0;
|
||||
@ -3120,48 +3122,48 @@ void ArrangeShuffleTable(Arm64OperandGenerator* g, Node* input0, Node* input1,
|
||||
} // namespace
|
||||
|
||||
void InstructionSelector::VisitS8x16Shuffle(Node* node) {
|
||||
const uint8_t* shuffle = OpParameter<uint8_t*>(node->op());
|
||||
uint8_t mask = CanonicalizeShuffle(node);
|
||||
uint8_t shuffle[kSimd128Size];
|
||||
bool is_swizzle;
|
||||
CanonicalizeShuffle(node, shuffle, &is_swizzle);
|
||||
uint8_t shuffle32x4[4];
|
||||
Arm64OperandGenerator g(this);
|
||||
ArchOpcode opcode;
|
||||
if (TryMatchArchShuffle(shuffle, arch_shuffles, arraysize(arch_shuffles),
|
||||
mask, &opcode)) {
|
||||
is_swizzle, &opcode)) {
|
||||
VisitRRR(this, opcode, node);
|
||||
return;
|
||||
}
|
||||
Node* input0 = node->InputAt(0);
|
||||
Node* input1 = node->InputAt(1);
|
||||
uint8_t bias;
|
||||
if (TryMatchConcat(shuffle, mask, &bias)) {
|
||||
uint8_t offset;
|
||||
if (TryMatchConcat(shuffle, &offset)) {
|
||||
Emit(kArm64S8x16Concat, g.DefineAsRegister(node), g.UseRegister(input0),
|
||||
g.UseRegister(input1), g.UseImmediate(bias));
|
||||
g.UseRegister(input1), g.UseImmediate(offset));
|
||||
return;
|
||||
}
|
||||
int index = 0;
|
||||
if (TryMatch32x4Shuffle(shuffle, shuffle32x4)) {
|
||||
if (TryMatchDup<4>(shuffle, &index)) {
|
||||
InstructionOperand src = index < 4 ? g.UseRegister(node->InputAt(0))
|
||||
: g.UseRegister(node->InputAt(1));
|
||||
InstructionOperand src =
|
||||
index < 4 ? g.UseRegister(input0) : g.UseRegister(input1);
|
||||
Emit(kArm64S128Dup, g.DefineAsRegister(node), src, g.UseImmediate(4),
|
||||
g.UseImmediate(index % 4));
|
||||
} else {
|
||||
Emit(kArm64S32x4Shuffle, g.DefineAsRegister(node),
|
||||
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
|
||||
g.UseImmediate(Pack4Lanes(shuffle32x4, mask)));
|
||||
Emit(kArm64S32x4Shuffle, g.DefineAsRegister(node), g.UseRegister(input0),
|
||||
g.UseRegister(input1), g.UseImmediate(Pack4Lanes(shuffle32x4)));
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (TryMatchDup<8>(shuffle, &index)) {
|
||||
InstructionOperand src = index < 8 ? g.UseRegister(node->InputAt(0))
|
||||
: g.UseRegister(node->InputAt(1));
|
||||
InstructionOperand src =
|
||||
index < 8 ? g.UseRegister(input0) : g.UseRegister(input1);
|
||||
Emit(kArm64S128Dup, g.DefineAsRegister(node), src, g.UseImmediate(8),
|
||||
g.UseImmediate(index % 8));
|
||||
return;
|
||||
}
|
||||
if (TryMatchDup<16>(shuffle, &index)) {
|
||||
InstructionOperand src = index < 16 ? g.UseRegister(node->InputAt(0))
|
||||
: g.UseRegister(node->InputAt(1));
|
||||
InstructionOperand src =
|
||||
index < 16 ? g.UseRegister(input0) : g.UseRegister(input1);
|
||||
Emit(kArm64S128Dup, g.DefineAsRegister(node), src, g.UseImmediate(16),
|
||||
g.UseImmediate(index % 16));
|
||||
return;
|
||||
@ -3170,10 +3172,10 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) {
|
||||
InstructionOperand src0, src1;
|
||||
ArrangeShuffleTable(&g, input0, input1, &src0, &src1);
|
||||
Emit(kArm64S8x16Shuffle, g.DefineAsRegister(node), src0, src1,
|
||||
g.UseImmediate(Pack4Lanes(shuffle, mask)),
|
||||
g.UseImmediate(Pack4Lanes(shuffle + 4, mask)),
|
||||
g.UseImmediate(Pack4Lanes(shuffle + 8, mask)),
|
||||
g.UseImmediate(Pack4Lanes(shuffle + 12, mask)));
|
||||
g.UseImmediate(Pack4Lanes(shuffle)),
|
||||
g.UseImmediate(Pack4Lanes(shuffle + 4)),
|
||||
g.UseImmediate(Pack4Lanes(shuffle + 8)),
|
||||
g.UseImmediate(Pack4Lanes(shuffle + 12)));
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitSignExtendWord8ToInt32(Node* node) {
|
||||
|
@ -3139,13 +3139,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
Operand src0 = i.InputOperand(0);
|
||||
Register tmp = i.TempRegister(0);
|
||||
if (!src0.is_reg(dst)) {
|
||||
__ movups(dst, src0);
|
||||
}
|
||||
// Prepare 16-byte boundary buffer for shuffle control mask
|
||||
// Prepare 16 byte aligned buffer for shuffle control mask
|
||||
__ mov(tmp, esp);
|
||||
__ and_(esp, -16);
|
||||
if (instr->InputCount() == 5) { // only one input operand
|
||||
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
for (int j = 4; j > 0; j--) {
|
||||
uint32_t mask = i.InputUint32(j);
|
||||
__ push(Immediate(mask));
|
||||
@ -3153,6 +3151,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ Pshufb(dst, Operand(esp, 0));
|
||||
} else { // two input operands
|
||||
DCHECK_EQ(6, instr->InputCount());
|
||||
__ movups(kScratchDoubleReg, src0);
|
||||
for (int j = 5; j > 1; j--) {
|
||||
uint32_t lanes = i.InputUint32(j);
|
||||
uint32_t mask = 0;
|
||||
@ -3162,8 +3161,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
}
|
||||
__ push(Immediate(mask));
|
||||
}
|
||||
__ Pshufb(dst, Operand(esp, 0));
|
||||
__ movups(kScratchDoubleReg, i.InputOperand(1));
|
||||
__ Pshufb(kScratchDoubleReg, Operand(esp, 0));
|
||||
Operand src1 = i.InputOperand(1);
|
||||
if (!src1.is_reg(dst)) __ movups(dst, src1);
|
||||
for (int j = 5; j > 1; j--) {
|
||||
uint32_t lanes = i.InputUint32(j);
|
||||
uint32_t mask = 0;
|
||||
@ -3173,74 +3173,55 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
}
|
||||
__ push(Immediate(mask));
|
||||
}
|
||||
__ Pshufb(kScratchDoubleReg, Operand(esp, 0));
|
||||
__ Pshufb(dst, Operand(esp, 0));
|
||||
__ por(dst, kScratchDoubleReg);
|
||||
}
|
||||
__ mov(esp, tmp);
|
||||
break;
|
||||
}
|
||||
case kIA32S32x4Swizzle: {
|
||||
__ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), i.InputInt8(2));
|
||||
DCHECK_EQ(2, instr->InputCount());
|
||||
__ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), i.InputInt8(1));
|
||||
break;
|
||||
}
|
||||
case kIA32S32x4Shuffle: {
|
||||
DCHECK_EQ(4, instr->InputCount()); // Swizzles should be handled above.
|
||||
__ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), i.InputInt8(2));
|
||||
__ Pshufd(kScratchDoubleReg, i.InputOperand(1), i.InputInt8(2));
|
||||
__ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), i.InputInt8(2));
|
||||
__ Pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputInt8(3));
|
||||
break;
|
||||
}
|
||||
case kSSES16x8Blend: {
|
||||
CpuFeatureScope sse_scope(tasm(), SSSE3);
|
||||
if (instr->InputCount() == 2) {
|
||||
// swizzle
|
||||
__ pblendw(i.OutputSimd128Register(), i.InputOperand(0),
|
||||
i.InputInt8(1));
|
||||
} else {
|
||||
// shuffle
|
||||
DCHECK_EQ(3, instr->InputCount());
|
||||
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
__ pblendw(i.OutputSimd128Register(), i.InputOperand(1),
|
||||
i.InputInt8(2));
|
||||
}
|
||||
CpuFeatureScope sse_scope(tasm(), SSE4_1);
|
||||
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
__ pblendw(i.OutputSimd128Register(), i.InputOperand(1), i.InputInt8(2));
|
||||
break;
|
||||
}
|
||||
case kAVXS16x8Blend: {
|
||||
CpuFeatureScope avx_scope(tasm(), AVX);
|
||||
CpuFeatureScope sse_scope(tasm(), AVX);
|
||||
__ vpblendw(i.OutputSimd128Register(), i.InputSimd128Register(0),
|
||||
i.InputOperand(1), i.InputInt8(2));
|
||||
break;
|
||||
}
|
||||
case kIA32S16x8ShuffleBlend: {
|
||||
case kIA32S16x8HalfShuffle1: {
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
if (instr->InputCount() == 3) {
|
||||
// swizzle
|
||||
__ Pshuflw(dst, i.InputOperand(0), i.InputInt8(1));
|
||||
__ Pshufhw(dst, dst, i.InputInt8(2));
|
||||
} else {
|
||||
// shuffle
|
||||
DCHECK_EQ(5, instr->InputCount());
|
||||
__ Pshuflw(dst, i.InputOperand(0), i.InputInt8(2));
|
||||
__ Pshufhw(dst, dst, i.InputInt8(3));
|
||||
__ Pshuflw(kScratchDoubleReg, i.InputOperand(1), i.InputInt8(2));
|
||||
__ Pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputInt8(3));
|
||||
__ Pblendw(dst, kScratchDoubleReg, i.InputInt8(4));
|
||||
}
|
||||
__ Pshuflw(dst, i.InputOperand(0), i.InputInt8(1));
|
||||
__ Pshufhw(dst, dst, i.InputInt8(2));
|
||||
break;
|
||||
}
|
||||
case kIA32S16x8HalfShuffle2: {
|
||||
XMMRegister dst = i.OutputSimd128Register();
|
||||
__ Pshuflw(kScratchDoubleReg, i.InputOperand(1), i.InputInt8(2));
|
||||
__ Pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputInt8(3));
|
||||
__ Pshuflw(dst, i.InputOperand(0), i.InputInt8(2));
|
||||
__ Pshufhw(dst, dst, i.InputInt8(3));
|
||||
__ Pblendw(dst, kScratchDoubleReg, i.InputInt8(4));
|
||||
break;
|
||||
}
|
||||
case kSSES8x16Alignr: {
|
||||
CpuFeatureScope sse_scope(tasm(), SSSE3);
|
||||
if (instr->InputCount() == 2) {
|
||||
// swizzle
|
||||
__ palignr(i.OutputSimd128Register(), i.InputOperand(0),
|
||||
i.InputInt8(1));
|
||||
} else {
|
||||
// shuffle
|
||||
DCHECK_EQ(3, instr->InputCount());
|
||||
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
__ palignr(i.OutputSimd128Register(), i.InputOperand(1),
|
||||
i.InputInt8(2));
|
||||
}
|
||||
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
|
||||
__ palignr(i.OutputSimd128Register(), i.InputOperand(1), i.InputInt8(2));
|
||||
break;
|
||||
}
|
||||
case kAVXS8x16Alignr: {
|
||||
|
@ -305,7 +305,8 @@ namespace compiler {
|
||||
V(IA32S32x4Shuffle) \
|
||||
V(SSES16x8Blend) \
|
||||
V(AVXS16x8Blend) \
|
||||
V(IA32S16x8ShuffleBlend) \
|
||||
V(IA32S16x8HalfShuffle1) \
|
||||
V(IA32S16x8HalfShuffle2) \
|
||||
V(SSES8x16Alignr) \
|
||||
V(AVXS8x16Alignr) \
|
||||
V(IA32S1x4AnyTrue) \
|
||||
|
@ -287,7 +287,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kIA32S32x4Shuffle:
|
||||
case kSSES16x8Blend:
|
||||
case kAVXS16x8Blend:
|
||||
case kIA32S16x8ShuffleBlend:
|
||||
case kIA32S16x8HalfShuffle1:
|
||||
case kIA32S16x8HalfShuffle2:
|
||||
case kSSES8x16Alignr:
|
||||
case kAVXS8x16Alignr:
|
||||
case kIA32S1x4AnyTrue:
|
||||
|
@ -2058,23 +2058,13 @@ bool Is16x8BlendedShuffle(uint8_t* shuffle16x8, uint8_t* blend_mask) {
|
||||
return true;
|
||||
}
|
||||
|
||||
void SwapShuffleInputs(Node* node) {
|
||||
Node* input0 = node->InputAt(0);
|
||||
Node* input1 = node->InputAt(1);
|
||||
node->ReplaceInput(0, input1);
|
||||
node->ReplaceInput(1, input0);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// TODO(bbudge) Make sure identity shuffle emits no instructions.
|
||||
void InstructionSelector::VisitS8x16Shuffle(Node* node) {
|
||||
static const int kMaxSwizzleIndex = 15;
|
||||
static const int kMaxShuffleIndex = 31;
|
||||
const uint8_t* shuffle = OpParameter<uint8_t*>(node->op());
|
||||
uint8_t mask = CanonicalizeShuffle(node);
|
||||
bool is_swizzle = (mask == kMaxSwizzleIndex);
|
||||
DCHECK_IMPLIES(!is_swizzle, mask == kMaxShuffleIndex);
|
||||
USE(kMaxShuffleIndex);
|
||||
uint8_t shuffle[kSimd128Size];
|
||||
bool is_swizzle;
|
||||
CanonicalizeShuffle(node, shuffle, &is_swizzle);
|
||||
|
||||
int imm_count = 0;
|
||||
static const int kMaxImms = 6;
|
||||
@ -2085,23 +2075,30 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) {
|
||||
|
||||
IA32OperandGenerator g(this);
|
||||
bool use_avx = CpuFeatures::IsSupported(AVX);
|
||||
// AVX and swizzles don't generally need DefineSameAsFirst to avoid a move.
|
||||
bool no_same_as_first = use_avx || is_swizzle;
|
||||
// We generally need UseRegister for the first source.
|
||||
bool no_use_register = false;
|
||||
ArchOpcode opcode = kIA32S8x16Shuffle; // general shuffle is the default
|
||||
|
||||
uint8_t offset;
|
||||
uint8_t shuffle32x4[4];
|
||||
uint8_t shuffle16x8[8];
|
||||
if (TryMatchConcat(shuffle, mask, &offset)) {
|
||||
// Swap inputs for (v)palignr.
|
||||
// TODO(bbudge) Handle concatenations where the sources are reversed.
|
||||
if (TryMatchConcat(shuffle, &offset)) {
|
||||
// Swap inputs from the normal order for (v)palignr.
|
||||
SwapShuffleInputs(node);
|
||||
// palignr takes a single imm8 offset.
|
||||
is_swizzle = false; // It's simpler to just handle the general case.
|
||||
no_same_as_first = use_avx; // SSE requires same-as-first.
|
||||
opcode = use_avx ? kAVXS8x16Alignr : kSSES8x16Alignr;
|
||||
// palignr takes a single imm8 offset.
|
||||
imms[imm_count++] = offset;
|
||||
} else if (TryMatch32x4Shuffle(shuffle, shuffle32x4)) {
|
||||
uint8_t shuffle_mask = PackShuffle4(shuffle32x4);
|
||||
if (is_swizzle) {
|
||||
// pshufd takes a single imm8 shuffle mask.
|
||||
opcode = kIA32S32x4Swizzle;
|
||||
no_same_as_first = true;
|
||||
no_use_register = true;
|
||||
imms[imm_count++] = shuffle_mask;
|
||||
} else {
|
||||
// 2 operand shuffle
|
||||
@ -2112,6 +2109,8 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) {
|
||||
imms[imm_count++] = blend_mask;
|
||||
} else {
|
||||
opcode = kIA32S32x4Shuffle;
|
||||
no_same_as_first = true;
|
||||
no_use_register = true;
|
||||
imms[imm_count++] = shuffle_mask;
|
||||
int8_t blend_mask = PackBlend4(shuffle32x4);
|
||||
imms[imm_count++] = blend_mask;
|
||||
@ -2124,39 +2123,46 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) {
|
||||
blend_mask = PackBlend8(shuffle16x8);
|
||||
imms[imm_count++] = blend_mask;
|
||||
} else if (Is16x8BlendedShuffle(shuffle16x8, &blend_mask)) {
|
||||
opcode = kIA32S16x8ShuffleBlend;
|
||||
opcode = is_swizzle ? kIA32S16x8HalfShuffle1 : kIA32S16x8HalfShuffle2;
|
||||
// Half-shuffles don't need DefineSameAsFirst or UseRegister(src0).
|
||||
no_same_as_first = true;
|
||||
no_use_register = true;
|
||||
uint8_t mask_lo = PackShuffle4(shuffle16x8);
|
||||
uint8_t mask_hi = PackShuffle4(shuffle16x8 + 4);
|
||||
imms[imm_count++] = mask_lo;
|
||||
imms[imm_count++] = mask_hi;
|
||||
// TODO(bbudge) eliminate the blend for swizzles.
|
||||
imms[imm_count++] = blend_mask;
|
||||
if (!is_swizzle) imms[imm_count++] = blend_mask;
|
||||
}
|
||||
}
|
||||
if (opcode == kIA32S8x16Shuffle) {
|
||||
// General shuffle.
|
||||
imms[imm_count++] = Pack4Lanes(shuffle, mask);
|
||||
imms[imm_count++] = Pack4Lanes(shuffle + 4, mask);
|
||||
imms[imm_count++] = Pack4Lanes(shuffle + 8, mask);
|
||||
imms[imm_count++] = Pack4Lanes(shuffle + 12, mask);
|
||||
// Use same-as-first for general swizzle, but not shuffle.
|
||||
no_same_as_first = !is_swizzle;
|
||||
no_use_register = no_same_as_first;
|
||||
imms[imm_count++] = Pack4Lanes(shuffle);
|
||||
imms[imm_count++] = Pack4Lanes(shuffle + 4);
|
||||
imms[imm_count++] = Pack4Lanes(shuffle + 8);
|
||||
imms[imm_count++] = Pack4Lanes(shuffle + 12);
|
||||
temps[temp_count++] = g.TempRegister();
|
||||
}
|
||||
|
||||
// Swizzles and AVX don't require input[0] == output.
|
||||
InstructionOperand output = use_avx || is_swizzle ? g.DefineAsRegister(node)
|
||||
: g.DefineSameAsFirst(node);
|
||||
// Use DefineAsRegister(node) and Use(src0) if we can without forcing an extra
|
||||
// move instruction in the CodeGenerator.
|
||||
Node* input0 = node->InputAt(0);
|
||||
InstructionOperand dst =
|
||||
no_same_as_first ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node);
|
||||
InstructionOperand src0 =
|
||||
no_use_register ? g.Use(input0) : g.UseRegister(input0);
|
||||
|
||||
int input_count = 0;
|
||||
InstructionOperand inputs[2 + kMaxImms + kMaxTemps];
|
||||
InstructionOperand src0 = g.UseRegister(node->InputAt(0));
|
||||
inputs[input_count++] = src0;
|
||||
if (!is_swizzle || (use_avx && opcode != kIA32S8x16Shuffle)) {
|
||||
if (!is_swizzle) {
|
||||
inputs[input_count++] = g.Use(node->InputAt(1));
|
||||
}
|
||||
for (int i = 0; i < imm_count; ++i) {
|
||||
inputs[input_count++] = g.UseImmediate(imms[i]);
|
||||
}
|
||||
Emit(opcode, 1, &output, input_count, inputs, temp_count, temps);
|
||||
Emit(opcode, 1, &dst, input_count, inputs, temp_count, temps);
|
||||
}
|
||||
|
||||
// static
|
||||
|
@ -2884,16 +2884,19 @@ bool InstructionSelector::TryMatch16x8Shuffle(const uint8_t* shuffle,
|
||||
}
|
||||
|
||||
// static
|
||||
bool InstructionSelector::TryMatchConcat(const uint8_t* shuffle, uint8_t mask,
|
||||
bool InstructionSelector::TryMatchConcat(const uint8_t* shuffle,
|
||||
uint8_t* offset) {
|
||||
// Don't match the identity shuffle (e.g. [0 1 2 ... 15]).
|
||||
uint8_t start = shuffle[0];
|
||||
int i = 1;
|
||||
for (; i < 16 - start; ++i) {
|
||||
if ((shuffle[i] & mask) != ((shuffle[i - 1] + 1) & mask)) return false;
|
||||
}
|
||||
uint8_t wrap = 16;
|
||||
for (; i < 16; ++i, ++wrap) {
|
||||
if ((shuffle[i] & mask) != (wrap & mask)) return false;
|
||||
if (start == 0) return false;
|
||||
DCHECK_GT(kSimd128Size, start); // The shuffle should be canonicalized.
|
||||
// A concatenation is a series of consecutive indices, with at most one jump
|
||||
// in the middle from the last lane to the first.
|
||||
for (int i = 1; i < kSimd128Size; ++i) {
|
||||
if ((shuffle[i]) != ((shuffle[i - 1] + 1))) {
|
||||
if (shuffle[i - 1] != 15) return false;
|
||||
if (shuffle[i] % kSimd128Size != 0) return false;
|
||||
}
|
||||
}
|
||||
*offset = start;
|
||||
return true;
|
||||
@ -2907,23 +2910,21 @@ bool InstructionSelector::TryMatchBlend(const uint8_t* shuffle) {
|
||||
return true;
|
||||
}
|
||||
|
||||
uint8_t InstructionSelector::CanonicalizeShuffle(Node* node) {
|
||||
static const int kMaxLaneIndex = 15;
|
||||
static const int kMaxShuffleIndex = 31;
|
||||
void InstructionSelector::CanonicalizeShuffle(Node* node, uint8_t* shuffle,
|
||||
bool* is_swizzle) {
|
||||
// Get raw shuffle indices.
|
||||
memcpy(shuffle, OpParameter<uint8_t*>(node->op()), kSimd128Size);
|
||||
|
||||
const uint8_t* shuffle = OpParameter<uint8_t*>(node->op());
|
||||
uint8_t mask = kMaxShuffleIndex;
|
||||
// If shuffle is unary, set 'mask' to ignore the high bit of the indices.
|
||||
// Replace any unused source with the other.
|
||||
// Detect shuffles that only operate on one input.
|
||||
if (GetVirtualRegister(node->InputAt(0)) ==
|
||||
GetVirtualRegister(node->InputAt(1))) {
|
||||
// unary, src0 == src1.
|
||||
mask = kMaxLaneIndex;
|
||||
*is_swizzle = true;
|
||||
} else {
|
||||
// Inputs are distinct; check that both are required.
|
||||
bool src0_is_used = false;
|
||||
bool src1_is_used = false;
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
if (shuffle[i] <= kMaxLaneIndex) {
|
||||
for (int i = 0; i < kSimd128Size; ++i) {
|
||||
if (shuffle[i] < kSimd128Size) {
|
||||
src0_is_used = true;
|
||||
} else {
|
||||
src1_is_used = true;
|
||||
@ -2931,25 +2932,47 @@ uint8_t InstructionSelector::CanonicalizeShuffle(Node* node) {
|
||||
}
|
||||
if (src0_is_used && !src1_is_used) {
|
||||
node->ReplaceInput(1, node->InputAt(0));
|
||||
mask = kMaxLaneIndex;
|
||||
*is_swizzle = true;
|
||||
} else if (src1_is_used && !src0_is_used) {
|
||||
node->ReplaceInput(0, node->InputAt(1));
|
||||
mask = kMaxLaneIndex;
|
||||
*is_swizzle = true;
|
||||
} else {
|
||||
*is_swizzle = false;
|
||||
// Canonicalize general 2 input shuffles so that the first input lanes are
|
||||
// encountered first. This makes architectural shuffle pattern matching
|
||||
// easier, since we only need to consider 1 input ordering instead of 2.
|
||||
if (shuffle[0] >= kSimd128Size) {
|
||||
// The second operand is used first. Swap inputs and adjust the shuffle.
|
||||
SwapShuffleInputs(node);
|
||||
for (int i = 0; i < kSimd128Size; ++i) {
|
||||
shuffle[i] ^= kSimd128Size;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return mask;
|
||||
if (*is_swizzle) {
|
||||
for (int i = 0; i < kSimd128Size; ++i) shuffle[i] &= kSimd128Size - 1;
|
||||
}
|
||||
}
|
||||
|
||||
// static
|
||||
int32_t InstructionSelector::Pack4Lanes(const uint8_t* shuffle, uint8_t mask) {
|
||||
int32_t InstructionSelector::Pack4Lanes(const uint8_t* shuffle) {
|
||||
int32_t result = 0;
|
||||
for (int i = 3; i >= 0; --i) {
|
||||
result <<= 8;
|
||||
result |= shuffle[i] & mask;
|
||||
result |= shuffle[i];
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// static
|
||||
void InstructionSelector::SwapShuffleInputs(Node* node) {
|
||||
Node* input0 = node->InputAt(0);
|
||||
Node* input1 = node->InputAt(1);
|
||||
node->ReplaceInput(0, input1);
|
||||
node->ReplaceInput(1, input0);
|
||||
}
|
||||
|
||||
bool InstructionSelector::NeedsPoisoning(IsSafetyCheck safety_check) const {
|
||||
switch (poisoning_level_) {
|
||||
case PoisoningMitigationLevel::kDontPoison:
|
||||
|
@ -633,22 +633,25 @@ class V8_EXPORT_PRIVATE InstructionSelector final {
|
||||
// Tries to match a byte shuffle to a concatenate operation, formed by taking
|
||||
// 16 bytes from the 32 byte concatenation of the inputs. If successful, it
|
||||
// writes the byte offset. E.g. [4 5 6 7 .. 16 17 18 19] concatenates both
|
||||
// source vectors with offset 4.
|
||||
static bool TryMatchConcat(const uint8_t* shuffle, uint8_t mask,
|
||||
uint8_t* offset);
|
||||
// source vectors with offset 4. The shuffle should be canonicalized.
|
||||
static bool TryMatchConcat(const uint8_t* shuffle, uint8_t* offset);
|
||||
|
||||
// Tries to match a byte shuffle to a blend operation, which is a shuffle
|
||||
// where no lanes change position. E.g. [0 9 2 11 .. 14 31] interleaves the
|
||||
// even lanes of the first source with the odd lanes of the second.
|
||||
// even lanes of the first source with the odd lanes of the second. The
|
||||
// shuffle should be canonicalized.
|
||||
static bool TryMatchBlend(const uint8_t* shuffle);
|
||||
|
||||
// Packs 4 bytes of shuffle into a 32 bit immediate, using a mask from
|
||||
// CanonicalizeShuffle to convert unary shuffles.
|
||||
static int32_t Pack4Lanes(const uint8_t* shuffle, uint8_t mask);
|
||||
// Packs 4 bytes of shuffle into a 32 bit immediate.
|
||||
static int32_t Pack4Lanes(const uint8_t* shuffle);
|
||||
|
||||
// Canonicalize shuffles to make pattern matching simpler. Returns a mask that
|
||||
// will clear the high bit of indices if shuffle is unary (a swizzle).
|
||||
uint8_t CanonicalizeShuffle(Node* node);
|
||||
// Canonicalize shuffles to make pattern matching simpler. Returns the shuffle
|
||||
// indices, and a boolean indicating if the shuffle is a swizzle (one input).
|
||||
void CanonicalizeShuffle(Node* node, uint8_t* shuffle, bool* is_swizzle);
|
||||
|
||||
// Swaps the two first input operands of the node, to help match shuffles
|
||||
// to specific architectural instructions.
|
||||
void SwapShuffleInputs(Node* node);
|
||||
|
||||
// ===========================================================================
|
||||
|
||||
|
@ -2133,7 +2133,9 @@ static const ShuffleEntry arch_shuffles[] = {
|
||||
{{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, kMipsS8x2Reverse}};
|
||||
|
||||
bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table,
|
||||
size_t num_entries, uint8_t mask, ArchOpcode* opcode) {
|
||||
size_t num_entries, bool is_swizzle,
|
||||
ArchOpcode* opcode) {
|
||||
uint8_t mask = is_swizzle ? kSimd128Size - 1 : 2 * kSimd128Size - 1;
|
||||
for (size_t i = 0; i < num_entries; ++i) {
|
||||
const ShuffleEntry& entry = table[i];
|
||||
int j = 0;
|
||||
@ -2153,35 +2155,35 @@ bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table,
|
||||
} // namespace
|
||||
|
||||
void InstructionSelector::VisitS8x16Shuffle(Node* node) {
|
||||
const uint8_t* shuffle = OpParameter<uint8_t*>(node->op());
|
||||
uint8_t mask = CanonicalizeShuffle(node);
|
||||
uint8_t shuffle[kSimd128Size];
|
||||
bool is_swizzle;
|
||||
CanonicalizeShuffle(node, shuffle, &is_swizzle);
|
||||
uint8_t shuffle32x4[4];
|
||||
ArchOpcode opcode;
|
||||
if (TryMatchArchShuffle(shuffle, arch_shuffles, arraysize(arch_shuffles),
|
||||
mask, &opcode)) {
|
||||
is_swizzle, &opcode)) {
|
||||
VisitRRR(this, opcode, node);
|
||||
return;
|
||||
}
|
||||
Node* input0 = node->InputAt(0);
|
||||
Node* input1 = node->InputAt(1);
|
||||
uint8_t offset;
|
||||
MipsOperandGenerator g(this);
|
||||
if (TryMatchConcat(shuffle, mask, &offset)) {
|
||||
Emit(kMipsS8x16Concat, g.DefineSameAsFirst(node),
|
||||
g.UseRegister(node->InputAt(1)), g.UseRegister(node->InputAt(0)),
|
||||
g.UseImmediate(offset));
|
||||
if (TryMatchConcat(shuffle, &offset)) {
|
||||
Emit(kMipsS8x16Concat, g.DefineSameAsFirst(node), g.UseRegister(input0),
|
||||
g.UseRegister(input1), g.UseImmediate(offset));
|
||||
return;
|
||||
}
|
||||
if (TryMatch32x4Shuffle(shuffle, shuffle32x4)) {
|
||||
Emit(kMipsS32x4Shuffle, g.DefineAsRegister(node),
|
||||
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
|
||||
g.UseImmediate(Pack4Lanes(shuffle32x4, mask)));
|
||||
Emit(kMipsS32x4Shuffle, g.DefineAsRegister(node), g.UseRegister(input0),
|
||||
g.UseRegister(input1), g.UseImmediate(Pack4Lanes(shuffle32x4)));
|
||||
return;
|
||||
}
|
||||
Emit(kMipsS8x16Shuffle, g.DefineAsRegister(node),
|
||||
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
|
||||
g.UseImmediate(Pack4Lanes(shuffle, mask)),
|
||||
g.UseImmediate(Pack4Lanes(shuffle + 4, mask)),
|
||||
g.UseImmediate(Pack4Lanes(shuffle + 8, mask)),
|
||||
g.UseImmediate(Pack4Lanes(shuffle + 12, mask)));
|
||||
Emit(kMipsS8x16Shuffle, g.DefineAsRegister(node), g.UseRegister(input0),
|
||||
g.UseRegister(input1), g.UseImmediate(Pack4Lanes(shuffle)),
|
||||
g.UseImmediate(Pack4Lanes(shuffle + 4)),
|
||||
g.UseImmediate(Pack4Lanes(shuffle + 8)),
|
||||
g.UseImmediate(Pack4Lanes(shuffle + 12)));
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitSignExtendWord8ToInt32(Node* node) {
|
||||
|
@ -2802,7 +2802,9 @@ static const ShuffleEntry arch_shuffles[] = {
|
||||
kMips64S8x2Reverse}};
|
||||
|
||||
bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table,
|
||||
size_t num_entries, uint8_t mask, ArchOpcode* opcode) {
|
||||
size_t num_entries, bool is_swizzle,
|
||||
ArchOpcode* opcode) {
|
||||
uint8_t mask = is_swizzle ? kSimd128Size - 1 : 2 * kSimd128Size - 1;
|
||||
for (size_t i = 0; i < num_entries; ++i) {
|
||||
const ShuffleEntry& entry = table[i];
|
||||
int j = 0;
|
||||
@ -2822,35 +2824,35 @@ bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table,
|
||||
} // namespace
|
||||
|
||||
void InstructionSelector::VisitS8x16Shuffle(Node* node) {
|
||||
const uint8_t* shuffle = OpParameter<uint8_t*>(node->op());
|
||||
uint8_t mask = CanonicalizeShuffle(node);
|
||||
uint8_t shuffle[kSimd128Size];
|
||||
bool is_swizzle;
|
||||
CanonicalizeShuffle(node, shuffle, &is_swizzle);
|
||||
uint8_t shuffle32x4[4];
|
||||
ArchOpcode opcode;
|
||||
if (TryMatchArchShuffle(shuffle, arch_shuffles, arraysize(arch_shuffles),
|
||||
mask, &opcode)) {
|
||||
is_swizzle, &opcode)) {
|
||||
VisitRRR(this, opcode, node);
|
||||
return;
|
||||
}
|
||||
Node* input0 = node->InputAt(0);
|
||||
Node* input1 = node->InputAt(1);
|
||||
uint8_t offset;
|
||||
Mips64OperandGenerator g(this);
|
||||
if (TryMatchConcat(shuffle, mask, &offset)) {
|
||||
Emit(kMips64S8x16Concat, g.DefineSameAsFirst(node),
|
||||
g.UseRegister(node->InputAt(1)), g.UseRegister(node->InputAt(0)),
|
||||
g.UseImmediate(offset));
|
||||
if (TryMatchConcat(shuffle, &offset)) {
|
||||
Emit(kMips64S8x16Concat, g.DefineSameAsFirst(node), g.UseRegister(input0),
|
||||
g.UseRegister(input1), g.UseImmediate(offset));
|
||||
return;
|
||||
}
|
||||
if (TryMatch32x4Shuffle(shuffle, shuffle32x4)) {
|
||||
Emit(kMips64S32x4Shuffle, g.DefineAsRegister(node),
|
||||
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
|
||||
g.UseImmediate(Pack4Lanes(shuffle32x4, mask)));
|
||||
Emit(kMips64S32x4Shuffle, g.DefineAsRegister(node), g.UseRegister(input0),
|
||||
g.UseRegister(input1), g.UseImmediate(Pack4Lanes(shuffle32x4)));
|
||||
return;
|
||||
}
|
||||
Emit(kMips64S8x16Shuffle, g.DefineAsRegister(node),
|
||||
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
|
||||
g.UseImmediate(Pack4Lanes(shuffle, mask)),
|
||||
g.UseImmediate(Pack4Lanes(shuffle + 4, mask)),
|
||||
g.UseImmediate(Pack4Lanes(shuffle + 8, mask)),
|
||||
g.UseImmediate(Pack4Lanes(shuffle + 12, mask)));
|
||||
Emit(kMips64S8x16Shuffle, g.DefineAsRegister(node), g.UseRegister(input0),
|
||||
g.UseRegister(input1), g.UseImmediate(Pack4Lanes(shuffle)),
|
||||
g.UseImmediate(Pack4Lanes(shuffle + 4)),
|
||||
g.UseImmediate(Pack4Lanes(shuffle + 8)),
|
||||
g.UseImmediate(Pack4Lanes(shuffle + 12)));
|
||||
}
|
||||
|
||||
void InstructionSelector::VisitSignExtendWord8ToInt32(Node* node) {
|
||||
|
@ -1678,262 +1678,213 @@ WASM_SIMD_COMPILED_AND_LOWERED_TEST(F32x4AddHoriz) {
|
||||
{{1.0f, 5.0f, 9.0f, 13.0f}});
|
||||
}
|
||||
|
||||
// Test shuffle ops.
|
||||
template <typename T>
|
||||
void RunShuffleOpTest(WasmExecutionMode execution_mode, LowerSimd lower_simd,
|
||||
WasmOpcode simd_op,
|
||||
const std::array<T, kSimd128Size / sizeof(T)>& shuffle) {
|
||||
// Test the original shuffle.
|
||||
RunBinaryLaneOpTest<T>(execution_mode, lower_simd, simd_op, shuffle);
|
||||
|
||||
// Test a non-canonical (inputs reversed) version of the shuffle.
|
||||
std::array<T, kSimd128Size / sizeof(T)> other_shuffle(shuffle);
|
||||
for (size_t i = 0; i < shuffle.size(); ++i) other_shuffle[i] ^= kSimd128Size;
|
||||
RunBinaryLaneOpTest<T>(execution_mode, lower_simd, simd_op, other_shuffle);
|
||||
|
||||
// Test the swizzle (one-operand) version of the shuffle.
|
||||
std::array<T, kSimd128Size / sizeof(T)> swizzle(shuffle);
|
||||
for (size_t i = 0; i < shuffle.size(); ++i) swizzle[i] &= (kSimd128Size - 1);
|
||||
RunBinaryLaneOpTest<T>(execution_mode, lower_simd, simd_op, swizzle);
|
||||
|
||||
// Test the non-canonical swizzle (one-operand) version of the shuffle.
|
||||
std::array<T, kSimd128Size / sizeof(T)> other_swizzle(shuffle);
|
||||
for (size_t i = 0; i < shuffle.size(); ++i) other_swizzle[i] |= kSimd128Size;
|
||||
RunBinaryLaneOpTest<T>(execution_mode, lower_simd, simd_op, other_swizzle);
|
||||
}
|
||||
|
||||
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
|
||||
V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
|
||||
// Test some regular shuffles that may have special handling on some targets.
|
||||
// Test a normal and unary versions (where second operand isn't used).
|
||||
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S32x4Dup) {
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
RunShuffleOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{16, 17, 18, 19, 16, 17, 18, 19, 16, 17, 18, 19, 16, 17, 18, 19}});
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7}});
|
||||
}
|
||||
|
||||
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S32x4ZipLeft) {
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
RunShuffleOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}});
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7}});
|
||||
}
|
||||
|
||||
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S32x4ZipRight) {
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
RunShuffleOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}});
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15, 12, 13, 14, 15}});
|
||||
}
|
||||
|
||||
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S32x4UnzipLeft) {
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
RunShuffleOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27}});
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11}});
|
||||
}
|
||||
|
||||
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S32x4UnzipRight) {
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
RunShuffleOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31}});
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15}});
|
||||
}
|
||||
|
||||
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S32x4TransposeLeft) {
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
RunShuffleOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27}});
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 8, 9, 10, 11}});
|
||||
}
|
||||
|
||||
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S32x4TransposeRight) {
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
RunShuffleOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31}});
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{4, 5, 6, 7, 4, 5, 6, 7, 12, 13, 14, 15, 12, 13, 14, 15}});
|
||||
}
|
||||
|
||||
// Reverses are only unary.
|
||||
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S32x2Reverse) {
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
RunShuffleOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11}});
|
||||
}
|
||||
|
||||
// Test irregular shuffle.
|
||||
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S32x4Irregular) {
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
RunShuffleOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{0, 1, 2, 3, 16, 17, 18, 19, 16, 17, 18, 19, 20, 21, 22, 23}});
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7}});
|
||||
}
|
||||
|
||||
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S16x8Dup) {
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
RunShuffleOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{18, 19, 18, 19, 18, 19, 18, 19, 18, 19, 18, 19, 18, 19, 18, 19}});
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7}});
|
||||
}
|
||||
|
||||
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S16x8ZipLeft) {
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
RunShuffleOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}});
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{0, 1, 0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7}});
|
||||
}
|
||||
|
||||
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S16x8ZipRight) {
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
RunShuffleOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}});
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{8, 9, 8, 9, 10, 11, 10, 11, 12, 13, 12, 13, 14, 15, 14, 15}});
|
||||
}
|
||||
|
||||
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S16x8UnzipLeft) {
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
RunShuffleOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29}});
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13}});
|
||||
}
|
||||
|
||||
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S16x8UnzipRight) {
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
RunShuffleOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}});
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15}});
|
||||
}
|
||||
|
||||
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S16x8TransposeLeft) {
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
RunShuffleOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{0, 1, 16, 17, 4, 5, 20, 21, 8, 9, 24, 25, 12, 13, 28, 29}});
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{0, 1, 0, 1, 4, 5, 4, 5, 8, 9, 8, 9, 12, 13, 12, 13}});
|
||||
}
|
||||
|
||||
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S16x8TransposeRight) {
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
RunShuffleOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31}});
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{2, 3, 2, 3, 6, 7, 6, 7, 10, 11, 10, 11, 14, 15, 14, 15}});
|
||||
}
|
||||
|
||||
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S16x4Reverse) {
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
RunShuffleOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9}});
|
||||
}
|
||||
|
||||
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S16x2Reverse) {
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
RunShuffleOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13}});
|
||||
}
|
||||
|
||||
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S16x8Irregular) {
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
RunShuffleOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{0, 1, 16, 17, 16, 17, 0, 1, 4, 5, 20, 21, 6, 7, 22, 23}});
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{0, 1, 0, 1, 0, 1, 0, 1, 4, 5, 4, 5, 6, 7, 6, 7}});
|
||||
}
|
||||
|
||||
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x16Dup) {
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
RunShuffleOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19}});
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7}});
|
||||
}
|
||||
|
||||
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x16ZipLeft) {
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
RunShuffleOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}});
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7}});
|
||||
}
|
||||
|
||||
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x16ZipRight) {
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
RunShuffleOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}});
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15}});
|
||||
}
|
||||
|
||||
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x16UnzipLeft) {
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
RunShuffleOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}});
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{0, 2, 4, 6, 8, 10, 12, 14, 0, 2, 4, 6, 8, 10, 12, 14}});
|
||||
}
|
||||
|
||||
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x16UnzipRight) {
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
RunShuffleOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}});
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{1, 3, 5, 7, 9, 11, 13, 15, 1, 3, 5, 7, 9, 11, 13, 15}});
|
||||
}
|
||||
|
||||
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x16TransposeLeft) {
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
RunShuffleOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}});
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14}});
|
||||
}
|
||||
|
||||
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x16TransposeRight) {
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
RunShuffleOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}});
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}});
|
||||
}
|
||||
|
||||
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x8Reverse) {
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
RunShuffleOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8}});
|
||||
}
|
||||
|
||||
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x4Reverse) {
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
RunShuffleOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12}});
|
||||
}
|
||||
|
||||
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x2Reverse) {
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
RunShuffleOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}});
|
||||
}
|
||||
|
||||
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x16Irregular) {
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
RunShuffleOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{0, 16, 0, 16, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}});
|
||||
RunBinaryLaneOpTest<int8_t>(
|
||||
execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
{{0, 0, 0, 0, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7}});
|
||||
}
|
||||
|
||||
// Test shuffles that blend the two vectors (elements remain in their lanes.)
|
||||
@ -1943,8 +1894,7 @@ WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x16Blend) {
|
||||
for (int bias = 1; bias < kLanes; bias++) {
|
||||
for (int i = 0; i < bias; i++) expected[i] = i;
|
||||
for (int i = bias; i < kLanes; i++) expected[i] = i + kLanes;
|
||||
RunBinaryLaneOpTest(execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
expected);
|
||||
RunShuffleOpTest(execution_mode, lower_simd, kExprS8x16Shuffle, expected);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1952,18 +1902,18 @@ WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x16Blend) {
|
||||
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x16Concat) {
|
||||
static const int kLanes = 16;
|
||||
std::array<uint8_t, kLanes> expected;
|
||||
for (int bias = 1; bias < kLanes; bias++) {
|
||||
// n is offset or bias of concatenation.
|
||||
for (int n = 1; n < kLanes; ++n) {
|
||||
int i = 0;
|
||||
// last kLanes - bias bytes of first vector.
|
||||
for (int j = bias; j < kLanes; j++) {
|
||||
// last kLanes - n bytes of first vector.
|
||||
for (int j = n; j < kLanes; ++j) {
|
||||
expected[i++] = j;
|
||||
}
|
||||
// first bias lanes of second vector
|
||||
for (int j = 0; j < bias; j++) {
|
||||
// first n bytes of second vector
|
||||
for (int j = 0; j < n; ++j) {
|
||||
expected[i++] = j + kLanes;
|
||||
}
|
||||
RunBinaryLaneOpTest(execution_mode, lower_simd, kExprS8x16Shuffle,
|
||||
expected);
|
||||
RunShuffleOpTest(execution_mode, lower_simd, kExprS8x16Shuffle, expected);
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user