i16x2 ops

SE(val) -> S(dst_type, val) to make this work.

Change-Id: Icf42f706b2e7761db8ce83f1e1ef95c288bfecf4
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/274120
Commit-Queue: Mike Klein <mtklein@google.com>
Reviewed-by: Herb Derby <herb@google.com>
This commit is contained in:
Mike Klein 2020-02-28 13:20:06 -06:00 committed by Skia Commit-Bot
parent 07248d9143
commit e96207a9e3
2 changed files with 58 additions and 23 deletions

View File

@ -1942,20 +1942,24 @@ namespace skvm {
auto emit = [&](size_t i, bool scalar, IRBuilder* b) {
auto [op, x,y,z, immy,immz, death,can_hoist,used_in_loop] = instructions[i];
llvm::Type *i1 = llvm::Type::getInt1Ty (ctx),
*i8 = llvm::Type::getInt8Ty (ctx),
*i16 = llvm::Type::getInt16Ty(ctx),
*i32 = llvm::Type::getInt32Ty(ctx),
*f32 = llvm::Type::getFloatTy(ctx),
*I1 = scalar ? i1 : llvm::VectorType::get(i1 , K),
*I8 = scalar ? i8 : llvm::VectorType::get(i8 , K),
*I16 = scalar ? i16 : llvm::VectorType::get(i16, K),
*I32 = scalar ? i32 : llvm::VectorType::get(i32, K),
*F32 = scalar ? f32 : llvm::VectorType::get(f32, K);
llvm::Type *i1 = llvm::Type::getInt1Ty (ctx),
*i8 = llvm::Type::getInt8Ty (ctx),
*i16 = llvm::Type::getInt16Ty(ctx),
*i32 = llvm::Type::getInt32Ty(ctx),
*i16x2 = llvm::VectorType::get(i16, 2),
*f32 = llvm::Type::getFloatTy(ctx),
*I1 = scalar ? i1 : llvm::VectorType::get(i1 , K ),
*I8 = scalar ? i8 : llvm::VectorType::get(i8 , K ),
*I16 = scalar ? i16 : llvm::VectorType::get(i16, K ),
*I16x2 = scalar ? i16x2 : llvm::VectorType::get(i16, K*2),
*I32 = scalar ? i32 : llvm::VectorType::get(i32, K ),
*F32 = scalar ? f32 : llvm::VectorType::get(f32, K );
auto I = [&](llvm::Value* v) { return b->CreateBitCast(v, I32); };
auto F = [&](llvm::Value* v) { return b->CreateBitCast(v, F32); };
auto SE = [&](llvm::Value* v) { return b->CreateSExt (v, I32); };
auto I = [&](llvm::Value* v) { return b->CreateBitCast(v, I32 ); };
auto F = [&](llvm::Value* v) { return b->CreateBitCast(v, F32 ); };
auto x2 = [&](llvm::Value* v) { return b->CreateBitCast(v, I16x2); };
auto S = [&](llvm::Type* dst, llvm::Value* v) { return b->CreateSExt(v, dst); };
switch (llvm::Type* t = nullptr; op) {
default:
@ -2034,20 +2038,20 @@ namespace skvm {
case Op::sra_i32: vals[i] = b->CreateAShr(vals[x], immy); break;
case Op::shr_i32: vals[i] = b->CreateLShr(vals[x], immy); break;
case Op::eq_i32: vals[i] = SE(b->CreateICmpEQ (vals[x], vals[y])); break;
case Op::neq_i32: vals[i] = SE(b->CreateICmpNE (vals[x], vals[y])); break;
case Op::gt_i32: vals[i] = SE(b->CreateICmpSGT(vals[x], vals[y])); break;
case Op::gte_i32: vals[i] = SE(b->CreateICmpSGE(vals[x], vals[y])); break;
case Op:: eq_i32: vals[i] = S(I32, b->CreateICmpEQ (vals[x], vals[y])); break;
case Op::neq_i32: vals[i] = S(I32, b->CreateICmpNE (vals[x], vals[y])); break;
case Op:: gt_i32: vals[i] = S(I32, b->CreateICmpSGT(vals[x], vals[y])); break;
case Op::gte_i32: vals[i] = S(I32, b->CreateICmpSGE(vals[x], vals[y])); break;
case Op::add_f32: vals[i] = I(b->CreateFAdd(F(vals[x]), F(vals[y]))); break;
case Op::sub_f32: vals[i] = I(b->CreateFSub(F(vals[x]), F(vals[y]))); break;
case Op::mul_f32: vals[i] = I(b->CreateFMul(F(vals[x]), F(vals[y]))); break;
case Op::div_f32: vals[i] = I(b->CreateFDiv(F(vals[x]), F(vals[y]))); break;
case Op::eq_f32: vals[i] = SE(b->CreateFCmpOEQ(F(vals[x]), F(vals[y]))); break;
case Op::neq_f32: vals[i] = SE(b->CreateFCmpUNE(F(vals[x]), F(vals[y]))); break;
case Op::gt_f32: vals[i] = SE(b->CreateFCmpOGT(F(vals[x]), F(vals[y]))); break;
case Op::gte_f32: vals[i] = SE(b->CreateFCmpOGE(F(vals[x]), F(vals[y]))); break;
case Op:: eq_f32: vals[i] = S(I32, b->CreateFCmpOEQ(F(vals[x]), F(vals[y]))); break;
case Op::neq_f32: vals[i] = S(I32, b->CreateFCmpUNE(F(vals[x]), F(vals[y]))); break;
case Op:: gt_f32: vals[i] = S(I32, b->CreateFCmpOGT(F(vals[x]), F(vals[y]))); break;
case Op::gte_f32: vals[i] = S(I32, b->CreateFCmpOGE(F(vals[x]), F(vals[y]))); break;
case Op::mad_f32:
vals[i] = I(b->CreateIntrinsic(llvm::Intrinsic::fmuladd, {F32},
@ -2066,6 +2070,27 @@ namespace skvm {
vals[i] = b->CreateFPToSI(b->CreateFAdd(F(vals[x]),
llvm::ConstantFP::get(F32, 0.5)), I32);
break;
case Op::add_i16x2: vals[i] = I(b->CreateAdd(x2(vals[x]), x2(vals[y]))); break;
case Op::sub_i16x2: vals[i] = I(b->CreateSub(x2(vals[x]), x2(vals[y]))); break;
case Op::mul_i16x2: vals[i] = I(b->CreateMul(x2(vals[x]), x2(vals[y]))); break;
case Op::shl_i16x2: vals[i] = I(b->CreateShl (x2(vals[x]), immy)); break;
case Op::sra_i16x2: vals[i] = I(b->CreateAShr(x2(vals[x]), immy)); break;
case Op::shr_i16x2: vals[i] = I(b->CreateLShr(x2(vals[x]), immy)); break;
case Op:: eq_i16x2:
vals[i] = I(S(I16x2, b->CreateICmpEQ (x2(vals[x]), x2(vals[y]))));
break;
case Op::neq_i16x2:
vals[i] = I(S(I16x2, b->CreateICmpNE (x2(vals[x]), x2(vals[y]))));
break;
case Op:: gt_i16x2:
vals[i] = I(S(I16x2, b->CreateICmpSGT(x2(vals[x]), x2(vals[y]))));
break;
case Op::gte_i16x2:
vals[i] = I(S(I16x2, b->CreateICmpSGE(x2(vals[x]), x2(vals[y]))));
break;
}
return true;
};

View File

@ -604,7 +604,12 @@ DEF_TEST(SkVM_i16x2, r) {
b.store32(buf, u);
}
test_interpreter_only(r, b.done(), [&](const skvm::Program& program) {
#if defined(SKVM_LLVM)
test_jit_and_interpreter
#else
test_interpreter_only
#endif
(r, b.done(), [&](const skvm::Program& program) {
uint16_t buf[] = { 0,1,2,3,4,5,6,7,8,9,10,11,12,13 };
program.eval(SK_ARRAY_COUNT(buf)/2, buf);
@ -637,7 +642,12 @@ DEF_TEST(SkVM_cmp_i16, r) {
b.store32(buf, m);
}
test_interpreter_only(r, b.done(), [&](const skvm::Program& program) {
#if defined(SKVM_LLVM)
test_jit_and_interpreter
#else
test_interpreter_only
#endif
(r, b.done(), [&](const skvm::Program& program) {
int16_t buf[] = { 0,1, 2,3, 4,5, 6,7, 8,9 };
program.eval(SK_ARRAY_COUNT(buf)/2, buf);