implement some useful 16-bit instructions
Add a slew of 16-bit instructions for experiments.

I want to try a fixed-point path through SkVMBlitter, continuing to represent geometry with F32, but color channels in 16 bits, with several possible representations:
  - unorm8 lowp like SkRasterPipeline (0 -> 0.0, 0x00ff -> 1.0)
  - 15-bit SkFixed15 fixed-point (0 -> 0.0, 0x8000 -> 1.0)
  - 14-bit signed fixed-point (0 -> 0.0, ±0x4000 -> ±1.0)

I'm leaning towards the 14-bit version for being able to hold a good range of temporary values in [-2,2), or perhaps even a 13-bit analog for even a little more safety range. Mostly something new to try.

Most of these instructions are pretty obvious, with notes on a few:

vpavgw is an unsigned (x+y+1)>>1, and is useful for converting unorm8 up to Q14. There are a couple of ways to do this pretty well; using vpavgw is the best, and uses the fewest instructions:
  A) (x << 6) + ( x >> 2) + (x == 255)  // Ok approx.
  B) (x << 6) + ((x+1) >> 2)            // Better approx.
  C) vpavgw(x << 7, x >> 1)             // Perfect math!
The best reverse math (Q14 back down to unorm8) I've found is (x >> 6) - (x > 16319).

vpmulhrsw is the key to the whole thing as usual, letting us do 16x16->16-bit multiplies. An SkFixed15 multiply is vpmulhrsw followed by vpabsw (also added here), and a Q14 multiply is vpmulhrsw followed by a simple <<1.

I've added both signed and unsigned min and max. Not entirely sure they'll all be used, but I do have my eye on vpminuw as a single-instruction clamp to [0,0x4000] ~~> [0.0,1.0], treating any negative Q14 as very large unsigned.

Change-Id: I0db7f3f943ef6c9a600821444cc5b003fe5f675d
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/317119
Commit-Queue: Herb Derby <herb@google.com>
Auto-Submit: Mike Klein <mtklein@google.com>
Reviewed-by: Herb Derby <herb@google.com>
This commit is contained in:
parent
272e8bcd24
commit
84dd8f9912
@ -2016,8 +2016,18 @@ namespace skvm {
|
||||
// vpsubd: dst = x - y on 32-bit integer lanes (66 0F FA).
void Assembler::vpsubd(Ymm dst, Ymm x, Operand y) {
    this->op(0x66, 0x0f, 0xfa,
             dst, x, y);
}
|
||||
// vpmulld: dst = low 32 bits of x * y per 32-bit lane (66 0F38 40).
void Assembler::vpmulld(Ymm dst, Ymm x, Operand y) {
    this->op(0x66, 0x380f, 0x40,
             dst, x, y);
}
|
||||
|
||||
// vpsubw: dst = x - y on 16-bit integer lanes (66 0F F9).
// NOTE(review): an identical vpsubw definition appears a few lines below; this
// looks like the old side of the diff hunk -- confirm only one copy survives.
void Assembler::vpsubw(Ymm dst, Ymm x, Operand y) {
    this->op(0x66, 0x0f, 0xf9,
             dst, x, y);
}
|
||||
// vpmullw: dst = low 16 bits of x * y per 16-bit lane (66 0F D5).
// NOTE(review): an identical vpmullw definition appears a few lines below; this
// looks like the old side of the diff hunk -- confirm only one copy survives.
void Assembler::vpmullw(Ymm dst, Ymm x, Operand y) {
    this->op(0x66, 0x0f, 0xd5,
             dst, x, y);
}
|
||||
// vpaddw: dst = x + y on 16-bit integer lanes (66 0F FD).
void Assembler::vpaddw(Ymm dst, Ymm x, Operand y) {
    this->op(0x66, 0x0f, 0xfd,
             dst, x, y);
}
|
||||
// vpsubw: dst = x - y on 16-bit integer lanes (66 0F F9).
void Assembler::vpsubw(Ymm dst, Ymm x, Operand y) {
    this->op(0x66, 0x0f, 0xf9,
             dst, x, y);
}
|
||||
// vpmullw: dst = low 16 bits of x * y per 16-bit lane (66 0F D5).
void Assembler::vpmullw(Ymm dst, Ymm x, Operand y) {
    this->op(0x66, 0x0f, 0xd5,
             dst, x, y);
}
|
||||
// vpavgw: unsigned rounding average, dst = (x + y + 1) >> 1 per 16-bit lane (66 0F E3).
void Assembler::vpavgw(Ymm dst, Ymm x, Operand y) {
    this->op(0x66, 0x0f, 0xe3,
             dst, x, y);
}
|
||||
// vpmulhrsw: signed 16x16 -> 16-bit rounding multiply,
// dst = (x*y + (1<<14)) >> 15 per lane (66 0F38 0B).
void Assembler::vpmulhrsw(Ymm dst, Ymm x, Operand y) {
    this->op(0x66, 0x380f, 0x0b,
             dst, x, y);
}
|
||||
// vpminsw: dst = min(x, y) treating 16-bit lanes as signed (66 0F EA).
void Assembler::vpminsw(Ymm dst, Ymm x, Operand y) {
    this->op(0x66, 0x0f, 0xea,
             dst, x, y);
}
|
||||
// vpmaxsw: dst = max(x, y) treating 16-bit lanes as signed (66 0F EE).
void Assembler::vpmaxsw(Ymm dst, Ymm x, Operand y) {
    this->op(0x66, 0x0f, 0xee,
             dst, x, y);
}
|
||||
// vpminuw: dst = min(x, y) treating 16-bit lanes as unsigned (66 0F38 3A).
// Negative signed values compare as very large here, which makes this usable
// as a one-instruction clamp against an upper bound.
void Assembler::vpminuw(Ymm dst, Ymm x, Operand y) {
    this->op(0x66, 0x380f, 0x3a,
             dst, x, y);
}
|
||||
// vpmaxuw: dst = max(x, y) treating 16-bit lanes as unsigned (66 0F38 3E).
void Assembler::vpmaxuw(Ymm dst, Ymm x, Operand y) {
    this->op(0x66, 0x380f, 0x3e,
             dst, x, y);
}
|
||||
|
||||
// vpabsw: dst = |x| per signed 16-bit lane (66 0F38 1D).  Two-operand form:
// there is no second source register.
void Assembler::vpabsw(Ymm dst, Operand x) {
    this->op(0x66, 0x380f, 0x1d,
             dst, x);
}
|
||||
|
||||
|
||||
// vpand: dst = x & y, bitwise (66 0F DB).
void Assembler::vpand(Ymm dst, Ymm x, Operand y) {
    this->op(0x66, 0x0f, 0xdb,
             dst, x, y);
}
|
||||
// vpor: dst = x | y, bitwise (66 0F EB).
void Assembler::vpor(Ymm dst, Ymm x, Operand y) {
    this->op(0x66, 0x0f, 0xeb,
             dst, x, y);
}
|
||||
@ -2050,7 +2060,9 @@ namespace skvm {
|
||||
// vpunpckhdq: interleave the high 32-bit elements of x and y (66 0F 6A).
void Assembler::vpunpckhdq(Ymm dst, Ymm x, Operand y) {
    this->op(0x66, 0x0f, 0x6a,
             dst, x, y);
}
|
||||
|
||||
// vpcmpeqd: per 32-bit lane, dst = all ones if x == y, else zero (66 0F 76).
void Assembler::vpcmpeqd(Ymm dst, Ymm x, Operand y) {
    this->op(0x66, 0x0f, 0x76,
             dst, x, y);
}
|
||||
// vpcmpeqw: per 16-bit lane, dst = all ones if x == y, else zero (66 0F 75).
void Assembler::vpcmpeqw(Ymm dst, Ymm x, Operand y) {
    this->op(0x66, 0x0f, 0x75,
             dst, x, y);
}
|
||||
// vpcmpgtd: per 32-bit lane, dst = all ones if x > y (signed), else zero (66 0F 66).
void Assembler::vpcmpgtd(Ymm dst, Ymm x, Operand y) {
    this->op(0x66, 0x0f, 0x66,
             dst, x, y);
}
|
||||
// vpcmpgtw: per 16-bit lane, dst = all ones if x > y (signed), else zero (66 0F 65).
void Assembler::vpcmpgtw(Ymm dst, Ymm x, Operand y) {
    this->op(0x66, 0x0f, 0x65,
             dst, x, y);
}
|
||||
|
||||
|
||||
void Assembler::imm_byte_after_operand(const Operand& operand, int imm) {
|
||||
@ -2089,10 +2101,18 @@ namespace skvm {
|
||||
this->op(0x66,0x0f,0x72,(Ymm)4, dst,x);
|
||||
this->byte(imm);
|
||||
}
|
||||
// vpsllw: dst = x << imm on 16-bit lanes (66 0F 71 /6 ib).
void Assembler::vpsllw(Ymm dst, Ymm x, int imm) {
    // Opcode 0x71 is shared by the 16-bit immediate shifts.  The Ymm-typed 6 is
    // not a register: it is the /6 opcode extension that selects shift-left.
    const Ymm shiftLeft = (Ymm)6;
    this->op(0x66, 0x0f, 0x71, shiftLeft, dst, x);

    // The shift count trails the instruction as a single immediate byte.
    this->byte(imm);
}
|
||||
// vpsrlw: dst = x >> imm on 16-bit lanes, logical / zero-filling (66 0F 71 /2 ib).
void Assembler::vpsrlw(Ymm dst, Ymm x, int imm) {
    // Opcode 0x71 is shared by the 16-bit immediate shifts.  The Ymm-typed 2 is
    // not a register: it is the /2 opcode extension that selects logical shift-right.
    const Ymm shiftRightLogical = (Ymm)2;
    this->op(0x66, 0x0f, 0x71, shiftRightLogical, dst, x);

    // The shift count trails the instruction as a single immediate byte.
    this->byte(imm);
}
|
||||
// vpsraw: dst = x >> imm on 16-bit lanes, arithmetic / sign-filling (66 0F 71 /4 ib).
void Assembler::vpsraw(Ymm dst, Ymm x, int imm) {
    // Opcode 0x71 is shared by the 16-bit immediate shifts.  The Ymm-typed 4 is
    // not a register: it is the /4 opcode extension that selects arithmetic shift-right.
    const Ymm shiftRightArithmetic = (Ymm)4;
    this->op(0x66, 0x0f, 0x71, shiftRightArithmetic, dst, x);

    // The shift count trails the instruction as a single immediate byte.
    this->byte(imm);
}
|
||||
|
||||
void Assembler::vpermq(Ymm dst, Operand x, int imm) {
|
||||
// A bit unusual among the instructions we use, this is 64-bit operation, so we set W.
|
||||
|
@ -132,8 +132,17 @@ namespace skvm {
|
||||
void vpsubd (Ymm dst, Ymm x, Operand y); // dst = x - y, 32-bit lanes.
|
||||
void vpmulld(Ymm dst, Ymm x, Operand y); // dst = low 32 bits of x*y, 32-bit lanes.
|
||||
|
||||
void vpsubw (Ymm dst, Ymm x, Operand y); // dst = x - y, 16-bit lanes.
|
||||
void vpmullw(Ymm dst, Ymm x, Operand y); // dst = low 16 bits of x*y, 16-bit lanes.
|
||||
void vpaddw (Ymm dst, Ymm x, Operand y); // dst = x + y, 16-bit lanes.
|
||||
void vpsubw (Ymm dst, Ymm x, Operand y); // dst = x - y, 16-bit lanes.
|
||||
void vpmullw (Ymm dst, Ymm x, Operand y); // dst = low 16 bits of x*y, 16-bit lanes.
|
||||
|
||||
void vpabsw (Ymm dst, Operand x); // dst = |x|, signed 16-bit lanes.
|
||||
void vpavgw (Ymm dst, Ymm x, Operand y); // dst = (x+y+1)>>1, unsigned.
|
||||
void vpmulhrsw(Ymm dst, Ymm x, Operand y); // dst = (x*y + (1<<14)) >> 15, signed.
|
||||
void vpminsw (Ymm dst, Ymm x, Operand y); // dst = min(x,y), signed 16-bit lanes.
|
||||
void vpminuw (Ymm dst, Ymm x, Operand y); // dst = min(x,y), unsigned 16-bit lanes.
|
||||
void vpmaxsw (Ymm dst, Ymm x, Operand y); // dst = max(x,y), signed 16-bit lanes.
|
||||
void vpmaxuw (Ymm dst, Ymm x, Operand y); // dst = max(x,y), unsigned 16-bit lanes.
|
||||
|
||||
void vaddps(Ymm dst, Ymm x, Operand y); // dst = x + y, float lanes.
|
||||
void vsubps(Ymm dst, Ymm x, Operand y); // dst = x - y, float lanes.
|
||||
@ -164,6 +173,8 @@ namespace skvm {
|
||||
|
||||
void vpcmpeqd(Ymm dst, Ymm x, Operand y); // 32-bit lanes: all ones where x == y, else 0.
|
||||
void vpcmpgtd(Ymm dst, Ymm x, Operand y); // 32-bit lanes: all ones where x > y (signed), else 0.
|
||||
void vpcmpeqw(Ymm dst, Ymm x, Operand y); // 16-bit lanes: all ones where x == y, else 0.
|
||||
void vpcmpgtw(Ymm dst, Ymm x, Operand y); // 16-bit lanes: all ones where x > y (signed), else 0.
|
||||
|
||||
void vcmpps (Ymm dst, Ymm x, Operand y, int imm); // Float compare; imm picks the predicate.
|
||||
// Float equality compare: vcmpps with predicate immediate 0.
void vcmpeqps(Ymm dst, Ymm x, Operand y) {
    this->vcmpps(dst, x, y, 0);
}
|
||||
@ -175,7 +186,10 @@ namespace skvm {
|
||||
void vpslld(Ymm dst, Ymm x, int imm); // dst = x << imm, 32-bit lanes.
|
||||
void vpsrld(Ymm dst, Ymm x, int imm); // dst = x >> imm, 32-bit lanes, zero-filling.
|
||||
void vpsrad(Ymm dst, Ymm x, int imm); // dst = x >> imm, 32-bit lanes, sign-filling.
|
||||
|
||||
void vpsllw(Ymm dst, Ymm x, int imm); // dst = x << imm, 16-bit lanes.
|
||||
void vpsrlw(Ymm dst, Ymm x, int imm); // dst = x >> imm, 16-bit lanes, zero-filling.
|
||||
void vpsraw(Ymm dst, Ymm x, int imm); // dst = x >> imm, 16-bit lanes, sign-filling.
|
||||
|
||||
void vpermq (Ymm dst, Operand x, int imm);
|
||||
void vperm2f128(Ymm dst, Ymm x, Operand y, int imm);
|
||||
|
@ -1180,6 +1180,38 @@ DEF_TEST(SkVM_Assembler, r) {
|
||||
0xc5, 0xf5, 0xfa, 0xc2,
|
||||
});
|
||||
|
||||
// Encoding checks for the 16-bit integer instructions.  Each call in the lambda
// must produce, in order, the byte sequence on the matching row below.
test_asm(r, [&](A& a) {
    a.vpaddw   (A::ymm4, A::ymm3, A::ymm2);
    a.vpavgw   (A::ymm4, A::ymm3, A::ymm2);
    a.vpcmpeqw (A::ymm4, A::ymm3, A::ymm2);
    a.vpcmpgtw (A::ymm4, A::ymm3, A::ymm2);

    a.vpminsw  (A::ymm4, A::ymm3, A::ymm2);
    a.vpmaxsw  (A::ymm4, A::ymm3, A::ymm2);
    a.vpminuw  (A::ymm4, A::ymm3, A::ymm2);
    a.vpmaxuw  (A::ymm4, A::ymm3, A::ymm2);

    a.vpmulhrsw(A::ymm4, A::ymm3, A::ymm2);
    a.vpabsw   (A::ymm4, A::ymm3);
    a.vpsllw   (A::ymm4, A::ymm3, 12);
    a.vpsrlw   (A::ymm4, A::ymm3, 12);   // Previously untested: /2 variant of opcode 0x71.
    a.vpsraw   (A::ymm4, A::ymm3, 12);
},{
    0xc5,     0xe5, 0xfd, 0xe2,
    0xc5,     0xe5, 0xe3, 0xe2,
    0xc5,     0xe5, 0x75, 0xe2,
    0xc5,     0xe5, 0x65, 0xe2,

    0xc5,     0xe5, 0xea, 0xe2,
    0xc5,     0xe5, 0xee, 0xe2,
    0xc4,0xe2,0x65, 0x3a, 0xe2,
    0xc4,0xe2,0x65, 0x3e, 0xe2,

    0xc4,0xe2,0x65, 0x0b, 0xe2,
    0xc4,0xe2,0x7d, 0x1d, 0xe3,
    // Immediate shifts: the ModRM reg field carries the opcode extension
    // (0xf3 -> /6 shift-left, 0xd3 -> /2 logical right, 0xe3 -> /4 arithmetic right).
    0xc5,0xdd,0x71, 0xf3, 0x0c,
    0xc5,0xdd,0x71, 0xd3, 0x0c,
    0xc5,0xdd,0x71, 0xe3, 0x0c,
});
|
||||
|
||||
test_asm(r, [&](A& a) {
|
||||
A::Label l;
|
||||
a.vcmpeqps (A::ymm0, A::ymm1, &l); // vcmpeqps 0x1c(%rip), %ymm1, %ymm0
|
||||
|
Loading…
Reference in New Issue
Block a user