use u32 in mul_unorm8
This lets the compiler know the /256 can be a plain >>8, like we'd want it to be. At head the operands are signed, so the compiler emits a signed divide: two shifts and an add to bias negative values so the result rounds toward zero, followed by an arithmetic shift right by 8.

Before:

    49b: 49 c1 e1 07                 shl $0x7,%r9
    49f: 48 c1 e0 07                 shl $0x7,%rax
    4a3: c5 fd 6f 04 01              vmovdqa (%rcx,%rax,1),%ymm0
    4a8: c4 a2 7d 40 04 09           vpmulld (%rcx,%r9,1),%ymm0,%ymm0
    4ae: c5 fd 6f 4c 01 20           vmovdqa 0x20(%rcx,%rax,1),%ymm1
    4b4: c4 a2 75 40 4c 09 20        vpmulld 0x20(%rcx,%r9,1),%ymm1,%ymm1
    4bb: c5 fd 6f 54 01 40           vmovdqa 0x40(%rcx,%rax,1),%ymm2
    4c1: c4 a2 6d 40 54 09 40        vpmulld 0x40(%rcx,%r9,1),%ymm2,%ymm2
    4c8: c5 fd 6f 5c 01 60           vmovdqa 0x60(%rcx,%rax,1),%ymm3
    4ce: c4 a2 65 40 5c 09 60        vpmulld 0x60(%rcx,%r9,1),%ymm3,%ymm3
    4d5: c4 e2 7d 58 25 00 00 00 00  vpbroadcastd 0x0(%rip),%ymm4 # 255
    4de: c5 e5 fe dc                 vpaddd %ymm4,%ymm3,%ymm3
    4e2: c5 ed fe d4                 vpaddd %ymm4,%ymm2,%ymm2
    4e6: c5 f5 fe cc                 vpaddd %ymm4,%ymm1,%ymm1
    4ea: c5 fd fe c4                 vpaddd %ymm4,%ymm0,%ymm0
    4ee: c5 dd 72 e0 1f              vpsrad $0x1f,%ymm0,%ymm4
    4f3: c5 dd 72 d4 18              vpsrld $0x18,%ymm4,%ymm4
    4f8: c5 fd fe c4                 vpaddd %ymm4,%ymm0,%ymm0
    4fc: c5 fd 72 e0 08              vpsrad $0x8,%ymm0,%ymm0
    501: c5 dd 72 e1 1f              vpsrad $0x1f,%ymm1,%ymm4
    506: c5 dd 72 d4 18              vpsrld $0x18,%ymm4,%ymm4
    50b: c5 f5 fe cc                 vpaddd %ymm4,%ymm1,%ymm1
    50f: c5 f5 72 e1 08              vpsrad $0x8,%ymm1,%ymm1
    514: c5 dd 72 e2 1f              vpsrad $0x1f,%ymm2,%ymm4
    519: c5 dd 72 d4 18              vpsrld $0x18,%ymm4,%ymm4
    51e: c5 ed fe d4                 vpaddd %ymm4,%ymm2,%ymm2
    522: c5 ed 72 e2 08              vpsrad $0x8,%ymm2,%ymm2
    527: c5 dd 72 e3 1f              vpsrad $0x1f,%ymm3,%ymm4
    52c: c5 dd 72 d4 18              vpsrld $0x18,%ymm4,%ymm4
    531: c5 e5 fe dc                 vpaddd %ymm4,%ymm3,%ymm3
    535: c5 e5 72 e3 08              vpsrad $0x8,%ymm3,%ymm3
    53a: e9 71 02 00 00              jmpq 7b0

After:

    49b: 49 c1 e1 07                 shl $0x7,%r9
    49f: 48 c1 e0 07                 shl $0x7,%rax
    4a3: c5 fd 6f 04 01              vmovdqa (%rcx,%rax,1),%ymm0
    4a8: c4 a2 7d 40 04 09           vpmulld (%rcx,%r9,1),%ymm0,%ymm0
    4ae: c5 fd 6f 4c 01 20           vmovdqa 0x20(%rcx,%rax,1),%ymm1
    4b4: c4 a2 75 40 4c 09 20        vpmulld 0x20(%rcx,%r9,1),%ymm1,%ymm1
    4bb: c5 fd 6f 54 01 40           vmovdqa 0x40(%rcx,%rax,1),%ymm2
    4c1: c4 a2 6d 40 54 09 40        vpmulld 0x40(%rcx,%r9,1),%ymm2,%ymm2
    4c8: c5 fd 6f 5c 01 60           vmovdqa 0x60(%rcx,%rax,1),%ymm3
    4ce: c4 a2 65 40 5c 09 60        vpmulld 0x60(%rcx,%r9,1),%ymm3,%ymm3
    4d5: c4 e2 7d 58 25 00 00 00 00  vpbroadcastd 0x0(%rip),%ymm4 # 255
    4de: c5 e5 fe dc                 vpaddd %ymm4,%ymm3,%ymm3
    4e2: c5 ed fe d4                 vpaddd %ymm4,%ymm2,%ymm2
    4e6: c5 f5 fe cc                 vpaddd %ymm4,%ymm1,%ymm1
    4ea: c5 fd fe c4                 vpaddd %ymm4,%ymm0,%ymm0
    4ee: c5 fd 72 d0 08              vpsrld $0x8,%ymm0,%ymm0
    4f3: c5 f5 72 d1 08              vpsrld $0x8,%ymm1,%ymm1
    4f8: c5 ed 72 d2 08              vpsrld $0x8,%ymm2,%ymm2
    4fd: c5 e5 72 d3 08              vpsrld $0x8,%ymm3,%ymm3
    502: e9 79 02 00 00              jmpq 780

Change-Id: If38f59aeb3c37aba365e647cda1a753819a16aa8
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/218613
Auto-Submit: Mike Klein <mtklein@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
Commit-Queue: Brian Osman <brianosman@google.com>
Reviewed-by: Brian Osman <brianosman@google.com>
This commit is contained in:
parent
f1df3979c6
commit
e4227613e7
@@ -132,7 +132,7 @@ namespace SK_OPTS_NS {
|
||||
CASE(Op::sra): r(d).i32 = r(x).i32 >> y.imm; break;
|
||||
CASE(Op::shr): r(d).u32 = r(x).u32 >> y.imm; break;
|
||||
|
||||
CASE(Op::mul_unorm8): r(d).i32 = (r(x).i32 * r(y.id).i32 + 255) / 256; break;
|
||||
CASE(Op::mul_unorm8): r(d).u32 = (r(x).u32 * r(y.id).u32 + 255) / 256; break;
|
||||
|
||||
CASE(Op::extract): r(d).u32 = (r(x).u32 & y.imm) >> z.imm; break;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user