use u32 in mul_unorm8

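This lets the compiler know that the /256 can be lowered to a plain >>8, as we want.
At head the operands are signed, so the compiler emits a round-toward-zero signed divide: two shifts and an add to bias negative values, followed by the arithmetic shift by 8.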

Before:
 49b:   49 c1 e1 07                     shl    $0x7,%r9
 49f:   48 c1 e0 07                     shl    $0x7,%rax
 4a3:   c5 fd 6f 04 01                  vmovdqa (%rcx,%rax,1),%ymm0
 4a8:   c4 a2 7d 40 04 09               vpmulld (%rcx,%r9,1),%ymm0,%ymm0
 4ae:   c5 fd 6f 4c 01 20               vmovdqa 0x20(%rcx,%rax,1),%ymm1
 4b4:   c4 a2 75 40 4c 09 20            vpmulld 0x20(%rcx,%r9,1),%ymm1,%ymm1
 4bb:   c5 fd 6f 54 01 40               vmovdqa 0x40(%rcx,%rax,1),%ymm2
 4c1:   c4 a2 6d 40 54 09 40            vpmulld 0x40(%rcx,%r9,1),%ymm2,%ymm2
 4c8:   c5 fd 6f 5c 01 60               vmovdqa 0x60(%rcx,%rax,1),%ymm3
 4ce:   c4 a2 65 40 5c 09 60            vpmulld 0x60(%rcx,%r9,1),%ymm3,%ymm3
 4d5:   c4 e2 7d 58 25 00 00 00 00      vpbroadcastd 0x0(%rip),%ymm4        # 255
 4de:   c5 e5 fe dc                     vpaddd %ymm4,%ymm3,%ymm3
 4e2:   c5 ed fe d4                     vpaddd %ymm4,%ymm2,%ymm2
 4e6:   c5 f5 fe cc                     vpaddd %ymm4,%ymm1,%ymm1
 4ea:   c5 fd fe c4                     vpaddd %ymm4,%ymm0,%ymm0
 4ee:   c5 dd 72 e0 1f                  vpsrad $0x1f,%ymm0,%ymm4
 4f3:   c5 dd 72 d4 18                  vpsrld $0x18,%ymm4,%ymm4
 4f8:   c5 fd fe c4                     vpaddd %ymm4,%ymm0,%ymm0
 4fc:   c5 fd 72 e0 08                  vpsrad $0x8,%ymm0,%ymm0
 501:   c5 dd 72 e1 1f                  vpsrad $0x1f,%ymm1,%ymm4
 506:   c5 dd 72 d4 18                  vpsrld $0x18,%ymm4,%ymm4
 50b:   c5 f5 fe cc                     vpaddd %ymm4,%ymm1,%ymm1
 50f:   c5 f5 72 e1 08                  vpsrad $0x8,%ymm1,%ymm1
 514:   c5 dd 72 e2 1f                  vpsrad $0x1f,%ymm2,%ymm4
 519:   c5 dd 72 d4 18                  vpsrld $0x18,%ymm4,%ymm4
 51e:   c5 ed fe d4                     vpaddd %ymm4,%ymm2,%ymm2
 522:   c5 ed 72 e2 08                  vpsrad $0x8,%ymm2,%ymm2
 527:   c5 dd 72 e3 1f                  vpsrad $0x1f,%ymm3,%ymm4
 52c:   c5 dd 72 d4 18                  vpsrld $0x18,%ymm4,%ymm4
 531:   c5 e5 fe dc                     vpaddd %ymm4,%ymm3,%ymm3
 535:   c5 e5 72 e3 08                  vpsrad $0x8,%ymm3,%ymm3
 53a:   e9 71 02 00 00                  jmpq   7b0

After:
 49b:   49 c1 e1 07                     shl    $0x7,%r9
 49f:   48 c1 e0 07                     shl    $0x7,%rax
 4a3:   c5 fd 6f 04 01                  vmovdqa (%rcx,%rax,1),%ymm0
 4a8:   c4 a2 7d 40 04 09               vpmulld (%rcx,%r9,1),%ymm0,%ymm0
 4ae:   c5 fd 6f 4c 01 20               vmovdqa 0x20(%rcx,%rax,1),%ymm1
 4b4:   c4 a2 75 40 4c 09 20            vpmulld 0x20(%rcx,%r9,1),%ymm1,%ymm1
 4bb:   c5 fd 6f 54 01 40               vmovdqa 0x40(%rcx,%rax,1),%ymm2
 4c1:   c4 a2 6d 40 54 09 40            vpmulld 0x40(%rcx,%r9,1),%ymm2,%ymm2
 4c8:   c5 fd 6f 5c 01 60               vmovdqa 0x60(%rcx,%rax,1),%ymm3
 4ce:   c4 a2 65 40 5c 09 60            vpmulld 0x60(%rcx,%r9,1),%ymm3,%ymm3
 4d5:   c4 e2 7d 58 25 00 00 00 00      vpbroadcastd 0x0(%rip),%ymm4        # 255
 4de:   c5 e5 fe dc                     vpaddd %ymm4,%ymm3,%ymm3
 4e2:   c5 ed fe d4                     vpaddd %ymm4,%ymm2,%ymm2
 4e6:   c5 f5 fe cc                     vpaddd %ymm4,%ymm1,%ymm1
 4ea:   c5 fd fe c4                     vpaddd %ymm4,%ymm0,%ymm0
 4ee:   c5 fd 72 d0 08                  vpsrld $0x8,%ymm0,%ymm0
 4f3:   c5 f5 72 d1 08                  vpsrld $0x8,%ymm1,%ymm1
 4f8:   c5 ed 72 d2 08                  vpsrld $0x8,%ymm2,%ymm2
 4fd:   c5 e5 72 d3 08                  vpsrld $0x8,%ymm3,%ymm3
 502:   e9 79 02 00 00                  jmpq   780

Change-Id: If38f59aeb3c37aba365e647cda1a753819a16aa8
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/218613
Auto-Submit: Mike Klein <mtklein@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
Commit-Queue: Brian Osman <brianosman@google.com>
Reviewed-by: Brian Osman <brianosman@google.com>
This commit is contained in:
Mike Klein 2019-06-05 13:50:33 -05:00 committed by Skia Commit-Bot
parent f1df3979c6
commit e4227613e7

View File

@@ -132,7 +132,7 @@ namespace SK_OPTS_NS {
CASE(Op::sra): r(d).i32 = r(x).i32 >> y.imm; break;
CASE(Op::shr): r(d).u32 = r(x).u32 >> y.imm; break;
-CASE(Op::mul_unorm8): r(d).i32 = (r(x).i32 * r(y.id).i32 + 255) / 256; break;
+CASE(Op::mul_unorm8): r(d).u32 = (r(x).u32 * r(y.id).u32 + 255) / 256; break;
CASE(Op::extract): r(d).u32 = (r(x).u32 & y.imm) >> z.imm; break;