From 12818515b5a36b792a72e528fe6475b6bfaa5b5c Mon Sep 17 00:00:00 2001 From: Mike Klein Date: Mon, 3 Jun 2019 16:32:49 -0500 Subject: [PATCH] add a x*y+0 -> x*y peephole This is mostly to test how easy rebaselining SkVMTest is. Change-Id: I27ab6f6bb8b7e126327374009783afd86d416f55 Reviewed-on: https://skia-review.googlesource.com/c/skia/+/218039 Reviewed-by: Brian Osman --- resources/SkVMTest.expected | 116 ++++++++++++++++++------------------ src/core/SkVM.cpp | 11 ++++ 2 files changed, 68 insertions(+), 59 deletions(-) diff --git a/resources/SkVMTest.expected b/resources/SkVMTest.expected index 901cd10a48..b40149b48c 100644 --- a/resources/SkVMTest.expected +++ b/resources/SkVMTest.expected @@ -16,74 +16,72 @@ r0 = to_i32 r0 store8 arg(1) r0 A8 over G8 -r0 = splat 0 (0) -r1 = load8 arg(0) -r2 = splat 3B808081 (0.0039215689) -r1 = to_f32 r1 +r0 = load8 arg(0) +r1 = splat 3B808081 (0.0039215689) +r0 = to_f32 r0 +r0 = mul_f32 r1 r0 +r2 = load8 arg(1) +r2 = to_f32 r2 +r2 = mul_f32 r1 r2 +r1 = splat 3F800000 (1) +r1 = sub_f32 r1 r0 r1 = mul_f32 r2 r1 -r3 = load8 arg(1) -r3 = to_f32 r3 -r3 = mul_f32 r2 r3 -r2 = splat 3F800000 (1) -r2 = sub_f32 r2 r1 -r2 = mad_f32 r3 r2 r0 -r3 = splat 3E59B3D0 (0.21259999) +r2 = splat 3E59B3D0 (0.21259999) r0 = splat 3F371759 (0.71520001) -r1 = splat 3D93DD98 (0.0722) -r1 = mul_f32 r2 r1 -r1 = mad_f32 r2 r0 r1 -r1 = mad_f32 r2 r3 r1 -r3 = splat 437F0000 (255) -r2 = splat 3F000000 (0.5) -r2 = mad_f32 r1 r3 r2 -r2 = to_i32 r2 -store8 arg(1) r2 +r3 = splat 3D93DD98 (0.0722) +r3 = mul_f32 r1 r3 +r3 = mad_f32 r1 r0 r3 +r3 = mad_f32 r1 r2 r3 +r2 = splat 437F0000 (255) +r1 = splat 3F000000 (0.5) +r1 = mad_f32 r3 r2 r1 +r1 = to_i32 r1 +store8 arg(1) r1 A8 over RGBA_8888 -r0 = splat 0 (0) -r1 = load8 arg(0) -r2 = splat 3B808081 (0.0039215689) -r1 = to_f32 r1 -r1 = mul_f32 r2 r1 -r3 = load32 arg(1) -r4 = splat FF (3.5733111e-43) -r5 = bit_and r3 r4 +r0 = load8 arg(0) +r1 = splat 3B808081 (0.0039215689) +r0 = to_f32 r0 +r0 = mul_f32 r1 r0 +r2 = load32 arg(1) +r3 = splat FF (3.5733111e-43) +r4 = bit_and r2 r3 +r4 = to_f32 r4 +r4 = mul_f32 r1 r4 +r5 = shr r2 8 (1.1210388e-44) +r5 = bit_and r5 r3 r5 = to_f32 r5 -r5 = mul_f32 r2 r5 -r6 = shr r3 8 (1.1210388e-44) -r6 = bit_and r6 r4 +r5 = mul_f32 r1 r5 +r6 = shr r2 10 (2.2420775e-44) +r6 = bit_and r6 r3 r6 = to_f32 r6 -r6 = mul_f32 r2 r6 -r7 = shr r3 10 (2.2420775e-44) -r7 = bit_and r7 r4 -r7 = to_f32 r7 -r7 = mul_f32 r2 r7 -r3 = shr r3 18 (3.3631163e-44) -r3 = to_f32 r3 -r3 = mul_f32 r2 r3 -r2 = splat 3F800000 (1) -r2 = sub_f32 r2 r1 +r6 = mul_f32 r1 r6 +r2 = shr r2 18 (3.3631163e-44) +r2 = to_f32 r2 +r2 = mul_f32 r1 r2 +r1 = splat 3F800000 (1) +r1 = sub_f32 r1 r0 +r4 = mul_f32 r4 r1 +r5 = mul_f32 r5 r1 +r6 = mul_f32 r6 r1 +r1 = mad_f32 r2 r1 r0 +r2 = splat 437F0000 (255) +r0 = splat 3F000000 (0.5) +r4 = mad_f32 r4 r2 r0 +r4 = to_i32 r4 r5 = mad_f32 r5 r2 r0 -r6 = mad_f32 r6 r2 r0 -r7 = mad_f32 r7 r2 r0 -r2 = mad_f32 r3 r2 r1 -r3 = splat 437F0000 (255) -r1 = splat 3F000000 (0.5) -r5 = mad_f32 r5 r3 r1 r5 = to_i32 r5 -r6 = mad_f32 r6 r3 r1 +r5 = shl r5 8 (1.1210388e-44) +r6 = mad_f32 r6 r2 r0 r6 = to_i32 r6 -r6 = shl r6 8 (1.1210388e-44) -r7 = mad_f32 r7 r3 r1 -r7 = to_i32 r7 -r7 = shl r7 10 (2.2420775e-44) -r1 = mad_f32 r2 r3 r1 -r1 = to_i32 r1 -r1 = shl r1 18 (3.3631163e-44) -r6 = bit_or r5 r6 -r6 = bit_or r6 r7 -r6 = bit_or r6 r1 -store32 arg(1) r6 +r6 = shl r6 10 (2.2420775e-44) +r0 = mad_f32 r1 r2 r0 +r0 = to_i32 r0 +r0 = shl r0 18 (3.3631163e-44) +r5 = bit_or r4 r5 +r5 = bit_or r5 r6 +r5 = bit_or r5 r0 +store32 arg(1) r5 G8 over A8 r0 = splat 3B808081 (0.0039215689) diff --git a/src/core/SkVM.cpp b/src/core/SkVM.cpp index 545cd451a0..fcf12466b6 100644 --- a/src/core/SkVM.cpp +++ b/src/core/SkVM.cpp @@ -114,6 +114,17 @@ namespace skvm { ID Builder::push(Op op, ID x=NA, ID y=NA, ID z=NA, int imm=0) { Instruction inst{op, /*life=*/NA, x, y, z, imm}; + // Simple peepholes that come up fairly often. + + auto is_zero = [&](ID id) { + return fProgram[id].op == Op::splat + && fProgram[id].imm == 0; + }; + + // x*y+0 --> x*y + if (op == Op::mad_f32 && is_zero(z)) { inst = { Op::mul_f32, NA, x,y,NA, 0 }; } + + // Basic common subexpression elimination: // if we've already seen this exact Instruction, use it instead of creating a new one. auto lookup = fIndex.find(inst);