Optimize commutative operations in SkVM.

We now canonicalize commutative operations by ordering their value IDs.
The lower-numbered value ID is always placed first into a commutative
instruction. In other words, this instruction:
   bit_and result, v7, v5

would be silently converted to this:
   bit_and result, v5, v7

This will allow these two logically equivalent instructions to be
deduplicated:
   bit_and result, v7, v5
   bit_and result, v5, v7

Of course, deduplicating these ops can unlock additional common-subexpression
and dead-code elimination (CSE/DCE) at no extra cost.
The affected instructions are listed in http://review.skia.org/473238

Change-Id: Ib9beb79d6b72d7903184aaa9a53e8e5a02ae126d
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/473239
Commit-Queue: John Stiles <johnstiles@google.com>
Auto-Submit: John Stiles <johnstiles@google.com>
Reviewed-by: Brian Osman <brianosman@google.com>
Reviewed-by: Herb Derby <herb@google.com>
This commit is contained in:
John Stiles 2021-11-18 15:30:09 -05:00 committed by SkCQ
parent 626dbe195a
commit c809e5ba9f
12 changed files with 1519 additions and 1552 deletions

View File

@ -765,7 +765,7 @@ namespace skvm {
return {this, this->push(Op::fma_f32, fProgram[y.id].x, fProgram[y.id].y, x.id)};
}
}
return {this, this->push(Op::add_f32, x.id, y.id)};
return {this, this->push(Op::add_f32, std::min(x.id, y.id), std::max(x.id, y.id))};
}
F32 Builder::sub(F32 x, F32 y) {
@ -786,7 +786,7 @@ namespace skvm {
if (float X,Y; this->allImm(x.id,&X, y.id,&Y)) { return splat(X*Y); }
if (this->isImm(y.id, 1.0f)) { return x; } // x*1 == x
if (this->isImm(x.id, 1.0f)) { return y; } // 1*y == y
return {this, this->push(Op::mul_f32, x.id, y.id)};
return {this, this->push(Op::mul_f32, std::min(x.id, y.id), std::max(x.id, y.id))};
}
F32 Builder::fast_mul(F32 x, F32 y) {
@ -977,7 +977,7 @@ namespace skvm {
if (int X,Y; this->allImm(x.id,&X, y.id,&Y)) { return splat(X+Y); }
if (this->isImm(x.id, 0)) { return y; }
if (this->isImm(y.id, 0)) { return x; }
return {this, this->push(Op::add_i32, x.id, y.id)};
return {this, this->push(Op::add_i32, std::min(x.id, y.id), std::max(x.id, y.id))};
}
SK_ATTRIBUTE(no_sanitize("signed-integer-overflow"))
I32 Builder::sub(I32 x, I32 y) {
@ -992,7 +992,7 @@ namespace skvm {
if (this->isImm(y.id, 0)) { return splat(0); }
if (this->isImm(x.id, 1)) { return y; }
if (this->isImm(y.id, 1)) { return x; }
return {this, this->push(Op::mul_i32, x.id, y.id)};
return {this, this->push(Op::mul_i32, std::min(x.id, y.id), std::max(x.id, y.id))};
}
SK_ATTRIBUTE(no_sanitize("shift"))
@ -1014,11 +1014,11 @@ namespace skvm {
I32 Builder:: eq(F32 x, F32 y) {
if (float X,Y; this->allImm(x.id,&X, y.id,&Y)) { return splat(X==Y ? ~0 : 0); }
return {this, this->push(Op::eq_f32, x.id, y.id)};
return {this, this->push(Op::eq_f32, std::min(x.id, y.id), std::max(x.id, y.id))};
}
I32 Builder::neq(F32 x, F32 y) {
if (float X,Y; this->allImm(x.id,&X, y.id,&Y)) { return splat(X!=Y ? ~0 : 0); }
return {this, this->push(Op::neq_f32, x.id, y.id)};
return {this, this->push(Op::neq_f32, std::min(x.id, y.id), std::max(x.id, y.id))};
}
I32 Builder::lt(F32 x, F32 y) {
if (float X,Y; this->allImm(x.id,&X, y.id,&Y)) { return splat(Y> X ? ~0 : 0); }
@ -1040,7 +1040,7 @@ namespace skvm {
I32 Builder:: eq(I32 x, I32 y) {
if (x.id == y.id) { return splat(~0); }
if (int X,Y; this->allImm(x.id,&X, y.id,&Y)) { return splat(X==Y ? ~0 : 0); }
return {this, this->push(Op:: eq_i32, x.id, y.id)};
return {this, this->push(Op:: eq_i32, std::min(x.id, y.id), std::max(x.id, y.id))};
}
I32 Builder::neq(I32 x, I32 y) {
if (int X,Y; this->allImm(x.id,&X, y.id,&Y)) { return splat(X!=Y ? ~0 : 0); }
@ -1065,7 +1065,7 @@ namespace skvm {
if (this->isImm(x.id, 0)) { return splat(0); } // (false & y) == false
if (this->isImm(y.id,~0)) { return x; } // (x & true) == x
if (this->isImm(x.id,~0)) { return y; } // (true & y) == y
return {this, this->push(Op::bit_and, x.id, y.id)};
return {this, this->push(Op::bit_and, std::min(x.id, y.id), std::max(x.id, y.id))};
}
I32 Builder::bit_or(I32 x, I32 y) {
if (x.id == y.id) { return x; }
@ -1074,14 +1074,14 @@ namespace skvm {
if (this->isImm(x.id, 0)) { return y; } // (false | y) == y
if (this->isImm(y.id,~0)) { return splat(~0); } // (x | true) == true
if (this->isImm(x.id,~0)) { return splat(~0); } // (true | y) == true
return {this, this->push(Op::bit_or, x.id, y.id)};
return {this, this->push(Op::bit_or, std::min(x.id, y.id), std::max(x.id, y.id))};
}
I32 Builder::bit_xor(I32 x, I32 y) {
if (x.id == y.id) { return splat(0); }
if (int X,Y; this->allImm(x.id,&X, y.id,&Y)) { return splat(X^Y); }
if (this->isImm(y.id, 0)) { return x; } // (x ^ false) == x
if (this->isImm(x.id, 0)) { return y; } // (false ^ y) == y
return {this, this->push(Op::bit_xor, x.id, y.id)};
return {this, this->push(Op::bit_xor, std::min(x.id, y.id), std::max(x.id, y.id))};
}
I32 Builder::bit_clear(I32 x, I32 y) {

View File

@ -72,17 +72,17 @@ F4 = float index_out_of_bounds_checked()
28 r28 = uniform32 ptr0 6C
29 r29 = uniform32 ptr0 70
30 r30 = splat 0 (0)
31 r31 = eq_f32 r1 r0
31 r31 = eq_f32 r0 r1
32 r32 = splat 3 (4.2038954e-45)
33 r33 = splat 2 (2.8025969e-45)
34 r34 = add_f32 r9 r8
34 r34 = add_f32 r8 r9
35 r35 = splat 1 (1.4012985e-45)
36 r36 = add_f32 r34 r7
37 r37 = add_f32 r36 r6
36 r36 = add_f32 r7 r34
37 r37 = add_f32 r6 r36
38 r38 = splat 3F800000 (1)
39 r39 = mul_f32 r10 r10
40 r40 = mul_f32 r39 r12
41 r41 = mul_f32 r40 r13
40 r40 = mul_f32 r12 r39
41 r41 = mul_f32 r13 r40
42 r42 = splat C0133333 (-2.3)
43 r43 = splat 3FB33334 (1.4000001)
44 r44 = sub_f32 r30 r15
@ -95,7 +95,7 @@ F4 = float index_out_of_bounds_checked()
51 r51 = splat 4181999A (16.200001)
loop:
52 r52 = index
53 r53 = eq_f32 r52 r0
53 r53 = eq_f32 r0 r52
54 r53 = bit_and r53 r31
55 trace_var r53 $0 = r2 (F32)
56 trace_var r53 $1 = r3 (F32)

View File

@ -1,4 +1,4 @@
17 registers, 56 instructions:
11 registers, 23 instructions:
0 r0 = uniform32 ptr0 4
1 r1 = uniform32 ptr0 8
2 r2 = uniform32 ptr0 C
@ -7,52 +7,19 @@
5 r5 = uniform32 ptr0 18
6 r6 = uniform32 ptr0 1C
7 r7 = uniform32 ptr0 20
8 r8 = uniform32 ptr0 24
9 r9 = uniform32 ptr0 28
10 r10 = uniform32 ptr0 2C
11 r11 = uniform32 ptr0 30
12 r12 = splat FFFFFFFF (nan)
13 r13 = bit_and r8 r9
14 r14 = bit_and r9 r8
15 r14 = eq_i32 r13 r14
16 r13 = bit_or r8 r9
17 r15 = bit_or r9 r8
18 r15 = eq_i32 r13 r15
19 r15 = bit_and r14 r15
20 r14 = bit_xor r8 r9
21 r13 = bit_xor r9 r8
22 r13 = eq_i32 r14 r13
23 r13 = bit_and r15 r13
24 r15 = eq_i32 r8 r9
25 r14 = eq_i32 r9 r8
26 r16 = eq_i32 r15 r14
27 r16 = bit_and r13 r16
28 r15 = bit_xor r12 r15
29 r14 = bit_xor r12 r14
30 r14 = eq_i32 r15 r14
31 r14 = bit_and r16 r14
32 r16 = add_i32 r8 r9
33 r15 = add_i32 r9 r8
34 r15 = eq_i32 r16 r15
35 r15 = bit_and r14 r15
36 r14 = add_f32 r10 r11
37 r16 = add_f32 r11 r10
38 r16 = eq_f32 r14 r16
39 r16 = bit_and r15 r16
40 r15 = mul_i32 r8 r9
41 r8 = mul_i32 r9 r8
42 r8 = eq_i32 r15 r8
43 r8 = bit_and r16 r8
44 r16 = mul_f32 r10 r11
45 r10 = mul_f32 r11 r10
46 r10 = eq_f32 r16 r10
47 r10 = bit_and r8 r10
48 r4 = select r10 r0 r4
49 r5 = select r10 r1 r5
50 r6 = select r10 r2 r6
51 r7 = select r10 r3 r7
8 r8 = uniform32 ptr0 2C
9 r9 = uniform32 ptr0 30
10 r10 = add_f32 r8 r9
11 r10 = eq_f32 r10 r10
12 r9 = mul_f32 r8 r9
13 r9 = eq_f32 r9 r9
14 r9 = bit_and r10 r9
15 r4 = select r9 r0 r4
16 r5 = select r9 r1 r5
17 r6 = select r9 r2 r6
18 r7 = select r9 r3 r7
loop:
52 store32 ptr1 r4
53 store32 ptr2 r5
54 store32 ptr3 r6
55 store32 ptr4 r7
19 store32 ptr1 r4
20 store32 ptr2 r5
21 store32 ptr3 r6
22 store32 ptr4 r7

View File

@ -6,15 +6,15 @@
4 r4 = splat FFFFFFFF (nan)
5 r5 = neq_f32 r0 r3
6 r6 = splat 3F800000 (1)
7 r6 = bit_and r6 r2
7 r6 = bit_and r2 r6
8 r7 = to_f32 r1
9 r7 = select r5 r6 r7
10 r3 = eq_i32 r1 r3
11 r3 = bit_xor r4 r3
12 r4 = splat 1 (1.4012985e-45)
13 r2 = bit_and r4 r2
13 r4 = bit_and r2 r4
14 r0 = trunc r0
15 r0 = select r3 r2 r0
15 r0 = select r3 r4 r0
16 r0 = to_f32 r0
17 r0 = mul_f32 r7 r0
loop:

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -156,7 +156,7 @@ F7 = bool lowp_param(half value)
8 r8 = uniform32 ptr0 18
9 r9 = uniform32 ptr0 1C
10 r10 = uniform32 ptr0 20
11 r11 = eq_f32 r2 r1
11 r11 = eq_f32 r1 r2
12 r12 = splat 3F800000 (1)
13 r13 = splat 2 (2.8025969e-45)
14 r14 = splat 40000000 (2)
@ -166,7 +166,7 @@ F7 = bool lowp_param(half value)
18 r18 = splat 4 (5.6051939e-45)
loop:
19 r19 = index
20 r20 = eq_f32 r19 r1
20 r20 = eq_f32 r1 r19
21 r20 = bit_and r20 r11
22 trace_var r20 $0 = r3 (F32)
23 trace_var r20 $1 = r4 (F32)

View File

@ -8,7 +8,7 @@
loop:
6 r5 = index
7 r5 = trunc r5
8 r5 = mul_i32 r5 r1
8 r5 = mul_i32 r1 r5
9 r5 = add_i32 r0 r5
10 r5 = shl_i32 r5 2
11 r6 = gather32 ptr0 4 r5

View File

@ -18,19 +18,19 @@
16 r15 = bit_xor r9 r8
17 r8 = bit_and r8 r15
18 r16 = splat 1 (1.4012985e-45)
19 r10 = eq_i32 r16 r10
20 r10 = bit_or r8 r10
21 r10 = bit_and r10 r15
22 r11 = select r10 r0 r11
23 r12 = select r10 r1 r12
24 r13 = select r10 r2 r13
25 r14 = select r10 r3 r14
26 r10 = bit_xor r9 r10
27 r10 = bit_and r15 r10
28 r11 = select r10 r4 r11
29 r12 = select r10 r5 r12
30 r13 = select r10 r6 r13
31 r14 = select r10 r7 r14
19 r16 = eq_i32 r10 r16
20 r16 = bit_or r8 r16
21 r16 = bit_and r15 r16
22 r11 = select r16 r0 r11
23 r12 = select r16 r1 r12
24 r13 = select r16 r2 r13
25 r14 = select r16 r3 r14
26 r16 = bit_xor r9 r16
27 r16 = bit_and r15 r16
28 r11 = select r16 r4 r11
29 r12 = select r16 r5 r12
30 r13 = select r16 r6 r13
31 r14 = select r16 r7 r14
loop:
32 store32 ptr1 r11
33 store32 ptr2 r12

View File

@ -11,39 +11,39 @@
9 r9 = splat FFFFFFFF (nan)
10 r10 = trunc r1
11 r11 = splat 2 (2.8025969e-45)
12 r11 = eq_i32 r11 r10
12 r11 = eq_i32 r10 r11
13 r12 = bit_xor r9 r11
14 r13 = bit_and r11 r12
15 r14 = splat 1 (1.4012985e-45)
16 r14 = eq_i32 r14 r10
16 r14 = eq_i32 r10 r14
17 r15 = bit_or r13 r14
18 r15 = bit_and r15 r12
18 r15 = bit_and r12 r15
19 r13 = select r15 r9 r13
20 r8 = eq_i32 r8 r10
21 r13 = bit_or r13 r8
22 r12 = bit_and r13 r12
23 r8 = bit_and r12 r8
24 r13 = bit_xor r9 r8
25 r8 = bit_and r8 r13
26 r14 = bit_or r8 r14
27 r14 = bit_and r12 r14
28 r14 = bit_and r14 r13
22 r13 = bit_and r12 r13
23 r8 = bit_and r8 r13
24 r12 = bit_xor r9 r8
25 r8 = bit_and r8 r12
26 r14 = bit_or r14 r8
27 r14 = bit_and r13 r14
28 r14 = bit_and r12 r14
29 r8 = select r14 r9 r8
30 r11 = bit_or r8 r11
31 r11 = bit_and r12 r11
32 r11 = bit_and r11 r13
30 r11 = bit_or r11 r8
31 r11 = bit_and r13 r11
32 r11 = bit_and r12 r11
33 r8 = select r11 r9 r8
34 r9 = splat 3 (4.2038954e-45)
35 r10 = eq_i32 r9 r10
36 r10 = bit_or r8 r10
37 r10 = bit_and r12 r10
38 r13 = bit_and r10 r13
39 r13 = bit_and r13 r12
40 r13 = bit_and r12 r13
41 r4 = select r13 r0 r4
42 r5 = select r13 r1 r5
43 r6 = select r13 r2 r6
44 r7 = select r13 r3 r7
35 r9 = eq_i32 r10 r9
36 r9 = bit_or r8 r9
37 r9 = bit_and r13 r9
38 r9 = bit_and r12 r9
39 r9 = bit_and r13 r9
40 r9 = bit_and r13 r9
41 r4 = select r9 r0 r4
42 r5 = select r9 r1 r5
43 r6 = select r9 r2 r6
44 r7 = select r9 r3 r7
loop:
45 store32 ptr1 r4
46 store32 ptr2 r5

View File

@ -10,286 +10,286 @@
8 r8 = splat FFFFFFFF (nan)
9 r9 = trunc r1
10 r10 = splat 1 (1.4012985e-45)
11 r9 = eq_i32 r10 r9
11 r9 = eq_i32 r9 r10
12 r11 = bit_and r10 r9
13 r12 = bit_xor r8 r9
14 r13 = add_i32 r11 r10
14 r13 = add_i32 r10 r11
15 r14 = bit_and r9 r12
16 r11 = select r14 r13 r11
17 r13 = add_i32 r11 r10
17 r13 = add_i32 r10 r11
18 r11 = select r14 r13 r11
19 r14 = bit_xor r8 r14
20 r14 = bit_and r12 r14
21 r12 = add_i32 r11 r10
21 r12 = add_i32 r10 r11
22 r13 = bit_and r9 r14
23 r11 = select r13 r12 r11
24 r12 = splat 2 (2.8025969e-45)
25 r15 = add_i32 r11 r10
25 r15 = add_i32 r10 r11
26 r11 = select r13 r15 r11
27 r13 = bit_xor r8 r13
28 r13 = bit_and r14 r13
29 r14 = add_i32 r11 r10
29 r14 = add_i32 r10 r11
30 r15 = bit_and r9 r13
31 r11 = select r15 r14 r11
32 r14 = add_i32 r11 r10
32 r14 = add_i32 r10 r11
33 r11 = select r15 r14 r11
34 r15 = bit_xor r8 r15
35 r15 = bit_and r13 r15
36 r13 = add_i32 r11 r10
36 r13 = add_i32 r10 r11
37 r14 = bit_and r9 r15
38 r11 = select r14 r13 r11
39 r13 = add_i32 r11 r10
39 r13 = add_i32 r10 r11
40 r11 = select r14 r13 r11
41 r14 = bit_xor r8 r14
42 r14 = bit_and r15 r14
43 r15 = add_i32 r11 r10
43 r15 = add_i32 r10 r11
44 r13 = bit_and r9 r14
45 r11 = select r13 r15 r11
46 r15 = add_i32 r11 r10
46 r15 = add_i32 r10 r11
47 r11 = select r13 r15 r11
48 r13 = bit_xor r8 r13
49 r13 = bit_and r14 r13
50 r14 = add_i32 r11 r10
50 r14 = add_i32 r10 r11
51 r15 = bit_and r9 r13
52 r11 = select r15 r14 r11
53 r14 = add_i32 r11 r10
53 r14 = add_i32 r10 r11
54 r11 = select r15 r14 r11
55 r15 = bit_xor r8 r15
56 r15 = bit_and r13 r15
57 r13 = add_i32 r11 r10
57 r13 = add_i32 r10 r11
58 r14 = bit_and r9 r15
59 r11 = select r14 r13 r11
60 r13 = add_i32 r11 r10
60 r13 = add_i32 r10 r11
61 r11 = select r14 r13 r11
62 r14 = bit_xor r8 r14
63 r14 = bit_and r15 r14
64 r15 = add_i32 r11 r10
64 r15 = add_i32 r10 r11
65 r13 = bit_and r9 r14
66 r11 = select r13 r15 r11
67 r15 = add_i32 r11 r10
67 r15 = add_i32 r10 r11
68 r11 = select r13 r15 r11
69 r13 = bit_xor r8 r13
70 r13 = bit_and r14 r13
71 r14 = add_i32 r11 r10
71 r14 = add_i32 r10 r11
72 r15 = bit_and r9 r13
73 r11 = select r15 r14 r11
74 r14 = add_i32 r11 r10
74 r14 = add_i32 r10 r11
75 r11 = select r15 r14 r11
76 r15 = bit_xor r8 r15
77 r15 = bit_and r13 r15
78 r13 = add_i32 r11 r10
78 r13 = add_i32 r10 r11
79 r15 = bit_and r9 r15
80 r11 = select r15 r13 r11
81 r11 = add_i32 r11 r10
82 r12 = eq_i32 r11 r12
83 r11 = bit_and r12 r9
84 r13 = bit_and r10 r11
85 r15 = bit_xor r8 r11
86 r14 = add_i32 r13 r10
87 r16 = bit_and r11 r15
81 r11 = add_i32 r10 r11
82 r11 = eq_i32 r12 r11
83 r12 = bit_and r9 r11
84 r13 = bit_and r10 r12
85 r15 = bit_xor r8 r12
86 r14 = add_i32 r10 r13
87 r16 = bit_and r12 r15
88 r13 = select r16 r14 r13
89 r15 = bit_or r15 r11
90 r14 = add_i32 r13 r10
91 r16 = bit_and r11 r15
89 r15 = bit_or r12 r15
90 r14 = add_i32 r10 r13
91 r16 = bit_and r12 r15
92 r13 = select r16 r14 r13
93 r14 = bit_xor r8 r16
94 r14 = bit_and r15 r14
95 r15 = add_i32 r13 r10
96 r17 = bit_and r11 r14
95 r15 = add_i32 r10 r13
96 r17 = bit_and r12 r14
97 r13 = select r17 r15 r13
98 r16 = bit_or r14 r16
99 r14 = add_i32 r13 r10
100 r15 = bit_and r11 r16
101 r13 = select r15 r14 r13
102 r14 = bit_xor r8 r15
103 r14 = bit_and r16 r14
104 r16 = add_i32 r13 r10
105 r17 = bit_and r11 r14
106 r13 = select r17 r16 r13
107 r15 = bit_or r14 r15
108 r14 = add_i32 r13 r10
109 r16 = bit_and r11 r15
110 r13 = select r16 r14 r13
111 r14 = bit_xor r8 r16
112 r14 = bit_and r15 r14
113 r15 = add_i32 r13 r10
114 r17 = bit_and r11 r14
115 r13 = select r17 r15 r13
116 r16 = bit_or r14 r16
117 r14 = add_i32 r13 r10
118 r15 = bit_and r11 r16
119 r13 = select r15 r14 r13
120 r14 = bit_xor r8 r15
121 r14 = bit_and r16 r14
122 r16 = add_i32 r13 r10
123 r17 = bit_and r11 r14
124 r13 = select r17 r16 r13
125 r15 = bit_or r14 r15
126 r14 = add_i32 r13 r10
127 r16 = bit_and r11 r15
128 r13 = select r16 r14 r13
129 r14 = bit_xor r8 r16
130 r14 = bit_and r15 r14
131 r15 = add_i32 r13 r10
132 r17 = bit_and r11 r14
133 r13 = select r17 r15 r13
134 r16 = bit_or r14 r16
135 r14 = add_i32 r13 r10
136 r15 = bit_and r11 r16
137 r13 = select r15 r14 r13
138 r14 = bit_xor r8 r15
139 r14 = bit_and r16 r14
140 r16 = add_i32 r13 r10
141 r17 = bit_and r11 r14
98 r14 = bit_or r16 r14
99 r16 = add_i32 r10 r13
100 r15 = bit_and r12 r14
101 r13 = select r15 r16 r13
102 r16 = bit_xor r8 r15
103 r16 = bit_and r14 r16
104 r14 = add_i32 r10 r13
105 r17 = bit_and r12 r16
106 r13 = select r17 r14 r13
107 r16 = bit_or r15 r16
108 r15 = add_i32 r10 r13
109 r14 = bit_and r12 r16
110 r13 = select r14 r15 r13
111 r15 = bit_xor r8 r14
112 r15 = bit_and r16 r15
113 r16 = add_i32 r10 r13
114 r17 = bit_and r12 r15
115 r13 = select r17 r16 r13
116 r15 = bit_or r14 r15
117 r14 = add_i32 r10 r13
118 r16 = bit_and r12 r15
119 r13 = select r16 r14 r13
120 r14 = bit_xor r8 r16
121 r14 = bit_and r15 r14
122 r15 = add_i32 r10 r13
123 r17 = bit_and r12 r14
124 r13 = select r17 r15 r13
125 r14 = bit_or r16 r14
126 r16 = add_i32 r10 r13
127 r15 = bit_and r12 r14
128 r13 = select r15 r16 r13
129 r16 = bit_xor r8 r15
130 r16 = bit_and r14 r16
131 r14 = add_i32 r10 r13
132 r17 = bit_and r12 r16
133 r13 = select r17 r14 r13
134 r16 = bit_or r15 r16
135 r15 = add_i32 r10 r13
136 r14 = bit_and r12 r16
137 r13 = select r14 r15 r13
138 r15 = bit_xor r8 r14
139 r15 = bit_and r16 r15
140 r16 = add_i32 r10 r13
141 r17 = bit_and r12 r15
142 r13 = select r17 r16 r13
143 r15 = bit_or r14 r15
144 r14 = add_i32 r13 r10
145 r16 = bit_and r11 r15
144 r14 = add_i32 r10 r13
145 r16 = bit_and r12 r15
146 r13 = select r16 r14 r13
147 r14 = bit_xor r8 r16
148 r14 = bit_and r15 r14
149 r15 = add_i32 r13 r10
150 r17 = bit_and r11 r14
149 r15 = add_i32 r10 r13
150 r17 = bit_and r12 r14
151 r13 = select r17 r15 r13
152 r16 = bit_or r14 r16
153 r14 = add_i32 r13 r10
154 r15 = bit_and r11 r16
155 r13 = select r15 r14 r13
156 r14 = bit_xor r8 r15
157 r14 = bit_and r16 r14
158 r16 = add_i32 r13 r10
159 r17 = bit_and r11 r14
160 r13 = select r17 r16 r13
161 r15 = bit_or r14 r15
162 r14 = add_i32 r13 r10
163 r16 = bit_and r11 r15
164 r13 = select r16 r14 r13
165 r16 = bit_xor r8 r16
166 r16 = bit_and r15 r16
167 r15 = add_i32 r13 r10
168 r16 = bit_and r11 r16
169 r13 = select r16 r15 r13
170 r15 = add_i32 r13 r10
171 r13 = select r12 r15 r13
172 r15 = splat B (1.5414283e-44)
173 r15 = eq_i32 r13 r15
174 r15 = bit_and r15 r12
175 r15 = bit_and r12 r15
176 r9 = bit_and r15 r9
177 r12 = bit_and r10 r9
152 r14 = bit_or r16 r14
153 r16 = add_i32 r10 r13
154 r15 = bit_and r12 r14
155 r13 = select r15 r16 r13
156 r16 = bit_xor r8 r15
157 r16 = bit_and r14 r16
158 r14 = add_i32 r10 r13
159 r17 = bit_and r12 r16
160 r13 = select r17 r14 r13
161 r16 = bit_or r15 r16
162 r15 = add_i32 r10 r13
163 r14 = bit_and r12 r16
164 r13 = select r14 r15 r13
165 r14 = bit_xor r8 r14
166 r14 = bit_and r16 r14
167 r16 = add_i32 r10 r13
168 r14 = bit_and r12 r14
169 r13 = select r14 r16 r13
170 r16 = add_i32 r10 r13
171 r13 = select r11 r16 r13
172 r16 = splat B (1.5414283e-44)
173 r16 = eq_i32 r13 r16
174 r16 = bit_and r11 r16
175 r16 = bit_and r11 r16
176 r9 = bit_and r9 r16
177 r11 = bit_and r10 r9
178 r13 = bit_xor r8 r9
179 r13 = bit_and r15 r13
180 r16 = add_i32 r12 r10
181 r11 = bit_xor r8 r13
182 r14 = bit_and r15 r11
183 r12 = select r14 r16 r12
184 r16 = add_i32 r12 r10
185 r14 = bit_and r9 r11
186 r12 = select r14 r16 r12
187 r14 = bit_xor r8 r14
188 r14 = bit_and r15 r14
189 r11 = bit_and r14 r11
190 r11 = bit_or r13 r11
191 r13 = add_i32 r12 r10
192 r14 = bit_xor r8 r11
193 r16 = bit_and r15 r14
194 r12 = select r16 r13 r12
195 r13 = add_i32 r12 r10
196 r16 = bit_and r9 r14
197 r12 = select r16 r13 r12
198 r16 = bit_xor r8 r16
199 r16 = bit_and r15 r16
200 r14 = bit_and r16 r14
201 r14 = bit_or r11 r14
202 r11 = add_i32 r12 r10
203 r16 = bit_xor r8 r14
204 r13 = bit_and r15 r16
205 r12 = select r13 r11 r12
206 r11 = add_i32 r12 r10
207 r13 = bit_and r9 r16
208 r12 = select r13 r11 r12
179 r13 = bit_and r16 r13
180 r14 = add_i32 r10 r11
181 r12 = bit_xor r8 r13
182 r15 = bit_and r16 r12
183 r11 = select r15 r14 r11
184 r14 = add_i32 r10 r11
185 r15 = bit_and r9 r12
186 r11 = select r15 r14 r11
187 r15 = bit_xor r8 r15
188 r15 = bit_and r16 r15
189 r15 = bit_and r12 r15
190 r15 = bit_or r13 r15
191 r13 = add_i32 r10 r11
192 r12 = bit_xor r8 r15
193 r14 = bit_and r16 r12
194 r11 = select r14 r13 r11
195 r13 = add_i32 r10 r11
196 r14 = bit_and r9 r12
197 r11 = select r14 r13 r11
198 r14 = bit_xor r8 r14
199 r14 = bit_and r16 r14
200 r14 = bit_and r12 r14
201 r14 = bit_or r15 r14
202 r15 = add_i32 r10 r11
203 r12 = bit_xor r8 r14
204 r13 = bit_and r16 r12
205 r11 = select r13 r15 r11
206 r15 = add_i32 r10 r11
207 r13 = bit_and r9 r12
208 r11 = select r13 r15 r11
209 r13 = bit_xor r8 r13
210 r13 = bit_and r15 r13
211 r16 = bit_and r13 r16
212 r16 = bit_or r14 r16
213 r14 = add_i32 r12 r10
214 r13 = bit_xor r8 r16
215 r11 = bit_and r15 r13
216 r12 = select r11 r14 r12
217 r14 = add_i32 r12 r10
218 r11 = bit_and r9 r13
219 r12 = select r11 r14 r12
220 r11 = bit_xor r8 r11
221 r11 = bit_and r15 r11
222 r13 = bit_and r11 r13
223 r13 = bit_or r16 r13
224 r16 = add_i32 r12 r10
225 r11 = bit_xor r8 r13
226 r14 = bit_and r15 r11
227 r12 = select r14 r16 r12
228 r16 = add_i32 r12 r10
229 r14 = bit_and r9 r11
230 r12 = select r14 r16 r12
210 r13 = bit_and r16 r13
211 r13 = bit_and r12 r13
212 r13 = bit_or r14 r13
213 r14 = add_i32 r10 r11
214 r12 = bit_xor r8 r13
215 r15 = bit_and r16 r12
216 r11 = select r15 r14 r11
217 r14 = add_i32 r10 r11
218 r15 = bit_and r9 r12
219 r11 = select r15 r14 r11
220 r15 = bit_xor r8 r15
221 r15 = bit_and r16 r15
222 r15 = bit_and r12 r15
223 r15 = bit_or r13 r15
224 r13 = add_i32 r10 r11
225 r12 = bit_xor r8 r15
226 r14 = bit_and r16 r12
227 r11 = select r14 r13 r11
228 r13 = add_i32 r10 r11
229 r14 = bit_and r9 r12
230 r11 = select r14 r13 r11
231 r14 = bit_xor r8 r14
232 r14 = bit_and r15 r14
233 r11 = bit_and r14 r11
234 r11 = bit_or r13 r11
235 r13 = add_i32 r12 r10
236 r14 = bit_xor r8 r11
237 r16 = bit_and r15 r14
238 r12 = select r16 r13 r12
239 r13 = add_i32 r12 r10
240 r16 = bit_and r9 r14
241 r12 = select r16 r13 r12
242 r16 = bit_xor r8 r16
243 r16 = bit_and r15 r16
244 r14 = bit_and r16 r14
245 r14 = bit_or r11 r14
246 r11 = add_i32 r12 r10
247 r16 = bit_xor r8 r14
248 r13 = bit_and r15 r16
249 r12 = select r13 r11 r12
250 r11 = add_i32 r12 r10
251 r13 = bit_and r9 r16
252 r12 = select r13 r11 r12
253 r13 = bit_xor r8 r13
254 r13 = bit_and r15 r13
255 r16 = bit_and r13 r16
256 r16 = bit_or r14 r16
257 r14 = add_i32 r12 r10
258 r13 = bit_xor r8 r16
259 r11 = bit_and r15 r13
260 r12 = select r11 r14 r12
261 r14 = add_i32 r12 r10
262 r11 = bit_and r9 r13
263 r12 = select r11 r14 r12
264 r11 = bit_xor r8 r11
265 r11 = bit_and r15 r11
266 r13 = bit_and r11 r13
267 r13 = bit_or r16 r13
268 r16 = add_i32 r12 r10
269 r11 = bit_xor r8 r13
270 r14 = bit_and r15 r11
271 r12 = select r14 r16 r12
272 r16 = add_i32 r12 r10
273 r9 = bit_and r9 r11
274 r12 = select r9 r16 r12
232 r14 = bit_and r16 r14
233 r14 = bit_and r12 r14
234 r14 = bit_or r15 r14
235 r15 = add_i32 r10 r11
236 r12 = bit_xor r8 r14
237 r13 = bit_and r16 r12
238 r11 = select r13 r15 r11
239 r15 = add_i32 r10 r11
240 r13 = bit_and r9 r12
241 r11 = select r13 r15 r11
242 r13 = bit_xor r8 r13
243 r13 = bit_and r16 r13
244 r13 = bit_and r12 r13
245 r13 = bit_or r14 r13
246 r14 = add_i32 r10 r11
247 r12 = bit_xor r8 r13
248 r15 = bit_and r16 r12
249 r11 = select r15 r14 r11
250 r14 = add_i32 r10 r11
251 r15 = bit_and r9 r12
252 r11 = select r15 r14 r11
253 r15 = bit_xor r8 r15
254 r15 = bit_and r16 r15
255 r15 = bit_and r12 r15
256 r15 = bit_or r13 r15
257 r13 = add_i32 r10 r11
258 r12 = bit_xor r8 r15
259 r14 = bit_and r16 r12
260 r11 = select r14 r13 r11
261 r13 = add_i32 r10 r11
262 r14 = bit_and r9 r12
263 r11 = select r14 r13 r11
264 r14 = bit_xor r8 r14
265 r14 = bit_and r16 r14
266 r14 = bit_and r12 r14
267 r14 = bit_or r15 r14
268 r15 = add_i32 r10 r11
269 r12 = bit_xor r8 r14
270 r13 = bit_and r16 r12
271 r11 = select r13 r15 r11
272 r15 = add_i32 r10 r11
273 r9 = bit_and r9 r12
274 r11 = select r9 r15 r11
275 r9 = bit_xor r8 r9
276 r9 = bit_and r15 r9
277 r11 = bit_and r9 r11
278 r11 = bit_or r13 r11
279 r10 = add_i32 r12 r10
280 r11 = bit_xor r8 r11
281 r11 = bit_and r15 r11
282 r12 = select r11 r10 r12
276 r9 = bit_and r16 r9
277 r9 = bit_and r12 r9
278 r9 = bit_or r14 r9
279 r10 = add_i32 r10 r11
280 r9 = bit_xor r8 r9
281 r9 = bit_and r16 r9
282 r11 = select r9 r10 r11
283 r10 = splat 14 (2.8025969e-44)
284 r10 = eq_i32 r12 r10
285 r11 = bit_and r10 r11
286 r11 = bit_and r15 r11
287 r4 = select r11 r0 r4
288 r5 = select r11 r1 r5
289 r6 = select r11 r2 r6
290 r7 = select r11 r3 r7
284 r10 = eq_i32 r11 r10
285 r10 = bit_and r9 r10
286 r10 = bit_and r16 r10
287 r4 = select r10 r0 r4
288 r5 = select r10 r1 r5
289 r6 = select r10 r2 r6
290 r7 = select r10 r3 r7
loop:
291 store32 ptr1 r4
292 store32 ptr2 r5

View File

@ -9,13 +9,13 @@
7 r7 = uniform32 ptr0 30
8 r8 = uniform32 ptr0 40
9 r2 = add_f32 r1 r2
10 r3 = add_f32 r2 r3
11 r4 = add_f32 r3 r4
10 r2 = add_f32 r3 r2
11 r2 = add_f32 r4 r2
12 r5 = mul_f32 r5 r5
13 r6 = mul_f32 r5 r6
14 r7 = mul_f32 r6 r7
13 r5 = mul_f32 r6 r5
14 r5 = mul_f32 r7 r5
loop:
15 store32 ptr1 r0
16 store32 ptr2 r4
17 store32 ptr3 r7
16 store32 ptr2 r2
17 store32 ptr3 r5
18 store32 ptr4 r8