6d675ae6a2
Don't use `addsat()`/`subsat()`; that'll erroneously flag cases where the sum is exactly the maximum integer value, or the difference is exactly 0. Also, correct the condition for the `select()` function; it's basically `mix()` with a boolean factor. (What was I *thinking*?)
178 lines
3.1 KiB
Plaintext
178 lines
3.1 KiB
Plaintext
#include <metal_stdlib>
|
|
#include <simd/simd.h>
|
|
|
|
using namespace metal;
|
|
|
|
struct SSBOUint
|
|
{
|
|
uint a;
|
|
uint b;
|
|
uint c;
|
|
uint d;
|
|
uint2 a2;
|
|
uint2 b2;
|
|
uint2 c2;
|
|
uint2 d2;
|
|
uint3 a3;
|
|
uint3 b3;
|
|
uint3 c3;
|
|
uint3 d3;
|
|
uint4 a4;
|
|
uint4 b4;
|
|
uint4 c4;
|
|
uint4 d4;
|
|
};
|
|
|
|
struct ResType
|
|
{
|
|
uint _m0;
|
|
uint _m1;
|
|
};
|
|
|
|
struct ResType_1
|
|
{
|
|
uint2 _m0;
|
|
uint2 _m1;
|
|
};
|
|
|
|
struct ResType_2
|
|
{
|
|
uint3 _m0;
|
|
uint3 _m1;
|
|
};
|
|
|
|
struct ResType_3
|
|
{
|
|
uint4 _m0;
|
|
uint4 _m1;
|
|
};
|
|
|
|
struct SSBOInt
|
|
{
|
|
int a;
|
|
int b;
|
|
int c;
|
|
int d;
|
|
int2 a2;
|
|
int2 b2;
|
|
int2 c2;
|
|
int2 d2;
|
|
int3 a3;
|
|
int3 b3;
|
|
int3 c3;
|
|
int3 d3;
|
|
int4 a4;
|
|
int4 b4;
|
|
int4 c4;
|
|
int4 d4;
|
|
};
|
|
|
|
struct ResType_4
|
|
{
|
|
int _m0;
|
|
int _m1;
|
|
};
|
|
|
|
struct ResType_5
|
|
{
|
|
int2 _m0;
|
|
int2 _m1;
|
|
};
|
|
|
|
struct ResType_6
|
|
{
|
|
int3 _m0;
|
|
int3 _m1;
|
|
};
|
|
|
|
struct ResType_7
|
|
{
|
|
int4 _m0;
|
|
int4 _m1;
|
|
};
|
|
|
|
kernel void main0(device SSBOUint& u [[buffer(0)]], device SSBOInt& i [[buffer(1)]])
|
|
{
|
|
ResType _25;
|
|
_25._m0 = u.a + u.b;
|
|
_25._m1 = select(uint(1), uint(0), _25._m0 >= max(u.a, u.b));
|
|
u.d = _25._m1;
|
|
u.c = _25._m0;
|
|
ResType_1 _40;
|
|
_40._m0 = u.a2 + u.b2;
|
|
_40._m1 = select(uint2(1), uint2(0), _40._m0 >= max(u.a2, u.b2));
|
|
u.d2 = _40._m1;
|
|
u.c2 = _40._m0;
|
|
ResType_2 _55;
|
|
_55._m0 = u.a3 + u.b3;
|
|
_55._m1 = select(uint3(1), uint3(0), _55._m0 >= max(u.a3, u.b3));
|
|
u.d3 = _55._m1;
|
|
u.c3 = _55._m0;
|
|
ResType_3 _70;
|
|
_70._m0 = u.a4 + u.b4;
|
|
_70._m1 = select(uint4(1), uint4(0), _70._m0 >= max(u.a4, u.b4));
|
|
u.d4 = _70._m1;
|
|
u.c4 = _70._m0;
|
|
ResType _79;
|
|
_79._m0 = u.a - u.b;
|
|
_79._m1 = select(uint(1), uint(0), u.a >= u.b);
|
|
u.d = _79._m1;
|
|
u.c = _79._m0;
|
|
ResType_1 _88;
|
|
_88._m0 = u.a2 - u.b2;
|
|
_88._m1 = select(uint2(1), uint2(0), u.a2 >= u.b2);
|
|
u.d2 = _88._m1;
|
|
u.c2 = _88._m0;
|
|
ResType_2 _97;
|
|
_97._m0 = u.a3 - u.b3;
|
|
_97._m1 = select(uint3(1), uint3(0), u.a3 >= u.b3);
|
|
u.d3 = _97._m1;
|
|
u.c3 = _97._m0;
|
|
ResType_3 _106;
|
|
_106._m0 = u.a4 - u.b4;
|
|
_106._m1 = select(uint4(1), uint4(0), u.a4 >= u.b4);
|
|
u.d4 = _106._m1;
|
|
u.c4 = _106._m0;
|
|
ResType _116;
|
|
_116._m0 = u.a * u.b;
|
|
_116._m1 = mulhi(u.a, u.b);
|
|
u.d = _116._m0;
|
|
u.c = _116._m1;
|
|
ResType_1 _125;
|
|
_125._m0 = u.a2 * u.b2;
|
|
_125._m1 = mulhi(u.a2, u.b2);
|
|
u.d2 = _125._m0;
|
|
u.c2 = _125._m1;
|
|
ResType_2 _134;
|
|
_134._m0 = u.a3 * u.b3;
|
|
_134._m1 = mulhi(u.a3, u.b3);
|
|
u.d3 = _134._m0;
|
|
u.c3 = _134._m1;
|
|
ResType_3 _143;
|
|
_143._m0 = u.a4 * u.b4;
|
|
_143._m1 = mulhi(u.a4, u.b4);
|
|
u.d4 = _143._m0;
|
|
u.c4 = _143._m1;
|
|
ResType_4 _160;
|
|
_160._m0 = i.a * i.b;
|
|
_160._m1 = mulhi(i.a, i.b);
|
|
i.d = _160._m0;
|
|
i.c = _160._m1;
|
|
ResType_5 _171;
|
|
_171._m0 = i.a2 * i.b2;
|
|
_171._m1 = mulhi(i.a2, i.b2);
|
|
i.d2 = _171._m0;
|
|
i.c2 = _171._m1;
|
|
ResType_6 _182;
|
|
_182._m0 = i.a3 * i.b3;
|
|
_182._m1 = mulhi(i.a3, i.b3);
|
|
i.d3 = _182._m0;
|
|
i.c3 = _182._m1;
|
|
ResType_7 _193;
|
|
_193._m0 = i.a4 * i.b4;
|
|
_193._m1 = mulhi(i.a4, i.b4);
|
|
i.d4 = _193._m0;
|
|
i.c4 = _193._m1;
|
|
}
|
|
|