94 lines
4.4 KiB
Plaintext
94 lines
4.4 KiB
Plaintext
|
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||
|
|
||
|
#include <metal_stdlib>
|
||
|
#include <simd/simd.h>
|
||
|
|
||
|
using namespace metal;
|
||
|
|
||
|
struct BUF0
|
||
|
{
|
||
|
half2 f16s;
|
||
|
ushort2 u16;
|
||
|
short2 i16;
|
||
|
ushort4 u16s;
|
||
|
short4 i16s;
|
||
|
half f16;
|
||
|
};
|
||
|
|
||
|
static inline __attribute__((always_inline))
|
||
|
void test_u16(device BUF0& v_24)
|
||
|
{
|
||
|
v_24.f16 += as_type<half>(ushort(((device ushort*)&v_24.u16)[0u] + ((device ushort*)&v_24.u16)[1u]));
|
||
|
v_24.f16 += as_type<half>(ushort(((device ushort*)&v_24.u16)[0u] - ((device ushort*)&v_24.u16)[1u]));
|
||
|
v_24.f16 += as_type<half>(ushort(((device ushort*)&v_24.u16)[0u] * ((device ushort*)&v_24.u16)[1u]));
|
||
|
v_24.f16 += as_type<half>(ushort(((device ushort*)&v_24.u16)[0u] / ((device ushort*)&v_24.u16)[1u]));
|
||
|
v_24.f16 += as_type<half>(ushort(((device ushort*)&v_24.u16)[0u] % ((device ushort*)&v_24.u16)[1u]));
|
||
|
v_24.f16 += as_type<half>(ushort(((device ushort*)&v_24.u16)[0u] << ((device ushort*)&v_24.u16)[1u]));
|
||
|
v_24.f16 += as_type<half>(ushort(((device ushort*)&v_24.u16)[0u] >> ((device ushort*)&v_24.u16)[1u]));
|
||
|
v_24.f16 += as_type<half>(ushort(~((device ushort*)&v_24.u16)[0u]));
|
||
|
v_24.f16 += as_type<half>(ushort(-((device ushort*)&v_24.u16)[0u]));
|
||
|
v_24.f16 += as_type<half>(ushort(((device ushort*)&v_24.u16)[0u] ^ ((device ushort*)&v_24.u16)[1u]));
|
||
|
v_24.f16 += as_type<half>(ushort(((device ushort*)&v_24.u16)[0u] & ((device ushort*)&v_24.u16)[1u]));
|
||
|
v_24.f16 += as_type<half>(ushort(((device ushort*)&v_24.u16)[0u] | ((device ushort*)&v_24.u16)[1u]));
|
||
|
}
|
||
|
|
||
|
static inline __attribute__((always_inline))
|
||
|
void test_i16(device BUF0& v_24)
|
||
|
{
|
||
|
v_24.f16 += as_type<half>(short(((device short*)&v_24.i16)[0u] + ((device short*)&v_24.i16)[1u]));
|
||
|
v_24.f16 += as_type<half>(short(((device short*)&v_24.i16)[0u] - ((device short*)&v_24.i16)[1u]));
|
||
|
v_24.f16 += as_type<half>(short(((device short*)&v_24.i16)[0u] * ((device short*)&v_24.i16)[1u]));
|
||
|
v_24.f16 += as_type<half>(short(((device short*)&v_24.i16)[0u] / ((device short*)&v_24.i16)[1u]));
|
||
|
v_24.f16 += as_type<half>(short(((device short*)&v_24.i16)[0u] % ((device short*)&v_24.i16)[1u]));
|
||
|
v_24.f16 += as_type<half>(short(((device short*)&v_24.i16)[0u] << ((device short*)&v_24.i16)[1u]));
|
||
|
v_24.f16 += as_type<half>(short(((device short*)&v_24.i16)[0u] >> ((device short*)&v_24.i16)[1u]));
|
||
|
v_24.f16 += as_type<half>(short(~((device short*)&v_24.i16)[0u]));
|
||
|
v_24.f16 += as_type<half>(short(-((device short*)&v_24.i16)[0u]));
|
||
|
v_24.f16 += as_type<half>(short(((device short*)&v_24.i16)[0u] ^ ((device short*)&v_24.i16)[1u]));
|
||
|
v_24.f16 += as_type<half>(short(((device short*)&v_24.i16)[0u] & ((device short*)&v_24.i16)[1u]));
|
||
|
v_24.f16 += as_type<half>(short(((device short*)&v_24.i16)[0u] | ((device short*)&v_24.i16)[1u]));
|
||
|
}
|
||
|
|
||
|
static inline __attribute__((always_inline))
|
||
|
void test_u16s(device BUF0& v_24)
|
||
|
{
|
||
|
v_24.f16s += as_type<half2>(v_24.u16s.xy + v_24.u16s.zw);
|
||
|
v_24.f16s += as_type<half2>(v_24.u16s.xy - v_24.u16s.zw);
|
||
|
v_24.f16s += as_type<half2>(v_24.u16s.xy * v_24.u16s.zw);
|
||
|
v_24.f16s += as_type<half2>(v_24.u16s.xy / v_24.u16s.zw);
|
||
|
v_24.f16s += as_type<half2>(v_24.u16s.xy % v_24.u16s.zw);
|
||
|
v_24.f16s += as_type<half2>(v_24.u16s.xy << v_24.u16s.zw);
|
||
|
v_24.f16s += as_type<half2>(v_24.u16s.xy >> v_24.u16s.zw);
|
||
|
v_24.f16s += as_type<half2>(~v_24.u16s.xy);
|
||
|
v_24.f16s += as_type<half2>(-v_24.u16s.xy);
|
||
|
v_24.f16s += as_type<half2>(v_24.u16s.xy ^ v_24.u16s.zw);
|
||
|
v_24.f16s += as_type<half2>(v_24.u16s.xy & v_24.u16s.zw);
|
||
|
v_24.f16s += as_type<half2>(v_24.u16s.xy | v_24.u16s.zw);
|
||
|
}
|
||
|
|
||
|
static inline __attribute__((always_inline))
|
||
|
void test_i16s(device BUF0& v_24)
|
||
|
{
|
||
|
v_24.f16s += as_type<half2>(v_24.i16s.xy + v_24.i16s.zw);
|
||
|
v_24.f16s += as_type<half2>(v_24.i16s.xy - v_24.i16s.zw);
|
||
|
v_24.f16s += as_type<half2>(v_24.i16s.xy * v_24.i16s.zw);
|
||
|
v_24.f16s += as_type<half2>(v_24.i16s.xy / v_24.i16s.zw);
|
||
|
v_24.f16s += as_type<half2>(v_24.i16s.xy % v_24.i16s.zw);
|
||
|
v_24.f16s += as_type<half2>(v_24.i16s.xy << v_24.i16s.zw);
|
||
|
v_24.f16s += as_type<half2>(v_24.i16s.xy >> v_24.i16s.zw);
|
||
|
v_24.f16s += as_type<half2>(~v_24.i16s.xy);
|
||
|
v_24.f16s += as_type<half2>(-v_24.i16s.xy);
|
||
|
v_24.f16s += as_type<half2>(v_24.i16s.xy ^ v_24.i16s.zw);
|
||
|
v_24.f16s += as_type<half2>(v_24.i16s.xy & v_24.i16s.zw);
|
||
|
v_24.f16s += as_type<half2>(v_24.i16s.xy | v_24.i16s.zw);
|
||
|
}
|
||
|
|
||
|
kernel void main0(device BUF0& v_24 [[buffer(0)]])
|
||
|
{
|
||
|
test_u16(v_24);
|
||
|
test_i16(v_24);
|
||
|
test_u16s(v_24);
|
||
|
test_i16s(v_24);
|
||
|
}
|
||
|
|