Support bitcasts of 16-bit types.

This commit is contained in:
Chip Davis 2018-11-05 14:55:56 -06:00
parent ca4744ab72
commit 0d949e11ff
14 changed files with 415 additions and 11 deletions

View File

@ -0,0 +1,22 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct SSBO0
{
short4 inputs[1];
};
struct SSBO1
{
int4 outputs[1];
};
kernel void main0(device SSBO0& _25 [[buffer(0)]], device SSBO1& _39 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
_39.outputs[gl_GlobalInvocationID.x].x = int(as_type<uint>(as_type<half2>(_25.inputs[gl_GlobalInvocationID.x].xy) + half2(half(1))));
_39.outputs[gl_GlobalInvocationID.x].y = as_type<int>(_25.inputs[gl_GlobalInvocationID.x].zw);
_39.outputs[gl_GlobalInvocationID.x].z = int(as_type<uint>(ushort2(_25.inputs[gl_GlobalInvocationID.x].xy)));
}

View File

@ -0,0 +1,28 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct SSBO1
{
short4 outputs[1];
};
struct SSBO0
{
int4 inputs[1];
};
struct UBO
{
half4 const0;
};
kernel void main0(device SSBO0& _29 [[buffer(0)]], device SSBO1& _21 [[buffer(1)]], constant UBO& _40 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
short2 _47 = as_type<short2>(_29.inputs[gl_GlobalInvocationID.x].x) + as_type<short2>(_40.const0.xy);
_21.outputs[gl_GlobalInvocationID.x] = short4(_47.x, _47.y, _21.outputs[gl_GlobalInvocationID.x].z, _21.outputs[gl_GlobalInvocationID.x].w);
short2 _66 = short2(as_type<ushort2>(uint(_29.inputs[gl_GlobalInvocationID.x].y)) - as_type<ushort2>(_40.const0.zw));
_21.outputs[gl_GlobalInvocationID.x] = short4(_21.outputs[gl_GlobalInvocationID.x].x, _21.outputs[gl_GlobalInvocationID.x].y, _66.x, _66.y);
}

View File

@ -0,0 +1,38 @@
#version 450
#if defined(GL_AMD_gpu_shader_half_float)
#extension GL_AMD_gpu_shader_half_float : require
#elif defined(GL_NV_gpu_shader5)
#extension GL_NV_gpu_shader5 : require
#elif defined(GL_EXT_shader_16bit_storage)
#extension GL_EXT_shader_16bit_storage : require
#else
#error No extension available for FP16.
#endif
#if defined(GL_AMD_gpu_shader_int16)
#extension GL_AMD_gpu_shader_int16 : require
#elif defined(GL_EXT_shader_16bit_storage)
#extension GL_EXT_shader_16bit_storage : require
#else
#error No extension available for Int16.
#endif
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
layout(binding = 0, std430) buffer SSBO0
{
i16vec4 inputs[];
} _25;
layout(binding = 1, std430) buffer SSBO1
{
ivec4 outputs[];
} _39;
void main()
{
uint ident = gl_GlobalInvocationID.x;
f16vec2 a = int16BitsToFloat16(_25.inputs[ident].xy);
_39.outputs[ident].x = int(packFloat2x16(a + f16vec2(float16_t(1), float16_t(1))));
_39.outputs[ident].y = packInt2x16(_25.inputs[ident].zw);
_39.outputs[ident].z = int(packUint2x16(u16vec2(_25.inputs[ident].xy)));
}

View File

@ -0,0 +1,43 @@
#version 450
#if defined(GL_AMD_gpu_shader_int16)
#extension GL_AMD_gpu_shader_int16 : require
#elif defined(GL_EXT_shader_16bit_storage)
#extension GL_EXT_shader_16bit_storage : require
#else
#error No extension available for Int16.
#endif
#if defined(GL_AMD_gpu_shader_half_float)
#extension GL_AMD_gpu_shader_half_float : require
#elif defined(GL_NV_gpu_shader5)
#extension GL_NV_gpu_shader5 : require
#elif defined(GL_EXT_shader_16bit_storage)
#extension GL_EXT_shader_16bit_storage : require
#else
#error No extension available for FP16.
#endif
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
layout(binding = 1, std430) buffer SSBO1
{
i16vec4 outputs[];
} _21;
layout(binding = 0, std430) buffer SSBO0
{
ivec4 inputs[];
} _29;
layout(binding = 2, std140) uniform UBO
{
f16vec4 const0;
} _40;
void main()
{
uint ident = gl_GlobalInvocationID.x;
i16vec2 _47 = unpackInt2x16(_29.inputs[ident].x) + float16BitsToInt16(_40.const0.xy);
_21.outputs[ident] = i16vec4(_47.x, _47.y, _21.outputs[ident].z, _21.outputs[ident].w);
i16vec2 _66 = i16vec2(unpackUint2x16(uint(_29.inputs[ident].y)) - float16BitsToUint16(_40.const0.zw));
_21.outputs[ident] = i16vec4(_21.outputs[ident].x, _21.outputs[ident].y, _66.x, _66.y);
}

View File

@ -0,0 +1,24 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct SSBO0
{
short4 inputs[1];
};
struct SSBO1
{
int4 outputs[1];
};
kernel void main0(device SSBO0& _25 [[buffer(0)]], device SSBO1& _39 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
uint ident = gl_GlobalInvocationID.x;
half2 a = as_type<half2>(_25.inputs[ident].xy);
_39.outputs[ident].x = int(as_type<uint>(a + half2(half(1), half(1))));
_39.outputs[ident].y = as_type<int>(_25.inputs[ident].zw);
_39.outputs[ident].z = int(as_type<uint>(ushort2(_25.inputs[ident].xy)));
}

View File

@ -0,0 +1,29 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct SSBO1
{
short4 outputs[1];
};
struct SSBO0
{
int4 inputs[1];
};
struct UBO
{
half4 const0;
};
kernel void main0(device SSBO0& _29 [[buffer(0)]], device SSBO1& _21 [[buffer(1)]], constant UBO& _40 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
uint ident = gl_GlobalInvocationID.x;
short2 _47 = as_type<short2>(_29.inputs[ident].x) + as_type<short2>(_40.const0.xy);
_21.outputs[ident] = short4(_47.x, _47.y, _21.outputs[ident].z, _21.outputs[ident].w);
short2 _66 = short2(as_type<ushort2>(uint(_29.inputs[ident].y)) - as_type<ushort2>(_40.const0.zw));
_21.outputs[ident] = short4(_21.outputs[ident].x, _21.outputs[ident].y, _66.x, _66.y);
}

View File

@ -0,0 +1,38 @@
#version 450
#if defined(GL_AMD_gpu_shader_half_float)
#extension GL_AMD_gpu_shader_half_float : require
#elif defined(GL_NV_gpu_shader5)
#extension GL_NV_gpu_shader5 : require
#elif defined(GL_EXT_shader_16bit_storage)
#extension GL_EXT_shader_16bit_storage : require
#else
#error No extension available for FP16.
#endif
#if defined(GL_AMD_gpu_shader_int16)
#extension GL_AMD_gpu_shader_int16 : require
#elif defined(GL_EXT_shader_16bit_storage)
#extension GL_EXT_shader_16bit_storage : require
#else
#error No extension available for Int16.
#endif
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
layout(binding = 0, std430) buffer SSBO0
{
i16vec4 inputs[];
} _25;
layout(binding = 1, std430) buffer SSBO1
{
ivec4 outputs[];
} _39;
void main()
{
uint ident = gl_GlobalInvocationID.x;
f16vec2 a = int16BitsToFloat16(_25.inputs[ident].xy);
_39.outputs[ident].x = int(packFloat2x16(a + f16vec2(float16_t(1), float16_t(1))));
_39.outputs[ident].y = packInt2x16(_25.inputs[ident].zw);
_39.outputs[ident].z = int(packUint2x16(u16vec2(_25.inputs[ident].xy)));
}

View File

@ -0,0 +1,43 @@
#version 450
#if defined(GL_AMD_gpu_shader_int16)
#extension GL_AMD_gpu_shader_int16 : require
#elif defined(GL_EXT_shader_16bit_storage)
#extension GL_EXT_shader_16bit_storage : require
#else
#error No extension available for Int16.
#endif
#if defined(GL_AMD_gpu_shader_half_float)
#extension GL_AMD_gpu_shader_half_float : require
#elif defined(GL_NV_gpu_shader5)
#extension GL_NV_gpu_shader5 : require
#elif defined(GL_EXT_shader_16bit_storage)
#extension GL_EXT_shader_16bit_storage : require
#else
#error No extension available for FP16.
#endif
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
layout(binding = 1, std430) buffer SSBO1
{
i16vec4 outputs[];
} _21;
layout(binding = 0, std430) buffer SSBO0
{
ivec4 inputs[];
} _29;
layout(binding = 2, std140) uniform UBO
{
f16vec4 const0;
} _40;
void main()
{
uint ident = gl_GlobalInvocationID.x;
i16vec2 _47 = unpackInt2x16(_29.inputs[ident].x) + float16BitsToInt16(_40.const0.xy);
_21.outputs[ident] = i16vec4(_47.x, _47.y, _21.outputs[ident].z, _21.outputs[ident].w);
i16vec2 _66 = i16vec2(unpackUint2x16(uint(_29.inputs[ident].y)) - float16BitsToUint16(_40.const0.zw));
_21.outputs[ident] = i16vec4(_21.outputs[ident].x, _21.outputs[ident].y, _66.x, _66.y);
}

View File

@ -0,0 +1,23 @@
#version 450 core
#extension GL_AMD_gpu_shader_half_float : require
#extension GL_AMD_gpu_shader_int16 : require
layout(local_size_x = 1) in;
layout(binding = 0, std430) buffer SSBO0
{
i16vec4 inputs[];
};
layout(binding = 1, std430) buffer SSBO1
{
ivec4 outputs[];
};
void main()
{
uint ident = gl_GlobalInvocationID.x;
f16vec2 a = int16BitsToFloat16(inputs[ident].xy);
outputs[ident].x = int(packFloat2x16(a + f16vec2(1, 1)));
outputs[ident].y = packInt2x16(inputs[ident].zw);
outputs[ident].z = int(packUint2x16(u16vec2(inputs[ident].xy)));
}

View File

@ -0,0 +1,26 @@
#version 450 core
#extension GL_AMD_gpu_shader_half_float : require
#extension GL_AMD_gpu_shader_int16 : require
layout(local_size_x = 1) in;
layout(binding = 0, std430) buffer SSBO0
{
ivec4 inputs[];
};
layout(binding = 1, std430) buffer SSBO1
{
i16vec4 outputs[];
};
layout(binding = 2) uniform UBO
{
f16vec4 const0;
};
void main()
{
uint ident = gl_GlobalInvocationID.x;
outputs[ident].xy = unpackInt2x16(inputs[ident].x) + float16BitsToInt16(const0.xy);
outputs[ident].zw = i16vec2(unpackUint2x16(uint(inputs[ident].y)) - float16BitsToUint16(const0.zw));
}

View File

@ -0,0 +1,23 @@
#version 450 core
#extension GL_AMD_gpu_shader_half_float : require
#extension GL_AMD_gpu_shader_int16 : require
layout(local_size_x = 1) in;
layout(binding = 0, std430) buffer SSBO0
{
i16vec4 inputs[];
};
layout(binding = 1, std430) buffer SSBO1
{
ivec4 outputs[];
};
void main()
{
uint ident = gl_GlobalInvocationID.x;
f16vec2 a = int16BitsToFloat16(inputs[ident].xy);
outputs[ident].x = int(packFloat2x16(a + f16vec2(1, 1)));
outputs[ident].y = packInt2x16(inputs[ident].zw);
outputs[ident].z = int(packUint2x16(u16vec2(inputs[ident].xy)));
}

View File

@ -0,0 +1,26 @@
#version 450 core
#extension GL_AMD_gpu_shader_half_float : require
#extension GL_AMD_gpu_shader_int16 : require
layout(local_size_x = 1) in;
layout(binding = 0, std430) buffer SSBO0
{
ivec4 inputs[];
};
layout(binding = 1, std430) buffer SSBO1
{
i16vec4 outputs[];
};
layout(binding = 2) uniform UBO
{
f16vec4 const0;
};
void main()
{
uint ident = gl_GlobalInvocationID.x;
outputs[ident].xy = unpackInt2x16(inputs[ident].x) + float16BitsToInt16(const0.xy);
outputs[ident].zw = i16vec2(unpackUint2x16(uint(inputs[ident].y)) - float16BitsToUint16(const0.zw));
}

View File

@ -1851,20 +1851,22 @@ void CompilerGLSL::replace_illegal_names()
"dFdx", "dFdxCoarse", "dFdxFine",
"dFdy", "dFdyCoarse", "dFdyFine",
"distance", "dot", "EmitStreamVertex", "EmitVertex", "EndPrimitive", "EndStreamPrimitive", "equal", "exp", "exp2",
"faceforward", "findLSB", "findMSB", "floatBitsToInt", "floatBitsToUint", "floor", "fma", "fract", "frexp", "fwidth", "fwidthCoarse", "fwidthFine",
"faceforward", "findLSB", "findMSB", "float16BitsToInt16", "float16BitsToUint16", "floatBitsToInt", "floatBitsToUint", "floor", "fma", "fract",
"frexp", "fwidth", "fwidthCoarse", "fwidthFine",
"greaterThan", "greaterThanEqual", "groupMemoryBarrier",
"imageAtomicAdd", "imageAtomicAnd", "imageAtomicCompSwap", "imageAtomicExchange", "imageAtomicMax", "imageAtomicMin", "imageAtomicOr", "imageAtomicXor",
"imageLoad", "imageSamples", "imageSize", "imageStore", "imulExtended", "intBitsToFloat", "interpolateAtOffset", "interpolateAtCentroid", "interpolateAtSample",
"imageLoad", "imageSamples", "imageSize", "imageStore", "imulExtended", "int16BitsToFloat16", "intBitsToFloat", "interpolateAtOffset", "interpolateAtCentroid", "interpolateAtSample",
"inverse", "inversesqrt", "isinf", "isnan", "ldexp", "length", "lessThan", "lessThanEqual", "log", "log2",
"matrixCompMult", "max", "memoryBarrier", "memoryBarrierAtomicCounter", "memoryBarrierBuffer", "memoryBarrierImage", "memoryBarrierShared",
"min", "mix", "mod", "modf", "noise", "noise1", "noise2", "noise3", "noise4", "normalize", "not", "notEqual",
"outerProduct", "packDouble2x32", "packHalf2x16", "packSnorm2x16", "packSnorm4x8", "packUnorm2x16", "packUnorm4x8", "pow",
"outerProduct", "packDouble2x32", "packHalf2x16", "packInt2x16", "packInt4x16", "packSnorm2x16", "packSnorm4x8",
"packUint2x16", "packUint4x16", "packUnorm2x16", "packUnorm4x8", "pow",
"radians", "reflect", "refract", "round", "roundEven", "sign", "sin", "sinh", "smoothstep", "sqrt", "step",
"tan", "tanh", "texelFetch", "texelFetchOffset", "texture", "textureGather", "textureGatherOffset", "textureGatherOffsets",
"textureGrad", "textureGradOffset", "textureLod", "textureLodOffset", "textureOffset", "textureProj", "textureProjGrad",
"textureProjGradOffset", "textureProjLod", "textureProjLodOffset", "textureProjOffset", "textureQueryLevels", "textureQueryLod", "textureSamples", "textureSize",
"transpose", "trunc", "uaddCarry", "uintBitsToFloat", "umulExtended", "unpackDouble2x32", "unpackHalf2x16", "unpackSnorm2x16", "unpackSnorm4x8",
"unpackUnorm2x16", "unpackUnorm4x8", "usubBorrow",
"transpose", "trunc", "uaddCarry", "uint16BitsToFloat16", "uintBitsToFloat", "umulExtended", "unpackDouble2x32", "unpackHalf2x16", "unpackInt2x16", "unpackInt4x16",
"unpackSnorm2x16", "unpackSnorm4x8", "unpackUint2x16", "unpackUint4x16", "unpackUnorm2x16", "unpackUnorm4x8", "usubBorrow",
"active", "asm", "atomic_uint", "attribute", "bool", "break", "buffer",
"bvec2", "bvec3", "bvec4", "case", "cast", "centroid", "class", "coherent", "common", "const", "continue", "default", "discard",
@ -5150,12 +5152,16 @@ case OpGroupNonUniform##op: \
string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
{
if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Int)
if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Short)
return type_to_glsl(out_type);
else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Int)
return type_to_glsl(out_type);
else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Int64)
return type_to_glsl(out_type);
else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float)
return "floatBitsToUint";
else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::UShort)
return type_to_glsl(out_type);
else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::UInt)
return type_to_glsl(out_type);
else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::UInt64)
@ -5174,12 +5180,36 @@ string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &i
return "int64BitsToDouble";
else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64)
return "uint64BitsToDouble";
else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Half)
return "float16BitsToInt16";
else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half)
return "float16BitsToUint16";
else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::Short)
return "int16BitsToFloat16";
else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort)
return "uint16BitsToFloat16";
else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UInt && in_type.vecsize == 2)
return "packUint2x32";
else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
return "unpackFloat2x16";
else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2)
return "packFloat2x16";
else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Short && in_type.vecsize == 2)
return "packInt2x16";
else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int && in_type.vecsize == 1)
return "unpackInt2x16";
else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UShort && in_type.vecsize == 2)
return "packUint2x16";
else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
return "unpackUint2x16";
else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Short && in_type.vecsize == 4)
return "packInt4x16";
else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int64 && in_type.vecsize == 1)
return "unpackInt4x16";
else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UShort && in_type.vecsize == 4)
return "packUint4x16";
else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt64 && in_type.vecsize == 1)
return "unpackUint4x16";
else
return "";
}

View File

@ -983,9 +983,8 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage)
mbr_idx++;
}
}
else if (type.basetype == SPIRType::Boolean || type.basetype == SPIRType::Char ||
type_is_integral(type) || type_is_floating_point(type) ||
type.basetype == SPIRType::Boolean)
else if (type.basetype == SPIRType::Boolean || type.basetype == SPIRType::Char || type_is_integral(type) ||
type_is_floating_point(type) || type.basetype == SPIRType::Boolean)
{
bool is_builtin = is_builtin_variable(*p_var);
BuiltIn builtin = BuiltIn(get_decoration(p_var->self, DecorationBuiltIn));
@ -4501,7 +4500,9 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id)
string CompilerMSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
{
if ((out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Int) ||
if ((out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Short) ||
(out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::UShort) ||
(out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Int) ||
(out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::UInt) ||
(out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Int64) ||
(out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::UInt64))
@ -4516,7 +4517,17 @@ string CompilerMSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in
(out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64) ||
(out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64) ||
(out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt) ||
(out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half))
(out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half) ||
(out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::Int) ||
(out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Half) ||
(out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort) ||
(out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half) ||
(out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::Short) ||
(out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Half) ||
(out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UShort && in_type.vecsize == 2) ||
(out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1) ||
(out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Short && in_type.vecsize == 2) ||
(out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int && in_type.vecsize == 1))
return "as_type<" + type_to_glsl(out_type) + ">";
return "";