Add support for 8- and 16-bit types to GLSL and MSL.

In GLSL, 8-bit types require GL_EXT_shader_8bit_storage. 16-bit types
can use either GL_AMD_gpu_shader_int16/GL_AMD_gpu_shader_half_float or
GL_EXT_shader_16bit_storage.
This commit is contained in:
Chip Davis 2018-10-31 09:43:03 -05:00
parent 13633c0500
commit 1fb27b4cda
13 changed files with 497 additions and 9 deletions

View File

@ -0,0 +1,48 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
// Layout of the buffer that main0 receives as `constant block& _26` at buffer(0).
// Every member is a two-component vector of a narrow (8- or 16-bit) type.
struct block
{
short2 a; // signed 16-bit pair
ushort2 b; // unsigned 16-bit pair
char2 c; // signed 8-bit pair
uchar2 d; // unsigned 8-bit pair
half2 e; // half-precision float pair
};
// Layout of the buffer that main0 receives as `const device storage& _53` at buffer(1).
// Every member is a three-component vector of a narrow (8- or 16-bit) type.
struct storage
{
short3 f; // signed 16-bit triple
ushort3 g; // unsigned 16-bit triple
char3 h; // signed 8-bit triple; used as the divisor of f in main0
uchar3 i; // unsigned 8-bit triple; used as the divisor of g in main0
half3 j; // half-precision float triple
};
// Return value of main0: three narrow four-component outputs at user locations 0..2
// plus the position output.
struct main0_out
{
short4 p [[user(locn0)]];
ushort4 q [[user(locn1)]];
half4 r [[user(locn2)]];
float4 gl_Position [[position]];
};
// Stage-in argument of main0: one scalar per narrow type, bound to attributes 0..2.
struct main0_in
{
short foo [[attribute(0)]];
ushort bar [[attribute(1)]];
half baz [[attribute(2)]];
};
// Vertex entry point. Each narrow operand is first converted to a 32-bit vector, the
// arithmetic is carried out at that width, and the result is converted back to the
// narrow output type.
vertex main0_out main0(main0_in in [[stage_in]], constant block& _26 [[buffer(0)]], const device storage& _53 [[buffer(1)]])
{
main0_out out = {};
// p = splat(foo) + (a, c) - (f / h, 1), evaluated as int4.
out.p = short4((int4(int(in.foo)) + int4(int2(_26.a), int2(_26.c))) - int4(int3(_53.f) / int3(_53.h), 1));
// q: the same expression over the unsigned members (bar, b, d, g, i), evaluated as uint4.
out.q = ushort4((uint4(uint(in.bar)) + uint4(uint2(_26.b), uint2(_26.d))) - uint4(uint3(_53.g) / uint3(_53.i), 1u));
// r = splat(baz) + (e, 0, 1) - (j, 1), evaluated as float4; the half path has no division.
out.r = half4((float4(float(in.baz)) + float4(float2(_26.e), 0.0, 1.0)) - float4(float3(_53.j), 1.0));
// Position is written as a fixed constant.
out.gl_Position = float4(0.0, 0.0, 0.0, 1.0);
return out;
}

View File

@ -3,6 +3,8 @@
#extension GL_AMD_gpu_shader_half_float : require
#elif defined(GL_NV_gpu_shader5)
#extension GL_NV_gpu_shader5 : require
#elif defined(GL_EXT_shader_16bit_storage)
#extension GL_EXT_shader_16bit_storage : require
#else
#error No extension available for FP16.
#endif

View File

@ -0,0 +1,63 @@
#version 450
// 16-bit integers: use GL_AMD_gpu_shader_int16 when it is defined, otherwise fall back to
// GL_EXT_shader_16bit_storage, otherwise stop compilation with an error.
#if defined(GL_AMD_gpu_shader_int16)
#extension GL_AMD_gpu_shader_int16 : require
#elif defined(GL_EXT_shader_16bit_storage)
#extension GL_EXT_shader_16bit_storage : require
#else
#error No extension available for Int16.
#endif
// 8-bit types: no fallback chain, the extension is required unconditionally.
#extension GL_EXT_shader_8bit_storage : require
// Half floats: try the AMD extension, then GL_NV_gpu_shader5, then
// GL_EXT_shader_16bit_storage, otherwise stop compilation with an error.
#if defined(GL_AMD_gpu_shader_half_float)
#extension GL_AMD_gpu_shader_half_float : require
#elif defined(GL_NV_gpu_shader5)
#extension GL_NV_gpu_shader5 : require
#elif defined(GL_EXT_shader_16bit_storage)
#extension GL_EXT_shader_16bit_storage : require
#else
#error No extension available for FP16.
#endif
// Uniform block at binding 0: two-component narrow vectors at explicit byte offsets.
layout(binding = 0, std140) uniform block
{
layout(offset = 0) i16vec2 a;
layout(offset = 4) u16vec2 b;
layout(offset = 8) i8vec2 c;
layout(offset = 10) u8vec2 d;
layout(offset = 12) f16vec2 e;
} _26;
// Read-only storage buffer at binding 1: three-component narrow vectors at explicit byte offsets.
layout(binding = 1, std140) readonly buffer storage
{
layout(offset = 0) i16vec3 f;
layout(offset = 8) u16vec3 g;
layout(offset = 16) i8vec3 h;
layout(offset = 20) u8vec3 i;
layout(offset = 24) f16vec3 j;
} _53;
// In this variant pushconst is a plain struct, exposed through the ordinary uniform _76
// (no layout qualifiers or offsets on its members).
struct pushconst
{
i16vec4 k;
u16vec4 l;
i8vec4 m;
u8vec4 n;
f16vec4 o;
};
uniform pushconst _76;
// Stage interface. foo and bar share input location 0 (components 0 and 1); baz is at
// input location 1. Outputs p, q, r use locations 0..2.
layout(location = 0) out i16vec4 p;
layout(location = 0, component = 0) in int16_t foo;
layout(location = 1) out u16vec4 q;
layout(location = 0, component = 1) in uint16_t bar;
layout(location = 2) out f16vec4 r;
layout(location = 1) in float16_t baz;
// Entry point. Each narrow operand is converted to a 32-bit vector, the arithmetic is
// done at that width, and the result is converted back to the narrow output type.
void main()
{
// p = splat(foo) + (a, c) - (f / h, 1) + k + m, evaluated as ivec4.
p = i16vec4((((ivec4(int(foo)) + ivec4(ivec2(_26.a), ivec2(_26.c))) - ivec4(ivec3(_53.f) / ivec3(_53.h), 1)) + ivec4(_76.k)) + ivec4(_76.m));
// q: the same expression over the unsigned members (bar, b, d, g, i, l, n), evaluated as uvec4.
q = u16vec4((((uvec4(uint(bar)) + uvec4(uvec2(_26.b), uvec2(_26.d))) - uvec4(uvec3(_53.g) / uvec3(_53.i), 1u)) + uvec4(_76.l)) + uvec4(_76.n));
// r = splat(baz) + (e, 0, 1) - (j, 1) + o, evaluated as vec4; the half path has no division.
r = f16vec4(((vec4(float(baz)) + vec4(vec2(_26.e), 0.0, 1.0)) - vec4(vec3(_53.j), 1.0)) + vec4(_76.o));
// Position is written as a fixed constant.
gl_Position = vec4(0.0, 0.0, 0.0, 1.0);
}

View File

@ -0,0 +1,59 @@
#version 450
// 16-bit integers: use GL_AMD_gpu_shader_int16 when it is defined, otherwise fall back to
// GL_EXT_shader_16bit_storage, otherwise stop compilation with an error.
#if defined(GL_AMD_gpu_shader_int16)
#extension GL_AMD_gpu_shader_int16 : require
#elif defined(GL_EXT_shader_16bit_storage)
#extension GL_EXT_shader_16bit_storage : require
#else
#error No extension available for Int16.
#endif
// 8-bit types: no fallback chain, the extension is required unconditionally.
#extension GL_EXT_shader_8bit_storage : require
// Half floats: try the AMD extension, then GL_EXT_shader_16bit_storage. This variant has
// no GL_NV_gpu_shader5 branch.
#if defined(GL_AMD_gpu_shader_half_float)
#extension GL_AMD_gpu_shader_half_float : require
#elif defined(GL_EXT_shader_16bit_storage)
#extension GL_EXT_shader_16bit_storage : require
#else
#error No extension available for FP16.
#endif
// Uniform block at set 0, binding 0: two-component narrow vectors at explicit byte offsets.
layout(set = 0, binding = 0, std140) uniform block
{
layout(offset = 0) i16vec2 a;
layout(offset = 4) u16vec2 b;
layout(offset = 8) i8vec2 c;
layout(offset = 10) u8vec2 d;
layout(offset = 12) f16vec2 e;
} _26;
// Read-only storage buffer at set 0, binding 1: three-component narrow vectors at explicit
// byte offsets.
layout(set = 0, binding = 1, std140) readonly buffer storage
{
layout(offset = 0) i16vec3 f;
layout(offset = 8) u16vec3 g;
layout(offset = 16) i8vec3 h;
layout(offset = 20) u8vec3 i;
layout(offset = 24) f16vec3 j;
} _53;
// Push-constant block: four-component narrow vectors at explicit byte offsets.
layout(push_constant, std140) uniform pushconst
{
layout(offset = 0) i16vec4 k;
layout(offset = 8) u16vec4 l;
layout(offset = 16) i8vec4 m;
layout(offset = 20) u8vec4 n;
layout(offset = 24) f16vec4 o;
} _76;
// Stage interface. foo and bar share input location 0 (components 0 and 1); baz is at
// input location 1. Outputs p, q, r use locations 0..2.
layout(location = 0) out i16vec4 p;
layout(location = 0, component = 0) in int16_t foo;
layout(location = 1) out u16vec4 q;
layout(location = 0, component = 1) in uint16_t bar;
layout(location = 2) out f16vec4 r;
layout(location = 1) in float16_t baz;
// Entry point. Each narrow operand is converted to a 32-bit vector, the arithmetic is
// done at that width, and the result is converted back to the narrow output type.
void main()
{
// p = splat(foo) + (a, c) - (f / h, 1) + k + m, evaluated as ivec4.
p = i16vec4((((ivec4(int(foo)) + ivec4(ivec2(_26.a), ivec2(_26.c))) - ivec4(ivec3(_53.f) / ivec3(_53.h), 1)) + ivec4(_76.k)) + ivec4(_76.m));
// q: the same expression over the unsigned members (bar, b, d, g, i, l, n), evaluated as uvec4.
q = u16vec4((((uvec4(uint(bar)) + uvec4(uvec2(_26.b), uvec2(_26.d))) - uvec4(uvec3(_53.g) / uvec3(_53.i), 1u)) + uvec4(_76.l)) + uvec4(_76.n));
// r = splat(baz) + (e, 0, 1) - (j, 1) + o, evaluated as vec4; the half path has no division.
r = f16vec4(((vec4(float(baz)) + vec4(vec2(_26.e), 0.0, 1.0)) - vec4(vec3(_53.j), 1.0)) + vec4(_76.o));
// Position is written as a fixed constant.
gl_Position = vec4(0.0, 0.0, 0.0, 1.0);
}

View File

@ -0,0 +1,48 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
// Layout of the buffer that main0 receives as `constant block& _26` at buffer(0).
// Every member is a two-component vector of a narrow (8- or 16-bit) type.
struct block
{
short2 a; // signed 16-bit pair
ushort2 b; // unsigned 16-bit pair
char2 c; // signed 8-bit pair
uchar2 d; // unsigned 8-bit pair
half2 e; // half-precision float pair
};
// Layout of the buffer that main0 receives as `const device storage& _53` at buffer(1).
// Every member is a three-component vector of a narrow (8- or 16-bit) type.
struct storage
{
short3 f; // signed 16-bit triple
ushort3 g; // unsigned 16-bit triple
char3 h; // signed 8-bit triple; used as the divisor of f in main0
uchar3 i; // unsigned 8-bit triple; used as the divisor of g in main0
half3 j; // half-precision float triple
};
// Return value of main0: three narrow four-component outputs at user locations 0..2
// plus the position output.
struct main0_out
{
short4 p [[user(locn0)]];
ushort4 q [[user(locn1)]];
half4 r [[user(locn2)]];
float4 gl_Position [[position]];
};
// Stage-in argument of main0: one scalar per narrow type, bound to attributes 0..2.
struct main0_in
{
short foo [[attribute(0)]];
ushort bar [[attribute(1)]];
half baz [[attribute(2)]];
};
// Vertex entry point. Each narrow operand is first converted to a 32-bit vector, the
// arithmetic is carried out at that width, and the result is converted back to the
// narrow output type.
vertex main0_out main0(main0_in in [[stage_in]], constant block& _26 [[buffer(0)]], const device storage& _53 [[buffer(1)]])
{
main0_out out = {};
// p = splat(foo) + (a, c) - (f / h, 1), evaluated as int4.
out.p = short4((int4(int(in.foo)) + int4(int2(_26.a), int2(_26.c))) - int4(int3(_53.f) / int3(_53.h), 1));
// q: the same expression over the unsigned members (bar, b, d, g, i), evaluated as uint4.
out.q = ushort4((uint4(uint(in.bar)) + uint4(uint2(_26.b), uint2(_26.d))) - uint4(uint3(_53.g) / uint3(_53.i), 1u));
// r = splat(baz) + (e, 0, 1) - (j, 1), evaluated as float4; the half path has no division.
out.r = half4((float4(float(in.baz)) + float4(float2(_26.e), 0.0, 1.0)) - float4(float3(_53.j), 1.0));
// Position is written as a fixed constant.
out.gl_Position = float4(0.0, 0.0, 0.0, 1.0);
return out;
}

View File

@ -3,6 +3,8 @@
#extension GL_AMD_gpu_shader_half_float : require
#elif defined(GL_NV_gpu_shader5)
#extension GL_NV_gpu_shader5 : require
#elif defined(GL_EXT_shader_16bit_storage)
#extension GL_EXT_shader_16bit_storage : require
#else
#error No extension available for FP16.
#endif

View File

@ -0,0 +1,63 @@
#version 450
// 16-bit integers: use GL_AMD_gpu_shader_int16 when it is defined, otherwise fall back to
// GL_EXT_shader_16bit_storage, otherwise stop compilation with an error.
#if defined(GL_AMD_gpu_shader_int16)
#extension GL_AMD_gpu_shader_int16 : require
#elif defined(GL_EXT_shader_16bit_storage)
#extension GL_EXT_shader_16bit_storage : require
#else
#error No extension available for Int16.
#endif
// 8-bit types: no fallback chain, the extension is required unconditionally.
#extension GL_EXT_shader_8bit_storage : require
// Half floats: try the AMD extension, then GL_NV_gpu_shader5, then
// GL_EXT_shader_16bit_storage, otherwise stop compilation with an error.
#if defined(GL_AMD_gpu_shader_half_float)
#extension GL_AMD_gpu_shader_half_float : require
#elif defined(GL_NV_gpu_shader5)
#extension GL_NV_gpu_shader5 : require
#elif defined(GL_EXT_shader_16bit_storage)
#extension GL_EXT_shader_16bit_storage : require
#else
#error No extension available for FP16.
#endif
// Uniform block at binding 0: two-component narrow vectors at explicit byte offsets.
layout(binding = 0, std140) uniform block
{
layout(offset = 0) i16vec2 a;
layout(offset = 4) u16vec2 b;
layout(offset = 8) i8vec2 c;
layout(offset = 10) u8vec2 d;
layout(offset = 12) f16vec2 e;
} _26;
// Read-only storage buffer at binding 1: three-component narrow vectors at explicit byte offsets.
layout(binding = 1, std140) readonly buffer storage
{
layout(offset = 0) i16vec3 f;
layout(offset = 8) u16vec3 g;
layout(offset = 16) i8vec3 h;
layout(offset = 20) u8vec3 i;
layout(offset = 24) f16vec3 j;
} _53;
// In this variant pushconst is a plain struct, exposed through the ordinary uniform _76
// (no layout qualifiers or offsets on its members).
struct pushconst
{
i16vec4 k;
u16vec4 l;
i8vec4 m;
u8vec4 n;
f16vec4 o;
};
uniform pushconst _76;
// Stage interface. foo and bar share input location 0 (components 0 and 1); baz is at
// input location 1. Outputs p, q, r use locations 0..2.
layout(location = 0) out i16vec4 p;
layout(location = 0, component = 0) in int16_t foo;
layout(location = 1) out u16vec4 q;
layout(location = 0, component = 1) in uint16_t bar;
layout(location = 2) out f16vec4 r;
layout(location = 1) in float16_t baz;
// Entry point. Each narrow operand is converted to a 32-bit vector, the arithmetic is
// done at that width, and the result is converted back to the narrow output type.
void main()
{
// p = splat(foo) + (a, c) - (f / h, 1) + k + m, evaluated as ivec4.
p = i16vec4((((ivec4(int(foo)) + ivec4(ivec2(_26.a), ivec2(_26.c))) - ivec4(ivec3(_53.f) / ivec3(_53.h), 1)) + ivec4(_76.k)) + ivec4(_76.m));
// q: the same expression over the unsigned members (bar, b, d, g, i, l, n), evaluated as uvec4.
q = u16vec4((((uvec4(uint(bar)) + uvec4(uvec2(_26.b), uvec2(_26.d))) - uvec4(uvec3(_53.g) / uvec3(_53.i), 1u)) + uvec4(_76.l)) + uvec4(_76.n));
// r = splat(baz) + (e, 0, 1) - (j, 1) + o, evaluated as vec4; the half path has no division.
r = f16vec4(((vec4(float(baz)) + vec4(vec2(_26.e), 0.0, 1.0)) - vec4(vec3(_53.j), 1.0)) + vec4(_76.o));
// Position is written as a fixed constant.
gl_Position = vec4(0.0, 0.0, 0.0, 1.0);
}

View File

@ -0,0 +1,59 @@
#version 450
// 16-bit integers: use GL_AMD_gpu_shader_int16 when it is defined, otherwise fall back to
// GL_EXT_shader_16bit_storage, otherwise stop compilation with an error.
#if defined(GL_AMD_gpu_shader_int16)
#extension GL_AMD_gpu_shader_int16 : require
#elif defined(GL_EXT_shader_16bit_storage)
#extension GL_EXT_shader_16bit_storage : require
#else
#error No extension available for Int16.
#endif
// 8-bit types: no fallback chain, the extension is required unconditionally.
#extension GL_EXT_shader_8bit_storage : require
// Half floats: try the AMD extension, then GL_EXT_shader_16bit_storage. This variant has
// no GL_NV_gpu_shader5 branch.
#if defined(GL_AMD_gpu_shader_half_float)
#extension GL_AMD_gpu_shader_half_float : require
#elif defined(GL_EXT_shader_16bit_storage)
#extension GL_EXT_shader_16bit_storage : require
#else
#error No extension available for FP16.
#endif
// Uniform block at set 0, binding 0: two-component narrow vectors at explicit byte offsets.
layout(set = 0, binding = 0, std140) uniform block
{
layout(offset = 0) i16vec2 a;
layout(offset = 4) u16vec2 b;
layout(offset = 8) i8vec2 c;
layout(offset = 10) u8vec2 d;
layout(offset = 12) f16vec2 e;
} _26;
// Read-only storage buffer at set 0, binding 1: three-component narrow vectors at explicit
// byte offsets.
layout(set = 0, binding = 1, std140) readonly buffer storage
{
layout(offset = 0) i16vec3 f;
layout(offset = 8) u16vec3 g;
layout(offset = 16) i8vec3 h;
layout(offset = 20) u8vec3 i;
layout(offset = 24) f16vec3 j;
} _53;
// Push-constant block: four-component narrow vectors at explicit byte offsets.
layout(push_constant, std140) uniform pushconst
{
layout(offset = 0) i16vec4 k;
layout(offset = 8) u16vec4 l;
layout(offset = 16) i8vec4 m;
layout(offset = 20) u8vec4 n;
layout(offset = 24) f16vec4 o;
} _76;
// Stage interface. foo and bar share input location 0 (components 0 and 1); baz is at
// input location 1. Outputs p, q, r use locations 0..2.
layout(location = 0) out i16vec4 p;
layout(location = 0, component = 0) in int16_t foo;
layout(location = 1) out u16vec4 q;
layout(location = 0, component = 1) in uint16_t bar;
layout(location = 2) out f16vec4 r;
layout(location = 1) in float16_t baz;
// Entry point. Each narrow operand is converted to a 32-bit vector, the arithmetic is
// done at that width, and the result is converted back to the narrow output type.
void main()
{
// p = splat(foo) + (a, c) - (f / h, 1) + k + m, evaluated as ivec4.
p = i16vec4((((ivec4(int(foo)) + ivec4(ivec2(_26.a), ivec2(_26.c))) - ivec4(ivec3(_53.f) / ivec3(_53.h), 1)) + ivec4(_76.k)) + ivec4(_76.m));
// q: the same expression over the unsigned members (bar, b, d, g, i, l, n), evaluated as uvec4.
q = u16vec4((((uvec4(uint(bar)) + uvec4(uvec2(_26.b), uvec2(_26.d))) - uvec4(uvec3(_53.g) / uvec3(_53.i), 1u)) + uvec4(_76.l)) + uvec4(_76.n));
// r = splat(baz) + (e, 0, 1) - (j, 1) + o, evaluated as vec4; the half path has no division.
r = f16vec4(((vec4(float(baz)) + vec4(vec2(_26.e), 0.0, 1.0)) - vec4(vec3(_53.j), 1.0)) + vec4(_76.o));
// Position is written as a fixed constant.
gl_Position = vec4(0.0, 0.0, 0.0, 1.0);
}

View File

@ -0,0 +1,38 @@
#version 450 core
// GL_EXT_shader_16bit_storage doesn't support input/output.
// NOTE(review): presumably for that reason the AMD int16 / half-float extensions are
// required outright here, with no #if fallback chain - confirm.
#extension GL_EXT_shader_8bit_storage : require
#extension GL_AMD_gpu_shader_int16 : require
#extension GL_AMD_gpu_shader_half_float : require
// Inputs: one scalar per narrow type, each at its own location (0..2).
layout(location = 0) in int16_t foo;
layout(location = 1) in uint16_t bar;
layout(location = 2) in float16_t baz;
// Uniform block at binding 0. It has no instance name, so main() refers to the members directly.
layout(binding = 0) uniform block {
i16vec2 a;
u16vec2 b;
i8vec2 c;
u8vec2 d;
f16vec2 e;
};
// Read-only storage buffer at binding 1, likewise without an instance name.
layout(binding = 1) readonly buffer storage {
i16vec3 f;
u16vec3 g;
i8vec3 h;
u8vec3 i;
f16vec3 j;
};
// Outputs: one four-component vector per 16-bit type at locations 0..2.
layout(location = 0) out i16vec4 p;
layout(location = 1) out u16vec4 q;
layout(location = 2) out f16vec4 r;
// Entry point. Operands are converted to 32-bit types for the arithmetic and the result
// is converted back to the 16-bit output vector.
void main() {
// p = foo + (a, c) - (f / h, 1); the scalar foo is added to every component.
p = i16vec4(int(foo) + ivec4(ivec2(a), ivec2(c)) - ivec4(ivec3(f) / ivec3(h), 1));
// q: the same expression over the unsigned members (bar, b, d, g, i).
q = u16vec4(uint(bar) + uvec4(uvec2(b), uvec2(d)) - uvec4(uvec3(g) / uvec3(i), 1));
// r = baz + (e, 0, 1) - (j, 1); the half path has no division.
r = f16vec4(float(baz) + vec4(vec2(e), 0, 1) - vec4(vec3(j), 1));
// Position is written as a fixed constant.
gl_Position = vec4(0, 0, 0, 1);
}

View File

@ -0,0 +1,46 @@
#version 450 core
// GL_EXT_shader_16bit_storage doesn't support input/output.
// NOTE(review): presumably for that reason the AMD int16 / half-float extensions are
// required outright here, with no #if fallback chain - confirm.
#extension GL_EXT_shader_8bit_storage : require
#extension GL_AMD_gpu_shader_int16 : require
#extension GL_AMD_gpu_shader_half_float : require
// Inputs: foo and bar share location 0 (components 0 and 1); baz is at location 1.
layout(location = 0, component = 0) in int16_t foo;
layout(location = 0, component = 1) in uint16_t bar;
layout(location = 1) in float16_t baz;
// Uniform block at binding 0. It has no instance name, so main() refers to the members directly.
layout(binding = 0) uniform block {
i16vec2 a;
u16vec2 b;
i8vec2 c;
u8vec2 d;
f16vec2 e;
};
// Read-only storage buffer at binding 1, likewise without an instance name.
layout(binding = 1) readonly buffer storage {
i16vec3 f;
u16vec3 g;
i8vec3 h;
u8vec3 i;
f16vec3 j;
};
// Push-constant block with one four-component vector per narrow type.
layout(push_constant) uniform pushconst {
i16vec4 k;
u16vec4 l;
i8vec4 m;
u8vec4 n;
f16vec4 o;
};
// Outputs: one four-component vector per 16-bit type at locations 0..2.
layout(location = 0) out i16vec4 p;
layout(location = 1) out u16vec4 q;
layout(location = 2) out f16vec4 r;
// Entry point. Operands are converted to 32-bit types for the arithmetic and the result
// is converted back to the 16-bit output vector.
void main() {
// p = foo + (a, c) - (f / h, 1) + k + m; the scalar foo is added to every component.
p = i16vec4(int(foo) + ivec4(ivec2(a), ivec2(c)) - ivec4(ivec3(f) / ivec3(h), 1) + ivec4(k) + ivec4(m));
// q: the same expression over the unsigned members (bar, b, d, g, i, l, n).
q = u16vec4(uint(bar) + uvec4(uvec2(b), uvec2(d)) - uvec4(uvec3(g) / uvec3(i), 1) + uvec4(l) + uvec4(n));
// r = baz + (e, 0, 1) - (j, 1) + o; the half path has no division.
r = f16vec4(float(baz) + vec4(vec2(e), 0, 1) - vec4(vec3(j), 1) + vec4(o));
// Position is written as a fixed constant.
gl_Position = vec4(0, 0, 0, 1);
}

View File

@ -366,6 +366,14 @@ void CompilerGLSL::find_static_extensions()
if (type.basetype == SPIRType::Half)
require_extension_internal("GL_AMD_gpu_shader_half_float");
if (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt)
{
if (type.width == 8)
require_extension_internal("GL_EXT_shader_8bit_storage");
else if (type.width == 16)
require_extension_internal("GL_AMD_gpu_shader_int16");
}
}
}
@ -516,18 +524,34 @@ void CompilerGLSL::emit_header()
for (auto &ext : forced_extensions)
{
if (ext == "GL_AMD_gpu_shader_half_float" && !options.vulkan_semantics)
if (ext == "GL_AMD_gpu_shader_half_float")
{
// Special case, this extension has a potential fallback to another vendor extension in normal GLSL.
// GL_AMD_gpu_shader_half_float is a superset, so try that first.
statement("#if defined(GL_AMD_gpu_shader_half_float)");
statement("#extension GL_AMD_gpu_shader_half_float : require");
statement("#elif defined(GL_NV_gpu_shader5)");
statement("#extension GL_NV_gpu_shader5 : require");
if (!options.vulkan_semantics)
{
statement("#elif defined(GL_NV_gpu_shader5)");
statement("#extension GL_NV_gpu_shader5 : require");
}
statement("#elif defined(GL_EXT_shader_16bit_storage)");
statement("#extension GL_EXT_shader_16bit_storage : require");
statement("#else");
statement("#error No extension available for FP16.");
statement("#endif");
}
else if (ext == "GL_AMD_gpu_shader_int16")
{
// GL_AMD_gpu_shader_int16 is a superset, so try that first.
statement("#if defined(GL_AMD_gpu_shader_int16)");
statement("#extension GL_AMD_gpu_shader_int16 : require");
statement("#elif defined(GL_EXT_shader_16bit_storage)");
statement("#extension GL_EXT_shader_16bit_storage : require");
statement("#else");
statement("#error No extension available for Int16.");
statement("#endif");
}
else
statement("#extension ", ext, " : require");
}
@ -8834,9 +8858,23 @@ string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id)
case SPIRType::Boolean:
return "bool";
case SPIRType::Int:
return backend.basic_int_type;
switch (type.width) {
case 8:
return backend.basic_int8_type;
case 16:
return backend.basic_int16_type;
default:
return backend.basic_int_type;
}
case SPIRType::UInt:
return backend.basic_uint_type;
switch (type.width) {
case 8:
return backend.basic_uint8_type;
case 16:
return backend.basic_uint16_type;
default:
return backend.basic_uint_type;
}
case SPIRType::AtomicCounter:
return "atomic_uint";
case SPIRType::Half:
@ -8860,9 +8898,23 @@ string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id)
case SPIRType::Boolean:
return join("bvec", type.vecsize);
case SPIRType::Int:
return join("ivec", type.vecsize);
switch (type.width) {
case 8:
return join("i8vec", type.vecsize);
case 16:
return join("i16vec", type.vecsize);
default:
return join("ivec", type.vecsize);
}
case SPIRType::UInt:
return join("uvec", type.vecsize);
switch (type.width) {
case 8:
return join("u8vec", type.vecsize);
case 16:
return join("u16vec", type.vecsize);
default:
return join("uvec", type.vecsize);
}
case SPIRType::Half:
return join("f16vec", type.vecsize);
case SPIRType::Float:

View File

@ -361,6 +361,10 @@ protected:
bool long_long_literal_suffix = false;
const char *basic_int_type = "int";
const char *basic_uint_type = "uint";
const char *basic_int8_type = "int8_t";
const char *basic_uint8_type = "uint8_t";
const char *basic_int16_type = "int16_t";
const char *basic_uint16_type = "uint16_t";
const char *half_literal_suffix = "hf";
bool swizzle_is_function = false;
bool shared_is_implied = false;

View File

@ -392,6 +392,10 @@ string CompilerMSL::compile()
backend.uint32_t_literal_suffix = true;
backend.basic_int_type = "int";
backend.basic_uint_type = "uint";
backend.basic_int8_type = "char";
backend.basic_uint8_type = "uchar";
backend.basic_int16_type = "short";
backend.basic_uint16_type = "ushort";
backend.discard_literal = "discard_fragment()";
backend.swizzle_is_function = false;
backend.shared_is_implied = false;
@ -4269,10 +4273,10 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
type_name = "char";
break;
case SPIRType::Int:
type_name = (type.width == 16 ? "short" : "int");
type_name = (type.width == 8 ? "char" : (type.width == 16 ? "short" : "int"));
break;
case SPIRType::UInt:
type_name = (type.width == 16 ? "ushort" : "uint");
type_name = (type.width == 8 ? "uchar" : (type.width == 16 ? "ushort" : "uint"));
break;
case SPIRType::Int64:
type_name = "long"; // Currently unsupported