Merge pull request #485 from KhronosGroup/fix-484

Add true FP16 support.
2018-03-07 11:59:12 +01:00 · 2018-03-07 11:59:12 +01:00 · 2e5d06d1ce
commit 2e5d06d1ce
parent 770ed25f29 bdabd0c73a
18 changed files with 1342 additions and 22 deletions
--- a/reference/opt/shaders-hlsl/frag/fp16.desktop.frag
+++ b/reference/opt/shaders-hlsl/frag/fp16.desktop.frag
@ -0,0 +1,45 @@
+static min16float4 v4;
+static min16float3 v3;
+static min16float v1;
+static min16float2 v2;
+static float o1;
+static float2 o2;
+static float3 o3;
+static float4 o4;
+
+struct SPIRV_Cross_Input // Stage inputs: a min16float scalar and 2-, 3-, 4-component vectors on TEXCOORD0..3.
+{
+    min16float v1 : TEXCOORD0;
+    min16float2 v2 : TEXCOORD1;
+    min16float3 v3 : TEXCOORD2;
+    min16float4 v4 : TEXCOORD3;
+};
+
+struct SPIRV_Cross_Output // Stage outputs: 32-bit float scalar and vectors on SV_Target0..3.
+{
+    float o1 : SV_Target0;
+    float2 o2 : SV_Target1;
+    float3 o3 : SV_Target2;
+    float4 o4 : SV_Target3;
+};
+
+void frag_main() // Shader body of this listing: a single modf call on v4.
+{
+    min16float4 _324; // receives modf's second output; not read afterwards
+    min16float4 _387 = modf(v4, _324); // the returned value is not read either
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) // Entry point: copies inputs to the file-scope statics, runs frag_main, returns the outputs.
+{
+    v4 = stage_input.v4;
+    v3 = stage_input.v3;
+    v1 = stage_input.v1;
+    v2 = stage_input.v2;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.o1 = o1; // NOTE(review): nothing in this listing assigns o1..o4 before they are copied out
+    stage_output.o2 = o2;
+    stage_output.o3 = o3;
+    stage_output.o4 = o4;
+    return stage_output;
+}
--- a/reference/opt/shaders-msl/frag/fp16.desktop.frag
+++ b/reference/opt/shaders-msl/frag/fp16.desktop.frag
@ -0,0 +1,22 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct ResType // half4/int4 pair; nothing else in this listing references it.
+{
+    half4 _m0;
+    int4 _m1;
+};
+
+struct main0_in // Stage input: only v4 (user location 3) is declared in this listing.
+{
+    half4 v4 [[user(locn3)]];
+};
+
+fragment void main0(main0_in in [[stage_in]]) // Fragment entry point: a single modf call on in.v4.
+{
+    half4 _491; // receives modf's second output; not read afterwards
+    half4 _563 = modf(in.v4, _491); // the returned value is not read either
+}
+
--- a/reference/opt/shaders-msl/vert/functions.vert
+++ b/reference/opt/shaders-msl/vert/functions.vert
@ -33,14 +33,14 @@ struct main0_out
 template<typename T>
 T radians(T d)
 {
-    return d * 0.01745329251;
+    return d * T(0.01745329251); // the constant is now converted to T before the multiply
 }

 // Implementation of the GLSL degrees() function
 template<typename T>
 T degrees(T r)
 {
-    return r * 57.2957795131;
+    return r * T(57.2957795131); // the constant is now converted to T before the multiply
 }

 // Implementation of the GLSL findLSB() function
--- a/reference/opt/shaders/desktop-only/frag/fp16.desktop.frag
+++ b/reference/opt/shaders/desktop-only/frag/fp16.desktop.frag
@ -0,0 +1,17 @@
+#version 450
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct ResType // f16vec4/ivec4 pair; nothing else in this listing references it.
+{
+    f16vec4 _m0;
+    ivec4 _m1;
+};
+
+layout(location = 3) in f16vec4 v4;
+
+void main() // Entry point: a single modf call on the v4 input.
+{
+    f16vec4 _505; // receives modf's second output; not read afterwards
+    f16vec4 _577 = modf(v4, _505); // the returned value is not read either
+}
+
--- a/reference/shaders-hlsl/frag/fp16.desktop.frag
+++ b/reference/shaders-hlsl/frag/fp16.desktop.frag
@ -0,0 +1,179 @@
+static min16float4 v4;
+static min16float3 v3;
+static min16float v1;
+static min16float2 v2;
+static float o1;
+static float2 o2;
+static float3 o3;
+static float4 o4;
+
+struct SPIRV_Cross_Input // Stage inputs: a min16float scalar and 2-, 3-, 4-component vectors on TEXCOORD0..3.
+{
+    min16float v1 : TEXCOORD0;
+    min16float2 v2 : TEXCOORD1;
+    min16float3 v3 : TEXCOORD2;
+    min16float4 v4 : TEXCOORD3;
+};
+
+struct SPIRV_Cross_Output // Stage outputs: 32-bit float scalar and vectors on SV_Target0..3.
+{
+    float o1 : SV_Target0;
+    float2 o2 : SV_Target1;
+    float3 o3 : SV_Target2;
+    float4 o4 : SV_Target3;
+};
+
+float mod(float x, float y) // Scalar floor-based remainder of x by y.
+{
+    return x - y * floor(x / y); // floor (not truncation) makes the result follow the sign of y
+}
+
+float2 mod(float2 x, float2 y) // Two-component floor-based remainder of x by y.
+{
+    return x - y * floor(x / y); // same formula as the scalar overload, applied to float2 operands
+}
+
+float3 mod(float3 x, float3 y) // Three-component floor-based remainder of x by y.
+{
+    return x - y * floor(x / y); // same formula as the scalar overload, applied to float3 operands
+}
+
+float4 mod(float4 x, float4 y) // Four-component floor-based remainder of x by y.
+{
+    return x - y * floor(x / y); // same formula as the scalar overload, applied to float4 operands
+}
+
+uint SPIRV_Cross_packFloat2x16(min16float2 value) // Packs the two components of value into one uint.
+{
+    uint2 Packed = f32tof16(value); // per-component 16-bit float encodings
+    return Packed.x | (Packed.y << 16); // x lands in the low 16 bits, y in the high 16 bits
+}
+
+min16float2 SPIRV_Cross_unpackFloat2x16(uint value) // Inverse of the pack helper: splits a uint into two halves.
+{
+    return min16float2(f16tof32(uint2(value & 0xffff, value >> 16))); // low 16 bits become .x, high 16 bits become .y
+}
+
+void test_constants() // Declares eight min16float constants; none of them is read afterwards.
+{
+    min16float a = min16float(1.0);
+    min16float b = min16float(1.5);
+    min16float c = min16float(-1.5);
+    min16float d = min16float(0.0 / 0.0); // NaN written as a division
+    min16float e = min16float(1.0 / 0.0); // +Inf written as a division
+    min16float f = min16float(-1.0 / 0.0); // -Inf written as a division
+    min16float g = min16float(1014.0);
+    min16float h = min16float(9.5367431640625e-07); // this literal equals 2^-20
+}
+
+min16float test_result() // Returns the constant 1.0 as a min16float.
+{
+    return min16float(1.0);
+}
+
+void test_conversions() // Converts a half to int, uint, bool, float and double, then converts each back.
+{
+    min16float one = test_result();
+    int a = int(one);
+    uint b = uint(one);
+    bool c = one != min16float(0.0); // conversion to bool is written as a compare against zero
+    float d = float(one);
+    double e = double(one);
+    min16float a2 = min16float(a);
+    min16float b2 = min16float(b);
+    min16float c2 = min16float(c);
+    min16float d2 = min16float(d);
+    min16float e2 = min16float(e);
+}
+
+void test_builtins() // Calls intrinsics on the min16float inputs; each result is overwritten or left unread.
+{
+    min16float4 res = radians(v4);
+    res = degrees(v4);
+    res = sin(v4);
+    res = cos(v4);
+    res = tan(v4);
+    res = asin(v4);
+    res = atan2(v4, v3.xyzz);
+    res = atan(v4);
+    res = sinh(v4);
+    res = cosh(v4);
+    res = tanh(v4);
+    res = pow(v4, v4);
+    res = exp(v4);
+    res = log(v4);
+    res = exp2(v4);
+    res = log2(v4);
+    res = sqrt(v4);
+    res = rsqrt(v4);
+    res = abs(v4);
+    res = sign(v4);
+    res = floor(v4);
+    res = trunc(v4);
+    res = round(v4);
+    res = ceil(v4);
+    res = frac(v4);
+    res = mod(v4, v4); // the only mod overloads declared in this listing take float operands
+    min16float4 tmp;
+    min16float4 _144 = modf(v4, tmp);
+    res = _144;
+    res = min(v4, v4);
+    res = max(v4, v4);
+    res = clamp(v4, v4, v4);
+    res = lerp(v4, v4, v4);
+    bool4 _164 = bool4(v4.x < v4.x, v4.y < v4.y, v4.z < v4.z, v4.w < v4.w);
+    res = min16float4(_164.x ? v4.x : v4.x, _164.y ? v4.y : v4.y, _164.z ? v4.z : v4.z, _164.w ? v4.w : v4.w); // per-component select driven by _164
+    res = step(v4, v4);
+    res = smoothstep(v4, v4, v4);
+    bool4 btmp = isnan(v4);
+    btmp = isinf(v4);
+    res = mad(v4, v4, v4);
+    uint pack0 = SPIRV_Cross_packFloat2x16(v4.xy);
+    uint pack1 = SPIRV_Cross_packFloat2x16(v4.zw);
+    res = min16float4(SPIRV_Cross_unpackFloat2x16(pack0), SPIRV_Cross_unpackFloat2x16(pack1)); // pack then unpack round trip
+    min16float t0 = length(v4);
+    t0 = distance(v4, v4);
+    t0 = dot(v4, v4);
+    min16float3 res3 = cross(v3, v3);
+    res = normalize(v4);
+    res = faceforward(v4, v4, v4);
+    res = reflect(v4, v4);
+    res = refract(v4, v4, v1);
+    btmp = bool4(v4.x < v4.x, v4.y < v4.y, v4.z < v4.z, v4.w < v4.w); // vector comparisons are spelled out per component
+    btmp = bool4(v4.x <= v4.x, v4.y <= v4.y, v4.z <= v4.z, v4.w <= v4.w);
+    btmp = bool4(v4.x > v4.x, v4.y > v4.y, v4.z > v4.z, v4.w > v4.w);
+    btmp = bool4(v4.x >= v4.x, v4.y >= v4.y, v4.z >= v4.z, v4.w >= v4.w);
+    btmp = bool4(v4.x == v4.x, v4.y == v4.y, v4.z == v4.z, v4.w == v4.w);
+    btmp = bool4(v4.x != v4.x, v4.y != v4.y, v4.z != v4.z, v4.w != v4.w);
+    res = ddx(v4);
+    res = ddy(v4);
+    res = ddx_fine(v4);
+    res = ddy_fine(v4);
+    res = ddx_coarse(v4);
+    res = ddy_coarse(v4);
+    res = fwidth(v4); // fwidth appears three times in a row here
+    res = fwidth(v4);
+    res = fwidth(v4);
+}
+
+void frag_main() // Shader body: runs the three test functions in order.
+{
+    test_constants();
+    test_conversions();
+    test_builtins();
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) // Entry point: copies inputs to the file-scope statics, runs frag_main, returns the outputs.
+{
+    v4 = stage_input.v4;
+    v3 = stage_input.v3;
+    v1 = stage_input.v1;
+    v2 = stage_input.v2;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.o1 = o1; // NOTE(review): nothing in this listing assigns o1..o4 before they are copied out
+    stage_output.o2 = o2;
+    stage_output.o3 = o3;
+    stage_output.o4 = o4;
+    return stage_output;
+}
--- a/reference/shaders-msl/frag/fp16.desktop.frag
+++ b/reference/shaders-msl/frag/fp16.desktop.frag
@ -0,0 +1,180 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct ResType // Holds the two results of frexp in test_builtins: the half4 return value and the int4 output.
+{
+    half4 _m0;
+    int4 _m1;
+};
+
+struct main0_in // Stage inputs: half scalar and vectors at user locations 0..3.
+{
+    half4 v4 [[user(locn3)]];
+    half3 v3 [[user(locn2)]];
+    half2 v2 [[user(locn1)]];
+    half v1 [[user(locn0)]];
+};
+
+// Implementation of the GLSL mod() function, which is slightly different than Metal fmod()
+template<typename Tx, typename Ty>
+Tx mod(Tx x, Ty y)
+{
+    return x - y * floor(x / y); // floor (not truncation) makes the result follow the sign of y
+}
+
+// Implementation of the GLSL radians() function
+template<typename T>
+T radians(T d)
+{
+    return d * T(0.01745329251); // pi / 180, converted to T before the multiply
+}
+
+// Implementation of the GLSL degrees() function
+template<typename T>
+T degrees(T r)
+{
+    return r * T(57.2957795131); // 180 / pi, converted to T before the multiply
+}
+
+half2x2 test_mat2(thread const half2& a, thread const half2& b, thread const half2& c, thread const half2& d) // Multiplies two half2x2 matrices, each constructed from two of the arguments.
+{
+    return half2x2(half2(a), half2(b)) * half2x2(half2(c), half2(d)); // left matrix from (a, b), right matrix from (c, d)
+}
+
+half3x3 test_mat3(thread const half3& a, thread const half3& b, thread const half3& c, thread const half3& d, thread const half3& e, thread const half3& f) // Multiplies two half3x3 matrices, each constructed from three of the arguments.
+{
+    return half3x3(half3(a), half3(b), half3(c)) * half3x3(half3(d), half3(e), half3(f)); // left matrix from (a, b, c), right matrix from (d, e, f)
+}
+
+void test_constants() // Declares eight half constants using the h literal suffix; none of them is read afterwards.
+{
+    half a = 1.0h;
+    half b = 1.5h;
+    half c = -1.5h;
+    half d = (0.0h / 0.0h); // NaN written as a division
+    half e = (1.0h / 0.0h); // +Inf written as a division
+    half f = (-1.0h / 0.0h); // -Inf written as a division
+    half g = 1014.0h;
+    half h = 9.5367431640625e-07h; // this literal equals 2^-20
+}
+
+half test_result() // Returns the constant 1.0 as a half.
+{
+    return 1.0h;
+}
+
+void test_conversions() // Converts a half to int, uint, bool and float, then converts each back; no double is involved here.
+{
+    half one = test_result();
+    int a = int(one);
+    uint b = uint(one);
+    bool c = one != 0.0h; // conversion to bool is written as a compare against zero
+    float d = float(one);
+    half a2 = half(a);
+    half b2 = half(b);
+    half c2 = half(c);
+    half d2 = half(d);
+}
+
+void test_builtins(thread half4& v4, thread half3& v3, thread half& v1) // Calls built-in functions on the half inputs; each result is overwritten or left unread.
+{
+    half4 res = radians(v4);
+    res = degrees(v4);
+    res = sin(v4);
+    res = cos(v4);
+    res = tan(v4);
+    res = asin(v4);
+    res = atan2(v4, v3.xyzz);
+    res = atan(v4);
+    res = sinh(v4);
+    res = cosh(v4);
+    res = tanh(v4);
+    res = asinh(v4);
+    res = acosh(v4);
+    res = atanh(v4);
+    res = pow(v4, v4);
+    res = exp(v4);
+    res = log(v4);
+    res = exp2(v4);
+    res = log2(v4);
+    res = sqrt(v4);
+    res = rsqrt(v4);
+    res = abs(v4);
+    res = sign(v4);
+    res = floor(v4);
+    res = trunc(v4);
+    res = round(v4);
+    res = rint(v4);
+    res = ceil(v4);
+    res = fract(v4);
+    res = mod(v4, v4);
+    half4 tmp;
+    half4 _223 = modf(v4, tmp);
+    res = _223;
+    res = min(v4, v4);
+    res = max(v4, v4);
+    res = clamp(v4, v4, v4);
+    res = mix(v4, v4, v4);
+    bool4 _243 = v4 < v4;
+    res = half4(_243.x ? v4.x : v4.x, _243.y ? v4.y : v4.y, _243.z ? v4.z : v4.z, _243.w ? v4.w : v4.w); // per-component select driven by _243
+    res = step(v4, v4);
+    res = smoothstep(v4, v4, v4);
+    bool4 btmp = isnan(v4);
+    btmp = isinf(v4);
+    res = fma(v4, v4, v4);
+    ResType _267; // holds both frexp results
+    _267._m0 = frexp(v4, _267._m1);
+    int4 itmp = _267._m1;
+    res = _267._m0;
+    res = ldexp(res, itmp);
+    uint pack0 = as_type<uint>(v4.xy); // packing is a bit reinterpretation of a half2 as a uint
+    uint pack1 = as_type<uint>(v4.zw);
+    res = half4(as_type<half2>(pack0), as_type<half2>(pack1)); // and unpacking is the reverse reinterpretation
+    half t0 = length(v4);
+    t0 = distance(v4, v4);
+    t0 = dot(v4, v4);
+    half3 res3 = cross(v3, v3);
+    res = normalize(v4);
+    res = faceforward(v4, v4, v4);
+    res = reflect(v4, v4);
+    res = refract(v4, v4, v1);
+    btmp = v4 < v4;
+    btmp = v4 <= v4;
+    btmp = v4 > v4;
+    btmp = v4 >= v4;
+    btmp = v4 == v4;
+    btmp = v4 != v4;
+    res = dfdx(v4); // the dfdx/dfdy pair appears three times in a row here
+    res = dfdy(v4);
+    res = dfdx(v4);
+    res = dfdy(v4);
+    res = dfdx(v4);
+    res = dfdy(v4);
+    res = fwidth(v4); // as does fwidth
+    res = fwidth(v4);
+    res = fwidth(v4);
+}
+
+fragment void main0(main0_in in [[stage_in]]) // Fragment entry point: runs the matrix tests, then the three test functions.
+{
+    half2 param = in.v2; // every argument is first copied into a local before the call
+    half2 param_1 = in.v2;
+    half2 param_2 = in.v3.xy;
+    half2 param_3 = in.v3.xy;
+    half2x2 m0 = test_mat2(param, param_1, param_2, param_3);
+    half3 param_4 = in.v3;
+    half3 param_5 = in.v3;
+    half3 param_6 = in.v3;
+    half3 param_7 = in.v4.xyz;
+    half3 param_8 = in.v4.xyz;
+    half3 param_9 = in.v4.yzw;
+    half3x3 m1 = test_mat3(param_4, param_5, param_6, param_7, param_8, param_9);
+    test_constants();
+    test_conversions();
+    test_builtins(in.v4, in.v3, in.v1); // the stage inputs are handed over by reference
+}
+
--- a/reference/shaders-msl/vert/functions.vert
+++ b/reference/shaders-msl/vert/functions.vert
@ -33,14 +33,14 @@ struct main0_out
 template<typename T>
 T radians(T d)
 {
-    return d * 0.01745329251;
+    return d * T(0.01745329251); // the constant is now converted to T before the multiply
 }

 // Implementation of the GLSL degrees() function
 template<typename T>
 T degrees(T r)
 {
-    return r * 57.2957795131;
+    return r * T(57.2957795131); // the constant is now converted to T before the multiply
 }

 // Implementation of the GLSL findLSB() function
--- a/reference/shaders/desktop-only/frag/fp16.desktop.frag
+++ b/reference/shaders/desktop-only/frag/fp16.desktop.frag
@ -0,0 +1,153 @@
+#version 450
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct ResType // Holds the two results of frexp in test_builtins: the f16vec4 return value and the ivec4 output.
+{
+    f16vec4 _m0;
+    ivec4 _m1;
+};
+
+layout(location = 3) in f16vec4 v4;
+layout(location = 2) in f16vec3 v3;
+layout(location = 0) in float16_t v1;
+layout(location = 1) in f16vec2 v2;
+
+f16mat2 test_mat2(f16vec2 a, f16vec2 b, f16vec2 c, f16vec2 d) // Multiplies two f16mat2 matrices, each constructed from two of the arguments.
+{
+    return f16mat2(f16vec2(a), f16vec2(b)) * f16mat2(f16vec2(c), f16vec2(d)); // left matrix from (a, b), right matrix from (c, d)
+}
+
+f16mat3 test_mat3(f16vec3 a, f16vec3 b, f16vec3 c, f16vec3 d, f16vec3 e, f16vec3 f) // Multiplies two f16mat3 matrices, each constructed from three of the arguments.
+{
+    return f16mat3(f16vec3(a), f16vec3(b), f16vec3(c)) * f16mat3(f16vec3(d), f16vec3(e), f16vec3(f)); // left matrix from (a, b, c), right matrix from (d, e, f)
+}
+
+void test_constants() // Declares eight float16_t constants using the hf literal suffix; none of them is read afterwards.
+{
+    float16_t a = 1.0hf;
+    float16_t b = 1.5hf;
+    float16_t c = -1.5hf;
+    float16_t d = (0.0hf / 0.0hf); // NaN written as a division
+    float16_t e = (1.0hf / 0.0hf); // +Inf written as a division
+    float16_t f = (-1.0hf / 0.0hf); // -Inf written as a division
+    float16_t g = 1014.0hf;
+    float16_t h = 9.5367431640625e-07hf; // this literal equals 2^-20
+}
+
+float16_t test_result() // Returns the constant 1.0 as a float16_t.
+{
+    return 1.0hf;
+}
+
+void test_conversions() // Converts a half to int, uint, bool, float and double, then converts each back.
+{
+    float16_t one = test_result();
+    int a = int(one);
+    uint b = uint(one);
+    bool c = one != 0.0hf; // conversion to bool is written as a compare against zero
+    float d = float(one);
+    double e = double(one);
+    float16_t a2 = float16_t(a);
+    float16_t b2 = float16_t(b);
+    float16_t c2 = float16_t(c);
+    float16_t d2 = float16_t(d);
+    float16_t e2 = float16_t(e);
+}
+
+void test_builtins() // Calls built-in functions on the f16 inputs; each result is overwritten or left unread.
+{
+    f16vec4 res = radians(v4);
+    res = degrees(v4);
+    res = sin(v4);
+    res = cos(v4);
+    res = tan(v4);
+    res = asin(v4);
+    res = atan(v4, v3.xyzz); // two-argument atan
+    res = atan(v4);
+    res = sinh(v4);
+    res = cosh(v4);
+    res = tanh(v4);
+    res = asinh(v4);
+    res = acosh(v4);
+    res = atanh(v4);
+    res = pow(v4, v4);
+    res = exp(v4);
+    res = log(v4);
+    res = exp2(v4);
+    res = log2(v4);
+    res = sqrt(v4);
+    res = inversesqrt(v4);
+    res = abs(v4);
+    res = sign(v4);
+    res = floor(v4);
+    res = trunc(v4);
+    res = round(v4);
+    res = roundEven(v4);
+    res = ceil(v4);
+    res = fract(v4);
+    res = mod(v4, v4);
+    f16vec4 tmp;
+    f16vec4 _231 = modf(v4, tmp);
+    res = _231;
+    res = min(v4, v4);
+    res = max(v4, v4);
+    res = clamp(v4, v4, v4);
+    res = mix(v4, v4, v4);
+    res = mix(v4, v4, lessThan(v4, v4)); // mix with a boolean-vector selector
+    res = step(v4, v4);
+    res = smoothstep(v4, v4, v4);
+    bvec4 btmp = isnan(v4);
+    btmp = isinf(v4);
+    res = fma(v4, v4, v4);
+    ResType _275; // holds both frexp results
+    _275._m0 = frexp(v4, _275._m1);
+    ivec4 itmp = _275._m1;
+    res = _275._m0;
+    res = ldexp(res, itmp);
+    uint pack0 = packFloat2x16(v4.xy);
+    uint pack1 = packFloat2x16(v4.zw);
+    res = f16vec4(unpackFloat2x16(pack0), unpackFloat2x16(pack1)); // pack then unpack round trip
+    float16_t t0 = length(v4);
+    t0 = distance(v4, v4);
+    t0 = dot(v4, v4);
+    f16vec3 res3 = cross(v3, v3);
+    res = normalize(v4);
+    res = faceforward(v4, v4, v4);
+    res = reflect(v4, v4);
+    res = refract(v4, v4, v1);
+    btmp = lessThan(v4, v4);
+    btmp = lessThanEqual(v4, v4);
+    btmp = greaterThan(v4, v4);
+    btmp = greaterThanEqual(v4, v4);
+    btmp = equal(v4, v4);
+    btmp = notEqual(v4, v4);
+    res = dFdx(v4);
+    res = dFdy(v4);
+    res = dFdxFine(v4);
+    res = dFdyFine(v4);
+    res = dFdxCoarse(v4);
+    res = dFdyCoarse(v4);
+    res = fwidth(v4);
+    res = fwidthFine(v4);
+    res = fwidthCoarse(v4);
+}
+
+void main() // Entry point: runs the matrix tests, then the three test functions.
+{
+    f16vec2 param = v2; // every argument is first copied into a local before the call
+    f16vec2 param_1 = v2;
+    f16vec2 param_2 = v3.xy;
+    f16vec2 param_3 = v3.xy;
+    f16mat2 m0 = test_mat2(param, param_1, param_2, param_3);
+    f16vec3 param_4 = v3;
+    f16vec3 param_5 = v3;
+    f16vec3 param_6 = v3;
+    f16vec3 param_7 = v4.xyz;
+    f16vec3 param_8 = v4.xyz;
+    f16vec3 param_9 = v4.yzw;
+    f16mat3 m1 = test_mat3(param_4, param_5, param_6, param_7, param_8, param_9);
+    test_constants();
+    test_conversions();
+    test_builtins();
+}
+
--- a/shaders-hlsl/frag/fp16.desktop.frag
+++ b/shaders-hlsl/frag/fp16.desktop.frag
@ -0,0 +1,156 @@
+#version 450
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(location = 0) in float16_t v1;
+layout(location = 1) in f16vec2 v2;
+layout(location = 2) in f16vec3 v3;
+layout(location = 3) in f16vec4 v4;
+
+layout(location = 0) out float o1;
+layout(location = 1) out vec2 o2;
+layout(location = 2) out vec3 o3;
+layout(location = 3) out vec4 o4;
+
+#if 0
+// Doesn't work on glslang yet.
+f16mat2 test_mat2(f16vec2 a, f16vec2 b, f16vec2 c, f16vec2 d) // Product of two f16mat2 matrices; compiled out by the surrounding #if 0.
+{
+	return f16mat2(a, b) * f16mat2(c, d); // left matrix from (a, b), right matrix from (c, d)
+}
+
+f16mat3 test_mat3(f16vec3 a, f16vec3 b, f16vec3 c, f16vec3 d, f16vec3 e, f16vec3 f) // Product of two f16mat3 matrices; compiled out by the surrounding #if 0.
+{
+	return f16mat3(a, b, c) * f16mat3(d, e, f); // left matrix from (a, b, c), right matrix from (d, e, f)
+}
+#endif
+
+void test_constants() // Declares eight float16_t constants of different kinds; none of them is read afterwards.
+{
+	float16_t a = 1.0hf; // hf is the half literal suffix used throughout this shader
+	float16_t b = 1.5hf;
+	float16_t c = -1.5hf; // Negatives
+	float16_t d = (0.0hf / 0.0hf); // NaN
+	float16_t e = (1.0hf / 0.0hf); // +Inf
+	float16_t f = (-1.0hf / 0.0hf); // -Inf
+	float16_t g = 1014.0hf; // Large.
+	float16_t h = 0.000001hf; // Denormal
+}
+
+float16_t test_result() // Returns the constant 1.0 as a float16_t.
+{
+	return 1.0hf;
+}
+
+void test_conversions() // Converts a half to int, uint, bool, float and double, then converts each back.
+{
+	float16_t one = test_result(); // obtained through a call rather than written as a literal
+	int a = int(one);
+	uint b = uint(one);
+	bool c = bool(one);
+	float d = float(one);
+	double e = double(one);
+	float16_t a2 = float16_t(a);
+	float16_t b2 = float16_t(b);
+	float16_t c2 = float16_t(c);
+	float16_t d2 = float16_t(d);
+	float16_t e2 = float16_t(e);
+}
+
+void test_builtins() // Calls built-in functions on the f16 inputs; each result is overwritten or left unread.
+{
+	f16vec4 res;
+	res = radians(v4);
+	res = degrees(v4);
+	res = sin(v4);
+	res = cos(v4);
+	res = tan(v4);
+	res = asin(v4);
+	res = atan(v4, v3.xyzz); // two-argument atan
+	res = atan(v4);
+	res = sinh(v4);
+	res = cosh(v4);
+	res = tanh(v4);
+	//res = asinh(v4);
+	//res = acosh(v4);
+	//res = atanh(v4);
+	res = pow(v4, v4);
+	res = exp(v4);
+	res = log(v4);
+	res = exp2(v4);
+	res = log2(v4);
+	res = sqrt(v4);
+	res = inversesqrt(v4);
+	res = abs(v4);
+	res = sign(v4);
+	res = floor(v4);
+	res = trunc(v4);
+	res = round(v4);
+	//res = roundEven(v4);
+	res = ceil(v4);
+	res = fract(v4);
+	res = mod(v4, v4);
+	f16vec4 tmp;
+	res = modf(v4, tmp);
+	res = min(v4, v4);
+	res = max(v4, v4);
+	res = clamp(v4, v4, v4);
+	res = mix(v4, v4, v4);
+	res = mix(v4, v4, lessThan(v4, v4)); // mix with a boolean-vector selector
+	res = step(v4, v4);
+	res = smoothstep(v4, v4, v4);
+
+	bvec4 btmp = isnan(v4);
+	btmp = isinf(v4);
+	res = fma(v4, v4, v4);
+
+	//ivec4 itmp;
+	//res = frexp(v4, itmp);
+	//res = ldexp(res, itmp);
+
+	uint pack0 = packFloat2x16(v4.xy);
+	uint pack1 = packFloat2x16(v4.zw);
+	res = f16vec4(unpackFloat2x16(pack0), unpackFloat2x16(pack1)); // pack then unpack round trip
+
+	float16_t t0 = length(v4);
+	t0 = distance(v4, v4);
+	t0 = dot(v4, v4);
+	f16vec3 res3 = cross(v3, v3);
+	res = normalize(v4);
+	res = faceforward(v4, v4, v4);
+	res = reflect(v4, v4);
+	res = refract(v4, v4, v1);
+
+	btmp = lessThan(v4, v4);
+	btmp = lessThanEqual(v4, v4);
+	btmp = greaterThan(v4, v4);
+	btmp = greaterThanEqual(v4, v4);
+	btmp = equal(v4, v4);
+	btmp = notEqual(v4, v4);
+
+	res = dFdx(v4);
+	res = dFdy(v4);
+	res = dFdxFine(v4);
+	res = dFdyFine(v4);
+	res = dFdxCoarse(v4);
+	res = dFdyCoarse(v4);
+	res = fwidth(v4);
+	res = fwidthFine(v4);
+	res = fwidthCoarse(v4);
+
+	//res = interpolateAtCentroid(v4);
+	//res = interpolateAtSample(v4, 0);
+	//res = interpolateAtOffset(v4, f16vec2(0.1hf));
+}
+
+void main() // Entry point: runs the three test functions; the matrix tests are compiled out by #if 0.
+{
+	// Basic matrix tests.
+#if 0
+	f16mat2 m0 = test_mat2(v2, v2, v3.xy, v3.xy);
+	f16mat3 m1 = test_mat3(v3, v3, v3, v4.xyz, v4.xyz, v4.yzw);
+#endif
+
+	test_constants();
+	test_conversions();
+	test_builtins();
+}
--- a/shaders-msl/frag/fp16.desktop.frag
+++ b/shaders-msl/frag/fp16.desktop.frag
@ -0,0 +1,151 @@
+#version 450
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(location = 0) in float16_t v1;
+layout(location = 1) in f16vec2 v2;
+layout(location = 2) in f16vec3 v3;
+layout(location = 3) in f16vec4 v4;
+
+layout(location = 0) out float o1;
+layout(location = 1) out vec2 o2;
+layout(location = 2) out vec3 o3;
+layout(location = 3) out vec4 o4;
+
+f16mat2 test_mat2(f16vec2 a, f16vec2 b, f16vec2 c, f16vec2 d) // Multiplies two f16mat2 matrices, each constructed from two of the arguments.
+{
+	return f16mat2(a, b) * f16mat2(c, d); // left matrix from (a, b), right matrix from (c, d)
+}
+
+f16mat3 test_mat3(f16vec3 a, f16vec3 b, f16vec3 c, f16vec3 d, f16vec3 e, f16vec3 f) // Multiplies two f16mat3 matrices, each constructed from three of the arguments.
+{
+	return f16mat3(a, b, c) * f16mat3(d, e, f); // left matrix from (a, b, c), right matrix from (d, e, f)
+}
+
+void test_constants() // Declares eight float16_t constants of different kinds; none of them is read afterwards.
+{
+	float16_t a = 1.0hf; // hf is the half literal suffix used throughout this shader
+	float16_t b = 1.5hf;
+	float16_t c = -1.5hf; // Negatives
+	float16_t d = (0.0hf / 0.0hf); // NaN
+	float16_t e = (1.0hf / 0.0hf); // +Inf
+	float16_t f = (-1.0hf / 0.0hf); // -Inf
+	float16_t g = 1014.0hf; // Large.
+	float16_t h = 0.000001hf; // Denormal
+}
+
+float16_t test_result() // Returns the constant 1.0 as a float16_t.
+{
+	return 1.0hf;
+}
+
+void test_conversions() // Converts a half to int, uint, bool and float, then converts each back; the double cases are commented out.
+{
+	float16_t one = test_result(); // obtained through a call rather than written as a literal
+	int a = int(one);
+	uint b = uint(one);
+	bool c = bool(one);
+	float d = float(one);
+	//double e = double(one);
+	float16_t a2 = float16_t(a);
+	float16_t b2 = float16_t(b);
+	float16_t c2 = float16_t(c);
+	float16_t d2 = float16_t(d);
+	//float16_t e2 = float16_t(e);
+}
+
+void test_builtins() // Calls built-in functions on the f16 inputs; each result is overwritten or left unread.
+{
+	f16vec4 res;
+	res = radians(v4);
+	res = degrees(v4);
+	res = sin(v4);
+	res = cos(v4);
+	res = tan(v4);
+	res = asin(v4);
+	res = atan(v4, v3.xyzz); // two-argument atan
+	res = atan(v4);
+	res = sinh(v4);
+	res = cosh(v4);
+	res = tanh(v4);
+	res = asinh(v4);
+	res = acosh(v4);
+	res = atanh(v4);
+	res = pow(v4, v4);
+	res = exp(v4);
+	res = log(v4);
+	res = exp2(v4);
+	res = log2(v4);
+	res = sqrt(v4);
+	res = inversesqrt(v4);
+	res = abs(v4);
+	res = sign(v4);
+	res = floor(v4);
+	res = trunc(v4);
+	res = round(v4);
+	res = roundEven(v4);
+	res = ceil(v4);
+	res = fract(v4);
+	res = mod(v4, v4);
+	f16vec4 tmp;
+	res = modf(v4, tmp);
+	res = min(v4, v4);
+	res = max(v4, v4);
+	res = clamp(v4, v4, v4);
+	res = mix(v4, v4, v4);
+	res = mix(v4, v4, lessThan(v4, v4)); // mix with a boolean-vector selector
+	res = step(v4, v4);
+	res = smoothstep(v4, v4, v4);
+
+	bvec4 btmp = isnan(v4);
+	btmp = isinf(v4);
+	res = fma(v4, v4, v4);
+
+	ivec4 itmp;
+	res = frexp(v4, itmp);
+	res = ldexp(res, itmp); // feeds frexp's two results straight back into ldexp
+
+	uint pack0 = packFloat2x16(v4.xy);
+	uint pack1 = packFloat2x16(v4.zw);
+	res = f16vec4(unpackFloat2x16(pack0), unpackFloat2x16(pack1)); // pack then unpack round trip
+
+	float16_t t0 = length(v4);
+	t0 = distance(v4, v4);
+	t0 = dot(v4, v4);
+	f16vec3 res3 = cross(v3, v3);
+	res = normalize(v4);
+	res = faceforward(v4, v4, v4);
+	res = reflect(v4, v4);
+	res = refract(v4, v4, v1);
+
+	btmp = lessThan(v4, v4);
+	btmp = lessThanEqual(v4, v4);
+	btmp = greaterThan(v4, v4);
+	btmp = greaterThanEqual(v4, v4);
+	btmp = equal(v4, v4);
+	btmp = notEqual(v4, v4);
+
+	res = dFdx(v4);
+	res = dFdy(v4);
+	res = dFdxFine(v4);
+	res = dFdyFine(v4);
+	res = dFdxCoarse(v4);
+	res = dFdyCoarse(v4);
+	res = fwidth(v4);
+	res = fwidthFine(v4);
+	res = fwidthCoarse(v4);
+
+	//res = interpolateAtCentroid(v4);
+	//res = interpolateAtSample(v4, 0);
+	//res = interpolateAtOffset(v4, f16vec2(0.1hf));
+}
+
+void main() // Entry point: runs the matrix tests, then the three test functions.
+{
+	// Basic matrix tests.
+	f16mat2 m0 = test_mat2(v2, v2, v3.xy, v3.xy); // m0 and m1 are not read afterwards
+	f16mat3 m1 = test_mat3(v3, v3, v3, v4.xyz, v4.xyz, v4.yzw);
+
+	test_constants();
+	test_conversions();
+	test_builtins();
+}
--- a/shaders/desktop-only/frag/fp16.desktop.frag
+++ b/shaders/desktop-only/frag/fp16.desktop.frag
@ -0,0 +1,151 @@
+#version 450
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(location = 0) in float16_t v1;
+layout(location = 1) in f16vec2 v2;
+layout(location = 2) in f16vec3 v3;
+layout(location = 3) in f16vec4 v4;
+
+layout(location = 0) out float o1;
+layout(location = 1) out vec2 o2;
+layout(location = 2) out vec3 o3;
+layout(location = 3) out vec4 o4;
+
+f16mat2 test_mat2(f16vec2 a, f16vec2 b, f16vec2 c, f16vec2 d) // Multiplies two f16mat2 matrices, each constructed from two of the arguments.
+{
+	return f16mat2(a, b) * f16mat2(c, d); // left matrix from (a, b), right matrix from (c, d)
+}
+
+f16mat3 test_mat3(f16vec3 a, f16vec3 b, f16vec3 c, f16vec3 d, f16vec3 e, f16vec3 f) // Multiplies two f16mat3 matrices, each constructed from three of the arguments.
+{
+	return f16mat3(a, b, c) * f16mat3(d, e, f); // left matrix from (a, b, c), right matrix from (d, e, f)
+}
+
+void test_constants() // Declares eight float16_t constants of different kinds; none of them is read afterwards.
+{
+	float16_t a = 1.0hf; // hf is the half literal suffix used throughout this shader
+	float16_t b = 1.5hf;
+	float16_t c = -1.5hf; // Negatives
+	float16_t d = (0.0hf / 0.0hf); // NaN
+	float16_t e = (1.0hf / 0.0hf); // +Inf
+	float16_t f = (-1.0hf / 0.0hf); // -Inf
+	float16_t g = 1014.0hf; // Large.
+	float16_t h = 0.000001hf; // Denormal
+}
+
+float16_t test_result() // Returns the constant 1.0 as a float16_t.
+{
+	return 1.0hf;
+}
+
+void test_conversions() // Converts a half to int, uint, bool, float and double, then converts each back.
+{
+	float16_t one = test_result(); // obtained through a call rather than written as a literal
+	int a = int(one);
+	uint b = uint(one);
+	bool c = bool(one);
+	float d = float(one);
+	double e = double(one);
+	float16_t a2 = float16_t(a);
+	float16_t b2 = float16_t(b);
+	float16_t c2 = float16_t(c);
+	float16_t d2 = float16_t(d);
+	float16_t e2 = float16_t(e);
+}
+
+void test_builtins() // Calls built-in functions on the f16 inputs; each result is overwritten or left unread.
+{
+	f16vec4 res;
+	res = radians(v4);
+	res = degrees(v4);
+	res = sin(v4);
+	res = cos(v4);
+	res = tan(v4);
+	res = asin(v4);
+	res = atan(v4, v3.xyzz); // two-argument atan
+	res = atan(v4);
+	res = sinh(v4);
+	res = cosh(v4);
+	res = tanh(v4);
+	res = asinh(v4);
+	res = acosh(v4);
+	res = atanh(v4);
+	res = pow(v4, v4);
+	res = exp(v4);
+	res = log(v4);
+	res = exp2(v4);
+	res = log2(v4);
+	res = sqrt(v4);
+	res = inversesqrt(v4);
+	res = abs(v4);
+	res = sign(v4);
+	res = floor(v4);
+	res = trunc(v4);
+	res = round(v4);
+	res = roundEven(v4);
+	res = ceil(v4);
+	res = fract(v4);
+	res = mod(v4, v4);
+	f16vec4 tmp;
+	res = modf(v4, tmp);
+	res = min(v4, v4);
+	res = max(v4, v4);
+	res = clamp(v4, v4, v4);
+	res = mix(v4, v4, v4);
+	res = mix(v4, v4, lessThan(v4, v4)); // mix with a boolean-vector selector
+	res = step(v4, v4);
+	res = smoothstep(v4, v4, v4);
+
+	bvec4 btmp = isnan(v4);
+	btmp = isinf(v4);
+	res = fma(v4, v4, v4);
+
+	ivec4 itmp;
+	res = frexp(v4, itmp);
+	res = ldexp(res, itmp); // feeds frexp's two results straight back into ldexp
+
+	uint pack0 = packFloat2x16(v4.xy);
+	uint pack1 = packFloat2x16(v4.zw);
+	res = f16vec4(unpackFloat2x16(pack0), unpackFloat2x16(pack1)); // pack then unpack round trip
+
+	float16_t t0 = length(v4);
+	t0 = distance(v4, v4);
+	t0 = dot(v4, v4);
+	f16vec3 res3 = cross(v3, v3);
+	res = normalize(v4);
+	res = faceforward(v4, v4, v4);
+	res = reflect(v4, v4);
+	res = refract(v4, v4, v1);
+
+	btmp = lessThan(v4, v4);
+	btmp = lessThanEqual(v4, v4);
+	btmp = greaterThan(v4, v4);
+	btmp = greaterThanEqual(v4, v4);
+	btmp = equal(v4, v4);
+	btmp = notEqual(v4, v4);
+
+	res = dFdx(v4);
+	res = dFdy(v4);
+	res = dFdxFine(v4);
+	res = dFdyFine(v4);
+	res = dFdxCoarse(v4);
+	res = dFdyCoarse(v4);
+	res = fwidth(v4);
+	res = fwidthFine(v4);
+	res = fwidthCoarse(v4);
+
+	//res = interpolateAtCentroid(v4);
+	//res = interpolateAtSample(v4, 0);
+	//res = interpolateAtOffset(v4, f16vec2(0.1hf));
+}
+
+void main() // Entry point: runs the matrix tests, then the three test functions.
+{
+	// Basic matrix tests.
+	f16mat2 m0 = test_mat2(v2, v2, v3.xy, v3.xy); // m0 and m1 are not read afterwards
+	f16mat3 m1 = test_mat3(v3, v3, v3, v4.xyz, v4.xyz, v4.yzw);
+
+	test_constants();
+	test_conversions();
+	test_builtins();
+}
--- a/spirv_common.hpp
+++ b/spirv_common.hpp
@ -263,6 +263,7 @@ struct SPIRType : IVariant
 		Int64,
 		UInt64,
 		AtomicCounter,
+		Half,
 		Float,
 		Double,
 		Struct,
@ -751,6 +752,57 @@ struct SPIRConstant : IVariant
 		}
 	};

+	static inline float f16_to_f32(uint16_t u16_value) // Widens a 16-bit half-float bit pattern to the 32-bit float of equal value.
+	{
+		// Based on the GLM implementation.
+		int s = (u16_value >> 15) & 0x1; // sign: bit 15
+		int e = (u16_value >> 10) & 0x1f; // biased exponent: bits 10..14
+		int m = (u16_value >> 0) & 0x3ff; // mantissa: bits 0..9
+
+		union { // lets the result be assembled as a bit pattern and returned as a float
+			float f32;
+			uint32_t u32;
+		} u;
+
+		if (e == 0) // zero or denormal half
+		{
+			if (m == 0)
+			{
+				u.u32 = uint32_t(s) << 31; // signed zero: only the sign bit is set
+				return u.f32;
+			}
+			else
+			{
+				while ((m & 0x400) == 0) // shift until the leading 1 reaches bit 10 (the implicit-bit position)
+				{
+					m <<= 1;
+					e--; // each shift lowers the exponent by one
+				}
+
+				e++;
+				m &= ~0x400; // drop the leading 1; it is implicit in a normalized float
+			}
+		}
+		else if (e == 31) // all exponent bits set: infinity or NaN
+		{
+			if (m == 0)
+			{
+				u.u32 = (uint32_t(s) << 31) | 0x7f800000u; // infinity: all-ones float exponent, zero mantissa
+				return u.f32;
+			}
+			else
+			{
+				u.u32 = (uint32_t(s) << 31) | 0x7f800000u | (m << 13); // NaN: mantissa bits are carried over
+				return u.f32;
+			}
+		}
+
+		e += 127 - 15; // rebias the exponent from 15 to 127
+		m <<= 13; // widen the 10-bit mantissa to 23 bits
+		u.u32 = (uint32_t(s) << 31) | (e << 23) | m;
+		return u.f32;
+	}
+
 	inline uint32_t specialization_constant_id(uint32_t col, uint32_t row) const
 	{
 		return m.c[col].id[row];
@ -766,6 +818,16 @@ struct SPIRConstant : IVariant
 		return m.c[col].r[row].u32;
 	}

+	inline uint16_t scalar_u16(uint32_t col = 0, uint32_t row = 0) const // Low 16 bits of the constant's u32 word at (col, row).
+	{
+		return uint16_t(m.c[col].r[row].u32 & 0xffffu); // the upper 16 bits are masked off
+	}
+
+	inline float scalar_f16(uint32_t col = 0, uint32_t row = 0) const // The 16-bit constant at (col, row), decoded as a half and returned as a float.
+	{
+		return f16_to_f32(scalar_u16(col, row));
+	}
+
 	inline float scalar_f32(uint32_t col = 0, uint32_t row = 0) const
 	{
 		return m.c[col].r[row].f32;
@ -1054,6 +1116,11 @@ public:
 private:
 	uint64_t h = 0xcbf29ce484222325ull;
 };
+
+static inline bool type_is_floating_point(const SPIRType &type) // True when the base type is Half, Float or Double.
+{
+	return type.basetype == SPIRType::Half || type.basetype == SPIRType::Float || type.basetype == SPIRType::Double;
+}
 }

 #endif
--- a/spirv_cross.cpp
+++ b/spirv_cross.cpp
@ -812,8 +812,7 @@ bool Compiler::type_is_block_like(const SPIRType &type) const
 	if (type.basetype != SPIRType::Struct)
 		return false;

-	if (has_decoration(type.self, DecorationBlock) ||
-	    has_decoration(type.self, DecorationBufferBlock))
+	if (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock))
 	{
 		return true;
 	}
@ -1565,7 +1564,14 @@ void Compiler::parse(const Instruction &instruction)
 		uint32_t id = ops[0];
 		uint32_t width = ops[1];
 		auto &type = set<SPIRType>(id);
-		type.basetype = width > 32 ? SPIRType::Double : SPIRType::Float;
+		if (width == 64)
+			type.basetype = SPIRType::Double;
+		else if (width == 32)
+			type.basetype = SPIRType::Float;
+		else if (width == 16)
+			type.basetype = SPIRType::Half;
+		else
+			SPIRV_CROSS_THROW("Unrecognized bit-width of floating point type.");
 		type.width = width;
 		break;
 	}
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@ -319,6 +319,9 @@ void CompilerGLSL::find_static_extensions()
 				if (!options.es)
 					require_extension("GL_ARB_gpu_shader_int64");
 			}
+
+			if (type.basetype == SPIRType::Half)
+				require_extension("GL_AMD_gpu_shader_half_float");
 		}
 	}

@ -866,15 +869,20 @@ uint32_t CompilerGLSL::type_to_packed_base_size(const SPIRType &type, BufferPack
 	case SPIRType::Int64:
 	case SPIRType::UInt64:
 		return 8;
-	default:
+	case SPIRType::Float:
+	case SPIRType::Int:
+	case SPIRType::UInt:
 		return 4;
+	case SPIRType::Half:
+		return 2;
+
+	default:
+		SPIRV_CROSS_THROW("Unrecognized type in type_to_packed_base_size.");
 	}
 }

 uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, uint64_t flags, BufferPackingStandard packing)
 {
-	const uint32_t base_alignment = type_to_packed_base_size(type, packing);
-
 	if (!type.array.empty())
 	{
 		uint32_t minimum_alignment = 1;
@ -908,6 +916,8 @@ uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, uint64_t f
 	}
 	else
 	{
+		const uint32_t base_alignment = type_to_packed_base_size(type, packing);
+
 		// Vectors are *not* aligned in HLSL, but there's an extra rule where vectors cannot straddle
 		// a vec4, this is handled outside since that part knows our current offset.
 		if (type.columns == 1 && packing_is_hlsl(packing))
@ -989,7 +999,6 @@ uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, uint64_t flags,
 		       type_to_packed_array_stride(type, flags, packing);
 	}

-	const uint32_t base_alignment = type_to_packed_base_size(type, packing);
 	uint32_t size = 0;

 	if (type.basetype == SPIRType::Struct)
@ -1017,6 +1026,8 @@ uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, uint64_t flags,
 	}
 	else
 	{
+		const uint32_t base_alignment = type_to_packed_base_size(type, packing);
+
 		if (type.columns == 1)
 			size = type.vecsize * base_alignment;

@ -2590,6 +2601,61 @@ string CompilerGLSL::constant_expression(const SPIRConstant &c)
 #pragma warning(disable : 4996)
 #endif

+// Formats the half-precision scalar stored at (col, row) of constant c as shader source text.
+//
+// Backends with a half literal suffix ("hf" unless a backend overrides it) get a suffixed
+// literal. A backend that clears the suffix gets the value wrapped in a constructor-style
+// cast to the scalar Half type name returned by type_to_glsl().
+//
+// NaN and +/-infinity cannot be written as plain literals, so they are emitted as a division
+// by zero whose numerator selects the value: 0.0 -> NaN, 1.0 -> +inf, -1.0 -> -inf.
+//
+// c         constant holding the value.
+// col, row  element selector, forwarded unchanged to c.scalar_f16().
+// Returns the literal or expression text for that element.
+string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
+{
+	float float_value = c.scalar_f16(col, row);
+	const char *suffix = backend.half_literal_suffix;
+	bool non_finite = std::isnan(float_value) || std::isinf(float_value);
+
+	// Numerator text. isnan/isinf are exhaustive for non-finite floats, so no error path is
+	// needed here: the value is NaN, or an infinity identified by its sign.
+	string value;
+	if (!non_finite)
+		value = convert_to_string(float_value);
+	else if (std::isnan(float_value))
+		value = "0.0";
+	else if (float_value > 0.0f)
+		value = "1.0";
+	else
+		value = "-1.0";
+
+	if (suffix)
+	{
+		if (!non_finite)
+			return value + suffix;
+
+		// There is no uintBitsToFloat for 16-bit, so have to rely on legacy fallback here.
+		return join("(", value, suffix, " / 0.0", suffix, ")");
+	}
+
+	// In HLSL (FXC), it's important to cast the literals to half precision right away.
+	// There is no literal for it.
+	SPIRType type;
+	type.basetype = SPIRType::Half;
+	type.vecsize = 1;
+	type.columns = 1;
+	string type_name = type_to_glsl(type);
+
+	if (!non_finite)
+		return join(type_name, "(", value, ")");
+
+	// Without a suffix the division is written with plain literals and the cast wraps the
+	// whole quotient, e.g. T(1.0 / 0.0), mirroring the cast used for finite values.
+	return join(type_name, "(", value, " / 0.0)");
+}
+
 string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
 {
 	string res;
@ -2735,7 +2801,7 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t
 	bool splat = backend.use_constructor_splatting && c.vector_size() > 1;
 	bool swizzle_splat = backend.can_swizzle_scalar && c.vector_size() > 1;

-	if (type.basetype != SPIRType::Float && type.basetype != SPIRType::Double)
+	if (!type_is_floating_point(type))
 	{
 		// Cannot swizzle literal integers as a special case.
 		swizzle_splat = false;
@ -2789,6 +2855,28 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t

 	switch (type.basetype)
 	{
+	case SPIRType::Half:
+		if (splat || swizzle_splat)
+		{
+			res += convert_half_to_string(c, vector, 0);
+			if (swizzle_splat)
+				res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
+		}
+		else
+		{
+			for (uint32_t i = 0; i < c.vector_size(); i++)
+			{
+				if (options.vulkan_semantics && c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
+					res += to_name(c.specialization_constant_id(vector, i));
+				else
+					res += convert_half_to_string(c, vector, i);
+
+				if (i + 1 < c.vector_size())
+					res += ", ";
+			}
+		}
+		break;
+
 	case SPIRType::Float:
 		if (splat || swizzle_splat)
 		{
@ -3333,6 +3421,10 @@ bool CompilerGLSL::to_trivial_mix_op(const SPIRType &type, string &op, uint32_t
 		ret = cleft->scalar() == 0 && cright->scalar() == 1;
 		break;

+	case SPIRType::Half:
+		ret = cleft->scalar_f16() == 0.0f && cright->scalar_f16() == 1.0f;
+		break;
+
 	case SPIRType::Float:
 		ret = cleft->scalar_f32() == 0.0f && cright->scalar_f32() == 1.0f;
 		break;
@ -4340,6 +4432,10 @@ string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &i
 		return "uint64BitsToDouble";
 	else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UInt && in_type.vecsize == 2)
 		return "packUint2x32";
+	else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
+		return "unpackFloat2x16";
+	else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2)
+		return "packFloat2x16";
 	else
 		return "";
 }
@ -5738,8 +5834,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		bool splat = in_type.vecsize == 1 && in_type.columns == 1 && !composite && backend.use_constructor_splatting;
 		bool swizzle_splat = in_type.vecsize == 1 && in_type.columns == 1 && backend.can_swizzle_scalar;

-		if (ids[elems[0]].get_type() == TypeConstant &&
-		    (in_type.basetype != SPIRType::Float && in_type.basetype != SPIRType::Double))
+		if (ids[elems[0]].get_type() == TypeConstant && !type_is_floating_point(in_type))
 		{
 			// Cannot swizzle literal integers as a special case.
 			swizzle_splat = false;
@ -6450,6 +6545,26 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 			require_extension("GL_OES_standard_derivatives");
 		break;

+	case OpFwidthCoarse:
+		UFOP(fwidthCoarse);
+		if (options.es)
+		{
+			SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
+		}
+		if (options.version < 450)
+			require_extension("GL_ARB_derivative_control");
+		break;
+
+	case OpFwidthFine:
+		UFOP(fwidthFine);
+		if (options.es)
+		{
+			SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
+		}
+		if (options.version < 450)
+			require_extension("GL_ARB_derivative_control");
+		break;
+
 	// Bitfield
 	case OpBitFieldInsert:
 		// TODO: The signedness of inputs is strict in GLSL, but not in SPIR-V, bitcast if necessary.
@ -7747,6 +7862,8 @@ string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id)
 			return backend.basic_uint_type;
 		case SPIRType::AtomicCounter:
 			return "atomic_uint";
+		case SPIRType::Half:
+			return "float16_t";
 		case SPIRType::Float:
 			return "float";
 		case SPIRType::Double:
@ -7769,6 +7886,8 @@ string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id)
 			return join("ivec", type.vecsize);
 		case SPIRType::UInt:
 			return join("uvec", type.vecsize);
+		case SPIRType::Half:
+			return join("f16vec", type.vecsize);
 		case SPIRType::Float:
 			return join("vec", type.vecsize);
 		case SPIRType::Double:
@ -7791,6 +7910,8 @@ string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id)
 			return join("imat", type.vecsize);
 		case SPIRType::UInt:
 			return join("umat", type.vecsize);
+		case SPIRType::Half:
+			return join("f16mat", type.vecsize);
 		case SPIRType::Float:
 			return join("mat", type.vecsize);
 		case SPIRType::Double:
@ -7810,6 +7931,8 @@ string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id)
 			return join("imat", type.columns, "x", type.vecsize);
 		case SPIRType::UInt:
 			return join("umat", type.columns, "x", type.vecsize);
+		case SPIRType::Half:
+			return join("f16mat", type.columns, "x", type.vecsize);
 		case SPIRType::Float:
 			return join("mat", type.columns, "x", type.vecsize);
 		case SPIRType::Double:
--- a/spirv_glsl.hpp
+++ b/spirv_glsl.hpp
@ -319,6 +319,7 @@ protected:
 		bool long_long_literal_suffix = false;
 		const char *basic_int_type = "int";
 		const char *basic_uint_type = "uint";
+		const char *half_literal_suffix = "hf";
 		bool swizzle_is_function = false;
 		bool shared_is_implied = false;
 		bool flexible_member_array_supported = true;
@ -528,6 +529,7 @@ protected:
 	const Instruction *get_next_instruction_in_block(const Instruction &instr);
 	static uint32_t mask_relevant_memory_semantics(uint32_t semantics);

+	std::string convert_half_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
 	std::string convert_float_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
 	std::string convert_double_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);

--- a/spirv_hlsl.cpp
+++ b/spirv_hlsl.cpp
@ -394,6 +394,8 @@ string CompilerHLSL::type_to_glsl(const SPIRType &type, uint32_t id)
 			return backend.basic_uint_type;
 		case SPIRType::AtomicCounter:
 			return "atomic_uint";
+		case SPIRType::Half:
+			return "min16float";
 		case SPIRType::Float:
 			return "float";
 		case SPIRType::Double:
@ -416,6 +418,8 @@ string CompilerHLSL::type_to_glsl(const SPIRType &type, uint32_t id)
 			return join("int", type.vecsize);
 		case SPIRType::UInt:
 			return join("uint", type.vecsize);
+		case SPIRType::Half:
+			return join("min16float", type.vecsize);
 		case SPIRType::Float:
 			return join("float", type.vecsize);
 		case SPIRType::Double:
@ -438,6 +442,8 @@ string CompilerHLSL::type_to_glsl(const SPIRType &type, uint32_t id)
 			return join("int", type.columns, "x", type.vecsize);
 		case SPIRType::UInt:
 			return join("uint", type.columns, "x", type.vecsize);
+		case SPIRType::Half:
+			return join("min16float", type.columns, "x", type.vecsize);
 		case SPIRType::Float:
 			return join("float", type.columns, "x", type.vecsize);
 		case SPIRType::Double:
@ -1427,6 +1433,23 @@ void CompilerHLSL::emit_resources()
 		statement("");
 	}

+	if (requires_explicit_fp16_packing)
+	{
+		// HLSL does not pack into a single word sadly :(
+		statement("uint SPIRV_Cross_packFloat2x16(min16float2 value)");
+		begin_scope();
+		statement("uint2 Packed = f32tof16(value);");
+		statement("return Packed.x | (Packed.y << 16);");
+		end_scope();
+		statement("");
+
+		statement("min16float2 SPIRV_Cross_unpackFloat2x16(uint value)");
+		begin_scope();
+		statement("return min16float2(f16tof32(uint2(value & 0xffff, value >> 16)));");
+		end_scope();
+		statement("");
+	}
+
 	// HLSL does not seem to have builtins for these operation, so roll them by hand ...
 	if (requires_unorm8_packing)
 	{
@ -2839,6 +2862,24 @@ string CompilerHLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &i
 		return "asdouble";
 	else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64)
 		return "asdouble";
+	else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
+	{
+		if (!requires_explicit_fp16_packing)
+		{
+			requires_explicit_fp16_packing = true;
+			force_recompile = true;
+		}
+		return "SPIRV_Cross_unpackFloat2x16";
+	}
+	else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2)
+	{
+		if (!requires_explicit_fp16_packing)
+		{
+			requires_explicit_fp16_packing = true;
+			force_recompile = true;
+		}
+		return "SPIRV_Cross_packFloat2x16";
+	}
 	else
 		return "";
 }
@ -2857,6 +2898,14 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 		emit_unary_func_op(result_type, id, args[0], "frac");
 		break;

+	case GLSLstd450RoundEven:
+		SPIRV_CROSS_THROW("roundEven is not supported on HLSL.");
+
+	case GLSLstd450Acosh:
+	case GLSLstd450Asinh:
+	case GLSLstd450Atanh:
+		SPIRV_CROSS_THROW("Inverse hyperbolics are not supported on HLSL.");
+
 	case GLSLstd450FMix:
 	case GLSLstd450IMix:
 		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "lerp");
@ -3574,6 +3623,12 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		UFOP(ddy_coarse);
 		break;

+	case OpFwidth:
+	case OpFwidthCoarse:
+	case OpFwidthFine:
+		UFOP(fwidth);
+		break;
+
 	case OpLogicalNot:
 	{
 		auto result_type = ops[0];
@ -4166,6 +4221,7 @@ string CompilerHLSL::compile()
 	CompilerGLSL::options.vulkan_semantics = true;
 	backend.float_literal_suffix = true;
 	backend.double_literal_suffix = false;
+	backend.half_literal_suffix = nullptr;
 	backend.long_long_literal_suffix = true;
 	backend.uint32_t_literal_suffix = true;
 	backend.basic_int_type = "int";
--- a/spirv_hlsl.hpp
+++ b/spirv_hlsl.hpp
@ -155,6 +155,7 @@ private:
 	bool requires_op_fmod = false;
 	bool requires_textureProj = false;
 	bool requires_fp16_packing = false;
+	bool requires_explicit_fp16_packing = false;
 	bool requires_unorm8_packing = false;
 	bool requires_snorm8_packing = false;
 	bool requires_unorm16_packing = false;
--- a/spirv_msl.cpp
+++ b/spirv_msl.cpp
@ -25,7 +25,7 @@ using namespace spv;
 using namespace spirv_cross;
 using namespace std;

-static const uint32_t k_unknown_location = ~0;
+static const uint32_t k_unknown_location = ~0u;

 CompilerMSL::CompilerMSL(vector<uint32_t> spirv_, vector<MSLVertexAttr> *p_vtx_attrs,
                         vector<MSLResourceBinding> *p_res_bindings)
@ -114,6 +114,7 @@ string CompilerMSL::compile()
 	CompilerGLSL::options.es = false;
 	CompilerGLSL::options.version = 450;
 	backend.float_literal_suffix = false;
+	backend.half_literal_suffix = "h";
 	backend.uint32_t_literal_suffix = true;
 	backend.basic_int_type = "int";
 	backend.basic_uint_type = "uint";
@ -597,8 +598,7 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage)
 		else if (type.basetype == SPIRType::Boolean || type.basetype == SPIRType::Char ||
 		         type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt ||
 		         type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64 ||
-		         type.basetype == SPIRType::Float || type.basetype == SPIRType::Double ||
-		         type.basetype == SPIRType::Boolean)
+		         type_is_floating_point(type) || type.basetype == SPIRType::Boolean)
 		{
 			bool is_builtin = is_builtin_variable(*p_var);
 			BuiltIn builtin = BuiltIn(get_decoration(p_var->self, DecorationBuiltIn));
@ -996,7 +996,7 @@ void CompilerMSL::emit_custom_functions()
 			statement("template<typename T>");
 			statement("T radians(T d)");
 			begin_scope();
-			statement("return d * 0.01745329251;");
+			statement("return d * T(0.01745329251);");
 			end_scope();
 			statement("");
 			break;
@ -1006,7 +1006,7 @@ void CompilerMSL::emit_custom_functions()
 			statement("template<typename T>");
 			statement("T degrees(T r)");
 			begin_scope();
-			statement("return r * 57.2957795131;");
+			statement("return r * T(57.2957795131);");
 			end_scope();
 			statement("");
 			break;
@ -1472,6 +1472,12 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 		UFOP(dfdy);
 		break;

+	case OpFwidth:
+	case OpFwidthCoarse:
+	case OpFwidthFine:
+		UFOP(fwidth);
+		break;
+
 	// Bitfield
 	case OpBitFieldInsert:
 		QFOP(insert_bits);
@ -2257,7 +2263,7 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
 	bool forward = should_forward(coord);
 	auto coord_expr = to_enclosed_expression(coord);
 	auto &coord_type = expression_type(coord);
-	bool coord_is_fp = (coord_type.basetype == SPIRType::Float) || (coord_type.basetype == SPIRType::Double);
+	bool coord_is_fp = type_is_floating_point(coord_type);
 	bool is_cube_fetch = false;

 	string tex_coords = coord_expr;
@ -3311,8 +3317,11 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
 	case SPIRType::UInt64:
 		type_name = "size_t";
 		break;
+	case SPIRType::Half:
+		type_name = "half";
+		break;
 	case SPIRType::Float:
-		type_name = (type.width == 16 ? "half" : "float");
+		type_name = "float";
 		break;
 	case SPIRType::Double:
 		type_name = "double"; // Currently unsupported
@ -3449,7 +3458,9 @@ string CompilerMSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in
 	    (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double) ||
 	    (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double) ||
 	    (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64) ||
-	    (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64))
+	    (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64) ||
+	    (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt) ||
+	    (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half))
 		return "as_type<" + type_to_glsl(out_type) + ">";

 	return "";