Merge pull request #339 from KhronosGroup/fixup-pr-338

Fixups for PR #338
2017-11-23 00:08:44 +01:00 · 2017-11-23 00:08:44 +01:00 · 7f438371f9
commit 7f438371f9
parent ac607e5382 bcdff2d2e1
21 changed files with 607 additions and 9 deletions
--- a/.travis.yml
+++ b/.travis.yml
@ -10,7 +10,7 @@ dist: trusty

 # We check out glslang and SPIRV-Tools at specific revisions to avoid test output mismatches
 env:
-  - GLSLANG_REV=f0bc598dd7871689f25514b22a82f7455d762bef SPIRV_TOOLS_REV=40e9c60ffea56f45f388835e6945b01d4d8b022d
+  - GLSLANG_REV=698bf7547a96b6feb7291e8ddc0d5d16475dbae2 SPIRV_TOOLS_REV=40e9c60ffea56f45f388835e6945b01d4d8b022d

 before_script:
  - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; brew install python3; fi
--- a/reference/shaders/amd/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag
+++ b/reference/shaders/amd/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag
@ -0,0 +1,11 @@
+#version 450
+#extension GL_AMD_shader_fragment_mask : require
+
+layout(binding = 0) uniform sampler2DMS t;
+
+void main()
+{
+    vec4 test2 = fragmentFetchAMD(t, 4u);
+    uint testi2 = fragmentMaskFetchAMD(t);
+}
+
--- a/reference/shaders/amd/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag.vk
+++ b/reference/shaders/amd/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag.vk
@ -0,0 +1,11 @@
+#version 450
+#extension GL_AMD_shader_fragment_mask : require
+
+layout(input_attachment_index = 0, set = 0, binding = 0) uniform subpassInputMS t;
+
+void main()
+{
+    vec4 test2 = fragmentFetchAMD(t, 4u);
+    uint testi2 = fragmentMaskFetchAMD(t);
+}
+
--- a/reference/shaders/amd/fs.invalid.frag
+++ b/reference/shaders/amd/fs.invalid.frag
@ -0,0 +1,15 @@
+#version 450
+#extension GL_AMD_shader_fragment_mask : require
+#extension GL_AMD_shader_explicit_vertex_parameter : require
+
+uniform sampler2DMS texture1;
+
+layout(location = 0) in vec4 vary;
+
+void main()
+{
+    uint testi1 = fragmentMaskFetchAMD(texture1, ivec2(0));
+    vec4 test1 = fragmentFetchAMD(texture1, ivec2(1), 2u);
+    vec4 pos = interpolateAtVertexAMD(vary, 0u);
+}
+
--- a/reference/shaders/amd/gcn_shader.comp
+++ b/reference/shaders/amd/gcn_shader.comp
@ -0,0 +1,12 @@
+#version 450
+#extension GL_ARB_gpu_shader_int64 : require
+#extension GL_AMD_gcn_shader : require
+layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
+
+void main()
+{
+    float cubeFace = cubeFaceIndexAMD(vec3(0.0));
+    vec2 cubeFaceCoord = cubeFaceCoordAMD(vec3(1.0));
+    uint64_t time = timeAMD();
+}
+
--- a/reference/shaders/amd/shader_ballot.comp
+++ b/reference/shaders/amd/shader_ballot.comp
@ -0,0 +1,32 @@
+#version 450
+#extension GL_ARB_gpu_shader_int64 : require
+#extension GL_ARB_shader_ballot : require
+#extension GL_AMD_shader_ballot : require
+layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer inputData
+{
+    float inputDataArray[];
+} _12;
+
+layout(binding = 1, std430) buffer outputData
+{
+    float outputDataArray[];
+} _74;
+
+void main()
+{
+    float thisLaneData = _12.inputDataArray[gl_LocalInvocationID.x];
+    bool laneActive = thisLaneData > 0.0;
+    uint thisLaneOutputSlot = mbcntAMD(packUint2x32(uvec2(unpackUint2x32(ballotARB(laneActive)).xy)));
+    int firstInvocation = readFirstInvocationARB(1);
+    int invocation = readInvocationARB(1, 0u);
+    vec3 swizzleInvocations = swizzleInvocationsAMD(vec3(0.0, 2.0, 1.0), uvec4(3u));
+    vec3 swizzelInvocationsMasked = swizzleInvocationsMaskedAMD(vec3(0.0, 2.0, 1.0), uvec3(2u));
+    vec3 writeInvocation = writeInvocationAMD(swizzleInvocations, swizzelInvocationsMasked, 0u);
+    if (laneActive)
+    {
+        _74.outputDataArray[thisLaneOutputSlot] = thisLaneData;
+    }
+}
+
--- a/reference/shaders/amd/shader_ballot_nonuniform_invocations.invalid.comp
+++ b/reference/shaders/amd/shader_ballot_nonuniform_invocations.invalid.comp
@ -0,0 +1,11 @@
+#version 450
+#extension GL_AMD_shader_ballot : require
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+void main()
+{
+    float addInvocations = addInvocationsNonUniformAMD(0.0);
+    int minInvocations = minInvocationsNonUniformAMD(1);
+    uint maxInvocations = uint(maxInvocationsNonUniformAMD(4));
+}
+
--- a/reference/shaders/amd/shader_group_vote.comp
+++ b/reference/shaders/amd/shader_group_vote.comp
@ -0,0 +1,18 @@
+#version 450
+#extension GL_ARB_shader_group_vote : require
+layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer inputData
+{
+    float inputDataArray[];
+} _12;
+
+void main()
+{
+    float thisLaneData = _12.inputDataArray[gl_LocalInvocationID.x];
+    bool laneActive = thisLaneData > 0.0;
+    bool allInvocations = allInvocationsARB(laneActive);
+    bool anyInvocations = anyInvocationARB(laneActive);
+    bool allInvocationsEqual = allInvocationsEqualARB(laneActive);
+}
+
--- a/reference/shaders/amd/shader_trinary_minmax.comp
+++ b/reference/shaders/amd/shader_trinary_minmax.comp
@ -0,0 +1,11 @@
+#version 450
+#extension GL_AMD_shader_trinary_minmax : require
+layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
+
+void main()
+{
+    int t11 = min3(0, 3, 2);
+    int t12 = max3(0, 3, 2);
+    int t13 = mid3(0, 3, 2);
+}
+
--- a/shaders/amd/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag
+++ b/shaders/amd/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag
@ -0,0 +1,10 @@
+#version 450
+#extension GL_AMD_shader_fragment_mask : require
+
+layout(input_attachment_index = 0, binding = 0) uniform subpassInputMS t;
+
+void main ()
+{
+    vec4 test2 = fragmentFetchAMD(t, 4);
+    uint testi2 = fragmentMaskFetchAMD(t);
+}
--- a/shaders/amd/fs.invalid.frag
+++ b/shaders/amd/fs.invalid.frag
@ -0,0 +1,14 @@
+#version 450
+#extension GL_AMD_shader_fragment_mask : require
+#extension GL_AMD_shader_explicit_vertex_parameter : require
+
+uniform sampler2DMS texture1;
+layout(location = 0) in vec4 vary;
+
+void main()
+{
+    uint testi1 = fragmentMaskFetchAMD(texture1, ivec2(0));
+    vec4 test1 = fragmentFetchAMD(texture1, ivec2(1), 2);
+
+    vec4 pos =  interpolateAtVertexAMD(vary, 0u);
+}
--- a/shaders/amd/gcn_shader.comp
+++ b/shaders/amd/gcn_shader.comp
@ -0,0 +1,13 @@
+#version 450
+#extension GL_AMD_gcn_shader : require
+#extension GL_ARB_gpu_shader_int64 : require
+
+layout (local_size_x = 64) in;
+
+void main ()
+{
+    float cubeFace = cubeFaceIndexAMD(vec3(0.0));
+    vec2 cubeFaceCoord = cubeFaceCoordAMD(vec3(1.0));
+
+    uint64_t time = timeAMD();
+}
--- a/shaders/amd/shader_ballot.comp
+++ b/shaders/amd/shader_ballot.comp
@ -0,0 +1,33 @@
+#version 450
+#extension GL_AMD_shader_ballot : require
+#extension GL_ARB_shader_ballot : require
+
+layout (local_size_x = 64) in;
+layout (std430, binding = 0) buffer inputData
+{
+    float inputDataArray[];
+};
+
+layout (std430, binding = 1) buffer outputData
+{
+    float outputDataArray[];
+};
+
+void main ()
+{
+    float thisLaneData = inputDataArray [gl_LocalInvocationID.x];
+    bool laneActive = (thisLaneData > 0);
+
+    uint thisLaneOutputSlot = mbcntAMD (ballotARB (laneActive));
+
+    int firstInvocation = readFirstInvocationARB(1);
+    int invocation = readInvocationARB(1, 0);
+
+    vec3 swizzleInvocations = swizzleInvocationsAMD(vec3(0.0, 2.0, 1.0), uvec4(3));
+    vec3 swizzelInvocationsMasked = swizzleInvocationsMaskedAMD(vec3(0.0, 2.0, 1.0), uvec3(2));
+    vec3 writeInvocation = writeInvocationAMD(swizzleInvocations, swizzelInvocationsMasked, 0);
+
+    if (laneActive) {
+        outputDataArray[thisLaneOutputSlot] = thisLaneData;
+    }
+}
--- a/shaders/amd/shader_ballot_nonuniform_invocations.invalid.comp
+++ b/shaders/amd/shader_ballot_nonuniform_invocations.invalid.comp
@ -0,0 +1,9 @@
+#version 450
+#extension GL_AMD_shader_ballot : require
+
+void main ()
+{
+    float addInvocations = addInvocationsNonUniformAMD(0.0);
+    int minInvocations = minInvocationsNonUniformAMD(1);
+    uint maxInvocations = maxInvocationsNonUniformAMD(4);
+}
--- a/shaders/amd/shader_group_vote.comp
+++ b/shaders/amd/shader_group_vote.comp
@ -0,0 +1,18 @@
+#version 450
+#extension GL_ARB_shader_group_vote : require
+
+layout (local_size_x = 64) in;
+layout (std430, binding = 0) buffer inputData
+{
+    float inputDataArray[];
+};
+
+void main ()
+{
+    float thisLaneData = inputDataArray [gl_LocalInvocationID.x];
+    bool laneActive = (thisLaneData > 0);
+
+    bool allInvocations = allInvocationsARB(laneActive);
+    bool anyInvocations = anyInvocationARB(laneActive);
+    bool allInvocationsEqual = allInvocationsEqualARB(laneActive);
+}
--- a/shaders/amd/shader_trinary_minmax.comp
+++ b/shaders/amd/shader_trinary_minmax.comp
@ -0,0 +1,11 @@
+#version 450
+#extension GL_AMD_shader_trinary_minmax : require
+
+layout (local_size_x = 64) in;
+
+void main ()
+{
+    int t11 = min3(0, 3, 2);
+    int t12 = max3(0, 3, 2);
+    int t13 = mid3(0, 3, 2);
+}
--- a/spirv.hpp
+++ b/spirv.hpp
@ -938,6 +938,16 @@ enum Op {
    OpSubgroupAnyKHR = 4429,
    OpSubgroupAllEqualKHR = 4430,
    OpSubgroupReadInvocationKHR = 4432,
+    OpGroupIAddNonUniformAMD = 5000,
+    OpGroupFAddNonUniformAMD = 5001,
+    OpGroupFMinNonUniformAMD = 5002,
+    OpGroupUMinNonUniformAMD = 5003,
+    OpGroupSMinNonUniformAMD = 5004,
+    OpGroupFMaxNonUniformAMD = 5005,
+    OpGroupUMaxNonUniformAMD = 5006,
+    OpGroupSMaxNonUniformAMD = 5007,
+    OpFragmentMaskFetchAMD = 5011,
+    OpFragmentFetchAMD = 5012,
    OpMax = 0x7fffffff,
 };

--- a/spirv_common.hpp
+++ b/spirv_common.hpp
@ -326,7 +326,11 @@ struct SPIRExtension : IVariant
 	enum Extension
 	{
 		Unsupported,
-		GLSL
+		GLSL,
+		SPV_AMD_shader_ballot,
+		SPV_AMD_shader_explicit_vertex_parameter,
+		SPV_AMD_shader_trinary_minmax,
+		SPV_AMD_gcn_shader
 	};

 	SPIRExtension(Extension ext_)
--- a/spirv_cross.cpp
+++ b/spirv_cross.cpp
@ -573,6 +573,37 @@ bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t
 		break;
 	}

+	case OpExtInst:
+	{
+		if (length < 5)
+			return false;
+		uint32_t extension_set = args[2];
+		if (compiler.get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter)
+		{
+			enum AMDShaderExplicitVertexParameter
+			{
+				InterpolateAtVertexAMD = 1
+			};
+
+			auto op = static_cast<AMDShaderExplicitVertexParameter>(args[3]);
+
+			switch (op)
+			{
+			case InterpolateAtVertexAMD:
+			{
+				auto *var = compiler.maybe_get<SPIRVariable>(args[4]);
+				if (var && storage_class_is_interface(var->storage))
+					variables.insert(args[4]);
+				break;
+			}
+
+			default:
+				break;
+			}
+		}
+		break;
+	}
+
 	case OpAccessChain:
 	case OpInBoundsAccessChain:
 	case OpLoad:
@ -1324,6 +1355,14 @@ void Compiler::parse(const Instruction &instruction)
 		auto ext = extract_string(spirv, instruction.offset + 1);
 		if (ext == "GLSL.std.450")
 			set<SPIRExtension>(id, SPIRExtension::GLSL);
+		else if (ext == "SPV_AMD_shader_ballot")
+			set<SPIRExtension>(id, SPIRExtension::SPV_AMD_shader_ballot);
+		else if (ext == "SPV_AMD_shader_explicit_vertex_parameter")
+			set<SPIRExtension>(id, SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter);
+		else if (ext == "SPV_AMD_shader_trinary_minmax")
+			set<SPIRExtension>(id, SPIRExtension::SPV_AMD_shader_trinary_minmax);
+		else if (ext == "SPV_AMD_gcn_shader")
+			set<SPIRExtension>(id, SPIRExtension::SPV_AMD_gcn_shader);
 		else
 			set<SPIRExtension>(id, SPIRExtension::Unsupported);

--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@ -1515,8 +1515,7 @@ void CompilerGLSL::emit_interface_block(const SPIRVariable &var)
 			if (options.es && options.version < 320)
 			{
 				// Geometry and tessellation extensions imply this extension.
-				if (!forced_extensions.count("GL_EXT_geometry_shader") &&
-				    !forced_extensions.count("GL_EXT_tessellation_shader"))
+				if (!has_extension("GL_EXT_geometry_shader") && !has_extension("GL_EXT_tessellation_shader"))
 					require_extension("GL_EXT_shader_io_blocks");
 			}

@ -3868,6 +3867,150 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 	}
 }

+// Emits the GLSL built-in call for one instruction of the SPV_AMD_shader_ballot extended
+// instruction set. `eop` is the instruction number within that set and `args` points at its
+// operand IDs; the trailing operand-count parameter is not used.
+void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
+                                                 uint32_t)
+{
+	enum AMDShaderBallot
+	{
+		SwizzleInvocationsAMD = 1,
+		SwizzleInvocationsMaskedAMD = 2,
+		WriteInvocationAMD = 3,
+		MbcntAMD = 4
+	};
+
+	// The GLSL extension is requested before dispatching, so it is also requested for opcodes
+	// that end up in the "unimplemented" branch below.
+	require_extension("GL_AMD_shader_ballot");
+
+	const auto ballot_op = static_cast<AMDShaderBallot>(eop);
+	switch (ballot_op)
+	{
+	case MbcntAMD:
+		emit_unary_func_op(result_type, id, args[0], "mbcntAMD");
+		break;
+
+	case SwizzleInvocationsAMD:
+	case SwizzleInvocationsMaskedAMD:
+	{
+		// Both swizzle variants take two operands and differ only in the emitted function name.
+		const bool masked = ballot_op == SwizzleInvocationsMaskedAMD;
+		emit_binary_func_op(result_type, id, args[0], args[1],
+		                    masked ? "swizzleInvocationsMaskedAMD" : "swizzleInvocationsAMD");
+		break;
+	}
+
+	case WriteInvocationAMD:
+		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "writeInvocationAMD");
+		break;
+
+	default:
+		statement("// unimplemented SPV AMD shader ballot op ", eop);
+		break;
+	}
+}
+
+// Emits the GLSL built-in call for one instruction of the SPV_AMD_shader_explicit_vertex_parameter
+// extended instruction set. Only InterpolateAtVertexAMD is handled; any other opcode produces an
+// "unimplemented" comment in the output. The trailing operand-count parameter is not used.
+void CompilerGLSL::emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t id, uint32_t eop,
+                                                                    const uint32_t *args, uint32_t)
+{
+	enum AMDShaderExplicitVertexParameter
+	{
+		InterpolateAtVertexAMD = 1
+	};
+
+	// Requested unconditionally, before the opcode is inspected.
+	require_extension("GL_AMD_shader_explicit_vertex_parameter");
+
+	if (static_cast<AMDShaderExplicitVertexParameter>(eop) != InterpolateAtVertexAMD)
+	{
+		statement("// unimplemented SPV AMD shader explicit vertex parameter op ", eop);
+		return;
+	}
+
+	emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtVertexAMD");
+}
+
+// Emits the GLSL built-in call for one instruction of the SPV_AMD_shader_trinary_minmax extended
+// instruction set. The float, unsigned and signed variants of each operation share one GLSL
+// function name (min3, max3 or mid3). The trailing operand-count parameter is not used.
+void CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop,
+                                                         const uint32_t *args, uint32_t)
+{
+	enum AMDShaderTrinaryMinMax
+	{
+		FMin3AMD = 1,
+		UMin3AMD = 2,
+		SMin3AMD = 3,
+		FMax3AMD = 4,
+		UMax3AMD = 5,
+		SMax3AMD = 6,
+		FMid3AMD = 7,
+		UMid3AMD = 8,
+		SMid3AMD = 9
+	};
+
+	// Requested unconditionally, before the opcode is inspected.
+	require_extension("GL_AMD_shader_trinary_minmax");
+
+	// Select the function name first; each handled opcode is emitted with the same three operands.
+	const char *glsl_func = nullptr;
+	switch (static_cast<AMDShaderTrinaryMinMax>(eop))
+	{
+	case FMin3AMD:
+	case UMin3AMD:
+	case SMin3AMD:
+		glsl_func = "min3";
+		break;
+
+	case FMax3AMD:
+	case UMax3AMD:
+	case SMax3AMD:
+		glsl_func = "max3";
+		break;
+
+	case FMid3AMD:
+	case UMid3AMD:
+	case SMid3AMD:
+		glsl_func = "mid3";
+		break;
+
+	default:
+		break;
+	}
+
+	if (glsl_func)
+		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], glsl_func);
+	else
+		statement("// unimplemented SPV AMD shader trinary minmax op ", eop);
+}
+
+// Emits the GLSL built-in call for one instruction of the SPV_AMD_gcn_shader extended instruction
+// set. The two cube-face instructions take a single operand; TimeAMD takes none. The trailing
+// operand-count parameter is not used.
+void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
+                                              uint32_t)
+{
+	enum AMDGCNShader
+	{
+		CubeFaceIndexAMD = 1,
+		CubeFaceCoordAMD = 2,
+		TimeAMD = 3
+	};
+
+	// Requested unconditionally, before the opcode is inspected.
+	require_extension("GL_AMD_gcn_shader");
+
+	const auto gcn_op = static_cast<AMDGCNShader>(eop);
+	switch (gcn_op)
+	{
+	case CubeFaceIndexAMD:
+	case CubeFaceCoordAMD:
+		// Same single-operand call shape; only the emitted function name differs.
+		emit_unary_func_op(result_type, id, args[0],
+		                   gcn_op == CubeFaceIndexAMD ? "cubeFaceIndexAMD" : "cubeFaceCoordAMD");
+		break;
+
+	case TimeAMD:
+	{
+		// No operands are read from args here, so the call text is a fixed string.
+		string time_call("timeAMD()");
+		emit_op(result_type, id, time_call, true);
+		break;
+	}
+
+	default:
+		statement("// unimplemented SPV AMD gcn shader op ", eop);
+		break;
+	}
+}
+
 string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
 {
 	if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Int)
@ -3894,6 +4037,8 @@ string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &i
 		return "int64BitsToDouble";
 	else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64)
 		return "uint64BitsToDouble";
+	else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UInt && in_type.vecsize == 2)
+		return "packUint2x32";
 	else
 		return "";
 }
@ -6372,13 +6517,168 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 	case OpExtInst:
 	{
 		uint32_t extension_set = ops[2];
-		if (get<SPIRExtension>(extension_set).ext != SPIRExtension::GLSL)
+
+		if (get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL)
+		{
+			emit_glsl_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
+		}
+		else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_ballot)
+		{
+			emit_spv_amd_shader_ballot_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
+		}
+		else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter)
+		{
+			emit_spv_amd_shader_explicit_vertex_parameter_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
+		}
+		else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_trinary_minmax)
+		{
+			emit_spv_amd_shader_trinary_minmax_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
+		}
+		else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_gcn_shader)
+		{
+			emit_spv_amd_gcn_shader_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
+		}
+		else
 		{
 			statement("// unimplemented ext op ", instruction.op);
 			break;
 		}

-		emit_glsl_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
+		break;
+	}
+
+	case OpSubgroupBallotKHR:
+	{
+		uint32_t result_type = ops[0];
+		uint32_t id = ops[1];
+		string expr;
+		expr = join("unpackUint2x32(ballotARB(" + to_expression(ops[2]) + "))");
+		emit_op(result_type, id, expr, true);
+
+		require_extension("GL_ARB_shader_ballot");
+		break;
+	}
+
+	case OpSubgroupFirstInvocationKHR:
+	{
+		uint32_t result_type = ops[0];
+		uint32_t id = ops[1];
+		emit_unary_func_op(result_type, id, ops[2], "readFirstInvocationARB");
+
+		require_extension("GL_ARB_shader_ballot");
+		break;
+	}
+
+	case OpSubgroupReadInvocationKHR:
+	{
+		uint32_t result_type = ops[0];
+		uint32_t id = ops[1];
+		emit_binary_func_op(result_type, id, ops[2], ops[3], "readInvocationARB");
+
+		require_extension("GL_ARB_shader_ballot");
+		break;
+	}
+
+	case OpSubgroupAllKHR:
+	{
+		uint32_t result_type = ops[0];
+		uint32_t id = ops[1];
+		emit_unary_func_op(result_type, id, ops[2], "allInvocationsARB");
+
+		require_extension("GL_ARB_shader_group_vote");
+		break;
+	}
+
+	case OpSubgroupAnyKHR:
+	{
+		uint32_t result_type = ops[0];
+		uint32_t id = ops[1];
+		emit_unary_func_op(result_type, id, ops[2], "anyInvocationARB");
+
+		require_extension("GL_ARB_shader_group_vote");
+		break;
+	}
+
+	case OpSubgroupAllEqualKHR:
+	{
+		uint32_t result_type = ops[0];
+		uint32_t id = ops[1];
+		emit_unary_func_op(result_type, id, ops[2], "allInvocationsEqualARB");
+
+		require_extension("GL_ARB_shader_group_vote");
+		break;
+	}
+
+	case OpGroupIAddNonUniformAMD:
+	case OpGroupFAddNonUniformAMD:
+	{
+		uint32_t result_type = ops[0];
+		uint32_t id = ops[1];
+		emit_unary_func_op(result_type, id, ops[4], "addInvocationsNonUniformAMD");
+
+		require_extension("GL_AMD_shader_ballot");
+		break;
+	}
+
+	case OpGroupFMinNonUniformAMD:
+	case OpGroupUMinNonUniformAMD:
+	case OpGroupSMinNonUniformAMD:
+	{
+		uint32_t result_type = ops[0];
+		uint32_t id = ops[1];
+		emit_unary_func_op(result_type, id, ops[4], "minInvocationsNonUniformAMD");
+
+		require_extension("GL_AMD_shader_ballot");
+		break;
+	}
+
+	case OpGroupFMaxNonUniformAMD:
+	case OpGroupUMaxNonUniformAMD:
+	case OpGroupSMaxNonUniformAMD:
+	{
+		uint32_t result_type = ops[0];
+		uint32_t id = ops[1];
+		emit_unary_func_op(result_type, id, ops[4], "maxInvocationsNonUniformAMD");
+
+		require_extension("GL_AMD_shader_ballot");
+		break;
+	}
+
+	case OpFragmentMaskFetchAMD:
+	{
+		auto &type = expression_type(ops[2]);
+		uint32_t result_type = ops[0];
+		uint32_t id = ops[1];
+
+		if (type.image.dim == spv::DimSubpassData)
+		{
+			emit_unary_func_op(result_type, id, ops[2], "fragmentMaskFetchAMD");
+		}
+		else
+		{
+			emit_binary_func_op(result_type, id, ops[2], ops[3], "fragmentMaskFetchAMD");
+		}
+
+		require_extension("GL_AMD_shader_fragment_mask");
+		break;
+	}
+
+	case OpFragmentFetchAMD:
+	{
+		auto &type = expression_type(ops[2]);
+		uint32_t result_type = ops[0];
+		uint32_t id = ops[1];
+
+		if (type.image.dim == spv::DimSubpassData)
+		{
+			emit_binary_func_op(result_type, id, ops[2], ops[4], "fragmentFetchAMD");
+		}
+		else
+		{
+			emit_trinary_func_op(result_type, id, ops[2], ops[3], ops[4], "fragmentFetchAMD");
+		}
+
+		require_extension("GL_AMD_shader_fragment_mask");
 		break;
 	}

@ -6976,11 +7276,17 @@ void CompilerGLSL::add_header_line(const std::string &line)
 	header_lines.push_back(line);
 }

+// Returns true when `ext` is already present in forced_extensions.
+bool CompilerGLSL::has_extension(const std::string &ext) const
+{
+	// forced_extensions is a std::vector, so membership is checked by scanning it front to back.
+	for (auto &known : forced_extensions)
+	{
+		if (known == ext)
+			return true;
+	}
+	return false;
+}
+
 void CompilerGLSL::require_extension(const string &ext) // Records ext in forced_extensions unless it is already there.
 {
-	if (forced_extensions.find(ext) == end(forced_extensions))
+	if (!has_extension(ext)) // skip extensions that were already recorded
 	{
-		forced_extensions.insert(ext);
+		forced_extensions.push_back(ext);
 		force_recompile = true; // set only when a new extension was actually added
 	}
 }
--- a/spirv_glsl.hpp
+++ b/spirv_glsl.hpp
@ -179,6 +179,8 @@ protected:
 	void reset();
 	void emit_function(SPIRFunction &func, uint64_t return_flags);

+	bool has_extension(const std::string &ext) const;
+
 	// Virtualize methods which need to be overridden by subclass targets like C++ and such.
 	virtual void emit_function_prototype(SPIRFunction &func, uint64_t return_flags);

@ -189,6 +191,14 @@ protected:
 	void emit_block_instructions(const SPIRBlock &block);
 	virtual void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args,
 	                          uint32_t count);
+	virtual void emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t result_id, uint32_t op,
+	                                           const uint32_t *args, uint32_t count);
+	virtual void emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t result_id, uint32_t op,
+	                                                              const uint32_t *args, uint32_t count);
+	virtual void emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t result_id, uint32_t op,
+	                                                   const uint32_t *args, uint32_t count);
+	virtual void emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args,
+	                                        uint32_t count);
 	virtual void emit_header();
 	virtual void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id);
 	virtual void emit_texture_op(const Instruction &i);
@ -445,7 +455,7 @@ protected:
 	std::unordered_map<uint32_t, uint32_t> expression_usage_counts;
 	void track_expression_read(uint32_t id);

-	std::unordered_set<std::string> forced_extensions;
+	std::vector<std::string> forced_extensions;
 	std::vector<std::string> header_lines;

 	uint32_t statement_count;