diff --git a/SPIRV/GLSL.ext.EXT.h b/SPIRV/GLSL.ext.EXT.h index 20b9e5401..f48f1304d 100644 --- a/SPIRV/GLSL.ext.EXT.h +++ b/SPIRV/GLSL.ext.EXT.h @@ -36,6 +36,8 @@ static const char* const E_SPV_EXT_fragment_fully_covered = "SPV_EXT_fragment_fu static const char* const E_SPV_EXT_fragment_invocation_density = "SPV_EXT_fragment_invocation_density"; static const char* const E_SPV_EXT_demote_to_helper_invocation = "SPV_EXT_demote_to_helper_invocation"; static const char* const E_SPV_EXT_shader_atomic_float_add = "SPV_EXT_shader_atomic_float_add"; +static const char* const E_SPV_EXT_shader_atomic_float16_add = "SPV_EXT_shader_atomic_float16_add"; +static const char* const E_SPV_EXT_shader_atomic_float_min_max = "SPV_EXT_shader_atomic_float_min_max"; static const char* const E_SPV_EXT_shader_image_int64 = "SPV_EXT_shader_image_int64"; #endif // #ifndef GLSLextEXT_H diff --git a/SPIRV/GlslangToSpv.cpp b/SPIRV/GlslangToSpv.cpp index 5a25c8d4e..b6a9bda86 100644 --- a/SPIRV/GlslangToSpv.cpp +++ b/SPIRV/GlslangToSpv.cpp @@ -6900,13 +6900,17 @@ spv::Id TGlslangToSpvTraverser::createAtomicOperation(glslang::TOperator op, spv case glslang::EOpImageAtomicAdd: case glslang::EOpAtomicCounterAdd: opCode = spv::OpAtomicIAdd; - if (typeProxy == glslang::EbtFloat || typeProxy == glslang::EbtDouble) { + if (typeProxy == glslang::EbtFloat16 || typeProxy == glslang::EbtFloat || typeProxy == glslang::EbtDouble) { opCode = spv::OpAtomicFAddEXT; builder.addExtension(spv::E_SPV_EXT_shader_atomic_float_add); - if (typeProxy == glslang::EbtFloat) + if (typeProxy == glslang::EbtFloat16) { + builder.addExtension(spv::E_SPV_EXT_shader_atomic_float16_add); + builder.addCapability(spv::CapabilityAtomicFloat16AddEXT); + } else if (typeProxy == glslang::EbtFloat) { builder.addCapability(spv::CapabilityAtomicFloat32AddEXT); - else + } else { builder.addCapability(spv::CapabilityAtomicFloat64AddEXT); + } } break; case glslang::EOpAtomicSubtract: @@ -6916,14 +6920,38 @@ spv::Id TGlslangToSpvTraverser::createAtomicOperation(glslang::TOperator op, spv case glslang::EOpAtomicMin: case glslang::EOpImageAtomicMin: case glslang::EOpAtomicCounterMin: - opCode = (typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64) ? - spv::OpAtomicUMin : spv::OpAtomicSMin; + if (typeProxy == glslang::EbtFloat16 || typeProxy == glslang::EbtFloat || typeProxy == glslang::EbtDouble) { + opCode = spv::OpAtomicFMinEXT; + builder.addExtension(spv::E_SPV_EXT_shader_atomic_float_min_max); + if (typeProxy == glslang::EbtFloat16) + builder.addCapability(spv::CapabilityAtomicFloat16MinMaxEXT); + else if (typeProxy == glslang::EbtFloat) + builder.addCapability(spv::CapabilityAtomicFloat32MinMaxEXT); + else + builder.addCapability(spv::CapabilityAtomicFloat64MinMaxEXT); + } else if (typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64) { + opCode = spv::OpAtomicUMin; + } else { + opCode = spv::OpAtomicSMin; + } break; case glslang::EOpAtomicMax: case glslang::EOpImageAtomicMax: case glslang::EOpAtomicCounterMax: - opCode = (typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64) ? - spv::OpAtomicUMax : spv::OpAtomicSMax; + if (typeProxy == glslang::EbtFloat16 || typeProxy == glslang::EbtFloat || typeProxy == glslang::EbtDouble) { + opCode = spv::OpAtomicFMaxEXT; + builder.addExtension(spv::E_SPV_EXT_shader_atomic_float_min_max); + if (typeProxy == glslang::EbtFloat16) + builder.addCapability(spv::CapabilityAtomicFloat16MinMaxEXT); + else if (typeProxy == glslang::EbtFloat) + builder.addCapability(spv::CapabilityAtomicFloat32MinMaxEXT); + else + builder.addCapability(spv::CapabilityAtomicFloat64MinMaxEXT); + } else if (typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64) { + opCode = spv::OpAtomicUMax; + } else { + opCode = spv::OpAtomicSMax; + } break; case glslang::EOpAtomicAnd: case glslang::EOpImageAtomicAnd: diff --git a/SPIRV/doc.cpp b/SPIRV/doc.cpp index 633b03a78..31b20a1f2 100644 --- a/SPIRV/doc.cpp +++ b/SPIRV/doc.cpp @@ -966,8 +966,12 @@ const char* CapabilityString(int info) case CapabilityIntegerFunctions2INTEL: return "CapabilityIntegerFunctions2INTEL"; + case CapabilityAtomicFloat16AddEXT: return "AtomicFloat16AddEXT"; case CapabilityAtomicFloat32AddEXT: return "AtomicFloat32AddEXT"; case CapabilityAtomicFloat64AddEXT: return "AtomicFloat64AddEXT"; + case CapabilityAtomicFloat16MinMaxEXT: return "AtomicFloat16MinMaxEXT"; + case CapabilityAtomicFloat32MinMaxEXT: return "AtomicFloat32MinMaxEXT"; + case CapabilityAtomicFloat64MinMaxEXT: return "AtomicFloat64MinMaxEXT"; case CapabilityWorkgroupMemoryExplicitLayoutKHR: return "CapabilityWorkgroupMemoryExplicitLayoutKHR"; case CapabilityWorkgroupMemoryExplicitLayout8BitAccessKHR: return "CapabilityWorkgroupMemoryExplicitLayout8BitAccessKHR"; @@ -1352,6 +1356,8 @@ const char* OpcodeString(int op) case 4432: return "OpSubgroupReadInvocationKHR"; case OpAtomicFAddEXT: return "OpAtomicFAddEXT"; + case OpAtomicFMinEXT: return "OpAtomicFMinEXT"; + case OpAtomicFMaxEXT: return "OpAtomicFMaxEXT"; case 5000: return "OpGroupIAddNonUniformAMD"; case 5001: return "OpGroupFAddNonUniformAMD"; @@ -2342,6 +2348,16 @@ void Parameterize() InstructionDesc[OpAtomicSMax].operands.push(OperandMemorySemantics, "'Semantics'"); InstructionDesc[OpAtomicSMax].operands.push(OperandId, "'Value'"); + InstructionDesc[OpAtomicFMinEXT].operands.push(OperandId, "'Pointer'"); + InstructionDesc[OpAtomicFMinEXT].operands.push(OperandScope, "'Scope'"); + InstructionDesc[OpAtomicFMinEXT].operands.push(OperandMemorySemantics, "'Semantics'"); + InstructionDesc[OpAtomicFMinEXT].operands.push(OperandId, "'Value'"); + + InstructionDesc[OpAtomicFMaxEXT].operands.push(OperandId, "'Pointer'"); + InstructionDesc[OpAtomicFMaxEXT].operands.push(OperandScope, "'Scope'"); + InstructionDesc[OpAtomicFMaxEXT].operands.push(OperandMemorySemantics, "'Semantics'"); + InstructionDesc[OpAtomicFMaxEXT].operands.push(OperandId, "'Value'"); + InstructionDesc[OpAtomicAnd].operands.push(OperandId, "'Pointer'"); InstructionDesc[OpAtomicAnd].operands.push(OperandScope, "'Scope'"); InstructionDesc[OpAtomicAnd].operands.push(OperandMemorySemantics, "'Semantics'"); diff --git a/SPIRV/spirv.hpp b/SPIRV/spirv.hpp index dfc1a9940..317db81f1 100644 --- a/SPIRV/spirv.hpp +++ b/SPIRV/spirv.hpp @@ -410,6 +410,7 @@ enum FPRoundingMode { enum LinkageType { LinkageTypeExport = 0, LinkageTypeImport = 1, + LinkageTypeLinkOnceODR = 2, LinkageTypeMax = 0x7fffffff, }; @@ -1011,8 +1012,12 @@ enum Capability { CapabilityFunctionPointersINTEL = 5603, CapabilityIndirectReferencesINTEL = 5604, CapabilityAsmINTEL = 5606, + CapabilityAtomicFloat32MinMaxEXT = 5612, + CapabilityAtomicFloat64MinMaxEXT = 5613, + CapabilityAtomicFloat16MinMaxEXT = 5616, CapabilityVectorComputeINTEL = 5617, CapabilityVectorAnyINTEL = 5619, + CapabilityExpectAssumeKHR = 5629, CapabilitySubgroupAvcMotionEstimationINTEL = 5696, CapabilitySubgroupAvcMotionEstimationIntraINTEL = 5697, CapabilitySubgroupAvcMotionEstimationChromaINTEL = 5698, @@ -1036,6 +1041,7 @@ enum Capability { CapabilityAtomicFloat32AddEXT = 6033, CapabilityAtomicFloat64AddEXT = 6034, CapabilityLongConstantCompositeINTEL = 6089, + CapabilityAtomicFloat16AddEXT = 6095, CapabilityMax = 0x7fffffff, }; @@ -1103,15 +1109,15 @@ enum FragmentShadingRateMask { }; enum FPDenormMode { - FPDenormModePreserve = 0, - FPDenormModeFlushToZero = 1, - FPDenormModeMax = 0x7fffffff, + FPDenormModePreserve = 0, + FPDenormModeFlushToZero = 1, + FPDenormModeMax = 0x7fffffff, }; enum FPOperationMode { - FPOperationModeIEEE = 0, - FPOperationModeALT = 1, - FPOperationModeMax = 0x7fffffff, + FPOperationModeIEEE = 0, + FPOperationModeALT = 1, + FPOperationModeMax = 0x7fffffff, }; enum Op { @@ -1538,6 +1544,10 @@ enum Op { OpAsmTargetINTEL = 5609, OpAsmINTEL = 5610, OpAsmCallINTEL = 5611, + OpAtomicFMinEXT = 5614, + OpAtomicFMaxEXT = 5615, + OpAssumeTrueKHR = 5630, + OpExpectKHR = 5631, OpDecorateString = 5632, OpDecorateStringGOOGLE = 5632, OpMemberDecorateString = 5633, @@ -2120,6 +2130,10 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { case OpAsmTargetINTEL: *hasResult = true; *hasResultType = true; break; case OpAsmINTEL: *hasResult = true; *hasResultType = true; break; case OpAsmCallINTEL: *hasResult = true; *hasResultType = true; break; + case OpAtomicFMinEXT: *hasResult = true; *hasResultType = true; break; + case OpAtomicFMaxEXT: *hasResult = true; *hasResultType = true; break; + case OpAssumeTrueKHR: *hasResult = false; *hasResultType = false; break; + case OpExpectKHR: *hasResult = true; *hasResultType = true; break; case OpDecorateString: *hasResult = false; *hasResultType = false; break; case OpMemberDecorateString: *hasResult = false; *hasResultType = false; break; case OpVmeImageINTEL: *hasResult = true; *hasResultType = true; break; diff --git a/Test/spv.atomicFloat2.comp b/Test/spv.atomicFloat2.comp new file mode 100644 index 000000000..09235c5e6 --- /dev/null +++ b/Test/spv.atomicFloat2.comp @@ -0,0 +1,179 @@ +#version 450 core + +#extension GL_KHR_memory_scope_semantics : enable +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable +#extension GL_EXT_shader_atomic_float2: enable +#pragma use_vulkan_memory_model + +layout(local_size_x = 16, local_size_y = 16) in; + +layout(binding = 0) buffer Buffer +{ + float16_t datah; + float dataf; + double datad; +} buf; + +shared float16_t atomh; +shared float atomf; +shared double atomd; + +layout(binding = 0, r32f) volatile coherent uniform image1D fimage1D; +layout(binding = 1, r32f) volatile coherent uniform image1DArray fimage1DArray; +layout(binding = 2, r32f) volatile coherent uniform image2D fimage2D; +layout(binding = 3, r32f) volatile coherent uniform image2DArray fimage2DArray; +layout(binding = 4, r32f) volatile coherent uniform image2DRect fimage2DRect; +layout(binding = 5, r32f) volatile coherent uniform imageCube fimageCube; +layout(binding = 6, r32f) volatile coherent uniform imageCubeArray fimageCubeArray; +layout(binding = 9, r32f) volatile coherent uniform image3D fimage3D; + +void main() +{ + //atomicAdd + float16_t resulth = float16_t(0.0); + resulth = atomicAdd(atomh, float16_t(3.0)); + resulth = atomicAdd(atomh, float16_t(4.5), gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed); + resulth = atomicAdd(buf.datah, float16_t(3.0)); + resulth = atomicAdd(buf.datah, float16_t(4.5), gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed); + + //atomicMin + resulth = atomicMin(atomh, float16_t(3.0)); + resulth = atomicMin(atomh, float16_t(4.5), gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed); + resulth = atomicMin(buf.datah, float16_t(3.0)); + resulth = atomicMin(buf.datah, float16_t(4.5), gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed); + + float resultf = 0.0; + resultf = atomicMin(atomf, 3.0); + resultf = atomicMin(atomf, 4.5, gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed); + resultf = atomicMin(buf.dataf, 3.0); + resultf = atomicMin(buf.dataf, 4.5, gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed); + + double resultd = 0.0; + resultd = atomicMin(atomd, 3.0); + resultd = atomicMin(atomd, 4.5, gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed); + resultd = atomicMin(buf.datad, 3.0); + resultd = atomicMin(buf.datad, 4.5, gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed); + + //atomicMax + resulth = atomicMax(atomh, float16_t(3.0)); + resulth = atomicMax(atomh, float16_t(4.5), gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed); + resulth = atomicMax(buf.datah, float16_t(3.0)); + resulth = atomicMax(buf.datah, float16_t(4.5), gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed); + + resultf = atomicMax(atomf, 3.0); + resultf = atomicMax(atomf, 4.5, gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed); + resultf = atomicMax(buf.dataf, 3.0); + resultf = atomicMax(buf.dataf, 4.5, gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed); + + resultd = atomicMax(atomd, 3.0); + resultd = atomicMax(atomd, 4.5, gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed); + resultd = atomicMax(buf.datad, 3.0); + resultd = atomicMax(buf.datad, 4.5, gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelaxed); + + //atomicExchange + resulth = atomicExchange(buf.datah, resulth); + buf.datah += resulth; + resulth = atomicExchange(buf.datah, resulth, gl_ScopeDevice, gl_StorageSemanticsShared, gl_SemanticsRelaxed); + buf.datah += resulth; + resulth = atomicExchange(atomh, resulth); + buf.datah += resulth; + resulth = atomicExchange(atomh, resulth, gl_ScopeDevice, gl_StorageSemanticsShared, gl_SemanticsRelaxed); + buf.datah += resulth; + + //atomic load/store + resulth = atomicLoad(buf.datah, gl_ScopeDevice, gl_StorageSemanticsShared, gl_SemanticsRelaxed); + atomicStore(buf.datah, resulth, gl_ScopeDevice, gl_StorageSemanticsShared, gl_SemanticsRelaxed); + buf.datah += resulth; + + resulth = atomicLoad(atomh, gl_ScopeDevice, gl_StorageSemanticsShared, gl_SemanticsRelaxed); + atomicStore(atomh, resulth, gl_ScopeDevice, gl_StorageSemanticsShared, gl_SemanticsRelaxed); + buf.datah += resulth; + + // image atomics on 1D: + atomf = imageAtomicMin(fimage1D, int(0), 2.0); + buf.dataf += atomf; + atomf = imageAtomicMin(fimage1D, int(1), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed); + buf.dataf += atomf; + + atomf = imageAtomicMax(fimage1D, int(0), 2.0); + buf.dataf += atomf; + atomf = imageAtomicMax(fimage1D, int(1), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed); + buf.dataf += atomf; + + // image atomics on 1D Array: + atomf = imageAtomicMin(fimage1DArray, ivec2(0,0), 2.0); + buf.dataf += atomf; + atomf = imageAtomicMin(fimage1DArray, ivec2(1,1), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed); + buf.dataf += atomf; + + atomf = imageAtomicMax(fimage1DArray, ivec2(0,0), 2.0); + buf.dataf += atomf; + atomf = imageAtomicMax(fimage1DArray, ivec2(1,1), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed); + buf.dataf += atomf; + + // image atomics on 2D: + atomf = imageAtomicMin(fimage2D, ivec2(0,0), 2.0); + buf.dataf += atomf; + atomf = imageAtomicMin(fimage2D, ivec2(1,1), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed); + buf.dataf += atomf; + + atomf = imageAtomicMax(fimage2D, ivec2(0,0), 2.0); + buf.dataf += atomf; + atomf = imageAtomicMax(fimage2D, ivec2(1,1), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed); + buf.dataf += atomf; + + // image atomics on 2D Rect: + atomf = imageAtomicMin(fimage2DRect, ivec2(0,0), 2.0); + buf.dataf += atomf; + atomf = imageAtomicMin(fimage2DRect, ivec2(1,1), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed); + buf.dataf += atomf; + + atomf = imageAtomicMax(fimage2DRect, ivec2(0,0), 2.0); + buf.dataf += atomf; + atomf = imageAtomicMax(fimage2DRect, ivec2(1,1), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed); + buf.dataf += atomf; + + // image atomics on 2D Array: + atomf = imageAtomicMin(fimage2DArray, ivec3(0,0,0), 2.0); + buf.dataf += atomf; + atomf = imageAtomicMin(fimage2DArray, ivec3(1,1,0), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed); + buf.dataf += atomf; + + atomf = imageAtomicMax(fimage2DArray, ivec3(0,0,0), 2.0); + buf.dataf += atomf; + atomf = imageAtomicMax(fimage2DArray, ivec3(1,1,0), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed); + buf.dataf += atomf; + + // image atomics on Cube: + atomf = imageAtomicMin(fimageCube, ivec3(0,0,0), 2.0); + buf.dataf += atomf; + atomf = imageAtomicMin(fimageCube, ivec3(1,1,0), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed); + buf.dataf += atomf; + + atomf = imageAtomicMax(fimageCube, ivec3(0,0,0), 2.0); + buf.dataf += atomf; + atomf = imageAtomicMax(fimageCube, ivec3(1,1,0), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed); + buf.dataf += atomf; + + // image atomics on Cube Array: + atomf = imageAtomicMin(fimageCubeArray, ivec3(0,0,0), 2.0); + buf.dataf += atomf; + atomf = imageAtomicMin(fimageCubeArray, ivec3(1,1,0), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed); + buf.dataf += atomf; + + atomf = imageAtomicMax(fimageCubeArray, ivec3(0,0,0), 2.0); + buf.dataf += atomf; + atomf = imageAtomicMax(fimageCubeArray, ivec3(1,1,0), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed); + buf.dataf += atomf; + + // image atomics on 3D: + atomf = imageAtomicMin(fimage3D, ivec3(0,0,0), 2.0); + buf.dataf += atomf; + atomf = imageAtomicMin(fimage3D, ivec3(1,1,0), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed); + buf.dataf += atomf; + + atomf = imageAtomicMax(fimage3D, ivec3(0,0,0), 2.0); + buf.dataf += atomf; + atomf = imageAtomicMax(fimage3D, ivec3(1,1,0), 3.0, gl_ScopeDevice, gl_StorageSemanticsImage , gl_SemanticsRelaxed); + buf.dataf += atomf; +} diff --git a/glslang/MachineIndependent/Initialize.cpp b/glslang/MachineIndependent/Initialize.cpp index 321a90286..49817a0ad 100644 --- a/glslang/MachineIndependent/Initialize.cpp +++ b/glslang/MachineIndependent/Initialize.cpp @@ -1436,11 +1436,23 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV " int64_t atomicMin(coherent volatile inout int64_t, int64_t);" "uint64_t atomicMin(coherent volatile inout uint64_t, uint64_t, int, int, int);" " int64_t atomicMin(coherent volatile inout int64_t, int64_t, int, int, int);" + "float16_t atomicMin(coherent volatile inout float16_t, float16_t);" + "float16_t atomicMin(coherent volatile inout float16_t, float16_t, int, int, int);" + " float atomicMin(coherent volatile inout float, float);" + " float atomicMin(coherent volatile inout float, float, int, int, int);" + " double atomicMin(coherent volatile inout double, double);" + " double atomicMin(coherent volatile inout double, double, int, int, int);" "uint64_t atomicMax(coherent volatile inout uint64_t, uint64_t);" " int64_t atomicMax(coherent volatile inout int64_t, int64_t);" "uint64_t atomicMax(coherent volatile inout uint64_t, uint64_t, int, int, int);" " int64_t atomicMax(coherent volatile inout int64_t, int64_t, int, int, int);" + "float16_t atomicMax(coherent volatile inout float16_t, float16_t);" + "float16_t atomicMax(coherent volatile inout float16_t, float16_t, int, int, int);" + " float atomicMax(coherent volatile inout float, float);" + " float atomicMax(coherent volatile inout float, float, int, int, int);" + " double atomicMax(coherent volatile inout double, double);" + " double atomicMax(coherent volatile inout double, double, int, int, int);" "uint64_t atomicAnd(coherent volatile inout uint64_t, uint64_t);" " int64_t atomicAnd(coherent volatile inout int64_t, int64_t);" @@ -1461,6 +1473,8 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV " int64_t atomicAdd(coherent volatile inout int64_t, int64_t);" "uint64_t atomicAdd(coherent volatile inout uint64_t, uint64_t, int, int, int);" " int64_t atomicAdd(coherent volatile inout int64_t, int64_t, int, int, int);" + "float16_t atomicAdd(coherent volatile inout float16_t, float16_t);" + "float16_t atomicAdd(coherent volatile inout float16_t, float16_t, int, int, int);" " float atomicAdd(coherent volatile inout float, float);" " float atomicAdd(coherent volatile inout float, float, int, int, int);" " double atomicAdd(coherent volatile inout double, double);" @@ -1470,6 +1484,8 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV " int64_t atomicExchange(coherent volatile inout int64_t, int64_t);" "uint64_t atomicExchange(coherent volatile inout uint64_t, uint64_t, int, int, int);" " int64_t atomicExchange(coherent volatile inout int64_t, int64_t, int, int, int);" + "float16_t atomicExchange(coherent volatile inout float16_t, float16_t);" + "float16_t atomicExchange(coherent volatile inout float16_t, float16_t, int, int, int);" " float atomicExchange(coherent volatile inout float, float);" " float atomicExchange(coherent volatile inout float, float, int, int, int);" " double atomicExchange(coherent volatile inout double, double);" @@ -1482,11 +1498,13 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV "uint64_t atomicLoad(coherent volatile in uint64_t, int, int, int);" " int64_t atomicLoad(coherent volatile in int64_t, int, int, int);" + "float16_t atomicLoad(coherent volatile in float16_t, int, int, int);" " float atomicLoad(coherent volatile in float, int, int, int);" " double atomicLoad(coherent volatile in double, int, int, int);" "void atomicStore(coherent volatile out uint64_t, uint64_t, int, int, int);" "void atomicStore(coherent volatile out int64_t, int64_t, int, int, int);" + "void atomicStore(coherent volatile out float16_t, float16_t, int, int, int);" "void atomicStore(coherent volatile out float, float, int, int, int);" "void atomicStore(coherent volatile out double, double, int, int, int);" "\n"); @@ -6478,6 +6496,24 @@ void TBuiltIns::addImageFunctions(TSampler sampler, const TString& typeName, int commonBuiltins.append(imageParams); commonBuiltins.append(", float"); commonBuiltins.append(", int, int, int);\n"); + + commonBuiltins.append("float imageAtomicMin(volatile coherent "); + commonBuiltins.append(imageParams); + commonBuiltins.append(", float);\n"); + + commonBuiltins.append("float imageAtomicMin(volatile coherent "); + commonBuiltins.append(imageParams); + commonBuiltins.append(", float"); + commonBuiltins.append(", int, int, int);\n"); + + commonBuiltins.append("float imageAtomicMax(volatile coherent "); + commonBuiltins.append(imageParams); + commonBuiltins.append(", float);\n"); + + commonBuiltins.append("float imageAtomicMax(volatile coherent "); + commonBuiltins.append(imageParams); + commonBuiltins.append(", float"); + commonBuiltins.append(", int, int, int);\n"); } } } diff --git a/glslang/MachineIndependent/ParseHelper.cpp b/glslang/MachineIndependent/ParseHelper.cpp index 3c08c658c..1ba66529d 100644 --- a/glslang/MachineIndependent/ParseHelper.cpp +++ b/glslang/MachineIndependent/ParseHelper.cpp @@ -2279,18 +2279,23 @@ void TParseContext::builtInOpCheck(const TSourceLoc& loc, const TFunction& fnCan error(loc, "only supported on image with format r64i", fnCandidate.getName().c_str(), ""); else if (callNode.getType().getBasicType() == EbtUint64 && imageType.getQualifier().getFormat() != ElfR64ui) error(loc, "only supported on image with format r64ui", fnCandidate.getName().c_str(), ""); - } else { - bool isImageAtomicOnFloatAllowed = ((fnCandidate.getName().compare(0, 14, "imageAtomicAdd") == 0) || - (fnCandidate.getName().compare(0, 15, "imageAtomicLoad") == 0) || - (fnCandidate.getName().compare(0, 16, "imageAtomicStore") == 0) || - (fnCandidate.getName().compare(0, 19, "imageAtomicExchange") == 0)); - if (imageType.getSampler().type == EbtFloat && isImageAtomicOnFloatAllowed && - (fnCandidate.getName().compare(0, 19, "imageAtomicExchange") != 0)) // imageAtomicExchange doesn't require GL_EXT_shader_atomic_float + } else if (imageType.getSampler().type == EbtFloat) { + if (fnCandidate.getName().compare(0, 19, "imageAtomicExchange") == 0) { + // imageAtomicExchange doesn't require an extension + } else if ((fnCandidate.getName().compare(0, 14, "imageAtomicAdd") == 0) || + (fnCandidate.getName().compare(0, 15, "imageAtomicLoad") == 0) || + (fnCandidate.getName().compare(0, 16, "imageAtomicStore") == 0)) { requireExtensions(loc, 1, &E_GL_EXT_shader_atomic_float, fnCandidate.getName().c_str()); - if (!isImageAtomicOnFloatAllowed) + } else if ((fnCandidate.getName().compare(0, 14, "imageAtomicMin") == 0) || + (fnCandidate.getName().compare(0, 14, "imageAtomicMax") == 0)) { + requireExtensions(loc, 1, &E_GL_EXT_shader_atomic_float2, fnCandidate.getName().c_str()); + } else { error(loc, "only supported on integer images", fnCandidate.getName().c_str(), ""); - else if (imageType.getQualifier().getFormat() != ElfR32f && isEsProfile()) + } + if (imageType.getQualifier().getFormat() != ElfR32f && isEsProfile()) error(loc, "only supported on image with format r32f", fnCandidate.getName().c_str(), ""); + } else { + error(loc, "not supported on this image type", fnCandidate.getName().c_str(), ""); } const size_t maxArgs = imageType.getSampler().isMultiSample() ? 5 : 4; @@ -2319,16 +2324,28 @@ void TParseContext::builtInOpCheck(const TSourceLoc& loc, const TFunction& fnCan memorySemanticsCheck(loc, fnCandidate, callNode); if ((callNode.getOp() == EOpAtomicAdd || callNode.getOp() == EOpAtomicExchange || callNode.getOp() == EOpAtomicLoad || callNode.getOp() == EOpAtomicStore) && - (arg0->getType().isFloatingDomain())) { + (arg0->getType().getBasicType() == EbtFloat || + arg0->getType().getBasicType() == EbtDouble)) { requireExtensions(loc, 1, &E_GL_EXT_shader_atomic_float, fnCandidate.getName().c_str()); + } else if ((callNode.getOp() == EOpAtomicAdd || callNode.getOp() == EOpAtomicExchange || + callNode.getOp() == EOpAtomicLoad || callNode.getOp() == EOpAtomicStore || + callNode.getOp() == EOpAtomicMin || callNode.getOp() == EOpAtomicMax) && + arg0->getType().isFloatingDomain()) { + requireExtensions(loc, 1, &E_GL_EXT_shader_atomic_float2, fnCandidate.getName().c_str()); } } else if (arg0->getType().getBasicType() == EbtInt64 || arg0->getType().getBasicType() == EbtUint64) { const char* const extensions[2] = { E_GL_NV_shader_atomic_int64, E_GL_EXT_shader_atomic_int64 }; requireExtensions(loc, 2, extensions, fnCandidate.getName().c_str()); } else if ((callNode.getOp() == EOpAtomicAdd || callNode.getOp() == EOpAtomicExchange) && - (arg0->getType().isFloatingDomain())) { + (arg0->getType().getBasicType() == EbtFloat || + arg0->getType().getBasicType() == EbtDouble)) { requireExtensions(loc, 1, &E_GL_EXT_shader_atomic_float, fnCandidate.getName().c_str()); + } else if ((callNode.getOp() == EOpAtomicAdd || callNode.getOp() == EOpAtomicExchange || + callNode.getOp() == EOpAtomicLoad || callNode.getOp() == EOpAtomicStore || + callNode.getOp() == EOpAtomicMin || callNode.getOp() == EOpAtomicMax) && + arg0->getType().isFloatingDomain()) { + requireExtensions(loc, 1, &E_GL_EXT_shader_atomic_float2, fnCandidate.getName().c_str()); } break; } diff --git a/glslang/MachineIndependent/Versions.cpp b/glslang/MachineIndependent/Versions.cpp index c7ce0ca0c..a580718ac 100644 --- a/glslang/MachineIndependent/Versions.cpp +++ b/glslang/MachineIndependent/Versions.cpp @@ -354,6 +354,7 @@ void TParseVersions::initializeExtensionBehavior() extensionBehavior[E_GL_EXT_shader_subgroup_extended_types_int64] = EBhDisable; extensionBehavior[E_GL_EXT_shader_subgroup_extended_types_float16] = EBhDisable; extensionBehavior[E_GL_EXT_shader_atomic_float] = EBhDisable; + extensionBehavior[E_GL_EXT_shader_atomic_float2] = EBhDisable; } #endif // GLSLANG_WEB @@ -537,6 +538,7 @@ void TParseVersions::getPreamble(std::string& preamble) "#define GL_EXT_shader_subgroup_extended_types_float16 1\n" "#define GL_EXT_shader_atomic_float 1\n" + "#define GL_EXT_shader_atomic_float2 1\n" ; if (version >= 150) { diff --git a/glslang/MachineIndependent/Versions.h b/glslang/MachineIndependent/Versions.h index 01f819dae..2313d0963 100644 --- a/glslang/MachineIndependent/Versions.h +++ b/glslang/MachineIndependent/Versions.h @@ -306,6 +306,7 @@ const char* const E_GL_EXT_shader_subgroup_extended_types_float16 = "GL_EXT_shad const char* const E_GL_EXT_terminate_invocation = "GL_EXT_terminate_invocation"; const char* const E_GL_EXT_shader_atomic_float = "GL_EXT_shader_atomic_float"; +const char* const E_GL_EXT_shader_atomic_float2 = "GL_EXT_shader_atomic_float2"; // Arrays of extensions for the above AEP duplications