From 48702616ecc275c87c9baf73bd09162762dd80b5 Mon Sep 17 00:00:00 2001 From: Jeff Bolz Date: Fri, 2 Feb 2024 12:36:16 -0600 Subject: [PATCH] NV_shader_atomic_fp16_vector --- SPIRV/GLSL.ext.NV.h | 3 + SPIRV/GlslangToSpv.cpp | 56 +- SPIRV/doc.cpp | 1 + SPIRV/spirv.hpp | 1 + Test/baseResults/spv.nvAtomicFp16Vec.frag.out | 704 ++++++++++++++++++ Test/spv.nvAtomicFp16Vec.frag | 113 +++ glslang/MachineIndependent/Initialize.cpp | 42 ++ glslang/MachineIndependent/ParseHelper.cpp | 16 + glslang/MachineIndependent/Versions.cpp | 1 + glslang/MachineIndependent/Versions.h | 1 + gtests/Spv.FromFile.cpp | 1 + known_good.json | 26 +- 12 files changed, 937 insertions(+), 28 deletions(-) create mode 100644 Test/baseResults/spv.nvAtomicFp16Vec.frag.out create mode 100644 Test/spv.nvAtomicFp16Vec.frag diff --git a/SPIRV/GLSL.ext.NV.h b/SPIRV/GLSL.ext.NV.h index 9889bc9f9..e4f11e4bf 100644 --- a/SPIRV/GLSL.ext.NV.h +++ b/SPIRV/GLSL.ext.NV.h @@ -87,4 +87,7 @@ const char* const E_SPV_NV_shader_invocation_reorder = "SPV_NV_shader_invocation //SPV_NV_displacement_micromap const char* const E_SPV_NV_displacement_micromap = "SPV_NV_displacement_micromap"; +//SPV_NV_shader_atomic_fp16_vector +const char* const E_SPV_NV_shader_atomic_fp16_vector = "SPV_NV_shader_atomic_fp16_vector"; + #endif // #ifndef GLSLextNV_H diff --git a/SPIRV/GlslangToSpv.cpp b/SPIRV/GlslangToSpv.cpp index 1d375ef25..ffd44c5d8 100755 --- a/SPIRV/GlslangToSpv.cpp +++ b/SPIRV/GlslangToSpv.cpp @@ -204,7 +204,8 @@ protected: spv::Id createBinaryMatrixOperation(spv::Op, OpDecorations&, spv::Id typeId, spv::Id left, spv::Id right); spv::Id createUnaryOperation(glslang::TOperator op, OpDecorations&, spv::Id typeId, spv::Id operand, glslang::TBasicType typeProxy, - const spv::Builder::AccessChain::CoherentFlags &lvalueCoherentFlags); + const spv::Builder::AccessChain::CoherentFlags &lvalueCoherentFlags, + const glslang::TType &opType); spv::Id createUnaryMatrixOperation(spv::Op op, OpDecorations&, spv::Id typeId, spv::Id 
operand, glslang::TBasicType typeProxy); spv::Id createConversion(glslang::TOperator op, OpDecorations&, spv::Id destTypeId, spv::Id operand, @@ -213,7 +214,8 @@ protected: spv::Id makeSmearedConstant(spv::Id constant, int vectorSize); spv::Id createAtomicOperation(glslang::TOperator op, spv::Decoration precision, spv::Id typeId, std::vector& operands, glslang::TBasicType typeProxy, - const spv::Builder::AccessChain::CoherentFlags &lvalueCoherentFlags); + const spv::Builder::AccessChain::CoherentFlags &lvalueCoherentFlags, + const glslang::TType &opType); spv::Id createInvocationsOperation(glslang::TOperator op, spv::Id typeId, std::vector& operands, glslang::TBasicType typeProxy); spv::Id CreateInvocationsVectorOperation(spv::Op op, spv::GroupOperation groupOperation, @@ -2692,7 +2694,7 @@ bool TGlslangToSpvTraverser::visitUnary(glslang::TVisit /* visit */, glslang::TI // if not, then possibly an operation if (! result) result = createUnaryOperation(node->getOp(), decorations, resultType(), operand, - node->getOperand()->getBasicType(), lvalueCoherentFlags); + node->getOperand()->getBasicType(), lvalueCoherentFlags, node->getType()); // it could be attached to a SPIR-V intruction if (!result) { @@ -3775,7 +3777,7 @@ bool TGlslangToSpvTraverser::visitAggregate(glslang::TVisit visit, glslang::TInt glslang::TBasicType typeProxy = (node->getOp() == glslang::EOpAtomicStore) ? 
node->getSequence()[0]->getAsTyped()->getBasicType() : node->getBasicType(); result = createAtomicOperation(node->getOp(), precision, resultType(), operands, typeProxy, - lvalueCoherentFlags); + lvalueCoherentFlags, node->getType()); } else if (node->getOp() == glslang::EOpSpirvInst) { const auto& spirvInst = node->getSpirvInstruction(); if (spirvInst.set == "") { @@ -3822,7 +3824,7 @@ bool TGlslangToSpvTraverser::visitAggregate(glslang::TVisit visit, glslang::TInt result = createUnaryOperation( node->getOp(), decorations, resultType(), operands.front(), - glslangOperands[0]->getAsTyped()->getBasicType(), lvalueCoherentFlags); + glslangOperands[0]->getAsTyped()->getBasicType(), lvalueCoherentFlags, node->getType()); } break; default: @@ -6077,7 +6079,7 @@ spv::Id TGlslangToSpvTraverser::createImageTextureFunctionCall(glslang::TIntermO operands.push_back(*opIt); return createAtomicOperation(node->getOp(), precision, resultType(), operands, typeProxy, - lvalueCoherentFlags); + lvalueCoherentFlags, node->getType()); } } @@ -6828,7 +6830,8 @@ spv::Id TGlslangToSpvTraverser::createBinaryMatrixOperation(spv::Op op, OpDecora } spv::Id TGlslangToSpvTraverser::createUnaryOperation(glslang::TOperator op, OpDecorations& decorations, spv::Id typeId, - spv::Id operand, glslang::TBasicType typeProxy, const spv::Builder::AccessChain::CoherentFlags &lvalueCoherentFlags) + spv::Id operand, glslang::TBasicType typeProxy, const spv::Builder::AccessChain::CoherentFlags &lvalueCoherentFlags, + const glslang::TType &opType) { spv::Op unaryOp = spv::OpNop; int extBuiltins = -1; @@ -7116,7 +7119,7 @@ spv::Id TGlslangToSpvTraverser::createUnaryOperation(glslang::TOperator op, OpDe // Handle all of the atomics in one place, in createAtomicOperation() std::vector operands; operands.push_back(operand); - return createAtomicOperation(op, decorations.precision, typeId, operands, typeProxy, lvalueCoherentFlags); + return createAtomicOperation(op, decorations.precision, typeId, operands, 
typeProxy, lvalueCoherentFlags, opType); } case glslang::EOpBitFieldReverse: @@ -7834,7 +7837,7 @@ spv::Id TGlslangToSpvTraverser::makeSmearedConstant(spv::Id constant, int vector // For glslang ops that map to SPV atomic opCodes spv::Id TGlslangToSpvTraverser::createAtomicOperation(glslang::TOperator op, spv::Decoration /*precision*/, spv::Id typeId, std::vector& operands, glslang::TBasicType typeProxy, - const spv::Builder::AccessChain::CoherentFlags &lvalueCoherentFlags) + const spv::Builder::AccessChain::CoherentFlags &lvalueCoherentFlags, const glslang::TType &opType) { spv::Op opCode = spv::OpNop; @@ -7847,8 +7850,13 @@ spv::Id TGlslangToSpvTraverser::createAtomicOperation(glslang::TOperator op, spv opCode = spv::OpAtomicFAddEXT; builder.addExtension(spv::E_SPV_EXT_shader_atomic_float_add); if (typeProxy == glslang::EbtFloat16) { - builder.addExtension(spv::E_SPV_EXT_shader_atomic_float16_add); - builder.addCapability(spv::CapabilityAtomicFloat16AddEXT); + if (opType.getVectorSize() == 2 || opType.getVectorSize() == 4) { + builder.addExtension(spv::E_SPV_NV_shader_atomic_fp16_vector); + builder.addCapability(spv::CapabilityAtomicFloat16VectorNV); + } else { + builder.addExtension(spv::E_SPV_EXT_shader_atomic_float16_add); + builder.addCapability(spv::CapabilityAtomicFloat16AddEXT); + } } else if (typeProxy == glslang::EbtFloat) { builder.addCapability(spv::CapabilityAtomicFloat32AddEXT); } else { @@ -7866,8 +7874,14 @@ spv::Id TGlslangToSpvTraverser::createAtomicOperation(glslang::TOperator op, spv if (typeProxy == glslang::EbtFloat16 || typeProxy == glslang::EbtFloat || typeProxy == glslang::EbtDouble) { opCode = spv::OpAtomicFMinEXT; builder.addExtension(spv::E_SPV_EXT_shader_atomic_float_min_max); - if (typeProxy == glslang::EbtFloat16) - builder.addCapability(spv::CapabilityAtomicFloat16MinMaxEXT); + if (typeProxy == glslang::EbtFloat16) { + if (opType.getVectorSize() == 2 || opType.getVectorSize() == 4) { + 
builder.addExtension(spv::E_SPV_NV_shader_atomic_fp16_vector); + builder.addCapability(spv::CapabilityAtomicFloat16VectorNV); + } else { + builder.addCapability(spv::CapabilityAtomicFloat16MinMaxEXT); + } + } else if (typeProxy == glslang::EbtFloat) builder.addCapability(spv::CapabilityAtomicFloat32MinMaxEXT); else @@ -7884,8 +7898,14 @@ spv::Id TGlslangToSpvTraverser::createAtomicOperation(glslang::TOperator op, spv if (typeProxy == glslang::EbtFloat16 || typeProxy == glslang::EbtFloat || typeProxy == glslang::EbtDouble) { opCode = spv::OpAtomicFMaxEXT; builder.addExtension(spv::E_SPV_EXT_shader_atomic_float_min_max); - if (typeProxy == glslang::EbtFloat16) - builder.addCapability(spv::CapabilityAtomicFloat16MinMaxEXT); + if (typeProxy == glslang::EbtFloat16) { + if (opType.getVectorSize() == 2 || opType.getVectorSize() == 4) { + builder.addExtension(spv::E_SPV_NV_shader_atomic_fp16_vector); + builder.addCapability(spv::CapabilityAtomicFloat16VectorNV); + } else { + builder.addCapability(spv::CapabilityAtomicFloat16MinMaxEXT); + } + } else if (typeProxy == glslang::EbtFloat) builder.addCapability(spv::CapabilityAtomicFloat32MinMaxEXT); else @@ -7914,6 +7934,12 @@ spv::Id TGlslangToSpvTraverser::createAtomicOperation(glslang::TOperator op, spv case glslang::EOpAtomicExchange: case glslang::EOpImageAtomicExchange: case glslang::EOpAtomicCounterExchange: + if ((typeProxy == glslang::EbtFloat16) && + (opType.getVectorSize() == 2 || opType.getVectorSize() == 4)) { + builder.addExtension(spv::E_SPV_NV_shader_atomic_fp16_vector); + builder.addCapability(spv::CapabilityAtomicFloat16VectorNV); + } + opCode = spv::OpAtomicExchange; break; case glslang::EOpAtomicCompSwap: diff --git a/SPIRV/doc.cpp b/SPIRV/doc.cpp index 5839a9995..4ed6acfe4 100755 --- a/SPIRV/doc.cpp +++ b/SPIRV/doc.cpp @@ -1037,6 +1037,7 @@ const char* CapabilityString(int info) case CapabilityFragmentShadingRateKHR: return "FragmentShadingRateKHR"; case CapabilityDemoteToHelperInvocationEXT: return 
"DemoteToHelperInvocationEXT"; + case CapabilityAtomicFloat16VectorNV: return "AtomicFloat16VectorNV"; case CapabilityShaderClockKHR: return "ShaderClockKHR"; case CapabilityQuadControlKHR: return "QuadControlKHR"; case CapabilityInt64ImageEXT: return "Int64ImageEXT"; diff --git a/SPIRV/spirv.hpp b/SPIRV/spirv.hpp index 1fc3ae6f7..f163f3afc 100644 --- a/SPIRV/spirv.hpp +++ b/SPIRV/spirv.hpp @@ -1108,6 +1108,7 @@ enum Capability { CapabilityShaderInvocationReorderNV = 5383, CapabilityBindlessTextureNV = 5390, CapabilityRayQueryPositionFetchKHR = 5391, + CapabilityAtomicFloat16VectorNV = 5404, CapabilitySubgroupShuffleINTEL = 5568, CapabilitySubgroupBufferBlockIOINTEL = 5569, CapabilitySubgroupImageBlockIOINTEL = 5570, diff --git a/Test/baseResults/spv.nvAtomicFp16Vec.frag.out b/Test/baseResults/spv.nvAtomicFp16Vec.frag.out new file mode 100644 index 000000000..c9212c0ee --- /dev/null +++ b/Test/baseResults/spv.nvAtomicFp16Vec.frag.out @@ -0,0 +1,704 @@ +spv.nvAtomicFp16Vec.frag +// Module Version 10000 +// Generated by (magic number): 8000b +// Id's are bound by 515 + + Capability Shader + Capability Float16 + Capability ImageCubeArray + Capability Image1D + Capability StorageImageExtendedFormats + Capability StorageUniformBufferBlock16 + Capability AtomicFloat16VectorNV + Extension "SPV_EXT_shader_atomic_float_add" + Extension "SPV_EXT_shader_atomic_float_min_max" + Extension "SPV_KHR_16bit_storage" + Extension "SPV_NV_shader_atomic_fp16_vector" + 1: ExtInstImport "GLSL.std.450" + MemoryModel Logical GLSL450 + EntryPoint Fragment 4 "main" + ExecutionMode 4 OriginUpperLeft + Source GLSL 430 + SourceExtension "GL_EXT_shader_explicit_arithmetic_types_float16" + SourceExtension "GL_NV_shader_atomic_fp16_vector" + Name 4 "main" + Name 9 "Buffer" + MemberName 9(Buffer) 0 "dataf16v2" + MemberName 9(Buffer) 1 "dataf16v4" + MemberName 9(Buffer) 2 "resf16v2" + MemberName 9(Buffer) 3 "resf16v4" + Name 11 "buf" + Name 68 "constVec2" + Name 74 "fimage1D" + Name 85 
"fimage1DArray" + Name 97 "fimage2D" + Name 107 "fimage2DArray" + Name 119 "fimageCube" + Name 129 "fimageCubeArray" + Name 139 "fimage3D" + Name 295 "constVec4" + Name 299 "fimage1Dv4" + Name 310 "fimage1DArrayv4" + Name 320 "fimage2Dv4" + Name 330 "fimage2DArrayv4" + Name 340 "fimageCubev4" + Name 350 "fimageCubeArrayv4" + Name 360 "fimage3Dv4" + MemberDecorate 9(Buffer) 0 Offset 0 + MemberDecorate 9(Buffer) 1 Offset 8 + MemberDecorate 9(Buffer) 2 Offset 16 + MemberDecorate 9(Buffer) 3 Offset 24 + Decorate 9(Buffer) BufferBlock + Decorate 11(buf) DescriptorSet 0 + Decorate 11(buf) Binding 0 + Decorate 74(fimage1D) DescriptorSet 0 + Decorate 74(fimage1D) Binding 0 + Decorate 74(fimage1D) Coherent + Decorate 74(fimage1D) Volatile + Decorate 74(fimage1D) Coherent + Decorate 85(fimage1DArray) DescriptorSet 0 + Decorate 85(fimage1DArray) Binding 1 + Decorate 85(fimage1DArray) Coherent + Decorate 85(fimage1DArray) Volatile + Decorate 85(fimage1DArray) Coherent + Decorate 97(fimage2D) DescriptorSet 0 + Decorate 97(fimage2D) Binding 2 + Decorate 97(fimage2D) Coherent + Decorate 97(fimage2D) Volatile + Decorate 97(fimage2D) Coherent + Decorate 107(fimage2DArray) DescriptorSet 0 + Decorate 107(fimage2DArray) Binding 3 + Decorate 107(fimage2DArray) Coherent + Decorate 107(fimage2DArray) Volatile + Decorate 107(fimage2DArray) Coherent + Decorate 119(fimageCube) DescriptorSet 0 + Decorate 119(fimageCube) Binding 5 + Decorate 119(fimageCube) Coherent + Decorate 119(fimageCube) Volatile + Decorate 119(fimageCube) Coherent + Decorate 129(fimageCubeArray) DescriptorSet 0 + Decorate 129(fimageCubeArray) Binding 6 + Decorate 129(fimageCubeArray) Coherent + Decorate 129(fimageCubeArray) Volatile + Decorate 129(fimageCubeArray) Coherent + Decorate 139(fimage3D) DescriptorSet 0 + Decorate 139(fimage3D) Binding 9 + Decorate 139(fimage3D) Coherent + Decorate 139(fimage3D) Volatile + Decorate 139(fimage3D) Coherent + Decorate 299(fimage1Dv4) DescriptorSet 0 + Decorate 299(fimage1Dv4) 
Binding 10 + Decorate 299(fimage1Dv4) Coherent + Decorate 299(fimage1Dv4) Volatile + Decorate 299(fimage1Dv4) Coherent + Decorate 310(fimage1DArrayv4) DescriptorSet 0 + Decorate 310(fimage1DArrayv4) Binding 11 + Decorate 310(fimage1DArrayv4) Coherent + Decorate 310(fimage1DArrayv4) Volatile + Decorate 310(fimage1DArrayv4) Coherent + Decorate 320(fimage2Dv4) DescriptorSet 0 + Decorate 320(fimage2Dv4) Binding 12 + Decorate 320(fimage2Dv4) Coherent + Decorate 320(fimage2Dv4) Volatile + Decorate 320(fimage2Dv4) Coherent + Decorate 330(fimage2DArrayv4) DescriptorSet 0 + Decorate 330(fimage2DArrayv4) Binding 13 + Decorate 330(fimage2DArrayv4) Coherent + Decorate 330(fimage2DArrayv4) Volatile + Decorate 330(fimage2DArrayv4) Coherent + Decorate 340(fimageCubev4) DescriptorSet 0 + Decorate 340(fimageCubev4) Binding 15 + Decorate 340(fimageCubev4) Coherent + Decorate 340(fimageCubev4) Volatile + Decorate 340(fimageCubev4) Coherent + Decorate 350(fimageCubeArrayv4) DescriptorSet 0 + Decorate 350(fimageCubeArrayv4) Binding 16 + Decorate 350(fimageCubeArrayv4) Coherent + Decorate 350(fimageCubeArrayv4) Volatile + Decorate 350(fimageCubeArrayv4) Coherent + Decorate 360(fimage3Dv4) DescriptorSet 0 + Decorate 360(fimage3Dv4) Binding 19 + Decorate 360(fimage3Dv4) Coherent + Decorate 360(fimage3Dv4) Volatile + Decorate 360(fimage3Dv4) Coherent + 2: TypeVoid + 3: TypeFunction 2 + 6: TypeFloat 16 + 7: TypeVector 6(float16_t) 2 + 8: TypeVector 6(float16_t) 4 + 9(Buffer): TypeStruct 7(f16vec2) 8(f16vec4) 7(f16vec2) 8(f16vec4) + 10: TypePointer Uniform 9(Buffer) + 11(buf): 10(ptr) Variable Uniform + 12: TypeInt 32 1 + 13: 12(int) Constant 2 + 14: 12(int) Constant 0 + 15: TypePointer Uniform 7(f16vec2) + 17:6(float16_t) Constant 16896 + 18: 7(f16vec2) ConstantComposite 17 17 + 19: TypeInt 32 0 + 20: 19(int) Constant 1 + 21: 19(int) Constant 0 + 42: 12(int) Constant 3 + 43: 12(int) Constant 1 + 44: TypePointer Uniform 8(f16vec4) + 46: 8(f16vec4) ConstantComposite 17 17 17 17 + 67: 
TypePointer Function 7(f16vec2) + 69:6(float16_t) Constant 16384 + 70: 7(f16vec2) ConstantComposite 69 69 + 71: TypeFloat 32 + 72: TypeImage 71(float) 1D nonsampled format:Rg16f + 73: TypePointer UniformConstant 72 + 74(fimage1D): 73(ptr) Variable UniformConstant + 76: TypePointer Image 7(f16vec2) + 83: TypeImage 71(float) 1D array nonsampled format:Rg16f + 84: TypePointer UniformConstant 83 +85(fimage1DArray): 84(ptr) Variable UniformConstant + 86: TypeVector 12(int) 2 + 87: 86(ivec2) ConstantComposite 14 14 + 95: TypeImage 71(float) 2D nonsampled format:Rg16f + 96: TypePointer UniformConstant 95 + 97(fimage2D): 96(ptr) Variable UniformConstant + 105: TypeImage 71(float) 2D array nonsampled format:Rg16f + 106: TypePointer UniformConstant 105 +107(fimage2DArray): 106(ptr) Variable UniformConstant + 108: TypeVector 12(int) 3 + 109: 108(ivec3) ConstantComposite 14 14 14 + 117: TypeImage 71(float) Cube nonsampled format:Rg16f + 118: TypePointer UniformConstant 117 + 119(fimageCube): 118(ptr) Variable UniformConstant + 127: TypeImage 71(float) Cube array nonsampled format:Rg16f + 128: TypePointer UniformConstant 127 +129(fimageCubeArray): 128(ptr) Variable UniformConstant + 137: TypeImage 71(float) 3D nonsampled format:Rg16f + 138: TypePointer UniformConstant 137 + 139(fimage3D): 138(ptr) Variable UniformConstant + 294: TypePointer Function 8(f16vec4) + 296: 8(f16vec4) ConstantComposite 69 69 69 69 + 297: TypeImage 71(float) 1D nonsampled format:Rgba16f + 298: TypePointer UniformConstant 297 + 299(fimage1Dv4): 298(ptr) Variable UniformConstant + 301: TypePointer Image 8(f16vec4) + 308: TypeImage 71(float) 1D array nonsampled format:Rgba16f + 309: TypePointer UniformConstant 308 +310(fimage1DArrayv4): 309(ptr) Variable UniformConstant + 318: TypeImage 71(float) 2D nonsampled format:Rgba16f + 319: TypePointer UniformConstant 318 + 320(fimage2Dv4): 319(ptr) Variable UniformConstant + 328: TypeImage 71(float) 2D array nonsampled format:Rgba16f + 329: TypePointer 
UniformConstant 328 +330(fimage2DArrayv4): 329(ptr) Variable UniformConstant + 338: TypeImage 71(float) Cube nonsampled format:Rgba16f + 339: TypePointer UniformConstant 338 +340(fimageCubev4): 339(ptr) Variable UniformConstant + 348: TypeImage 71(float) Cube array nonsampled format:Rgba16f + 349: TypePointer UniformConstant 348 +350(fimageCubeArrayv4): 349(ptr) Variable UniformConstant + 358: TypeImage 71(float) 3D nonsampled format:Rgba16f + 359: TypePointer UniformConstant 358 + 360(fimage3Dv4): 359(ptr) Variable UniformConstant + 4(main): 2 Function None 3 + 5: Label + 68(constVec2): 67(ptr) Variable Function + 295(constVec4): 294(ptr) Variable Function + 16: 15(ptr) AccessChain 11(buf) 14 + 22: 7(f16vec2) AtomicFAddEXT 16 20 21 18 + 23: 15(ptr) AccessChain 11(buf) 13 + Store 23 22 + 24: 15(ptr) AccessChain 11(buf) 14 + 25: 7(f16vec2) AtomicFMinEXT 24 20 21 18 + 26: 15(ptr) AccessChain 11(buf) 13 + 27: 7(f16vec2) Load 26 + 28: 7(f16vec2) FAdd 27 25 + 29: 15(ptr) AccessChain 11(buf) 13 + Store 29 28 + 30: 15(ptr) AccessChain 11(buf) 14 + 31: 7(f16vec2) AtomicFMaxEXT 30 20 21 18 + 32: 15(ptr) AccessChain 11(buf) 13 + 33: 7(f16vec2) Load 32 + 34: 7(f16vec2) FAdd 33 31 + 35: 15(ptr) AccessChain 11(buf) 13 + Store 35 34 + 36: 15(ptr) AccessChain 11(buf) 14 + 37: 7(f16vec2) AtomicExchange 36 20 21 18 + 38: 15(ptr) AccessChain 11(buf) 13 + 39: 7(f16vec2) Load 38 + 40: 7(f16vec2) FAdd 39 37 + 41: 15(ptr) AccessChain 11(buf) 13 + Store 41 40 + 45: 44(ptr) AccessChain 11(buf) 43 + 47: 8(f16vec4) AtomicFAddEXT 45 20 21 46 + 48: 44(ptr) AccessChain 11(buf) 42 + Store 48 47 + 49: 44(ptr) AccessChain 11(buf) 43 + 50: 8(f16vec4) AtomicFMinEXT 49 20 21 46 + 51: 44(ptr) AccessChain 11(buf) 42 + 52: 8(f16vec4) Load 51 + 53: 8(f16vec4) FAdd 52 50 + 54: 44(ptr) AccessChain 11(buf) 42 + Store 54 53 + 55: 44(ptr) AccessChain 11(buf) 43 + 56: 8(f16vec4) AtomicFMaxEXT 55 20 21 46 + 57: 44(ptr) AccessChain 11(buf) 42 + 58: 8(f16vec4) Load 57 + 59: 8(f16vec4) FAdd 58 56 + 60: 44(ptr) 
AccessChain 11(buf) 42 + Store 60 59 + 61: 44(ptr) AccessChain 11(buf) 43 + 62: 8(f16vec4) AtomicExchange 61 20 21 46 + 63: 44(ptr) AccessChain 11(buf) 42 + 64: 8(f16vec4) Load 63 + 65: 8(f16vec4) FAdd 64 62 + 66: 44(ptr) AccessChain 11(buf) 42 + Store 66 65 + Store 68(constVec2) 70 + 75: 7(f16vec2) Load 68(constVec2) + 77: 76(ptr) ImageTexelPointer 74(fimage1D) 14 21 + 78: 7(f16vec2) AtomicFAddEXT 77 20 21 75 + 79: 15(ptr) AccessChain 11(buf) 13 + 80: 7(f16vec2) Load 79 + 81: 7(f16vec2) FAdd 80 78 + 82: 15(ptr) AccessChain 11(buf) 13 + Store 82 81 + 88: 7(f16vec2) Load 68(constVec2) + 89: 76(ptr) ImageTexelPointer 85(fimage1DArray) 87 21 + 90: 7(f16vec2) AtomicFAddEXT 89 20 21 88 + 91: 15(ptr) AccessChain 11(buf) 13 + 92: 7(f16vec2) Load 91 + 93: 7(f16vec2) FAdd 92 90 + 94: 15(ptr) AccessChain 11(buf) 13 + Store 94 93 + 98: 7(f16vec2) Load 68(constVec2) + 99: 76(ptr) ImageTexelPointer 97(fimage2D) 87 21 + 100: 7(f16vec2) AtomicFAddEXT 99 20 21 98 + 101: 15(ptr) AccessChain 11(buf) 13 + 102: 7(f16vec2) Load 101 + 103: 7(f16vec2) FAdd 102 100 + 104: 15(ptr) AccessChain 11(buf) 13 + Store 104 103 + 110: 7(f16vec2) Load 68(constVec2) + 111: 76(ptr) ImageTexelPointer 107(fimage2DArray) 109 21 + 112: 7(f16vec2) AtomicFAddEXT 111 20 21 110 + 113: 15(ptr) AccessChain 11(buf) 13 + 114: 7(f16vec2) Load 113 + 115: 7(f16vec2) FAdd 114 112 + 116: 15(ptr) AccessChain 11(buf) 13 + Store 116 115 + 120: 7(f16vec2) Load 68(constVec2) + 121: 76(ptr) ImageTexelPointer 119(fimageCube) 109 21 + 122: 7(f16vec2) AtomicFAddEXT 121 20 21 120 + 123: 15(ptr) AccessChain 11(buf) 13 + 124: 7(f16vec2) Load 123 + 125: 7(f16vec2) FAdd 124 122 + 126: 15(ptr) AccessChain 11(buf) 13 + Store 126 125 + 130: 7(f16vec2) Load 68(constVec2) + 131: 76(ptr) ImageTexelPointer 129(fimageCubeArray) 109 21 + 132: 7(f16vec2) AtomicFAddEXT 131 20 21 130 + 133: 15(ptr) AccessChain 11(buf) 13 + 134: 7(f16vec2) Load 133 + 135: 7(f16vec2) FAdd 134 132 + 136: 15(ptr) AccessChain 11(buf) 13 + Store 136 135 + 140: 
7(f16vec2) Load 68(constVec2) + 141: 76(ptr) ImageTexelPointer 139(fimage3D) 109 21 + 142: 7(f16vec2) AtomicFAddEXT 141 20 21 140 + 143: 15(ptr) AccessChain 11(buf) 13 + 144: 7(f16vec2) Load 143 + 145: 7(f16vec2) FAdd 144 142 + 146: 15(ptr) AccessChain 11(buf) 13 + Store 146 145 + 147: 7(f16vec2) Load 68(constVec2) + 148: 76(ptr) ImageTexelPointer 74(fimage1D) 14 21 + 149: 7(f16vec2) AtomicFMinEXT 148 20 21 147 + 150: 15(ptr) AccessChain 11(buf) 13 + 151: 7(f16vec2) Load 150 + 152: 7(f16vec2) FAdd 151 149 + 153: 15(ptr) AccessChain 11(buf) 13 + Store 153 152 + 154: 7(f16vec2) Load 68(constVec2) + 155: 76(ptr) ImageTexelPointer 85(fimage1DArray) 87 21 + 156: 7(f16vec2) AtomicFMinEXT 155 20 21 154 + 157: 15(ptr) AccessChain 11(buf) 13 + 158: 7(f16vec2) Load 157 + 159: 7(f16vec2) FAdd 158 156 + 160: 15(ptr) AccessChain 11(buf) 13 + Store 160 159 + 161: 7(f16vec2) Load 68(constVec2) + 162: 76(ptr) ImageTexelPointer 97(fimage2D) 87 21 + 163: 7(f16vec2) AtomicFMinEXT 162 20 21 161 + 164: 15(ptr) AccessChain 11(buf) 13 + 165: 7(f16vec2) Load 164 + 166: 7(f16vec2) FAdd 165 163 + 167: 15(ptr) AccessChain 11(buf) 13 + Store 167 166 + 168: 7(f16vec2) Load 68(constVec2) + 169: 76(ptr) ImageTexelPointer 107(fimage2DArray) 109 21 + 170: 7(f16vec2) AtomicFMinEXT 169 20 21 168 + 171: 15(ptr) AccessChain 11(buf) 13 + 172: 7(f16vec2) Load 171 + 173: 7(f16vec2) FAdd 172 170 + 174: 15(ptr) AccessChain 11(buf) 13 + Store 174 173 + 175: 7(f16vec2) Load 68(constVec2) + 176: 76(ptr) ImageTexelPointer 119(fimageCube) 109 21 + 177: 7(f16vec2) AtomicFMinEXT 176 20 21 175 + 178: 15(ptr) AccessChain 11(buf) 13 + 179: 7(f16vec2) Load 178 + 180: 7(f16vec2) FAdd 179 177 + 181: 15(ptr) AccessChain 11(buf) 13 + Store 181 180 + 182: 7(f16vec2) Load 68(constVec2) + 183: 76(ptr) ImageTexelPointer 129(fimageCubeArray) 109 21 + 184: 7(f16vec2) AtomicFMinEXT 183 20 21 182 + 185: 15(ptr) AccessChain 11(buf) 13 + 186: 7(f16vec2) Load 185 + 187: 7(f16vec2) FAdd 186 184 + 188: 15(ptr) AccessChain 11(buf) 13 
+ Store 188 187 + 189: 7(f16vec2) Load 68(constVec2) + 190: 76(ptr) ImageTexelPointer 139(fimage3D) 109 21 + 191: 7(f16vec2) AtomicFMinEXT 190 20 21 189 + 192: 15(ptr) AccessChain 11(buf) 13 + 193: 7(f16vec2) Load 192 + 194: 7(f16vec2) FAdd 193 191 + 195: 15(ptr) AccessChain 11(buf) 13 + Store 195 194 + 196: 7(f16vec2) Load 68(constVec2) + 197: 76(ptr) ImageTexelPointer 74(fimage1D) 14 21 + 198: 7(f16vec2) AtomicFMaxEXT 197 20 21 196 + 199: 15(ptr) AccessChain 11(buf) 13 + 200: 7(f16vec2) Load 199 + 201: 7(f16vec2) FAdd 200 198 + 202: 15(ptr) AccessChain 11(buf) 13 + Store 202 201 + 203: 7(f16vec2) Load 68(constVec2) + 204: 76(ptr) ImageTexelPointer 85(fimage1DArray) 87 21 + 205: 7(f16vec2) AtomicFMaxEXT 204 20 21 203 + 206: 15(ptr) AccessChain 11(buf) 13 + 207: 7(f16vec2) Load 206 + 208: 7(f16vec2) FAdd 207 205 + 209: 15(ptr) AccessChain 11(buf) 13 + Store 209 208 + 210: 7(f16vec2) Load 68(constVec2) + 211: 76(ptr) ImageTexelPointer 97(fimage2D) 87 21 + 212: 7(f16vec2) AtomicFMaxEXT 211 20 21 210 + 213: 15(ptr) AccessChain 11(buf) 13 + 214: 7(f16vec2) Load 213 + 215: 7(f16vec2) FAdd 214 212 + 216: 15(ptr) AccessChain 11(buf) 13 + Store 216 215 + 217: 7(f16vec2) Load 68(constVec2) + 218: 76(ptr) ImageTexelPointer 107(fimage2DArray) 109 21 + 219: 7(f16vec2) AtomicFMaxEXT 218 20 21 217 + 220: 15(ptr) AccessChain 11(buf) 13 + 221: 7(f16vec2) Load 220 + 222: 7(f16vec2) FAdd 221 219 + 223: 15(ptr) AccessChain 11(buf) 13 + Store 223 222 + 224: 7(f16vec2) Load 68(constVec2) + 225: 76(ptr) ImageTexelPointer 119(fimageCube) 109 21 + 226: 7(f16vec2) AtomicFMaxEXT 225 20 21 224 + 227: 15(ptr) AccessChain 11(buf) 13 + 228: 7(f16vec2) Load 227 + 229: 7(f16vec2) FAdd 228 226 + 230: 15(ptr) AccessChain 11(buf) 13 + Store 230 229 + 231: 7(f16vec2) Load 68(constVec2) + 232: 76(ptr) ImageTexelPointer 129(fimageCubeArray) 109 21 + 233: 7(f16vec2) AtomicFMaxEXT 232 20 21 231 + 234: 15(ptr) AccessChain 11(buf) 13 + 235: 7(f16vec2) Load 234 + 236: 7(f16vec2) FAdd 235 233 + 237: 15(ptr) 
AccessChain 11(buf) 13 + Store 237 236 + 238: 7(f16vec2) Load 68(constVec2) + 239: 76(ptr) ImageTexelPointer 139(fimage3D) 109 21 + 240: 7(f16vec2) AtomicFMaxEXT 239 20 21 238 + 241: 15(ptr) AccessChain 11(buf) 13 + 242: 7(f16vec2) Load 241 + 243: 7(f16vec2) FAdd 242 240 + 244: 15(ptr) AccessChain 11(buf) 13 + Store 244 243 + 245: 7(f16vec2) Load 68(constVec2) + 246: 76(ptr) ImageTexelPointer 74(fimage1D) 14 21 + 247: 7(f16vec2) AtomicExchange 246 20 21 245 + 248: 15(ptr) AccessChain 11(buf) 13 + 249: 7(f16vec2) Load 248 + 250: 7(f16vec2) FAdd 249 247 + 251: 15(ptr) AccessChain 11(buf) 13 + Store 251 250 + 252: 7(f16vec2) Load 68(constVec2) + 253: 76(ptr) ImageTexelPointer 85(fimage1DArray) 87 21 + 254: 7(f16vec2) AtomicExchange 253 20 21 252 + 255: 15(ptr) AccessChain 11(buf) 13 + 256: 7(f16vec2) Load 255 + 257: 7(f16vec2) FAdd 256 254 + 258: 15(ptr) AccessChain 11(buf) 13 + Store 258 257 + 259: 7(f16vec2) Load 68(constVec2) + 260: 76(ptr) ImageTexelPointer 97(fimage2D) 87 21 + 261: 7(f16vec2) AtomicExchange 260 20 21 259 + 262: 15(ptr) AccessChain 11(buf) 13 + 263: 7(f16vec2) Load 262 + 264: 7(f16vec2) FAdd 263 261 + 265: 15(ptr) AccessChain 11(buf) 13 + Store 265 264 + 266: 7(f16vec2) Load 68(constVec2) + 267: 76(ptr) ImageTexelPointer 107(fimage2DArray) 109 21 + 268: 7(f16vec2) AtomicExchange 267 20 21 266 + 269: 15(ptr) AccessChain 11(buf) 13 + 270: 7(f16vec2) Load 269 + 271: 7(f16vec2) FAdd 270 268 + 272: 15(ptr) AccessChain 11(buf) 13 + Store 272 271 + 273: 7(f16vec2) Load 68(constVec2) + 274: 76(ptr) ImageTexelPointer 119(fimageCube) 109 21 + 275: 7(f16vec2) AtomicExchange 274 20 21 273 + 276: 15(ptr) AccessChain 11(buf) 13 + 277: 7(f16vec2) Load 276 + 278: 7(f16vec2) FAdd 277 275 + 279: 15(ptr) AccessChain 11(buf) 13 + Store 279 278 + 280: 7(f16vec2) Load 68(constVec2) + 281: 76(ptr) ImageTexelPointer 129(fimageCubeArray) 109 21 + 282: 7(f16vec2) AtomicExchange 281 20 21 280 + 283: 15(ptr) AccessChain 11(buf) 13 + 284: 7(f16vec2) Load 283 + 285: 7(f16vec2) 
FAdd 284 282 + 286: 15(ptr) AccessChain 11(buf) 13 + Store 286 285 + 287: 7(f16vec2) Load 68(constVec2) + 288: 76(ptr) ImageTexelPointer 139(fimage3D) 109 21 + 289: 7(f16vec2) AtomicExchange 288 20 21 287 + 290: 15(ptr) AccessChain 11(buf) 13 + 291: 7(f16vec2) Load 290 + 292: 7(f16vec2) FAdd 291 289 + 293: 15(ptr) AccessChain 11(buf) 13 + Store 293 292 + Store 295(constVec4) 296 + 300: 8(f16vec4) Load 295(constVec4) + 302: 301(ptr) ImageTexelPointer 299(fimage1Dv4) 14 21 + 303: 8(f16vec4) AtomicFAddEXT 302 20 21 300 + 304: 44(ptr) AccessChain 11(buf) 42 + 305: 8(f16vec4) Load 304 + 306: 8(f16vec4) FAdd 305 303 + 307: 44(ptr) AccessChain 11(buf) 42 + Store 307 306 + 311: 8(f16vec4) Load 295(constVec4) + 312: 301(ptr) ImageTexelPointer 310(fimage1DArrayv4) 87 21 + 313: 8(f16vec4) AtomicFAddEXT 312 20 21 311 + 314: 44(ptr) AccessChain 11(buf) 42 + 315: 8(f16vec4) Load 314 + 316: 8(f16vec4) FAdd 315 313 + 317: 44(ptr) AccessChain 11(buf) 42 + Store 317 316 + 321: 8(f16vec4) Load 295(constVec4) + 322: 301(ptr) ImageTexelPointer 320(fimage2Dv4) 87 21 + 323: 8(f16vec4) AtomicFAddEXT 322 20 21 321 + 324: 44(ptr) AccessChain 11(buf) 42 + 325: 8(f16vec4) Load 324 + 326: 8(f16vec4) FAdd 325 323 + 327: 44(ptr) AccessChain 11(buf) 42 + Store 327 326 + 331: 8(f16vec4) Load 295(constVec4) + 332: 301(ptr) ImageTexelPointer 330(fimage2DArrayv4) 109 21 + 333: 8(f16vec4) AtomicFAddEXT 332 20 21 331 + 334: 44(ptr) AccessChain 11(buf) 42 + 335: 8(f16vec4) Load 334 + 336: 8(f16vec4) FAdd 335 333 + 337: 44(ptr) AccessChain 11(buf) 42 + Store 337 336 + 341: 8(f16vec4) Load 295(constVec4) + 342: 301(ptr) ImageTexelPointer 340(fimageCubev4) 109 21 + 343: 8(f16vec4) AtomicFAddEXT 342 20 21 341 + 344: 44(ptr) AccessChain 11(buf) 42 + 345: 8(f16vec4) Load 344 + 346: 8(f16vec4) FAdd 345 343 + 347: 44(ptr) AccessChain 11(buf) 42 + Store 347 346 + 351: 8(f16vec4) Load 295(constVec4) + 352: 301(ptr) ImageTexelPointer 350(fimageCubeArrayv4) 109 21 + 353: 8(f16vec4) AtomicFAddEXT 352 20 21 351 + 
354: 44(ptr) AccessChain 11(buf) 42 + 355: 8(f16vec4) Load 354 + 356: 8(f16vec4) FAdd 355 353 + 357: 44(ptr) AccessChain 11(buf) 42 + Store 357 356 + 361: 8(f16vec4) Load 295(constVec4) + 362: 301(ptr) ImageTexelPointer 360(fimage3Dv4) 109 21 + 363: 8(f16vec4) AtomicFAddEXT 362 20 21 361 + 364: 44(ptr) AccessChain 11(buf) 42 + 365: 8(f16vec4) Load 364 + 366: 8(f16vec4) FAdd 365 363 + 367: 44(ptr) AccessChain 11(buf) 42 + Store 367 366 + 368: 8(f16vec4) Load 295(constVec4) + 369: 301(ptr) ImageTexelPointer 299(fimage1Dv4) 14 21 + 370: 8(f16vec4) AtomicFMinEXT 369 20 21 368 + 371: 44(ptr) AccessChain 11(buf) 42 + 372: 8(f16vec4) Load 371 + 373: 8(f16vec4) FAdd 372 370 + 374: 44(ptr) AccessChain 11(buf) 42 + Store 374 373 + 375: 8(f16vec4) Load 295(constVec4) + 376: 301(ptr) ImageTexelPointer 310(fimage1DArrayv4) 87 21 + 377: 8(f16vec4) AtomicFMinEXT 376 20 21 375 + 378: 44(ptr) AccessChain 11(buf) 42 + 379: 8(f16vec4) Load 378 + 380: 8(f16vec4) FAdd 379 377 + 381: 44(ptr) AccessChain 11(buf) 42 + Store 381 380 + 382: 8(f16vec4) Load 295(constVec4) + 383: 301(ptr) ImageTexelPointer 320(fimage2Dv4) 87 21 + 384: 8(f16vec4) AtomicFMinEXT 383 20 21 382 + 385: 44(ptr) AccessChain 11(buf) 42 + 386: 8(f16vec4) Load 385 + 387: 8(f16vec4) FAdd 386 384 + 388: 44(ptr) AccessChain 11(buf) 42 + Store 388 387 + 389: 8(f16vec4) Load 295(constVec4) + 390: 301(ptr) ImageTexelPointer 330(fimage2DArrayv4) 109 21 + 391: 8(f16vec4) AtomicFMinEXT 390 20 21 389 + 392: 44(ptr) AccessChain 11(buf) 42 + 393: 8(f16vec4) Load 392 + 394: 8(f16vec4) FAdd 393 391 + 395: 44(ptr) AccessChain 11(buf) 42 + Store 395 394 + 396: 8(f16vec4) Load 295(constVec4) + 397: 301(ptr) ImageTexelPointer 340(fimageCubev4) 109 21 + 398: 8(f16vec4) AtomicFMinEXT 397 20 21 396 + 399: 44(ptr) AccessChain 11(buf) 42 + 400: 8(f16vec4) Load 399 + 401: 8(f16vec4) FAdd 400 398 + 402: 44(ptr) AccessChain 11(buf) 42 + Store 402 401 + 403: 8(f16vec4) Load 295(constVec4) + 404: 301(ptr) ImageTexelPointer 350(fimageCubeArrayv4) 
109 21 + 405: 8(f16vec4) AtomicFMinEXT 404 20 21 403 + 406: 44(ptr) AccessChain 11(buf) 42 + 407: 8(f16vec4) Load 406 + 408: 8(f16vec4) FAdd 407 405 + 409: 44(ptr) AccessChain 11(buf) 42 + Store 409 408 + 410: 8(f16vec4) Load 295(constVec4) + 411: 301(ptr) ImageTexelPointer 360(fimage3Dv4) 109 21 + 412: 8(f16vec4) AtomicFMinEXT 411 20 21 410 + 413: 44(ptr) AccessChain 11(buf) 42 + 414: 8(f16vec4) Load 413 + 415: 8(f16vec4) FAdd 414 412 + 416: 44(ptr) AccessChain 11(buf) 42 + Store 416 415 + 417: 8(f16vec4) Load 295(constVec4) + 418: 301(ptr) ImageTexelPointer 299(fimage1Dv4) 14 21 + 419: 8(f16vec4) AtomicFMaxEXT 418 20 21 417 + 420: 44(ptr) AccessChain 11(buf) 42 + 421: 8(f16vec4) Load 420 + 422: 8(f16vec4) FAdd 421 419 + 423: 44(ptr) AccessChain 11(buf) 42 + Store 423 422 + 424: 8(f16vec4) Load 295(constVec4) + 425: 301(ptr) ImageTexelPointer 310(fimage1DArrayv4) 87 21 + 426: 8(f16vec4) AtomicFMaxEXT 425 20 21 424 + 427: 44(ptr) AccessChain 11(buf) 42 + 428: 8(f16vec4) Load 427 + 429: 8(f16vec4) FAdd 428 426 + 430: 44(ptr) AccessChain 11(buf) 42 + Store 430 429 + 431: 8(f16vec4) Load 295(constVec4) + 432: 301(ptr) ImageTexelPointer 320(fimage2Dv4) 87 21 + 433: 8(f16vec4) AtomicFMaxEXT 432 20 21 431 + 434: 44(ptr) AccessChain 11(buf) 42 + 435: 8(f16vec4) Load 434 + 436: 8(f16vec4) FAdd 435 433 + 437: 44(ptr) AccessChain 11(buf) 42 + Store 437 436 + 438: 8(f16vec4) Load 295(constVec4) + 439: 301(ptr) ImageTexelPointer 330(fimage2DArrayv4) 109 21 + 440: 8(f16vec4) AtomicFMaxEXT 439 20 21 438 + 441: 44(ptr) AccessChain 11(buf) 42 + 442: 8(f16vec4) Load 441 + 443: 8(f16vec4) FAdd 442 440 + 444: 44(ptr) AccessChain 11(buf) 42 + Store 444 443 + 445: 8(f16vec4) Load 295(constVec4) + 446: 301(ptr) ImageTexelPointer 340(fimageCubev4) 109 21 + 447: 8(f16vec4) AtomicFMaxEXT 446 20 21 445 + 448: 44(ptr) AccessChain 11(buf) 42 + 449: 8(f16vec4) Load 448 + 450: 8(f16vec4) FAdd 449 447 + 451: 44(ptr) AccessChain 11(buf) 42 + Store 451 450 + 452: 8(f16vec4) Load 295(constVec4) + 
453: 301(ptr) ImageTexelPointer 350(fimageCubeArrayv4) 109 21 + 454: 8(f16vec4) AtomicFMaxEXT 453 20 21 452 + 455: 44(ptr) AccessChain 11(buf) 42 + 456: 8(f16vec4) Load 455 + 457: 8(f16vec4) FAdd 456 454 + 458: 44(ptr) AccessChain 11(buf) 42 + Store 458 457 + 459: 8(f16vec4) Load 295(constVec4) + 460: 301(ptr) ImageTexelPointer 360(fimage3Dv4) 109 21 + 461: 8(f16vec4) AtomicFMaxEXT 460 20 21 459 + 462: 44(ptr) AccessChain 11(buf) 42 + 463: 8(f16vec4) Load 462 + 464: 8(f16vec4) FAdd 463 461 + 465: 44(ptr) AccessChain 11(buf) 42 + Store 465 464 + 466: 8(f16vec4) Load 295(constVec4) + 467: 301(ptr) ImageTexelPointer 299(fimage1Dv4) 14 21 + 468: 8(f16vec4) AtomicExchange 467 20 21 466 + 469: 44(ptr) AccessChain 11(buf) 42 + 470: 8(f16vec4) Load 469 + 471: 8(f16vec4) FAdd 470 468 + 472: 44(ptr) AccessChain 11(buf) 42 + Store 472 471 + 473: 8(f16vec4) Load 295(constVec4) + 474: 301(ptr) ImageTexelPointer 310(fimage1DArrayv4) 87 21 + 475: 8(f16vec4) AtomicExchange 474 20 21 473 + 476: 44(ptr) AccessChain 11(buf) 42 + 477: 8(f16vec4) Load 476 + 478: 8(f16vec4) FAdd 477 475 + 479: 44(ptr) AccessChain 11(buf) 42 + Store 479 478 + 480: 8(f16vec4) Load 295(constVec4) + 481: 301(ptr) ImageTexelPointer 320(fimage2Dv4) 87 21 + 482: 8(f16vec4) AtomicExchange 481 20 21 480 + 483: 44(ptr) AccessChain 11(buf) 42 + 484: 8(f16vec4) Load 483 + 485: 8(f16vec4) FAdd 484 482 + 486: 44(ptr) AccessChain 11(buf) 42 + Store 486 485 + 487: 8(f16vec4) Load 295(constVec4) + 488: 301(ptr) ImageTexelPointer 330(fimage2DArrayv4) 109 21 + 489: 8(f16vec4) AtomicExchange 488 20 21 487 + 490: 44(ptr) AccessChain 11(buf) 42 + 491: 8(f16vec4) Load 490 + 492: 8(f16vec4) FAdd 491 489 + 493: 44(ptr) AccessChain 11(buf) 42 + Store 493 492 + 494: 8(f16vec4) Load 295(constVec4) + 495: 301(ptr) ImageTexelPointer 340(fimageCubev4) 109 21 + 496: 8(f16vec4) AtomicExchange 495 20 21 494 + 497: 44(ptr) AccessChain 11(buf) 42 + 498: 8(f16vec4) Load 497 + 499: 8(f16vec4) FAdd 498 496 + 500: 44(ptr) AccessChain 11(buf) 
42 + Store 500 499 + 501: 8(f16vec4) Load 295(constVec4) + 502: 301(ptr) ImageTexelPointer 350(fimageCubeArrayv4) 109 21 + 503: 8(f16vec4) AtomicExchange 502 20 21 501 + 504: 44(ptr) AccessChain 11(buf) 42 + 505: 8(f16vec4) Load 504 + 506: 8(f16vec4) FAdd 505 503 + 507: 44(ptr) AccessChain 11(buf) 42 + Store 507 506 + 508: 8(f16vec4) Load 295(constVec4) + 509: 301(ptr) ImageTexelPointer 360(fimage3Dv4) 109 21 + 510: 8(f16vec4) AtomicExchange 509 20 21 508 + 511: 44(ptr) AccessChain 11(buf) 42 + 512: 8(f16vec4) Load 511 + 513: 8(f16vec4) FAdd 512 510 + 514: 44(ptr) AccessChain 11(buf) 42 + Store 514 513 + Return + FunctionEnd diff --git a/Test/spv.nvAtomicFp16Vec.frag b/Test/spv.nvAtomicFp16Vec.frag new file mode 100644 index 000000000..de18b1b8f --- /dev/null +++ b/Test/spv.nvAtomicFp16Vec.frag @@ -0,0 +1,113 @@ +#version 430 + +#extension GL_NV_shader_atomic_fp16_vector : enable +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable + +layout(binding = 0) buffer Buffer +{ + f16vec2 dataf16v2; + f16vec4 dataf16v4; + + f16vec2 resf16v2; + f16vec4 resf16v4; + +}buf; + +layout(binding = 0, rg16f) volatile coherent uniform image1D fimage1D; +layout(binding = 1, rg16f) volatile coherent uniform image1DArray fimage1DArray; +layout(binding = 2, rg16f) volatile coherent uniform image2D fimage2D; +layout(binding = 3, rg16f) volatile coherent uniform image2DArray fimage2DArray; +layout(binding = 5, rg16f) volatile coherent uniform imageCube fimageCube; +layout(binding = 6, rg16f) volatile coherent uniform imageCubeArray fimageCubeArray; +layout(binding = 9, rg16f) volatile coherent uniform image3D fimage3D; + +layout(binding = 10, rgba16f) volatile coherent uniform image1D fimage1Dv4; +layout(binding = 11, rgba16f) volatile coherent uniform image1DArray fimage1DArrayv4; +layout(binding = 12, rgba16f) volatile coherent uniform image2D fimage2Dv4; +layout(binding = 13, rgba16f) volatile coherent uniform image2DArray fimage2DArrayv4; +layout(binding = 15, rgba16f) 
volatile coherent uniform imageCube fimageCubev4; +layout(binding = 16, rgba16f) volatile coherent uniform imageCubeArray fimageCubeArrayv4; +layout(binding = 19, rgba16f) volatile coherent uniform image3D fimage3Dv4; + +void main() +{ + // atomic* functions supported with f16vec2 + buf.resf16v2 = atomicAdd(buf.dataf16v2, f16vec2(3)); + buf.resf16v2 += atomicMin(buf.dataf16v2, f16vec2(3)); + buf.resf16v2 += atomicMax(buf.dataf16v2, f16vec2(3)); + buf.resf16v2 += atomicExchange(buf.dataf16v2, f16vec2(3)); + + // atomic* functions supported with f16vec4 + buf.resf16v4 = atomicAdd(buf.dataf16v4, f16vec4(3)); + buf.resf16v4 += atomicMin(buf.dataf16v4, f16vec4(3)); + buf.resf16v4 += atomicMax(buf.dataf16v4, f16vec4(3)); + buf.resf16v4 += atomicExchange(buf.dataf16v4, f16vec4(3)); + + // imageAtomic* functions supported with f16vec2 and only format supported is rg16f + f16vec2 constVec2 = f16vec2(2.0); + buf.resf16v2 += imageAtomicAdd(fimage1D, int(0), constVec2); + buf.resf16v2 += imageAtomicAdd(fimage1DArray, ivec2(0,0), constVec2); + buf.resf16v2 += imageAtomicAdd(fimage2D, ivec2(0,0), constVec2); + buf.resf16v2 += imageAtomicAdd(fimage2DArray, ivec3(0,0, 0), constVec2); + buf.resf16v2 += imageAtomicAdd(fimageCube, ivec3(0,0,0), constVec2); + buf.resf16v2 += imageAtomicAdd(fimageCubeArray, ivec3(0,0,0), constVec2); + buf.resf16v2 += imageAtomicAdd(fimage3D, ivec3(0,0,0), constVec2); + + buf.resf16v2 += imageAtomicMin(fimage1D, int(0), constVec2); + buf.resf16v2 += imageAtomicMin(fimage1DArray, ivec2(0,0), constVec2); + buf.resf16v2 += imageAtomicMin(fimage2D, ivec2(0,0), constVec2); + buf.resf16v2 += imageAtomicMin(fimage2DArray, ivec3(0,0, 0), constVec2); + buf.resf16v2 += imageAtomicMin(fimageCube, ivec3(0,0,0), constVec2); + buf.resf16v2 += imageAtomicMin(fimageCubeArray, ivec3(0,0,0), constVec2); + buf.resf16v2 += imageAtomicMin(fimage3D, ivec3(0,0,0), constVec2); + + buf.resf16v2 += imageAtomicMax(fimage1D, int(0), constVec2); + buf.resf16v2 += 
imageAtomicMax(fimage1DArray, ivec2(0,0), constVec2); + buf.resf16v2 += imageAtomicMax(fimage2D, ivec2(0,0), constVec2); + buf.resf16v2 += imageAtomicMax(fimage2DArray, ivec3(0,0, 0), constVec2); + buf.resf16v2 += imageAtomicMax(fimageCube, ivec3(0,0,0), constVec2); + buf.resf16v2 += imageAtomicMax(fimageCubeArray, ivec3(0,0,0), constVec2); + buf.resf16v2 += imageAtomicMax(fimage3D, ivec3(0,0,0), constVec2); + + buf.resf16v2 += imageAtomicExchange(fimage1D, int(0), constVec2); + buf.resf16v2 += imageAtomicExchange(fimage1DArray, ivec2(0,0), constVec2); + buf.resf16v2 += imageAtomicExchange(fimage2D, ivec2(0,0), constVec2); + buf.resf16v2 += imageAtomicExchange(fimage2DArray, ivec3(0,0, 0), constVec2); + buf.resf16v2 += imageAtomicExchange(fimageCube, ivec3(0,0,0), constVec2); + buf.resf16v2 += imageAtomicExchange(fimageCubeArray, ivec3(0,0,0), constVec2); + buf.resf16v2 += imageAtomicExchange(fimage3D, ivec3(0,0,0), constVec2); + + // imageAtomic* functions supported with f16vec4 and only format supported is rgba16f + f16vec4 constVec4 = f16vec4(2.0); + buf.resf16v4 += imageAtomicAdd(fimage1Dv4, int(0), constVec4); + buf.resf16v4 += imageAtomicAdd(fimage1DArrayv4, ivec2(0,0), constVec4); + buf.resf16v4 += imageAtomicAdd(fimage2Dv4, ivec2(0,0), constVec4); + buf.resf16v4 += imageAtomicAdd(fimage2DArrayv4, ivec3(0,0, 0), constVec4); + buf.resf16v4 += imageAtomicAdd(fimageCubev4, ivec3(0,0,0), constVec4); + buf.resf16v4 += imageAtomicAdd(fimageCubeArrayv4, ivec3(0,0,0), constVec4); + buf.resf16v4 += imageAtomicAdd(fimage3Dv4, ivec3(0,0,0), constVec4); + + buf.resf16v4 += imageAtomicMin(fimage1Dv4, int(0), constVec4); + buf.resf16v4 += imageAtomicMin(fimage1DArrayv4, ivec2(0,0), constVec4); + buf.resf16v4 += imageAtomicMin(fimage2Dv4, ivec2(0,0), constVec4); + buf.resf16v4 += imageAtomicMin(fimage2DArrayv4, ivec3(0,0, 0), constVec4); + buf.resf16v4 += imageAtomicMin(fimageCubev4, ivec3(0,0,0), constVec4); + buf.resf16v4 += imageAtomicMin(fimageCubeArrayv4, 
ivec3(0,0,0), constVec4); + buf.resf16v4 += imageAtomicMin(fimage3Dv4, ivec3(0,0,0), constVec4); + + buf.resf16v4 += imageAtomicMax(fimage1Dv4, int(0), constVec4); + buf.resf16v4 += imageAtomicMax(fimage1DArrayv4, ivec2(0,0), constVec4); + buf.resf16v4 += imageAtomicMax(fimage2Dv4, ivec2(0,0), constVec4); + buf.resf16v4 += imageAtomicMax(fimage2DArrayv4, ivec3(0,0, 0), constVec4); + buf.resf16v4 += imageAtomicMax(fimageCubev4, ivec3(0,0,0), constVec4); + buf.resf16v4 += imageAtomicMax(fimageCubeArrayv4, ivec3(0,0,0), constVec4); + buf.resf16v4 += imageAtomicMax(fimage3Dv4, ivec3(0,0,0), constVec4); + + buf.resf16v4 += imageAtomicExchange(fimage1Dv4, int(0), constVec4); + buf.resf16v4 += imageAtomicExchange(fimage1DArrayv4, ivec2(0,0), constVec4); + buf.resf16v4 += imageAtomicExchange(fimage2Dv4, ivec2(0,0), constVec4); + buf.resf16v4 += imageAtomicExchange(fimage2DArrayv4, ivec3(0,0, 0), constVec4); + buf.resf16v4 += imageAtomicExchange(fimageCubev4, ivec3(0,0,0), constVec4); + buf.resf16v4 += imageAtomicExchange(fimageCubeArrayv4, ivec3(0,0,0), constVec4); + buf.resf16v4 += imageAtomicExchange(fimage3Dv4, ivec3(0,0,0), constVec4); +} diff --git a/glslang/MachineIndependent/Initialize.cpp b/glslang/MachineIndependent/Initialize.cpp index e74f9e7f7..054cf1d68 100755 --- a/glslang/MachineIndependent/Initialize.cpp +++ b/glslang/MachineIndependent/Initialize.cpp @@ -1465,6 +1465,20 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV "\n"); } + // GL_NV_shader_atomic_fp16_vector + if (profile != EEsProfile && version >= 430) { + commonBuiltins.append( + "f16vec2 atomicAdd(coherent volatile inout f16vec2, f16vec2);" + "f16vec4 atomicAdd(coherent volatile inout f16vec4, f16vec4);" + "f16vec2 atomicMin(coherent volatile inout f16vec2, f16vec2);" + "f16vec4 atomicMin(coherent volatile inout f16vec4, f16vec4);" + "f16vec2 atomicMax(coherent volatile inout f16vec2, f16vec2);" + "f16vec4 atomicMax(coherent volatile inout f16vec4, f16vec4);" + 
"f16vec2 atomicExchange(coherent volatile inout f16vec2, f16vec2);" + "f16vec4 atomicExchange(coherent volatile inout f16vec4, f16vec4);" + "\n"); + } + if ((profile == EEsProfile && version >= 300) || (profile != EEsProfile && version >= 150)) { // GL_ARB_shader_bit_encoding commonBuiltins.append( @@ -6678,6 +6692,34 @@ void TBuiltIns::addImageFunctions(TSampler sampler, const TString& typeName, int commonBuiltins.append(imageParams); commonBuiltins.append(", float);\n"); } + + // GL_NV_shader_atomic_fp16_vector + if (profile != EEsProfile && version >= 430) { + const int numFp16Builtins = 4; + const char* atomicFp16Func[numFp16Builtins] = { + " imageAtomicAdd(volatile coherent ", + " imageAtomicMin(volatile coherent ", + " imageAtomicMax(volatile coherent ", + " imageAtomicExchange(volatile coherent " + }; + const int numFp16DataTypes = 2; + const char* atomicFp16DataTypes[numFp16DataTypes] = { + "f16vec2", + "f16vec4" + }; + // Add one prototype per (data type, atomic function) pair + for (int j = 0; j < numFp16DataTypes; ++j) { + for (int i = 0; i < numFp16Builtins; ++i) { + commonBuiltins.append(atomicFp16DataTypes[j]); + commonBuiltins.append(atomicFp16Func[i]); + commonBuiltins.append(imageParams); + commonBuiltins.append(", "); + commonBuiltins.append(atomicFp16DataTypes[j]); + commonBuiltins.append(");\n"); + } + } + } + if (profile != EEsProfile && version >= 450) { commonBuiltins.append("float imageAtomicAdd(volatile coherent "); commonBuiltins.append(imageParams); diff --git a/glslang/MachineIndependent/ParseHelper.cpp b/glslang/MachineIndependent/ParseHelper.cpp index 98148100a..0462bf1ba 100644 --- a/glslang/MachineIndependent/ParseHelper.cpp +++ b/glslang/MachineIndependent/ParseHelper.cpp @@ -2524,6 +2524,17 @@ void TParseContext::builtInOpCheck(const TSourceLoc& loc, const TFunction& fnCan error(loc, "only supported on image with format r64i", fnCandidate.getName().c_str(), ""); else if (callNode.getType().getBasicType() == EbtUint64 && 
imageType.getQualifier().getFormat() != ElfR64ui) error(loc, "only supported on image with format r64ui", fnCandidate.getName().c_str(), ""); + } else if(callNode.getType().getBasicType() == EbtFloat16 && // float16 result: 2 components with rg16f, or 4 components with rgba16f + ((callNode.getType().getVectorSize() == 2 && arg0->getType().getQualifier().getFormat() == ElfRg16f) || + (callNode.getType().getVectorSize() == 4 && arg0->getType().getQualifier().getFormat() == ElfRgba16f))) { + if ((fnCandidate.getName().compare(0, 14, "imageAtomicAdd") == 0) || // each compare length equals its literal's length (prefix match) + (fnCandidate.getName().compare(0, 19, "imageAtomicExchange") == 0) || + (fnCandidate.getName().compare(0, 14, "imageAtomicMin") == 0) || + (fnCandidate.getName().compare(0, 14, "imageAtomicMax") == 0)) { + requireExtensions(loc, 1, &E_GL_NV_shader_atomic_fp16_vector, fnCandidate.getName().c_str()); + } else { + error(loc, "f16vec2/4 operation not supported on: ", fnCandidate.getName().c_str(), ""); + } } else if (imageType.getSampler().type == EbtFloat) { if (fnCandidate.getName().compare(0, 19, "imageAtomicExchange") == 0) { // imageAtomicExchange doesn't require an extension @@ -2582,6 +2593,11 @@ void TParseContext::builtInOpCheck(const TSourceLoc& loc, const TFunction& fnCan const char* const extensions[2] = { E_GL_NV_shader_atomic_int64, E_GL_EXT_shader_atomic_int64 }; requireExtensions(loc, 2, extensions, fnCandidate.getName().c_str()); + } else if ((callNode.getOp() == EOpAtomicAdd || callNode.getOp() == EOpAtomicExchange || + callNode.getOp() == EOpAtomicMin || callNode.getOp() == EOpAtomicMax) && + arg0->getType().getBasicType() == EbtFloat16 && + (arg0->getType().getVectorSize() == 2 || arg0->getType().getVectorSize() == 4 )) { + requireExtensions(loc, 1, &E_GL_NV_shader_atomic_fp16_vector, fnCandidate.getName().c_str()); } else if ((callNode.getOp() == EOpAtomicAdd || callNode.getOp() == EOpAtomicExchange) && (arg0->getType().getBasicType() == EbtFloat || arg0->getType().getBasicType() == EbtDouble)) { diff --git a/glslang/MachineIndependent/Versions.cpp 
b/glslang/MachineIndependent/Versions.cpp index 37d46ed6f..978e25a18 100644 --- a/glslang/MachineIndependent/Versions.cpp +++ b/glslang/MachineIndependent/Versions.cpp @@ -305,6 +305,7 @@ void TParseVersions::initializeExtensionBehavior() extensionBehavior[E_GL_NV_integer_cooperative_matrix] = EBhDisable; extensionBehavior[E_GL_NV_shader_invocation_reorder] = EBhDisable; extensionBehavior[E_GL_NV_displacement_micromap] = EBhDisable; + extensionBehavior[E_GL_NV_shader_atomic_fp16_vector] = EBhDisable; // ARM extensionBehavior[E_GL_ARM_shader_core_builtins] = EBhDisable; diff --git a/glslang/MachineIndependent/Versions.h b/glslang/MachineIndependent/Versions.h index 2e420b5ab..70240ffb5 100755 --- a/glslang/MachineIndependent/Versions.h +++ b/glslang/MachineIndependent/Versions.h @@ -278,6 +278,7 @@ const char* const E_GL_NV_integer_cooperative_matrix = "GL_NV_integer const char* const E_GL_NV_shader_invocation_reorder = "GL_NV_shader_invocation_reorder"; const char* const E_GL_EXT_ray_tracing_position_fetch = "GL_EXT_ray_tracing_position_fetch"; const char* const E_GL_NV_displacement_micromap = "GL_NV_displacement_micromap"; +const char* const E_GL_NV_shader_atomic_fp16_vector = "GL_NV_shader_atomic_fp16_vector"; // ARM const char* const E_GL_ARM_shader_core_builtins = "GL_ARM_shader_core_builtins"; diff --git a/gtests/Spv.FromFile.cpp b/gtests/Spv.FromFile.cpp index 4754f659f..f6ddf138c 100644 --- a/gtests/Spv.FromFile.cpp +++ b/gtests/Spv.FromFile.cpp @@ -453,6 +453,7 @@ INSTANTIATE_TEST_SUITE_P( "spv.nonuniform4.frag", "spv.nonuniform5.frag", "spv.noWorkgroup.comp", + "spv.nvAtomicFp16Vec.frag", "spv.nullInit.comp", "spv.offsets.frag", "spv.Operations.frag", diff --git a/known_good.json b/known_good.json index 0c172a416..fe4ad29ec 100644 --- a/known_good.json +++ b/known_good.json @@ -1,19 +1,19 @@ { "commits" : [ { - "name" : "spirv-tools", - "site" : "github", - "subrepo" : "KhronosGroup/SPIRV-Tools", - "subdir" : "External/spirv-tools", - "commit": 
"b951948eaa75b51466eaa22e8a89223966f300e4" - }, - { - "name" : "spirv-tools/external/spirv-headers", - "site" : "github", - "subrepo" : "KhronosGroup/SPIRV-Headers", - "subdir" : "External/spirv-tools/external/spirv-headers", - "commit" : "5aa1dd8a11182ea9a6a0eabd6a9edc639d5dbecd" - }, + "name" : "spirv-tools", + "site" : "github", + "subrepo" : "KhronosGroup/SPIRV-Tools", + "subdir" : "External/spirv-tools", + "commit": "b0a5c4ac12b742086ffb16e2ba0ad4903450ae1d" + }, + { + "name" : "spirv-tools/external/spirv-headers", + "site" : "github", + "subrepo" : "KhronosGroup/SPIRV-Headers", + "subdir" : "External/spirv-tools/external/spirv-headers", + "commit" : "05cc486580771e4fa7ddc89f5c9ee1e97382689a" + }, { "name": "googletest", "site": "github",