NV_shader_atomic_fp16_vector

This commit is contained in:
Jeff Bolz 2024-02-02 12:36:16 -06:00 committed by arcady-lunarg
parent 9fd0fcd737
commit 48702616ec
12 changed files with 937 additions and 28 deletions

View File

@ -87,4 +87,7 @@ const char* const E_SPV_NV_shader_invocation_reorder = "SPV_NV_shader_invocation
//SPV_NV_displacement_micromap
const char* const E_SPV_NV_displacement_micromap = "SPV_NV_displacement_micromap";
//SPV_NV_shader_atomic_fp16_vector
// Extension name string that the SPIR-V generator declares, together with the
// AtomicFloat16VectorNV capability, when an atomic add/min/max/exchange
// operates on a 2- or 4-component float16 vector.
const char* const E_SPV_NV_shader_atomic_fp16_vector = "SPV_NV_shader_atomic_fp16_vector";
#endif // #ifndef GLSLextNV_H

View File

@ -204,7 +204,8 @@ protected:
spv::Id createBinaryMatrixOperation(spv::Op, OpDecorations&, spv::Id typeId, spv::Id left, spv::Id right);
spv::Id createUnaryOperation(glslang::TOperator op, OpDecorations&, spv::Id typeId, spv::Id operand,
glslang::TBasicType typeProxy,
const spv::Builder::AccessChain::CoherentFlags &lvalueCoherentFlags);
const spv::Builder::AccessChain::CoherentFlags &lvalueCoherentFlags,
const glslang::TType &opType);
spv::Id createUnaryMatrixOperation(spv::Op op, OpDecorations&, spv::Id typeId, spv::Id operand,
glslang::TBasicType typeProxy);
spv::Id createConversion(glslang::TOperator op, OpDecorations&, spv::Id destTypeId, spv::Id operand,
@ -213,7 +214,8 @@ protected:
spv::Id makeSmearedConstant(spv::Id constant, int vectorSize);
spv::Id createAtomicOperation(glslang::TOperator op, spv::Decoration precision, spv::Id typeId,
std::vector<spv::Id>& operands, glslang::TBasicType typeProxy,
const spv::Builder::AccessChain::CoherentFlags &lvalueCoherentFlags);
const spv::Builder::AccessChain::CoherentFlags &lvalueCoherentFlags,
const glslang::TType &opType);
spv::Id createInvocationsOperation(glslang::TOperator op, spv::Id typeId, std::vector<spv::Id>& operands,
glslang::TBasicType typeProxy);
spv::Id CreateInvocationsVectorOperation(spv::Op op, spv::GroupOperation groupOperation,
@ -2692,7 +2694,7 @@ bool TGlslangToSpvTraverser::visitUnary(glslang::TVisit /* visit */, glslang::TI
// if not, then possibly an operation
if (! result)
result = createUnaryOperation(node->getOp(), decorations, resultType(), operand,
node->getOperand()->getBasicType(), lvalueCoherentFlags);
node->getOperand()->getBasicType(), lvalueCoherentFlags, node->getType());
// it could be attached to a SPIR-V instruction
if (!result) {
@ -3775,7 +3777,7 @@ bool TGlslangToSpvTraverser::visitAggregate(glslang::TVisit visit, glslang::TInt
glslang::TBasicType typeProxy = (node->getOp() == glslang::EOpAtomicStore)
? node->getSequence()[0]->getAsTyped()->getBasicType() : node->getBasicType();
result = createAtomicOperation(node->getOp(), precision, resultType(), operands, typeProxy,
lvalueCoherentFlags);
lvalueCoherentFlags, node->getType());
} else if (node->getOp() == glslang::EOpSpirvInst) {
const auto& spirvInst = node->getSpirvInstruction();
if (spirvInst.set == "") {
@ -3822,7 +3824,7 @@ bool TGlslangToSpvTraverser::visitAggregate(glslang::TVisit visit, glslang::TInt
result = createUnaryOperation(
node->getOp(), decorations,
resultType(), operands.front(),
glslangOperands[0]->getAsTyped()->getBasicType(), lvalueCoherentFlags);
glslangOperands[0]->getAsTyped()->getBasicType(), lvalueCoherentFlags, node->getType());
}
break;
default:
@ -6077,7 +6079,7 @@ spv::Id TGlslangToSpvTraverser::createImageTextureFunctionCall(glslang::TIntermO
operands.push_back(*opIt);
return createAtomicOperation(node->getOp(), precision, resultType(), operands, typeProxy,
lvalueCoherentFlags);
lvalueCoherentFlags, node->getType());
}
}
@ -6828,7 +6830,8 @@ spv::Id TGlslangToSpvTraverser::createBinaryMatrixOperation(spv::Op op, OpDecora
}
spv::Id TGlslangToSpvTraverser::createUnaryOperation(glslang::TOperator op, OpDecorations& decorations, spv::Id typeId,
spv::Id operand, glslang::TBasicType typeProxy, const spv::Builder::AccessChain::CoherentFlags &lvalueCoherentFlags)
spv::Id operand, glslang::TBasicType typeProxy, const spv::Builder::AccessChain::CoherentFlags &lvalueCoherentFlags,
const glslang::TType &opType)
{
spv::Op unaryOp = spv::OpNop;
int extBuiltins = -1;
@ -7116,7 +7119,7 @@ spv::Id TGlslangToSpvTraverser::createUnaryOperation(glslang::TOperator op, OpDe
// Handle all of the atomics in one place, in createAtomicOperation()
std::vector<spv::Id> operands;
operands.push_back(operand);
return createAtomicOperation(op, decorations.precision, typeId, operands, typeProxy, lvalueCoherentFlags);
return createAtomicOperation(op, decorations.precision, typeId, operands, typeProxy, lvalueCoherentFlags, opType);
}
case glslang::EOpBitFieldReverse:
@ -7834,7 +7837,7 @@ spv::Id TGlslangToSpvTraverser::makeSmearedConstant(spv::Id constant, int vector
// For glslang ops that map to SPV atomic opCodes
spv::Id TGlslangToSpvTraverser::createAtomicOperation(glslang::TOperator op, spv::Decoration /*precision*/,
spv::Id typeId, std::vector<spv::Id>& operands, glslang::TBasicType typeProxy,
const spv::Builder::AccessChain::CoherentFlags &lvalueCoherentFlags)
const spv::Builder::AccessChain::CoherentFlags &lvalueCoherentFlags, const glslang::TType &opType)
{
spv::Op opCode = spv::OpNop;
@ -7847,8 +7850,13 @@ spv::Id TGlslangToSpvTraverser::createAtomicOperation(glslang::TOperator op, spv
opCode = spv::OpAtomicFAddEXT;
builder.addExtension(spv::E_SPV_EXT_shader_atomic_float_add);
if (typeProxy == glslang::EbtFloat16) {
if (opType.getVectorSize() == 2 || opType.getVectorSize() == 4) {
builder.addExtension(spv::E_SPV_NV_shader_atomic_fp16_vector);
builder.addCapability(spv::CapabilityAtomicFloat16VectorNV);
} else {
builder.addExtension(spv::E_SPV_EXT_shader_atomic_float16_add);
builder.addCapability(spv::CapabilityAtomicFloat16AddEXT);
}
} else if (typeProxy == glslang::EbtFloat) {
builder.addCapability(spv::CapabilityAtomicFloat32AddEXT);
} else {
@ -7866,8 +7874,14 @@ spv::Id TGlslangToSpvTraverser::createAtomicOperation(glslang::TOperator op, spv
if (typeProxy == glslang::EbtFloat16 || typeProxy == glslang::EbtFloat || typeProxy == glslang::EbtDouble) {
opCode = spv::OpAtomicFMinEXT;
builder.addExtension(spv::E_SPV_EXT_shader_atomic_float_min_max);
if (typeProxy == glslang::EbtFloat16)
if (typeProxy == glslang::EbtFloat16) {
if (opType.getVectorSize() == 2 || opType.getVectorSize() == 4) {
builder.addExtension(spv::E_SPV_NV_shader_atomic_fp16_vector);
builder.addCapability(spv::CapabilityAtomicFloat16VectorNV);
} else {
builder.addCapability(spv::CapabilityAtomicFloat16MinMaxEXT);
}
}
else if (typeProxy == glslang::EbtFloat)
builder.addCapability(spv::CapabilityAtomicFloat32MinMaxEXT);
else
@ -7884,8 +7898,14 @@ spv::Id TGlslangToSpvTraverser::createAtomicOperation(glslang::TOperator op, spv
if (typeProxy == glslang::EbtFloat16 || typeProxy == glslang::EbtFloat || typeProxy == glslang::EbtDouble) {
opCode = spv::OpAtomicFMaxEXT;
builder.addExtension(spv::E_SPV_EXT_shader_atomic_float_min_max);
if (typeProxy == glslang::EbtFloat16)
if (typeProxy == glslang::EbtFloat16) {
if (opType.getVectorSize() == 2 || opType.getVectorSize() == 4) {
builder.addExtension(spv::E_SPV_NV_shader_atomic_fp16_vector);
builder.addCapability(spv::CapabilityAtomicFloat16VectorNV);
} else {
builder.addCapability(spv::CapabilityAtomicFloat16MinMaxEXT);
}
}
else if (typeProxy == glslang::EbtFloat)
builder.addCapability(spv::CapabilityAtomicFloat32MinMaxEXT);
else
@ -7914,6 +7934,12 @@ spv::Id TGlslangToSpvTraverser::createAtomicOperation(glslang::TOperator op, spv
case glslang::EOpAtomicExchange:
case glslang::EOpImageAtomicExchange:
case glslang::EOpAtomicCounterExchange:
if ((typeProxy == glslang::EbtFloat16) &&
(opType.getVectorSize() == 2 || opType.getVectorSize() == 4)) {
builder.addExtension(spv::E_SPV_NV_shader_atomic_fp16_vector);
builder.addCapability(spv::CapabilityAtomicFloat16VectorNV);
}
opCode = spv::OpAtomicExchange;
break;
case glslang::EOpAtomicCompSwap:

View File

@ -1037,6 +1037,7 @@ const char* CapabilityString(int info)
case CapabilityFragmentShadingRateKHR: return "FragmentShadingRateKHR";
case CapabilityDemoteToHelperInvocationEXT: return "DemoteToHelperInvocationEXT";
case CapabilityAtomicFloat16VectorNV: return "AtomicFloat16VectorNV";
case CapabilityShaderClockKHR: return "ShaderClockKHR";
case CapabilityQuadControlKHR: return "QuadControlKHR";
case CapabilityInt64ImageEXT: return "Int64ImageEXT";

View File

@ -1108,6 +1108,7 @@ enum Capability {
CapabilityShaderInvocationReorderNV = 5383,
CapabilityBindlessTextureNV = 5390,
CapabilityRayQueryPositionFetchKHR = 5391,
CapabilityAtomicFloat16VectorNV = 5404,
CapabilitySubgroupShuffleINTEL = 5568,
CapabilitySubgroupBufferBlockIOINTEL = 5569,
CapabilitySubgroupImageBlockIOINTEL = 5570,

View File

@ -0,0 +1,704 @@
spv.nvAtomicFp16Vec.frag
// Module Version 10000
// Generated by (magic number): 8000b
// Id's are bound by 515
Capability Shader
Capability Float16
Capability ImageCubeArray
Capability Image1D
Capability StorageImageExtendedFormats
Capability StorageUniformBufferBlock16
Capability AtomicFloat16VectorNV
Extension "SPV_EXT_shader_atomic_float_add"
Extension "SPV_EXT_shader_atomic_float_min_max"
Extension "SPV_KHR_16bit_storage"
Extension "SPV_NV_shader_atomic_fp16_vector"
1: ExtInstImport "GLSL.std.450"
MemoryModel Logical GLSL450
EntryPoint Fragment 4 "main"
ExecutionMode 4 OriginUpperLeft
Source GLSL 430
SourceExtension "GL_EXT_shader_explicit_arithmetic_types_float16"
SourceExtension "GL_NV_shader_atomic_fp16_vector"
Name 4 "main"
Name 9 "Buffer"
MemberName 9(Buffer) 0 "dataf16v2"
MemberName 9(Buffer) 1 "dataf16v4"
MemberName 9(Buffer) 2 "resf16v2"
MemberName 9(Buffer) 3 "resf16v4"
Name 11 "buf"
Name 68 "constVec2"
Name 74 "fimage1D"
Name 85 "fimage1DArray"
Name 97 "fimage2D"
Name 107 "fimage2DArray"
Name 119 "fimageCube"
Name 129 "fimageCubeArray"
Name 139 "fimage3D"
Name 295 "constVec4"
Name 299 "fimage1Dv4"
Name 310 "fimage1DArrayv4"
Name 320 "fimage2Dv4"
Name 330 "fimage2DArrayv4"
Name 340 "fimageCubev4"
Name 350 "fimageCubeArrayv4"
Name 360 "fimage3Dv4"
MemberDecorate 9(Buffer) 0 Offset 0
MemberDecorate 9(Buffer) 1 Offset 8
MemberDecorate 9(Buffer) 2 Offset 16
MemberDecorate 9(Buffer) 3 Offset 24
Decorate 9(Buffer) BufferBlock
Decorate 11(buf) DescriptorSet 0
Decorate 11(buf) Binding 0
Decorate 74(fimage1D) DescriptorSet 0
Decorate 74(fimage1D) Binding 0
Decorate 74(fimage1D) Coherent
Decorate 74(fimage1D) Volatile
Decorate 74(fimage1D) Coherent
Decorate 85(fimage1DArray) DescriptorSet 0
Decorate 85(fimage1DArray) Binding 1
Decorate 85(fimage1DArray) Coherent
Decorate 85(fimage1DArray) Volatile
Decorate 85(fimage1DArray) Coherent
Decorate 97(fimage2D) DescriptorSet 0
Decorate 97(fimage2D) Binding 2
Decorate 97(fimage2D) Coherent
Decorate 97(fimage2D) Volatile
Decorate 97(fimage2D) Coherent
Decorate 107(fimage2DArray) DescriptorSet 0
Decorate 107(fimage2DArray) Binding 3
Decorate 107(fimage2DArray) Coherent
Decorate 107(fimage2DArray) Volatile
Decorate 107(fimage2DArray) Coherent
Decorate 119(fimageCube) DescriptorSet 0
Decorate 119(fimageCube) Binding 5
Decorate 119(fimageCube) Coherent
Decorate 119(fimageCube) Volatile
Decorate 119(fimageCube) Coherent
Decorate 129(fimageCubeArray) DescriptorSet 0
Decorate 129(fimageCubeArray) Binding 6
Decorate 129(fimageCubeArray) Coherent
Decorate 129(fimageCubeArray) Volatile
Decorate 129(fimageCubeArray) Coherent
Decorate 139(fimage3D) DescriptorSet 0
Decorate 139(fimage3D) Binding 9
Decorate 139(fimage3D) Coherent
Decorate 139(fimage3D) Volatile
Decorate 139(fimage3D) Coherent
Decorate 299(fimage1Dv4) DescriptorSet 0
Decorate 299(fimage1Dv4) Binding 10
Decorate 299(fimage1Dv4) Coherent
Decorate 299(fimage1Dv4) Volatile
Decorate 299(fimage1Dv4) Coherent
Decorate 310(fimage1DArrayv4) DescriptorSet 0
Decorate 310(fimage1DArrayv4) Binding 11
Decorate 310(fimage1DArrayv4) Coherent
Decorate 310(fimage1DArrayv4) Volatile
Decorate 310(fimage1DArrayv4) Coherent
Decorate 320(fimage2Dv4) DescriptorSet 0
Decorate 320(fimage2Dv4) Binding 12
Decorate 320(fimage2Dv4) Coherent
Decorate 320(fimage2Dv4) Volatile
Decorate 320(fimage2Dv4) Coherent
Decorate 330(fimage2DArrayv4) DescriptorSet 0
Decorate 330(fimage2DArrayv4) Binding 13
Decorate 330(fimage2DArrayv4) Coherent
Decorate 330(fimage2DArrayv4) Volatile
Decorate 330(fimage2DArrayv4) Coherent
Decorate 340(fimageCubev4) DescriptorSet 0
Decorate 340(fimageCubev4) Binding 15
Decorate 340(fimageCubev4) Coherent
Decorate 340(fimageCubev4) Volatile
Decorate 340(fimageCubev4) Coherent
Decorate 350(fimageCubeArrayv4) DescriptorSet 0
Decorate 350(fimageCubeArrayv4) Binding 16
Decorate 350(fimageCubeArrayv4) Coherent
Decorate 350(fimageCubeArrayv4) Volatile
Decorate 350(fimageCubeArrayv4) Coherent
Decorate 360(fimage3Dv4) DescriptorSet 0
Decorate 360(fimage3Dv4) Binding 19
Decorate 360(fimage3Dv4) Coherent
Decorate 360(fimage3Dv4) Volatile
Decorate 360(fimage3Dv4) Coherent
2: TypeVoid
3: TypeFunction 2
6: TypeFloat 16
7: TypeVector 6(float16_t) 2
8: TypeVector 6(float16_t) 4
9(Buffer): TypeStruct 7(f16vec2) 8(f16vec4) 7(f16vec2) 8(f16vec4)
10: TypePointer Uniform 9(Buffer)
11(buf): 10(ptr) Variable Uniform
12: TypeInt 32 1
13: 12(int) Constant 2
14: 12(int) Constant 0
15: TypePointer Uniform 7(f16vec2)
17:6(float16_t) Constant 16896
18: 7(f16vec2) ConstantComposite 17 17
19: TypeInt 32 0
20: 19(int) Constant 1
21: 19(int) Constant 0
42: 12(int) Constant 3
43: 12(int) Constant 1
44: TypePointer Uniform 8(f16vec4)
46: 8(f16vec4) ConstantComposite 17 17 17 17
67: TypePointer Function 7(f16vec2)
69:6(float16_t) Constant 16384
70: 7(f16vec2) ConstantComposite 69 69
71: TypeFloat 32
72: TypeImage 71(float) 1D nonsampled format:Rg16f
73: TypePointer UniformConstant 72
74(fimage1D): 73(ptr) Variable UniformConstant
76: TypePointer Image 7(f16vec2)
83: TypeImage 71(float) 1D array nonsampled format:Rg16f
84: TypePointer UniformConstant 83
85(fimage1DArray): 84(ptr) Variable UniformConstant
86: TypeVector 12(int) 2
87: 86(ivec2) ConstantComposite 14 14
95: TypeImage 71(float) 2D nonsampled format:Rg16f
96: TypePointer UniformConstant 95
97(fimage2D): 96(ptr) Variable UniformConstant
105: TypeImage 71(float) 2D array nonsampled format:Rg16f
106: TypePointer UniformConstant 105
107(fimage2DArray): 106(ptr) Variable UniformConstant
108: TypeVector 12(int) 3
109: 108(ivec3) ConstantComposite 14 14 14
117: TypeImage 71(float) Cube nonsampled format:Rg16f
118: TypePointer UniformConstant 117
119(fimageCube): 118(ptr) Variable UniformConstant
127: TypeImage 71(float) Cube array nonsampled format:Rg16f
128: TypePointer UniformConstant 127
129(fimageCubeArray): 128(ptr) Variable UniformConstant
137: TypeImage 71(float) 3D nonsampled format:Rg16f
138: TypePointer UniformConstant 137
139(fimage3D): 138(ptr) Variable UniformConstant
294: TypePointer Function 8(f16vec4)
296: 8(f16vec4) ConstantComposite 69 69 69 69
297: TypeImage 71(float) 1D nonsampled format:Rgba16f
298: TypePointer UniformConstant 297
299(fimage1Dv4): 298(ptr) Variable UniformConstant
301: TypePointer Image 8(f16vec4)
308: TypeImage 71(float) 1D array nonsampled format:Rgba16f
309: TypePointer UniformConstant 308
310(fimage1DArrayv4): 309(ptr) Variable UniformConstant
318: TypeImage 71(float) 2D nonsampled format:Rgba16f
319: TypePointer UniformConstant 318
320(fimage2Dv4): 319(ptr) Variable UniformConstant
328: TypeImage 71(float) 2D array nonsampled format:Rgba16f
329: TypePointer UniformConstant 328
330(fimage2DArrayv4): 329(ptr) Variable UniformConstant
338: TypeImage 71(float) Cube nonsampled format:Rgba16f
339: TypePointer UniformConstant 338
340(fimageCubev4): 339(ptr) Variable UniformConstant
348: TypeImage 71(float) Cube array nonsampled format:Rgba16f
349: TypePointer UniformConstant 348
350(fimageCubeArrayv4): 349(ptr) Variable UniformConstant
358: TypeImage 71(float) 3D nonsampled format:Rgba16f
359: TypePointer UniformConstant 358
360(fimage3Dv4): 359(ptr) Variable UniformConstant
4(main): 2 Function None 3
5: Label
68(constVec2): 67(ptr) Variable Function
295(constVec4): 294(ptr) Variable Function
16: 15(ptr) AccessChain 11(buf) 14
22: 7(f16vec2) AtomicFAddEXT 16 20 21 18
23: 15(ptr) AccessChain 11(buf) 13
Store 23 22
24: 15(ptr) AccessChain 11(buf) 14
25: 7(f16vec2) AtomicFMinEXT 24 20 21 18
26: 15(ptr) AccessChain 11(buf) 13
27: 7(f16vec2) Load 26
28: 7(f16vec2) FAdd 27 25
29: 15(ptr) AccessChain 11(buf) 13
Store 29 28
30: 15(ptr) AccessChain 11(buf) 14
31: 7(f16vec2) AtomicFMaxEXT 30 20 21 18
32: 15(ptr) AccessChain 11(buf) 13
33: 7(f16vec2) Load 32
34: 7(f16vec2) FAdd 33 31
35: 15(ptr) AccessChain 11(buf) 13
Store 35 34
36: 15(ptr) AccessChain 11(buf) 14
37: 7(f16vec2) AtomicExchange 36 20 21 18
38: 15(ptr) AccessChain 11(buf) 13
39: 7(f16vec2) Load 38
40: 7(f16vec2) FAdd 39 37
41: 15(ptr) AccessChain 11(buf) 13
Store 41 40
45: 44(ptr) AccessChain 11(buf) 43
47: 8(f16vec4) AtomicFAddEXT 45 20 21 46
48: 44(ptr) AccessChain 11(buf) 42
Store 48 47
49: 44(ptr) AccessChain 11(buf) 43
50: 8(f16vec4) AtomicFMinEXT 49 20 21 46
51: 44(ptr) AccessChain 11(buf) 42
52: 8(f16vec4) Load 51
53: 8(f16vec4) FAdd 52 50
54: 44(ptr) AccessChain 11(buf) 42
Store 54 53
55: 44(ptr) AccessChain 11(buf) 43
56: 8(f16vec4) AtomicFMaxEXT 55 20 21 46
57: 44(ptr) AccessChain 11(buf) 42
58: 8(f16vec4) Load 57
59: 8(f16vec4) FAdd 58 56
60: 44(ptr) AccessChain 11(buf) 42
Store 60 59
61: 44(ptr) AccessChain 11(buf) 43
62: 8(f16vec4) AtomicExchange 61 20 21 46
63: 44(ptr) AccessChain 11(buf) 42
64: 8(f16vec4) Load 63
65: 8(f16vec4) FAdd 64 62
66: 44(ptr) AccessChain 11(buf) 42
Store 66 65
Store 68(constVec2) 70
75: 7(f16vec2) Load 68(constVec2)
77: 76(ptr) ImageTexelPointer 74(fimage1D) 14 21
78: 7(f16vec2) AtomicFAddEXT 77 20 21 75
79: 15(ptr) AccessChain 11(buf) 13
80: 7(f16vec2) Load 79
81: 7(f16vec2) FAdd 80 78
82: 15(ptr) AccessChain 11(buf) 13
Store 82 81
88: 7(f16vec2) Load 68(constVec2)
89: 76(ptr) ImageTexelPointer 85(fimage1DArray) 87 21
90: 7(f16vec2) AtomicFAddEXT 89 20 21 88
91: 15(ptr) AccessChain 11(buf) 13
92: 7(f16vec2) Load 91
93: 7(f16vec2) FAdd 92 90
94: 15(ptr) AccessChain 11(buf) 13
Store 94 93
98: 7(f16vec2) Load 68(constVec2)
99: 76(ptr) ImageTexelPointer 97(fimage2D) 87 21
100: 7(f16vec2) AtomicFAddEXT 99 20 21 98
101: 15(ptr) AccessChain 11(buf) 13
102: 7(f16vec2) Load 101
103: 7(f16vec2) FAdd 102 100
104: 15(ptr) AccessChain 11(buf) 13
Store 104 103
110: 7(f16vec2) Load 68(constVec2)
111: 76(ptr) ImageTexelPointer 107(fimage2DArray) 109 21
112: 7(f16vec2) AtomicFAddEXT 111 20 21 110
113: 15(ptr) AccessChain 11(buf) 13
114: 7(f16vec2) Load 113
115: 7(f16vec2) FAdd 114 112
116: 15(ptr) AccessChain 11(buf) 13
Store 116 115
120: 7(f16vec2) Load 68(constVec2)
121: 76(ptr) ImageTexelPointer 119(fimageCube) 109 21
122: 7(f16vec2) AtomicFAddEXT 121 20 21 120
123: 15(ptr) AccessChain 11(buf) 13
124: 7(f16vec2) Load 123
125: 7(f16vec2) FAdd 124 122
126: 15(ptr) AccessChain 11(buf) 13
Store 126 125
130: 7(f16vec2) Load 68(constVec2)
131: 76(ptr) ImageTexelPointer 129(fimageCubeArray) 109 21
132: 7(f16vec2) AtomicFAddEXT 131 20 21 130
133: 15(ptr) AccessChain 11(buf) 13
134: 7(f16vec2) Load 133
135: 7(f16vec2) FAdd 134 132
136: 15(ptr) AccessChain 11(buf) 13
Store 136 135
140: 7(f16vec2) Load 68(constVec2)
141: 76(ptr) ImageTexelPointer 139(fimage3D) 109 21
142: 7(f16vec2) AtomicFAddEXT 141 20 21 140
143: 15(ptr) AccessChain 11(buf) 13
144: 7(f16vec2) Load 143
145: 7(f16vec2) FAdd 144 142
146: 15(ptr) AccessChain 11(buf) 13
Store 146 145
147: 7(f16vec2) Load 68(constVec2)
148: 76(ptr) ImageTexelPointer 74(fimage1D) 14 21
149: 7(f16vec2) AtomicFMinEXT 148 20 21 147
150: 15(ptr) AccessChain 11(buf) 13
151: 7(f16vec2) Load 150
152: 7(f16vec2) FAdd 151 149
153: 15(ptr) AccessChain 11(buf) 13
Store 153 152
154: 7(f16vec2) Load 68(constVec2)
155: 76(ptr) ImageTexelPointer 85(fimage1DArray) 87 21
156: 7(f16vec2) AtomicFMinEXT 155 20 21 154
157: 15(ptr) AccessChain 11(buf) 13
158: 7(f16vec2) Load 157
159: 7(f16vec2) FAdd 158 156
160: 15(ptr) AccessChain 11(buf) 13
Store 160 159
161: 7(f16vec2) Load 68(constVec2)
162: 76(ptr) ImageTexelPointer 97(fimage2D) 87 21
163: 7(f16vec2) AtomicFMinEXT 162 20 21 161
164: 15(ptr) AccessChain 11(buf) 13
165: 7(f16vec2) Load 164
166: 7(f16vec2) FAdd 165 163
167: 15(ptr) AccessChain 11(buf) 13
Store 167 166
168: 7(f16vec2) Load 68(constVec2)
169: 76(ptr) ImageTexelPointer 107(fimage2DArray) 109 21
170: 7(f16vec2) AtomicFMinEXT 169 20 21 168
171: 15(ptr) AccessChain 11(buf) 13
172: 7(f16vec2) Load 171
173: 7(f16vec2) FAdd 172 170
174: 15(ptr) AccessChain 11(buf) 13
Store 174 173
175: 7(f16vec2) Load 68(constVec2)
176: 76(ptr) ImageTexelPointer 119(fimageCube) 109 21
177: 7(f16vec2) AtomicFMinEXT 176 20 21 175
178: 15(ptr) AccessChain 11(buf) 13
179: 7(f16vec2) Load 178
180: 7(f16vec2) FAdd 179 177
181: 15(ptr) AccessChain 11(buf) 13
Store 181 180
182: 7(f16vec2) Load 68(constVec2)
183: 76(ptr) ImageTexelPointer 129(fimageCubeArray) 109 21
184: 7(f16vec2) AtomicFMinEXT 183 20 21 182
185: 15(ptr) AccessChain 11(buf) 13
186: 7(f16vec2) Load 185
187: 7(f16vec2) FAdd 186 184
188: 15(ptr) AccessChain 11(buf) 13
Store 188 187
189: 7(f16vec2) Load 68(constVec2)
190: 76(ptr) ImageTexelPointer 139(fimage3D) 109 21
191: 7(f16vec2) AtomicFMinEXT 190 20 21 189
192: 15(ptr) AccessChain 11(buf) 13
193: 7(f16vec2) Load 192
194: 7(f16vec2) FAdd 193 191
195: 15(ptr) AccessChain 11(buf) 13
Store 195 194
196: 7(f16vec2) Load 68(constVec2)
197: 76(ptr) ImageTexelPointer 74(fimage1D) 14 21
198: 7(f16vec2) AtomicFMaxEXT 197 20 21 196
199: 15(ptr) AccessChain 11(buf) 13
200: 7(f16vec2) Load 199
201: 7(f16vec2) FAdd 200 198
202: 15(ptr) AccessChain 11(buf) 13
Store 202 201
203: 7(f16vec2) Load 68(constVec2)
204: 76(ptr) ImageTexelPointer 85(fimage1DArray) 87 21
205: 7(f16vec2) AtomicFMaxEXT 204 20 21 203
206: 15(ptr) AccessChain 11(buf) 13
207: 7(f16vec2) Load 206
208: 7(f16vec2) FAdd 207 205
209: 15(ptr) AccessChain 11(buf) 13
Store 209 208
210: 7(f16vec2) Load 68(constVec2)
211: 76(ptr) ImageTexelPointer 97(fimage2D) 87 21
212: 7(f16vec2) AtomicFMaxEXT 211 20 21 210
213: 15(ptr) AccessChain 11(buf) 13
214: 7(f16vec2) Load 213
215: 7(f16vec2) FAdd 214 212
216: 15(ptr) AccessChain 11(buf) 13
Store 216 215
217: 7(f16vec2) Load 68(constVec2)
218: 76(ptr) ImageTexelPointer 107(fimage2DArray) 109 21
219: 7(f16vec2) AtomicFMaxEXT 218 20 21 217
220: 15(ptr) AccessChain 11(buf) 13
221: 7(f16vec2) Load 220
222: 7(f16vec2) FAdd 221 219
223: 15(ptr) AccessChain 11(buf) 13
Store 223 222
224: 7(f16vec2) Load 68(constVec2)
225: 76(ptr) ImageTexelPointer 119(fimageCube) 109 21
226: 7(f16vec2) AtomicFMaxEXT 225 20 21 224
227: 15(ptr) AccessChain 11(buf) 13
228: 7(f16vec2) Load 227
229: 7(f16vec2) FAdd 228 226
230: 15(ptr) AccessChain 11(buf) 13
Store 230 229
231: 7(f16vec2) Load 68(constVec2)
232: 76(ptr) ImageTexelPointer 129(fimageCubeArray) 109 21
233: 7(f16vec2) AtomicFMaxEXT 232 20 21 231
234: 15(ptr) AccessChain 11(buf) 13
235: 7(f16vec2) Load 234
236: 7(f16vec2) FAdd 235 233
237: 15(ptr) AccessChain 11(buf) 13
Store 237 236
238: 7(f16vec2) Load 68(constVec2)
239: 76(ptr) ImageTexelPointer 139(fimage3D) 109 21
240: 7(f16vec2) AtomicFMaxEXT 239 20 21 238
241: 15(ptr) AccessChain 11(buf) 13
242: 7(f16vec2) Load 241
243: 7(f16vec2) FAdd 242 240
244: 15(ptr) AccessChain 11(buf) 13
Store 244 243
245: 7(f16vec2) Load 68(constVec2)
246: 76(ptr) ImageTexelPointer 74(fimage1D) 14 21
247: 7(f16vec2) AtomicExchange 246 20 21 245
248: 15(ptr) AccessChain 11(buf) 13
249: 7(f16vec2) Load 248
250: 7(f16vec2) FAdd 249 247
251: 15(ptr) AccessChain 11(buf) 13
Store 251 250
252: 7(f16vec2) Load 68(constVec2)
253: 76(ptr) ImageTexelPointer 85(fimage1DArray) 87 21
254: 7(f16vec2) AtomicExchange 253 20 21 252
255: 15(ptr) AccessChain 11(buf) 13
256: 7(f16vec2) Load 255
257: 7(f16vec2) FAdd 256 254
258: 15(ptr) AccessChain 11(buf) 13
Store 258 257
259: 7(f16vec2) Load 68(constVec2)
260: 76(ptr) ImageTexelPointer 97(fimage2D) 87 21
261: 7(f16vec2) AtomicExchange 260 20 21 259
262: 15(ptr) AccessChain 11(buf) 13
263: 7(f16vec2) Load 262
264: 7(f16vec2) FAdd 263 261
265: 15(ptr) AccessChain 11(buf) 13
Store 265 264
266: 7(f16vec2) Load 68(constVec2)
267: 76(ptr) ImageTexelPointer 107(fimage2DArray) 109 21
268: 7(f16vec2) AtomicExchange 267 20 21 266
269: 15(ptr) AccessChain 11(buf) 13
270: 7(f16vec2) Load 269
271: 7(f16vec2) FAdd 270 268
272: 15(ptr) AccessChain 11(buf) 13
Store 272 271
273: 7(f16vec2) Load 68(constVec2)
274: 76(ptr) ImageTexelPointer 119(fimageCube) 109 21
275: 7(f16vec2) AtomicExchange 274 20 21 273
276: 15(ptr) AccessChain 11(buf) 13
277: 7(f16vec2) Load 276
278: 7(f16vec2) FAdd 277 275
279: 15(ptr) AccessChain 11(buf) 13
Store 279 278
280: 7(f16vec2) Load 68(constVec2)
281: 76(ptr) ImageTexelPointer 129(fimageCubeArray) 109 21
282: 7(f16vec2) AtomicExchange 281 20 21 280
283: 15(ptr) AccessChain 11(buf) 13
284: 7(f16vec2) Load 283
285: 7(f16vec2) FAdd 284 282
286: 15(ptr) AccessChain 11(buf) 13
Store 286 285
287: 7(f16vec2) Load 68(constVec2)
288: 76(ptr) ImageTexelPointer 139(fimage3D) 109 21
289: 7(f16vec2) AtomicExchange 288 20 21 287
290: 15(ptr) AccessChain 11(buf) 13
291: 7(f16vec2) Load 290
292: 7(f16vec2) FAdd 291 289
293: 15(ptr) AccessChain 11(buf) 13
Store 293 292
Store 295(constVec4) 296
300: 8(f16vec4) Load 295(constVec4)
302: 301(ptr) ImageTexelPointer 299(fimage1Dv4) 14 21
303: 8(f16vec4) AtomicFAddEXT 302 20 21 300
304: 44(ptr) AccessChain 11(buf) 42
305: 8(f16vec4) Load 304
306: 8(f16vec4) FAdd 305 303
307: 44(ptr) AccessChain 11(buf) 42
Store 307 306
311: 8(f16vec4) Load 295(constVec4)
312: 301(ptr) ImageTexelPointer 310(fimage1DArrayv4) 87 21
313: 8(f16vec4) AtomicFAddEXT 312 20 21 311
314: 44(ptr) AccessChain 11(buf) 42
315: 8(f16vec4) Load 314
316: 8(f16vec4) FAdd 315 313
317: 44(ptr) AccessChain 11(buf) 42
Store 317 316
321: 8(f16vec4) Load 295(constVec4)
322: 301(ptr) ImageTexelPointer 320(fimage2Dv4) 87 21
323: 8(f16vec4) AtomicFAddEXT 322 20 21 321
324: 44(ptr) AccessChain 11(buf) 42
325: 8(f16vec4) Load 324
326: 8(f16vec4) FAdd 325 323
327: 44(ptr) AccessChain 11(buf) 42
Store 327 326
331: 8(f16vec4) Load 295(constVec4)
332: 301(ptr) ImageTexelPointer 330(fimage2DArrayv4) 109 21
333: 8(f16vec4) AtomicFAddEXT 332 20 21 331
334: 44(ptr) AccessChain 11(buf) 42
335: 8(f16vec4) Load 334
336: 8(f16vec4) FAdd 335 333
337: 44(ptr) AccessChain 11(buf) 42
Store 337 336
341: 8(f16vec4) Load 295(constVec4)
342: 301(ptr) ImageTexelPointer 340(fimageCubev4) 109 21
343: 8(f16vec4) AtomicFAddEXT 342 20 21 341
344: 44(ptr) AccessChain 11(buf) 42
345: 8(f16vec4) Load 344
346: 8(f16vec4) FAdd 345 343
347: 44(ptr) AccessChain 11(buf) 42
Store 347 346
351: 8(f16vec4) Load 295(constVec4)
352: 301(ptr) ImageTexelPointer 350(fimageCubeArrayv4) 109 21
353: 8(f16vec4) AtomicFAddEXT 352 20 21 351
354: 44(ptr) AccessChain 11(buf) 42
355: 8(f16vec4) Load 354
356: 8(f16vec4) FAdd 355 353
357: 44(ptr) AccessChain 11(buf) 42
Store 357 356
361: 8(f16vec4) Load 295(constVec4)
362: 301(ptr) ImageTexelPointer 360(fimage3Dv4) 109 21
363: 8(f16vec4) AtomicFAddEXT 362 20 21 361
364: 44(ptr) AccessChain 11(buf) 42
365: 8(f16vec4) Load 364
366: 8(f16vec4) FAdd 365 363
367: 44(ptr) AccessChain 11(buf) 42
Store 367 366
368: 8(f16vec4) Load 295(constVec4)
369: 301(ptr) ImageTexelPointer 299(fimage1Dv4) 14 21
370: 8(f16vec4) AtomicFMinEXT 369 20 21 368
371: 44(ptr) AccessChain 11(buf) 42
372: 8(f16vec4) Load 371
373: 8(f16vec4) FAdd 372 370
374: 44(ptr) AccessChain 11(buf) 42
Store 374 373
375: 8(f16vec4) Load 295(constVec4)
376: 301(ptr) ImageTexelPointer 310(fimage1DArrayv4) 87 21
377: 8(f16vec4) AtomicFMinEXT 376 20 21 375
378: 44(ptr) AccessChain 11(buf) 42
379: 8(f16vec4) Load 378
380: 8(f16vec4) FAdd 379 377
381: 44(ptr) AccessChain 11(buf) 42
Store 381 380
382: 8(f16vec4) Load 295(constVec4)
383: 301(ptr) ImageTexelPointer 320(fimage2Dv4) 87 21
384: 8(f16vec4) AtomicFMinEXT 383 20 21 382
385: 44(ptr) AccessChain 11(buf) 42
386: 8(f16vec4) Load 385
387: 8(f16vec4) FAdd 386 384
388: 44(ptr) AccessChain 11(buf) 42
Store 388 387
389: 8(f16vec4) Load 295(constVec4)
390: 301(ptr) ImageTexelPointer 330(fimage2DArrayv4) 109 21
391: 8(f16vec4) AtomicFMinEXT 390 20 21 389
392: 44(ptr) AccessChain 11(buf) 42
393: 8(f16vec4) Load 392
394: 8(f16vec4) FAdd 393 391
395: 44(ptr) AccessChain 11(buf) 42
Store 395 394
396: 8(f16vec4) Load 295(constVec4)
397: 301(ptr) ImageTexelPointer 340(fimageCubev4) 109 21
398: 8(f16vec4) AtomicFMinEXT 397 20 21 396
399: 44(ptr) AccessChain 11(buf) 42
400: 8(f16vec4) Load 399
401: 8(f16vec4) FAdd 400 398
402: 44(ptr) AccessChain 11(buf) 42
Store 402 401
403: 8(f16vec4) Load 295(constVec4)
404: 301(ptr) ImageTexelPointer 350(fimageCubeArrayv4) 109 21
405: 8(f16vec4) AtomicFMinEXT 404 20 21 403
406: 44(ptr) AccessChain 11(buf) 42
407: 8(f16vec4) Load 406
408: 8(f16vec4) FAdd 407 405
409: 44(ptr) AccessChain 11(buf) 42
Store 409 408
410: 8(f16vec4) Load 295(constVec4)
411: 301(ptr) ImageTexelPointer 360(fimage3Dv4) 109 21
412: 8(f16vec4) AtomicFMinEXT 411 20 21 410
413: 44(ptr) AccessChain 11(buf) 42
414: 8(f16vec4) Load 413
415: 8(f16vec4) FAdd 414 412
416: 44(ptr) AccessChain 11(buf) 42
Store 416 415
417: 8(f16vec4) Load 295(constVec4)
418: 301(ptr) ImageTexelPointer 299(fimage1Dv4) 14 21
419: 8(f16vec4) AtomicFMaxEXT 418 20 21 417
420: 44(ptr) AccessChain 11(buf) 42
421: 8(f16vec4) Load 420
422: 8(f16vec4) FAdd 421 419
423: 44(ptr) AccessChain 11(buf) 42
Store 423 422
424: 8(f16vec4) Load 295(constVec4)
425: 301(ptr) ImageTexelPointer 310(fimage1DArrayv4) 87 21
426: 8(f16vec4) AtomicFMaxEXT 425 20 21 424
427: 44(ptr) AccessChain 11(buf) 42
428: 8(f16vec4) Load 427
429: 8(f16vec4) FAdd 428 426
430: 44(ptr) AccessChain 11(buf) 42
Store 430 429
431: 8(f16vec4) Load 295(constVec4)
432: 301(ptr) ImageTexelPointer 320(fimage2Dv4) 87 21
433: 8(f16vec4) AtomicFMaxEXT 432 20 21 431
434: 44(ptr) AccessChain 11(buf) 42
435: 8(f16vec4) Load 434
436: 8(f16vec4) FAdd 435 433
437: 44(ptr) AccessChain 11(buf) 42
Store 437 436
438: 8(f16vec4) Load 295(constVec4)
439: 301(ptr) ImageTexelPointer 330(fimage2DArrayv4) 109 21
440: 8(f16vec4) AtomicFMaxEXT 439 20 21 438
441: 44(ptr) AccessChain 11(buf) 42
442: 8(f16vec4) Load 441
443: 8(f16vec4) FAdd 442 440
444: 44(ptr) AccessChain 11(buf) 42
Store 444 443
445: 8(f16vec4) Load 295(constVec4)
446: 301(ptr) ImageTexelPointer 340(fimageCubev4) 109 21
447: 8(f16vec4) AtomicFMaxEXT 446 20 21 445
448: 44(ptr) AccessChain 11(buf) 42
449: 8(f16vec4) Load 448
450: 8(f16vec4) FAdd 449 447
451: 44(ptr) AccessChain 11(buf) 42
Store 451 450
452: 8(f16vec4) Load 295(constVec4)
453: 301(ptr) ImageTexelPointer 350(fimageCubeArrayv4) 109 21
454: 8(f16vec4) AtomicFMaxEXT 453 20 21 452
455: 44(ptr) AccessChain 11(buf) 42
456: 8(f16vec4) Load 455
457: 8(f16vec4) FAdd 456 454
458: 44(ptr) AccessChain 11(buf) 42
Store 458 457
459: 8(f16vec4) Load 295(constVec4)
460: 301(ptr) ImageTexelPointer 360(fimage3Dv4) 109 21
461: 8(f16vec4) AtomicFMaxEXT 460 20 21 459
462: 44(ptr) AccessChain 11(buf) 42
463: 8(f16vec4) Load 462
464: 8(f16vec4) FAdd 463 461
465: 44(ptr) AccessChain 11(buf) 42
Store 465 464
466: 8(f16vec4) Load 295(constVec4)
467: 301(ptr) ImageTexelPointer 299(fimage1Dv4) 14 21
468: 8(f16vec4) AtomicExchange 467 20 21 466
469: 44(ptr) AccessChain 11(buf) 42
470: 8(f16vec4) Load 469
471: 8(f16vec4) FAdd 470 468
472: 44(ptr) AccessChain 11(buf) 42
Store 472 471
473: 8(f16vec4) Load 295(constVec4)
474: 301(ptr) ImageTexelPointer 310(fimage1DArrayv4) 87 21
475: 8(f16vec4) AtomicExchange 474 20 21 473
476: 44(ptr) AccessChain 11(buf) 42
477: 8(f16vec4) Load 476
478: 8(f16vec4) FAdd 477 475
479: 44(ptr) AccessChain 11(buf) 42
Store 479 478
480: 8(f16vec4) Load 295(constVec4)
481: 301(ptr) ImageTexelPointer 320(fimage2Dv4) 87 21
482: 8(f16vec4) AtomicExchange 481 20 21 480
483: 44(ptr) AccessChain 11(buf) 42
484: 8(f16vec4) Load 483
485: 8(f16vec4) FAdd 484 482
486: 44(ptr) AccessChain 11(buf) 42
Store 486 485
487: 8(f16vec4) Load 295(constVec4)
488: 301(ptr) ImageTexelPointer 330(fimage2DArrayv4) 109 21
489: 8(f16vec4) AtomicExchange 488 20 21 487
490: 44(ptr) AccessChain 11(buf) 42
491: 8(f16vec4) Load 490
492: 8(f16vec4) FAdd 491 489
493: 44(ptr) AccessChain 11(buf) 42
Store 493 492
494: 8(f16vec4) Load 295(constVec4)
495: 301(ptr) ImageTexelPointer 340(fimageCubev4) 109 21
496: 8(f16vec4) AtomicExchange 495 20 21 494
497: 44(ptr) AccessChain 11(buf) 42
498: 8(f16vec4) Load 497
499: 8(f16vec4) FAdd 498 496
500: 44(ptr) AccessChain 11(buf) 42
Store 500 499
501: 8(f16vec4) Load 295(constVec4)
502: 301(ptr) ImageTexelPointer 350(fimageCubeArrayv4) 109 21
503: 8(f16vec4) AtomicExchange 502 20 21 501
504: 44(ptr) AccessChain 11(buf) 42
505: 8(f16vec4) Load 504
506: 8(f16vec4) FAdd 505 503
507: 44(ptr) AccessChain 11(buf) 42
Store 507 506
508: 8(f16vec4) Load 295(constVec4)
509: 301(ptr) ImageTexelPointer 360(fimage3Dv4) 109 21
510: 8(f16vec4) AtomicExchange 509 20 21 508
511: 44(ptr) AccessChain 11(buf) 42
512: 8(f16vec4) Load 511
513: 8(f16vec4) FAdd 512 510
514: 44(ptr) AccessChain 11(buf) 42
Store 514 513
Return
FunctionEnd

View File

@ -0,0 +1,113 @@
#version 430
// Extensions needed for the f16vec2/f16vec4 types and for atomics on them.
#extension GL_NV_shader_atomic_fp16_vector : enable
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
// Storage buffer used by main(): the data* members are the targets of the
// atomic* calls, and the res* members receive/accumulate the values those
// calls (and the imageAtomic* calls) return.
layout(binding = 0) buffer Buffer
{
f16vec2 dataf16v2;
f16vec4 dataf16v4;
f16vec2 resf16v2;
f16vec4 resf16v4;
}buf;
// rg16f images, one per image dimensionality; main() uses these with
// f16vec2 operands.
layout(binding = 0, rg16f) volatile coherent uniform image1D fimage1D;
layout(binding = 1, rg16f) volatile coherent uniform image1DArray fimage1DArray;
layout(binding = 2, rg16f) volatile coherent uniform image2D fimage2D;
layout(binding = 3, rg16f) volatile coherent uniform image2DArray fimage2DArray;
layout(binding = 5, rg16f) volatile coherent uniform imageCube fimageCube;
layout(binding = 6, rg16f) volatile coherent uniform imageCubeArray fimageCubeArray;
layout(binding = 9, rg16f) volatile coherent uniform image3D fimage3D;
// rgba16f images, same set of dimensionalities; main() uses these with
// f16vec4 operands.
layout(binding = 10, rgba16f) volatile coherent uniform image1D fimage1Dv4;
layout(binding = 11, rgba16f) volatile coherent uniform image1DArray fimage1DArrayv4;
layout(binding = 12, rgba16f) volatile coherent uniform image2D fimage2Dv4;
layout(binding = 13, rgba16f) volatile coherent uniform image2DArray fimage2DArrayv4;
layout(binding = 15, rgba16f) volatile coherent uniform imageCube fimageCubev4;
layout(binding = 16, rgba16f) volatile coherent uniform imageCubeArray fimageCubeArrayv4;
layout(binding = 19, rgba16f) volatile coherent uniform image3D fimage3Dv4;
// Calls add/min/max/exchange atomics on f16vec2 and f16vec4 operands, first on
// buffer members and then on each declared image, accumulating every returned
// value into buf.resf16v2 / buf.resf16v4.
void main()
{
// Buffer atomics on an f16vec2 member. The first call assigns the result;
// the following ones add to it.
buf.resf16v2 = atomicAdd(buf.dataf16v2, f16vec2(3));
buf.resf16v2 += atomicMin(buf.dataf16v2, f16vec2(3));
buf.resf16v2 += atomicMax(buf.dataf16v2, f16vec2(3));
buf.resf16v2 += atomicExchange(buf.dataf16v2, f16vec2(3));
// Buffer atomics on an f16vec4 member, same pattern as above.
buf.resf16v4 = atomicAdd(buf.dataf16v4, f16vec4(3));
buf.resf16v4 += atomicMin(buf.dataf16v4, f16vec4(3));
buf.resf16v4 += atomicMax(buf.dataf16v4, f16vec4(3));
buf.resf16v4 += atomicExchange(buf.dataf16v4, f16vec4(3));
// imageAtomic* functions with f16vec2 operands; the only supported image
// format for these is rg16f. Each operation is applied to every rg16f image.
f16vec2 constVec2 = f16vec2(2.0);
buf.resf16v2 += imageAtomicAdd(fimage1D, int(0), constVec2);
buf.resf16v2 += imageAtomicAdd(fimage1DArray, ivec2(0,0), constVec2);
buf.resf16v2 += imageAtomicAdd(fimage2D, ivec2(0,0), constVec2);
buf.resf16v2 += imageAtomicAdd(fimage2DArray, ivec3(0,0, 0), constVec2);
buf.resf16v2 += imageAtomicAdd(fimageCube, ivec3(0,0,0), constVec2);
buf.resf16v2 += imageAtomicAdd(fimageCubeArray, ivec3(0,0,0), constVec2);
buf.resf16v2 += imageAtomicAdd(fimage3D, ivec3(0,0,0), constVec2);
buf.resf16v2 += imageAtomicMin(fimage1D, int(0), constVec2);
buf.resf16v2 += imageAtomicMin(fimage1DArray, ivec2(0,0), constVec2);
buf.resf16v2 += imageAtomicMin(fimage2D, ivec2(0,0), constVec2);
buf.resf16v2 += imageAtomicMin(fimage2DArray, ivec3(0,0, 0), constVec2);
buf.resf16v2 += imageAtomicMin(fimageCube, ivec3(0,0,0), constVec2);
buf.resf16v2 += imageAtomicMin(fimageCubeArray, ivec3(0,0,0), constVec2);
buf.resf16v2 += imageAtomicMin(fimage3D, ivec3(0,0,0), constVec2);
buf.resf16v2 += imageAtomicMax(fimage1D, int(0), constVec2);
buf.resf16v2 += imageAtomicMax(fimage1DArray, ivec2(0,0), constVec2);
buf.resf16v2 += imageAtomicMax(fimage2D, ivec2(0,0), constVec2);
buf.resf16v2 += imageAtomicMax(fimage2DArray, ivec3(0,0, 0), constVec2);
buf.resf16v2 += imageAtomicMax(fimageCube, ivec3(0,0,0), constVec2);
buf.resf16v2 += imageAtomicMax(fimageCubeArray, ivec3(0,0,0), constVec2);
buf.resf16v2 += imageAtomicMax(fimage3D, ivec3(0,0,0), constVec2);
buf.resf16v2 += imageAtomicExchange(fimage1D, int(0), constVec2);
buf.resf16v2 += imageAtomicExchange(fimage1DArray, ivec2(0,0), constVec2);
buf.resf16v2 += imageAtomicExchange(fimage2D, ivec2(0,0), constVec2);
buf.resf16v2 += imageAtomicExchange(fimage2DArray, ivec3(0,0, 0), constVec2);
buf.resf16v2 += imageAtomicExchange(fimageCube, ivec3(0,0,0), constVec2);
buf.resf16v2 += imageAtomicExchange(fimageCubeArray, ivec3(0,0,0), constVec2);
buf.resf16v2 += imageAtomicExchange(fimage3D, ivec3(0,0,0), constVec2);
// imageAtomic* functions with f16vec4 operands; the only supported image
// format for these is rgba16f. Each operation is applied to every rgba16f image.
f16vec4 constVec4 = f16vec4(2.0);
buf.resf16v4 += imageAtomicAdd(fimage1Dv4, int(0), constVec4);
buf.resf16v4 += imageAtomicAdd(fimage1DArrayv4, ivec2(0,0), constVec4);
buf.resf16v4 += imageAtomicAdd(fimage2Dv4, ivec2(0,0), constVec4);
buf.resf16v4 += imageAtomicAdd(fimage2DArrayv4, ivec3(0,0, 0), constVec4);
buf.resf16v4 += imageAtomicAdd(fimageCubev4, ivec3(0,0,0), constVec4);
buf.resf16v4 += imageAtomicAdd(fimageCubeArrayv4, ivec3(0,0,0), constVec4);
buf.resf16v4 += imageAtomicAdd(fimage3Dv4, ivec3(0,0,0), constVec4);
buf.resf16v4 += imageAtomicMin(fimage1Dv4, int(0), constVec4);
buf.resf16v4 += imageAtomicMin(fimage1DArrayv4, ivec2(0,0), constVec4);
buf.resf16v4 += imageAtomicMin(fimage2Dv4, ivec2(0,0), constVec4);
buf.resf16v4 += imageAtomicMin(fimage2DArrayv4, ivec3(0,0, 0), constVec4);
buf.resf16v4 += imageAtomicMin(fimageCubev4, ivec3(0,0,0), constVec4);
buf.resf16v4 += imageAtomicMin(fimageCubeArrayv4, ivec3(0,0,0), constVec4);
buf.resf16v4 += imageAtomicMin(fimage3Dv4, ivec3(0,0,0), constVec4);
buf.resf16v4 += imageAtomicMax(fimage1Dv4, int(0), constVec4);
buf.resf16v4 += imageAtomicMax(fimage1DArrayv4, ivec2(0,0), constVec4);
buf.resf16v4 += imageAtomicMax(fimage2Dv4, ivec2(0,0), constVec4);
buf.resf16v4 += imageAtomicMax(fimage2DArrayv4, ivec3(0,0, 0), constVec4);
buf.resf16v4 += imageAtomicMax(fimageCubev4, ivec3(0,0,0), constVec4);
buf.resf16v4 += imageAtomicMax(fimageCubeArrayv4, ivec3(0,0,0), constVec4);
buf.resf16v4 += imageAtomicMax(fimage3Dv4, ivec3(0,0,0), constVec4);
buf.resf16v4 += imageAtomicExchange(fimage1Dv4, int(0), constVec4);
buf.resf16v4 += imageAtomicExchange(fimage1DArrayv4, ivec2(0,0), constVec4);
buf.resf16v4 += imageAtomicExchange(fimage2Dv4, ivec2(0,0), constVec4);
buf.resf16v4 += imageAtomicExchange(fimage2DArrayv4, ivec3(0,0, 0), constVec4);
buf.resf16v4 += imageAtomicExchange(fimageCubev4, ivec3(0,0,0), constVec4);
buf.resf16v4 += imageAtomicExchange(fimageCubeArrayv4, ivec3(0,0,0), constVec4);
buf.resf16v4 += imageAtomicExchange(fimage3Dv4, ivec3(0,0,0), constVec4);
}

View File

@ -1465,6 +1465,20 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV
"\n");
}
// NV_shader_atomic_fp16_vector
// For non-ES profiles at version 430 or later, declare the add/min/max/exchange
// atomic built-ins for f16vec2 and f16vec4 operands. The prototypes are only
// declared here; the check that the extension is enabled happens elsewhere
// (NOTE(review): presumably in TParseContext::builtInOpCheck - confirm).
if (profile != EEsProfile && version >= 430) {
commonBuiltins.append(
"f16vec2 atomicAdd(coherent volatile inout f16vec2, f16vec2);"
"f16vec4 atomicAdd(coherent volatile inout f16vec4, f16vec4);"
"f16vec2 atomicMin(coherent volatile inout f16vec2, f16vec2);"
"f16vec4 atomicMin(coherent volatile inout f16vec4, f16vec4);"
"f16vec2 atomicMax(coherent volatile inout f16vec2, f16vec2);"
"f16vec4 atomicMax(coherent volatile inout f16vec4, f16vec4);"
"f16vec2 atomicExchange(coherent volatile inout f16vec2, f16vec2);"
"f16vec4 atomicExchange(coherent volatile inout f16vec4, f16vec4);"
"\n");
}
if ((profile == EEsProfile && version >= 300) ||
(profile != EEsProfile && version >= 150)) { // GL_ARB_shader_bit_encoding
commonBuiltins.append(
@ -6678,6 +6692,34 @@ void TBuiltIns::addImageFunctions(TSampler sampler, const TString& typeName, int
commonBuiltins.append(imageParams);
commonBuiltins.append(", float);\n");
}
// GL_NV_shader_atomic_fp16_vector
// For non-ES profiles at version 430 or later, emit one prototype per
// (vector type, operation) pair, of the form
//     <T> imageAtomic<Op>(volatile coherent <imageParams>, <T>);
// with <T> being f16vec2 or f16vec4.
if (profile != EEsProfile && version >= 430) {
    // Vector type used both as the return type and as the data operand.
    const char* const fp16VectorTypes[] = {
        "f16vec2",
        "f16vec4"
    };
    // Text between the return type and the image parameters of each built-in.
    const char* const fp16AtomicOpenings[] = {
        " imageAtomicAdd(volatile coherent ",
        " imageAtomicMin(volatile coherent ",
        " imageAtomicMax(volatile coherent ",
        " imageAtomicExchange(volatile coherent "
    };

    // Outer loop: data type; inner loop: operation. The emission order is
    // all f16vec2 prototypes first, then all f16vec4 prototypes.
    for (const char* vectorType : fp16VectorTypes) {
        for (const char* opening : fp16AtomicOpenings) {
            commonBuiltins.append(vectorType);
            commonBuiltins.append(opening);
            commonBuiltins.append(imageParams);
            commonBuiltins.append(", ");
            commonBuiltins.append(vectorType);
            commonBuiltins.append(");\n");
        }
    }
}
if (profile != EEsProfile && version >= 450) {
commonBuiltins.append("float imageAtomicAdd(volatile coherent ");
commonBuiltins.append(imageParams);

View File

@ -2524,6 +2524,17 @@ void TParseContext::builtInOpCheck(const TSourceLoc& loc, const TFunction& fnCan
error(loc, "only supported on image with format r64i", fnCandidate.getName().c_str(), "");
else if (callNode.getType().getBasicType() == EbtUint64 && imageType.getQualifier().getFormat() != ElfR64ui)
error(loc, "only supported on image with format r64ui", fnCandidate.getName().c_str(), "");
} else if (callNode.getType().getBasicType() == EbtFloat16 &&
           ((callNode.getType().getVectorSize() == 2 && arg0->getType().getQualifier().getFormat() == ElfRg16f) ||
            (callNode.getType().getVectorSize() == 4 && arg0->getType().getQualifier().getFormat() == ElfRgba16f))) {
    // The call returns an f16vec2 on an rg16f image or an f16vec4 on an
    // rgba16f image. Only the add/exchange/min/max image atomics are accepted
    // for these, and they require GL_NV_shader_atomic_fp16_vector; any other
    // function reaching this branch is reported as an error.
    //
    // Each count passed to compare() equals the length of the literal it is
    // compared against, so every test is a prefix match on the function name.
    // (A count larger than the literal would only match a name that is
    // exactly the literal.)
    if ((fnCandidate.getName().compare(0, 14, "imageAtomicAdd") == 0) ||
        (fnCandidate.getName().compare(0, 19, "imageAtomicExchange") == 0) ||
        (fnCandidate.getName().compare(0, 14, "imageAtomicMin") == 0) ||
        (fnCandidate.getName().compare(0, 14, "imageAtomicMax") == 0)) {
        requireExtensions(loc, 1, &E_GL_NV_shader_atomic_fp16_vector, fnCandidate.getName().c_str());
    } else {
        error(loc, "f16vec2/4 operation not supported on: ", fnCandidate.getName().c_str(), "");
    }
} else if (imageType.getSampler().type == EbtFloat) {
if (fnCandidate.getName().compare(0, 19, "imageAtomicExchange") == 0) {
// imageAtomicExchange doesn't require an extension
@ -2582,6 +2593,11 @@ void TParseContext::builtInOpCheck(const TSourceLoc& loc, const TFunction& fnCan
const char* const extensions[2] = { E_GL_NV_shader_atomic_int64,
E_GL_EXT_shader_atomic_int64 };
requireExtensions(loc, 2, extensions, fnCandidate.getName().c_str());
} else if ((callNode.getOp() == EOpAtomicAdd || callNode.getOp() == EOpAtomicExchange ||
callNode.getOp() == EOpAtomicMin || callNode.getOp() == EOpAtomicMax) &&
arg0->getType().getBasicType() == EbtFloat16 &&
(arg0->getType().getVectorSize() == 2 || arg0->getType().getVectorSize() == 4 )) {
// Atomic add/exchange/min/max whose first argument is a 2- or 4-component
// float16 vector: only available with GL_NV_shader_atomic_fp16_vector.
requireExtensions(loc, 1, &E_GL_NV_shader_atomic_fp16_vector, fnCandidate.getName().c_str());
} else if ((callNode.getOp() == EOpAtomicAdd || callNode.getOp() == EOpAtomicExchange) &&
(arg0->getType().getBasicType() == EbtFloat ||
arg0->getType().getBasicType() == EbtDouble)) {

View File

@ -305,6 +305,7 @@ void TParseVersions::initializeExtensionBehavior()
extensionBehavior[E_GL_NV_integer_cooperative_matrix] = EBhDisable;
extensionBehavior[E_GL_NV_shader_invocation_reorder] = EBhDisable;
extensionBehavior[E_GL_NV_displacement_micromap] = EBhDisable;
extensionBehavior[E_GL_NV_shader_atomic_fp16_vector] = EBhDisable;
// ARM
extensionBehavior[E_GL_ARM_shader_core_builtins] = EBhDisable;

View File

@ -278,6 +278,7 @@ const char* const E_GL_NV_integer_cooperative_matrix = "GL_NV_integer
const char* const E_GL_NV_shader_invocation_reorder = "GL_NV_shader_invocation_reorder";
const char* const E_GL_EXT_ray_tracing_position_fetch = "GL_EXT_ray_tracing_position_fetch";
const char* const E_GL_NV_displacement_micromap = "GL_NV_displacement_micromap";
const char* const E_GL_NV_shader_atomic_fp16_vector = "GL_NV_shader_atomic_fp16_vector";
// ARM
const char* const E_GL_ARM_shader_core_builtins = "GL_ARM_shader_core_builtins";

View File

@ -453,6 +453,7 @@ INSTANTIATE_TEST_SUITE_P(
"spv.nonuniform4.frag",
"spv.nonuniform5.frag",
"spv.noWorkgroup.comp",
"spv.nvAtomicFp16Vec.frag",
"spv.nullInit.comp",
"spv.offsets.frag",
"spv.Operations.frag",

View File

@ -5,14 +5,14 @@
"site" : "github",
"subrepo" : "KhronosGroup/SPIRV-Tools",
"subdir" : "External/spirv-tools",
"commit": "b951948eaa75b51466eaa22e8a89223966f300e4"
"commit": "b0a5c4ac12b742086ffb16e2ba0ad4903450ae1d"
},
{
"name" : "spirv-tools/external/spirv-headers",
"site" : "github",
"subrepo" : "KhronosGroup/SPIRV-Headers",
"subdir" : "External/spirv-tools/external/spirv-headers",
"commit" : "5aa1dd8a11182ea9a6a0eabd6a9edc639d5dbecd"
"commit" : "05cc486580771e4fa7ddc89f5c9ee1e97382689a"
},
{
"name": "googletest",