From 8e26feb8f2610c3c070775dd6aceb8d436416439 Mon Sep 17 00:00:00 2001 From: steve-lunarg Date: Mon, 10 Apr 2017 08:19:21 -0600 Subject: [PATCH] WIP: HLSL: structuredbuffer counter functionality This is WIP, heavy on the IP part. There's not yet enough to use in real workloads. Currently present: * Creation of separate counter buffers for structured buffer types needing them. * IncrementCounter / DecrementCounter methods * Postprocess to remove unused counter buffers from linkage * Associated counter buffers are given @count suffix (invalid as a user identifier) Not yet present: * reflection queries to obtain bindings for counter buffers * Append/Consume buffers * Ability to use SB references passed as fn parameters --- .../hlsl.structbuffer.incdec.frag.out | 316 ++++++++++++++++++ Test/hlsl.structbuffer.incdec.frag | 19 ++ glslang/Include/BaseTypes.h | 7 + glslang/Include/intermediate.h | 4 + gtests/Hlsl.FromFile.cpp | 1 + hlsl/hlslGrammar.cpp | 21 +- hlsl/hlslParseHelper.cpp | 158 ++++++++- hlsl/hlslParseHelper.h | 12 + hlsl/hlslParseables.cpp | 6 + 9 files changed, 520 insertions(+), 24 deletions(-) create mode 100644 Test/baseResults/hlsl.structbuffer.incdec.frag.out create mode 100644 Test/hlsl.structbuffer.incdec.frag diff --git a/Test/baseResults/hlsl.structbuffer.incdec.frag.out b/Test/baseResults/hlsl.structbuffer.incdec.frag.out new file mode 100644 index 000000000..dc68d9444 --- /dev/null +++ b/Test/baseResults/hlsl.structbuffer.incdec.frag.out @@ -0,0 +1,316 @@ +hlsl.structbuffer.incdec.frag +Shader version: 500 +gl_FragCoord origin is upper left +0:? Sequence +0:7 Function Definition: @main(u1; ( temp 4-component vector of float) +0:7 Function Parameters: +0:7 'pos' ( in uint) +0:? 
Sequence +0:8 Sequence +0:8 move second child to first child ( temp 4-component vector of uint) +0:8 'result' ( temp 4-component vector of uint) +0:8 Constant: +0:8 0 (const uint) +0:8 0 (const uint) +0:8 0 (const uint) +0:8 0 (const uint) +0:10 direct index (layout( row_major std430) buffer 4-component vector of uint) +0:10 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of 4-component vector of uint) +0:10 'sbuf_rw_i' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of uint @data}) +0:10 Constant: +0:10 0 (const uint) +0:10 Constant: +0:10 7 (const int) +0:11 direct index (layout( row_major std430) buffer 4-component vector of uint) +0:11 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of 4-component vector of uint) +0:11 'sbuf_rw_d' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of uint @data}) +0:11 Constant: +0:11 0 (const uint) +0:11 Constant: +0:11 7 (const int) +0:13 move second child to first child ( temp 4-component vector of uint) +0:13 direct index (layout( row_major std430) buffer 4-component vector of uint) +0:13 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of 4-component vector of uint) +0:13 'sbuf_rw_nocounter' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of uint @data}) +0:13 Constant: +0:13 0 (const uint) +0:13 Constant: +0:13 5 (const int) +0:13 Constant: +0:13 2 (const uint) +0:13 2 (const uint) +0:13 2 (const uint) +0:13 2 (const uint) +0:15 Sequence +0:15 move second child to first child ( temp uint) +0:15 'c1' ( temp uint) +0:15 AtomicAdd ( temp uint) +0:15 @count: direct index for structure ( temp int) +0:15 'sbuf_rw_i@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int 
@count}) +0:15 Constant: +0:15 0 (const int) +0:15 Constant: +0:15 1 (const int) +0:16 Sequence +0:16 move second child to first child ( temp uint) +0:16 'c2' ( temp uint) +0:16 AtomicAdd ( temp uint) +0:16 @count: direct index for structure ( temp int) +0:16 'sbuf_rw_d@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count}) +0:16 Constant: +0:16 0 (const int) +0:16 Constant: +0:16 -1 (const int) +0:18 Branch: Return with expression +0:? Construct vec4 ( temp 4-component vector of float) +0:18 Convert uint to float ( temp float) +0:18 direct index ( temp uint) +0:18 'result' ( temp 4-component vector of uint) +0:18 Constant: +0:18 0 (const int) +0:18 Convert uint to float ( temp float) +0:18 direct index ( temp uint) +0:18 'result' ( temp 4-component vector of uint) +0:18 Constant: +0:18 1 (const int) +0:18 Convert uint to float ( temp float) +0:18 'c1' ( temp uint) +0:18 Convert uint to float ( temp float) +0:18 'c2' ( temp uint) +0:7 Function Definition: main( ( temp void) +0:7 Function Parameters: +0:? Sequence +0:7 move second child to first child ( temp uint) +0:? 'pos' ( temp uint) +0:? 'pos' (layout( location=0) in uint) +0:7 move second child to first child ( temp 4-component vector of float) +0:? '@entryPointOutput' (layout( location=0) out 4-component vector of float) +0:7 Function Call: @main(u1; ( temp 4-component vector of float) +0:? 'pos' ( temp uint) +0:? Linker Objects +0:? 'sbuf_rw_i' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of uint @data}) +0:? 'sbuf_rw_i@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count}) +0:? 'sbuf_rw_d' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of uint @data}) +0:? 'sbuf_rw_d@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count}) +0:? 
'sbuf_rw_nocounter' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of uint @data}) +0:? '@entryPointOutput' (layout( location=0) out 4-component vector of float) +0:? 'pos' (layout( location=0) in uint) + + +Linked fragment stage: + + +Shader version: 500 +gl_FragCoord origin is upper left +0:? Sequence +0:7 Function Definition: @main(u1; ( temp 4-component vector of float) +0:7 Function Parameters: +0:7 'pos' ( in uint) +0:? Sequence +0:8 Sequence +0:8 move second child to first child ( temp 4-component vector of uint) +0:8 'result' ( temp 4-component vector of uint) +0:8 Constant: +0:8 0 (const uint) +0:8 0 (const uint) +0:8 0 (const uint) +0:8 0 (const uint) +0:10 direct index (layout( row_major std430) buffer 4-component vector of uint) +0:10 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of 4-component vector of uint) +0:10 'sbuf_rw_i' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of uint @data}) +0:10 Constant: +0:10 0 (const uint) +0:10 Constant: +0:10 7 (const int) +0:11 direct index (layout( row_major std430) buffer 4-component vector of uint) +0:11 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of 4-component vector of uint) +0:11 'sbuf_rw_d' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of uint @data}) +0:11 Constant: +0:11 0 (const uint) +0:11 Constant: +0:11 7 (const int) +0:13 move second child to first child ( temp 4-component vector of uint) +0:13 direct index (layout( row_major std430) buffer 4-component vector of uint) +0:13 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of 4-component vector of uint) +0:13 'sbuf_rw_nocounter' (layout( row_major std430) buffer block{layout( row_major std430) buffer 
implicitly-sized array of 4-component vector of uint @data}) +0:13 Constant: +0:13 0 (const uint) +0:13 Constant: +0:13 5 (const int) +0:13 Constant: +0:13 2 (const uint) +0:13 2 (const uint) +0:13 2 (const uint) +0:13 2 (const uint) +0:15 Sequence +0:15 move second child to first child ( temp uint) +0:15 'c1' ( temp uint) +0:15 AtomicAdd ( temp uint) +0:15 @count: direct index for structure ( temp int) +0:15 'sbuf_rw_i@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count}) +0:15 Constant: +0:15 0 (const int) +0:15 Constant: +0:15 1 (const int) +0:16 Sequence +0:16 move second child to first child ( temp uint) +0:16 'c2' ( temp uint) +0:16 AtomicAdd ( temp uint) +0:16 @count: direct index for structure ( temp int) +0:16 'sbuf_rw_d@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count}) +0:16 Constant: +0:16 0 (const int) +0:16 Constant: +0:16 -1 (const int) +0:18 Branch: Return with expression +0:? Construct vec4 ( temp 4-component vector of float) +0:18 Convert uint to float ( temp float) +0:18 direct index ( temp uint) +0:18 'result' ( temp 4-component vector of uint) +0:18 Constant: +0:18 0 (const int) +0:18 Convert uint to float ( temp float) +0:18 direct index ( temp uint) +0:18 'result' ( temp 4-component vector of uint) +0:18 Constant: +0:18 1 (const int) +0:18 Convert uint to float ( temp float) +0:18 'c1' ( temp uint) +0:18 Convert uint to float ( temp float) +0:18 'c2' ( temp uint) +0:7 Function Definition: main( ( temp void) +0:7 Function Parameters: +0:? Sequence +0:7 move second child to first child ( temp uint) +0:? 'pos' ( temp uint) +0:? 'pos' (layout( location=0) in uint) +0:7 move second child to first child ( temp 4-component vector of float) +0:? '@entryPointOutput' (layout( location=0) out 4-component vector of float) +0:7 Function Call: @main(u1; ( temp 4-component vector of float) +0:? 'pos' ( temp uint) +0:? Linker Objects +0:? 
'sbuf_rw_i' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of uint @data}) +0:? 'sbuf_rw_i@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count}) +0:? 'sbuf_rw_d' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of uint @data}) +0:? 'sbuf_rw_d@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count}) +0:? 'sbuf_rw_nocounter' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of uint @data}) +0:? '@entryPointOutput' (layout( location=0) out 4-component vector of float) +0:? 'pos' (layout( location=0) in uint) + +// Module Version 10000 +// Generated by (magic number): 80001 +// Id's are bound by 69 + + Capability Shader + 1: ExtInstImport "GLSL.std.450" + MemoryModel Logical GLSL450 + EntryPoint Fragment 4 "main" 62 65 + ExecutionMode 4 OriginUpperLeft + Source HLSL 500 + Name 4 "main" + Name 12 "@main(u1;" + Name 11 "pos" + Name 16 "result" + Name 20 "sbuf_rw_i" + MemberName 20(sbuf_rw_i) 0 "@data" + Name 22 "sbuf_rw_i" + Name 26 "sbuf_rw_d" + Name 27 "sbuf_rw_nocounter" + Name 33 "c1" + Name 34 "sbuf_rw_i@count" + MemberName 34(sbuf_rw_i@count) 0 "@count" + Name 36 "sbuf_rw_i@count" + Name 42 "c2" + Name 43 "sbuf_rw_d@count" + Name 60 "pos" + Name 62 "pos" + Name 65 "@entryPointOutput" + Name 66 "param" + Decorate 19 ArrayStride 16 + MemberDecorate 20(sbuf_rw_i) 0 Offset 0 + Decorate 20(sbuf_rw_i) BufferBlock + Decorate 22(sbuf_rw_i) DescriptorSet 0 + Decorate 26(sbuf_rw_d) DescriptorSet 0 + Decorate 27(sbuf_rw_nocounter) DescriptorSet 0 + MemberDecorate 34(sbuf_rw_i@count) 0 Offset 0 + Decorate 34(sbuf_rw_i@count) BufferBlock + Decorate 36(sbuf_rw_i@count) DescriptorSet 0 + Decorate 43(sbuf_rw_d@count) DescriptorSet 0 + Decorate 62(pos) Location 0 + Decorate 65(@entryPointOutput) Location 
0 + 2: TypeVoid + 3: TypeFunction 2 + 6: TypeInt 32 0 + 7: TypePointer Function 6(int) + 8: TypeFloat 32 + 9: TypeVector 8(float) 4 + 10: TypeFunction 9(fvec4) 7(ptr) + 14: TypeVector 6(int) 4 + 15: TypePointer Function 14(ivec4) + 17: 6(int) Constant 0 + 18: 14(ivec4) ConstantComposite 17 17 17 17 + 19: TypeRuntimeArray 14(ivec4) + 20(sbuf_rw_i): TypeStruct 19 + 21: TypePointer Uniform 20(sbuf_rw_i) + 22(sbuf_rw_i): 21(ptr) Variable Uniform + 23: TypeInt 32 1 + 24: 23(int) Constant 0 + 25: 23(int) Constant 7 + 26(sbuf_rw_d): 21(ptr) Variable Uniform +27(sbuf_rw_nocounter): 21(ptr) Variable Uniform + 28: 23(int) Constant 5 + 29: 6(int) Constant 2 + 30: 14(ivec4) ConstantComposite 29 29 29 29 + 31: TypePointer Uniform 14(ivec4) +34(sbuf_rw_i@count): TypeStruct 23(int) + 35: TypePointer Uniform 34(sbuf_rw_i@count) +36(sbuf_rw_i@count): 35(ptr) Variable Uniform + 37: TypePointer Uniform 23(int) + 39: 23(int) Constant 1 + 40: 6(int) Constant 1 +43(sbuf_rw_d@count): 35(ptr) Variable Uniform + 45: 23(int) Constant 4294967295 + 61: TypePointer Input 6(int) + 62(pos): 61(ptr) Variable Input + 64: TypePointer Output 9(fvec4) +65(@entryPointOutput): 64(ptr) Variable Output + 4(main): 2 Function None 3 + 5: Label + 60(pos): 7(ptr) Variable Function + 66(param): 7(ptr) Variable Function + 63: 6(int) Load 62(pos) + Store 60(pos) 63 + 67: 6(int) Load 60(pos) + Store 66(param) 67 + 68: 9(fvec4) FunctionCall 12(@main(u1;) 66(param) + Store 65(@entryPointOutput) 68 + Return + FunctionEnd + 12(@main(u1;): 9(fvec4) Function None 10 + 11(pos): 7(ptr) FunctionParameter + 13: Label + 16(result): 15(ptr) Variable Function + 33(c1): 7(ptr) Variable Function + 42(c2): 7(ptr) Variable Function + Store 16(result) 18 + 32: 31(ptr) AccessChain 27(sbuf_rw_nocounter) 24 28 + Store 32 30 + 38: 37(ptr) AccessChain 36(sbuf_rw_i@count) 24 + 41: 6(int) AtomicIAdd 38 40 17 39 + Store 33(c1) 41 + 44: 37(ptr) AccessChain 43(sbuf_rw_d@count) 24 + 46: 6(int) AtomicIAdd 44 40 17 45 + Store 42(c2) 46 + 47: 
7(ptr) AccessChain 16(result) 17 + 48: 6(int) Load 47 + 49: 8(float) ConvertUToF 48 + 50: 7(ptr) AccessChain 16(result) 40 + 51: 6(int) Load 50 + 52: 8(float) ConvertUToF 51 + 53: 6(int) Load 33(c1) + 54: 8(float) ConvertUToF 53 + 55: 6(int) Load 42(c2) + 56: 8(float) ConvertUToF 55 + 57: 9(fvec4) CompositeConstruct 49 52 54 56 + ReturnValue 57 + FunctionEnd diff --git a/Test/hlsl.structbuffer.incdec.frag b/Test/hlsl.structbuffer.incdec.frag new file mode 100644 index 000000000..108dcb610 --- /dev/null +++ b/Test/hlsl.structbuffer.incdec.frag @@ -0,0 +1,19 @@ +RWStructuredBuffer<uint4> sbuf_rw_i; +RWStructuredBuffer<uint4> sbuf_rw_d; + +RWStructuredBuffer<uint4> sbuf_rw_nocounter; // doesn't use inc or dec + +float4 main(uint pos : FOO) : SV_Target0 +{ + uint4 result = 0; + + sbuf_rw_i[7]; + sbuf_rw_d[7]; + + sbuf_rw_nocounter[5] = 2; + + uint c1 = sbuf_rw_i.IncrementCounter(); + uint c2 = sbuf_rw_d.DecrementCounter(); + + return float4(result.x, result.y, c1, c2); +} diff --git a/glslang/Include/BaseTypes.h b/glslang/Include/BaseTypes.h index a805a0603..abac5122d 100644 --- a/glslang/Include/BaseTypes.h +++ b/glslang/Include/BaseTypes.h @@ -223,6 +223,13 @@ enum TBuiltInVariable { EbvOutputPatch, EbvInputPatch, + // structbuffer types + EbvAppendConsume, // no need to differentiate append and consume + EbvRWStructuredBuffer, + EbvStructuredBuffer, + EbvByteAddressBuffer, + EbvRWByteAddressBuffer, + EbvLast }; diff --git a/glslang/Include/intermediate.h b/glslang/Include/intermediate.h index 934994d9a..60b0883be 100644 --- a/glslang/Include/intermediate.h +++ b/glslang/Include/intermediate.h @@ -633,6 +633,10 @@ enum TOperator { EOpMethodStore2, // ... EOpMethodStore3, // ... EOpMethodStore4, // ... + EOpMethodIncrementCounter, // ... + EOpMethodDecrementCounter, // ... 
+ // EOpMethodAppend is defined for geo shaders below + EOpMethodConsume, // SM5 texture methods EOpMethodGatherRed, // These are covered under the above EOpMethodSample comment about diff --git a/gtests/Hlsl.FromFile.cpp b/gtests/Hlsl.FromFile.cpp index c616dfdfd..13c748458 100644 --- a/gtests/Hlsl.FromFile.cpp +++ b/gtests/Hlsl.FromFile.cpp @@ -248,6 +248,7 @@ INSTANTIATE_TEST_CASE_P( {"hlsl.structbuffer.atomics.frag", "main"}, {"hlsl.structbuffer.byte.frag", "main"}, {"hlsl.structbuffer.coherent.frag", "main"}, + {"hlsl.structbuffer.incdec.frag", "main"}, {"hlsl.structbuffer.fn.frag", "main"}, {"hlsl.structbuffer.rw.frag", "main"}, {"hlsl.structbuffer.rwbyte.frag", "main"}, diff --git a/hlsl/hlslGrammar.cpp b/hlsl/hlslGrammar.cpp index f58f74204..72b58d167 100755 --- a/hlsl/hlslGrammar.cpp +++ b/hlsl/hlslGrammar.cpp @@ -475,9 +475,10 @@ bool HlslGrammar::acceptDeclaration(TIntermNode*& nodeList) if (variableType.getBasicType() != EbtString && parseContext.getAnnotationNestingLevel() == 0) { if (typedefDecl) parseContext.declareTypedef(idToken.loc, *fullName, variableType); - else if (variableType.getBasicType() == EbtBlock) + else if (variableType.getBasicType() == EbtBlock) { parseContext.declareBlock(idToken.loc, variableType, fullName); - else { + parseContext.declareStructBufferCounter(idToken.loc, variableType, *fullName); + } else { if (variableType.getQualifier().storage == EvqUniform && ! 
variableType.containsOpaque()) { // this isn't really an individual variable, but a member of the $Global buffer parseContext.growGlobalUniformBlock(idToken.loc, variableType, *fullName); @@ -1955,24 +1956,29 @@ bool HlslGrammar::acceptStructBufferType(TType& type) bool readonly = false; TStorageQualifier storage = EvqBuffer; + TBuiltInVariable builtinType = EbvNone; switch (structBuffType) { case EHTokAppendStructuredBuffer: - unimplemented("AppendStructuredBuffer"); - return false; + builtinType = EbvAppendConsume; + break; case EHTokByteAddressBuffer: hasTemplateType = false; readonly = true; + builtinType = EbvByteAddressBuffer; break; case EHTokConsumeStructuredBuffer: - unimplemented("ConsumeStructuredBuffer"); - return false; + builtinType = EbvAppendConsume; + break; case EHTokRWByteAddressBuffer: hasTemplateType = false; + builtinType = EbvRWByteAddressBuffer; break; case EHTokRWStructuredBuffer: + builtinType = EbvRWStructuredBuffer; break; case EHTokStructuredBuffer: + builtinType = EbvStructuredBuffer; readonly = true; break; default: @@ -2014,8 +2020,6 @@ bool HlslGrammar::acceptStructBufferType(TType& type) // field name is canonical for all structbuffers templateType->setFieldName("@data"); - // Create block type. TODO: hidden internal uint member when needed - TTypeList* blockStruct = new TTypeList; TTypeLoc member = { templateType, token.loc }; blockStruct->push_back(member); @@ -2025,6 +2029,7 @@ bool HlslGrammar::acceptStructBufferType(TType& type) blockType.getQualifier().storage = storage; blockType.getQualifier().readonly = readonly; + blockType.getQualifier().builtIn = builtinType; // We may have created an equivalent type before, in which case we should use its // deep structure. 
diff --git a/hlsl/hlslParseHelper.cpp b/hlsl/hlslParseHelper.cpp index a2a490ca7..54b22a48d 100755 --- a/hlsl/hlslParseHelper.cpp +++ b/hlsl/hlslParseHelper.cpp @@ -842,7 +842,11 @@ bool HlslParseContext::isStructBufferMethod(const TString& name) const name == "InterlockedMax" || name == "InterlockedMin" || name == "InterlockedOr" || - name == "InterlockedXor"; + name == "InterlockedXor" || + name == "IncrementCounter" || + name == "DecrementCounter" || + name == "Append" || + name == "Consume"; } // @@ -1514,7 +1518,7 @@ void HlslParseContext::handleFunctionDeclarator(const TSourceLoc& loc, TFunction error(loc, "function name is redeclaration of existing name", function.getName().c_str(), ""); } -// Add interstage IO variables to the linkage in canonical order. +// Finalization step: Add interstage IO variables to the linkage in canonical order. void HlslParseContext::addInterstageIoToLinkage() { TSourceLoc loc; @@ -2438,24 +2442,96 @@ TIntermAggregate* HlslParseContext::handleSamplerTextureCombine(const TSourceLoc return txcombine; } +// Return true if this is a buffer type that has an associated counter buffer. +bool HlslParseContext::hasStructBuffCounter(const TString& name) const +{ + const auto bivIt = structBufferBuiltIn.find(name); + if (bivIt == structBufferBuiltIn.end()) + return false; + + switch (bivIt->second) { + case EbvAppendConsume: // fall through... + case EbvRWStructuredBuffer: // ... + return true; + default: + return false; // other builtin types do not have. + } +} + +// declare counter for a structured buffer type +void HlslParseContext::declareStructBufferCounter(const TSourceLoc& loc, const TType& bufferType, const TString& name) +{ + // Bail out if not a struct buffer + if (! isStructBufferType(bufferType)) + return; + + if (! 
hasStructBuffCounter(name)) + return; + + // Counter type + TType* counterType = new TType(EbtInt, EvqBuffer); + counterType->setFieldName("@count"); + + TTypeList* blockStruct = new TTypeList; + TTypeLoc member = { counterType, loc }; + blockStruct->push_back(member); + + TString* blockName = new TString(name); + *blockName += "@count"; + + structBufferCounter[*blockName] = false; + + TType blockType(blockStruct, "", counterType->getQualifier()); + blockType.getQualifier().storage = EvqBuffer; + + shareStructBufferType(blockType); + declareBlock(loc, blockType, blockName); +} + +// return the counter that goes with a given structuredbuffer +TIntermTyped* HlslParseContext::getStructBufferCounter(const TSourceLoc& loc, TIntermTyped* buffer) +{ + // Bail out if not a struct buffer + if (buffer == nullptr || ! isStructBufferType(buffer->getType())) + return nullptr; + + TString blockName(buffer->getAsSymbolNode()->getName()); + blockName += "@count"; + + // Mark the counter as being used + structBufferCounter[blockName] = true; + + TIntermTyped* counterVar = handleVariable(loc, &blockName); // find the block structure + TIntermTyped* index = intermediate.addConstantUnion(0, loc); // index to counter inside block struct + + TIntermTyped* counterMember = intermediate.addIndex(EOpIndexDirectStruct, counterVar, index, loc); + counterMember->setType(TType(EbtInt)); + return counterMember; +} + + // // Decompose structure buffer methods into AST // void HlslParseContext::decomposeStructBufferMethods(const TSourceLoc& loc, TIntermTyped*& node, TIntermNode* arguments) { - if (!node || !node->getAsOperator()) + if (node == nullptr || node->getAsOperator() == nullptr || arguments == nullptr) return; const TOperator op = node->getAsOperator()->getOp(); - TIntermAggregate* argAggregate = arguments ? 
arguments->getAsAggregate() : nullptr; - if (argAggregate == nullptr) - return; - - if (argAggregate->getSequence().empty()) - return; + TIntermAggregate* argAggregate = arguments->getAsAggregate(); // Buffer is the object upon which method is called, so always arg 0 - TIntermTyped* bufferObj = argAggregate->getSequence()[0]->getAsTyped(); + TIntermTyped* bufferObj = nullptr; + + // The parameters can be an aggregate, or just the object as a symbol if there are no fn params. + if (argAggregate) { + if (argAggregate->getSequence().empty()) + return; + bufferObj = argAggregate->getSequence()[0]->getAsTyped(); + } else { + bufferObj = arguments->getAsSymbolNode(); + } // Index to obtain the runtime sized array out of the buffer. TIntermTyped* argArray = indexStructBufferContent(loc, bufferObj); @@ -2670,6 +2746,29 @@ void HlslParseContext::decomposeStructBufferMethods(const TSourceLoc& loc, TInte } break; + + case EOpMethodIncrementCounter: + case EOpMethodDecrementCounter: + { + // These methods require a hidden internal counter, obtained via getStructBufferCounter() + TIntermTyped* incrementValue = intermediate.addConstantUnion(op == EOpMethodIncrementCounter ? 1 : -1, loc, true); + TIntermTyped* counter = getStructBufferCounter(loc, bufferObj); // obtain the counter member + + node = incrementValue; + + if (counter == nullptr) + break; + + TIntermAggregate* counterIncrement = new TIntermAggregate(EOpAtomicAdd); + counterIncrement->setType(TType(EbtUint, EvqTemporary)); + counterIncrement->setLoc(loc); + counterIncrement->getSequence().push_back(counter); + counterIncrement->getSequence().push_back(incrementValue); + + node = counterIncrement; + } + break; + default: break; // most pass through unchanged } @@ -3978,10 +4077,18 @@ TIntermTyped* HlslParseContext::handleFunctionCall(const TSourceLoc& loc, TFunct // TODO: this needs improvement: there's no way at present to look up a signature in // the symbol table for an arbitrary type. 
This is a temporary hack until that ability exists. // It will have false positives, since it doesn't check arg counts or types. - if (arguments && arguments->getAsAggregate()) { - const TIntermSequence& sequence = arguments->getAsAggregate()->getSequence(); + if (arguments) { + // Check if first argument is struct buffer type. It may be an aggregate or a symbol, so we + // look for either case. - if (!sequence.empty() && isStructBufferType(sequence[0]->getAsTyped()->getType())) { + TIntermTyped* arg0 = nullptr; + + if (arguments->getAsAggregate() && arguments->getAsAggregate()->getSequence().size() > 0) + arg0 = arguments->getAsAggregate()->getSequence()[0]->getAsTyped(); + else if (arguments->getAsSymbolNode()) + arg0 = arguments->getAsSymbolNode(); + + if (arg0 != nullptr && isStructBufferType(arg0->getType())) { static const int methodPrefixSize = sizeof(BUILTIN_PREFIX)-1; if (function->getName().length() > methodPrefixSize && @@ -5845,8 +5952,9 @@ const TFunction* HlslParseContext::findFunction(const TSourceLoc& loc, TFunction // These builtin ops can accept any type, so we bypass the argument selection if (candidateList.size() == 1 && builtIn && (candidateList[0]->getBuiltInOp() == EOpMethodAppend || - candidateList[0]->getBuiltInOp() == EOpMethodRestartStrip)) { - + candidateList[0]->getBuiltInOp() == EOpMethodRestartStrip || + candidateList[0]->getBuiltInOp() == EOpMethodIncrementCounter || + candidateList[0]->getBuiltInOp() == EOpMethodDecrementCounter)) { return candidateList[0]; } @@ -6856,6 +6964,10 @@ void HlslParseContext::declareBlock(const TSourceLoc& loc, TType& type, const TS switch (type.getQualifier().storage) { case EvqUniform: case EvqBuffer: + // remember pre-sanitized builtin type + if (type.getQualifier().storage == EvqBuffer && instanceName != nullptr) + structBufferBuiltIn[*instanceName] = type.getQualifier().builtIn; + correctUniform(type.getQualifier()); break; case EvqVaryingIn: @@ -7670,7 +7782,7 @@ TIntermSymbol* 
HlslParseContext::findLinkageSymbol(TBuiltInVariable biType) cons return intermediate.addSymbol(*it->second->getAsVariable()); } -// Add patch constant function invocation +// Finalization step: Add patch constant function invocation void HlslParseContext::addPatchConstantInvocation() { TSourceLoc loc; @@ -8039,9 +8151,23 @@ void HlslParseContext::addPatchConstantInvocation() epBodySeq.insert(epBodySeq.end(), invocationIdTest); } +// Finalization step: remove unused buffer blocks from linkage (we don't know until the +// shader is entirely compiled) +void HlslParseContext::removeUnusedStructBufferCounters() +{ + const auto endIt = std::remove_if(linkageSymbols.begin(), linkageSymbols.end(), + [this](const TSymbol* sym) { + const auto sbcIt = structBufferCounter.find(sym->getName()); + return sbcIt != structBufferCounter.end() && !sbcIt->second; + }); + + linkageSymbols.erase(endIt, linkageSymbols.end()); +} + // post-processing void HlslParseContext::finish() { + removeUnusedStructBufferCounters(); addPatchConstantInvocation(); addInterstageIoToLinkage(); diff --git a/hlsl/hlslParseHelper.h b/hlsl/hlslParseHelper.h index 947fef771..5c6ddbd85 100755 --- a/hlsl/hlslParseHelper.h +++ b/hlsl/hlslParseHelper.h @@ -146,6 +146,7 @@ public: TIntermTyped* constructAggregate(TIntermNode*, const TType&, int, const TSourceLoc&); TIntermTyped* constructBuiltIn(const TType&, TOperator, TIntermTyped*, const TSourceLoc&, bool subset); void declareBlock(const TSourceLoc&, TType&, const TString* instanceName = 0, TArraySizes* arraySizes = 0); + void declareStructBufferCounter(const TSourceLoc& loc, const TType& bufferType, const TString& name); void fixBlockLocations(const TSourceLoc&, TQualifier&, TTypeList&, bool memberWithLocation, bool memberWithoutLocation); void fixBlockXfbOffsets(TQualifier&, TTypeList&); void fixBlockUniformOffsets(const TQualifier&, TTypeList&); @@ -274,11 +275,19 @@ protected: TType* getStructBufferContentType(const TType& type) const; bool 
isStructBufferType(const TType& type) const { return getStructBufferContentType(type) != nullptr; } TIntermTyped* indexStructBufferContent(const TSourceLoc& loc, TIntermTyped* buffer) const; + TIntermTyped* getStructBufferCounter(const TSourceLoc& loc, TIntermTyped* buffer); // Return true if this type is a reference. This is not currently a type method in case that's // a language specific answer. bool isReference(const TType& type) const { return isStructBufferType(type); } + // Return true if this is a buffer type that has an associated counter buffer. + bool hasStructBuffCounter(const TString& name) const; + + // Finalization step: remove unused buffer blocks from linkage (we don't know until the + // shader is entirely compiled) + void removeUnusedStructBufferCounters(); + // Pass through to base class after remembering builtin mappings. using TParseContextBase::trackLinkage; void trackLinkage(TSymbol& variable) override; @@ -366,6 +375,9 @@ protected: // Structuredbuffer shared types. Typically there are only a few. TVector structBufferTypes; + + TMap<TString, TBuiltInVariable> structBufferBuiltIn; + TMap<TString, bool> structBufferCounter; // The builtin interstage IO map considers e.g, EvqPosition on input and output separately, so that we // can build the linkage correctly if position appears on both sides. 
Otherwise, multiple positions diff --git a/hlsl/hlslParseables.cpp b/hlsl/hlslParseables.cpp index c77b54147..52c0629c2 100755 --- a/hlsl/hlslParseables.cpp +++ b/hlsl/hlslParseables.cpp @@ -871,6 +871,8 @@ void TBuiltInParseablesHlsl::initialize(int /*version*/, EProfile /*profile*/, c { "InterlockedMin", nullptr, nullptr, "-", "-", EShLangAll, true }, { "InterlockedOr", nullptr, nullptr, "-", "-", EShLangAll, true }, { "InterlockedXor", nullptr, nullptr, "-", "-", EShLangAll, true }, + { "IncrementCounter", nullptr, nullptr, "-", "-", EShLangAll, true }, + { "DecrementCounter", nullptr, nullptr, "-", "-", EShLangAll, true }, // Mark end of list, since we want to avoid a range-based for, as some compilers don't handle it yet. { nullptr, nullptr, nullptr, nullptr, nullptr, 0, false }, @@ -1180,6 +1182,10 @@ void TBuiltInParseablesHlsl::identifyBuiltIns(int /*version*/, EProfile /*profil symbolTable.relateToOperator(BUILTIN_PREFIX "Store2", EOpMethodStore2); symbolTable.relateToOperator(BUILTIN_PREFIX "Store3", EOpMethodStore3); symbolTable.relateToOperator(BUILTIN_PREFIX "Store4", EOpMethodStore4); + symbolTable.relateToOperator(BUILTIN_PREFIX "IncrementCounter", EOpMethodIncrementCounter); + symbolTable.relateToOperator(BUILTIN_PREFIX "DecrementCounter", EOpMethodDecrementCounter); + symbolTable.relateToOperator(BUILTIN_PREFIX "Append", EOpMethodAppend); + symbolTable.relateToOperator(BUILTIN_PREFIX "Consume", EOpMethodConsume); symbolTable.relateToOperator(BUILTIN_PREFIX "InterlockedAdd", EOpInterlockedAdd); symbolTable.relateToOperator(BUILTIN_PREFIX "InterlockedAnd", EOpInterlockedAnd);