From 8e26feb8f2610c3c070775dd6aceb8d436416439 Mon Sep 17 00:00:00 2001 From: steve-lunarg Date: Mon, 10 Apr 2017 08:19:21 -0600 Subject: [PATCH 1/3] WIP: HLSL: structuredbuffer counter functionality This is WIP, heavy on the IP part. There's not yet enough to use in real workloads. Currently present: * Creation of separate counter buffers for structured buffer types needing them. * IncrementCounter / DecrementCounter methods * Postprocess to remove unused counter buffers from linkage * Associated counter buffers are given @count suffix (invalid as a user identifier) Not yet present: * reflection queries to obtain bindings for counter buffers * Append/Consume buffers * Ability to use SB references passed as fn parameters --- .../hlsl.structbuffer.incdec.frag.out | 316 ++++++++++++++++++ Test/hlsl.structbuffer.incdec.frag | 19 ++ glslang/Include/BaseTypes.h | 7 + glslang/Include/intermediate.h | 4 + gtests/Hlsl.FromFile.cpp | 1 + hlsl/hlslGrammar.cpp | 21 +- hlsl/hlslParseHelper.cpp | 158 ++++++++- hlsl/hlslParseHelper.h | 12 + hlsl/hlslParseables.cpp | 6 + 9 files changed, 520 insertions(+), 24 deletions(-) create mode 100644 Test/baseResults/hlsl.structbuffer.incdec.frag.out create mode 100644 Test/hlsl.structbuffer.incdec.frag diff --git a/Test/baseResults/hlsl.structbuffer.incdec.frag.out b/Test/baseResults/hlsl.structbuffer.incdec.frag.out new file mode 100644 index 000000000..dc68d9444 --- /dev/null +++ b/Test/baseResults/hlsl.structbuffer.incdec.frag.out @@ -0,0 +1,316 @@ +hlsl.structbuffer.incdec.frag +Shader version: 500 +gl_FragCoord origin is upper left +0:? Sequence +0:7 Function Definition: @main(u1; ( temp 4-component vector of float) +0:7 Function Parameters: +0:7 'pos' ( in uint) +0:? 
Sequence +0:8 Sequence +0:8 move second child to first child ( temp 4-component vector of uint) +0:8 'result' ( temp 4-component vector of uint) +0:8 Constant: +0:8 0 (const uint) +0:8 0 (const uint) +0:8 0 (const uint) +0:8 0 (const uint) +0:10 direct index (layout( row_major std430) buffer 4-component vector of uint) +0:10 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of 4-component vector of uint) +0:10 'sbuf_rw_i' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of uint @data}) +0:10 Constant: +0:10 0 (const uint) +0:10 Constant: +0:10 7 (const int) +0:11 direct index (layout( row_major std430) buffer 4-component vector of uint) +0:11 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of 4-component vector of uint) +0:11 'sbuf_rw_d' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of uint @data}) +0:11 Constant: +0:11 0 (const uint) +0:11 Constant: +0:11 7 (const int) +0:13 move second child to first child ( temp 4-component vector of uint) +0:13 direct index (layout( row_major std430) buffer 4-component vector of uint) +0:13 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of 4-component vector of uint) +0:13 'sbuf_rw_nocounter' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of uint @data}) +0:13 Constant: +0:13 0 (const uint) +0:13 Constant: +0:13 5 (const int) +0:13 Constant: +0:13 2 (const uint) +0:13 2 (const uint) +0:13 2 (const uint) +0:13 2 (const uint) +0:15 Sequence +0:15 move second child to first child ( temp uint) +0:15 'c1' ( temp uint) +0:15 AtomicAdd ( temp uint) +0:15 @count: direct index for structure ( temp int) +0:15 'sbuf_rw_i@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int 
@count}) +0:15 Constant: +0:15 0 (const int) +0:15 Constant: +0:15 1 (const int) +0:16 Sequence +0:16 move second child to first child ( temp uint) +0:16 'c2' ( temp uint) +0:16 AtomicAdd ( temp uint) +0:16 @count: direct index for structure ( temp int) +0:16 'sbuf_rw_d@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count}) +0:16 Constant: +0:16 0 (const int) +0:16 Constant: +0:16 -1 (const int) +0:18 Branch: Return with expression +0:? Construct vec4 ( temp 4-component vector of float) +0:18 Convert uint to float ( temp float) +0:18 direct index ( temp uint) +0:18 'result' ( temp 4-component vector of uint) +0:18 Constant: +0:18 0 (const int) +0:18 Convert uint to float ( temp float) +0:18 direct index ( temp uint) +0:18 'result' ( temp 4-component vector of uint) +0:18 Constant: +0:18 1 (const int) +0:18 Convert uint to float ( temp float) +0:18 'c1' ( temp uint) +0:18 Convert uint to float ( temp float) +0:18 'c2' ( temp uint) +0:7 Function Definition: main( ( temp void) +0:7 Function Parameters: +0:? Sequence +0:7 move second child to first child ( temp uint) +0:? 'pos' ( temp uint) +0:? 'pos' (layout( location=0) in uint) +0:7 move second child to first child ( temp 4-component vector of float) +0:? '@entryPointOutput' (layout( location=0) out 4-component vector of float) +0:7 Function Call: @main(u1; ( temp 4-component vector of float) +0:? 'pos' ( temp uint) +0:? Linker Objects +0:? 'sbuf_rw_i' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of uint @data}) +0:? 'sbuf_rw_i@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count}) +0:? 'sbuf_rw_d' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of uint @data}) +0:? 'sbuf_rw_d@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count}) +0:? 
'sbuf_rw_nocounter' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of uint @data}) +0:? '@entryPointOutput' (layout( location=0) out 4-component vector of float) +0:? 'pos' (layout( location=0) in uint) + + +Linked fragment stage: + + +Shader version: 500 +gl_FragCoord origin is upper left +0:? Sequence +0:7 Function Definition: @main(u1; ( temp 4-component vector of float) +0:7 Function Parameters: +0:7 'pos' ( in uint) +0:? Sequence +0:8 Sequence +0:8 move second child to first child ( temp 4-component vector of uint) +0:8 'result' ( temp 4-component vector of uint) +0:8 Constant: +0:8 0 (const uint) +0:8 0 (const uint) +0:8 0 (const uint) +0:8 0 (const uint) +0:10 direct index (layout( row_major std430) buffer 4-component vector of uint) +0:10 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of 4-component vector of uint) +0:10 'sbuf_rw_i' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of uint @data}) +0:10 Constant: +0:10 0 (const uint) +0:10 Constant: +0:10 7 (const int) +0:11 direct index (layout( row_major std430) buffer 4-component vector of uint) +0:11 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of 4-component vector of uint) +0:11 'sbuf_rw_d' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of uint @data}) +0:11 Constant: +0:11 0 (const uint) +0:11 Constant: +0:11 7 (const int) +0:13 move second child to first child ( temp 4-component vector of uint) +0:13 direct index (layout( row_major std430) buffer 4-component vector of uint) +0:13 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of 4-component vector of uint) +0:13 'sbuf_rw_nocounter' (layout( row_major std430) buffer block{layout( row_major std430) buffer 
implicitly-sized array of 4-component vector of uint @data}) +0:13 Constant: +0:13 0 (const uint) +0:13 Constant: +0:13 5 (const int) +0:13 Constant: +0:13 2 (const uint) +0:13 2 (const uint) +0:13 2 (const uint) +0:13 2 (const uint) +0:15 Sequence +0:15 move second child to first child ( temp uint) +0:15 'c1' ( temp uint) +0:15 AtomicAdd ( temp uint) +0:15 @count: direct index for structure ( temp int) +0:15 'sbuf_rw_i@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count}) +0:15 Constant: +0:15 0 (const int) +0:15 Constant: +0:15 1 (const int) +0:16 Sequence +0:16 move second child to first child ( temp uint) +0:16 'c2' ( temp uint) +0:16 AtomicAdd ( temp uint) +0:16 @count: direct index for structure ( temp int) +0:16 'sbuf_rw_d@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count}) +0:16 Constant: +0:16 0 (const int) +0:16 Constant: +0:16 -1 (const int) +0:18 Branch: Return with expression +0:? Construct vec4 ( temp 4-component vector of float) +0:18 Convert uint to float ( temp float) +0:18 direct index ( temp uint) +0:18 'result' ( temp 4-component vector of uint) +0:18 Constant: +0:18 0 (const int) +0:18 Convert uint to float ( temp float) +0:18 direct index ( temp uint) +0:18 'result' ( temp 4-component vector of uint) +0:18 Constant: +0:18 1 (const int) +0:18 Convert uint to float ( temp float) +0:18 'c1' ( temp uint) +0:18 Convert uint to float ( temp float) +0:18 'c2' ( temp uint) +0:7 Function Definition: main( ( temp void) +0:7 Function Parameters: +0:? Sequence +0:7 move second child to first child ( temp uint) +0:? 'pos' ( temp uint) +0:? 'pos' (layout( location=0) in uint) +0:7 move second child to first child ( temp 4-component vector of float) +0:? '@entryPointOutput' (layout( location=0) out 4-component vector of float) +0:7 Function Call: @main(u1; ( temp 4-component vector of float) +0:? 'pos' ( temp uint) +0:? Linker Objects +0:? 
'sbuf_rw_i' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of uint @data}) +0:? 'sbuf_rw_i@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count}) +0:? 'sbuf_rw_d' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of uint @data}) +0:? 'sbuf_rw_d@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count}) +0:? 'sbuf_rw_nocounter' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of uint @data}) +0:? '@entryPointOutput' (layout( location=0) out 4-component vector of float) +0:? 'pos' (layout( location=0) in uint) + +// Module Version 10000 +// Generated by (magic number): 80001 +// Id's are bound by 69 + + Capability Shader + 1: ExtInstImport "GLSL.std.450" + MemoryModel Logical GLSL450 + EntryPoint Fragment 4 "main" 62 65 + ExecutionMode 4 OriginUpperLeft + Source HLSL 500 + Name 4 "main" + Name 12 "@main(u1;" + Name 11 "pos" + Name 16 "result" + Name 20 "sbuf_rw_i" + MemberName 20(sbuf_rw_i) 0 "@data" + Name 22 "sbuf_rw_i" + Name 26 "sbuf_rw_d" + Name 27 "sbuf_rw_nocounter" + Name 33 "c1" + Name 34 "sbuf_rw_i@count" + MemberName 34(sbuf_rw_i@count) 0 "@count" + Name 36 "sbuf_rw_i@count" + Name 42 "c2" + Name 43 "sbuf_rw_d@count" + Name 60 "pos" + Name 62 "pos" + Name 65 "@entryPointOutput" + Name 66 "param" + Decorate 19 ArrayStride 16 + MemberDecorate 20(sbuf_rw_i) 0 Offset 0 + Decorate 20(sbuf_rw_i) BufferBlock + Decorate 22(sbuf_rw_i) DescriptorSet 0 + Decorate 26(sbuf_rw_d) DescriptorSet 0 + Decorate 27(sbuf_rw_nocounter) DescriptorSet 0 + MemberDecorate 34(sbuf_rw_i@count) 0 Offset 0 + Decorate 34(sbuf_rw_i@count) BufferBlock + Decorate 36(sbuf_rw_i@count) DescriptorSet 0 + Decorate 43(sbuf_rw_d@count) DescriptorSet 0 + Decorate 62(pos) Location 0 + Decorate 65(@entryPointOutput) Location 
0 + 2: TypeVoid + 3: TypeFunction 2 + 6: TypeInt 32 0 + 7: TypePointer Function 6(int) + 8: TypeFloat 32 + 9: TypeVector 8(float) 4 + 10: TypeFunction 9(fvec4) 7(ptr) + 14: TypeVector 6(int) 4 + 15: TypePointer Function 14(ivec4) + 17: 6(int) Constant 0 + 18: 14(ivec4) ConstantComposite 17 17 17 17 + 19: TypeRuntimeArray 14(ivec4) + 20(sbuf_rw_i): TypeStruct 19 + 21: TypePointer Uniform 20(sbuf_rw_i) + 22(sbuf_rw_i): 21(ptr) Variable Uniform + 23: TypeInt 32 1 + 24: 23(int) Constant 0 + 25: 23(int) Constant 7 + 26(sbuf_rw_d): 21(ptr) Variable Uniform +27(sbuf_rw_nocounter): 21(ptr) Variable Uniform + 28: 23(int) Constant 5 + 29: 6(int) Constant 2 + 30: 14(ivec4) ConstantComposite 29 29 29 29 + 31: TypePointer Uniform 14(ivec4) +34(sbuf_rw_i@count): TypeStruct 23(int) + 35: TypePointer Uniform 34(sbuf_rw_i@count) +36(sbuf_rw_i@count): 35(ptr) Variable Uniform + 37: TypePointer Uniform 23(int) + 39: 23(int) Constant 1 + 40: 6(int) Constant 1 +43(sbuf_rw_d@count): 35(ptr) Variable Uniform + 45: 23(int) Constant 4294967295 + 61: TypePointer Input 6(int) + 62(pos): 61(ptr) Variable Input + 64: TypePointer Output 9(fvec4) +65(@entryPointOutput): 64(ptr) Variable Output + 4(main): 2 Function None 3 + 5: Label + 60(pos): 7(ptr) Variable Function + 66(param): 7(ptr) Variable Function + 63: 6(int) Load 62(pos) + Store 60(pos) 63 + 67: 6(int) Load 60(pos) + Store 66(param) 67 + 68: 9(fvec4) FunctionCall 12(@main(u1;) 66(param) + Store 65(@entryPointOutput) 68 + Return + FunctionEnd + 12(@main(u1;): 9(fvec4) Function None 10 + 11(pos): 7(ptr) FunctionParameter + 13: Label + 16(result): 15(ptr) Variable Function + 33(c1): 7(ptr) Variable Function + 42(c2): 7(ptr) Variable Function + Store 16(result) 18 + 32: 31(ptr) AccessChain 27(sbuf_rw_nocounter) 24 28 + Store 32 30 + 38: 37(ptr) AccessChain 36(sbuf_rw_i@count) 24 + 41: 6(int) AtomicIAdd 38 40 17 39 + Store 33(c1) 41 + 44: 37(ptr) AccessChain 43(sbuf_rw_d@count) 24 + 46: 6(int) AtomicIAdd 44 40 17 45 + Store 42(c2) 46 + 47: 
7(ptr) AccessChain 16(result) 17 + 48: 6(int) Load 47 + 49: 8(float) ConvertUToF 48 + 50: 7(ptr) AccessChain 16(result) 40 + 51: 6(int) Load 50 + 52: 8(float) ConvertUToF 51 + 53: 6(int) Load 33(c1) + 54: 8(float) ConvertUToF 53 + 55: 6(int) Load 42(c2) + 56: 8(float) ConvertUToF 55 + 57: 9(fvec4) CompositeConstruct 49 52 54 56 + ReturnValue 57 + FunctionEnd diff --git a/Test/hlsl.structbuffer.incdec.frag b/Test/hlsl.structbuffer.incdec.frag new file mode 100644 index 000000000..108dcb610 --- /dev/null +++ b/Test/hlsl.structbuffer.incdec.frag @@ -0,0 +1,19 @@ +RWStructuredBuffer<uint4> sbuf_rw_i; +RWStructuredBuffer<uint4> sbuf_rw_d; + +RWStructuredBuffer<uint4> sbuf_rw_nocounter; // doesn't use inc or dec + +float4 main(uint pos : FOO) : SV_Target0 +{ + uint4 result = 0; + + sbuf_rw_i[7]; + sbuf_rw_d[7]; + + sbuf_rw_nocounter[5] = 2; + + uint c1 = sbuf_rw_i.IncrementCounter(); + uint c2 = sbuf_rw_d.DecrementCounter(); + + return float4(result.x, result.y, c1, c2); +} diff --git a/glslang/Include/BaseTypes.h b/glslang/Include/BaseTypes.h index a805a0603..abac5122d 100644 --- a/glslang/Include/BaseTypes.h +++ b/glslang/Include/BaseTypes.h @@ -223,6 +223,13 @@ enum TBuiltInVariable { EbvOutputPatch, EbvInputPatch, + // structbuffer types + EbvAppendConsume, // no need to differentiate append and consume + EbvRWStructuredBuffer, + EbvStructuredBuffer, + EbvByteAddressBuffer, + EbvRWByteAddressBuffer, + EbvLast }; diff --git a/glslang/Include/intermediate.h b/glslang/Include/intermediate.h index 934994d9a..60b0883be 100644 --- a/glslang/Include/intermediate.h +++ b/glslang/Include/intermediate.h @@ -633,6 +633,10 @@ enum TOperator { EOpMethodStore2, // ... EOpMethodStore3, // ... EOpMethodStore4, // ... + EOpMethodIncrementCounter, // ... + EOpMethodDecrementCounter, // ... 
+ // EOpMethodAppend is defined for geo shaders below + EOpMethodConsume, // SM5 texture methods EOpMethodGatherRed, // These are covered under the above EOpMethodSample comment about diff --git a/gtests/Hlsl.FromFile.cpp b/gtests/Hlsl.FromFile.cpp index c616dfdfd..13c748458 100644 --- a/gtests/Hlsl.FromFile.cpp +++ b/gtests/Hlsl.FromFile.cpp @@ -248,6 +248,7 @@ INSTANTIATE_TEST_CASE_P( {"hlsl.structbuffer.atomics.frag", "main"}, {"hlsl.structbuffer.byte.frag", "main"}, {"hlsl.structbuffer.coherent.frag", "main"}, + {"hlsl.structbuffer.incdec.frag", "main"}, {"hlsl.structbuffer.fn.frag", "main"}, {"hlsl.structbuffer.rw.frag", "main"}, {"hlsl.structbuffer.rwbyte.frag", "main"}, diff --git a/hlsl/hlslGrammar.cpp b/hlsl/hlslGrammar.cpp index f58f74204..72b58d167 100755 --- a/hlsl/hlslGrammar.cpp +++ b/hlsl/hlslGrammar.cpp @@ -475,9 +475,10 @@ bool HlslGrammar::acceptDeclaration(TIntermNode*& nodeList) if (variableType.getBasicType() != EbtString && parseContext.getAnnotationNestingLevel() == 0) { if (typedefDecl) parseContext.declareTypedef(idToken.loc, *fullName, variableType); - else if (variableType.getBasicType() == EbtBlock) + else if (variableType.getBasicType() == EbtBlock) { parseContext.declareBlock(idToken.loc, variableType, fullName); - else { + parseContext.declareStructBufferCounter(idToken.loc, variableType, *fullName); + } else { if (variableType.getQualifier().storage == EvqUniform && ! 
variableType.containsOpaque()) { // this isn't really an individual variable, but a member of the $Global buffer parseContext.growGlobalUniformBlock(idToken.loc, variableType, *fullName); @@ -1955,24 +1956,29 @@ bool HlslGrammar::acceptStructBufferType(TType& type) bool readonly = false; TStorageQualifier storage = EvqBuffer; + TBuiltInVariable builtinType = EbvNone; switch (structBuffType) { case EHTokAppendStructuredBuffer: - unimplemented("AppendStructuredBuffer"); - return false; + builtinType = EbvAppendConsume; + break; case EHTokByteAddressBuffer: hasTemplateType = false; readonly = true; + builtinType = EbvByteAddressBuffer; break; case EHTokConsumeStructuredBuffer: - unimplemented("ConsumeStructuredBuffer"); - return false; + builtinType = EbvAppendConsume; + break; case EHTokRWByteAddressBuffer: hasTemplateType = false; + builtinType = EbvRWByteAddressBuffer; break; case EHTokRWStructuredBuffer: + builtinType = EbvRWStructuredBuffer; break; case EHTokStructuredBuffer: + builtinType = EbvStructuredBuffer; readonly = true; break; default: @@ -2014,8 +2020,6 @@ bool HlslGrammar::acceptStructBufferType(TType& type) // field name is canonical for all structbuffers templateType->setFieldName("@data"); - // Create block type. TODO: hidden internal uint member when needed - TTypeList* blockStruct = new TTypeList; TTypeLoc member = { templateType, token.loc }; blockStruct->push_back(member); @@ -2025,6 +2029,7 @@ bool HlslGrammar::acceptStructBufferType(TType& type) blockType.getQualifier().storage = storage; blockType.getQualifier().readonly = readonly; + blockType.getQualifier().builtIn = builtinType; // We may have created an equivalent type before, in which case we should use its // deep structure. 
diff --git a/hlsl/hlslParseHelper.cpp b/hlsl/hlslParseHelper.cpp index a2a490ca7..54b22a48d 100755 --- a/hlsl/hlslParseHelper.cpp +++ b/hlsl/hlslParseHelper.cpp @@ -842,7 +842,11 @@ bool HlslParseContext::isStructBufferMethod(const TString& name) const name == "InterlockedMax" || name == "InterlockedMin" || name == "InterlockedOr" || - name == "InterlockedXor"; + name == "InterlockedXor" || + name == "IncrementCounter" || + name == "DecrementCounter" || + name == "Append" || + name == "Consume"; } // @@ -1514,7 +1518,7 @@ void HlslParseContext::handleFunctionDeclarator(const TSourceLoc& loc, TFunction error(loc, "function name is redeclaration of existing name", function.getName().c_str(), ""); } -// Add interstage IO variables to the linkage in canonical order. +// Finalization step: Add interstage IO variables to the linkage in canonical order. void HlslParseContext::addInterstageIoToLinkage() { TSourceLoc loc; @@ -2438,24 +2442,96 @@ TIntermAggregate* HlslParseContext::handleSamplerTextureCombine(const TSourceLoc return txcombine; } +// Return true if this is a buffer type that has an associated counter buffer. +bool HlslParseContext::hasStructBuffCounter(const TString& name) const +{ + const auto bivIt = structBufferBuiltIn.find(name); + if (bivIt == structBufferBuiltIn.end()) + return false; + + switch (bivIt->second) { + case EbvAppendConsume: // fall through... + case EbvRWStructuredBuffer: // ... + return true; + default: + return false; // other builtin types do not have. + } +} + +// declare counter for a structured buffer type +void HlslParseContext::declareStructBufferCounter(const TSourceLoc& loc, const TType& bufferType, const TString& name) +{ + // Bail out if not a struct buffer + if (! isStructBufferType(bufferType)) + return; + + if (! 
hasStructBuffCounter(name)) + return; + + // Counter type + TType* counterType = new TType(EbtInt, EvqBuffer); + counterType->setFieldName("@count"); + + TTypeList* blockStruct = new TTypeList; + TTypeLoc member = { counterType, loc }; + blockStruct->push_back(member); + + TString* blockName = new TString(name); + *blockName += "@count"; + + structBufferCounter[*blockName] = false; + + TType blockType(blockStruct, "", counterType->getQualifier()); + blockType.getQualifier().storage = EvqBuffer; + + shareStructBufferType(blockType); + declareBlock(loc, blockType, blockName); +} + +// return the counter that goes with a given structuredbuffer +TIntermTyped* HlslParseContext::getStructBufferCounter(const TSourceLoc& loc, TIntermTyped* buffer) +{ + // Bail out if not a struct buffer + if (buffer == nullptr || ! isStructBufferType(buffer->getType())) + return nullptr; + + TString blockName(buffer->getAsSymbolNode()->getName()); + blockName += "@count"; + + // Mark the counter as being used + structBufferCounter[blockName] = true; + + TIntermTyped* counterVar = handleVariable(loc, &blockName); // find the block structure + TIntermTyped* index = intermediate.addConstantUnion(0, loc); // index to counter inside block struct + + TIntermTyped* counterMember = intermediate.addIndex(EOpIndexDirectStruct, counterVar, index, loc); + counterMember->setType(TType(EbtInt)); + return counterMember; +} + + // // Decompose structure buffer methods into AST // void HlslParseContext::decomposeStructBufferMethods(const TSourceLoc& loc, TIntermTyped*& node, TIntermNode* arguments) { - if (!node || !node->getAsOperator()) + if (node == nullptr || node->getAsOperator() == nullptr || arguments == nullptr) return; const TOperator op = node->getAsOperator()->getOp(); - TIntermAggregate* argAggregate = arguments ? 
arguments->getAsAggregate() : nullptr; - if (argAggregate == nullptr) - return; - - if (argAggregate->getSequence().empty()) - return; + TIntermAggregate* argAggregate = arguments->getAsAggregate(); // Buffer is the object upon which method is called, so always arg 0 - TIntermTyped* bufferObj = argAggregate->getSequence()[0]->getAsTyped(); + TIntermTyped* bufferObj = nullptr; + + // The parameters can be an aggregate, or just the object as a symbol if there are no fn params. + if (argAggregate) { + if (argAggregate->getSequence().empty()) + return; + bufferObj = argAggregate->getSequence()[0]->getAsTyped(); + } else { + bufferObj = arguments->getAsSymbolNode(); + } // Index to obtain the runtime sized array out of the buffer. TIntermTyped* argArray = indexStructBufferContent(loc, bufferObj); @@ -2670,6 +2746,29 @@ void HlslParseContext::decomposeStructBufferMethods(const TSourceLoc& loc, TInte } break; + + case EOpMethodIncrementCounter: + case EOpMethodDecrementCounter: + { + // These methods require a hidden internal counter, obtained via getStructBufferCounter() + TIntermTyped* incrementValue = intermediate.addConstantUnion(op == EOpMethodIncrementCounter ? 1 : -1, loc, true); + TIntermTyped* counter = getStructBufferCounter(loc, bufferObj); // obtain the counter member + + node = incrementValue; + + if (counter == nullptr) + break; + + TIntermAggregate* counterIncrement = new TIntermAggregate(EOpAtomicAdd); + counterIncrement->setType(TType(EbtUint, EvqTemporary)); + counterIncrement->setLoc(loc); + counterIncrement->getSequence().push_back(counter); + counterIncrement->getSequence().push_back(incrementValue); + + node = counterIncrement; + } + break; + default: break; // most pass through unchanged } @@ -3978,10 +4077,18 @@ TIntermTyped* HlslParseContext::handleFunctionCall(const TSourceLoc& loc, TFunct // TODO: this needs improvement: there's no way at present to look up a signature in // the symbol table for an arbitrary type. 
This is a temporary hack until that ability exists. // It will have false positives, since it doesn't check arg counts or types. - if (arguments && arguments->getAsAggregate()) { - const TIntermSequence& sequence = arguments->getAsAggregate()->getSequence(); + if (arguments) { + // Check if first argument is struct buffer type. It may be an aggregate or a symbol, so we + // look for either case. - if (!sequence.empty() && isStructBufferType(sequence[0]->getAsTyped()->getType())) { + TIntermTyped* arg0 = nullptr; + + if (arguments->getAsAggregate() && arguments->getAsAggregate()->getSequence().size() > 0) + arg0 = arguments->getAsAggregate()->getSequence()[0]->getAsTyped(); + else if (arguments->getAsSymbolNode()) + arg0 = arguments->getAsSymbolNode(); + + if (arg0 != nullptr && isStructBufferType(arg0->getType())) { static const int methodPrefixSize = sizeof(BUILTIN_PREFIX)-1; if (function->getName().length() > methodPrefixSize && @@ -5845,8 +5952,9 @@ const TFunction* HlslParseContext::findFunction(const TSourceLoc& loc, TFunction // These builtin ops can accept any type, so we bypass the argument selection if (candidateList.size() == 1 && builtIn && (candidateList[0]->getBuiltInOp() == EOpMethodAppend || - candidateList[0]->getBuiltInOp() == EOpMethodRestartStrip)) { - + candidateList[0]->getBuiltInOp() == EOpMethodRestartStrip || + candidateList[0]->getBuiltInOp() == EOpMethodIncrementCounter || + candidateList[0]->getBuiltInOp() == EOpMethodDecrementCounter)) { return candidateList[0]; } @@ -6856,6 +6964,10 @@ void HlslParseContext::declareBlock(const TSourceLoc& loc, TType& type, const TS switch (type.getQualifier().storage) { case EvqUniform: case EvqBuffer: + // remember pre-sanitized builtin type + if (type.getQualifier().storage == EvqBuffer && instanceName != nullptr) + structBufferBuiltIn[*instanceName] = type.getQualifier().builtIn; + correctUniform(type.getQualifier()); break; case EvqVaryingIn: @@ -7670,7 +7782,7 @@ TIntermSymbol* 
HlslParseContext::findLinkageSymbol(TBuiltInVariable biType) cons return intermediate.addSymbol(*it->second->getAsVariable()); } -// Add patch constant function invocation +// Finalization step: Add patch constant function invocation void HlslParseContext::addPatchConstantInvocation() { TSourceLoc loc; @@ -8039,9 +8151,23 @@ void HlslParseContext::addPatchConstantInvocation() epBodySeq.insert(epBodySeq.end(), invocationIdTest); } +// Finalization step: remove unused buffer blocks from linkage (we don't know until the +// shader is entirely compiled) +void HlslParseContext::removeUnusedStructBufferCounters() +{ + const auto endIt = std::remove_if(linkageSymbols.begin(), linkageSymbols.end(), + [this](const TSymbol* sym) { + const auto sbcIt = structBufferCounter.find(sym->getName()); + return sbcIt != structBufferCounter.end() && !sbcIt->second; + }); + + linkageSymbols.erase(endIt, linkageSymbols.end()); +} + // post-processing void HlslParseContext::finish() { + removeUnusedStructBufferCounters(); addPatchConstantInvocation(); addInterstageIoToLinkage(); diff --git a/hlsl/hlslParseHelper.h b/hlsl/hlslParseHelper.h index 947fef771..5c6ddbd85 100755 --- a/hlsl/hlslParseHelper.h +++ b/hlsl/hlslParseHelper.h @@ -146,6 +146,7 @@ public: TIntermTyped* constructAggregate(TIntermNode*, const TType&, int, const TSourceLoc&); TIntermTyped* constructBuiltIn(const TType&, TOperator, TIntermTyped*, const TSourceLoc&, bool subset); void declareBlock(const TSourceLoc&, TType&, const TString* instanceName = 0, TArraySizes* arraySizes = 0); + void declareStructBufferCounter(const TSourceLoc& loc, const TType& bufferType, const TString& name); void fixBlockLocations(const TSourceLoc&, TQualifier&, TTypeList&, bool memberWithLocation, bool memberWithoutLocation); void fixBlockXfbOffsets(TQualifier&, TTypeList&); void fixBlockUniformOffsets(const TQualifier&, TTypeList&); @@ -274,11 +275,19 @@ protected: TType* getStructBufferContentType(const TType& type) const; bool 
isStructBufferType(const TType& type) const { return getStructBufferContentType(type) != nullptr; } TIntermTyped* indexStructBufferContent(const TSourceLoc& loc, TIntermTyped* buffer) const; + TIntermTyped* getStructBufferCounter(const TSourceLoc& loc, TIntermTyped* buffer); // Return true if this type is a reference. This is not currently a type method in case that's // a language specific answer. bool isReference(const TType& type) const { return isStructBufferType(type); } + // Return true if this is a buffer type that has an associated counter buffer. + bool hasStructBuffCounter(const TString& name) const; + + // Finalization step: remove unused buffer blocks from linkage (we don't know until the + // shader is entirely compiled) + void removeUnusedStructBufferCounters(); + // Pass through to base class after remembering builtin mappings. using TParseContextBase::trackLinkage; void trackLinkage(TSymbol& variable) override; @@ -366,6 +375,9 @@ protected: // Structuredbuffer shared types. Typically there are only a few. TVector<TType*> structBufferTypes; + + TMap<TString, TBuiltInVariable> structBufferBuiltIn; + TMap<TString, bool> structBufferCounter; // The builtin interstage IO map considers e.g, EvqPosition on input and output separately, so that we // can build the linkage correctly if position appears on both sides. 
Otherwise, multiple positions diff --git a/hlsl/hlslParseables.cpp b/hlsl/hlslParseables.cpp index c77b54147..52c0629c2 100755 --- a/hlsl/hlslParseables.cpp +++ b/hlsl/hlslParseables.cpp @@ -871,6 +871,8 @@ void TBuiltInParseablesHlsl::initialize(int /*version*/, EProfile /*profile*/, c { "InterlockedMin", nullptr, nullptr, "-", "-", EShLangAll, true }, { "InterlockedOr", nullptr, nullptr, "-", "-", EShLangAll, true }, { "InterlockedXor", nullptr, nullptr, "-", "-", EShLangAll, true }, + { "IncrementCounter", nullptr, nullptr, "-", "-", EShLangAll, true }, + { "DecrementCounter", nullptr, nullptr, "-", "-", EShLangAll, true }, // Mark end of list, since we want to avoid a range-based for, as some compilers don't handle it yet. { nullptr, nullptr, nullptr, nullptr, nullptr, 0, false }, @@ -1180,6 +1182,10 @@ void TBuiltInParseablesHlsl::identifyBuiltIns(int /*version*/, EProfile /*profil symbolTable.relateToOperator(BUILTIN_PREFIX "Store2", EOpMethodStore2); symbolTable.relateToOperator(BUILTIN_PREFIX "Store3", EOpMethodStore3); symbolTable.relateToOperator(BUILTIN_PREFIX "Store4", EOpMethodStore4); + symbolTable.relateToOperator(BUILTIN_PREFIX "IncrementCounter", EOpMethodIncrementCounter); + symbolTable.relateToOperator(BUILTIN_PREFIX "DecrementCounter", EOpMethodDecrementCounter); + symbolTable.relateToOperator(BUILTIN_PREFIX "Append", EOpMethodAppend); + symbolTable.relateToOperator(BUILTIN_PREFIX "Consume", EOpMethodConsume); symbolTable.relateToOperator(BUILTIN_PREFIX "InterlockedAdd", EOpInterlockedAdd); symbolTable.relateToOperator(BUILTIN_PREFIX "InterlockedAnd", EOpInterlockedAnd); From 12bc9aa9ce70faa37205c30f3e04b37cc1b0e689 Mon Sep 17 00:00:00 2001 From: steve-lunarg Date: Thu, 13 Apr 2017 18:42:58 -0600 Subject: [PATCH 2/3] WIP: HLSL: add Append/ConsumeBuffer support --- .../hlsl.structbuffer.append.frag.out | 223 ++++++++++++++++++ .../hlsl.structbuffer.incdec.frag.out | 89 +++---- Test/hlsl.structbuffer.append.frag | 11 + gtests/Hlsl.FromFile.cpp | 1 
+ hlsl/hlslParseHelper.cpp | 97 ++++++-- hlsl/hlslParseables.cpp | 3 +- 6 files changed, 361 insertions(+), 63 deletions(-) create mode 100644 Test/baseResults/hlsl.structbuffer.append.frag.out create mode 100644 Test/hlsl.structbuffer.append.frag diff --git a/Test/baseResults/hlsl.structbuffer.append.frag.out b/Test/baseResults/hlsl.structbuffer.append.frag.out new file mode 100644 index 000000000..53a46d5fa --- /dev/null +++ b/Test/baseResults/hlsl.structbuffer.append.frag.out @@ -0,0 +1,223 @@ +hlsl.structbuffer.append.frag +Shader version: 500 +gl_FragCoord origin is upper left +0:? Sequence +0:7 Function Definition: @main(u1; ( temp 4-component vector of float) +0:7 Function Parameters: +0:7 'pos' ( in uint) +0:? Sequence +0:8 move second child to first child ( temp void) +0:8 indirect index (layout( row_major std430) buffer 4-component vector of float) +0:8 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of 4-component vector of float) +0:8 'sbuf_a' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of float @data}) +0:8 Constant: +0:8 0 (const uint) +0:8 AtomicAdd ( temp uint) +0:8 @count: direct index for structure ( temp int) +0:8 'sbuf_a@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count}) +0:8 Constant: +0:8 0 (const int) +0:8 Constant: +0:8 1 (const int) +0:? Constant: +0:? 1.000000 +0:? 2.000000 +0:? 3.000000 +0:? 
4.000000 +0:10 Branch: Return with expression +0:10 indirect index (layout( row_major std430) buffer 4-component vector of float) +0:10 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of 4-component vector of float) +0:10 'sbuf_c' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of float @data}) +0:10 Constant: +0:10 0 (const uint) +0:10 add ( temp uint) +0:10 AtomicAdd ( temp uint) +0:10 @count: direct index for structure ( temp int) +0:10 'sbuf_c@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count}) +0:10 Constant: +0:10 0 (const int) +0:10 Constant: +0:10 -1 (const int) +0:10 Constant: +0:10 -1 (const int) +0:7 Function Definition: main( ( temp void) +0:7 Function Parameters: +0:? Sequence +0:7 move second child to first child ( temp uint) +0:? 'pos' ( temp uint) +0:? 'pos' (layout( location=0) in uint) +0:7 move second child to first child ( temp 4-component vector of float) +0:? '@entryPointOutput' (layout( location=0) out 4-component vector of float) +0:7 Function Call: @main(u1; ( temp 4-component vector of float) +0:? 'pos' ( temp uint) +0:? Linker Objects +0:? 'sbuf_a' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of float @data}) +0:? 'sbuf_a@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count}) +0:? 'sbuf_c' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of float @data}) +0:? 'sbuf_c@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count}) +0:? 'sbuf_unused' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of float @data}) +0:? '@entryPointOutput' (layout( location=0) out 4-component vector of float) +0:? 
'pos' (layout( location=0) in uint) + + +Linked fragment stage: + + +Shader version: 500 +gl_FragCoord origin is upper left +0:? Sequence +0:7 Function Definition: @main(u1; ( temp 4-component vector of float) +0:7 Function Parameters: +0:7 'pos' ( in uint) +0:? Sequence +0:8 move second child to first child ( temp void) +0:8 indirect index (layout( row_major std430) buffer 4-component vector of float) +0:8 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of 4-component vector of float) +0:8 'sbuf_a' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of float @data}) +0:8 Constant: +0:8 0 (const uint) +0:8 AtomicAdd ( temp uint) +0:8 @count: direct index for structure ( temp int) +0:8 'sbuf_a@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count}) +0:8 Constant: +0:8 0 (const int) +0:8 Constant: +0:8 1 (const int) +0:? Constant: +0:? 1.000000 +0:? 2.000000 +0:? 3.000000 +0:? 4.000000 +0:10 Branch: Return with expression +0:10 indirect index (layout( row_major std430) buffer 4-component vector of float) +0:10 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of 4-component vector of float) +0:10 'sbuf_c' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of float @data}) +0:10 Constant: +0:10 0 (const uint) +0:10 add ( temp uint) +0:10 AtomicAdd ( temp uint) +0:10 @count: direct index for structure ( temp int) +0:10 'sbuf_c@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count}) +0:10 Constant: +0:10 0 (const int) +0:10 Constant: +0:10 -1 (const int) +0:10 Constant: +0:10 -1 (const int) +0:7 Function Definition: main( ( temp void) +0:7 Function Parameters: +0:? Sequence +0:7 move second child to first child ( temp uint) +0:? 'pos' ( temp uint) +0:? 
'pos' (layout( location=0) in uint) +0:7 move second child to first child ( temp 4-component vector of float) +0:? '@entryPointOutput' (layout( location=0) out 4-component vector of float) +0:7 Function Call: @main(u1; ( temp 4-component vector of float) +0:? 'pos' ( temp uint) +0:? Linker Objects +0:? 'sbuf_a' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of float @data}) +0:? 'sbuf_a@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count}) +0:? 'sbuf_c' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of float @data}) +0:? 'sbuf_c@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count}) +0:? 'sbuf_unused' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of float @data}) +0:? '@entryPointOutput' (layout( location=0) out 4-component vector of float) +0:? 
'pos' (layout( location=0) in uint) + +// Module Version 10000 +// Generated by (magic number): 80001 +// Id's are bound by 56 + + Capability Shader + 1: ExtInstImport "GLSL.std.450" + MemoryModel Logical GLSL450 + EntryPoint Fragment 4 "main" 48 51 + ExecutionMode 4 OriginUpperLeft + Source HLSL 500 + Name 4 "main" + Name 12 "@main(u1;" + Name 11 "pos" + Name 15 "sbuf_a" + MemberName 15(sbuf_a) 0 "@data" + Name 17 "sbuf_a" + Name 20 "sbuf_a@count" + MemberName 20(sbuf_a@count) 0 "@count" + Name 22 "sbuf_a@count" + Name 36 "sbuf_c" + Name 37 "sbuf_c@count" + Name 46 "pos" + Name 48 "pos" + Name 51 "@entryPointOutput" + Name 52 "param" + Name 55 "sbuf_unused" + Decorate 14 ArrayStride 16 + MemberDecorate 15(sbuf_a) 0 Offset 0 + Decorate 15(sbuf_a) BufferBlock + Decorate 17(sbuf_a) DescriptorSet 0 + MemberDecorate 20(sbuf_a@count) 0 Offset 0 + Decorate 20(sbuf_a@count) BufferBlock + Decorate 22(sbuf_a@count) DescriptorSet 0 + Decorate 36(sbuf_c) DescriptorSet 0 + Decorate 37(sbuf_c@count) DescriptorSet 0 + Decorate 48(pos) Location 0 + Decorate 51(@entryPointOutput) Location 0 + Decorate 55(sbuf_unused) DescriptorSet 0 + 2: TypeVoid + 3: TypeFunction 2 + 6: TypeInt 32 0 + 7: TypePointer Function 6(int) + 8: TypeFloat 32 + 9: TypeVector 8(float) 4 + 10: TypeFunction 9(fvec4) 7(ptr) + 14: TypeRuntimeArray 9(fvec4) + 15(sbuf_a): TypeStruct 14 + 16: TypePointer Uniform 15(sbuf_a) + 17(sbuf_a): 16(ptr) Variable Uniform + 18: TypeInt 32 1 + 19: 18(int) Constant 0 +20(sbuf_a@count): TypeStruct 18(int) + 21: TypePointer Uniform 20(sbuf_a@count) +22(sbuf_a@count): 21(ptr) Variable Uniform + 23: TypePointer Uniform 18(int) + 25: 18(int) Constant 1 + 26: 6(int) Constant 1 + 27: 6(int) Constant 0 + 29: 8(float) Constant 1065353216 + 30: 8(float) Constant 1073741824 + 31: 8(float) Constant 1077936128 + 32: 8(float) Constant 1082130432 + 33: 9(fvec4) ConstantComposite 29 30 31 32 + 34: TypePointer Uniform 9(fvec4) + 36(sbuf_c): 16(ptr) Variable Uniform +37(sbuf_c@count): 21(ptr) 
Variable Uniform + 39: 18(int) Constant 4294967295 + 47: TypePointer Input 6(int) + 48(pos): 47(ptr) Variable Input + 50: TypePointer Output 9(fvec4) +51(@entryPointOutput): 50(ptr) Variable Output + 55(sbuf_unused): 16(ptr) Variable Uniform + 4(main): 2 Function None 3 + 5: Label + 46(pos): 7(ptr) Variable Function + 52(param): 7(ptr) Variable Function + 49: 6(int) Load 48(pos) + Store 46(pos) 49 + 53: 6(int) Load 46(pos) + Store 52(param) 53 + 54: 9(fvec4) FunctionCall 12(@main(u1;) 52(param) + Store 51(@entryPointOutput) 54 + Return + FunctionEnd + 12(@main(u1;): 9(fvec4) Function None 10 + 11(pos): 7(ptr) FunctionParameter + 13: Label + 24: 23(ptr) AccessChain 22(sbuf_a@count) 19 + 28: 6(int) AtomicIAdd 24 26 27 25 + 35: 34(ptr) AccessChain 17(sbuf_a) 19 28 + Store 35 33 + 38: 23(ptr) AccessChain 37(sbuf_c@count) 19 + 40: 6(int) AtomicIAdd 38 26 27 39 + 41: 6(int) IAdd 40 39 + 42: 34(ptr) AccessChain 36(sbuf_c) 19 41 + 43: 9(fvec4) Load 42 + ReturnValue 43 + FunctionEnd diff --git a/Test/baseResults/hlsl.structbuffer.incdec.frag.out b/Test/baseResults/hlsl.structbuffer.incdec.frag.out index dc68d9444..ebd694168 100644 --- a/Test/baseResults/hlsl.structbuffer.incdec.frag.out +++ b/Test/baseResults/hlsl.structbuffer.incdec.frag.out @@ -54,11 +54,14 @@ gl_FragCoord origin is upper left 0:16 Sequence 0:16 move second child to first child ( temp uint) 0:16 'c2' ( temp uint) -0:16 AtomicAdd ( temp uint) -0:16 @count: direct index for structure ( temp int) -0:16 'sbuf_rw_d@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count}) +0:16 add ( temp uint) +0:16 AtomicAdd ( temp uint) +0:16 @count: direct index for structure ( temp int) +0:16 'sbuf_rw_d@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count}) +0:16 Constant: +0:16 0 (const int) 0:16 Constant: -0:16 0 (const int) +0:16 -1 (const int) 0:16 Constant: 0:16 -1 (const int) 0:18 Branch: Return with expression @@ -155,11 +158,14 @@ 
gl_FragCoord origin is upper left 0:16 Sequence 0:16 move second child to first child ( temp uint) 0:16 'c2' ( temp uint) -0:16 AtomicAdd ( temp uint) -0:16 @count: direct index for structure ( temp int) -0:16 'sbuf_rw_d@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count}) +0:16 add ( temp uint) +0:16 AtomicAdd ( temp uint) +0:16 @count: direct index for structure ( temp int) +0:16 'sbuf_rw_d@count' (layout( row_major std430) buffer block{layout( row_major std430) buffer int @count}) +0:16 Constant: +0:16 0 (const int) 0:16 Constant: -0:16 0 (const int) +0:16 -1 (const int) 0:16 Constant: 0:16 -1 (const int) 0:18 Branch: Return with expression @@ -199,12 +205,12 @@ gl_FragCoord origin is upper left // Module Version 10000 // Generated by (magic number): 80001 -// Id's are bound by 69 +// Id's are bound by 70 Capability Shader 1: ExtInstImport "GLSL.std.450" MemoryModel Logical GLSL450 - EntryPoint Fragment 4 "main" 62 65 + EntryPoint Fragment 4 "main" 63 66 ExecutionMode 4 OriginUpperLeft Source HLSL 500 Name 4 "main" @@ -222,10 +228,10 @@ gl_FragCoord origin is upper left Name 36 "sbuf_rw_i@count" Name 42 "c2" Name 43 "sbuf_rw_d@count" - Name 60 "pos" - Name 62 "pos" - Name 65 "@entryPointOutput" - Name 66 "param" + Name 61 "pos" + Name 63 "pos" + Name 66 "@entryPointOutput" + Name 67 "param" Decorate 19 ArrayStride 16 MemberDecorate 20(sbuf_rw_i) 0 Offset 0 Decorate 20(sbuf_rw_i) BufferBlock @@ -236,8 +242,8 @@ gl_FragCoord origin is upper left Decorate 34(sbuf_rw_i@count) BufferBlock Decorate 36(sbuf_rw_i@count) DescriptorSet 0 Decorate 43(sbuf_rw_d@count) DescriptorSet 0 - Decorate 62(pos) Location 0 - Decorate 65(@entryPointOutput) Location 0 + Decorate 63(pos) Location 0 + Decorate 66(@entryPointOutput) Location 0 2: TypeVoid 3: TypeFunction 2 6: TypeInt 32 0 @@ -270,20 +276,20 @@ gl_FragCoord origin is upper left 40: 6(int) Constant 1 43(sbuf_rw_d@count): 35(ptr) Variable Uniform 45: 23(int) Constant 4294967295 - 61: 
TypePointer Input 6(int) - 62(pos): 61(ptr) Variable Input - 64: TypePointer Output 9(fvec4) -65(@entryPointOutput): 64(ptr) Variable Output + 62: TypePointer Input 6(int) + 63(pos): 62(ptr) Variable Input + 65: TypePointer Output 9(fvec4) +66(@entryPointOutput): 65(ptr) Variable Output 4(main): 2 Function None 3 5: Label - 60(pos): 7(ptr) Variable Function - 66(param): 7(ptr) Variable Function - 63: 6(int) Load 62(pos) - Store 60(pos) 63 - 67: 6(int) Load 60(pos) - Store 66(param) 67 - 68: 9(fvec4) FunctionCall 12(@main(u1;) 66(param) - Store 65(@entryPointOutput) 68 + 61(pos): 7(ptr) Variable Function + 67(param): 7(ptr) Variable Function + 64: 6(int) Load 63(pos) + Store 61(pos) 64 + 68: 6(int) Load 61(pos) + Store 67(param) 68 + 69: 9(fvec4) FunctionCall 12(@main(u1;) 67(param) + Store 66(@entryPointOutput) 69 Return FunctionEnd 12(@main(u1;): 9(fvec4) Function None 10 @@ -300,17 +306,18 @@ gl_FragCoord origin is upper left Store 33(c1) 41 44: 37(ptr) AccessChain 43(sbuf_rw_d@count) 24 46: 6(int) AtomicIAdd 44 40 17 45 - Store 42(c2) 46 - 47: 7(ptr) AccessChain 16(result) 17 - 48: 6(int) Load 47 - 49: 8(float) ConvertUToF 48 - 50: 7(ptr) AccessChain 16(result) 40 - 51: 6(int) Load 50 - 52: 8(float) ConvertUToF 51 - 53: 6(int) Load 33(c1) - 54: 8(float) ConvertUToF 53 - 55: 6(int) Load 42(c2) - 56: 8(float) ConvertUToF 55 - 57: 9(fvec4) CompositeConstruct 49 52 54 56 - ReturnValue 57 + 47: 6(int) IAdd 46 45 + Store 42(c2) 47 + 48: 7(ptr) AccessChain 16(result) 17 + 49: 6(int) Load 48 + 50: 8(float) ConvertUToF 49 + 51: 7(ptr) AccessChain 16(result) 40 + 52: 6(int) Load 51 + 53: 8(float) ConvertUToF 52 + 54: 6(int) Load 33(c1) + 55: 8(float) ConvertUToF 54 + 56: 6(int) Load 42(c2) + 57: 8(float) ConvertUToF 56 + 58: 9(fvec4) CompositeConstruct 50 53 55 57 + ReturnValue 58 FunctionEnd diff --git a/Test/hlsl.structbuffer.append.frag b/Test/hlsl.structbuffer.append.frag new file mode 100644 index 000000000..4c4bc3fc5 --- /dev/null +++ 
b/Test/hlsl.structbuffer.append.frag @@ -0,0 +1,11 @@ +AppendStructuredBuffer sbuf_a; +ConsumeStructuredBuffer sbuf_c; + +AppendStructuredBuffer sbuf_unused; + +float4 main(uint pos : FOO) : SV_Target0 +{ + sbuf_a.Append(float4(1,2,3,4)); + + return sbuf_c.Consume(); +} diff --git a/gtests/Hlsl.FromFile.cpp b/gtests/Hlsl.FromFile.cpp index 13c748458..165e27123 100644 --- a/gtests/Hlsl.FromFile.cpp +++ b/gtests/Hlsl.FromFile.cpp @@ -245,6 +245,7 @@ INSTANTIATE_TEST_CASE_P( {"hlsl.structarray.flatten.frag", "main"}, {"hlsl.structarray.flatten.geom", "main"}, {"hlsl.structbuffer.frag", "main"}, + {"hlsl.structbuffer.append.frag", "main"}, {"hlsl.structbuffer.atomics.frag", "main"}, {"hlsl.structbuffer.byte.frag", "main"}, {"hlsl.structbuffer.coherent.frag", "main"}, diff --git a/hlsl/hlslParseHelper.cpp b/hlsl/hlslParseHelper.cpp index 54b22a48d..f13a68e55 100755 --- a/hlsl/hlslParseHelper.cpp +++ b/hlsl/hlslParseHelper.cpp @@ -2454,7 +2454,7 @@ bool HlslParseContext::hasStructBuffCounter(const TString& name) const case EbvRWStructuredBuffer: // ... return true; default: - return false; // other builtin types do not have. + return false; // the other structuredbuffer types do not have a counter. } } @@ -2533,6 +2533,35 @@ void HlslParseContext::decomposeStructBufferMethods(const TSourceLoc& loc, TInte bufferObj = arguments->getAsSymbolNode(); } + if (bufferObj == nullptr || bufferObj->getAsSymbolNode() == nullptr) + return; + + TString bufferName(bufferObj->getAsSymbolNode()->getName()); + + const auto bivIt = structBufferBuiltIn.find(bufferName); + if (bivIt == structBufferBuiltIn.end()) + return; + + const TBuiltInVariable builtInType = bivIt->second; + + // Some methods require a hidden internal counter, obtained via getStructBufferCounter(). + // This lambda adds something to it and returns the old value. 
+ const auto incDecCounter = [&](int incval) -> TIntermTyped* { + TIntermTyped* incrementValue = intermediate.addConstantUnion(incval, loc, true); + TIntermTyped* counter = getStructBufferCounter(loc, bufferObj); // obtain the counter member + + if (counter == nullptr) + return nullptr; + + TIntermAggregate* counterIncrement = new TIntermAggregate(EOpAtomicAdd); + counterIncrement->setType(TType(EbtUint, EvqTemporary)); + counterIncrement->setLoc(loc); + counterIncrement->getSequence().push_back(counter); + counterIncrement->getSequence().push_back(incrementValue); + + return counterIncrement; + }; + // Index to obtain the runtime sized array out of the buffer. TIntermTyped* argArray = indexStructBufferContent(loc, bufferObj); if (argArray == nullptr) @@ -2545,7 +2574,9 @@ void HlslParseContext::decomposeStructBufferMethods(const TSourceLoc& loc, TInte // Byte address buffers index in bytes (only multiples of 4 permitted... not so much a byte address // buffer then, but that's what it calls itself. - const bool isByteAddressBuffer = (argArray->getBasicType() == EbtUint); + const bool isByteAddressBuffer = (builtInType == EbvByteAddressBuffer || + builtInType == EbvRWByteAddressBuffer); + if (isByteAddressBuffer) argIndex = intermediate.addBinaryNode(EOpRightShift, argIndex, intermediate.addConstantUnion(2, loc, true), loc, TType(EbtInt)); @@ -2746,28 +2777,50 @@ void HlslParseContext::decomposeStructBufferMethods(const TSourceLoc& loc, TInte } break; - case EOpMethodIncrementCounter: + { + node = incDecCounter(1); + break; + } + case EOpMethodDecrementCounter: { - // These methods require a hidden internal counter, obtained via getStructBufferCounter() - TIntermTyped* incrementValue = intermediate.addConstantUnion(op == EOpMethodIncrementCounter ? 
1 : -1, loc, true); - TIntermTyped* counter = getStructBufferCounter(loc, bufferObj); // obtain the counter member - - node = incrementValue; - - if (counter == nullptr) - break; - - TIntermAggregate* counterIncrement = new TIntermAggregate(EOpAtomicAdd); - counterIncrement->setType(TType(EbtUint, EvqTemporary)); - counterIncrement->setLoc(loc); - counterIncrement->getSequence().push_back(counter); - counterIncrement->getSequence().push_back(incrementValue); - - node = counterIncrement; + TIntermTyped* preIncValue = incDecCounter(-1); // result is original value + node = intermediate.addBinaryNode(EOpAdd, preIncValue, intermediate.addConstantUnion(-1, loc, true), loc, + preIncValue->getType()); + break; + } + + case EOpMethodAppend: + { + TIntermTyped* oldCounter = incDecCounter(1); + + TIntermTyped* lValue = intermediate.addIndex(EOpIndexIndirect, argArray, oldCounter, loc); + TIntermTyped* rValue = argAggregate->getSequence()[1]->getAsTyped(); + + const TType derefType(argArray->getType(), 0); + lValue->setType(derefType); + + node = intermediate.addAssign(EOpAssign, lValue, rValue, loc); + node->setType(TType(EbtVoid)); // Append is a void return type + + break; + } + + case EOpMethodConsume: + { + TIntermTyped* oldCounter = incDecCounter(-1); + + TIntermTyped* newCounter = intermediate.addBinaryNode(EOpAdd, oldCounter, intermediate.addConstantUnion(-1, loc, true), loc, + oldCounter->getType()); + + node = intermediate.addIndex(EOpIndexIndirect, argArray, newCounter, loc); + + const TType derefType(argArray->getType(), 0); + node->setType(derefType); + + break; } - break; default: break; // most pass through unchanged @@ -5954,7 +6007,9 @@ const TFunction* HlslParseContext::findFunction(const TSourceLoc& loc, TFunction (candidateList[0]->getBuiltInOp() == EOpMethodAppend || candidateList[0]->getBuiltInOp() == EOpMethodRestartStrip || candidateList[0]->getBuiltInOp() == EOpMethodIncrementCounter || - candidateList[0]->getBuiltInOp() == EOpMethodDecrementCounter)) 
{ + candidateList[0]->getBuiltInOp() == EOpMethodDecrementCounter || + candidateList[0]->getBuiltInOp() == EOpMethodAppend || + candidateList[0]->getBuiltInOp() == EOpMethodConsume)) { return candidateList[0]; } diff --git a/hlsl/hlslParseables.cpp b/hlsl/hlslParseables.cpp index 52c0629c2..8cb38ef37 100755 --- a/hlsl/hlslParseables.cpp +++ b/hlsl/hlslParseables.cpp @@ -873,6 +873,7 @@ void TBuiltInParseablesHlsl::initialize(int /*version*/, EProfile /*profile*/, c { "InterlockedXor", nullptr, nullptr, "-", "-", EShLangAll, true }, { "IncrementCounter", nullptr, nullptr, "-", "-", EShLangAll, true }, { "DecrementCounter", nullptr, nullptr, "-", "-", EShLangAll, true }, + { "Consume", nullptr, nullptr, "-", "-", EShLangAll, true }, // Mark end of list, since we want to avoid a range-based for, as some compilers don't handle it yet. { nullptr, nullptr, nullptr, nullptr, nullptr, 0, false }, @@ -1184,7 +1185,7 @@ void TBuiltInParseablesHlsl::identifyBuiltIns(int /*version*/, EProfile /*profil symbolTable.relateToOperator(BUILTIN_PREFIX "Store4", EOpMethodStore4); symbolTable.relateToOperator(BUILTIN_PREFIX "IncrementCounter", EOpMethodIncrementCounter); symbolTable.relateToOperator(BUILTIN_PREFIX "DecrementCounter", EOpMethodDecrementCounter); - symbolTable.relateToOperator(BUILTIN_PREFIX "Append", EOpMethodAppend); + // Append is also a GS method: we don't add it twice symbolTable.relateToOperator(BUILTIN_PREFIX "Consume", EOpMethodConsume); symbolTable.relateToOperator(BUILTIN_PREFIX "InterlockedAdd", EOpInterlockedAdd); From 350b94856a5ef49a49237de329be9e43e55519d6 Mon Sep 17 00:00:00 2001 From: steve-lunarg Date: Sat, 15 Apr 2017 08:18:16 -0600 Subject: [PATCH 3/3] WIP: HLSL: add reflection queries for structuredbuffer counter blocks This adds TProgram::getUniformBlockCounterIndex(int index), which returns the index of the block of the counter buffer associated with the block of the passed in index, if any, or -1 if none. 
--- .../baseResults/hlsl.structbuffer.append.frag.out | 4 ++-- glslang/MachineIndependent/ShaderLang.cpp | 1 + glslang/MachineIndependent/reflection.cpp | 15 +++++++++++++++ glslang/MachineIndependent/reflection.h | 14 ++++++++++++-- glslang/Public/ShaderLang.h | 1 + hlsl/hlslParseHelper.cpp | 3 +-- 6 files changed, 32 insertions(+), 6 deletions(-) diff --git a/Test/baseResults/hlsl.structbuffer.append.frag.out b/Test/baseResults/hlsl.structbuffer.append.frag.out index 53a46d5fa..323e960c9 100644 --- a/Test/baseResults/hlsl.structbuffer.append.frag.out +++ b/Test/baseResults/hlsl.structbuffer.append.frag.out @@ -6,7 +6,7 @@ gl_FragCoord origin is upper left 0:7 Function Parameters: 0:7 'pos' ( in uint) 0:? Sequence -0:8 move second child to first child ( temp void) +0:8 move second child to first child ( temp 4-component vector of float) 0:8 indirect index (layout( row_major std430) buffer 4-component vector of float) 0:8 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of 4-component vector of float) 0:8 'sbuf_a' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of float @data}) @@ -70,7 +70,7 @@ gl_FragCoord origin is upper left 0:7 Function Parameters: 0:7 'pos' ( in uint) 0:? 
Sequence -0:8 move second child to first child ( temp void) +0:8 move second child to first child ( temp 4-component vector of float) 0:8 indirect index (layout( row_major std430) buffer 4-component vector of float) 0:8 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of 4-component vector of float) 0:8 'sbuf_a' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of 4-component vector of float @data}) diff --git a/glslang/MachineIndependent/ShaderLang.cpp b/glslang/MachineIndependent/ShaderLang.cpp index 14f2bde2a..95c16e802 100644 --- a/glslang/MachineIndependent/ShaderLang.cpp +++ b/glslang/MachineIndependent/ShaderLang.cpp @@ -1778,6 +1778,7 @@ const char* TProgram::getUniformBlockName(int index) const { return reflection int TProgram::getUniformBlockSize(int index) const { return reflection->getUniformBlock(index).size; } int TProgram::getUniformIndex(const char* name) const { return reflection->getIndex(name); } int TProgram::getUniformBlockIndex(int index) const { return reflection->getUniform(index).index; } +int TProgram::getUniformBlockCounterIndex(int index) const { return reflection->getUniformBlock(index).counterIndex; } int TProgram::getUniformType(int index) const { return reflection->getUniform(index).glDefineType; } int TProgram::getUniformBufferOffset(int index) const { return reflection->getUniform(index).offset; } int TProgram::getUniformArraySize(int index) const { return reflection->getUniform(index).size; } diff --git a/glslang/MachineIndependent/reflection.cpp b/glslang/MachineIndependent/reflection.cpp index f3f28f026..f0566c648 100644 --- a/glslang/MachineIndependent/reflection.cpp +++ b/glslang/MachineIndependent/reflection.cpp @@ -707,6 +707,19 @@ void TReflection::buildAttributeReflection(EShLanguage stage, const TIntermediat } } +// build counter block index associations for buffers +void TReflection::buildCounterIndices() +{ + // search for ones that 
have counters + for (int i = 0; i < int(indexToUniformBlock.size()); ++i) { + const TString counterName(indexToUniformBlock[i].name + "@count"); + const int index = getIndex(counterName); + + if (index >= 0) + indexToUniformBlock[i].counterIndex = index; + } +} + // Merge live symbols from 'intermediate' into the existing reflection database. // // Returns false if the input is too malformed to do this. @@ -729,6 +742,8 @@ bool TReflection::addStage(EShLanguage stage, const TIntermediate& intermediate) function->traverse(&it); } + buildCounterIndices(); + return true; } diff --git a/glslang/MachineIndependent/reflection.h b/glslang/MachineIndependent/reflection.h index c80d3ea90..7a1cc8ed8 100644 --- a/glslang/MachineIndependent/reflection.h +++ b/glslang/MachineIndependent/reflection.h @@ -57,11 +57,16 @@ class TObjectReflection { public: TObjectReflection(const TString& pName, const TType& pType, int pOffset, int pGLDefineType, int pSize, int pIndex) : name(pName), offset(pOffset), - glDefineType(pGLDefineType), size(pSize), index(pIndex), type(pType.clone()) { } + glDefineType(pGLDefineType), size(pSize), index(pIndex), counterIndex(-1), type(pType.clone()) { } void dump() const { - printf("%s: offset %d, type %x, size %d, index %d, binding %d\n", + printf("%s: offset %d, type %x, size %d, index %d, binding %d", name.c_str(), offset, glDefineType, size, index, getBinding() ); + + if (counterIndex != -1) + printf(", counter %d", counterIndex); + + printf("\n"); } const TType* const getType() const { return type; } @@ -71,6 +76,7 @@ public: int glDefineType; int size; // data size in bytes for a block, array size for a (non-block) object that's an array int index; + int counterIndex; static TObjectReflection badReflection() { return TObjectReflection(); } @@ -140,6 +146,9 @@ public: return it->second; } + // see getIndex(const char*) + int getIndex(const TString& name) const { return getIndex(name.c_str()); } + // Thread local size unsigned getLocalSize(int dim) 
const { return dim <= 2 ? localSize[dim] : 0; } @@ -148,6 +157,7 @@ public: protected: friend class glslang::TReflectionTraverser; + void buildCounterIndices(); void buildAttributeReflection(EShLanguage, const TIntermediate&); // Need a TString hash: typedef std::unordered_map TNameToIndex; diff --git a/glslang/Public/ShaderLang.h b/glslang/Public/ShaderLang.h index 7ea944664..e5e50508e 100644 --- a/glslang/Public/ShaderLang.h +++ b/glslang/Public/ShaderLang.h @@ -518,6 +518,7 @@ public: int getUniformBlockSize(int blockIndex) const; // can be used for glGetActiveUniformBlockiv(UNIFORM_BLOCK_DATA_SIZE) int getUniformIndex(const char* name) const; // can be used for glGetUniformIndices() int getUniformBlockIndex(int index) const; // can be used for glGetActiveUniformsiv(GL_UNIFORM_BLOCK_INDEX) + int getUniformBlockCounterIndex(int index) const; // returns block index of associated counter. int getUniformType(int index) const; // can be used for glGetActiveUniformsiv(GL_UNIFORM_TYPE) int getUniformBufferOffset(int index) const; // can be used for glGetActiveUniformsiv(GL_UNIFORM_OFFSET) int getUniformArraySize(int index) const; // can be used for glGetActiveUniformsiv(GL_UNIFORM_SIZE) diff --git a/hlsl/hlslParseHelper.cpp b/hlsl/hlslParseHelper.cpp index f13a68e55..0dde71aff 100755 --- a/hlsl/hlslParseHelper.cpp +++ b/hlsl/hlslParseHelper.cpp @@ -2801,8 +2801,7 @@ void HlslParseContext::decomposeStructBufferMethods(const TSourceLoc& loc, TInte const TType derefType(argArray->getType(), 0); lValue->setType(derefType); - node = intermediate.addAssign(EOpAssign, lValue, rValue, loc); - node->setType(TType(EbtVoid)); // Append is a void return type + node = intermediate.addAssign(EOpAssign, lValue, rValue, loc); break; }